Levenshtein distance
From Free Pascal wiki
Jump to navigationJump to search
│
English (en) │
The Levenshtein distance is a string metric for measuring the difference between two sequences. Informally, the Levenshtein distance between two words is the minimum number of single-character edits (insertion, deletion, substitution) required to change one word into the other.
This works on both ASCII and UTF-8 encoding. This implementations use two-dimensional dynamic array to store the distances of prefixes of the words compared.
uses
Classes, SysUtils, Math, LazUTF8;
function LevenshteinDistance(const s1 : string; s2 : string) : integer;
function LevenshteinDistanceText(const s1, s2: string): Integer;
implementation
{------------------------------------------------------------------------------
Name: LevenshteinDistance
Params: s1, s2 - UTF8 encoded strings
Returns: Minimum number of single-character edits.
Compare 2 UTF8 encoded strings, case sensitive.
------------------------------------------------------------------------------}
function LevenshteinDistance(const s1 : string; s2 : string) : integer;
var
length1, length2, i, j ,
value1, value2, value3 : integer;
matrix : array of array of integer;
begin
length1 := UTF8Length( s1 );
length2 := UTF8Length( s2 );
SetLength (matrix, length1 + 1, length2 + 1);
for i := 0 to length1 do matrix [i, 0] := i;
for j := 0 to length2 do matrix [0, j] := j;
for i := 1 to length1 do
for j := 1 to length2 do
begin
if UTF8Copy( s1, i, 1) = UTF8Copy( s2, j, 1 )
then matrix[i,j] := matrix[i-1,j-1]
else begin
value1 := matrix [i-1, j] + 1;
value2 := matrix [i, j-1] + 1;
value3 := matrix[i-1, j-1] + 1;
matrix [i, j] := min( value1, min( value2, value3 ));
end;
end;
result := matrix [length1, length2];
end;
{------------------------------------------------------------------------------
Name: LevenshteinDistanceText
Params: s1, s2 - UTF8 encoded strings
Returns: Minimum number of single-character edits.
Compare 2 UTF8 encoded strings, case insensitive.
------------------------------------------------------------------------------}
function LevenshteinDistanceText(const s1, s2: string): integer;
var
s1lower, s2lower: string;
begin
s1lower := UTF8LowerCase( s1 );
s2lower := UTF8LowerCase( s2 );
result := LevenshteinDistance( s1lower, s2lower );
end;
end.