Damerau-Levenshtein distance algorithm in T-SQL (SQL Server)

-- Computes the restricted Damerau-Levenshtein distance (also known as the
-- "optimal string alignment" distance) between two strings: the minimum number
-- of single-character insertions, deletions, substitutions and transpositions
-- of two ADJACENT characters needed to turn @str1 into @str2.
--
-- Parameters:
--   @str1, @str2  strings to compare; arguments longer than 48 characters are
--                 silently truncated by the varchar(48) parameter type.
-- Returns:
--   The edit distance as an int (never NULL).
-- Notes:
--   * NULL is treated as the empty string, so the distance to a NULL/empty
--     argument is the length of the other argument, and (NULL, NULL) is 0.
--   * Lengths are taken with LEN(), which ignores trailing spaces; trailing
--     spaces therefore never contribute to the distance.
--   * Characters are compared with "=", so case/accent sensitivity follows the
--     collation in effect for the comparison.
CREATE function dbo.DamerauLevenshteinDistance(@str1 varchar(48), @str2 varchar(48))
RETURNS int
AS
begin
    Declare @str1_len int,
            @str2_len int,
            @distance int

    -- Normalise NULL to '' up front so that every later length/comparison is
    -- well defined (len(NULL) is NULL and would otherwise leak into the result).
    select @str1 = coalesce(@str1, ''),
           @str2 = coalesce(@str2, '')

    select @str1_len = len(@str1),
           @str2_len = len(@str2)

    -- Trivial cases: if one side is empty the distance is the other side's
    -- length (which is 0 when both are empty). No matrix is needed.
    if (@str1_len = 0)
        return @str2_len

    if (@str2_len = 0)
        return @str1_len

    -- Matrix of cumulative scores: value1 at (i, j) is the distance between
    -- the first i characters of @str1 and the first j characters of @str2.
    -- The primary key guarantees one row per cell and gives the per-cell
    -- lookups below an index seek instead of a scan.
    Declare @d table
    (
        i      int NOT NULL,
        j      int NOT NULL,
        value1 int NOT NULL,
        primary key (i, j)
    )

    Declare @i         int,
            @j         int,
            @cost      int,
            @newvalue  int,
            @candidate int,
            @char1     varchar(1),
            @char2     varchar(1)

    -- First column: turning i characters into nothing costs i deletions.
    set @i = 0
    while (@i <= @str1_len)
    begin
        insert into @d (i, j, value1) values (@i, 0, @i)
        set @i = @i + 1
    end

    -- First row: building j characters from nothing costs j insertions.
    -- Starts at 1 because cell (0, 0) was already written by the loop above.
    set @j = 1
    while (@j <= @str2_len)
    begin
        insert into @d (i, j, value1) values (0, @j, @j)
        set @j = @j + 1
    end

    set @i = 1
    while (@i <= @str1_len)
    begin
        set @char1 = substring(@str1, @i, 1)

        set @j = 1
        while (@j <= @str2_len)
        begin
            set @char2 = substring(@str2, @j, 1)
            set @cost = case when @char1 = @char2 then 0 else 1 end

            -- NOTE(review): the minimum is computed inline instead of calling the
            -- external dbo.Minimum helper, assuming that helper simply returns
            -- the smaller of its two int arguments - confirm before deploying.

            -- deletion: drop @str1[i]
            select @newvalue = value1 + 1
            from @d
            where i = @i - 1 and j = @j

            -- insertion: add @str2[j]
            select @candidate = value1 + 1
            from @d
            where i = @i and j = @j - 1

            if (@candidate < @newvalue)
                set @newvalue = @candidate

            -- substitution (free when the characters match)
            select @candidate = value1 + @cost
            from @d
            where i = @i - 1 and j = @j - 1

            if (@candidate < @newvalue)
                set @newvalue = @candidate

            -- transposition: the last two characters of each prefix are swapped.
            -- @cost is 1 whenever the swapped pair actually differs; when it is 0
            -- all four characters are equal and this candidate cannot beat the
            -- substitution path, so using @cost here matches the usual "+ 1".
            if (@i > 1 AND @j > 1
                AND @char1 = substring(@str2, @j - 1, 1)
                AND substring(@str1, @i - 1, 1) = @char2)
            begin
                select @candidate = value1 + @cost
                from @d
                where i = @i - 2 and j = @j - 2

                if (@candidate < @newvalue)
                    set @newvalue = @candidate
            end

            insert into @d (i, j, value1) values (@i, @j, @newvalue)

            set @j = @j + 1
        end --while @j

        set @i = @i + 1
    end --while @i

    -- The bottom-right cell holds the distance between the full strings.
    select @distance = value1
    from @d
    where i = @str1_len and j = @str2_len

    return @distance
end

Комментарии

Популярные сообщения из этого блога

MapBox-compatible file formats and limitations