Фокусное расстояние
- 1 year ago
- 0
- 0
Расстояние Дамерау — Левенштейна (названо в честь учёных Фредерика Дамерау и Владимира Левенштейна ) — это мера разницы двух строк символов, определяемая как минимальное количество операций вставки, удаления, замены и транспозиции (перестановки двух соседних символов), необходимых для перевода одной строки в другую. Является модификацией расстояния Левенштейна : к операциям вставки, удаления и замены символов, определённых в расстоянии Левенштейна добавлена операция транспозиции (перестановки) символов.
Расстояние Дамерау — Левенштейна между двумя строками и определяется функцией как:
где это индикаторная функция , равная нулю при и 1 в противном случае.
Каждый рекурсивный вызов соответствует одному из случаев:
def damerau_levenshtein_distance(s1, s2):
d = {}
lenstr1 = len(s1)
lenstr2 = len(s2)
for i in range(-1,lenstr1+1):
d[(i,-1)] = i+1
for j in range(-1,lenstr2+1):
d[(-1,j)] = j+1
for i in range(lenstr1):
for j in range(lenstr2):
if s1[i] == s2[j]:
cost = 0
else:
cost = 1
d[(i,j)] = min(
d[(i-1,j)] + 1, # deletion
d[(i,j-1)] + 1, # insertion
d[(i-1,j-1)] + cost, # substitution
)
if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
return d[lenstr1-1,lenstr2-1]
function damerau_levenshtein_distance(s1, s2: string): integer;
begin
var d: TArray<TArray<integer>>;
SetLength(d, Length(s1) + 1);
for var i := Low(d) to High(d) do
SetLength(d[i], Length(s2) + 1);
for var i := Low(d) to High(d) do
begin
d[i, 0] := i;
for var j := Low(d[i]) to High(d[i]) do
d[0, j] := j;
end;
for var i := Low(d) + 1 to High(d) do
for var j := Low(d[i]) + 1 to High(d[i]) do
d[i, j] := Min(Min(
d[i - 1, j] + 1,
d[i, j - 1] + 1),
d[i - 1, j - 1] + IfThen(s1[i] = s2[j], 0, 1));
Result := d[Length(s1), Length(s2)];
end;
function Damerau_Levenshtein_Distance (L, R : String) return Natural is
D : array (L'First - 1 .. L'Last, R'First - 1 .. R'Last) of Natural;
begin
for I in D'Range (1) loop
D (I, D'First (2)) := I;
end loop;
for I in D'Range (2) loop
D (D'First (1), I) := I;
end loop;
for J in R'Range loop
for I in L'Range loop
D (I, J) :=
Natural'Min
(Natural'Min (D (I - 1, J), D (I, J - 1)) + 1,
D (I - 1, J - 1) + (if L (I) = R (J) then 0 else 1));
if J > R'First and then I > L'First
and then R (J) = L (I - 1) and then R (J - 1) = L (I)
then
D (I, J) := Natural'Min (D (I, J), D (I - 2, J - 2) + 1);
end if;
end loop;
end loop;
return D (L'Last, R'Last);
end Damerau_Levenshtein_Distance;
Public Function WeightedDL(source As String, target As String) As Double
Dim deleteCost As Double
Dim insertCost As Double
Dim replaceCost As Double
Dim swapCost As Double
deleteCost = 1
insertCost = 1
replaceCost = 1
swapCost = 1
Dim i As Integer
Dim j As Integer
Dim k As Integer
If Len(source) = 0 Then
WeightedDL = Len(target) * insertCost
Exit Function
End If
If Len(target) = 0 Then
WeightedDL = Len(source) * deleteCost
Exit Function
End If
Dim table() As Double
ReDim table(Len(source), Len(target))
Dim sourceIndexByCharacter() As Variant
ReDim sourceIndexByCharacter(0 To 1, 0 To Len(source) - 1) As Variant
If Left(source, 1) <> Left(target, 1) Then
table(0, 0) = Application.Min(replaceCost, (deleteCost + insertCost))
End If
sourceIndexByCharacter(0, 0) = Left(source, 1)
sourceIndexByCharacter(1, 0) = 0
Dim deleteDistance As Double
Dim insertDistance As Double
Dim matchDistance As Double
For i = 1 To Len(source) - 1
deleteDistance = table(i - 1, 0) + deleteCost
insertDistance = ((i + 1) * deleteCost) + insertCost
If Mid(source, i + 1, 1) = Left(target, 1) Then
matchDistance = (i * deleteCost) + 0
Else
matchDistance = (i * deleteCost) + replaceCost
End If
table(i, 0) = Application.Min(Application.Min(deleteDistance, insertDistance), matchDistance)
Next
For j = 1 To Len(target) - 1
deleteDistance = table(0, j - 1) + insertCost
insertDistance = ((j + 1) * insertCost) + deleteCost
If Left(source, 1) = Mid(target, j + 1, 1) Then
matchDistance = (j * insertCost) + 0
Else
matchDistance = (j * insertCost) + replaceCost
End If
table(0, j) = Application.Min(Application.Min(deleteDistance, insertDistance), matchDistance)
Next
For i = 1 To Len(source) - 1
Dim maxSourceLetterMatchIndex As Integer
If Mid(source, i + 1, 1) = Left(target, 1) Then
maxSourceLetterMatchIndex = 0
Else
maxSourceLetterMatchIndex = -1
End If
For j = 1 To Len(target) - 1
Dim candidateSwapIndex As Integer
candidateSwapIndex = -1
For k = 0 To UBound(sourceIndexByCharacter, 2)
If sourceIndexByCharacter(0, k) = Mid(target, j + 1, 1) Then candidateSwapIndex = sourceIndexByCharacter(1, k)
Next
Dim jSwap As Integer
jSwap = maxSourceLetterMatchIndex
deleteDistance = table(i - 1, j) + deleteCost
insertDistance = table(i, j - 1) + insertCost
matchDistance = table(i - 1, j - 1)
If Mid(source, i + 1, 1) <> Mid(target, j + 1, 1) Then
matchDistance = matchDistance + replaceCost
Else
maxSourceLetterMatchIndex = j
End If
Dim swapDistance As Double
If candidateSwapIndex <> -1 And jSwap <> -1 Then
Dim iSwap As Integer
iSwap = candidateSwapIndex
Dim preSwapCost
If iSwap = 0 And jSwap = 0 Then
preSwapCost = 0
Else
preSwapCost = table(Application.Max(0, iSwap - 1), Application.Max(0, jSwap - 1))
End If
swapDistance = preSwapCost + ((i - iSwap - 1) * deleteCost) + ((j - jSwap - 1) * insertCost) + swapCost
Else
swapDistance = 500
End If
table(i, j) = Application.Min(Application.Min(Application.Min(deleteDistance, insertDistance), _
matchDistance), swapDistance)
Next
sourceIndexByCharacter(0, i) = Mid(source, i + 1, 1)
sourceIndexByCharacter(1, i) = i
Next
WeightedDL = table(Len(source) - 1, Len(target) - 1)
End Function