-
Notifications
You must be signed in to change notification settings - Fork 273
/
Copy pathedit_distance.cpp
73 lines (69 loc) · 2.13 KB
/
edit_distance.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/// \file
/// \author Diffblue Ltd.
///
/// Provides a way to compute edit distance between two strings
#include "edit_distance.h"
levenshtein_automatont::levenshtein_automatont(
const std::string &string,
std::size_t allowed_errors)
{
const std::size_t layer_offset = string.size() + 1;
for(std::size_t i = 0; i <= allowed_errors; ++i)
{
final_states.push_back(string.size() + layer_offset * i);
}
for(std::size_t string_index = 0; string_index < string.size();
++string_index)
{
for(std::size_t error_layer = 0; error_layer <= allowed_errors;
++error_layer)
{
// position string_index matches
nfa.add_transition(
error_layer * layer_offset + string_index,
string[string_index],
error_layer * layer_offset + string_index + 1);
if(error_layer < allowed_errors)
{
// insertion, swap or deletion
nfa.add_arbitrary_transition(
error_layer * layer_offset + string_index,
(error_layer + 1) * layer_offset + string_index);
nfa.add_epsilon_transition(
error_layer * layer_offset + string_index,
(error_layer + 1) * layer_offset + string_index + 1);
nfa.add_arbitrary_transition(
error_layer * layer_offset + string_index,
(error_layer + 1) * layer_offset + string_index + 1);
}
}
}
for(std::size_t error_layer = 0; error_layer < allowed_errors; ++error_layer)
{
// arbitrary transitions between error layers
nfa.add_arbitrary_transition(
error_layer * layer_offset + string.size(),
(error_layer + 1) * layer_offset + string.size());
}
}
bool levenshtein_automatont::matches(const std::string &string) const
{
return get_edit_distance(string).has_value();
}
std::optional<std::size_t>
levenshtein_automatont::get_edit_distance(const std::string &string) const
{
auto current = nfa.initial_state(0);
for(const auto c : string)
{
current = nfa.next_state(current, c);
}
for(std::size_t distance = 0; distance < final_states.size(); ++distance)
{
if(current.contains(final_states[distance]))
{
return distance;
}
}
return {};
}