This repository has been archived by the owner on Dec 5, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathjquery.fuzzymatch.js
198 lines (175 loc) · 7.53 KB
/
jquery.fuzzymatch.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/**
* jQuery.fuzzyMatch.js, version 0.3 (2011-06-22)
*
* https://github.com/rapportive-oss/jquery-fuzzymatch
*
* A fuzzy string-matching plugin for autocompleting in jQuery,
* based on LiquidMetal http://github.com/rmm5t/liquidmetal/blob/master/liquidmetal.js
* quicksilver.js http://code.google.com/p/rails-oceania/source/browse/lachiecox/qs_score/trunk/qs_score.js
* QuickSilver http://code.google.com/p/blacktree-alchemy/source/browse/trunk/Crucible/Code/NSString_BLTRExtensions.m#61
* FuzzyString https://github.com/dcparker/jquery_plugins/blob/master/fuzzy-string/fuzzy-string.js
*
* Copyright (c) 2011, Conrad Irwin ([email protected])
* Licensed under the MIT: http://www.opensource.org/licenses/mit-license.php
*
* TODO: Tweak heuristics, typo correction support?
**/
(function ($) {
// The scores are arranged so that a continuous match of characters will
// result in a total score of 1.
//
// The best case, this character is a match, and either this is the start
// of the string, or the previous character was also a match.
var SCORE_CONTINUE_MATCH = 1,
// A new match at the start of a word scores better than a new match
// elsewhere as it's more likely that the user will type the starts
// of fragments.
// (Our notion of word includes CamelCase and hypen-separated, etc.)
SCORE_START_WORD = 0.9,
// Any other match isn't ideal, but it's probably ok.
SCORE_OK = 0.8,
// The goodness of a match should decay slightly with each missing
// character.
//
// i.e. "bad" is more likely than "bard" when "bd" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 100 characters are inserted between matches.
PENALTY_SKIPPED = 0.999,
// The goodness of an exact-case match should be higher than a
// case-insensitive match by a small amount.
//
// i.e. "HTML" is more likely than "haml" when "HM" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 1000 characters are inserted between matches.
PENALTY_CASE_MISMATCH = 0.9999,
// If the word has more characters than the user typed, it should
// be penalised slightly.
//
// i.e. "html" is more likely than "html5" if I type "html".
//
// However, it may well be the case that there's a sensible secondary
// ordering (like alphabetical) that it makes sense to rely on when
// there are many prefix matches, so we don't make the penalty increase
// with the number of tokens.
PENALTY_NOT_COMPLETE = 0.99;
/**
* Generates all possible split objects by splitting a string around a
* character in as many ways as possible.
*
* @param string The string to split
* @param char A character on which to split it.
*
* @return [{
* before: The fragment of the string before this occurance of the
* character.
*
* char: The original coy of this character (which may differ in case
* from the "char" parameter).
*
* after: The fragment of the string after the occurance of the character.
* }]
**/
function allCaseInsensitiveSplits(string, chr) {
var lower = string.toLowerCase(),
lchr = chr.toLowerCase(),
i = lower.indexOf(lchr),
result = [];
while (i > -1) {
result.push({
before: string.slice(0, i),
chr: string.charAt(i),
after: string.slice(i + 1)
});
i = lower.indexOf(lchr, i + 1);
}
return result;
}
/**
* Escapes a string so that it can be interpreted as HTML node content.
*
* WARNING: The output isn't safe for including in attributes, and I
* haven't considered other HTML contexts.
* NOTE: This really is worth it compared to using $('<div>').text(foo).html().
*
* @param string, the string to escape
* @return string, the escaped version.
*/
function htmlEscape(string) {
return string.replace(/&/g, '&')
.replace(/</g, '<')
.replace(/>/g, '>');
}
/**
* Generates a case-insensitive match of the abbreviation against the string
*
* @param string, a canonical string to be matched against.
* @param abbreviation, an abbreviation that a user may have typed
* in order to specify that string.
*
* @cache (private), a cache that reduces the expected running time of the
* algorithm in the case there are many repeated characters.
*
* @return {
* score: A score (0 <= score <= 1) that indicates how likely it is that
* the abbreviation matches the string.
*
* The score is 0 if the characters in the abbreviation do not
* all appear in order in the string.
*
* The score is 1 if the user typed the exact string.
*
* Scores are designed to be comparable when many different
* strings are matched against the same abbreviation, for example
* for autocompleting.
*
* html: A copy of the input string html-escaped, with matching letters
* surrounded by <b> and </b>.
*
* }
**/
$.fuzzyMatch = function (string, abbreviation, cache) {
if (abbreviation === "") {
return {
score: string === "" ? SCORE_CONTINUE_MATCH : PENALTY_NOT_COMPLETE,
html: htmlEscape(string)
};
}
if (cache && cache[string] && cache[string][abbreviation]) {
return $.extend({}, cache[string][abbreviation]);
}
cache = cache || {};
cache[string] = cache[string] || {};
cache[string][abbreviation] =
$(allCaseInsensitiveSplits(string, abbreviation.charAt(0)))
.map(function (i, split) {
var result = $.fuzzyMatch(split.after, abbreviation.slice(1), cache),
preceding_char = split.before.charAt(split.before.length - 1);
if (split.before === "") {
result.score *= SCORE_CONTINUE_MATCH;
} else if (preceding_char.match(/[\\\/\-_+.# \t"@\[\(\{&]/) ||
(split.chr.toLowerCase() !== split.chr && preceding_char.toLowerCase() === preceding_char)) {
result.score *= SCORE_START_WORD;
} else {
result.score *= SCORE_OK;
}
if (split.chr !== abbreviation.charAt(0)) {
result.score *= PENALTY_CASE_MISMATCH;
}
result.score *= Math.pow(PENALTY_SKIPPED, split.before.length);
result.html = htmlEscape(split.before) + '<b>' + htmlEscape(split.chr) + '</b>' + result.html;
return result;
})
.sort(function (a, b) {
return a.score < b.score ? 1 : a.score === b.score ? 0 : -1;
})[0] ||
// No matches for the next character in the abbreviation, abort!
{
score: 0, // This 0 will multiply up to the top, giving a total of 0
html: htmlEscape(string)
};
return $.extend({}, cache[string][abbreviation]);
};
/*global jQuery */
}(jQuery));