Add tests on long strings with few different characters

Benchmarks are run before and after optimization "Remove leading and trailing identical runes". Long strings with differences at the beginning (long_lead), in the middle (long_middle) or at the end (long_trail) show significant improvements in processing time and memory allocations. When the optimization is ineffective due to different leading and trailing characters (long_diff) there is no change in processing time or memory allocation. goos: linux goarch: amd64 pkg: github.com/agnivade/levenshtein cpu: AMD Ryzen 7 7840U w/ Radeon 780M Graphics │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ Simple/ASCII-16 134.20n ± 0% 79.03n ± 0% -41.11% (p=0.000 n=20) Simple/French-16 254.8n ± 0% 129.7n ± 0% -49.09% (p=0.000 n=20) Simple/Nordic-16 500.6n ± 1% 208.0n ± 0% -58.45% (p=0.000 n=20) Simple/Long_lead-16 1862.0n ± 0% 209.6n ± 1% -88.75% (p=0.000 n=20) Simple/Long_middle-16 3613.0n ± 0% 325.0n ± 0% -91.00% (p=0.000 n=20) Simple/Long_trail-16 3911.0n ± 0% 399.0n ± 1% -89.80% (p=0.000 n=20) Simple/Long_diff-16 4.030µ ± 0% 4.029µ ± 1% ~ (p=0.899 n=20) Simple/Tibetan-16 413.0n ± 0% 277.3n ± 0% -32.86% (p=0.000 n=20) geomean 964.6n 299.5n -68.95% │ before.txt │ after.txt │ │ B/op │ B/op vs base │ Simple/ASCII-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ Simple/French-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ Simple/Nordic-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ Simple/Long_lead-16 464.0 ± 0% 368.0 ± 0% -20.69% (p=0.000 n=20) Simple/Long_middle-16 672.0 ± 0% 544.0 ± 0% -19.05% (p=0.000 n=20) Simple/Long_trail-16 720.0 ± 0% 576.0 ± 0% -20.00% (p=0.000 n=20) Simple/Long_diff-16 720.0 ± 0% 720.0 ± 0% ~ (p=1.000 n=20) ¹ Simple/Tibetan-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ geomean ² -7.99% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ Simple/ASCII-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ Simple/French-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ Simple/Nordic-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ Simple/Long_lead-16 3.000 ± 0% 2.000 ± 0% -33.33% (p=0.000 n=20) Simple/Long_middle-16 3.000 ± 0% 2.000 ± 0% -33.33% (p=0.000 n=20) Simple/Long_trail-16 3.000 ± 0% 2.000 ± 0% -33.33% (p=0.000 n=20) Simple/Long_diff-16 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=20) ¹ Simple/Tibetan-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹ geomean ² -14.11% ² ¹ all samples are equal ² summaries must be >0 to compute geomean
agnivade · Sep 24, 2024 · fac16fe · fac16fe
1 parent 4e472bb
commit fac16fe
Showing 1 changed file with 27 additions and 5 deletions.
diff --git a/levenshtein_test.go b/levenshtein_test.go
@@ -66,13 +66,35 @@ func BenchmarkSimple(b *testing.B) {
 		name string
 	}{
 		// ASCII
-		{"levenshtein", "frankenstein", "ASCII"},
+		{a: "levenshtein", b: "frankenstein", name: "ASCII"},
 		// Testing acutes and umlauts
-		{"resumé and café", "resumés and cafés", "French"},
-		{"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", "Nordic"},
-		{"a very long string that is meant to exceed", "another very long string that is meant to exceed", "long string"},
+		{a: "resumé and café", b: "resumés and cafés", name: "French"},
+		{a: "Hafþór Júlíus Björnsson", b: "Hafþor Julius Bjornsson", name: "Nordic"},
+
+		// Long strings
+		{
+			a:    "a very long string that is meant to exceed",
+			b:    "another very long string that is meant to exceed",
+			name: "Long lead",
+		},
+		{
+			a:    "a very long string with a word in the middle that is different",
+			b:    "a very long string with some text in the middle that is different",
+			name: "Long middle",
+		},
+		{
+			a:    "a very long string with some text at the end that is not the same",
+			b:    "a very long string with some text at the end that is very different",
+			name: "Long trail",
+		},
+		{
+			a:    "+a very long string with different leading and trailing characters+",
+			b:    "-a very long string with different leading and trailing characters-",
+			name: "Long diff",
+		},
+
 		// Only 2 characters are less in the 2nd string
-		{"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", "Tibetan"},
+		{a: "།་གམ་འས་པ་་མ།", b: "།་གམའས་པ་་མ", name: "Tibetan"},
 	}
 	tmp := 0
 	for _, test := range tests {