This repository has been archived by the owner on Feb 25, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathkotowaza.go
104 lines (84 loc) · 2.67 KB
/
kotowaza.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package yomichan
import (
"regexp"
"strings"
zig "foosoft.net/projects/zero-epwing-go"
)
// kotowazaExtractor holds the compiled patterns used by extractTerms to split
// an entry heading into expression/reading pairs.
type kotowazaExtractor struct {
	// readGroupExp captures a run of characters outside the ぁ-ゖ and ァ-ヺ
	// ranges (group 1) immediately followed by a parenthesized group (group 2).
	readGroupExp *regexp.Regexp

	// readGroupAltsExp captures the contents of any parenthesized group; the
	// contents are later split on "・" into alternatives.
	readGroupAltsExp *regexp.Regexp

	// readGroupNoAltsExp captures the contents of a parenthesized group that
	// contains no "・" separator, so the parentheses can simply be dropped.
	readGroupNoAltsExp *regexp.Regexp

	// wordGroupExp captures a "=base〔=alt・alt〕" group: the base text
	// (group 1) and the "・"-separated alternatives between 〔= and 〕 (group 2).
	wordGroupExp *regexp.Regexp
}
// makeKotowazaExtractor returns a kotowazaExtractor with all of its patterns
// compiled. regexp.MustCompile panics on an invalid pattern; the patterns here
// are fixed literals.
func makeKotowazaExtractor() epwingExtractor {
	extractor := new(kotowazaExtractor)

	// "=base〔=alt1・alt2〕" word-alternative groups.
	extractor.wordGroupExp = regexp.MustCompile(`=([^〔=]*)〔=([^〕]*)〕`)

	// Non-kana run followed by a parenthesized group.
	extractor.readGroupExp = regexp.MustCompile(`([^ぁ-ゖァ-ヺ]*)(\([^)]*\))`)

	// Any parenthesized group, and the variant that has no "・" inside.
	extractor.readGroupAltsExp = regexp.MustCompile(`\(([^)]*)\)`)
	extractor.readGroupNoAltsExp = regexp.MustCompile(`\(([^・)]*)\)`)

	return extractor
}
// extractTerms turns one entry into a list of terms, one per combination of
// expanded heading variant and reading variant.
//
// The heading is processed in two phases:
//
//  1. Every "=base〔=alt1・alt2〕" group is expanded breadth-first into one
//     heading per choice (the base first, then each "・"-separated
//     alternative), until no such group remains.
//  2. For each expanded heading, the expression is the heading with its
//     parenthesized groups removed, and the reading is the heading with the
//     non-kana run in front of each parenthesized group removed. Parenthesized
//     groups containing "・" are then expanded into one reading per
//     alternative.
//
// Every resulting term carries entry.Text as its only glossary item and the
// given sequence number.
func (e *kotowazaExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
	// Phase 1: breadth-first expansion of word alternatives. The slice doubles
	// as the queue; cursor marks the next unprocessed item.
	var expanded []string
	pending := []string{entry.Heading}
	for cursor := 0; cursor < len(pending); cursor++ {
		candidate := pending[cursor]

		group := e.wordGroupExp.FindStringSubmatch(candidate)
		if group == nil {
			expanded = append(expanded, candidate)
			continue
		}

		whole, base, alts := group[0], group[1], group[2]
		pending = append(pending, strings.ReplaceAll(candidate, whole, base))
		for _, alt := range strings.Split(alts, "・") {
			pending = append(pending, strings.ReplaceAll(candidate, whole, alt))
		}
	}

	// Phase 2: split each expanded heading into its expression and readings.
	var terms []dbTerm
	for _, heading := range expanded {
		// Keeping only group 1 drops the parenthesized groups.
		written := e.readGroupExp.ReplaceAllString(heading, "$1")

		// Keeping only group 2 drops the non-kana run before each
		// parenthesized group; groups without "・" then lose their parentheses.
		spoken := e.readGroupExp.ReplaceAllString(heading, "$2")
		spoken = e.readGroupNoAltsExp.ReplaceAllString(spoken, "$1")

		// Expand the remaining parenthesized alternatives breadth-first and
		// emit a term as soon as a reading has no group left.
		work := []string{spoken}
		for cursor := 0; cursor < len(work); cursor++ {
			current := work[cursor]

			group := e.readGroupAltsExp.FindStringSubmatch(current)
			if group == nil {
				terms = append(terms, dbTerm{
					Expression: written,
					Reading:    current,
					Glossary:   []any{entry.Text},
					Sequence:   sequence,
				})
				continue
			}

			for _, alt := range strings.Split(group[1], "・") {
				work = append(work, strings.ReplaceAll(current, group[0], alt))
			}
		}
	}

	return terms
}
// extractKanji always returns nil: this extractor produces no kanji entries,
// and the entry argument is ignored.
func (e *kotowazaExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
	return nil
}
// exportRules is a no-op for this extractor: term is left unmodified and tags
// is ignored.
func (e *kotowazaExtractor) exportRules(term *dbTerm, tags []string) {
}
// getRevision returns the fixed revision identifier for this extractor.
func (*kotowazaExtractor) getRevision() string {
	const revision = "kotowaza1"
	return revision
}
// getFontNarrow returns a new, empty (non-nil) map on every call; this
// extractor defines no narrow-font entries.
func (*kotowazaExtractor) getFontNarrow() map[int]string {
	return make(map[int]string)
}
// getFontWide returns a new, empty (non-nil) map on every call; this extractor
// defines no wide-font entries.
func (*kotowazaExtractor) getFontWide() map[int]string {
	return make(map[int]string)
}