-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhighlight.py
44 lines (39 loc) · 1.6 KB
/
highlight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import html
from typing import List, Tuple

import spacy
# Load the `en_core_web_sm` spaCy pipeline once at import time; highlight()
# uses this shared module-level instance to tokenize its input.
nlp = spacy.load("en_core_web_sm")
def _onclick_argument(value: str) -> str:
    """Escape *value* for a single-quoted JS string inside an HTML attribute."""
    js_safe = (
        value.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\r", "\\r")
        .replace("\n", "\\n")
    )
    # The JS literal sits inside a double-quoted HTML attribute, and the browser
    # HTML-decodes the attribute before running the script, so HTML-escape last.
    return html.escape(js_safe, quote=True)


def highlight(text: str, terms_list: List[Tuple[int, int]]) -> str:
    """Return the document with each listed phrase wrapped in a button element.

    All text outside the highlighted phrases, including the whitespace that
    follows each phrase, is reproduced as it appears in the input; no padding
    spaces are invented around the buttons.

    Parameters
    ----------
    text : str
        document
    terms_list : List[Tuple[int, int]]
        list of phrases to highlight, where each element represents a phrase's
        spaCy start and end token positions in the document (end exclusive, as
        used for slicing the parsed document); list must be sorted in ascending
        order of start index

    Returns
    -------
    str
        document text in which every listed phrase is enclosed in a
        ``<button>`` element whose ``onclick`` calls ``suggestAlternative``
        with the phrase stripped of spaces
    """
    # NOTE(review): the visible button label and the surrounding document text
    # are emitted without HTML escaping (unchanged from the previous behavior);
    # confirm that callers only pass trusted or pre-sanitized text.
    doc = nlp(text)
    pieces = []
    previous_end = 0  # token index just past the last span already emitted
    for term_start, term_end in terms_list:
        # Emit the untouched text between the previous term and this one.
        # Skipped when the terms are adjacent or the term starts the document.
        if term_start > previous_end:
            pieces.append(doc[previous_end:term_start].text_with_ws)

        term = doc[term_start:term_end]
        word = term.text
        word_id = _onclick_argument(word.replace(" ", ""))
        pieces.append(
            f'<button class="usa-button usa-button--accent-warm padding-1 margin-right-0" onclick="suggestAlternative(\'{word_id}\')">{word}</button>'
        )
        # `text` is `text_with_ws` minus the last token's trailing whitespace,
        # so the remainder is exactly the original whitespace after the term;
        # keep it outside the button.
        pieces.append(term.text_with_ws[len(word):])
        previous_end = term_end

    # Append whatever follows the last highlighted term.
    if previous_end < len(doc):
        pieces.append(doc[previous_end:].text_with_ws)
    return "".join(pieces)