Skip to content

Commit

Permalink
JAMES-4100 Improve Search Snippet display (#2583)
Browse files Browse the repository at this point in the history
Co-authored-by: hung phan <[email protected]>
  • Loading branch information
hungphan227 and hung phan authored Jan 22, 2025
1 parent 853de14 commit a6f9f32
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -532,4 +532,43 @@ default void shouldHighLightBodyWhenHTMLBodyMatched() throws Exception {
softly.assertThat(searchSnippets.getFirst().highlightedBody().get()).contains("<mark>barcamp</mark>");
});
}

@Test
default void highlightSearchShouldShortenGreaterThanCharacters() throws Exception {
    MailboxSession mailboxSession = session(USERNAME1);
    // Expected snippet: leading '>' runs are gone, the inline '>' is HTML-encoded, the match is marked.
    String expectedBody = "Start \n append <mark>contentA</mark> to &gt; inbox \n End";

    // Given two messages, where only the first one has a body holding the searched word (contentA),
    // surrounded by lines that open with runs of '>' characters
    ComposedMessageId quotedMessage = appendMessage(
        MessageManager.AppendCommand.from(
            Message.Builder.of()
                .setTo("[email protected]")
                .setSubject("Hallo, Thx Matthieu for your help")
                .setBody("Start \n>>>>>>>>>> append contentA to > inbox \n>>>>>> End",
                    StandardCharsets.UTF_8)),
        mailboxSession).getId();

    ComposedMessageId otherMessage = appendMessage(
        MessageManager.AppendCommand.from(
            Message.Builder.of()
                .setTo("[email protected]")
                .setSubject("Hallo, Thx Alex for your help")
                .setBody("append contentB to inbox", StandardCharsets.UTF_8)),
        mailboxSession).getId();

    verifyMessageWasIndexed(2);

    // When searching both mailboxes for the word (contentA) in the body
    MultimailboxesSearchQuery bodyQuery = MultimailboxesSearchQuery
        .from(SearchQuery.of(SearchQuery.bodyContains("contentA")))
        .inMailboxes(List.of(quotedMessage.getMailboxId(), otherMessage.getMailboxId()))
        .build();

    List<SearchSnippet> snippets = Flux.from(
            testee().highlightSearch(
                List.of(quotedMessage.getMessageId(), otherMessage.getMessageId()),
                bodyQuery,
                mailboxSession))
        .collectList()
        .block();

    // Then exactly one snippet comes back, for the first message, with the shortened highlighted body
    assertThat(snippets).hasSize(1);
    SearchSnippet onlySnippet = snippets.getFirst();
    assertSoftly(softly -> {
        softly.assertThat(onlySnippet.messageId()).isEqualTo(quotedMessage.getMessageId());
        softly.assertThat(onlySnippet.highlightedBody().get()).isEqualTo(expectedBody);
    });
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ public Document createMessageDocument(MailboxMessage message, MailboxSession ses
doc.add(new TextField(BCC_FIELD, uppercase(EMailers.from(headerCollection.getBccAddressSet()).serialize()), Field.Store.YES));

// index body
Optional<String> bodyText = mimePartExtracted.locateFirstTextBody();
Optional<String> bodyText = mimePartExtracted.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine);
Optional<String> bodyHtml = mimePartExtracted.locateFirstHtmlBody();

bodyText.or(() -> bodyHtml)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;

Expand Down Expand Up @@ -126,6 +127,7 @@ private Highlighter highlighter(SearchQuery searchQuery) {
Query query = buildQueryFromSearchQuery(searchQuery);
QueryScorer scorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setEncoder(new SimpleHTMLEncoder());
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, configuration.fragmentSize()));
return highlighter;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ private Mono<IndexableMessage> instantiateIndexedMessage() throws IOException, M
.asMimePart(textExtractor)
.map(parsingResult -> {

Optional<String> bodyText = parsingResult.locateFirstTextBody();
Optional<String> bodyText = parsingResult.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine);
Optional<String> bodyHtml = parsingResult.locateFirstHtmlBody();

boolean hasAttachment = MessageAttachmentMetadata.hasNonInlinedAttachment(message.getAttachments());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,5 +473,22 @@ public boolean test(MessageId input) {
};
}

/**
 * Removes the run of {@code '>'} characters that opens each line of the given text.
 *
 * <p>A line starts at the very first character of the text and right after every
 * {@code '\n'}. Only the uninterrupted run of {@code '>'} at that position is dropped;
 * as soon as any other character is met, the rest of the line (including later
 * {@code '>'} characters) is copied unchanged. Line feeds themselves are always kept.
 *
 * @param text the text to clean up, must not be {@code null}
 * @return the text without the leading {@code '>'} characters of each line
 */
public static String removeGreaterThanCharactersAtBeginningOfLine(String text) {
    StringBuilder result = new StringBuilder(text.length());
    // The start of the text is also the start of a line, so leading '>' there must be dropped too.
    boolean atLineStart = true;

    for (int i = 0; i < text.length(); i++) {
        char current = text.charAt(i);

        if (current == '\n') {
            atLineStart = true;
            result.append(current);
            continue;
        }
        if (atLineStart && current == '>') {
            // Still inside the leading '>' run of the current line: skip it.
            continue;
        }
        result.append(current);
        atLineStart = false;
    }

    return result.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -529,12 +529,12 @@ trait SearchSnippetGetMethodContract {
| "list": [
| {
| "emailId": "${messageId1.serialize}",
| "subject": "Weekly report - <mark>vttran</mark> 27/02-03/03/2023",
| "subject": "Weekly report - <mark>vttran</mark> 27&#x2F;02-03&#x2F;03&#x2F;2023",
| "preview": null
| },
| {
| "emailId": "${messageId2.serialize}",
| "subject": "Weekly report - <mark>vttran</mark> 19/08-23/08/2024",
| "subject": "Weekly report - <mark>vttran</mark> 19&#x2F;08-23&#x2F;08&#x2F;2024",
| "preview": null
| }
| ],
Expand Down Expand Up @@ -610,7 +610,7 @@ trait SearchSnippetGetMethodContract {
| "list": [
| {
| "emailId": "${messageId1.serialize}",
| "subject": "Weekly report - <mark>vttran</mark> 27/02-03/03/2023",
| "subject": "Weekly report - <mark>vttran</mark> 27&#x2F;02-03&#x2F;03&#x2F;2023",
| "preview": null
| },
| {
Expand Down

0 comments on commit a6f9f32

Please sign in to comment.