From 02e4517c6b071bba1a1be6cc665b7856ba1dd5e2 Mon Sep 17 00:00:00 2001 From: Neha Date: Tue, 16 Jul 2024 22:05:49 +0400 Subject: [PATCH] made changes to count an mbox as one file --- .../main/java/org/freeeed/main/DiscoveryFile.java | 9 +++++++++ .../main/java/org/freeeed/main/FileProcessor.java | 4 +++- .../main/java/org/freeeed/main/ZipFileProcessor.java | 12 +++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/freeeed-processing/src/main/java/org/freeeed/main/DiscoveryFile.java b/freeeed-processing/src/main/java/org/freeeed/main/DiscoveryFile.java index e8c41118..ab3ed79c 100644 --- a/freeeed-processing/src/main/java/org/freeeed/main/DiscoveryFile.java +++ b/freeeed-processing/src/main/java/org/freeeed/main/DiscoveryFile.java @@ -33,6 +33,7 @@ public class DiscoveryFile { private boolean hasAttachments; private boolean hasParent; private String custodian; + private boolean isPartOfMbox; /** * Constructor with two parameters and the rest defaults: no attachments or parents. @@ -175,4 +176,12 @@ public String getCustodian() { public void setCustodian(String custodian) { this.custodian = custodian; } + + public boolean isPartOfMbox() { + return isPartOfMbox; + } + + public void setPartOfMbox(boolean partOfMbox) { + isPartOfMbox = partOfMbox; + } } diff --git a/freeeed-processing/src/main/java/org/freeeed/main/FileProcessor.java b/freeeed-processing/src/main/java/org/freeeed/main/FileProcessor.java index 573f4bfb..38253ce6 100644 --- a/freeeed-processing/src/main/java/org/freeeed/main/FileProcessor.java +++ b/freeeed-processing/src/main/java/org/freeeed/main/FileProcessor.java @@ -274,7 +274,9 @@ private void writeMetadata(DiscoveryFile discoveryFile, DocumentMetadata metadat throws IOException, InterruptedException { Map mapWritable = createMapWritable(metadata, discoveryFile); metadataWriter.processMap(mapWritable,discoveryFile); - Stats.getInstance().increaseItemCount(); + if(!discoveryFile.isPartOfMbox()) { + Stats.getInstance().increaseItemCount(); + } } /** diff --git a/freeeed-processing/src/main/java/org/freeeed/main/ZipFileProcessor.java b/freeeed-processing/src/main/java/org/freeeed/main/ZipFileProcessor.java index 429d063d..360d076d 100644 --- a/freeeed-processing/src/main/java/org/freeeed/main/ZipFileProcessor.java +++ b/freeeed-processing/src/main/java/org/freeeed/main/ZipFileProcessor.java @@ -210,20 +210,22 @@ private void processZipEntry(ZipInputStream zipInputStream, ZipEntry zipEntry) t // this file can be mbox file and the file can be extracted and processed separately if(tempFile.endsWith(".mbox")){ List emailFiles = MboxToEmlConverter.convertMboxToEml(tempFile, "/tmp/mboxfiles"); - for (String eml : emailFiles) { - processSingleFile(eml, eml); + for (int i = 0; i < emailFiles.size(); i++) { + processSingleFile(emailFiles.get(i), emailFiles.get(i), i>0); } Util.deleteDirectory(new File("/tmp/mboxfiles")); // keep it clean for next processing }else { - processSingleFile(tempFile, zipEntry.getName()); + processSingleFile(tempFile, zipEntry.getName(), false); } } - private void processSingleFile(String tempFile, String fileName) throws Exception { + private void processSingleFile(String tempFile, String fileName, boolean isMboxFile) throws Exception { if (PstProcessor.isPST(tempFile)) { new PstProcessor(tempFile, metadataWriter, getLuceneIndex()).process(); } else { - processFileEntry(new DiscoveryFile(tempFile, fileName)); + DiscoveryFile file = new DiscoveryFile(tempFile, fileName); + file.setPartOfMbox(isMboxFile); + processFileEntry(file); } }