From 6b4864e1aa5e6a97583a7dc9662d5971867b5de2 Mon Sep 17 00:00:00 2001 From: emiliodskinner Date: Thu, 16 Jan 2025 19:41:05 +0000 Subject: [PATCH 1/6] task-2628-ingest-core: warehouse/ingest-core pt1 --- .../java/datawave/ingest/TableCreator.java | 5 +- .../ingest/config/BaseHdfsFileCacheUtil.java | 17 +-- .../data/config/XMLFieldConfigHelper.java | 15 +-- .../config/ingest/AbstractIngestHelper.java | 11 +- .../data/config/ingest/BaseIngestHelper.java | 117 ++++++++---------- .../data/config/ingest/CSVIngestHelper.java | 11 +- .../data/config/ingest/CompositeIngest.java | 9 +- .../ingest/ContentBaseIngestHelper.java | 9 +- .../ingest/EventFieldNormalizerHelper.java | 13 +- .../data/config/ingest/IngestFieldFilter.java | 17 ++- .../data/normalizer/AbstractNormalizer.java | 21 ++-- .../SimpleGroupFieldNameParser.java | 9 +- .../data/tokenize/TokenizationHelper.java | 11 +- .../reader/AbstractEventRecordReader.java | 13 +- ...ataTypeDiscardFutureIntervalPredicate.java | 11 +- .../DataTypeDiscardIntervalPredicate.java | 11 +- .../handler/atom/AtomDataTypeHandler.java | 5 +- .../handler/edge/EdgeKeyVersioningCache.java | 17 +-- .../handler/edge/define/EdgeDataBundle.java | 9 +- .../error/ErrorShardedDataTypeHandler.java | 7 +- .../mapreduce/handler/shard/NumShards.java | 25 ++-- .../summary/CoreSummaryDataTypeHandler.java | 5 +- .../MetricsSummaryDataTypeHandler.java | 3 +- ...ndedContentIndexingColumnBasedHandler.java | 33 ++--- .../AbstractNGramTokenizationStrategy.java | 5 +- .../datawave/ingest/util/BloomFilterUtil.java | 15 +-- .../datawave/util/flag/FlagEntryMover.java | 15 ++- .../java/datawave/util/flag/FlagMetrics.java | 11 +- .../java/datawave/util/flag/FlagSocket.java | 17 +-- .../java/datawave/util/flag/SimpleMover.java | 11 +- 30 files changed, 260 insertions(+), 218 deletions(-) diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/TableCreator.java b/warehouse/ingest-core/src/main/java/datawave/ingest/TableCreator.java index 131b994feac..0316b69fd00 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/TableCreator.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/TableCreator.java @@ -1,7 +1,8 @@ package datawave.ingest; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.mapreduce.job.TableConfigurationUtil; @@ -9,7 +10,7 @@ public class TableCreator { private static final Configuration config = new Configuration(); - private static final Logger log = Logger.getLogger(TableCreator.class); + private static final Logger log = LoggerFactory.getLogger(TableCreator.class); public static void main(String[] args) { Configuration conf = OptionsParser.parseArguments(args, config); diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/config/BaseHdfsFileCacheUtil.java b/warehouse/ingest-core/src/main/java/datawave/ingest/config/BaseHdfsFileCacheUtil.java index facc250aa68..d41c067e487 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/config/BaseHdfsFileCacheUtil.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/config/BaseHdfsFileCacheUtil.java @@ -9,7 +9,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.data.config.ingest.AccumuloHelper; @@ -23,7 +24,7 @@ public abstract class BaseHdfsFileCacheUtil { private static final 
int MAX_RETRIES = 3; protected short cacheReplicas = 3; - private static final Logger log = Logger.getLogger(BaseHdfsFileCacheUtil.class); + private static final Logger log = LoggerFactory.getLogger(BaseHdfsFileCacheUtil.class); public BaseHdfsFileCacheUtil(Configuration conf) { Validate.notNull(conf, "Configuration object passed in null"); @@ -48,7 +49,7 @@ public void read() throws IOException { while (retry && attempts <= MAX_RETRIES) { attempts++; - log.info("Reading cache at " + this.cacheFilePath); + log.info("Reading cache at {}", this.cacheFilePath); try (BufferedReader in = new BufferedReader(new InputStreamReader(FileSystem.get(this.cacheFilePath.toUri(), conf).open(this.cacheFilePath)))) { readCache(in); retry = false; @@ -80,7 +81,7 @@ public void update() { cleanup(fs, tempFile); } - log.error("Unable to update cache file " + cacheFilePath + ". " + e.getMessage(), e); + log.error("Unable to update cache file {}. {}", cacheFilePath, e.getMessage(), e); } } @@ -99,10 +100,10 @@ public void createCacheFile(FileSystem fs, Path tmpCacheFile) { throw new IOException("Failed to rename temporary cache file"); } } catch (Exception e) { - log.warn("Unable to rename " + tmpCacheFile + " to " + this.cacheFilePath + "probably because somebody else replaced it ", e); + log.warn("Unable to rename {} to {} probably because somebody else replaced it", tmpCacheFile, this.cacheFilePath, e); cleanup(fs, tmpCacheFile); } - log.info("Updated " + cacheFilePath); + log.info("Updated {}", cacheFilePath); } @@ -110,7 +111,7 @@ protected void cleanup(FileSystem fs, Path tmpCacheFile) { try { fs.delete(tmpCacheFile, false); } catch (Exception e) { - log.error("Unable to clean up " + tmpCacheFile, e); + log.error("Unable to clean up {}", tmpCacheFile, e); } } @@ -132,7 +133,7 @@ public Path createTempFile(FileSystem fs) throws IOException { do { Path parentDirectory = this.cacheFilePath.getParent(); String fileName = this.cacheFilePath.getName() + "." 
+ count; - log.info("Attempting to create " + fileName + "under " + parentDirectory); + log.info("Attempting to create {} under {}", fileName, parentDirectory); tmpCacheFile = new Path(parentDirectory, fileName); count++; } while (!fs.createNewFile(tmpCacheFile)); diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java index 6d09758cc4e..dfca4f17fde 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java @@ -14,7 +14,8 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.xerces.jaxp.SAXParserFactoryImpl; import org.xml.sax.Attributes; import org.xml.sax.SAXException; @@ -28,7 +29,7 @@ /** Helper class to read XML based Field Configurations */ public final class XMLFieldConfigHelper implements FieldConfigHelper { - private static final Logger log = Logger.getLogger(XMLFieldConfigHelper.class); + private static final Logger log = LoggerFactory.getLogger(XMLFieldConfigHelper.class); /** be explicit and use Apache Xerces-J here instead of relying on java to plug in the proper parser */ private static final SAXParserFactory parserFactory = SAXParserFactoryImpl.newInstance(); @@ -69,7 +70,7 @@ public static XMLFieldConfigHelper load(String fieldConfigFile, BaseIngestHelper try (InputStream in = getAsStream(fieldConfigFile)) { if (in != null) { - log.info("Loading field configuration from configuration file: " + fieldConfigFile); + log.info("Loading field configuration from configuration file: {}", fieldConfigFile); return new XMLFieldConfigHelper(in, baseIngestHelper); } else { throw new IllegalArgumentException("Field config file '" + fieldConfigFile + "' not found!"); @@ -92,7 +93,7 @@ private static InputStream getAsStream(String fieldConfigPath) { try { return uri.toURL().openStream(); } catch (IOException e) { - log.error("Could not open config location: " + fieldConfigPath, e); + log.error("Could not open config location: {}", fieldConfigPath, e); return null; } } @@ -110,7 +111,7 @@ public XMLFieldConfigHelper(InputStream in, BaseIngestHelper helper) throws Pars SAXParser parser = parserFactory.newSAXParser(); parser.parse(in, handler); - log.info("Loaded FieldConfigHelper: " + this); + log.info("Loaded FieldConfigHelper: {}", this); } public boolean addKnownField(String fieldName, FieldInfo info) { @@ -440,7 +441,7 @@ void startField(String uri, String localName, String qName, Attributes attribute if (this.ingestHelper != null) { this.ingestHelper.updateDatawaveTypes(name, fieldType); } else if (fieldType.equals(this.defaultFieldType)) { - log.warn("No BaseIngestHelper set, ignoring type information for " + name + " in configuration file"); + log.warn("No BaseIngestHelper set, ignoring type information for {} in configuration file", name); } } } @@ -496,7 +497,7 @@ void startFieldPattern(String uri, String localName, String qName, Attributes at if (this.ingestHelper != null) { this.ingestHelper.updateDatawaveTypes(pattern, fieldType); } else if (!fieldType.equals(this.defaultFieldType)) { - log.warn("No BaseIngestHelper set, ignoring type information for " + pattern + " in configuration file"); + log.warn("No BaseIngestHelper set, ignoring type information for {} in 
configuration file", pattern); } } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/AbstractIngestHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/AbstractIngestHelper.java index de17da681c5..6bffa329d08 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/AbstractIngestHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/AbstractIngestHelper.java @@ -5,7 +5,8 @@ import java.util.Set; import java.util.TreeSet; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.data.normalizer.NormalizationException; import datawave.data.type.Type; @@ -17,7 +18,7 @@ * fields values from the datatypes that they represent. */ public abstract class AbstractIngestHelper extends DataTypeHelperImpl implements IngestHelperInterface { - private static final Logger log = Logger.getLogger(AbstractIngestHelper.class); + private static final Logger log = LoggerFactory.getLogger(AbstractIngestHelper.class); protected boolean deleteMode = false; protected boolean replaceMalformedUTF8 = false; @@ -64,7 +65,9 @@ public String getNormalizedMaskedValue(final String key) { final Set normalizedValues = normalizeFieldValue(fieldName.toUpperCase(), value); return normalizedValues.iterator().next(); } catch (final Exception ex) { - log.warn(this.getType().typeName() + ": Unable to normalize masked value of '" + value + "' for " + fieldName, ex); + if (log.isWarnEnabled()) { + log.warn("{}: Unable to normalize masked value of {} for {}", this.getType().typeName(), value, fieldName, ex); + } return value; } } @@ -145,7 +148,7 @@ public void upperCaseSetEntries(Set input, String warnMessage) { if (!s.toUpperCase().equals(s)) { removeList.add(s); addList.add(s.toUpperCase()); - log.warn(" has a value " + s + "that was converted to uppercase."); + log.warn(" has a value {} that was converted to uppercase.", s); } } input.removeAll(removeList); diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java index c3d28d3a2d8..1f25521b658 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java @@ -15,7 +15,8 @@ import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Splitter; import com.google.common.collect.HashMultimap; @@ -139,7 +140,7 @@ public abstract class BaseIngestHelper extends AbstractIngestHelper implements C public static final String FIELD_CONFIG_FILE = ".data.category.field.config.file"; - private static final Logger log = ThreadConfigurableLogger.getLogger(BaseIngestHelper.class); + private static final Logger log = LoggerFactory.getLogger(BaseIngestHelper.class); private Multimap> typeFieldMap = null; private Multimap> typePatternMap = null; @@ -256,7 +257,7 @@ public void setup(Configuration config) { final String fieldConfigFile = config.get(this.getType().typeName() + FIELD_CONFIG_FILE); if (fieldConfigFile != null) { if (log.isDebugEnabled()) { - log.debug("Field config file " + fieldConfigFile + " specified for: " + this.getType().typeName() + FIELD_CONFIG_FILE); + log.debug("Field config file {} 
specified for: {}", fieldConfigFile, this.getType().typeName() + FIELD_CONFIG_FILE); } this.fieldConfigHelper = XMLFieldConfigHelper.load(fieldConfigFile, this); } @@ -264,7 +265,7 @@ public void setup(Configuration config) { // Process the indexed fields if (config.get(this.getType().typeName() + DISALLOWLIST_INDEX_FIELDS) != null) { if (log.isDebugEnabled()) { - log.debug("Disallowlist specified for: " + this.getType().typeName() + DISALLOWLIST_INDEX_FIELDS); + log.debug("Disallowlist specified for: {}", this.getType().typeName() + DISALLOWLIST_INDEX_FIELDS); } super.setHasIndexDisallowlist(true); configProperty = DISALLOWLIST_INDEX_FIELDS; @@ -275,10 +276,10 @@ public void setup(Configuration config) { } // Load the proper list of fields to (not) index - if (fieldConfigHelper != null) { - log.info("Using field config helper for " + this.getType().typeName()); - } else if (configProperty == null) { - log.warn("No index fields or disallowlist fields specified, not generating index fields for " + this.getType().typeName()); + if (fieldConfigHelper != null && log.isInfoEnabled()) { + log.info("Using field config helper for {}", this.getType().typeName()); + } else if (configProperty == null && log.isWarnEnabled()) { + log.warn("No index fields or disallowlist fields specified, not generating index fields for {}", this.getType().typeName()); } else { this.indexedFields = Sets.newHashSet(); Collection indexedStrings = config.getStringCollection(this.getType().typeName() + configProperty); @@ -288,7 +289,9 @@ public void setup(Configuration config) { } this.moveToPatternMap(this.indexedFields, this.indexedPatterns); } else { - log.warn(this.getType().typeName() + configProperty + " not specified."); + if (log.isWarnEnabled()) { + log.warn("{} not specified.", this.getType().typeName() + configProperty); + } } } @@ -306,7 +309,7 @@ public void setup(Configuration config) { // Process the reverse index fields if (config.get(this.getType().typeName() + DISALLOWLIST_REVERSE_INDEX_FIELDS) != null) { if (log.isDebugEnabled()) { - log.debug("Disallowlist specified for: " + this.getType().typeName() + DISALLOWLIST_REVERSE_INDEX_FIELDS); + log.debug("Disallowlist specified for: {}", this.getType().typeName() + DISALLOWLIST_REVERSE_INDEX_FIELDS); } this.setHasReverseIndexDisallowlist(true); @@ -314,16 +317,16 @@ public void setup(Configuration config) { configProperty = DISALLOWLIST_REVERSE_INDEX_FIELDS; } else if (config.get(this.getType().typeName() + REVERSE_INDEX_FIELDS) != null) { if (log.isDebugEnabled()) { - log.debug("Reverse Index specified.for: " + this.getType().typeName() + REVERSE_INDEX_FIELDS); + log.debug("Reverse Index specified.for: {}", this.getType().typeName() + REVERSE_INDEX_FIELDS); } this.setHasReverseIndexDisallowlist(false); configProperty = REVERSE_INDEX_FIELDS; } // Load the proper list of fields to (not) reverse index - if (configProperty == null) { - log.warn("No reverse index fields or disallowlist reverse index fields specified, not generating reverse index fields for " - + this.getType().typeName()); + if (configProperty == null && log.isWarnEnabled()) { + log.warn("No reverse index fields or disallowlist reverse index fields specified, not generating reverse index fields for {}", + this.getType().typeName()); } else { reverseIndexedFields = Sets.newHashSet(); Collection reverseIndexedStrings = config.getStringCollection(this.getType().typeName() + configProperty); @@ -333,7 +336,9 @@ public void setup(Configuration config) { } 
this.moveToPatternMap(this.reverseIndexedFields, this.reverseIndexedPatterns); } else { - log.warn(this.getType().typeName() + configProperty + " not specified"); + if (log.isWarnEnabled()) { + log.warn("{} not specified", this.getType().typeName() + configProperty); + } } } @@ -389,7 +394,7 @@ public void setup(Configuration config) { try { policy = FailurePolicy.valueOf(property.getValue()); } catch (Exception e) { - log.error("Unable to parse field normalization failure policy: " + property.getValue(), e); + log.error("Unable to parse field normalization failure policy: {}", property.getValue(), e); throw new IllegalArgumentException("Unable to parse field normalization failure policy: " + property.getValue(), e); } if (fieldName.indexOf('*') >= 0) { @@ -521,7 +526,7 @@ public static String getFieldName(Type dataType, String property, String propert // if this type already has a '.', then we have a malformed property // name if (dataType.typeName().indexOf('.') >= 0) { - log.error(propertyPattern + " property malformed: " + property); + log.error("{} property malformed: {}", propertyPattern, property); throw new IllegalArgumentException(propertyPattern + " property malformed: " + property); } @@ -544,7 +549,7 @@ public static String getFieldType(Type dataType, String property, String propert // if this type already has a '.', then we have a malformed property // name if (dataType.typeName().indexOf('.') >= 0) { - log.error(propertyPattern + " property malformed: " + property); + log.error("{} property malformed: {}", propertyPattern, property); throw new IllegalArgumentException(propertyPattern + " property malformed: " + property); } @@ -668,8 +673,8 @@ public static Matcher getBestMatch(Set patterns, String fieldName) { } if (patternMatcher.reset(fieldName).matches()) { if (bestMatch != null) { - log.warn("Multiple regular expression patterns with the same length exist for matching field " + fieldName - + ". The pattern that sorts lexicographically last will be used. Please verify your configurations."); + log.warn("Multiple regular expression patterns with the same length exist for matching field {}. " + + "The pattern that sorts lexicographically last will be used. 
Please verify your configurations.", fieldName); break; } else { bestMatch = patternMatcher; @@ -746,9 +751,7 @@ public HashSet normalizeFieldValue(String fieldName, value.setEventFieldValue(null); } values.add(value); - if (log.isDebugEnabled()) { - log.debug("added normalized field " + value + " to values set."); - } + log.debug("added normalized field {} to values set.", value); } return values; } @@ -774,20 +777,14 @@ protected NormalizedContentInterface normalizeFieldValue(NormalizedContentInterf */ protected Set normalize(NormalizedContentInterface normalizedContent) { String eventFieldName = normalizedContent.getEventFieldName(); - if (log.isDebugEnabled()) { - log.debug("event field name is " + eventFieldName + " in " + normalizedContent); - } + log.debug("event field name is {} in {}", eventFieldName, normalizedContent); String indexedFieldName = normalizedContent.getIndexedFieldName(); - if (log.isDebugEnabled()) { - log.debug("indexed field name is " + indexedFieldName + " in " + normalizedContent); - } + log.debug("indexed field name is {} in {}", indexedFieldName, normalizedContent); // if it is indexed, set the index part, if (this.isIndexedField(eventFieldName) || this.isIndexedField(indexedFieldName)) { - if (log.isDebugEnabled()) { - log.debug("eventFieldName=" + eventFieldName + ", indexedFieldName =" + indexedFieldName + " we have an indexed field here " - + normalizedContent); - } + log.debug("eventFieldName={}, indexedFieldName={} we have an indexed field here {}", + eventFieldName, indexedFieldName, normalizedContent); Collection> dataTypes = getDataTypes(normalizedContent.getIndexedFieldName()); HashSet values = new HashSet<>(dataTypes.size()); for (datawave.data.type.Type dataType : dataTypes) { @@ -796,40 +793,30 @@ protected Set normalize(NormalizedContentInterface n } else { values.add(normalize(normalizedContent, dataType)); } - if (log.isDebugEnabled()) { - log.debug("added normalized field " + normalizedContent + " to values " + values); - } + log.debug("added normalized field {} to values {}", normalizedContent, values); } return values; } // if it is normalized, set the field value part and the (unused) // indexed field value part if (this.isNormalizedField(eventFieldName) || this.isNormalizedField(indexedFieldName)) { - if (log.isDebugEnabled()) { - log.debug("eventFieldName=" + eventFieldName + ", indexedFieldName =" + indexedFieldName + " we have a normalized field here " - + normalizedContent); - } + log.debug("eventFieldName={}, indexedFieldName={} we have a normalized field here {}", + eventFieldName, indexedFieldName, normalizedContent); Collection> dataTypes = getDataTypes(normalizedContent.getIndexedFieldName()); HashSet values = new HashSet<>(dataTypes.size()); for (datawave.data.type.Type dataType : dataTypes) { values.add(normalizeFieldValue(normalizedContent, dataType)); - if (log.isDebugEnabled()) { - log.debug("added normalized field " + normalizedContent + " to values " + values); - } + log.debug("added normalized field {} to values {}", normalizedContent, values); } return values; } else { // gets the default normalizer, if present - if (log.isDebugEnabled()) { - log.debug("not a normalized field: " + indexedFieldName + " nor " + eventFieldName); - } + log.debug("not a normalized field: {} nor {}", indexedFieldName, eventFieldName); Collection> dataTypes = getDataTypes(normalizedContent.getIndexedFieldName()); HashSet values = new HashSet<>(dataTypes.size()); for (datawave.data.type.Type dataType : dataTypes) { 
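+ // neither indexed nor normalized: still emit one value per default Type registered for this field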
values.add(normalize(normalizedContent, dataType)); - if (log.isDebugEnabled()) { - log.debug("added normalized field " + normalizedContent + " to values " + values); - } + log.debug("added normalized field {} to values {}", normalizedContent, values); } return values; } @@ -935,7 +922,9 @@ public Multimap normalize(Multimap normalizeMap(Multimap normalizeAndAlias(NormalizedContentInt ns = normalize(aliaser.normalizeAndAlias(nArg)); } catch (Exception e) { if (log.isTraceEnabled()) { - log.trace(this.getType().typeName() + ": Field failed normalization: " + nArg, e); + log.trace("{}: Field failed normalization: {}", this.getType().typeName(), nArg, e); } nArg.setError(e); return Collections.singleton(nArg); @@ -1120,10 +1111,12 @@ public boolean verify() { // first verify the index fields if (this.indexedFields == null) { retVal = false; - log.error(this.getType().typeName() + ": index set has been set to null."); + if (log.isErrorEnabled()) { + log.error("{}: index set has been set to null.", this.getType().typeName()); + } } else if (this.indexedFields.isEmpty()) { if (log.isDebugEnabled()) { - log.debug(this.getType().typeName() + ": no fields have been set to index."); + log.debug("{}: no fields have been set to index.", this.getType().typeName()); } } else { upperCaseSetEntries(this.indexedFields, this.getType().typeName() + ": index fields"); @@ -1131,10 +1124,12 @@ public boolean verify() { // next verify the index fields if (this.reverseIndexedFields == null) { retVal = false; - log.error(this.getType().typeName() + ": reverse index set has been set to null."); + if (log.isErrorEnabled()) { + log.error("{}: reverse index set has been set to null.", this.getType().typeName()); + } } else if (this.reverseIndexedFields.isEmpty()) { if (log.isDebugEnabled()) { - log.debug(this.getType().typeName() + ": no fields have been set to reverse index."); + log.debug("{}: no fields have been set to reverse index.", this.getType().typeName()); } } else { upperCaseSetEntries(this.reverseIndexedFields, this.getType().typeName() + ": reverse index fields"); @@ -1208,9 +1203,7 @@ public Multimap getVirtualFields(Multimap= 0 || fieldName.indexOf('+') >= 0) { // We need a more conclusive test for regex typePatternMap.put(fieldName, datawaveType); @@ -1228,7 +1219,7 @@ public void updateDatawaveTypes(String fieldName, String typeClasses) { typeFieldMap.put(fieldName, datawaveType); } if (log.isDebugEnabled()) { - log.debug("Registered a " + typeClass + " for type[" + this.getType().typeName() + "], field[" + fieldName + "]"); + log.debug("Registered a {} for type[{}], field[{}}]", typeClass, this.getType().typeName(), fieldName); } } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CSVIngestHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CSVIngestHelper.java index 3bbf5bbbfae..3edbe64e951 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CSVIngestHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CSVIngestHelper.java @@ -5,7 +5,8 @@ import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.text.StrTokenizer; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; @@ -17,7 +18,7 @@ public class CSVIngestHelper extends ContentBaseIngestHelper { - private static final 
Logger log = Logger.getLogger(CSVIngestHelper.class); + private static final Logger log = LoggerFactory.getLogger(CSVIngestHelper.class); protected CSVHelper helper = null; @Override @@ -156,12 +157,12 @@ protected void processExtraField(Multimap fields, String fieldVal } } } else { - log.error("Unable to process the following as a name=value pair: " + fieldValue); + log.error("Unable to process the following as a name=value pair: {}", fieldValue); } } /** - * Process a field. This will split multi-valued fields as necessary and call processField on each part. + * Process a field. This will split multivalued fields as necessary and call processField on each part. * * @param fields * list of fields @@ -173,7 +174,7 @@ protected void processExtraField(Multimap fields, String fieldVal protected void processPreSplitField(Multimap fields, String fieldName, String fieldValue) { if (fieldValue != null) { if (helper.isMultiValuedField(fieldName)) { - // Value can be multiple parts, need to break on semi-colon + // Value can be multiple parts, need to break on semicolon String singleFieldName = helper.usingMultiValuedFieldsDisallowlist() ? fieldName : helper.getMultiValuedFields().get(fieldName); int limit = helper.getMultiFieldSizeThreshold(); int count = 0; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CompositeIngest.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CompositeIngest.java index a1f7f437cc1..a5fef654c62 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CompositeIngest.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/CompositeIngest.java @@ -16,7 +16,8 @@ import org.apache.commons.lang.NotImplementedException; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Objects; import com.google.common.collect.HashMultimap; @@ -78,7 +79,7 @@ static boolean isOverloadedCompositeField(Collection compFields, String class CompositeFieldNormalizer { private static final long serialVersionUID = -3892470989028896718L; - private static final Logger log = Logger.getLogger(CompositeFieldNormalizer.class); + private static final Logger log = LoggerFactory.getLogger(CompositeFieldNormalizer.class); private static final String DEFAULT_SEPARATOR = new String(Character.toChars(Character.MAX_CODE_POINT)); @@ -149,7 +150,7 @@ public void setup(Type type, Configuration config) { // if any members are indexOnly fields, skip this one if (!Sets.intersection(Sets.newHashSet(componentFields), indexOnly).isEmpty()) { - log.warn("rejecting " + compositeField + " which includes index only field in " + indexOnly); + log.warn("rejecting {} which includes index only field in {}", compositeField, indexOnly); continue; } @@ -187,7 +188,7 @@ public void setup(Type type, Configuration config) { Set emptySet = Collections.emptySet(); ignoreNormalizationForFields = (null != ignoreNormalization) ? 
cleanSet(ignoreNormalization) : emptySet; - log.debug("setup with composites " + this.compositeToFieldMap); + log.debug("setup with composites {}", this.compositeToFieldMap); } /** diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/ContentBaseIngestHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/ContentBaseIngestHelper.java index c85b66638ae..466b76ff859 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/ContentBaseIngestHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/ContentBaseIngestHelper.java @@ -5,7 +5,8 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; @@ -16,7 +17,7 @@ */ public abstract class ContentBaseIngestHelper extends AbstractContentIngestHelper { - private static final Logger log = Logger.getLogger(ContentBaseIngestHelper.class); + private static final Logger log = LoggerFactory.getLogger(ContentBaseIngestHelper.class); private final Set contentIndexAllowlist = new HashSet<>(); private final Set contentReverseIndexAllowlist = new HashSet<>(); @@ -107,8 +108,8 @@ public void setup(Configuration config) throws IllegalArgumentException { : rawDocumentViewName; if (log.isTraceEnabled()) { log.trace("saveRawDataOption was true"); - log.trace("getType().typeName()+RAW_DOCUMENT_VIEW_NAME: " + getType().typeName() + RAW_DOCUMENT_VIEW_NAME); - log.trace("config.get(getType().typeName()+RAW_DOCUMENT_VIEW_NAME): " + config.get(getType().typeName() + RAW_DOCUMENT_VIEW_NAME)); + log.trace("getType().typeName()+RAW_DOCUMENT_VIEW_NAME: {}", getType().typeName() + RAW_DOCUMENT_VIEW_NAME); + log.trace("config.get(getType().typeName()+RAW_DOCUMENT_VIEW_NAME): {}", config.get(getType().typeName() + RAW_DOCUMENT_VIEW_NAME)); } } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java index df9fc378b63..1984979ae05 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java @@ -5,7 +5,6 @@ import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; import com.google.common.collect.Maps; @@ -14,6 +13,8 @@ import datawave.ingest.data.TypeRegistry; import datawave.ingest.data.config.ConfigurationHelper; import datawave.ingest.data.config.DataTypeHelper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This class can be used to help normalize the event field values akin to how the BaseIngestHelper can normalize the indexed field values. This was not cooked @@ -25,7 +26,7 @@ * * public void setup(Configuration config) { ... eventFieldNormalizerHelper = new EventFieldNormalizerHelper(config); ... 
} * - * then override the normalize(NormalizedContentInterface) as follows: + * then override the method normalize(NormalizedContentInterface) as follows: * * public NormalizedContentInterface normalize(NormalizedContentInterface nci) { * @@ -46,7 +47,7 @@ public class EventFieldNormalizerHelper { private Map> typeCompiledPatternMap = null; private static final Type NO_OP_TYPE = new NoOpType(); - private static final Logger log = Logger.getLogger(EventFieldNormalizerHelper.class); + private static final Logger log = LoggerFactory.getLogger(EventFieldNormalizerHelper.class); /** * @@ -85,7 +86,9 @@ public EventFieldNormalizerHelper(Configuration config) { } else { typeFieldMap.put(fieldName, normalizer); } - log.debug("Registered a " + normalizerClass + " for type[" + this.getType().typeName() + "], EVENT (not index) field[" + fieldName + "]"); + if (log.isDebugEnabled()) { + log.debug("Registered a {} for type [{}], EVENT (not index) field[{}]", normalizerClass, this.getType().typeName(), fieldName); + } } } } @@ -151,7 +154,7 @@ protected String getFieldName(String property, String propertyPattern) { if (fieldName.indexOf('.') >= 0) { // if this type already has a '.', then we have a malformed property name if (this.getType().typeName().indexOf('.') >= 0) { - log.error(propertyPattern + " property malformed: " + property); + log.error("{} property malformed: {}", propertyPattern, property); throw new IllegalArgumentException(propertyPattern + " property malformed: " + property); } fieldName = null; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/IngestFieldFilter.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/IngestFieldFilter.java index 06f637925cd..c555f973aac 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/IngestFieldFilter.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/IngestFieldFilter.java @@ -10,7 +10,8 @@ import org.apache.commons.collections4.keyvalue.AbstractMapEntry; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; @@ -46,7 +47,7 @@ */ public class IngestFieldFilter { - private static final Logger logger = Logger.getLogger(IngestFieldFilter.class); + private static final Logger logger = LoggerFactory.getLogger(IngestFieldFilter.class); @Deprecated public static final String FILTER_FIELD_SUFFIX = ".data.field.filter"; @@ -75,10 +76,10 @@ public void setup(Configuration conf) { fieldNameFilters = new FieldConfiguration(); fieldNameFilters.load(conf.get(dataType.typeName() + FILTER_FIELD_SUFFIX), false); fieldNameFilters.load(conf.get(dataType.typeName() + FILTER_FIELD_NAME_SUFFIX), false); - logger.info("Field Name Filters for " + dataType.typeName() + ": " + fieldNameFilters); + logger.info("Field Name Filters for {}: {}", dataType.typeName(), fieldNameFilters); fieldValueFilters = new FieldConfiguration(conf.get(dataType.typeName() + FILTER_FIELD_VALUE_SUFFIX), true); - logger.info("Field Value Filters for " + dataType.typeName() + ": " + fieldValueFilters); + logger.info("Field Value Filters for {}: {}", dataType.typeName(), fieldValueFilters); } /** @@ -92,7 +93,7 @@ public void apply(Multimap fields) { for (FieldFilter filter : fieldNameFilters) { if (fields.keySet().containsAll(filter.getKeepFields())) { if 
(logger.isTraceEnabled()) { - logger.trace("Removing " + filter.getDropFields() + " because " + filter.getKeepFields() + " exists in event"); + logger.trace("Removing {} because {} exists in event", filter.getDropFields(), filter.getKeepFields()); } fields.keySet().removeAll(filter.getDropFields()); } @@ -103,9 +104,7 @@ public void apply(Multimap fields) { for (List keepValues : gatherValueLists(fields, filter.getKeepFields(), -1, null)) { for (List toRemoveValues : gatherValueLists(fields, filter.getDropFields(), -1, null)) { if (equalValues(keepValues, toRemoveValues)) { - if (logger.isTraceEnabled()) { - logger.trace("Removing " + toRemoveValues + " because " + keepValues + " exists in event"); - } + logger.trace("Removing {} because {} exists in event", toRemoveValues, keepValues); for (FieldValue toRemoveValue : toRemoveValues) { fields.remove(toRemoveValue.getKey(), toRemoveValue.getValue()); } @@ -266,7 +265,7 @@ public String getGroup() { } /** - * Determine if the raw value in this matches the raw value in another + * Determine if the raw value in this, matches the raw value in another * * @param other * the field value to check diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/normalizer/AbstractNormalizer.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/normalizer/AbstractNormalizer.java index 186055207d7..317ba39e12a 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/normalizer/AbstractNormalizer.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/normalizer/AbstractNormalizer.java @@ -6,7 +6,8 @@ import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; @@ -17,7 +18,7 @@ import datawave.ingest.data.config.NormalizedFieldAndValue; public abstract class AbstractNormalizer implements TextNormalizer { - private static final Logger log = Logger.getLogger(AbstractNormalizer.class); + private static final Logger log = LoggerFactory.getLogger(AbstractNormalizer.class); @Override public void setup(Type type, String instance, Configuration config) {} @@ -33,7 +34,7 @@ public void setup(Type type, String instance, Configuration config) {} * configuration to use * @param normalizerClass * the normalizerClass to set up - * @return An configured instance of the normalizerClass + * @return A configured instance of the normalizerClass */ public static TextNormalizer createNormalizer(Type type, String instance, Configuration config, String normalizerClass) { Class c; @@ -50,7 +51,7 @@ public static TextNormalizer createNormalizer(Type type, String instance, Config throw new IllegalArgumentException("Error creating instance of class " + normalizerClass + ':' + e.getLocalizedMessage(), e); } if (o instanceof TextNormalizer) { - // setup the normalizer + // set up the normalizer ((TextNormalizer) o).setup(type, instance, config); } else { throw new IllegalArgumentException(normalizerClass + " is not an instance of " + TextNormalizer.class.getName()); @@ -223,7 +224,9 @@ public NormalizedContentInterface normalize(NormalizedContentInterface field) { if (field.getEventFieldName().equals("IP_GEO_FM_COORDINATES") && field.getEventFieldValue().equals("-99.999/-999.999")) { log.warn("Found know bad default value: IP_GEO_FM_COORDINATES=-99.999/-999.999"); } else { - log.error("Failed to normalize " + field.getEventFieldName() + '=' + 
field.getEventFieldValue(), e); + if (log.isErrorEnabled()) { + log.error("Failed to normalize {}={}", field.getEventFieldName(), field.getEventFieldValue(), e); + } } n.setError(e); } @@ -240,7 +243,9 @@ public Multimap normalize(Multimap extractFieldNameComponents(Mu try { revisedField = extractFieldNameComponents(field); } catch (Exception e) { - log.error("Failed to extract field name components: " + field.getIndexedFieldName() + '=' + field.getIndexedFieldValue(), e); + if (log.isErrorEnabled()) { + log.error("Failed to extract field name components: {}={}", field.getIndexedFieldName(), field.getIndexedFieldValue(), e); + } revisedField.setError(e); } results.put(revisedField.getIndexedFieldName(), revisedField); diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java index 931b2efa40e..682ad11319a 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java @@ -3,7 +3,8 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharArraySet; @@ -12,7 +13,7 @@ public class TokenizationHelper { - private static final Logger log = Logger.getLogger(TokenizationHelper.class); + private static final Logger log = LoggerFactory.getLogger(TokenizationHelper.class); /** * Used to track tokenization execution time. It's too expensive to perform a call to System.currentTimeMillis() each time we produce a new token, so spawn @@ -21,7 +22,7 @@ public class TokenizationHelper { * The main thread will check the counter value each time it produces a new token and thus track the number of ticks that have elapsed. */ public static class HeartBeatThread extends Thread { - private static final Logger log = Logger.getLogger(HeartBeatThread.class); + private static final Logger log = LoggerFactory.getLogger(HeartBeatThread.class); public static final long INTERVAL = 500; // half second resolution public static volatile int counter = 0; @@ -45,11 +46,11 @@ public void run() { } // verify that we're exeuting in a timely fashion - // ..if not warn. + // if not warn. 
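+ // a delta well beyond INTERVAL means this thread was starved and the tick counter is running behind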
long currentRun = System.currentTimeMillis(); long delta = currentRun - lastRun; if (delta > (INTERVAL * 1.5)) { - log.warn("HeartBeatThread starved for cpu, " + "should execute every " + INTERVAL + " ms, " + "latest: " + delta + " ms."); + log.warn("HeartBeatThread starved for cpu, should execute every {}ms, latest: {}ms.", INTERVAL, delta); } lastRun = currentRun; counter++; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/input/reader/AbstractEventRecordReader.java b/warehouse/ingest-core/src/main/java/datawave/ingest/input/reader/AbstractEventRecordReader.java index 20083dae89e..cb35e5deb91 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/input/reader/AbstractEventRecordReader.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/input/reader/AbstractEventRecordReader.java @@ -18,7 +18,8 @@ import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Sets; @@ -41,7 +42,7 @@ public abstract class AbstractEventRecordReader extends RecordReader uidOverrideFields = new TreeMap<>(); @@ -193,7 +194,7 @@ protected void setDefaultSecurityMarkings(RawRecordContainer event) { * Ability to override the UID value. This is useful for datatypes where we want the UID to be based off the configured id field's value instead of the * entire record, so that the csv records and bud file content are merged into one event in the shard table. For the enrichment data, we want to base the * UID off of the MD5 hash and some other metadata, but not the dates in the record. This is because we will have to reload the enrichment data on a regular - * basis and we want the same hashes to merge. + * basis, and we want the same hashes to merge. 
* * @param event * the event container to examine @@ -238,12 +239,12 @@ protected void extractEventDate(final String fieldName, final String fieldValue) try { event.setDate(format.parse(DateNormalizer.convertMicroseconds(fieldValue, format.toPattern())).getTime()); if (logger.isDebugEnabled()) { - logger.debug("Parsed date from '" + fieldName + "' using formatter " + format.toPattern()); + logger.debug("Parsed date from {} using formatter {}", fieldName, format.toPattern()); } break; } catch (java.text.ParseException e) { if (logger.isTraceEnabled()) { - logger.trace("Error parsing date from hash record using format " + format.toPattern(), e); + logger.trace("Error parsing date from hash record using format {}", format.toPattern(), e); } } } @@ -252,7 +253,7 @@ protected void extractEventDate(final String fieldName, final String fieldValue) for (SimpleDateFormat formatter : formatters) { patterns.add(formatter.toPattern()); } - logger.error("Unable to parse date '" + fieldValue + "' from field '" + fieldName + " using formatters " + patterns); + logger.error("Unable to parse date {} from field {} using formatters {}", fieldValue, fieldName, patterns); } } else if (formatter != null) { try { diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java index 71d6bcb7fe0..2965a2f98f6 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java @@ -1,7 +1,8 @@ package datawave.ingest.mapreduce; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.data.RawRecordContainer; import datawave.ingest.metric.IngestInput; @@ -9,10 +10,10 @@ public class DataTypeDiscardFutureIntervalPredicate implements RawRecordPredicate { - private static final Logger log = Logger.getLogger(DataTypeDiscardFutureIntervalPredicate.class); + private static final Logger log = LoggerFactory.getLogger(DataTypeDiscardFutureIntervalPredicate.class); /** - * number which will be used to evaluate whether or not an Event should be processed. If the Event.getEventDate() is less than (now + interval) then it will + * number which will be used to evaluate whether an Event should be processed. If the Event.getEventDate() is less than (now + interval) then it will * be processed. */ public static final String DISCARD_FUTURE_INTERVAL = "event.discard.future.interval"; @@ -25,7 +26,7 @@ public class DataTypeDiscardFutureIntervalPredicate implements RawRecordPredicat public void setConfiguration(String type, Configuration conf) { long defaultInterval = conf.getLong(DISCARD_FUTURE_INTERVAL, 0l); this.discardFutureInterval = conf.getLong(type + "." + DISCARD_FUTURE_INTERVAL, defaultInterval); - log.info("Setting up type: " + type + " with future interval " + this.discardFutureInterval); + log.info("Setting up type: {} with future interval {}", type, this.discardFutureInterval); } @Override @@ -33,7 +34,7 @@ public boolean shouldProcess(RawRecordContainer record) { // Determine whether the event date is greater than the interval. Excluding fatal error events. 
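+ // a configured interval of 0 disables the future-date check entirely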
if (discardFutureInterval != 0L && (record.getDate() > (now.get() + discardFutureInterval))) { if (log.isInfoEnabled()) - log.info("Event with time " + record.getDate() + " newer than specified interval of " + (now.get() + discardFutureInterval) + ", skipping..."); + log.info("Event with time {} newer than specified interval of {}, skipping...", record.getDate(), (now.get() + discardFutureInterval)); return false; } return true; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java index 69a0857cc26..c4846eaf966 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java @@ -1,7 +1,8 @@ package datawave.ingest.mapreduce; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.data.RawRecordContainer; import datawave.ingest.metric.IngestInput; @@ -9,10 +10,10 @@ public class DataTypeDiscardIntervalPredicate implements RawRecordPredicate { - private static final Logger log = Logger.getLogger(DataTypeDiscardIntervalPredicate.class); + private static final Logger log = LoggerFactory.getLogger(DataTypeDiscardIntervalPredicate.class); /** - * number which will be used to evaluate whether or not an Event should be processed. If the Event.getEventDate() is greater than (now - interval) then it + * number which will be used to evaluate whether an Event should be processed. If the Event.getEventDate() is greater than (now - interval) then it * will be processed. */ public static final String DISCARD_INTERVAL = "event.discard.interval"; @@ -25,7 +26,7 @@ public class DataTypeDiscardIntervalPredicate implements RawRecordPredicate { public void setConfiguration(String type, Configuration conf) { long defaultInterval = conf.getLong(DISCARD_INTERVAL, 0l); this.discardInterval = conf.getLong(type + "." + DISCARD_INTERVAL, defaultInterval); - log.info("Setting up type: " + type + " with interval " + this.discardInterval); + log.info("Setting up type: {} with interval {}", type, this.discardInterval); } @Override @@ -33,7 +34,7 @@ public boolean shouldProcess(RawRecordContainer record) { // Determine whether the event date is greater than the interval. Excluding fatal error events. 
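+ // a configured interval of 0 disables the age-based check entirely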
if (discardInterval != 0L && (record.getDate() < (now.get() - discardInterval))) { if (log.isInfoEnabled()) - log.info("Event with time " + record.getDate() + " older than specified interval of " + (now.get() - discardInterval) + ", skipping..."); + log.info("Event with time {} older than specified interval of {}, skipping...", record.getDate(), (now.get() - discardInterval)); return false; } return true; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/atom/AtomDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/atom/AtomDataTypeHandler.java index f48dfa0a9ec..5ba56f69fda 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/atom/AtomDataTypeHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/atom/AtomDataTypeHandler.java @@ -13,7 +13,8 @@ import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskInputOutputContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Strings; import com.google.common.collect.Multimap; @@ -42,7 +43,7 @@ */ public class AtomDataTypeHandler implements ExtendedDataTypeHandler { - private static final Logger log = Logger.getLogger(AtomDataTypeHandler.class); + private static final Logger log = LoggerFactory.getLogger(AtomDataTypeHandler.class); public static final String ATOM_TYPE = "atom"; public static final String ATOM_TABLE_NAME = ATOM_TYPE + ".table.name"; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/EdgeKeyVersioningCache.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/EdgeKeyVersioningCache.java index 726dadabdb2..b63a1898272 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/EdgeKeyVersioningCache.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/EdgeKeyVersioningCache.java @@ -30,7 +30,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.data.normalizer.DateNormalizer; import datawave.data.type.util.NumericalEncoder; @@ -52,7 +53,7 @@ public class EdgeKeyVersioningCache { - private static final Logger log = Logger.getLogger(EdgeKeyVersioningCache.class); + private static final Logger log = LoggerFactory.getLogger(EdgeKeyVersioningCache.class); public static final String METADATA_TABLE_NAME = "metadata.table.name"; public static final String KEY_VERSION_CACHE_DIR = "datawave.ingest.key.version.cache.dir"; @@ -98,7 +99,7 @@ public Map getEdgeKeyVersionDateChange() throws IOException { */ public void updateCache(FileSystem fs) throws AccumuloSecurityException, AccumuloException, IOException, TableNotFoundException { - log.info("Reading the " + metadataTableName + " for edge key version ..."); + log.info("Reading the {} for edge key version ...", metadataTableName); if (this.cbHelper == null) { this.cbHelper = new AccumuloHelper(); this.cbHelper.setup(conf); @@ -134,7 +135,7 @@ public void updateCache(FileSystem fs) throws AccumuloSecurityException, Accumul * "old" edge key from being created...that is, with EdgeKey.DATE_TYPE.OLD_EVENT (See ProtobufEdgeDataTypeHandler.writeEdges) */ Date then = new Date(0); - log.warn("Could not find any edge key 
version entries in the " + metadataTableName + " table. Automatically seeding with date: " + then); + log.warn("Could not find any edge key version entries in the {} table. Automatically seeding with date: {}", metadataTableName, then); String dateString = seedMetadataTable(client, then.getTime(), 1); versionDates.put(1, dateString); } @@ -165,11 +166,11 @@ public void updateCache(FileSystem fs) throws AccumuloSecurityException, Accumul throw new IOException("Failed to rename temporary splits file"); } } catch (Exception e) { - log.warn("Unable to rename " + tmpVersionFile + " to " + this.versioningCache + " probably because somebody else replaced it", e); + log.warn("Unable to rename {} to {} probably because somebody else replaced it", tmpVersionFile, this.versioningCache, e); try { fs.delete(tmpVersionFile, false); } catch (Exception e2) { - log.error("Unable to clean up " + tmpVersionFile, e2); + log.error("Unable to clean up {}", tmpVersionFile, e2); } } } catch (Exception e) { @@ -247,11 +248,11 @@ public void createMetadataEntry(long time, int keyVersionNum) throws Exception { private void ensureTableExists(AccumuloClient client) throws AccumuloSecurityException, AccumuloException { TableOperations tops = client.tableOperations(); if (!tops.exists(metadataTableName)) { - log.info("Creating table: " + metadataTableName); + log.info("Creating table: {}", metadataTableName); try { tops.create(metadataTableName); } catch (TableExistsException e) { - log.error(metadataTableName + " already exists someone got here first."); + log.error("{} already exists someone got here first.", metadataTableName); } } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/define/EdgeDataBundle.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/define/EdgeDataBundle.java index 18c5488886e..77d25163774 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/define/EdgeDataBundle.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/edge/define/EdgeDataBundle.java @@ -7,7 +7,8 @@ import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.security.ColumnVisibility; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.edge.util.EdgeKey; import datawave.edge.util.EdgeValue.EdgeValueBuilder; @@ -28,7 +29,7 @@ */ public class EdgeDataBundle { - private static final Logger log = Logger.getLogger(EdgeDataBundle.class); + private static final Logger log = LoggerFactory.getLogger(EdgeDataBundle.class); // Input/Setup variables // final so you're not tempted to change them @@ -91,7 +92,7 @@ public EdgeDataBundle(EdgeDefinition edgeDef, NormalizedContentInterface ifaceSo if (event.getAltIds() != null && !event.getAltIds().isEmpty()) { this.uuid = event.getAltIds().iterator().next(); } - // even though event, etc references are saved above, passing in the event + // even though event, etc. 
references are saved above, passing in the event // prevents future bug this.initFieldMasking(helper, event); this.initMarkings(getSource().getMarkings(), getSink().getMarkings()); @@ -533,7 +534,7 @@ public static Value getStatsLinkValue(final String realmedIdentifier) { return (new Value(hll.getBytes())); } catch (IOException e) { - log.warn("Failed to add " + realmedIdentifier + " to HyperLogLog", e); + log.warn("Failed to add {} to HyperLogLog", realmedIdentifier, e); return (null); } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/error/ErrorShardedDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/error/ErrorShardedDataTypeHandler.java index e167321c8d7..1354ece7ae1 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/error/ErrorShardedDataTypeHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/error/ErrorShardedDataTypeHandler.java @@ -16,7 +16,8 @@ import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskInputOutputContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; @@ -102,7 +103,7 @@ */ public class ErrorShardedDataTypeHandler extends AbstractColumnBasedHandler implements ExtendedDataTypeHandler { - private static final Logger log = Logger.getLogger(ErrorShardedDataTypeHandler.class); + private static final Logger log = LoggerFactory.getLogger(ErrorShardedDataTypeHandler.class); public static final String ERROR_PROP_PREFIX = "error."; @@ -226,7 +227,7 @@ public long process(KEYIN key, RawRecordContainer record, Multimap= System.currentTimeMillis() @@ -211,7 +214,7 @@ public boolean isInitialized() { public void updateCache() throws AccumuloException, AccumuloSecurityException, TableNotFoundException, IOException { FileSystem fs = this.numShardsCachePath.getFileSystem(this.conf); String metadataTableName = ConfigurationHelper.isNull(this.conf, ShardedDataTypeHandler.METADATA_TABLE_NAME, String.class); - log.info("Reading the " + metadataTableName + " for multiple numshards configuration"); + log.info("Reading the {} for multiple numshards configuration", metadataTableName); if (this.aHelper == null) { this.aHelper = new AccumuloHelper(); @@ -255,7 +258,7 @@ public void updateCache() throws AccumuloException, AccumuloSecurityException, T // now move the temporary file to the file cache try { fs.delete(this.numShardsCachePath, false); - // Note this rename will fail if the file already exists (i.e. the delete failed or somebody just replaced it) + // Note this rename will fail if the file already exists (i.e. the delete behavior failed or somebody just replaced it) // but this is OK... 
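+ // a failed rename usually means another process refreshed the cache first; the catch below logs it and removes the temp file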
if (!fs.rename(tmpShardCacheFile, this.numShardsCachePath)) { throw new IOException("Failed to rename temporary multiple numshards cache file"); @@ -263,11 +266,11 @@ public void updateCache() throws AccumuloException, AccumuloSecurityException, T isCacheLoaded = true; } catch (Exception e) { - log.warn("Unable to rename " + tmpShardCacheFile + " to " + this.numShardsCachePath + " probably because somebody else replaced it", e); + log.warn("Unable to rename {} to {} probably because somebody else replaced it", tmpShardCacheFile, this.numShardsCachePath, e); try { fs.delete(tmpShardCacheFile, false); } catch (Exception e2) { - log.error("Unable to clean up " + tmpShardCacheFile, e2); + log.error("Unable to clean up {}", tmpShardCacheFile, e2); } } } @@ -280,11 +283,11 @@ public void updateCache() throws AccumuloException, AccumuloSecurityException, T private void ensureTableExists(AccumuloClient client, String metadataTableName) throws AccumuloException, AccumuloSecurityException { TableOperations tops = client.tableOperations(); if (!tops.exists(metadataTableName)) { - log.info("Creating table: " + metadataTableName); + log.info("Creating table: {}", metadataTableName); try { tops.create(metadataTableName); } catch (TableExistsException tee) { - log.error(metadataTableName + " already exists someone got here first"); + log.error("{} already exists someone got here first", metadataTableName); } } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/CoreSummaryDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/CoreSummaryDataTypeHandler.java index 94894ba67b5..e7ddb6d7214 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/CoreSummaryDataTypeHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/CoreSummaryDataTypeHandler.java @@ -5,7 +5,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Multimap; @@ -20,7 +21,7 @@ import datawave.ingest.metadata.RawRecordMetadata; public abstract class CoreSummaryDataTypeHandler implements DataTypeHandler { - private static final Logger log = ThreadConfigurableLogger.getLogger(CoreSummaryDataTypeHandler.class); + private static final Logger log = LoggerFactory.getLogger(CoreSummaryDataTypeHandler.class); private Configuration mConf = null; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java index b0d5433abc5..ef86aced004 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java @@ -14,7 +14,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; import com.google.common.collect.ArrayListMultimap; diff --git 
a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java index 0f7f29d61fb..c0a97c80cdd 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java @@ -30,7 +30,8 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskInputOutputContext; import org.apache.hadoop.util.bloom.BloomFilter; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.lucene.analysis.CharArraySet; import org.infinispan.commons.util.Base64; @@ -123,7 +124,7 @@ public abstract class ExtendedContentIndexingColumnBasedHandler (INTERVAL * 1.5)) { - log.warn("HeartBeatThread starved for cpu, " + "should execute every " + INTERVAL + " ms, " + "latest: " + delta + " ms."); + log.warn("HeartBeatThread starved for cpu, should execute every {}ms, latest: {}ms.", INTERVAL, delta); } lastRun = currentRun; counter++; @@ -693,7 +696,7 @@ private class DocWriter implements Runnable { @Override public void run() { - log.debug("Writing out a document of size " + value.get().length + " bytes."); + log.debug("Writing out a document of size {} bytes.", value.get().length); Mutation m = new Mutation(new Text(shardId)); m.put(k.getColumnFamily(), k.getColumnQualifier(), new ColumnVisibility(visibility), k.getTimestamp(), value); try { diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java index 093b0997a3b..4b86ede7be7 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java @@ -1,6 +1,7 @@ package datawave.ingest.util; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.lucene.analysis.ngram.NGramTokenizer; import com.google.common.hash.BloomFilter; @@ -20,7 +21,7 @@ public abstract class AbstractNGramTokenizationStrategy { protected static final int DEFAULT_MAX_NGRAM_LENGTH = 25; private BloomFilter filter; - private final Logger log = Logger.getLogger(AbstractNGramTokenizationStrategy.class); + private final Logger log = LoggerFactory.getLogger(AbstractNGramTokenizationStrategy.class); private AbstractNGramTokenizationStrategy source; /** diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java index 1db52c0c2df..24aca277810 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java @@ -7,7 +7,8 @@ import java.util.Map; import java.util.Map.Entry; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.lucene.analysis.ngram.NGramTokenizer; import com.google.common.collect.Multimap; @@ -31,7 +32,7 @@ public class BloomFilterUtil { private static final float FILTER_SIZE_TO_NGRAM_COUNT_FACTOR = 1.1f; private 
final AbstractContentIngestHelper helper; - private final Logger log = Logger.getLogger(BloomFilterUtil.class); + private final Logger log = LoggerFactory.getLogger(BloomFilterUtil.class); private final int maxAllowedExecutionTime; private int maxNGramLength = AbstractNGramTokenizationStrategy.DEFAULT_MAX_NGRAM_LENGTH; private final String minDiskSpacePath; @@ -80,7 +81,7 @@ protected BloomFilterUtil(final AbstractContentIngestHelper helper, float minMem * the n-gram tokenization strategy * @return The number of generated n-grams * @throws TimeoutException - * if the tokenization operation takes too long in relation to the overall mapred.task.timeout + * if the tokenization operation takes too long in relation to the overall mapreduce.task.timeout */ private int applyNgrams(final String fieldName, final Collection ncis, final AbstractNGramTokenizationStrategy strategy) throws TokenizationException { @@ -129,7 +130,7 @@ public int getMaxNGramLength() { } /** - * Returns the desired filter size to output from the applyNGrams(..) method. This value is meant as an approximation to help limit and optimize the number + * Returns the desired filter size to output from the applyNGrams(.) method. This value is meant as an approximation to help limit and optimize the number * of n-grams applied to a generated filter. A value less than or equal to the EMPTY_FILTER_SIZE effectively turns off pruning optimizations based on filter * size, which could result in unexpectedly large bloom filters. * @@ -157,11 +158,11 @@ public BloomFilterWrapper newDefaultFilter(int expectedInsertions) { } /** - * Create a BloomFilter based on a multi-map of fields + * Create a BloomFilter based on a multimap of fields * * @param fields * The fields and their values with which to create a bloom filter - * @return a wrapped BloomFilter based on a multi-map of fields + * @return a wrapped BloomFilter based on a multimap of fields */ public BloomFilterWrapper newMultimapBasedFilter(final Multimap fields) { // Declare the return value @@ -309,7 +310,7 @@ public BloomFilterWrapper newNGramBasedFilter(final Multimap directoryCache, FileSystem fs, InputFile entry) { @@ -80,12 +81,16 @@ private boolean resolveConflict(final Path src, final Path dest) throws IOExcept if (resolved) { // rename tracked locations - log.warn("duplicate ingest file name with different payload(" + src.toUri().toString() + ") - appending timestamp to destination file name"); + if (log.isWarnEnabled()) { + log.warn("duplicate ingest file name with different payload( {} ) - appending timestamp to destination file name", src.toUri().toString()); + } this.entry.renameTrackedLocations(); } else { - log.warn("discarding duplicate ingest file (" + src.toUri().toString() + ") duplicate (" + dest.toUri().toString() + ")"); + if (log.isWarnEnabled()) { + log.warn("discarding duplicate ingest file ( {} ) duplicate ( {} )", src.toUri().toString(), dest.toUri().toString()); + } if (!fs.delete(src, false)) { - log.error("unable to delete duplicate ingest file (" + src.toUri().toString() + ")"); + log.error("unable to delete duplicate ingest file ( {} )", src.toUri().toString()); } } diff --git a/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagMetrics.java b/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagMetrics.java index 3d6a10e366d..81ceef4cc41 100644 --- a/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagMetrics.java +++ b/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagMetrics.java @@ -11,7 +11,8 @@ import 
org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapreduce.Counters; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.mapreduce.StandaloneStatusReporter; import datawave.ingest.mapreduce.StandaloneTaskAttemptContext; @@ -21,7 +22,7 @@ */ public class FlagMetrics { - private static final Logger log = Logger.getLogger(FlagMetrics.class); + private static final Logger log = LoggerFactory.getLogger(FlagMetrics.class); private static final CompressionCodec cc = new GzipCodec(); private static final SequenceFile.CompressionType ct = SequenceFile.CompressionType.BLOCK; @@ -72,14 +73,14 @@ protected void writeMetrics(final String metricsDirectory, final String baseName Path src = new Path(fileName + ".working"); if (!fs.exists(finishedMetricsFile.getParent())) { if (!fs.mkdirs(finishedMetricsFile.getParent())) { - log.warn("unable to create directory (" + finishedMetricsFile.getParent() + ") metrics write terminated"); + log.warn("unable to create directory ( {} ) metrics write terminated", finishedMetricsFile.getParent()); return; } } if (!fs.exists(src.getParent())) { if (!fs.mkdirs(src.getParent())) { - log.warn("unable to create directory (" + src.getParent() + ") metrics write terminated"); + log.warn("unable to create directory ( {} ) metrics write terminated", src.getParent()); return; } } @@ -99,7 +100,7 @@ protected void writeMetrics(final String metricsDirectory, final String baseName break; // delete src - it will be recreated by while statement if (fs.delete(src, false)) { - log.warn("unable to delete metrics file (" + src + ")"); + log.warn("unable to delete metrics file ( {} )", src); } } diff --git a/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagSocket.java b/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagSocket.java index 6bcd36ed317..e7a9577459e 100644 --- a/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagSocket.java +++ b/warehouse/ingest-core/src/main/java/datawave/util/flag/FlagSocket.java @@ -12,14 +12,15 @@ import java.util.Observable; import java.util.Observer; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * */ public class FlagSocket extends Observable implements Runnable, Observer { - private static final Logger log = Logger.getLogger(FlagSocket.class); + private static final Logger log = LoggerFactory.getLogger(FlagSocket.class); private ServerSocket serverSocket; private volatile boolean running = true; @@ -31,13 +32,15 @@ public FlagSocket(int port) throws IOException { public void run() { // register ourselves to observe... 
addObserver(this); - log.info("Listening for shutdown commands on port " + serverSocket.getLocalPort()); + if (log.isInfoEnabled()) { + log.info("Listening for shutdown commands on port {}", serverSocket.getLocalPort()); + } while (running) { try { Socket s = serverSocket.accept(); SocketAddress remoteAddress = s.getRemoteSocketAddress(); try { - log.info(remoteAddress + " connected to the shutdown port"); + log.info("{} connected to the shutdown port", remoteAddress); s.setSoTimeout(30000); InputStream is = s.getInputStream(); BufferedReader rdr = new BufferedReader(new InputStreamReader(is)); @@ -47,14 +50,14 @@ public void run() { setChanged(); notifyObservers(line); } catch (SocketTimeoutException e) { - log.info("Timed out waiting for input from " + remoteAddress); + log.info("Timed out waiting for input from {}", remoteAddress); } } catch (SocketException e) { if (running) { - log.info("Socket Exception occurred: " + e.getMessage(), e); + log.info("Socket Exception occurred: {}", e.getMessage(), e); } } catch (IOException e) { - log.error("Error waiting for shutdown connection: " + e.getMessage(), e); + log.error("Error waiting for shutdown connection: {}", e.getMessage(), e); } } } diff --git a/warehouse/ingest-core/src/main/java/datawave/util/flag/SimpleMover.java b/warehouse/ingest-core/src/main/java/datawave/util/flag/SimpleMover.java index bb0205c64eb..0b9a23bed72 100644 --- a/warehouse/ingest-core/src/main/java/datawave/util/flag/SimpleMover.java +++ b/warehouse/ingest-core/src/main/java/datawave/util/flag/SimpleMover.java @@ -5,7 +5,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.cache.Cache; @@ -16,7 +17,7 @@ */ public class SimpleMover implements Callable { - private static final Logger log = Logger.getLogger(SimpleMover.class); + private static final Logger log = LoggerFactory.getLogger(SimpleMover.class); final InputFile entry; final TrackedDir target; @@ -38,7 +39,9 @@ public InputFile call() throws IOException { if (entry.getCurrentDir() == dst || (!fs.exists(dst) && fs.rename(entry.getCurrentDir(), dst))) { entry.updateCurrentDir(this.target); } else { - log.error("Unable to move file " + entry.getCurrentDir().toUri() + " to " + dst.toUri() + ", skipping"); + if (log.isErrorEnabled()) { + log.error("Unable to move file {} to {}, skipping", entry.getCurrentDir().toUri(), dst.toUri()); + } } return entry; @@ -50,7 +53,7 @@ Path checkParent(Path path) throws IOException { if (fs.mkdirs(parent)) { directoryCache.put(parent, parent); } else { - log.warn("unable to create directory (" + parent + ")"); + log.warn("unable to create directory ( {} )", parent); } } return path; From e41b051da781792830770d126865bcd69581ba8e Mon Sep 17 00:00:00 2001 From: emiliodskinner Date: Thu, 16 Jan 2025 19:47:44 +0000 Subject: [PATCH 2/6] task-2628-ingest-core: warehouse/ingest-core pt1 --- .../data/config/XMLFieldConfigHelper.java | 2 +- .../data/config/ingest/BaseIngestHelper.java | 22 ++++++++----------- .../ingest/EventFieldNormalizerHelper.java | 4 ++-- .../data/tokenize/TokenizationHelper.java | 4 ++-- ...ataTypeDiscardFutureIntervalPredicate.java | 4 ++-- .../DataTypeDiscardIntervalPredicate.java | 4 ++-- .../MetricsSummaryDataTypeHandler.java | 3 +-- ...ndedContentIndexingColumnBasedHandler.java | 4 ++-- .../AbstractNGramTokenizationStrategy.java | 2 +- .../datawave/ingest/util/BloomFilterUtil.java | 2 +- 10 files changed, 23 
insertions(+), 28 deletions(-) diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java index dfca4f17fde..3670b6ee23e 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/XMLFieldConfigHelper.java @@ -14,9 +14,9 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import org.apache.xerces.jaxp.SAXParserFactoryImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.xerces.jaxp.SAXParserFactoryImpl; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java index 1f25521b658..bffded63810 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java @@ -673,8 +673,8 @@ public static Matcher getBestMatch(Set patterns, String fieldName) { } if (patternMatcher.reset(fieldName).matches()) { if (bestMatch != null) { - log.warn("Multiple regular expression patterns with the same length exist for matching field {}. " + - "The pattern that sorts lexicographically last will be used. Please verify your configurations.", fieldName); + log.warn("Multiple regular expression patterns with the same length exist for matching field {}. " + + "The pattern that sorts lexicographically last will be used. 
Please verify your configurations.", fieldName); break; } else { bestMatch = patternMatcher; @@ -783,8 +783,7 @@ protected Set normalize(NormalizedContentInterface n // if it is indexed, set the index part, if (this.isIndexedField(eventFieldName) || this.isIndexedField(indexedFieldName)) { - log.debug("eventFieldName={}, indexedFieldName={} we have an indexed field here {}", - eventFieldName, indexedFieldName, normalizedContent); + log.debug("eventFieldName={}, indexedFieldName={} we have an indexed field here {}", eventFieldName, indexedFieldName, normalizedContent); Collection> dataTypes = getDataTypes(normalizedContent.getIndexedFieldName()); HashSet values = new HashSet<>(dataTypes.size()); for (datawave.data.type.Type dataType : dataTypes) { @@ -800,8 +799,7 @@ protected Set normalize(NormalizedContentInterface n // if it is normalized, set the field value part and the (unused) // indexed field value part if (this.isNormalizedField(eventFieldName) || this.isNormalizedField(indexedFieldName)) { - log.debug("eventFieldName={}, indexedFieldName={} we have a normalized field here {}", - eventFieldName, indexedFieldName, normalizedContent); + log.debug("eventFieldName={}, indexedFieldName={} we have a normalized field here {}", eventFieldName, indexedFieldName, normalizedContent); Collection> dataTypes = getDataTypes(normalizedContent.getIndexedFieldName()); HashSet values = new HashSet<>(dataTypes.size()); for (datawave.data.type.Type dataType : dataTypes) { @@ -921,9 +919,8 @@ public Multimap normalize(Multimap e : fields.entries()) { if (e.getValue() != null) { applyNormalizationAndAddToResults(results, new NormalizedFieldAndValue(e.getKey(), e.getValue())); - } else - if (log.isWarnEnabled()) { - log.warn("{} has key {} with a null value.", this.getType().typeName(), e.getKey()); + } else if (log.isWarnEnabled()) { + log.warn("{} has key {} with a null value.", this.getType().typeName(), e.getKey()); } } return results; @@ -941,10 +938,9 @@ public Multimap normalizeMap(Multimap e : fields.entries()) { if (e.getValue() != null) { applyNormalizationAndAddToResults(results, e.getValue()); - } else - if (log.isWarnEnabled()) { - log.warn("{} has key {} with a null value.", this.getType().typeName(), e.getKey()); - } + } else if (log.isWarnEnabled()) { + log.warn("{} has key {} with a null value.", this.getType().typeName(), e.getKey()); + } } return results; } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java index 1984979ae05..b3c96118be4 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/EventFieldNormalizerHelper.java @@ -5,6 +5,8 @@ import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Maps; @@ -13,8 +15,6 @@ import datawave.ingest.data.TypeRegistry; import datawave.ingest.data.config.ConfigurationHelper; import datawave.ingest.data.config.DataTypeHelper; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * This class can be used to help normalize the event field values akin to how the BaseIngestHelper can normalize the indexed field values. 
This was not cooked diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java index 682ad11319a..1a5f2fa0fad 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/tokenize/TokenizationHelper.java @@ -3,10 +3,10 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharArraySet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.data.config.DataTypeHelper; import datawave.util.ObjectFactory; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java index 2965a2f98f6..6ad5d61a79c 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardFutureIntervalPredicate.java @@ -13,8 +13,8 @@ public class DataTypeDiscardFutureIntervalPredicate implements RawRecordPredicat private static final Logger log = LoggerFactory.getLogger(DataTypeDiscardFutureIntervalPredicate.class); /** - * number which will be used to evaluate whether an Event should be processed. If the Event.getEventDate() is less than (now + interval) then it will - * be processed. + * number which will be used to evaluate whether an Event should be processed. If the Event.getEventDate() is less than (now + interval) then it will be + * processed. */ public static final String DISCARD_FUTURE_INTERVAL = "event.discard.future.interval"; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java index c4846eaf966..da3a0df0540 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/DataTypeDiscardIntervalPredicate.java @@ -13,8 +13,8 @@ public class DataTypeDiscardIntervalPredicate implements RawRecordPredicate { private static final Logger log = LoggerFactory.getLogger(DataTypeDiscardIntervalPredicate.class); /** - * number which will be used to evaluate whether an Event should be processed. If the Event.getEventDate() is greater than (now - interval) then it - * will be processed. + * number which will be used to evaluate whether an Event should be processed. If the Event.getEventDate() is greater than (now - interval) then it will be + * processed. 
*/ public static final String DISCARD_INTERVAL = "event.discard.interval"; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java index ef86aced004..b0d5433abc5 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java @@ -14,8 +14,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.log4j.Logger; import com.google.common.base.Preconditions; import com.google.common.collect.ArrayListMultimap; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java index c0a97c80cdd..4eb50f48933 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ExtendedContentIndexingColumnBasedHandler.java @@ -30,10 +30,10 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskInputOutputContext; import org.apache.hadoop.util.bloom.BloomFilter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.lucene.analysis.CharArraySet; import org.infinispan.commons.util.Base64; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Multimap; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java index 4b86ede7be7..7661c21d366 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/AbstractNGramTokenizationStrategy.java @@ -1,8 +1,8 @@ package datawave.ingest.util; +import org.apache.lucene.analysis.ngram.NGramTokenizer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.lucene.analysis.ngram.NGramTokenizer; import com.google.common.hash.BloomFilter; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java index 24aca277810..93c27310916 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/BloomFilterUtil.java @@ -7,9 +7,9 @@ import java.util.Map; import java.util.Map.Entry; +import org.apache.lucene.analysis.ngram.NGramTokenizer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.lucene.analysis.ngram.NGramTokenizer; import com.google.common.collect.Multimap; import com.google.common.hash.BloomFilter; From 483aa49259e4a0d17d0db5f98854bf46a851e758 Mon Sep 17 00:00:00 2001 From: emiliodskinner Date: Wed, 22 Jan 2025 18:11:25 +0000 Subject: [PATCH 3/6] task-2628-ingest-core: pt2 --- 
.../data/hash/SnowflakeUIDBuilder.java | 44 ++--- .../ingest/config/TableConfigCache.java | 15 +- .../config/TableConfigCacheGenerator.java | 7 +- .../datawave/ingest/data/TypeRegistry.java | 24 +-- .../dateindex/DateIndexDataTypeHandler.java | 19 +- .../handler/error/ErrorDataTypeHandler.java | 5 +- .../mapreduce/handler/facet/FacetHandler.java | 17 +- .../handler/shard/ShardedDataTypeHandler.java | 45 ++--- .../MetricsSummaryDataTypeHandler.java | 11 +- .../ContentIndexingColumnBasedHandler.java | 29 +-- .../job/BulkIngestMapFileLoader.java | 178 +++++++++--------- .../job/CBMutationOutputFormatter.java | 7 +- .../mapreduce/job/ConstraintChecker.java | 7 +- .../mapreduce/job/DelegatingPartitioner.java | 7 +- .../util/DiskSpaceStarvationStrategy.java | 5 +- .../GenerateMultipleNumShardsCacheFile.java | 5 +- .../ingest/util/MemoryStarvationStrategy.java | 5 +- .../ingest/util/ResourceAvailabilityUtil.java | 5 +- .../java/datawave/ingest/util/ThreadUtil.java | 13 +- 19 files changed, 220 insertions(+), 228 deletions(-) diff --git a/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java b/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java index 5d823f9caba..6902cbe2965 100644 --- a/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java +++ b/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java @@ -9,7 +9,8 @@ import java.util.Map; import org.apache.commons.cli.Option; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Builds a sequence of SnowflakeUIDs for a particular "machine" instance, which is based on a unique combination of host, process, and process thread. @@ -18,7 +19,7 @@ public class SnowflakeUIDBuilder extends AbstractUIDBuilder { private static final BigInteger UNDEFINED_MACHINE_ID = BigInteger.valueOf(-1); private static final BigInteger UNDEFINED_SNOWFLAKE = BigInteger.valueOf(-1); - private static final Logger LOGGER = Logger.getLogger(SnowflakeUIDBuilder.class); + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeUIDBuilder.class); private final BigInteger mid; @@ -259,10 +260,7 @@ protected static int newMachineId(final Map options) { try { hostId = Integer.parseInt(option.getValue()); } catch (final Exception e) { - if (LOGGER.isDebugEnabled()) { - final String message = "Invalid " + HOST_INDEX_OPT + ": " + option; - LOGGER.warn(message, e); - } + LOGGER.warn("Invalid {}: {}", HOST_INDEX_OPT, option); } } @@ -271,10 +269,7 @@ protected static int newMachineId(final Map options) { try { processId = Integer.parseInt(option.getValue()); } catch (final Exception e) { - if (LOGGER.isDebugEnabled()) { - final String message = "Invalid " + PROCESS_INDEX_OPT + ": " + option; - LOGGER.warn(message, e); - } + LOGGER.warn("Invalid {}: {}", PROCESS_INDEX_OPT, option); } } @@ -283,20 +278,14 @@ protected static int newMachineId(final Map options) { try { threadId = Integer.parseInt(option.getValue()); } catch (final Exception e) { - if (LOGGER.isDebugEnabled()) { - final String message = "Invalid " + THREAD_INDEX_OPT + ": " + option; - LOGGER.warn(message, e); - } + LOGGER.warn("Invalid {}: {}", THREAD_INDEX_OPT, option); } } try { machineId = validateMachineIds(hostId, processId, threadId).intValue(); } catch (Exception e) { - if (LOGGER.isDebugEnabled()) { - final String message = "Unable to generate Snowflake machine ID"; - LOGGER.warn(message, e); - } + LOGGER.warn("Unable to generate Snowflake machine ID", e); } return machineId; @@ 
-387,8 +376,8 @@ private long validateTimestamp(long timestamp) { } if (timestamp <= this.previousTid) { - LOGGER.warn("Current tid is less than the previous. This could cause uid collisions.\n" + "Mid: " + mid + ", Timestamp: " + timestamp - + ", Previous: " + previousTid + ", System Time: " + System.currentTimeMillis()); + LOGGER.warn("Current tid is less than the previous. This could cause uid collisions.\nMid: {}, Timestamp: {}, Previous: {}, System Time: {}", + mid, timestamp, previousTid, System.currentTimeMillis()); timestamp = this.previousTid + 1; } @@ -404,12 +393,9 @@ private void storeTimestamp() { if (ZkSnowflakeCache.isInitialized()) { try { ZkSnowflakeCache.store(mid, this.previousTid); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Caching ZK ts: " + this.previousTid + ", mid: " + this.mid); - } - + LOGGER.debug("Caching ZK ts: {}, mid: {}", this.previousTid, this.mid); } catch (Exception e) { - LOGGER.error("Unable to store snowflake id from zookeeper for " + mid, e); + LOGGER.error("Unable to store snowflake id from zookeeper for {}", mid, e); throw new RuntimeException(e); } } @@ -421,14 +407,10 @@ private long initializeTimestamp() { if (ZkSnowflakeCache.isInitialized()) { try { lastCachedTid = ZkSnowflakeCache.getLastCachedTid(this.mid); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Getting ZK ts: " + lastCachedTid + " mid: " + this.mid); - } - + LOGGER.debug("Getting ZK ts: {}, mid: {}", lastCachedTid, this.mid); } catch (Exception e) { - LOGGER.error("Unable to retrieve snowflake id from zookeeper for " + mid, e); + LOGGER.error("Unable to retrieve snowflake id from zookeeper for {}", mid, e); throw new RuntimeException(e); - } } if (lastCachedTid > 0) { diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCache.java b/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCache.java index 730dda45e18..632f7a39985 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCache.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCache.java @@ -12,7 +12,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class TableConfigCache extends BaseHdfsFileCacheUtil { @@ -27,7 +28,7 @@ public class TableConfigCache extends BaseHdfsFileCacheUtil { private static final Object lock = new Object(); - protected static final Logger log = Logger.getLogger("datawave.ingest"); + protected static final Logger log = LoggerFactory.getLogger("datawave.ingest"); private TableConfigCache(Configuration conf) { super(conf); @@ -58,7 +59,7 @@ public boolean isInitialized() { public void writeCacheFile(FileSystem fs, Path tmpCacheFile) throws IOException { Map> tempValidationMap = configMap; - log.info("Writing to temp file " + tmpCacheFile.getName()); + log.info("Writing to temp file {}", tmpCacheFile.getName()); try (PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(tmpCacheFile)), false, "UTF-8")) { for (Map.Entry> table : configMap.entrySet()) { for (Map.Entry tableProp : table.getValue().entrySet()) { @@ -66,16 +67,16 @@ public void writeCacheFile(FileSystem fs, Path tmpCacheFile) throws IOException } } } catch (IOException e) { - log.error("Unable to write cache file " + tmpCacheFile, e); + log.error("Unable to write cache file {}", tmpCacheFile, e); throw e; } // validate temp file - 
log.info("Validating file: " + tmpCacheFile.getName()); + log.info("Validating file: {}", tmpCacheFile.getName()); try (BufferedReader in = new BufferedReader(new InputStreamReader(FileSystem.get(tmpCacheFile.toUri(), conf).open(tmpCacheFile)))) { readCache(in); } catch (IOException ex) { - log.error("Error reading cache temp file: " + tmpCacheFile, ex); + log.error("Error reading cache temp file: {}", tmpCacheFile, ex); throw ex; } @@ -138,7 +139,7 @@ public Map getTableProperties(String tableName) throws IOExceptio read(); } if (null == this.configMap.get(tableName) || this.configMap.get(tableName).isEmpty()) { - log.error("No accumulo config cache for " + tableName + ". Please generate the accumulo config cache after ensuring the table exists."); + log.error("No accumulo config cache for {}. Please generate the accumulo config cache after ensuring the table exists.", tableName); } return this.configMap.get(tableName); diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCacheGenerator.java b/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCacheGenerator.java index 21a29dd3146..26adb56f70b 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCacheGenerator.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/config/TableConfigCacheGenerator.java @@ -1,7 +1,8 @@ package datawave.ingest.config; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.OptionsParser; import datawave.ingest.mapreduce.job.TableConfigurationUtil; @@ -9,7 +10,7 @@ public class TableConfigCacheGenerator { protected static final Configuration config = new Configuration(); - protected static final Logger log = Logger.getLogger(TableConfigCache.class); + protected static final Logger log = LoggerFactory.getLogger(TableConfigCache.class); public static void main(String[] args) { @@ -19,7 +20,7 @@ public static void main(String[] args) { TableConfigurationUtil tcu = new TableConfigurationUtil(conf); tcu.updateCacheFile(); } catch (Exception e) { - log.error("Unable to generate accumulo config cache " + e.getMessage()); + log.error("Unable to generate accumulo config cache {}", e.getMessage()); } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/TypeRegistry.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/TypeRegistry.java index 98537561123..e95eb7357b4 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/TypeRegistry.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/TypeRegistry.java @@ -12,12 +12,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; -import datawave.core.common.logging.ThreadConfigurableLogger; import datawave.ingest.data.config.ConfigurationHelper; import datawave.ingest.data.config.DataTypeOverrideHelper; import datawave.ingest.data.config.filter.KeyValueFilter; @@ -28,7 +28,7 @@ public class TypeRegistry extends HashMap { - private static final Logger log = ThreadConfigurableLogger.getLogger(TypeRegistry.class); + private static final Logger log = LoggerFactory.getLogger(TypeRegistry.class); public static final String ALL_PREFIX = "all"; @@ -73,7 +73,7 @@ public static TypeRegistry getInstance(Configuration 
config) { } /** - * Helps determine whether or not the registry instance has been instantiated. + * Helps determine whether the registry instance has been instantiated. * * @return true if the registry exists, false otherwise */ @@ -189,13 +189,13 @@ private TypeRegistry(Configuration config) { try { helperClassName = ConfigurationHelper.isNull(config, typeName + INGEST_HELPER, String.class); } catch (IllegalArgumentException e) { - log.debug("No helper class defined for type: " + typeName); + log.debug("No helper class defined for type: {}", typeName); } String readerClassName = null; try { readerClassName = ConfigurationHelper.isNull(config, typeName + RAW_READER, String.class); } catch (IllegalArgumentException e) { - log.debug("No reader class defined for type: " + typeName); + log.debug("No reader class defined for type: {}", typeName); } String[] handlerClassNames = null; try { @@ -207,7 +207,7 @@ private TypeRegistry(Configuration config) { .asList(StringUtils.trimAndRemoveEmptyStrings(ConfigurationHelper.isNull(config, EXCLUDED_HANDLER_CLASSES, String[].class))); handlerClassNames = getClassnamesWithoutExclusions(handlerClassNames, exclusions); } catch (IllegalArgumentException e) { - log.debug("No handler classes defined for type: " + typeName); + log.debug("No handler classes defined for type: {}", typeName); } String[] filterClassNames = null; @@ -217,7 +217,7 @@ private TypeRegistry(Configuration config) { StringUtils.trimAndRemoveEmptyStrings(ConfigurationHelper.isNull(config, typeName + FILTER_CLASSES, String[].class))); filterPriority = config.getInt(typeName + FILTER_PRIORITY, Integer.MAX_VALUE); } catch (IllegalArgumentException e) { - log.debug("No filter classes defined for type: " + typeName); + log.debug("No filter classes defined for type: {}", typeName); } String outputName = config.get(typeName + OUTPUT_NAME, typeName); @@ -236,27 +236,27 @@ private TypeRegistry(Configuration config) { // performing `configurationKey.split(".")[0]`. Using a period inside datatype name muddies later code // due to the manner than Hadoop Configurations operate. if (typeName.indexOf('.') != -1) { - log.error("Datatypes ('" + INGEST_DATA_TYPES + "') cannot contain a period. Offending datatype: '" + typeName + "'"); + log.error("Datatypes ({}) cannot contain a period. Offending datatype: {}", INGEST_DATA_TYPES, typeName); throw new IllegalArgumentException( "Datatypes ('" + INGEST_DATA_TYPES + "') cannot contain a period. 
Offending datatype: '" + typeName + "'"); } Type t = new Type(typeName, outputName, helperClass, readerClass, handlerClassNames, filterPriority, filterClassNames); - log.debug("Registered type " + t); + log.debug("Registered type {}", t); this.put(typeName, t); if (null != config.get(typeName + DataTypeOverrideHelper.Properties.DATA_TYPE_VALUES)) { for (String type : config.getStrings(typeName + DataTypeOverrideHelper.Properties.DATA_TYPE_VALUES)) { outputName = config.get(type + OUTPUT_NAME, outputName); t = new Type(type, outputName, helperClass, readerClass, handlerClassNames, filterPriority, filterClassNames); - log.debug("Registered child type:" + type); + log.debug("Registered child type: {}", type); this.put(type, t); } } } } catch (ClassNotFoundException cnfe) { - log.error("Unable to create supporting class for type " + typeName, cnfe); + log.error("Unable to create supporting class for type {}", typeName, cnfe); } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java index 25fa6caee29..acb8cf8b8a6 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java @@ -20,7 +20,8 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; @@ -78,7 +79,7 @@ */ public class DateIndexDataTypeHandler implements DataTypeHandler, RawRecordMetadata { - private static final Logger log = ThreadConfigurableLogger.getLogger(DateIndexDataTypeHandler.class); + private static final Logger log = LoggerFactory.getLogger(DateIndexDataTypeHandler.class); public static final String DATEINDEX_TNAME = "date.index.table.name"; public static final String DATEINDEX_LPRIORITY = "date.index.table.loader.priority"; @@ -152,7 +153,7 @@ public void setup(TaskAttemptContext context) { String tableName = conf.get(DATEINDEX_TNAME, null); if (null == tableName) { - log.error(DATEINDEX_TNAME + " not specified, no date index will be created"); + log.error("{} not specified, no date index will be created", DATEINDEX_TNAME); } else { setDateIndexTableName(new Text(tableName)); } @@ -176,7 +177,9 @@ public void setup(TaskAttemptContext context) { } typeToFields.put(parts[0], parts[1]); } - log.info(this.getClass().getSimpleName() + " configured for " + dataType.typeName() + ": " + typeToFields); + if (log.isInfoEnabled()) { + log.info("{} configured for {}: {}", this.getClass().getSimpleName(), dataType.typeName(), typeToFields); + } dataTypeToTypeToFields.put(dataType.typeName(), typeToFields); } } @@ -243,7 +246,7 @@ private void getBulkIngestKeys(RawRecordContainer event, Multimap implements ExtendedDataTypeHandler { - private static final Logger log = Logger.getLogger(ErrorDataTypeHandler.class); + private static final Logger log = LoggerFactory.getLogger(ErrorDataTypeHandler.class); public static final String ERROR_TABLE = "error.table"; public static final String ERROR_TABLE_NAME = ERROR_TABLE + ".name"; diff --git 
a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java index cd72c72acee..0fec6879aa0 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java @@ -21,7 +21,8 @@ import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskInputOutputContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.geotools.feature.type.DateUtil; import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; @@ -51,7 +52,7 @@ public class FacetHandler implements ExtendedDataTypeHandler, FacetedEstimator { - private static final Logger log = Logger.getLogger(FacetHandler.class); + private static final Logger log = LoggerFactory.getLogger(FacetHandler.class); /* Global configuration properties */ @@ -272,10 +273,8 @@ public long process(KEYIN key, RawRecordContainer event, Multimap extends StatsDEnabledDataTypeHandler implements DataTypeHandler { - private static final Logger log = ThreadConfigurableLogger.getLogger(ShardedDataTypeHandler.class); + private static final Logger log = LoggerFactory.getLogger(ShardedDataTypeHandler.class); public static final String NUM_SHARDS = ShardIdFactory.NUM_SHARDS; public static final String SHARD_TNAME = "shard.table.name"; @@ -227,13 +228,13 @@ public void setup(TaskAttemptContext context) { String tableName = conf.get(SHARD_TNAME, null); if (null == tableName) - log.error(SHARD_TNAME + " not specified, no events will be created, and the global index will be useless"); + log.error("{} not specified, no events will be created, and the global index will be useless", SHARD_TNAME); else setShardTableName(new Text(tableName)); tableName = conf.get(SHARD_STATS_TNAME, null); if (null == tableName) - log.warn(SHARD_STATS_TNAME + " not specified, no global index mutations will be created."); + log.warn("{} not specified, no global index mutations will be created.", SHARD_STATS_TNAME); else { setIndexStatsTableName(new Text(tableName)); setProduceStats(true); @@ -241,25 +242,25 @@ public void setup(TaskAttemptContext context) { tableName = conf.get(SHARD_GIDX_TNAME, null); if (null == tableName) - log.warn(SHARD_GIDX_TNAME + " not specified, no global index mutations will be created."); + log.warn("{} not specified, no global index mutations will be created.", SHARD_GIDX_TNAME); else setShardIndexTableName(new Text(tableName)); tableName = conf.get(SHARD_GRIDX_TNAME, null); if (null == tableName) - log.warn(SHARD_GRIDX_TNAME + " not specified, no global reverse index mutations will be created."); + log.warn("{} not specified, no global reverse index mutations will be created.", SHARD_GRIDX_TNAME); else setShardReverseIndexTableName(new Text(tableName)); tableName = conf.get(METADATA_TABLE_NAME, null); if (null == tableName) - log.warn(METADATA_TABLE_NAME + " not specified, no metadata will be created, I hope nothing requires normalizers."); + log.warn("{} not specified, no metadata will be created, I hope nothing requires normalizers.", METADATA_TABLE_NAME); else setMetadataTableName(new Text(tableName)); tableName = (LoadDateTableConfigHelper.isLoadDatesEnabled(conf) ? 
LoadDateTableConfigHelper.getLoadDatesTableName(conf) : null); if (null == tableName) - log.warn(LoadDateTableConfigHelper.LOAD_DATES_TABLE_NAME_PROP + " not specified, no load dates will be created"); + log.warn("{} not specified, no load dates will be created", LoadDateTableConfigHelper.LOAD_DATES_TABLE_NAME_PROP); else setLoadDatesTableName(new Text(tableName)); @@ -270,7 +271,7 @@ public void setup(TaskAttemptContext context) { tableName = conf.get(SHARD_DINDX_NAME, null); if (null == tableName) { - log.warn(SHARD_DINDX_NAME + " not specified, no term dictionary will be created."); + log.warn("{} not specified, no term dictionary will be created.", SHARD_DINDX_NAME); } else { setShardDictionaryIndexTableName(new Text(tableName)); this.setupDictionaryCache(conf.getInt(SHARD_DICTIONARY_CACHE_ENTRIES, SHARD_DINDEX_CACHE_DEFAULT_SIZE)); @@ -299,21 +300,17 @@ public void setup(TaskAttemptContext context) { private void setupToReindexIfEnabled(Configuration conf) { this.isReindexEnabled = conf.getBoolean(IS_REINDEX_ENABLED, false); - log.info("isReindexEnabled: " + this.isReindexEnabled); + log.info("isReindexEnabled: {}", this.isReindexEnabled); if (this.isReindexEnabled) { String commaSeparatedFieldNames = conf.get(FIELDS_TO_REINDEX); - if (log.isDebugEnabled()) { - log.debug("configured reindex fields: " + commaSeparatedFieldNames); - } + log.debug("configured reindex fields: {}", commaSeparatedFieldNames); if (null != commaSeparatedFieldNames) { this.requestedFieldsForReindex = Arrays.asList(commaSeparatedFieldNames.split(",")); } if (null == this.requestedFieldsForReindex || this.requestedFieldsForReindex.isEmpty()) { throw new RuntimeException("Missing or empty " + FIELDS_TO_REINDEX + " from configuration: " + conf); } - if (log.isDebugEnabled()) { - log.debug("list of fields to reindex: " + requestedFieldsForReindex); - } + log.debug("list of fields to reindex: {}", requestedFieldsForReindex); } } @@ -469,7 +466,7 @@ protected Multimap createColumns(RawRecordContainer event, NormalizedContentInterface value = e.getValue(); byte[] visibility = getVisibility(event, value); if (log.isTraceEnabled()) { - log.trace("Is " + e.getKey() + " indexed? " + hasIndexTerm(e.getKey()) + " " + helper.isIndexedField(e.getKey())); + log.trace("Is {} indexed? {} {}", e.getKey(), hasIndexTerm(e.getKey()), helper.isIndexedField(e.getKey())); } values.putAll(createForwardIndices(helper, event, fields, value, visibility, maskedVisibility, maskedFieldHelper, shardId, indexedValue, @@ -714,9 +711,7 @@ protected Multimap createTermIndexColumn(RawRecordContainer Multimap values = ArrayListMultimap.create(); - if (log.isTraceEnabled()) { - log.trace("Create index column " + tableName); - } + log.trace("Create index column {}", tableName); if (null == tableName) { return values; } @@ -738,9 +733,7 @@ protected Multimap createTermIndexColumn(RawRecordContainer if (!StringUtils.isEmpty(normalizedMaskedValue)) { if (direction == Direction.REVERSE) { normalizedMaskedValue = new StringBuilder(normalizedMaskedValue).reverse().toString(); - if (log.isTraceEnabled()) { - log.trace("normalizedMaskedValue is reversed to: " + normalizedMaskedValue); - } + log.trace("normalizedMaskedValue is reversed to: {}", normalizedMaskedValue); } // Create a key for the masked field value with the masked visibility. 
Key k = this.createIndexKey(normalizedMaskedValue.getBytes(), colf, colq, maskedVisibility, event.getTimestamp(), false); @@ -981,8 +974,7 @@ protected Multimap createShardEventColumn(RawRecordContaine // Else create one key for the field with the original value and the masked visiblity Key cbKey = createKey(shardId, colf, unmaskedColq, refVisibility, event.getTimestamp(), deleteMode); BulkIngestKey bKey = new BulkIngestKey(this.getShardTableName(), cbKey); - if (log.isTraceEnabled()) - log.trace("Creating bulk ingest Key " + bKey); + log.trace("Creating bulk ingest Key {}", bKey); values.put(bKey, NULL_VALUE); } @@ -1033,8 +1025,7 @@ public void createShardFieldIndexColumn(RawRecordContainer event, Multimap createShardFieldIndexColumn(RawRecordContainer event, String fieldName, String fieldValue, byte[] visibility, byte[] maskedVisibility, MaskedFieldHelper maskedFieldHelper, byte[] shardId, Value value) { - if (log.isTraceEnabled()) - log.trace("Field value is " + fieldValue); + log.trace("Field value is {}", fieldValue); // hold on to the helper IngestHelperInterface helper = this.getHelper(event.getDataType()); diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java index b0d5433abc5..a0e3870a4c2 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/summary/MetricsSummaryDataTypeHandler.java @@ -14,7 +14,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; import com.google.common.collect.ArrayListMultimap; @@ -39,7 +40,7 @@ */ public class MetricsSummaryDataTypeHandler extends SummaryDataTypeHandler { - private static final Logger log = ThreadConfigurableLogger.getLogger(MetricsSummaryDataTypeHandler.class); + private static final Logger log = LoggerFactory.getLogger(MetricsSummaryDataTypeHandler.class); // configuration keys public static final String METRICS_SUMMARY_PROP_PREFIX = "metrics-"; @@ -142,7 +143,7 @@ public void setMetricsSummaryFormatter(MetricsSummaryFormatter metricsSummaryFor public void setTableName(Configuration conf) { String tableName = conf.get(METRICS_SUMMARY_TABLE_NAME); if (tableName == null) { - log.warn(METRICS_SUMMARY_TABLE_NAME + " not specified, no summary data will be created."); + log.warn("{} not specified, no summary data will be created.", METRICS_SUMMARY_TABLE_NAME); } else { this.metricsSummaryTableName = new Text(tableName); } @@ -199,7 +200,7 @@ public Multimap createEntries(RawRecordContainer record, Mu Set colQs = Sets.newHashSet(metricsSummaryFormatter.getSummaryValuesRegex(colQualFieldsRegexList, fields)); if (log.isTraceEnabled()) { - log.trace("Creating Keys for...rowIds.size() [" + rowIds.size() + "] colFs.size() [" + colFs.size() + "] colQs.size() [" + colQs.size() + "]"); + log.trace("Creating Keys for...rowIds.size() [{}] colFs.size() [{}] colQs.size() [{}]", rowIds.size(), colFs.size(), colQs.size()); } ColumnVisibility vis = new ColumnVisibility(origVis.flatten()); @@ -220,7 +221,7 @@ public Multimap createEntries(RawRecordContainer record, Mu } if (log.isTraceEnabled()) { - 
log.trace("Created [" + values.size() + "] keys for ingest"); + log.trace("Created [{}] keys for ingest", values.size()); } return values; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java index 34f83102685..de8d89fef30 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java @@ -16,7 +16,10 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -62,7 +65,9 @@ */ public abstract class ContentIndexingColumnBasedHandler extends AbstractColumnBasedHandler implements TermFrequencyIngestHelperInterface { - private static final Logger log = Logger.getLogger(ContentIndexingColumnBasedHandler.class); + private static final Logger log = LoggerFactory.getLogger(ContentIndexingColumnBasedHandler.class); + + private static Marker fatal = MarkerFactory.getMarker("FATAL"); public abstract AbstractContentIngestHelper getContentIndexingDataTypeHelper(); @@ -192,7 +197,7 @@ protected void flushTokenOffsetCache(RawRecordContainer event, Multimap tokenHelper.getTokenizerTimeWarnThresholdMsec() && !tokenizerTimeWarned) { long realDelta = System.currentTimeMillis() - start; counters.incrementValue(ContentIndexCounters.TOKENIZER_TIME_WARNINGS, 1, reporter); - log.warn("Tokenization of field " + modifiedFieldName + " has exceeded warning threshold " - + tokenHelper.getTokenizerTimeWarnThresholdMsec() + "ms (" + realDelta + "ms)"); + log.warn("Tokenization of field {} has exceeded warning threshold {}ms ({}ms)", + modifiedFieldName, tokenHelper.getTokenizerTimeErrorThresholdMsec(), realDelta); tokenizerTimeWarned = true; } @@ -423,30 +428,26 @@ protected void tokenizeField(final Analyzer a, final NormalizedContentInterface // Make sure the term length is greater than the minimum allowed length int tlen = token.length(); if (tlen < tokenHelper.getTermLengthMinimum()) { - log.debug("Ignoring token of length " + token.length() + " because it is too short"); + log.debug("Ignoring token of length {} because it is too short", token.length()); counters.increment(ContentIndexCounters.TOO_SHORT_COUNTER, reporter); continue; } // skip the term if it is over the length limit unless it is a FILE, URL or HTTP_REQUEST if (tlen > tokenHelper.getTermLengthLimit() && (!(type.equals("FILE") || type.equals("URL") || type.equals("HTTP_REQUEST")))) { - if (log.isDebugEnabled()) { - log.debug("Ignoring " + type + " token due to excessive length"); - } + log.debug("Ignoring {} token due to excessive length", type); counters.increment(ContentIndexCounters.EXCESSIVE_LENGTH_COUNTER, reporter); continue; } if (tlen > tokenHelper.getTermLengthWarningLimit()) { - log.warn("Encountered long term: " + tlen + " characters, '" + token + "'"); + log.warn("Encountered long term: {} characters, {}", tlen, token); 
counters.increment(ContentIndexCounters.LENGTH_WARNING_COUNTER, reporter); } if (truncAtt.isTruncated()) { - if (log.isDebugEnabled()) { - log.debug("Encountered truncated term: " + tlen + " characters, '" + token + "'"); - } + log.debug("Encountered truncated term: {} characters, {}", tlen, token); counters.increment(ContentIndexCounters.TRUNCATION_COUNTER, reporter); } @@ -674,7 +675,7 @@ protected BloomFilterUtil newBloomFilterUtil(final Configuration configuration) final String message = "Unable to create factory for N-grams. ContentIngestHelperInterface is null."; ; - Logger.getLogger(BloomFilterUtil.class).warn(message, new IllegalStateException()); + LoggerFactory.getLogger(BloomFilterUtil.class).warn(message, new IllegalStateException()); } return util; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java index cfd066ee75b..5889a456823 100755 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java @@ -57,8 +57,10 @@ import org.apache.hadoop.tools.DistCp; import org.apache.hadoop.tools.DistCpOptions; import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; import com.google.common.base.Objects; import com.google.common.collect.Lists; @@ -73,7 +75,8 @@ * various tablet servers. */ public final class BulkIngestMapFileLoader implements Runnable { - private static Logger log = Logger.getLogger(BulkIngestMapFileLoader.class); + private static Logger log = LoggerFactory.getLogger(BulkIngestMapFileLoader.class); + private static Marker fatal = MarkerFactory.getMarker("fatal"); private static int SLEEP_TIME = 30000; private static int FAILURE_SLEEP_TIME = 10 * 60 * 1000; // 10 minutes private static int MAX_DIRECTORIES = 1; @@ -288,48 +291,48 @@ public static void main(String[] args) throws AccumuloSecurityException, IOExcep try { String[] classes = jobObserverClasses.split(","); for (String jobObserverClass : classes) { - log.info("Adding job observer: " + jobObserverClass); + log.info("Adding job observer: {}", jobObserverClass); Class clazz = Class.forName(jobObserverClass); Observer o = (Observer) clazz.getDeclaredConstructor().newInstance(); jobObservers.add(o); } } catch (ClassNotFoundException | IllegalAccessException | InstantiationException | InvocationTargetException e) { - log.error("cannot instantiate job observer class '" + jobObserverClasses + "'", e); + log.error("cannot instantiate job observer class {}", jobObserverClasses, e); System.exit(-2); } catch (ClassCastException e) { - log.error("cannot cast '" + jobObserverClasses + "' to Observer", e); + log.error("cannot cast {} to Observer", jobObserverClasses, e); System.exit(-2); } } else if (args[i].startsWith("-")) { int index = args[i].indexOf('=', 1); if (index < 0) { - log.error("WARN: skipping bad property configuration " + args[i]); + log.error("WARN: skipping bad property configuration {}", args[i]); } else { String[] strArr = new String[] {args[i].substring(1, index), args[i].substring(index + 1)}; - log.info("Setting " + strArr[0] + " = \"" + strArr[1] + '"'); + log.info("Setting {} = {}", strArr[0], strArr[1]); properties.add(strArr); } } else { - 
log.info("Adding resource " + args[i]); + log.info("Adding resource {}", args[i]); conf.addResource(args[i]); } } } - log.info("Set sleep time to " + SLEEP_TIME + "ms"); - log.info("Will wait to bring map files online if there are more than " + MAJC_THRESHOLD + " running or queued major compactions."); - log.info("Will not bring map files online unless at least " + MAJC_WAIT_TIMEOUT + "ms have passed since last time."); - log.info("Will check the majcThreshold and majcDelay every " + MAJC_CHECK_INTERVAL + " bulk loads."); - log.info("Processing a max of " + MAX_DIRECTORIES + " directories"); - log.info("Using " + numBulkThreads + " bulk load threads"); - log.info("Using " + numHdfsThreads + " HDFS operation threads"); - log.info("Using " + numBulkAssignThreads + " bulk assign threads"); - log.info("Using " + seqFileHdfs + " as the file system containing the original sequence files"); - log.info("Using " + srcHdfs + " as the source file system"); - log.info("Using " + destHdfs + " as the destination file system"); - log.info("Using " + jobtracker + " as the jobtracker"); - log.info("Using " + SHUTDOWN_PORT + " as the shutdown port"); - log.info("Using " + (FIFO ? "FIFO" : "LIFO") + " processing order"); + log.info("Set sleep time to {}ms", SLEEP_TIME); + log.info("Will wait to bring map files online if there are more than {} running or queued major compactions.", MAJC_THRESHOLD); + log.info("Will not bring map files online unless at least {}ms have passed since last time.", MAJC_WAIT_TIMEOUT); + log.info("Will check the majcThreshold and majcDelay every {} bulk loads.", MAJC_CHECK_INTERVAL); + log.info("Processing a max of {} directories", MAX_DIRECTORIES); + log.info("Using {} bulk load threads", numBulkThreads); + log.info("Using {} HDFS operation threads", numHdfsThreads); + log.info("Using {} bulk assign threads", numBulkAssignThreads); + log.info("Using {} as the file system containing the original sequence files", seqFileHdfs); + log.info("Using {} as the source file system", srcHdfs); + log.info("Using {} as the destination file system", destHdfs); + log.info("Using {} as the jobtracker", jobtracker); + log.info("Using {} as the shutdown port", SHUTDOWN_PORT); + log.info("Using {} processing order", (FIFO ? "FIFO" : "LIFO")); for (String[] s : properties) { conf.set(s[0], s[1]); @@ -347,7 +350,7 @@ public static void main(String[] args) throws AccumuloSecurityException, IOExcep log.error("Configured tables for configured data types is empty"); System.exit(-2); } - log.info("Found table priorities: " + tablePriorities); + log.info("Found table priorities: {}", tablePriorities); String workDir = args[0]; String jobDirPattern = args[1].replaceAll("'", ""); @@ -431,7 +434,7 @@ public void run() { try { cleanJobDirectoriesOnStartup(); } catch (IOException e) { - log.error("Error Cleaning Up Directories. Manually check for orphans: " + e.getMessage(), e); + log.error("Error Cleaning Up Directories. 
Manually check for orphans: {}", e.getMessage(), e); } try { @@ -475,7 +478,7 @@ public void run() { URI workingHdfs = srcHdfs; try { - log.info("Started processing " + mapFilesDir); + log.info("Started processing {}", mapFilesDir); long start = System.currentTimeMillis(); // copy the data if needed @@ -493,12 +496,12 @@ public void run() { cleanUpJobDirectory(mapFilesDir); long end = System.currentTimeMillis(); - log.info("Finished processing " + mapFilesDir + ", duration (sec): " + ((end - start) / 1000)); + log.info("Finished processing {}, duration (sec): {}", mapFilesDir, ((end - start) / 1000)); // now that we actually processed something, reset the last load message time to force a message on the next round lastLoadMessageTime = 0; } catch (Exception e) { - log.error("Failed to process " + mapFilesDir, e); + log.error("Failed to process {}", mapFilesDir, e); boolean marked = markJobDirectoryFailed(workingHdfs, dstJobDirectory); if (!marked) { ++fsAccessFailures; @@ -506,7 +509,7 @@ public void run() { log.error("Too many failures updating marker files. Exiting..."); shutdown(); } else { - log.warn("Failed to mark " + dstJobDirectory + " as failed. Sleeping in case this was a transient failure."); + log.warn("Failed to mark {} as failed. Sleeping in case this was a transient failure.", dstJobDirectory); try { Thread.sleep(FAILURE_SLEEP_TIME); } catch (InterruptedException ie) { @@ -528,7 +531,7 @@ public void run() { } } } catch (Exception e) { - log.error("Error: " + e.getMessage(), e); + log.error("Error: {}", e.getMessage(), e); } } @@ -548,7 +551,7 @@ protected void cleanJobDirectoriesOnStartup() throws IOException { try { getFileSystem(destHdfs).delete(cleanupDirectories[i], true); } catch (IOException e) { - log.warn("Unable to delete directory " + cleanupDirectories[i], e); + log.warn("Unable to delete directory {}", cleanupDirectories[i], e); } } @@ -567,13 +570,13 @@ protected void shutdown() { * the server socket */ protected void listenForShutdownCommand(ServerSocket serverSocket) { - log.info("Listening for shutdown commands on port " + serverSocket.getLocalPort()); + log.info("Listening for shutdown commands on port {}", serverSocket.getLocalPort()); while (true) { try { Socket s = serverSocket.accept(); SocketAddress remoteAddress = s.getRemoteSocketAddress(); try { - log.info(remoteAddress + " connected to the shutdown port"); + log.info("{} connected to the shutdown port", remoteAddress); s.setSoTimeout(30000); InputStream is = s.getInputStream(); BufferedReader rdr = new BufferedReader(new InputStreamReader(is)); @@ -586,13 +589,13 @@ protected void listenForShutdownCommand(ServerSocket serverSocket) { serverSocket.close(); break; } else { - log.info("Unkown command [" + line + "] received from " + remoteAddress + ". Ignoring."); + log.info("Unknown command [{}] received from {}. 
Ignoring.", line, remoteAddress); } } catch (SocketTimeoutException e) { - log.info("Timed out waiting for input from " + remoteAddress); + log.info("Timed out waiting for input from {}", remoteAddress); } } catch (IOException e) { - log.error("Error waiting for shutdown connection: " + e.getMessage(), e); + log.error("Error waiting for shutdown connection: {}", e.getMessage(), e); } } } @@ -610,7 +613,7 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { Path destPath = dest.makeQualified(new Path(jobDirectory.toUri().getPath())); Path logPath = new Path(destPath, "logs"); - log.info("Copying (using distcp) " + srcPath + " to " + destPath); + log.info("Copying (using distcp) {} to {}", srcPath, destPath); // Make sure the destination path doesn't already exist, so that distcp won't // complain. We could add -i to the distcp command, but we don't want to hide @@ -635,7 +638,7 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { String[] args = (jobtracker == null) ? new String[0] : new String[] {"-jt", jobtracker}; int res = ToolRunner.run(conf, new DistCp(conf, options), args); if (res != 0) { - log.error("The toolrunner failed to execute. Returned with exit code of " + res); + log.error("The toolrunner failed to execute. Returned with exit code of {}", res); throw new RuntimeException("Failed to DistCp: " + res); } else { // verify the data was copied @@ -647,7 +650,7 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { for (FileStatus srcFile : src.listStatus(srcPath)) { FileStatus destFile = destFiles.get(srcFile.getPath().getName()); if (destFile == null || destFile.getLen() != srcFile.getLen()) { - log.error("The DistCp failed to copy " + srcFile.getPath()); + log.error("The DistCp failed to copy {}", srcFile.getPath()); throw new RuntimeException("Failed to DistCp " + srcFile.getPath()); } } @@ -672,12 +675,15 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { * @return boolean flag */ public boolean canBringMapFilesOnline(long lastOnlineTime, boolean logInfo) { - Level level = (logInfo ? Level.INFO : Level.DEBUG); int majC = getMajorCompactionCount(); - log.log(level, "There are " + majC + " compactions currently running or queued."); - long delta = System.currentTimeMillis() - lastOnlineTime; - log.log(level, "Time since map files last brought online: " + (delta / 1000) + "s"); + if (logInfo) { + log.info("There are {} compactions currently running or queued.", majC); + log.info("Time since map files last brought online: {}s", delta / 1000); + } else { + log.debug("There are {} compactions currently running or queued.", majC); + log.debug("Time since map files last brought online: {}s", delta / 1000); + } return (delta > MAJC_WAIT_TIMEOUT) && (majC < MAJC_THRESHOLD); } @@ -699,7 +705,7 @@ private int getMajorCompactionCount() { } catch (Exception e) { // Accumulo API changed, catch exception for now until we redeploy // accumulo on lightning. 
- log.error("Unable to retrieve major compaction stats: " + e.getMessage()); + log.error("Unable to retrieve major compaction stats: {}", e.getMessage()); } finally { if (client != null) { ThriftUtil.close(client, context); @@ -739,7 +745,7 @@ private Path[] getJobDirectories(URI hdfs, Path pathPattern) throws IOException } else { jobDirectories = new Path[0]; } - log.debug("Completed job directories: " + Arrays.toString(jobDirectories)); + log.debug("Completed job directories: {}", Arrays.toString(jobDirectories)); return jobDirectories; } @@ -759,7 +765,7 @@ private Path[] getJobDirectories(URI hdfs, Path pathPattern) throws IOException * if the table is not found */ public void bringMapFilesOnline(Path mapFilesDir) throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException { - log.info("Bringing all mapFiles under " + mapFilesDir + " online."); + log.info("Bringing all mapFiles under {} online.", mapFilesDir); // By now the map files should be on the local filesystem FileSystem fs = getFileSystem(destHdfs); @@ -795,16 +801,16 @@ public void bringMapFilesOnline(Path mapFilesDir) throws IOException, AccumuloEx String tableName = tableDir.getName(); if (!tableIds.containsKey(tableName)) { - log.debug("Skipping " + tableDir + " since it is not a accumulo table directory."); + log.debug("Skipping {} since it is not a accumulo table directory.", tableDir); continue; } if (tableNames.containsKey(tableName)) { if (tableNames.get(tableName).equals(tableDir)) { - log.warn("Skipping " + tableDir + " since we already processed " + tableName + " under " + tableNames.get(tableName)); + log.warn("Skipping {} since we already processed {} under {}", tableDir, tableName, tableNames.get(tableName)); continue; } else { - log.error("We got two different paths for " + tableName + ": " + tableNames.get(tableName) + " and " + tableDir); + log.error("We got two different paths for {}: {} and {}", tableName, tableNames.get(tableName), tableDir); throw new IOException("We got two different paths for " + tableName + ": " + tableNames.get(tableName) + " and " + tableDir); } } @@ -904,7 +910,7 @@ public void waitForCompletion() throws InterruptedException { public void run() { try { - // Ensure all of the files put just under tableDir.... + // Ensure all the files put just under tableDir.... 
collapseDirectory(); // create the failures directory @@ -912,18 +918,18 @@ public void run() { Path failuresPath = new Path(failuresDir); FileSystem fileSystem = FileSystem.get(srcHdfs, new Configuration()); if (fileSystem.exists(failuresPath)) { - log.fatal("Cannot bring map files online because a failures directory already exists: " + failuresDir); + log.error(fatal, "Cannot bring map files online because a failures directory already exists: {}", failuresDir); throw new IOException("Cannot bring map files online because a failures directory already exists: " + failuresDir); } fileSystem.mkdirs(failuresPath); // import the directory - log.info("Bringing Map Files online for " + tableName); + log.info("Bringing Map Files online for {}", tableName); accumuloClient.tableOperations().importDirectory(tableName, tableDir.toString(), failuresDir, false); - log.info("Completed bringing map files online for " + tableName); + log.info("Completed bringing map files online for {}", tableName); validateComplete(); } catch (Exception e) { - log.error("Error importing files into table " + tableName + " from directory " + mapFilesDir, e); + log.error("Error importing files into table {} from directory {}", tableName, mapFilesDir, e); this.exception = e; } finally { this.complete = true; @@ -943,7 +949,7 @@ private void collapseDirectory(Path dir) throws IOException { for (FileStatus file : fileSystem.listStatus(dir)) { if (file.isDirectory()) { Path filePath = file.getPath(); - log.warn("Found an unexpected subdirectory " + filePath + ". Collapsing into " + tableDir + "."); + log.warn("Found an unexpected subdirectory {}. Collapsing into {}.", filePath, tableDir); collapseDirectory(filePath); for (FileStatus subFile : fileSystem.listStatus(filePath)) { Path subFilePath = subFile.getPath(); @@ -953,7 +959,7 @@ private void collapseDirectory(Path dir) throws IOException { FileChecksum subFileCheckSum = fileSystem.getFileChecksum(subFilePath); FileChecksum destFileCheckSum = fileSystem.getFileChecksum(destFilePath); if (subFileCheckSum.equals(destFileCheckSum)) { - log.info(subFilePath + " and " + destFilePath + " are identical, removing the former"); + log.info("{} and {} are identical, removing the former", subFilePath, destFilePath); fileSystem.delete(subFilePath, false); } else { // Attempt to rename the file instead of failing @@ -961,17 +967,17 @@ private void collapseDirectory(Path dir) throws IOException { while (fileSystem.exists(destFilePath)) { destFilePath = new Path(tableDir, getNextName(destFilePath.getName())); } - log.info("Renaming " + subFilePath + " to " + destFilePath); + log.info("Renaming {} to {}", subFilePath, destFilePath); fileSystem.rename(subFilePath, destFilePath); } } else { - log.info("Renaming " + subFilePath + " to " + destFilePath); + log.info("Renaming {} to {}", subFilePath, destFilePath); fileSystem.rename(subFilePath, destFilePath); } } // verify the directory is empty if (fileSystem.listStatus(filePath).length > 0) { - log.fatal("Failed to collapse subdirectory " + filePath); + log.error(fatal, "Failed to collapse subdirectory {}", filePath); throw new IOException("Failed to collapse subdirectory " + filePath); } fileSystem.delete(filePath, false); @@ -998,7 +1004,7 @@ private String getNextName(String rfile) { private void validateComplete() throws IOException { FileSystem fileSystem = FileSystem.get(srcHdfs, new Configuration()); if (fileSystem.listStatus(tableDir).length > 0) { - log.fatal("Failed to completely import " + tableDir); + log.error(fatal, "Failed to 
completely import {}", tableDir); throw new IOException("Failed to completely import " + tableDir); } } @@ -1049,19 +1055,17 @@ public void cleanUpJobDirectory(Path mapFilesDir) throws IOException { // delete the successfully loaded map files directory and its parent directory destFs.delete(jobDirectory, true); } else { - log.error("There were failures bringing map files online. See: failed." + mapFilesDir.getName() + "failures/* for details"); - + log.error("There were failures bringing map files online. See: failed. {} failures/* for details", mapFilesDir.getName()); // rename the map files directory boolean success = destFs.rename(mapFilesDir, new Path(mapFilesDir.getParent(), "failed." + mapFilesDir.getName())); if (!success) - log.error("Unable to rename map files directory " + destFs.getUri() + " " + mapFilesDir + " to failed." + mapFilesDir.getName()); - + log.error("Unable to rename map files directory {} {} to failed. {}", destFs.getUri(), mapFilesDir, mapFilesDir.getName()); // create the job.failed file (renamed from job.loading if possible) success = destFs.rename(new Path(jobDirectory, LOADING_FILE_MARKER), new Path(jobDirectory, FAILED_FILE_MARKER)); if (!success) { success = destFs.createNewFile(new Path(jobDirectory, FAILED_FILE_MARKER)); if (!success) - log.error("Unable to create " + FAILED_FILE_MARKER + " file in " + jobDirectory); + log.error("Unable to create {} file in {}", FAILED_FILE_MARKER, jobDirectory); } } @@ -1081,44 +1085,44 @@ public boolean takeOwnershipJobDirectory(Path jobDirectory) { try { success = fs.rename(new Path(jobDirectory, COMPLETE_FILE_MARKER), new Path(jobDirectory, LOADING_FILE_MARKER)); - log.info("Renamed " + jobDirectory + '/' + COMPLETE_FILE_MARKER + " to " + LOADING_FILE_MARKER); + log.info("Renamed {}/{} to {}", jobDirectory, COMPLETE_FILE_MARKER, LOADING_FILE_MARKER); } catch (IOException e2) { - log.error("Exception while marking " + jobDirectory + " for loading: " + e2.getMessage(), e2); + log.error("Exception while marking {} for loading: {}", jobDirectory, e2.getMessage(), e2); } // if not successful, see if we can provide a reason if (!success) { if (fs.exists(new Path(jobDirectory, LOADING_FILE_MARKER))) { - log.info("Another process already took ownership of " + jobDirectory + " for loading"); + log.info("Another process already took ownership of {} for loading", jobDirectory); } else { - log.error("Unable to take ownership of " + jobDirectory + " for loading"); + log.error("Unable to take ownership of {} for loading", jobDirectory); } } else { if (!fs.exists(new Path(jobDirectory, LOADING_FILE_MARKER))) { // if the loading file marker does not exist, then we did not really succeed....hadoop strangeness? - log.error("Rename returned success but yet we did not take ownership of " + jobDirectory + " (" + LOADING_FILE_MARKER + " does not exist)"); + log.error("Rename returned success but yet we did not take ownership of {} ({} does not exist)", jobDirectory, LOADING_FILE_MARKER); success = false; } else if (fs.exists(new Path(jobDirectory, COMPLETE_FILE_MARKER))) { // if the complete file still exists, then perhaps the IngestJob received a create failure and subsequently reattempted. 
- log.error("Rename returned success but yet we did not fully take ownership of " + jobDirectory + " (" + COMPLETE_FILE_MARKER + " moved to " - + LOADING_FILE_MARKER + " but " + COMPLETE_FILE_MARKER + " still exists)"); + log.error("Rename returned success but yet we did not fully take ownership of {} ({} moved to {} but {} still exists)", + jobDirectory, COMPLETE_FILE_MARKER, LOADING_FILE_MARKER, COMPLETE_FILE_MARKER); success = false; // move the job.loading out of the way. I don't want to delete any files just in case hadoop is getting confused - // and a delete might result in both files deleted and then we might think this is simply a failed distcp finally + // and a delete might result in both files deleted, and then we might think this is simply a failed distcp finally // resulting in lost data. int count = 0; boolean done = false; while (!done && fs.exists(new Path(jobDirectory, COMPLETE_FILE_MARKER)) && count < 10) { count++; if (fs.rename(new Path(jobDirectory, LOADING_FILE_MARKER), new Path(jobDirectory, ATTEMPT_FILE_MARKER + '.' + count))) { - log.error("Moved " + LOADING_FILE_MARKER + " to " + ATTEMPT_FILE_MARKER + '.' + count); + log.error("Moved {} to {}. {}", LOADING_FILE_MARKER, ATTEMPT_FILE_MARKER, count); done = true; } } } } } catch (IOException e) { - log.error("Exception while marking " + jobDirectory + " for loading: " + e.getMessage(), e); + log.error("Exception while marking {} for loading: {}", jobDirectory, e.getMessage(), e); } return success; } @@ -1141,10 +1145,10 @@ public boolean markJobDirectoryFailed(URI workingHdfs, Path jobDirectory) { if (!success) { success = fs.createNewFile(new Path(jobDirectory, FAILED_FILE_MARKER)); if (!success) - log.error("Unable to create " + FAILED_FILE_MARKER + " file in " + jobDirectory); + log.error("Unable to create {} file in {}", FAILED_FILE_MARKER, jobDirectory); } } catch (IOException e) { - log.error("Exception while marking " + jobDirectory + " as failed: " + e.getMessage(), e); + log.error("Exception while marking {} as failed: {}", jobDirectory, e.getMessage(), e); } return success; } @@ -1175,7 +1179,7 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { try { renamed = sourceFs.rename(new Path(file), dst); } catch (Exception e) { - log.warn("Exception renaming " + file + " to " + dst, e); + log.warn("Exception renaming {} to {}", file, dst, e); renamed = false; } if (!renamed) { @@ -1187,7 +1191,7 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { throw new IOException( "Unable to rename " + file + " (exists=" + flaggedExists + ") to " + dst + " (exists=" + loadedExists + ")"); } else { - log.warn("File was already moved to loaded: " + dst); + log.warn("File was already moved to loaded: {}", dst); renamed = true; } } @@ -1201,7 +1205,9 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { }); } try { - log.info("Marking " + renameCallables.size() + " sequence files from flagged to loaded"); + if (log.isInfoEnabled()) { + log.info("Marking {} sequence files from flagged to loaded", renameCallables.size()); + } if (!renameCallables.isEmpty()) { List> execResults = executor.invokeAll(renameCallables); @@ -1227,10 +1233,12 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { } if (jobFile != null) { - log.info("Notifying observers for job: " + jobFile.getName() + " from work dir: " + jobDirectory); + if (log.isInfoEnabled()) { + log.info("Notifying observers for job: {} from work dir: {}", jobFile.getName(), 
jobDirectory); + } jobObservable.setJobId(jobFile.getName()); } else { - log.warn("no job file found for: " + jobDirectory); + log.warn("no job file found for: {}", jobDirectory); } } } @@ -1252,9 +1260,9 @@ public boolean markDirectoryForCleanup(Path jobDirectory, URI destFs) { boolean success = false; try { success = getFileSystem(destFs).rename(new Path(jobDirectory, LOADING_FILE_MARKER), new Path(jobDirectory, CLEANUP_FILE_MARKER)); - log.info("Renamed " + jobDirectory + '/' + LOADING_FILE_MARKER + " to " + CLEANUP_FILE_MARKER); + log.info("Renamed {}/{} to {}", jobDirectory, LOADING_FILE_MARKER, CLEANUP_FILE_MARKER); } catch (IOException e2) { - log.error("Exception while marking " + jobDirectory + " for Cleanup: " + e2.getMessage(), e2); + log.error("Exception while marking {} for Cleanup: {}", jobDirectory, e2.getMessage(), e2); } return success; @@ -1292,7 +1300,7 @@ private void writeStats(Path[] jobDirectories) throws IOException { if (!fs.exists(mDir)) fs.mkdirs(mDir); Path dst = new Path(mDir, src.getName()); - log.info("Copying file " + src + " to " + dst); + log.info("Copying file {} to {}", src, dst); fs.copyFromLocalFile(false, true, src, dst); // If this worked, then remove the local file rawFS.delete(src, false); diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/CBMutationOutputFormatter.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/CBMutationOutputFormatter.java index 368c7cbfb67..4648ae17d49 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/CBMutationOutputFormatter.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/CBMutationOutputFormatter.java @@ -14,13 +14,14 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.data.config.ingest.AccumuloHelper; import datawave.ingest.mapreduce.handler.shard.ShardedDataTypeHandler; public class CBMutationOutputFormatter extends AccumuloOutputFormat { - private static final Logger log = Logger.getLogger(CBMutationOutputFormatter.class); + private static final Logger log = LoggerFactory.getLogger(CBMutationOutputFormatter.class); @Override public RecordWriter getRecordWriter(TaskAttemptContext attempt) throws IOException { @@ -51,7 +52,7 @@ public static class CBRecordWriter extends RecordWriter { public CBRecordWriter(RecordWriter writer, TaskAttemptContext context) throws IOException { this.delegate = writer; eventTable = context.getConfiguration().get(ShardedDataTypeHandler.SHARD_TNAME, ""); - log.info("Event Table Name property for " + ShardedDataTypeHandler.SHARD_TNAME + " is " + eventTable); + log.info("Event Table Name property for {} is {}", ShardedDataTypeHandler.SHARD_TNAME, eventTable); } @Override diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/ConstraintChecker.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/ConstraintChecker.java index 7805bd98e74..6af7ccdd623 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/ConstraintChecker.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/ConstraintChecker.java @@ -4,7 +4,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; @@ -14,7 +15,7 @@ */ public class ConstraintChecker { - private static final Logger log = Logger.getLogger(ConstraintChecker.class); + private static final Logger log = LoggerFactory.getLogger(ConstraintChecker.class); public static final String INITIALIZERS = "visibility.constraint.initializers"; @@ -42,7 +43,7 @@ public static ConstraintChecker create(Configuration conf) { initializer.addConstraints(conf, constraints); } catch (Exception e) { - log.error("Could invoke ConstraintInitializer: " + initializerClass, e); + log.error("Could invoke ConstraintInitializer: {}", initializerClass, e); throw new RuntimeException("Could invoke ConstraintInitializer: " + initializerClass, e); } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/DelegatingPartitioner.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/DelegatingPartitioner.java index 6c4f52af353..fc933a7ae9a 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/DelegatingPartitioner.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/DelegatingPartitioner.java @@ -11,7 +11,8 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.util.StringUtils; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This partitioner delegates the partitioning logic to other partitioners based on table name. * The table may have its own dedicated partitioner or may share @@ -21,7 +22,7 @@ * partitioners each limit their output to 10 partitioners. The first will go to 0-9 and the other to 10-19. See DelegatePartitioner's getNumPartitions */ public class DelegatingPartitioner extends Partitioner implements Configurable { - protected static final Logger log = Logger.getLogger(DelegatingPartitioner.class); + protected static final Logger log = LoggerFactory.getLogger(DelegatingPartitioner.class); // this gets populated with the table names that have non-default partitioners defined static final String TABLE_NAMES_WITH_CUSTOM_PARTITIONERS = "DelegatingPartitioner.custom.delegate._tablenames"; @@ -101,7 +102,7 @@ public void setConf(Configuration conf) { try { createDelegatesForTables(); } catch (ClassNotFoundException e) { - log.error(e); + log.error("ClassNotFoundException:", e); // the validation step during the job set up identifies missing classes, so fail the mapper throw new RuntimeException(e); } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/DiskSpaceStarvationStrategy.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/DiskSpaceStarvationStrategy.java index 8601c17908a..c1f37eeb137 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/DiskSpaceStarvationStrategy.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/DiskSpaceStarvationStrategy.java @@ -1,6 +1,7 @@ package datawave.ingest.util; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.hash.BloomFilter; @@ -14,7 +15,7 @@ public class DiskSpaceStarvationStrategy extends AbstractNGramTokenizationStrate public static final String DEFAULT_PATH_FOR_DISK_SPACE_VALIDATION = ResourceAvailabilityUtil.ROOT_PATH; - private final Logger log = Logger.getLogger(DiskSpaceStarvationStrategy.class); + private final Logger log = LoggerFactory.getLogger(DiskSpaceStarvationStrategy.class); private 
TokenizationException lowDiskSpaceException; private final float minDiskSpaceThreshold; private final String minDiskSpacePath; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/GenerateMultipleNumShardsCacheFile.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/GenerateMultipleNumShardsCacheFile.java index 5f57cbc32a7..bb867bb2fd9 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/GenerateMultipleNumShardsCacheFile.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/GenerateMultipleNumShardsCacheFile.java @@ -12,12 +12,13 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import datawave.ingest.mapreduce.handler.shard.NumShards; public class GenerateMultipleNumShardsCacheFile { - private static final Logger log = Logger.getLogger(GenerateMultipleNumShardsCacheFile.class); + private static final Logger log = LoggerFactory.getLogger(GenerateMultipleNumShardsCacheFile.class); public static final String MULTIPLE_NUMSHARD_CACHE_FILE_LOCATION_OVERRIDE = "ns"; public static final String CONFIG_DIRECTORY_LOCATION_OVERRIDE = "cd"; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/MemoryStarvationStrategy.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/MemoryStarvationStrategy.java index 49d9772b437..8f360fadf60 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/MemoryStarvationStrategy.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/MemoryStarvationStrategy.java @@ -1,6 +1,7 @@ package datawave.ingest.util; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.hash.BloomFilter; @@ -14,7 +15,7 @@ public class MemoryStarvationStrategy extends AbstractNGramTokenizationStrategy public static final String DEFAULT_PATH_FOR_DISK_SPACE_VALIDATION = ResourceAvailabilityUtil.ROOT_PATH; - private final Logger log = Logger.getLogger(MemoryStarvationStrategy.class); + private final Logger log = LoggerFactory.getLogger(MemoryStarvationStrategy.class); private TokenizationException lowMemoryException; private final float minMemoryThreshold; private int ngramCount; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/ResourceAvailabilityUtil.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/ResourceAvailabilityUtil.java index dec1fdd8ce4..2fec7eed9f7 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/ResourceAvailabilityUtil.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/ResourceAvailabilityUtil.java @@ -4,7 +4,8 @@ import java.util.HashSet; import java.util.Set; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Utility for checking resource availability, such as disk space and memory. 
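The ResourceAvailabilityUtil hunk just below swaps the logger implementation but keeps the string concatenation ("... based on path " + path, e). That still works and the exception is still attached, but for consistency with the rest of the patch the message could use a placeholder as well; a tiny sketch under that assumption, with an illustrative class name:

    import org.slf4j.LoggerFactory;

    // Illustrative only: the same warning with a placeholder instead of concatenation.
    // The trailing exception is still picked up as the attached Throwable.
    public class OneOffLoggerSketch {
        public static void main(String[] args) {
            String path = "/";
            Exception e = new IllegalStateException("simulated");
            LoggerFactory.getLogger(OneOffLoggerSketch.class)
                            .warn("Unable to check disk space availability based on path {}", path, e);
        }
    }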
@@ -66,7 +67,7 @@ public static boolean isDiskAvailable(final String path, float minPercentageAvai } catch (final Throwable e) { final String toString = e.toString(); if (!LOGGED_EXCEPTIONS.contains(toString)) { - Logger.getLogger(ResourceAvailabilityUtil.class).warn("Unable to check disk space availability based on path " + path, e); + LoggerFactory.getLogger(ResourceAvailabilityUtil.class).warn("Unable to check disk space availability based on path " + path, e); LOGGED_EXCEPTIONS.add(toString); } } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java index 2205ab79a29..0ebe6cf3537 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java @@ -3,14 +3,15 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Utilities for working with ThreadPools. */ public class ThreadUtil { - private static final Logger logger = Logger.getLogger(ThreadUtil.class); + private static final Logger logger = LoggerFactory.getLogger(ThreadUtil.class); /** * Shuts down the executor and gives tasks that are still in progress the given amount of time before continuing. @@ -59,8 +60,8 @@ public static long waitForThreads(Logger log, ThreadPoolExecutor executor, Strin long time = 0; while (((qSize > 0) || (active > 0) || (compl < workUnits)) && !executor.isTerminated()) { if (log != null && (time < (System.currentTimeMillis() - (1000L * 10L)))) { - log.info(type + " running, T: " + active + "/" + poolSize + ", Completed: " + compl + "/" + workUnits + ", " + ", Remaining: " + qSize + ", " - + (cur - start) + " ms elapsed"); + log.info("{} running, T: {}/{}, Completed: {}/{}, Remaining: {}, {} ms elapsed", + type, active, poolSize, compl, workUnits, qSize, (cur - start)); time = System.currentTimeMillis(); } cur = System.currentTimeMillis(); @@ -69,8 +70,8 @@ public static long waitForThreads(Logger log, ThreadPoolExecutor executor, Strin compl = executor.getCompletedTaskCount(); } if (log != null) { - log.info("Finished Waiting for " + type + " running, T: " + active + "/" + poolSize + ", Completed: " + compl + "/" + workUnits + ", " - + ", Remaining: " + qSize + ", " + (cur - start) + " ms elapsed"); + log.info("Finished Waiting for {} running, T: {}/{}, Completed: {}/{}, Remaining: {}, {} ms elapsed", + type, active, poolSize, compl, workUnits, qSize, (cur - start)); } long stop = System.currentTimeMillis(); From 7412787219673db33750f7173c0f1e9d1e9b3015 Mon Sep 17 00:00:00 2001 From: emiliodskinner Date: Wed, 22 Jan 2025 18:22:47 +0000 Subject: [PATCH 4/6] task-2628-ingest-core: pt2 --- .../datawave/data/hash/SnowflakeUIDBuilder.java | 4 ++-- .../mapreduce/handler/facet/FacetHandler.java | 2 +- .../ContentIndexingColumnBasedHandler.java | 12 ++++++------ .../mapreduce/job/BulkIngestMapFileLoader.java | 4 ++-- .../ingest/mapreduce/job/SplitsFile.java | 17 +++++++++-------- .../java/datawave/ingest/util/ThreadUtil.java | 8 ++++---- .../util/NGramTokenizationStrategyTest.java | 9 +++++---- 7 files changed, 29 insertions(+), 27 deletions(-) diff --git a/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java b/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java index 6902cbe2965..8baacddbb76 100644 --- 
a/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java +++ b/warehouse/core/src/main/java/datawave/data/hash/SnowflakeUIDBuilder.java @@ -376,8 +376,8 @@ private long validateTimestamp(long timestamp) { } if (timestamp <= this.previousTid) { - LOGGER.warn("Current tid is less than the previous. This could cause uid collisions.\nMid: {}, Timestamp: {}, Previous: {}, System Time: {}", - mid, timestamp, previousTid, System.currentTimeMillis()); + LOGGER.warn("Current tid is less than the previous. This could cause uid collisions.\nMid: {}, Timestamp: {}, Previous: {}, System Time: {}", mid, + timestamp, previousTid, System.currentTimeMillis()); timestamp = this.previousTid + 1; } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java index 0fec6879aa0..648ae021fda 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/facet/FacetHandler.java @@ -21,9 +21,9 @@ import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import org.geotools.feature.type.DateUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.geotools.feature.type.DateUtil; import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; import com.clearspring.analytics.stream.cardinality.ICardinality; diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java index de8d89fef30..231289a5919 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/tokenize/ContentIndexingColumnBasedHandler.java @@ -16,15 +16,15 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.StatusReporter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.Marker; -import org.slf4j.MarkerFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; import com.google.common.base.Preconditions; import com.google.common.collect.HashMultimap; @@ -391,8 +391,8 @@ protected void tokenizeField(final Analyzer a, final NormalizedContentInterface if (elapsedEstimateMsec > tokenHelper.getTokenizerTimeWarnThresholdMsec() && !tokenizerTimeWarned) { long realDelta = System.currentTimeMillis() - start; counters.incrementValue(ContentIndexCounters.TOKENIZER_TIME_WARNINGS, 1, reporter); - log.warn("Tokenization of field {} has exceeded warning threshold {}ms ({}ms)", - modifiedFieldName, tokenHelper.getTokenizerTimeErrorThresholdMsec(), realDelta); + log.warn("Tokenization of field {} has exceeded warning threshold {}ms ({}ms)", modifiedFieldName, + 
tokenHelper.getTokenizerTimeErrorThresholdMsec(), realDelta); tokenizerTimeWarned = true; } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java index 5889a456823..2b9eaae6e65 100755 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java @@ -1104,8 +1104,8 @@ public boolean takeOwnershipJobDirectory(Path jobDirectory) { success = false; } else if (fs.exists(new Path(jobDirectory, COMPLETE_FILE_MARKER))) { // if the complete file still exists, then perhaps the IngestJob received a create failure and subsequently reattempted. - log.error("Rename returned success but yet we did not fully take ownership of {} ({} moved to {} but {} still exists)", - jobDirectory, COMPLETE_FILE_MARKER, LOADING_FILE_MARKER, COMPLETE_FILE_MARKER); + log.error("Rename returned success but yet we did not fully take ownership of {} ({} moved to {} but {} still exists)", jobDirectory, + COMPLETE_FILE_MARKER, LOADING_FILE_MARKER, COMPLETE_FILE_MARKER); success = false; // move the job.loading out of the way. I don't want to delete any files just in case hadoop is getting confused // and a delete might result in both files deleted, and then we might think this is simply a failed distcp finally diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/SplitsFile.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/SplitsFile.java index 28283f940b9..7c25b062454 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/SplitsFile.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/SplitsFile.java @@ -17,7 +17,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Table; @@ -26,7 +27,7 @@ import datawave.util.time.DateHelper; public class SplitsFile { - private static final Logger log = Logger.getLogger(SplitsFile.class); + private static final Logger log = LoggerFactory.getLogger(SplitsFile.class); public static final String SPLIT_WORK_DIR = "split.work.dir"; public static final String MAX_SHARDS_PER_TSERVER = "shardedMap.max.shards.per.tserver"; @@ -45,11 +46,11 @@ public static void setupFile(Job job, Configuration conf) throws IOException, UR boolean doValidation = conf.getBoolean(SHARD_VALIDATION_ENABLED, false); try { - log.info("Base splits: " + baseSplitsPath); + log.info("Base splits: {}", baseSplitsPath); Path destSplits = new Path( conf.get(SPLIT_WORK_DIR) + "/" + conf.get(TableSplitsCache.SPLITS_CACHE_FILE, TableSplitsCache.DEFAULT_SPLITS_CACHE_FILE)); - log.info("Dest splits: " + destSplits); + log.info("Dest splits: {}", destSplits); FileUtil.copy(sourceFs, baseSplitsPath, destFs, destSplits, false, conf); conf.set(TableSplitsCache.SPLITS_CACHE_DIR, conf.get(SPLIT_WORK_DIR)); @@ -66,7 +67,7 @@ public static void setupFile(Job job, Configuration conf) throws IOException, UR } } catch (Exception e) { - log.error("Unable to use splits file because " + e.getMessage()); + log.error("Unable to use splits file because {}", e.getMessage()); throw e; } } @@ -93,13 +94,13 @@ public static void validateShardIdLocations(Configuration conf, String tableName int 
expectedNumberOfShards = shardIdFactory.getNumShards(datePrefix); boolean shardsExist = shardsExistForDate(shardIdToLocation, datePrefix, expectedNumberOfShards); if (!shardsExist) { - log.error("Shards for " + datePrefix + " for table " + tableName + " do not exist!"); + log.error("Shards for {} for table {} do not exist!", datePrefix, tableName); isValid = false; continue; } boolean shardsAreBalanced = shardsAreBalanced(shardIdToLocation, datePrefix, maxShardsPerTserver); if (!shardsAreBalanced) { - log.error("Shards for " + datePrefix + " for table " + tableName + " are not balanced!"); + log.error("Shards for {} for table {} are not balanced!", datePrefix, tableName); isValid = false; } } @@ -175,7 +176,7 @@ private static boolean shardsAreBalanced(Map locations, String date // if shard is assigned to more tservers than allowed, then the shards are not balanced if (cnt.intValue() > maxShardsPerTserver) { - log.warn(cnt.toInteger() + " Shards for " + datePrefix + " assigned to tablet " + value); + log.warn("{} Shards for {} assigned to tablet {}", cnt.toInteger(), datePrefix, value); dateIsBalanced = false; } diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java b/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java index 0ebe6cf3537..0b81de5a1e5 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/util/ThreadUtil.java @@ -60,8 +60,8 @@ public static long waitForThreads(Logger log, ThreadPoolExecutor executor, Strin long time = 0; while (((qSize > 0) || (active > 0) || (compl < workUnits)) && !executor.isTerminated()) { if (log != null && (time < (System.currentTimeMillis() - (1000L * 10L)))) { - log.info("{} running, T: {}/{}, Completed: {}/{}, Remaining: {}, {} ms elapsed", - type, active, poolSize, compl, workUnits, qSize, (cur - start)); + log.info("{} running, T: {}/{}, Completed: {}/{}, Remaining: {}, {} ms elapsed", type, active, poolSize, compl, workUnits, qSize, + (cur - start)); time = System.currentTimeMillis(); } cur = System.currentTimeMillis(); @@ -70,8 +70,8 @@ public static long waitForThreads(Logger log, ThreadPoolExecutor executor, Strin compl = executor.getCompletedTaskCount(); } if (log != null) { - log.info("Finished Waiting for {} running, T: {}/{}, Completed: {}/{}, Remaining: {}, {} ms elapsed", - type, active, poolSize, compl, workUnits, qSize, (cur - start)); + log.info("Finished Waiting for {} running, T: {}/{}, Completed: {}/{}, Remaining: {}, {} ms elapsed", type, active, poolSize, compl, workUnits, + qSize, (cur - start)); } long stop = System.currentTimeMillis(); diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java index 07d8ed864f4..f5b4b0f6709 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java @@ -10,13 +10,14 @@ import java.util.Map; import java.util.Vector; -import org.apache.log4j.Logger; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.powermock.api.easymock.PowerMock; import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
com.google.common.hash.BloomFilter; @@ -131,7 +132,7 @@ public void testTokenize_LowMemory() throws Exception { // Set expectations PowerMock.mockStaticPartial(Logger.class, "getLogger"); - expect(Logger.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); + expect(LoggerFactory.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isMemoryAvailable"); expect(ResourceAvailabilityUtil.isMemoryAvailable(.05f)).andReturn(true); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isMemoryAvailable"); @@ -161,7 +162,7 @@ public void testTokenize_LowDiskSpace() throws Exception { // Set expectations PowerMock.mockStaticPartial(Logger.class, "getLogger"); - expect(Logger.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); + expect(LoggerFactory.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isDiskAvailable"); expect(ResourceAvailabilityUtil.isDiskAvailable("/", .05f)).andReturn(true); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isDiskAvailable"); @@ -510,7 +511,7 @@ public void testTokenize_StrategyStack() throws Exception { // Set expectations PowerMock.mockStaticPartial(Logger.class, "getLogger"); - expect(Logger.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); + expect(LoggerFactory.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isDiskAvailable"); expect(ResourceAvailabilityUtil.isDiskAvailable("/", .05f)).andReturn(true).times(1); expect(ResourceAvailabilityUtil.isDiskAvailable("/", .05f)).andReturn(false).times(1); From 31bee2a24a47404869f9e523930527fd9b717848 Mon Sep 17 00:00:00 2001 From: emiliodskinner Date: Wed, 22 Jan 2025 18:55:56 +0000 Subject: [PATCH 5/6] task-2628-ingest-core: pt2 fix unit test --- .../ingest/util/NGramTokenizationStrategyTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java index f5b4b0f6709..06c395242a6 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/util/NGramTokenizationStrategyTest.java @@ -29,7 +29,7 @@ import datawave.ingest.util.TimeoutStrategy.TimeoutException; @RunWith(PowerMockRunner.class) -@PrepareForTest({ResourceAvailabilityUtil.class, Logger.class, MemberShipTest.class, BloomFilter.class}) +@PrepareForTest({ResourceAvailabilityUtil.class, LoggerFactory.class, MemberShipTest.class, BloomFilter.class}) public class NGramTokenizationStrategyTest { BloomFilter filter; @@ -131,7 +131,7 @@ public void testTokenize_LowMemory() throws Exception { int expectedNGramCount = BloomFilterUtil.predictNGramCount(fieldValue, MemoryStarvationStrategy.DEFAULT_MAX_NGRAM_LENGTH); // Set expectations - PowerMock.mockStaticPartial(Logger.class, "getLogger"); + PowerMock.mockStaticPartial(LoggerFactory.class, "getLogger"); expect(LoggerFactory.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isMemoryAvailable"); expect(ResourceAvailabilityUtil.isMemoryAvailable(.05f)).andReturn(true); @@ -161,7 +161,7 @@ public void testTokenize_LowDiskSpace() throws Exception { int expectedNGramCount = 
BloomFilterUtil.predictNGramCount(fieldValue, DiskSpaceStarvationStrategy.DEFAULT_MAX_NGRAM_LENGTH); // Set expectations - PowerMock.mockStaticPartial(Logger.class, "getLogger"); + PowerMock.mockStaticPartial(LoggerFactory.class, "getLogger"); expect(LoggerFactory.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isDiskAvailable"); expect(ResourceAvailabilityUtil.isDiskAvailable("/", .05f)).andReturn(true); @@ -510,7 +510,7 @@ public void testTokenize_StrategyStack() throws Exception { int timeoutAfterNGramCount = BloomFilterUtil.predictNGramCount(ncis.iterator().next().getIndexedFieldValue()); // Set expectations - PowerMock.mockStaticPartial(Logger.class, "getLogger"); + PowerMock.mockStaticPartial(LoggerFactory.class, "getLogger"); expect(LoggerFactory.getLogger(isA(Class.class))).andReturn(this.logger).anyTimes(); PowerMock.mockStaticPartial(ResourceAvailabilityUtil.class, "isDiskAvailable"); expect(ResourceAvailabilityUtil.isDiskAvailable("/", .05f)).andReturn(true).times(1); From d5600f16cab3c0461fb01d8620bd2f7892da4d3f Mon Sep 17 00:00:00 2001 From: emiliodskinner Date: Wed, 22 Jan 2025 19:59:11 +0000 Subject: [PATCH 6/6] task-2628-ingest-core: pt2 fix unit test --- .../job/BulkIngestMapFileLoader.java | 178 +++++++++--------- 1 file changed, 85 insertions(+), 93 deletions(-) diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java index 2b9eaae6e65..cfd066ee75b 100755 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/job/BulkIngestMapFileLoader.java @@ -57,10 +57,8 @@ import org.apache.hadoop.tools.DistCp; import org.apache.hadoop.tools.DistCpOptions; import org.apache.hadoop.util.ToolRunner; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.Marker; -import org.slf4j.MarkerFactory; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; import com.google.common.base.Objects; import com.google.common.collect.Lists; @@ -75,8 +73,7 @@ * various tablet servers. 
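The NGramTokenizationStrategyTest changes in patches 4 and 5 above move the PowerMock static stubbing from log4j's Logger to SLF4J's LoggerFactory, so the class listed in @PrepareForTest and in mockStaticPartial must be LoggerFactory as well. The sketch below condenses that pattern into a self-contained test; it assumes JUnit 4, EasyMock and PowerMock on the classpath (as the existing test already does) and uses illustrative names.

    import static org.easymock.EasyMock.expect;
    import static org.easymock.EasyMock.isA;
    import static org.junit.Assert.assertSame;

    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.powermock.api.easymock.PowerMock;
    import org.powermock.core.classloader.annotations.PrepareForTest;
    import org.powermock.modules.junit4.PowerMockRunner;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    // Condensed sketch of stubbing LoggerFactory.getLogger with PowerMock/EasyMock.
    @RunWith(PowerMockRunner.class)
    @PrepareForTest({LoggerFactory.class})
    public class LoggerFactoryMockSketch {

        @Test
        public void returnsStubbedLogger() {
            Logger stub = PowerMock.createMock(Logger.class);

            // Only getLogger is stubbed; the rest of LoggerFactory keeps its real behaviour.
            PowerMock.mockStaticPartial(LoggerFactory.class, "getLogger");
            expect(LoggerFactory.getLogger(isA(Class.class))).andReturn(stub).anyTimes();
            PowerMock.replayAll();

            // While the static mock is in replay, calls to LoggerFactory.getLogger return the stub.
            Logger obtained = LoggerFactory.getLogger(LoggerFactoryMockSketch.class);
            assertSame(stub, obtained);

            PowerMock.verifyAll();
        }
    }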
*/ public final class BulkIngestMapFileLoader implements Runnable { - private static Logger log = LoggerFactory.getLogger(BulkIngestMapFileLoader.class); - private static Marker fatal = MarkerFactory.getMarker("fatal"); + private static Logger log = Logger.getLogger(BulkIngestMapFileLoader.class); private static int SLEEP_TIME = 30000; private static int FAILURE_SLEEP_TIME = 10 * 60 * 1000; // 10 minutes private static int MAX_DIRECTORIES = 1; @@ -291,48 +288,48 @@ public static void main(String[] args) throws AccumuloSecurityException, IOExcep try { String[] classes = jobObserverClasses.split(","); for (String jobObserverClass : classes) { - log.info("Adding job observer: {}", jobObserverClass); + log.info("Adding job observer: " + jobObserverClass); Class clazz = Class.forName(jobObserverClass); Observer o = (Observer) clazz.getDeclaredConstructor().newInstance(); jobObservers.add(o); } } catch (ClassNotFoundException | IllegalAccessException | InstantiationException | InvocationTargetException e) { - log.error("cannot instantiate job observer class {}", jobObserverClasses, e); + log.error("cannot instantiate job observer class '" + jobObserverClasses + "'", e); System.exit(-2); } catch (ClassCastException e) { - log.error("cannot cast {} to Observer", jobObserverClasses, e); + log.error("cannot cast '" + jobObserverClasses + "' to Observer", e); System.exit(-2); } } else if (args[i].startsWith("-")) { int index = args[i].indexOf('=', 1); if (index < 0) { - log.error("WARN: skipping bad property configuration {}", args[i]); + log.error("WARN: skipping bad property configuration " + args[i]); } else { String[] strArr = new String[] {args[i].substring(1, index), args[i].substring(index + 1)}; - log.info("Setting {} = {}", strArr[0], strArr[1]); + log.info("Setting " + strArr[0] + " = \"" + strArr[1] + '"'); properties.add(strArr); } } else { - log.info("Adding resource {}", args[i]); + log.info("Adding resource " + args[i]); conf.addResource(args[i]); } } } - log.info("Set sleep time to {}ms", SLEEP_TIME); - log.info("Will wait to bring map files online if there are more than {} running or queued major compactions.", MAJC_THRESHOLD); - log.info("Will not bring map files online unless at least {}ms have passed since last time.", MAJC_WAIT_TIMEOUT); - log.info("Will check the majcThreshold and majcDelay every {} bulk loads.", MAJC_CHECK_INTERVAL); - log.info("Processing a max of {} directories", MAX_DIRECTORIES); - log.info("Using {} bulk load threads", numBulkThreads); - log.info("Using {} HDFS operation threads", numHdfsThreads); - log.info("Using {} bulk assign threads", numBulkAssignThreads); - log.info("Using {} as the file system containing the original sequence files", seqFileHdfs); - log.info("Using {} as the source file system", srcHdfs); - log.info("Using {} as the destination file system", destHdfs); - log.info("Using {} as the jobtracker", jobtracker); - log.info("Using {} as the shutdown port", SHUTDOWN_PORT); - log.info("Using {} processing order", (FIFO ? 
"FIFO" : "LIFO")); + log.info("Set sleep time to " + SLEEP_TIME + "ms"); + log.info("Will wait to bring map files online if there are more than " + MAJC_THRESHOLD + " running or queued major compactions."); + log.info("Will not bring map files online unless at least " + MAJC_WAIT_TIMEOUT + "ms have passed since last time."); + log.info("Will check the majcThreshold and majcDelay every " + MAJC_CHECK_INTERVAL + " bulk loads."); + log.info("Processing a max of " + MAX_DIRECTORIES + " directories"); + log.info("Using " + numBulkThreads + " bulk load threads"); + log.info("Using " + numHdfsThreads + " HDFS operation threads"); + log.info("Using " + numBulkAssignThreads + " bulk assign threads"); + log.info("Using " + seqFileHdfs + " as the file system containing the original sequence files"); + log.info("Using " + srcHdfs + " as the source file system"); + log.info("Using " + destHdfs + " as the destination file system"); + log.info("Using " + jobtracker + " as the jobtracker"); + log.info("Using " + SHUTDOWN_PORT + " as the shutdown port"); + log.info("Using " + (FIFO ? "FIFO" : "LIFO") + " processing order"); for (String[] s : properties) { conf.set(s[0], s[1]); @@ -350,7 +347,7 @@ public static void main(String[] args) throws AccumuloSecurityException, IOExcep log.error("Configured tables for configured data types is empty"); System.exit(-2); } - log.info("Found table priorities: {}", tablePriorities); + log.info("Found table priorities: " + tablePriorities); String workDir = args[0]; String jobDirPattern = args[1].replaceAll("'", ""); @@ -434,7 +431,7 @@ public void run() { try { cleanJobDirectoriesOnStartup(); } catch (IOException e) { - log.error("Error Cleaning Up Directories. Manually check for orphans: {}", e.getMessage(), e); + log.error("Error Cleaning Up Directories. Manually check for orphans: " + e.getMessage(), e); } try { @@ -478,7 +475,7 @@ public void run() { URI workingHdfs = srcHdfs; try { - log.info("Started processing {}", mapFilesDir); + log.info("Started processing " + mapFilesDir); long start = System.currentTimeMillis(); // copy the data if needed @@ -496,12 +493,12 @@ public void run() { cleanUpJobDirectory(mapFilesDir); long end = System.currentTimeMillis(); - log.info("Finished processing {}, duration (sec): {}", mapFilesDir, ((end - start) / 1000)); + log.info("Finished processing " + mapFilesDir + ", duration (sec): " + ((end - start) / 1000)); // now that we actually processed something, reset the last load message time to force a message on the next round lastLoadMessageTime = 0; } catch (Exception e) { - log.error("Failed to process {}", mapFilesDir, e); + log.error("Failed to process " + mapFilesDir, e); boolean marked = markJobDirectoryFailed(workingHdfs, dstJobDirectory); if (!marked) { ++fsAccessFailures; @@ -509,7 +506,7 @@ public void run() { log.error("Too many failures updating marker files. Exiting..."); shutdown(); } else { - log.warn("Failed to mark {} as failed. Sleeping in case this was a transient failure.", dstJobDirectory); + log.warn("Failed to mark " + dstJobDirectory + " as failed. 
Sleeping in case this was a transient failure."); try { Thread.sleep(FAILURE_SLEEP_TIME); } catch (InterruptedException ie) { @@ -531,7 +528,7 @@ public void run() { } } } catch (Exception e) { - log.error("Error: {}", e.getMessage(), e); + log.error("Error: " + e.getMessage(), e); } } @@ -551,7 +548,7 @@ protected void cleanJobDirectoriesOnStartup() throws IOException { try { getFileSystem(destHdfs).delete(cleanupDirectories[i], true); } catch (IOException e) { - log.warn("Unable to delete directory {}", cleanupDirectories[i], e); + log.warn("Unable to delete directory " + cleanupDirectories[i], e); } } @@ -570,13 +567,13 @@ protected void shutdown() { * the server socket */ protected void listenForShutdownCommand(ServerSocket serverSocket) { - log.info("Listening for shutdown commands on port {}", serverSocket.getLocalPort()); + log.info("Listening for shutdown commands on port " + serverSocket.getLocalPort()); while (true) { try { Socket s = serverSocket.accept(); SocketAddress remoteAddress = s.getRemoteSocketAddress(); try { - log.info("{} connected to the shutdown port", remoteAddress); + log.info(remoteAddress + " connected to the shutdown port"); s.setSoTimeout(30000); InputStream is = s.getInputStream(); BufferedReader rdr = new BufferedReader(new InputStreamReader(is)); @@ -589,13 +586,13 @@ protected void listenForShutdownCommand(ServerSocket serverSocket) { serverSocket.close(); break; } else { - log.info("Unknown command [{}] received from {}. Ignoring.", line, remoteAddress); + log.info("Unknown command [" + line + "] received from " + remoteAddress + ". Ignoring."); } } catch (SocketTimeoutException e) { - log.info("Timed out waiting for input from {}", remoteAddress); + log.info("Timed out waiting for input from " + remoteAddress); } } catch (IOException e) { - log.error("Error waiting for shutdown connection: {}", e.getMessage(), e); + log.error("Error waiting for shutdown connection: " + e.getMessage(), e); } } } @@ -613,7 +610,7 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { Path destPath = dest.makeQualified(new Path(jobDirectory.toUri().getPath())); Path logPath = new Path(destPath, "logs"); - log.info("Copying (using distcp) {} to {}", srcPath, destPath); + log.info("Copying (using distcp) " + srcPath + " to " + destPath); // Make sure the destination path doesn't already exist, so that distcp won't // complain. We could add -i to the distcp command, but we don't want to hide @@ -638,7 +635,7 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { String[] args = (jobtracker == null) ? new String[0] : new String[] {"-jt", jobtracker}; int res = ToolRunner.run(conf, new DistCp(conf, options), args); if (res != 0) { - log.error("The toolrunner failed to execute. Returned with exit code of {}", res); + log.error("The toolrunner failed to execute.
Returned with exit code of " + res); throw new RuntimeException("Failed to DistCp: " + res); } else { // verify the data was copied @@ -650,7 +647,7 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { for (FileStatus srcFile : src.listStatus(srcPath)) { FileStatus destFile = destFiles.get(srcFile.getPath().getName()); if (destFile == null || destFile.getLen() != srcFile.getLen()) { - log.error("The DistCp failed to copy {}", srcFile.getPath()); + log.error("The DistCp failed to copy " + srcFile.getPath()); throw new RuntimeException("Failed to DistCp " + srcFile.getPath()); } } @@ -675,15 +672,12 @@ private Path distCpDirectory(Path jobDirectory) throws Exception { * @return boolean flag */ public boolean canBringMapFilesOnline(long lastOnlineTime, boolean logInfo) { + Level level = (logInfo ? Level.INFO : Level.DEBUG); int majC = getMajorCompactionCount(); + log.log(level, "There are " + majC + " compactions currently running or queued."); + long delta = System.currentTimeMillis() - lastOnlineTime; - if (logInfo) { - log.info("There are {} compactions currently running or queued.", majC); - log.info("Time since map files last brought online: {}s", delta / 1000); - } else { - log.debug("There are {} compactions currently running or queued.", majC); - log.debug("Time since map files last brought online: {}s", delta / 1000); - } + log.log(level, "Time since map files last brought online: " + (delta / 1000) + "s"); return (delta > MAJC_WAIT_TIMEOUT) && (majC < MAJC_THRESHOLD); } @@ -705,7 +699,7 @@ private int getMajorCompactionCount() { } catch (Exception e) { // Accumulo API changed, catch exception for now until we redeploy // accumulo on lightning. - log.error("Unable to retrieve major compaction stats: {}", e.getMessage()); + log.error("Unable to retrieve major compaction stats: " + e.getMessage()); } finally { if (client != null) { ThriftUtil.close(client, context); @@ -745,7 +739,7 @@ private Path[] getJobDirectories(URI hdfs, Path pathPattern) throws IOException } else { jobDirectories = new Path[0]; } - log.debug("Completed job directories: {}", Arrays.toString(jobDirectories)); + log.debug("Completed job directories: " + Arrays.toString(jobDirectories)); return jobDirectories; } @@ -765,7 +759,7 @@ private Path[] getJobDirectories(URI hdfs, Path pathPattern) throws IOException * if the table is not found */ public void bringMapFilesOnline(Path mapFilesDir) throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException { - log.info("Bringing all mapFiles under {} online.", mapFilesDir); + log.info("Bringing all mapFiles under " + mapFilesDir + " online."); // By now the map files should be on the local filesystem FileSystem fs = getFileSystem(destHdfs); @@ -801,16 +795,16 @@ public void bringMapFilesOnline(Path mapFilesDir) throws IOException, AccumuloEx String tableName = tableDir.getName(); if (!tableIds.containsKey(tableName)) { - log.debug("Skipping {} since it is not a accumulo table directory.", tableDir); + log.debug("Skipping " + tableDir + " since it is not a accumulo table directory."); continue; } if (tableNames.containsKey(tableName)) { if (tableNames.get(tableName).equals(tableDir)) { - log.warn("Skipping {} since we already processed {} under {}", tableDir, tableName, tableNames.get(tableName)); + log.warn("Skipping " + tableDir + " since we already processed " + tableName + " under " + tableNames.get(tableName)); continue; } else { - log.error("We got two different paths for {}: {} and {}", tableName, 
tableNames.get(tableName), tableDir); + log.error("We got two different paths for " + tableName + ": " + tableNames.get(tableName) + " and " + tableDir); throw new IOException("We got two different paths for " + tableName + ": " + tableNames.get(tableName) + " and " + tableDir); } } @@ -910,7 +904,7 @@ public void waitForCompletion() throws InterruptedException { public void run() { try { - // Ensure all the files put just under tableDir.... + // Ensure all of the files put just under tableDir.... collapseDirectory(); // create the failures directory @@ -918,18 +912,18 @@ public void run() { Path failuresPath = new Path(failuresDir); FileSystem fileSystem = FileSystem.get(srcHdfs, new Configuration()); if (fileSystem.exists(failuresPath)) { - log.error(fatal, "Cannot bring map files online because a failures directory already exists: {}", failuresDir); + log.fatal("Cannot bring map files online because a failures directory already exists: " + failuresDir); throw new IOException("Cannot bring map files online because a failures directory already exists: " + failuresDir); } fileSystem.mkdirs(failuresPath); // import the directory - log.info("Bringing Map Files online for {}", tableName); + log.info("Bringing Map Files online for " + tableName); accumuloClient.tableOperations().importDirectory(tableName, tableDir.toString(), failuresDir, false); - log.info("Completed bringing map files online for {}", tableName); + log.info("Completed bringing map files online for " + tableName); validateComplete(); } catch (Exception e) { - log.error("Error importing files into table {} from directory {}", tableName, mapFilesDir, e); + log.error("Error importing files into table " + tableName + " from directory " + mapFilesDir, e); this.exception = e; } finally { this.complete = true; @@ -949,7 +943,7 @@ private void collapseDirectory(Path dir) throws IOException { for (FileStatus file : fileSystem.listStatus(dir)) { if (file.isDirectory()) { Path filePath = file.getPath(); - log.warn("Found an unexpected subdirectory {}. Collapsing into {}.", filePath, tableDir); + log.warn("Found an unexpected subdirectory " + filePath + ". 
Collapsing into " + tableDir + "."); collapseDirectory(filePath); for (FileStatus subFile : fileSystem.listStatus(filePath)) { Path subFilePath = subFile.getPath(); @@ -959,7 +953,7 @@ private void collapseDirectory(Path dir) throws IOException { FileChecksum subFileCheckSum = fileSystem.getFileChecksum(subFilePath); FileChecksum destFileCheckSum = fileSystem.getFileChecksum(destFilePath); if (subFileCheckSum.equals(destFileCheckSum)) { - log.info("{} and {} are identical, removing the former", subFilePath, destFilePath); + log.info(subFilePath + " and " + destFilePath + " are identical, removing the former"); fileSystem.delete(subFilePath, false); } else { // Attempt to rename the file instead of failing @@ -967,17 +961,17 @@ private void collapseDirectory(Path dir) throws IOException { while (fileSystem.exists(destFilePath)) { destFilePath = new Path(tableDir, getNextName(destFilePath.getName())); } - log.info("Renaming {} to {}", subFilePath, destFilePath); + log.info("Renaming " + subFilePath + " to " + destFilePath); fileSystem.rename(subFilePath, destFilePath); } } else { - log.info("Renaming {} to {}", subFilePath, destFilePath); + log.info("Renaming " + subFilePath + " to " + destFilePath); fileSystem.rename(subFilePath, destFilePath); } } // verify the directory is empty if (fileSystem.listStatus(filePath).length > 0) { - log.error(fatal, "Failed to collapse subdirectory {}", filePath); + log.fatal("Failed to collapse subdirectory " + filePath); throw new IOException("Failed to collapse subdirectory " + filePath); } fileSystem.delete(filePath, false); @@ -1004,7 +998,7 @@ private String getNextName(String rfile) { private void validateComplete() throws IOException { FileSystem fileSystem = FileSystem.get(srcHdfs, new Configuration()); if (fileSystem.listStatus(tableDir).length > 0) { - log.error(fatal, "Failed to completely import {}", tableDir); + log.fatal("Failed to completely import " + tableDir); throw new IOException("Failed to completely import " + tableDir); } } @@ -1055,17 +1049,19 @@ public void cleanUpJobDirectory(Path mapFilesDir) throws IOException { // delete the successfully loaded map files directory and its parent directory destFs.delete(jobDirectory, true); } else { - log.error("There were failures bringing map files online. See: failed. {} failures/* for details", mapFilesDir.getName()); + log.error("There were failures bringing map files online. See: failed." + mapFilesDir.getName() + "failures/* for details"); + // rename the map files directory boolean success = destFs.rename(mapFilesDir, new Path(mapFilesDir.getParent(), "failed." + mapFilesDir.getName())); if (!success) - log.error("Unable to rename map files directory {} {} to failed. {}", destFs.getUri(), mapFilesDir, mapFilesDir.getName()); + log.error("Unable to rename map files directory " + destFs.getUri() + " " + mapFilesDir + " to failed." 
+ mapFilesDir.getName()); + // create the job.failed file (renamed from job.loading if possible) success = destFs.rename(new Path(jobDirectory, LOADING_FILE_MARKER), new Path(jobDirectory, FAILED_FILE_MARKER)); if (!success) { success = destFs.createNewFile(new Path(jobDirectory, FAILED_FILE_MARKER)); if (!success) - log.error("Unable to create {} file in {}", FAILED_FILE_MARKER, jobDirectory); + log.error("Unable to create " + FAILED_FILE_MARKER + " file in " + jobDirectory); } } @@ -1085,44 +1081,44 @@ public boolean takeOwnershipJobDirectory(Path jobDirectory) { try { success = fs.rename(new Path(jobDirectory, COMPLETE_FILE_MARKER), new Path(jobDirectory, LOADING_FILE_MARKER)); - log.info("Renamed {}/{} to {}", jobDirectory, COMPLETE_FILE_MARKER, LOADING_FILE_MARKER); + log.info("Renamed " + jobDirectory + '/' + COMPLETE_FILE_MARKER + " to " + LOADING_FILE_MARKER); } catch (IOException e2) { - log.error("Exception while marking {} for loading: {}", jobDirectory, e2.getMessage(), e2); + log.error("Exception while marking " + jobDirectory + " for loading: " + e2.getMessage(), e2); } // if not successful, see if we can provide a reason if (!success) { if (fs.exists(new Path(jobDirectory, LOADING_FILE_MARKER))) { - log.info("Another process already took ownership of {} for loading", jobDirectory); + log.info("Another process already took ownership of " + jobDirectory + " for loading"); } else { - log.error("Unable to take ownership of {} for loading", jobDirectory); + log.error("Unable to take ownership of " + jobDirectory + " for loading"); } } else { if (!fs.exists(new Path(jobDirectory, LOADING_FILE_MARKER))) { // if the loading file marker does not exist, then we did not really succeed....hadoop strangeness? - log.error("Rename returned success but yet we did not take ownership of {} ({} does not exist)", jobDirectory, LOADING_FILE_MARKER); + log.error("Rename returned success but yet we did not take ownership of " + jobDirectory + " (" + LOADING_FILE_MARKER + " does not exist)"); success = false; } else if (fs.exists(new Path(jobDirectory, COMPLETE_FILE_MARKER))) { // if the complete file still exists, then perhaps the IngestJob received a create failure and subsequently reattempted. - log.error("Rename returned success but yet we did not fully take ownership of {} ({} moved to {} but {} still exists)", jobDirectory, - COMPLETE_FILE_MARKER, LOADING_FILE_MARKER, COMPLETE_FILE_MARKER); + log.error("Rename returned success but yet we did not fully take ownership of " + jobDirectory + " (" + COMPLETE_FILE_MARKER + " moved to " + + LOADING_FILE_MARKER + " but " + COMPLETE_FILE_MARKER + " still exists)"); success = false; // move the job.loading out of the way. I don't want to delete any files just in case hadoop is getting confused - // and a delete might result in both files deleted, and then we might think this is simply a failed distcp finally + // and a delete might result in both files deleted and then we might think this is simply a failed distcp finally // resulting in lost data. int count = 0; boolean done = false; while (!done && fs.exists(new Path(jobDirectory, COMPLETE_FILE_MARKER)) && count < 10) { count++; if (fs.rename(new Path(jobDirectory, LOADING_FILE_MARKER), new Path(jobDirectory, ATTEMPT_FILE_MARKER + '.' + count))) { - log.error("Moved {} to {}. {}", LOADING_FILE_MARKER, ATTEMPT_FILE_MARKER, count); + log.error("Moved " + LOADING_FILE_MARKER + " to " + ATTEMPT_FILE_MARKER + '.' 
+ count); done = true; } } } } } catch (IOException e) { - log.error("Exception while marking {} for loading: {}", jobDirectory, e.getMessage(), e); + log.error("Exception while marking " + jobDirectory + " for loading: " + e.getMessage(), e); } return success; } @@ -1145,10 +1141,10 @@ public boolean markJobDirectoryFailed(URI workingHdfs, Path jobDirectory) { if (!success) { success = fs.createNewFile(new Path(jobDirectory, FAILED_FILE_MARKER)); if (!success) - log.error("Unable to create {} file in {}", FAILED_FILE_MARKER, jobDirectory); + log.error("Unable to create " + FAILED_FILE_MARKER + " file in " + jobDirectory); } } catch (IOException e) { - log.error("Exception while marking {} as failed: {}", jobDirectory, e.getMessage(), e); + log.error("Exception while marking " + jobDirectory + " as failed: " + e.getMessage(), e); } return success; } @@ -1179,7 +1175,7 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { try { renamed = sourceFs.rename(new Path(file), dst); } catch (Exception e) { - log.warn("Exception renaming {} to {}", file, dst, e); + log.warn("Exception renaming " + file + " to " + dst, e); renamed = false; } if (!renamed) { @@ -1191,7 +1187,7 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { throw new IOException( "Unable to rename " + file + " (exists=" + flaggedExists + ") to " + dst + " (exists=" + loadedExists + ")"); } else { - log.warn("File was already moved to loaded: {}", dst); + log.warn("File was already moved to loaded: " + dst); renamed = true; } } @@ -1205,9 +1201,7 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { }); } try { - if (log.isInfoEnabled()) { - log.info("Marking {} sequence files from flagged to loaded", renameCallables.size()); - } + log.info("Marking " + renameCallables.size() + " sequence files from flagged to loaded"); if (!renameCallables.isEmpty()) { List> execResults = executor.invokeAll(renameCallables); @@ -1233,12 +1227,10 @@ public void markSourceFilesLoaded(Path jobDirectory) throws IOException { } if (jobFile != null) { - if (log.isInfoEnabled()) { - log.info("Notifying observers for job: {} from work dir: {}", jobFile.getName(), jobDirectory); - } + log.info("Notifying observers for job: " + jobFile.getName() + " from work dir: " + jobDirectory); jobObservable.setJobId(jobFile.getName()); } else { - log.warn("no job file found for: {}", jobDirectory); + log.warn("no job file found for: " + jobDirectory); } } } @@ -1260,9 +1252,9 @@ public boolean markDirectoryForCleanup(Path jobDirectory, URI destFs) { boolean success = false; try { success = getFileSystem(destFs).rename(new Path(jobDirectory, LOADING_FILE_MARKER), new Path(jobDirectory, CLEANUP_FILE_MARKER)); - log.info("Renamed {}/{} to {}", jobDirectory, LOADING_FILE_MARKER, CLEANUP_FILE_MARKER); + log.info("Renamed " + jobDirectory + '/' + LOADING_FILE_MARKER + " to " + CLEANUP_FILE_MARKER); } catch (IOException e2) { - log.error("Exception while marking {} for Cleanup: {}", jobDirectory, e2.getMessage(), e2); + log.error("Exception while marking " + jobDirectory + " for Cleanup: " + e2.getMessage(), e2); } return success; @@ -1300,7 +1292,7 @@ private void writeStats(Path[] jobDirectories) throws IOException { if (!fs.exists(mDir)) fs.mkdirs(mDir); Path dst = new Path(mDir, src.getName()); - log.info("Copying file {} to {}", src, dst); + log.info("Copying file " + src + " to " + dst); fs.copyFromLocalFile(false, true, src, dst); // If this worked, then remove the local file rawFS.delete(src, 
false);
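
Note on the revert to org.apache.log4j.Logger above: slf4j 1.7.x has no log(Level, message) method and no FATAL level, so the level switch in canBringMapFilesOnline and the log.fatal(...) calls are simpler to express against log4j. If this class is later moved back to slf4j along with the rest of the series, roughly equivalent behavior is possible with the slf4j 2.x fluent API plus a marker. The sketch below is illustrative only and is not part of this patch; it assumes slf4j 2.x is on the classpath, and the class and method names are hypothetical.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import org.slf4j.event.Level;

public final class LevelAwareLoggingSketch {
    private static final Logger log = LoggerFactory.getLogger(LevelAwareLoggingSketch.class);

    // slf4j has no FATAL level; a marker lets appenders single out these entries.
    private static final Marker FATAL = MarkerFactory.getMarker("FATAL");

    // Mirrors the logInfo/level switch in canBringMapFilesOnline without log4j's log(Level, ...).
    static void reportCompactionState(boolean logInfo, int majC, long deltaMillis) {
        Level level = logInfo ? Level.INFO : Level.DEBUG;
        log.atLevel(level).log("There are {} compactions currently running or queued.", majC);
        log.atLevel(level).log("Time since map files last brought online: {}s", deltaMillis / 1000);
    }

    // Stand-in for log.fatal(...): error level plus the FATAL marker.
    static void fatal(String message, Object... args) {
        log.error(FATAL, message, args);
    }
}

Keeping the logger as slf4j would also let the PowerMock-based tests continue to mock LoggerFactory.getLogger, as the earlier test changes in this series do for the other classes.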