Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport 6.1: Add multi_grok pipeline function #20942

Open
wants to merge 2 commits into
base: 6.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog/unreleased/issue-15301.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
type = "added"
message = "Added multi_grok pipeline function to support processing a string against many grok patterns at once."

issues = ["15301"]
pulls = ["20924"]
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
import org.graylog.plugins.pipelineprocessor.functions.strings.KeyValue;
import org.graylog.plugins.pipelineprocessor.functions.strings.Length;
import org.graylog.plugins.pipelineprocessor.functions.strings.Lowercase;
import org.graylog.plugins.pipelineprocessor.functions.strings.MultiGrokMatch;
import org.graylog.plugins.pipelineprocessor.functions.strings.RegexMatch;
import org.graylog.plugins.pipelineprocessor.functions.strings.RegexReplace;
import org.graylog.plugins.pipelineprocessor.functions.strings.Replace;
Expand Down Expand Up @@ -202,6 +203,7 @@ protected void configure() {
addMessageProcessorFunction(RegexMatch.NAME, RegexMatch.class);
addMessageProcessorFunction(RegexReplace.NAME, RegexReplace.class);
addMessageProcessorFunction(GrokMatch.NAME, GrokMatch.class);
addMessageProcessorFunction(MultiGrokMatch.NAME, MultiGrokMatch.class);
addMessageProcessorFunction(GrokExists.NAME, GrokExists.class);

// string functions
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright (C) 2020 Graylog, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*/
package org.graylog.plugins.pipelineprocessor.functions.strings;

import com.google.common.reflect.TypeToken;
import io.krakens.grok.api.Grok;
import io.krakens.grok.api.Match;
import jakarta.inject.Inject;
import org.graylog.plugins.pipelineprocessor.EvaluationContext;
import org.graylog.plugins.pipelineprocessor.ast.functions.AbstractFunction;
import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionArgs;
import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionDescriptor;
import org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor;
import org.graylog2.grok.GrokPatternRegistry;

import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

import static com.google.common.collect.ImmutableList.of;

/**
 * Pipeline function {@code multi_grok}: applies a list of Grok patterns to a string, in list
 * order, and returns the captures of the first pattern that matches.
 */
public class MultiGrokMatch extends AbstractFunction<GrokMatch.GrokResult> {
    public static final String NAME = "multi_grok";

    // Class token handed to the "patterns" parameter descriptor. The cast is unchecked because
    // getRawType() only yields the erased List class.
    @SuppressWarnings("unchecked")
    private static final Class<List<String>> LIST_RETURN_TYPE = (Class<List<String>>) new TypeToken<List<String>>() {
    }.getRawType();
    private final ParameterDescriptor<String, String> valueParam;
    private final ParameterDescriptor<Object, List<String>> patternsParam;
    private final ParameterDescriptor<Boolean, Boolean> namedOnly;

    private final GrokPatternRegistry grokPatternRegistry;

    /**
     * Creates the function and its parameter descriptors.
     *
     * @param grokPatternRegistry registry used to obtain a {@link Grok} for each pattern string
     */
    @Inject
    public MultiGrokMatch(GrokPatternRegistry grokPatternRegistry) {
        this.grokPatternRegistry = grokPatternRegistry;

        valueParam = ParameterDescriptor.string("value").description("The string to apply each Grok pattern against").build();
        patternsParam = ParameterDescriptor.object("patterns", LIST_RETURN_TYPE)
                .description("The Grok patterns to match in order")
                .transform(this::transformToList)
                .build();
        namedOnly = ParameterDescriptor.bool("only_named_captures").optional().description("Whether to only use explicitly named groups in the patterns").build();
    }

    /**
     * Tries each pattern against the value, in order, and stops at the first one that matches.
     *
     * @param args    the arguments supplied by the rule
     * @param context the current evaluation context
     * @return {@code null} if the value is {@code null} or there are no patterns to try; the
     *         flattened captures of the first matching pattern; or a result wrapping an empty
     *         map when none of the patterns match
     */
    @Override
    public GrokMatch.GrokResult evaluate(FunctionArgs args, EvaluationContext context) {
        final String value = valueParam.required(args, context);
        final List<String> patterns = patternsParam.required(args, context);
        final boolean onlyNamedCaptures = namedOnly.optional(args, context).orElse(false);

        if (value == null || patterns == null || patterns.isEmpty()) {
            return null;
        }

        for (String pattern : patterns) {
            final Grok grok = grokPatternRegistry.cachedGrokForPattern(pattern, onlyNamedCaptures);

            final Match match = grok.match(value);
            if (!match.isNull()) {
                // First hit wins; later patterns are not evaluated.
                return new GrokMatch.GrokResult(match.captureFlattened());
            }
        }
        return new GrokMatch.GrokResult(Map.of());
    }

    /**
     * Converts the raw {@code patterns} argument into a list of pattern strings.
     *
     * @param value the raw argument value
     * @return the string form of every non-null element when {@code value} is a collection,
     *         otherwise an empty list
     */
    private List<String> transformToList(Object value) {
        if (value instanceof Collection<?>) {
            return ((Collection<?>) value).stream()
                    // Drop null elements BEFORE calling toString(); mapping first would throw a
                    // NullPointerException on a null element instead of skipping it.
                    .filter(Objects::nonNull)
                    .map(Object::toString)
                    // Guard against toString() implementations that return null.
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList());
        }
        return Collections.emptyList();
    }

    /**
     * Describes the function to the pipeline processor: its name, return type and parameters.
     */
    @Override
    public FunctionDescriptor<GrokMatch.GrokResult> descriptor() {
        return FunctionDescriptor.<GrokMatch.GrokResult>builder()
                .name(NAME)
                .returnType(GrokMatch.GrokResult.class)
                .params(of(patternsParam, valueParam, namedOnly))
                .description("Applies a list of Grok patterns to a string and returns the first match")
                .build();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@
import org.graylog.plugins.pipelineprocessor.functions.strings.KeyValue;
import org.graylog.plugins.pipelineprocessor.functions.strings.Length;
import org.graylog.plugins.pipelineprocessor.functions.strings.Lowercase;
import org.graylog.plugins.pipelineprocessor.functions.strings.MultiGrokMatch;
import org.graylog.plugins.pipelineprocessor.functions.strings.RegexMatch;
import org.graylog.plugins.pipelineprocessor.functions.strings.RegexReplace;
import org.graylog.plugins.pipelineprocessor.functions.strings.Replace;
Expand Down Expand Up @@ -393,6 +394,7 @@ public static void registerFunctions() {
grokPatternService,
Executors.newScheduledThreadPool(1));
functions.put(GrokMatch.NAME, new GrokMatch(grokPatternRegistry));
functions.put(MultiGrokMatch.NAME, new MultiGrokMatch(grokPatternRegistry));
functions.put(GrokExists.NAME, new GrokExists(grokPatternRegistry));

functions.put(MetricCounterIncrement.NAME, new MetricCounterIncrement(metricRegistry));
Expand Down Expand Up @@ -902,6 +904,20 @@ public void grokIssue18883() {
.isNull();
}


/**
 * Runs the {@code multi_grok} rule fixture and checks which captured fields end up on the
 * resulting message.
 */
@Test
public void multiGrok() {
    final Rule multiGrokRule = parser.parseRule(ruleForTest(), false);
    final Message result = evaluateRule(multiGrokRule);

    assertThat(result).isNotNull();

    // Fields named by the "abc_" pattern must be present ...
    assertThat(result.hasField("abc_message")).isTrue();
    assertThat(result.hasField("abc_ip")).isTrue();

    // ... while the fields named by the "abc2_" pattern must not be set.
    assertThat(result.hasField("abc2_message")).isFalse();
    assertThat(result.hasField("abc2_ip")).isFalse();

    // The IP captured through the "123_" pattern.
    assertThat(result.getField("123_ip")).isEqualTo("192.168.0.2");
}

@Test
void urls() {
final Rule rule = parser.parseRule(ruleForTest(), false);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Fixture rule: calls multi_grok three times with the same pattern list and a different input
// value each time, copying whatever fields each call returns onto the message.
// NOTE(review): assumes IPV4 and GREEDY are Grok patterns registered by the test setup - confirm.
rule "multi_grok"
when true
then
// Input starts with "ABC": both the first and the third pattern target this prefix, so this
// call shows which of the two ends up providing the fields (abc_* vs. abc2_*).
set_fields(
fields: multi_grok(
patterns: [
"^ABC %{IPV4:abc_ip}: %{GREEDY:abc_message}",
"^123 %{IPV4:123_ip}: %{GREEDY:123_message}",
"^ABC %{IPV4:abc2_ip}: %{GREEDY:abc2_message}"
],
value: "ABC 192.168.0.1: ABC message1",
only_named_captures: true
)
);

// Input starts with "123": only the second pattern targets this prefix (123_* fields).
set_fields(
fields: multi_grok(
patterns: [
"^ABC %{IPV4:abc_ip}: %{GREEDY:abc_message}",
"^123 %{IPV4:123_ip}: %{GREEDY:123_message}",
"^ABC %{IPV4:abc2_ip}: %{GREEDY:abc2_message}"
],
value: "123 192.168.0.2: 123 message",
only_named_captures: true
)
);

// Input starts with "XYZ": none of the anchored patterns target this prefix, so this call
// exercises the no-match path.
set_fields(
fields: multi_grok(
patterns: [
"^ABC %{IPV4:abc_ip}: %{GREEDY:abc_message}",
"^123 %{IPV4:123_ip}: %{GREEDY:123_message}",
"^ABC %{IPV4:abc2_ip}: %{GREEDY:abc2_message}"
],
value: "XYZ 192.168.0.3: XYZ message",
only_named_captures: true
)
);

end
Loading