merge main

elastic · Oct 17, 2024 · 62d81f4 · 62d81f4
2 parents 9ce63d0 + 3f0ad12
commit 62d81f4
Show file tree

Hide file tree

Showing 844 changed files with 653 additions and 19,249 deletions.
diff --git a/.buildkite/scripts/health-report-tests/bootstrap.py b/.buildkite/scripts/health-report-tests/bootstrap.py
@@ -6,6 +6,7 @@
 """
 import os
 import subprocess
+import time
 import util
 import yaml
 
@@ -99,3 +100,19 @@ def run_logstash(self, full_start_required: bool) -> subprocess.Popen:
 
         print(f"Logstash is running with PID: {process.pid}.")
         return process
+
+    def stop_logstash(self, process: subprocess.Popen):
+        start_time = time.time()    # in seconds
+        process.terminate()
+        for stdout_line in iter(process.stdout.readline, ""):
+            # print(f"STDOUT: {stdout_line.strip()}")
+            if "Logstash shut down" in stdout_line or "Logstash stopped" in stdout_line:
+                print(f"Logstash stopped.")
+                return None
+            # shudown watcher keep running, we should be good with considering time spent
+            if time.time() - start_time > 60:
+                print(f"Logstash didn't stop in 1min, sending SIGTERM signal.")
+                process.kill()
+            if time.time() - start_time > 70:
+                print(f"Logstash didn't stop over 1min, exiting.")
+                return None
diff --git a/.buildkite/scripts/health-report-tests/config_validator.py b/.buildkite/scripts/health-report-tests/config_validator.py
@@ -6,11 +6,11 @@ class ConfigValidator:
     REQUIRED_KEYS = {
         "root": ["name", "config", "conditions", "expectation"],
         "config": ["pipeline.id", "config.string"],
-        "conditions": ["full_start_required"],
+        "conditions": ["full_start_required", "wait_seconds"],
         "expectation": ["status", "symptom", "indicators"],
         "indicators": ["pipelines"],
         "pipelines": ["status", "symptom", "indicators"],
-        "DYNAMIC": ["status", "symptom", "diagnosis", "impacts", "details"],
+        "DYNAMIC": ["status", "symptom", "diagnosis", "impacts", "details"],    # pipeline-id is a DYNAMIC
         "details": ["status"],
         "status": ["state"]
     }
@@ -19,7 +19,8 @@ def __init__(self):
         self.yaml_content = None
 
     def __has_valid_keys(self, data: any, key_path: str, repeated: bool) -> bool:
-        if isinstance(data, str) or isinstance(data, bool):   # we reached values
+        # we reached the value
+        if isinstance(data, str) or isinstance(data, bool) or isinstance(data, int) or isinstance(data, float):
             return True
 
         # we have two indicators section and for the next repeated ones, we go deeper

diff --git a/.buildkite/scripts/health-report-tests/main.py b/.buildkite/scripts/health-report-tests/main.py
@@ -62,21 +62,23 @@ def main():
                 print(f"Testing `{scenario_content.get('name')}` scenario.")
                 scenario_name = scenario_content['name']
 
-                is_full_start_required = next(sub.get('full_start_required') for sub in
-                                              scenario_content.get('conditions') if 'full_start_required' in sub)
+                is_full_start_required = scenario_content.get('conditions').get('full_start_required')
+                wait_seconds = scenario_content.get('conditions').get('wait_seconds')
                 config = scenario_content['config']
                 if config is not None:
                     bootstrap.apply_config(config)
                     expectations = scenario_content.get("expectation")
                     process = bootstrap.run_logstash(is_full_start_required)
                     if process is not None:
+                        if wait_seconds is not None:
+                            print(f"Test requires to wait for `{wait_seconds}` seconds.")
+                            time.sleep(wait_seconds)  # wait for Logstash to start
                         try:
                             scenario_executor.on(scenario_name, expectations)
                         except Exception as e:
                             print(e)
                             has_failed_scenario = True
-                        process.terminate()
-                        time.sleep(5)   # leave some window to terminate the process
+                        bootstrap.stop_logstash(process)
 
         if has_failed_scenario:
             # intentionally fail due to visibility

diff --git a/.buildkite/scripts/health-report-tests/scenario_executor.py b/.buildkite/scripts/health-report-tests/scenario_executor.py
@@ -12,10 +12,12 @@ def __init__(self):
         pass
 
     def __has_intersection(self, expects, results):
+        # TODO: this logic is aligned with the current Health API response;
+        #   there is no guarantee that this method runs correctly if given multiple expects and results
         # we expect expects to be existing in results
         for expect in expects:
             for result in results:
-                if result.get('help_url') and "health-report-pipeline-status.html#" not in result.get('help_url'):
+                if result.get('help_url') and "health-report-pipeline-" not in result.get('help_url'):
                     return False
                 if not all(key in result and result[key] == value for key, value in expect.items()):
                     return False

diff --git a/.buildkite/scripts/health-report-tests/tests/abnormal-termination.yaml b/.buildkite/scripts/health-report-tests/tests/abnormal-termination.yaml
@@ -8,7 +8,8 @@ config:
     pipeline.workers: 1
     pipeline.batch.size: 1
 conditions:
-  - full_start_required: true
+  full_start_required: true
+  wait_seconds: 5
 expectation:
   status: "red"
   symptom: "1 indicator is unhealthy (`pipelines`)"
@@ -22,10 +23,10 @@ expectation:
           symptom: "The pipeline is unhealthy; 1 area is impacted and 1 diagnosis is available"
           diagnosis:
             - cause: "pipeline is not running, likely because it has encountered an error"
-            - action: "view logs to determine the cause of abnormal pipeline shutdown"
+              action: "view logs to determine the cause of abnormal pipeline shutdown"
           impacts:
             - description: "the pipeline is not currently processing"
-            - impact_areas: ["pipeline_execution"]
+              impact_areas: ["pipeline_execution"]
           details:
             status:
               state: "TERMINATED"
diff --git a/.buildkite/scripts/health-report-tests/tests/backpressure-1m.yaml b/.buildkite/scripts/health-report-tests/tests/backpressure-1m.yaml
@@ -0,0 +1,38 @@
+name: "Backpressured in 1min pipeline"
+config:
+  - pipeline.id: backpressure-1m-pp
+    config.string: |
+      input { heartbeat { interval => 0.1 } }
+      filter { failure_injector { degrade_at => [filter] } }
+      output { stdout {} }
+    pipeline.workers: 1
+    pipeline.batch.size: 1
+conditions:
+  full_start_required: true
+  wait_seconds: 70 # give more seconds to make sure time is over the threshold, 1m in this case
+expectation:
+  status: "yellow"
+  symptom: "1 indicator is concerning (`pipelines`)"
+  indicators:
+    pipelines:
+      status: "yellow"
+      symptom: "1 indicator is concerning (`backpressure-1m-pp`)"
+      indicators:
+        backpressure-1m-pp:
+          status: "yellow"
+          symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
+          diagnosis:
+            - id: "logstash:health:pipeline:flow:worker_utilization:diagnosis:1m-blocked"
+              cause: "pipeline workers have been completely blocked for at least one minute"
+              action: "address bottleneck or add resources"
+          impacts:
+            - id: "logstash:health:pipeline:flow:impact:blocked_processing"
+              severity: 2
+              description: "the pipeline is blocked"
+              impact_areas: ["pipeline_execution"]
+          details:
+            status:
+              state: "RUNNING"
+            flow:
+              worker_utilization:
+                last_1_minute: 100.0
diff --git a/.buildkite/scripts/health-report-tests/tests/backpressure-5m.yaml b/.buildkite/scripts/health-report-tests/tests/backpressure-5m.yaml
@@ -0,0 +1,39 @@
+name: "Backpressured in 5min pipeline"
+config:
+  - pipeline.id: backpressure-5m-pp
+    config.string: |
+      input { heartbeat { interval => 0.1 } }
+      filter { failure_injector { degrade_at => [filter] } }
+      output { stdout {} }
+    pipeline.workers: 1
+    pipeline.batch.size: 1
+conditions:
+  full_start_required: true
+  wait_seconds: 310 # give more seconds to make sure time is over the threshold, 5m in this case
+expectation:
+  status: "red"
+  symptom: "1 indicator is unhealthy (`pipelines`)"
+  indicators:
+    pipelines:
+      status: "red"
+      symptom: "1 indicator is unhealthy (`backpressure-5m-pp`)"
+      indicators:
+        backpressure-5m-pp:
+          status: "red"
+          symptom: "The pipeline is unhealthy; 1 area is impacted and 1 diagnosis is available"
+          diagnosis:
+            - id: "logstash:health:pipeline:flow:worker_utilization:diagnosis:5m-blocked"
+              cause: "pipeline workers have been completely blocked for at least five minutes"
+              action: "address bottleneck or add resources"
+          impacts:
+            - id: "logstash:health:pipeline:flow:impact:blocked_processing"
+              severity: 1
+              description: "the pipeline is blocked"
+              impact_areas: ["pipeline_execution"]
+          details:
+            status:
+              state: "RUNNING"
+            flow:
+              worker_utilization:
+                last_1_minute: 100.0
+                last_5_minutes: 100.0
diff --git a/.buildkite/scripts/health-report-tests/tests/multipipeline.yaml b/.buildkite/scripts/health-report-tests/tests/multipipeline.yaml
@@ -0,0 +1,67 @@
+name: "Multi pipeline"
+config:
+  - pipeline.id: slow-start-pp-multipipeline
+    config.string: |
+      input { heartbeat {} }
+      filter { failure_injector { degrade_at => [register] } }
+      output { stdout {} }
+    pipeline.workers: 1
+    pipeline.batch.size: 1
+  - pipeline.id: normally-terminated-pp-multipipeline
+    config.string: |
+      input { generator { count => 1 } }
+      output { stdout {} }
+    pipeline.workers: 1
+    pipeline.batch.size: 1
+  - pipeline.id: abnormally-terminated-pp-multipipeline
+    config.string: |
+      input { heartbeat { interval => 1 } }
+      filter { failure_injector { crash_at => filter } }
+      output { stdout {} }
+    pipeline.workers: 1
+    pipeline.batch.size: 1
+conditions:
+  full_start_required: false
+  wait_seconds: 10
+expectation:
+  status: "red"
+  symptom: "1 indicator is unhealthy (`pipelines`)"
+  indicators:
+    pipelines:
+      status: "red"
+      symptom: "1 indicator is unhealthy (`abnormally-terminated-pp-multipipeline`) and 2 indicators are concerning (`slow-start-pp-multipipeline`, `normally-terminated-pp-multipipeline`)"
+      indicators:
+        slow-start-pp-multipipeline:
+          status: "yellow"
+          symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
+          diagnosis:
+            - cause: "pipeline is loading"
+              action: "if pipeline does not come up quickly, you may need to check the logs to see if it is stalled"
+          impacts:
+            - impact_areas: ["pipeline_execution"]
+          details:
+            status:
+              state: "LOADING"
+        normally-terminated-pp-multipipeline:
+          status: "yellow"
+          symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
+          diagnosis:
+            - cause: "pipeline has finished running because its inputs have been closed and events have been processed"
+              action: "if you expect this pipeline to run indefinitely, you will need to configure its inputs to continue receiving or fetching events"
+          impacts:
+            - impact_areas: [ "pipeline_execution" ]
+          details:
+            status:
+              state: "FINISHED"
+        abnormally-terminated-pp-multipipeline:
+          status: "red"
+          symptom: "The pipeline is unhealthy; 1 area is impacted and 1 diagnosis is available"
+          diagnosis:
+            - cause: "pipeline is not running, likely because it has encountered an error"
+              action: "view logs to determine the cause of abnormal pipeline shutdown"
+          impacts:
+            - description: "the pipeline is not currently processing"
+              impact_areas: [ "pipeline_execution" ]
+          details:
+            status:
+              state: "TERMINATED"
diff --git a/.buildkite/scripts/health-report-tests/tests/normal-termination.yaml b/.buildkite/scripts/health-report-tests/tests/normal-termination.yaml
@@ -7,7 +7,8 @@ config:
     pipeline.workers: 1
     pipeline.batch.size: 1
 conditions:
-  - full_start_required: true
+  full_start_required: true
+  wait_seconds: 5
 expectation:
   status: "yellow"
   symptom: "1 indicator is concerning (`pipelines`)"
@@ -21,7 +22,7 @@ expectation:
           symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
           diagnosis:
             - cause: "pipeline has finished running because its inputs have been closed and events have been processed"
-            - action: "if you expect this pipeline to run indefinitely, you will need to configure its inputs to continue receiving or fetching events"
+              action: "if you expect this pipeline to run indefinitely, you will need to configure its inputs to continue receiving or fetching events"
           impacts:
             - impact_areas: ["pipeline_execution"]
           details:

diff --git a/.buildkite/scripts/health-report-tests/tests/slow-start.yaml b/.buildkite/scripts/health-report-tests/tests/slow-start.yaml
@@ -8,7 +8,8 @@ config:
     pipeline.workers: 1
     pipeline.batch.size: 1
 conditions:
-  - full_start_required: false
+  full_start_required: false
+  wait_seconds: 0
 expectation:
   status: "yellow"
   symptom: "1 indicator is concerning (`pipelines`)"
@@ -22,7 +23,7 @@ expectation:
           symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
           diagnosis:
             - cause: "pipeline is loading"
-            - action: "if pipeline does not come up quickly, you may need to check the logs to see if it is stalled"
+              action: "if pipeline does not come up quickly, you may need to check the logs to see if it is stalled"
           impacts:
             - impact_areas: ["pipeline_execution"]
           details:

diff --git a/Gemfile.template b/Gemfile.template
@@ -14,7 +14,7 @@ gem "logstash-output-elasticsearch", ">= 11.14.0"
 gem "polyglot", require: false
 gem "treetop", require: false
 gem "faraday", "~> 1", :require => false # due elasticsearch-transport (elastic-transport) depending faraday '~> 1'
-gem "minitar", :group => :build
+gem "minitar", "~> 1", :group => :build
 gem "childprocess", "~> 4", :group => :build
 gem "fpm", "~> 1", ">= 1.14.1", :group => :build # compound due to bugfix https://github.com/jordansissel/fpm/pull/1856
 gem "gems", "~> 1", :group => :build

diff --git a/ci/branches.json b/ci/branches.json
@@ -8,7 +8,7 @@
         "branch": "8.x"
       },
       {
-        "branch": "8.15"
+        "branch": "8.16"
       },
       {
         "branch": "7.17"

diff --git a/docker/data/logstash/env2yaml/env2yaml.go b/docker/data/logstash/env2yaml/env2yaml.go
@@ -49,7 +49,6 @@ var validSettings = []string{
 	"config.debug",
 	"config.support_escapes",
 	"config.field_reference.escape_style",
-	"event_api.tags.illegal",
 	"queue.type",
 	"path.queue",
 	"queue.page_capacity",

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
@@ -142,10 +142,6 @@ include::static/modules.asciidoc[]
 
 include::static/arcsight-module.asciidoc[]
 
-include::static/netflow-module.asciidoc[]
-
-include::static/azure-module.asciidoc[]
-
 // Working with Filebeat Modules
 include::static/filebeat-modules.asciidoc[]
 

diff --git a/docs/static/arcsight-module.asciidoc b/docs/static/arcsight-module.asciidoc
@@ -3,9 +3,11 @@
 === Logstash ArcSight Module
 
 ++++
-<titleabbrev>ArcSight Module</titleabbrev>
+<titleabbrev>ArcSight Module (deprecated)</titleabbrev>
 ++++
 
+deprecated[8.16.0, Replaced by https://docs.elastic.co/integrations/cef[Common Event Format (CEF)] from Elastic {integrations}]
+
 NOTE: The Logstash ArcSight module is an
 https://www.elastic.co/products/x-pack[{xpack}] feature under the Basic License
 and is therefore free to use. Please contact
@@ -17,6 +19,8 @@ With a single command, the module taps directly into the ArcSight Smart Connecto
 parses and indexes the security events into Elasticsearch, and installs a suite of Kibana dashboards
 to get you exploring your data immediately.
 
+NOTE: The {ls} ArcSight Module has been deprecated and replaced by https://docs.elastic.co/integrations/cef[Common Event Format (CEF)] from Elastic {integrations}.
+
 [[arcsight-prereqs]]
 ==== Prerequisites
 

diff --git a/docs/static/modules.asciidoc b/docs/static/modules.asciidoc
@@ -7,8 +7,6 @@ visualizing it with purpose-built dashboards.
 These modules are available:
 
 * <<arcsight-module>>
-* <<netflow-module,Netflow Module (deprecated)>>
-* <<azure-module, Microsoft Azure Module (deprecated)>>
 
 Each module comes pre-packaged with Logstash configurations, Kibana dashboards,
 and other meta files that make it easier for you to set up the Elastic Stack for

diff --git a/docs/static/releasenotes.asciidoc b/docs/static/releasenotes.asciidoc
@@ -204,7 +204,7 @@ This new image flavor builds on top of a smaller and more secure base image, and
 * Logstash {logstash-ref}/monitoring-with-ea.html[monitoring doc] improvements https://github.com/elastic/logstash/pull/16208[#16208] 
 * Add ecs and datastream requirement for {logstash-ref}/ea-integrations.html#es-tips[integration filter] https://github.com/elastic/logstash/pull/16268[#16268] 
 * Remove reference to puppet {ls} module https://github.com/elastic/logstash/pull/12356[#12356]
-* Add section to describe intended usage of {logstash-ref}/jvm-settings.html#reducing-off-heap-usage[`pipeline.buffer.type`] https://github.com/elastic/logstash/pull/16083[#16083] 
+* Add section to describe intended usage of https://www.elastic.co/guide/en/logstash/8.15/jvm-settings.html#reducing-off-heap-usage[`pipeline.buffer.type`] https://github.com/elastic/logstash/pull/16083[#16083] 
 * Reposition {logstash-ref}/node-stats-api.html#pipeline-stats[`worker-utilization`] stat for better placement and flow https://github.com/elastic/logstash/pull/16337[#16337]
 * Add {logstash-ref}/performance-troubleshooting.html[tuning guidance] based on Flow metrics https://github.com/elastic/logstash/pull/16289[#16289]
 

diff --git a/docs/static/settings-file.asciidoc b/docs/static/settings-file.asciidoc
@@ -361,12 +361,6 @@ separating each log lines per pipeline could be helpful in case you need to trou
 | Setting to `true` to allow or `false` to block running Logstash as a superuser.
 | `true`
 
-| `event_api.tags.illegal`
-| When set to `warn`, allow illegal value assignment to the reserved `tags` field.
-When set to `rename`, Logstash events can't be created with an illegal value in `tags`. This value will be moved to `_tags` and a `_tagsparsefailure` tag is added to indicate the illegal operation. Doing `set` operation with illegal value will throw exception.
-Setting this flag to `warn` is deprecated and will be removed in a future release.
-| `rename`
-
 | `pipeline.buffer.type`
 | Determine where to allocate memory buffers, for plugins that leverage them.
 Currently defaults to `direct` but can be switched to `heap` to select Java heap space, which will become the default in the future.