Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
kaisecheng committed Oct 17, 2024
2 parents 9ce63d0 + 3f0ad12 commit 62d81f4
Show file tree
Hide file tree
Showing 844 changed files with 653 additions and 19,249 deletions.
17 changes: 17 additions & 0 deletions .buildkite/scripts/health-report-tests/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
import os
import subprocess
import time
import util
import yaml

Expand Down Expand Up @@ -99,3 +100,19 @@ def run_logstash(self, full_start_required: bool) -> subprocess.Popen:

print(f"Logstash is running with PID: {process.pid}.")
return process

def stop_logstash(self, process: subprocess.Popen,
                  kill_after_seconds: float = 60,
                  give_up_after_seconds: float = 70):
    """Ask Logstash to terminate and follow its stdout until it reports shutdown.

    Calls ``process.terminate()`` first, then reads ``process.stdout`` line by
    line looking for the "Logstash shut down" / "Logstash stopped" markers.
    If the markers do not show up in time, the process is force-killed once,
    and shortly after the method gives up waiting.

    Args:
        process: the running Logstash process; its ``stdout`` must be readable.
            NOTE(review): the ``""`` read sentinel and the substring checks
            assume stdout was opened in text mode -- confirm against
            ``run_logstash``.
        kill_after_seconds: elapsed seconds after which ``process.kill()`` is
            called (SIGKILL on POSIX).
        give_up_after_seconds: elapsed seconds after which the method returns
            even though no shutdown marker was seen.

    Returns:
        None in every case (marker seen, timeout reached, or stdout closed).
    """
    # Monotonic clock: elapsed-time checks must not be affected by wall-clock jumps.
    start_time = time.monotonic()
    process.terminate()
    kill_sent = False

    for stdout_line in iter(process.stdout.readline, ""):
        if "Logstash shut down" in stdout_line or "Logstash stopped" in stdout_line:
            print("Logstash stopped.")
            return None

        # Timeouts are only evaluated when a new line arrives; per the original
        # author's note the shutdown watcher keeps running (and presumably keeps
        # logging), so checking the time spent here is considered good enough.
        elapsed = time.monotonic() - start_time
        if elapsed > kill_after_seconds and not kill_sent:
            print(f"Logstash didn't stop in {kill_after_seconds}s, sending SIGKILL signal.")
            process.kill()
            kill_sent = True  # force-kill only once, not on every subsequent line
        if elapsed > give_up_after_seconds:
            print(f"Logstash didn't stop over {give_up_after_seconds}s, exiting.")
            return None

    # stdout reached EOF without a shutdown marker.
    return None
7 changes: 4 additions & 3 deletions .buildkite/scripts/health-report-tests/config_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ class ConfigValidator:
REQUIRED_KEYS = {
"root": ["name", "config", "conditions", "expectation"],
"config": ["pipeline.id", "config.string"],
"conditions": ["full_start_required"],
"conditions": ["full_start_required", "wait_seconds"],
"expectation": ["status", "symptom", "indicators"],
"indicators": ["pipelines"],
"pipelines": ["status", "symptom", "indicators"],
"DYNAMIC": ["status", "symptom", "diagnosis", "impacts", "details"],
"DYNAMIC": ["status", "symptom", "diagnosis", "impacts", "details"], # pipeline-id is a DYNAMIC
"details": ["status"],
"status": ["state"]
}
Expand All @@ -19,7 +19,8 @@ def __init__(self):
self.yaml_content = None

def __has_valid_keys(self, data: any, key_path: str, repeated: bool) -> bool:
if isinstance(data, str) or isinstance(data, bool): # we reached values
# we reached the value
if isinstance(data, str) or isinstance(data, bool) or isinstance(data, int) or isinstance(data, float):
return True

# we have two indicators section and for the next repeated ones, we go deeper
Expand Down
10 changes: 6 additions & 4 deletions .buildkite/scripts/health-report-tests/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,23 @@ def main():
print(f"Testing `{scenario_content.get('name')}` scenario.")
scenario_name = scenario_content['name']

is_full_start_required = next(sub.get('full_start_required') for sub in
scenario_content.get('conditions') if 'full_start_required' in sub)
is_full_start_required = scenario_content.get('conditions').get('full_start_required')
wait_seconds = scenario_content.get('conditions').get('wait_seconds')
config = scenario_content['config']
if config is not None:
bootstrap.apply_config(config)
expectations = scenario_content.get("expectation")
process = bootstrap.run_logstash(is_full_start_required)
if process is not None:
if wait_seconds is not None:
print(f"Test requires to wait for `{wait_seconds}` seconds.")
time.sleep(wait_seconds) # wait for Logstash to start
try:
scenario_executor.on(scenario_name, expectations)
except Exception as e:
print(e)
has_failed_scenario = True
process.terminate()
time.sleep(5) # leave some window to terminate the process
bootstrap.stop_logstash(process)

if has_failed_scenario:
# intentionally fail due to visibility
Expand Down
4 changes: 3 additions & 1 deletion .buildkite/scripts/health-report-tests/scenario_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ def __init__(self):
pass

def __has_intersection(self, expects, results):
# TODO: this logic is aligned on current Health API response
# there is no guarantee that method correctly runs if provided multi expects and results
# we expect expects to be existing in results
for expect in expects:
for result in results:
if result.get('help_url') and "health-report-pipeline-status.html#" not in result.get('help_url'):
if result.get('help_url') and "health-report-pipeline-" not in result.get('help_url'):
return False
if not all(key in result and result[key] == value for key, value in expect.items()):
return False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ config:
pipeline.workers: 1
pipeline.batch.size: 1
conditions:
- full_start_required: true
full_start_required: true
wait_seconds: 5
expectation:
status: "red"
symptom: "1 indicator is unhealthy (`pipelines`)"
Expand All @@ -22,10 +23,10 @@ expectation:
symptom: "The pipeline is unhealthy; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- cause: "pipeline is not running, likely because it has encountered an error"
- action: "view logs to determine the cause of abnormal pipeline shutdown"
action: "view logs to determine the cause of abnormal pipeline shutdown"
impacts:
- description: "the pipeline is not currently processing"
- impact_areas: ["pipeline_execution"]
impact_areas: ["pipeline_execution"]
details:
status:
state: "TERMINATED"
38 changes: 38 additions & 0 deletions .buildkite/scripts/health-report-tests/tests/backpressure-1m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
name: "Backpressured in 1min pipeline"
config:
- pipeline.id: backpressure-1m-pp
config.string: |
input { heartbeat { interval => 0.1 } }
filter { failure_injector { degrade_at => [filter] } }
output { stdout {} }
pipeline.workers: 1
pipeline.batch.size: 1
conditions:
full_start_required: true
wait_seconds: 70 # give more seconds to make sure time is over the threshold, 1m in this case
expectation:
status: "yellow"
symptom: "1 indicator is concerning (`pipelines`)"
indicators:
pipelines:
status: "yellow"
symptom: "1 indicator is concerning (`backpressure-1m-pp`)"
indicators:
backpressure-1m-pp:
status: "yellow"
symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- id: "logstash:health:pipeline:flow:worker_utilization:diagnosis:1m-blocked"
cause: "pipeline workers have been completely blocked for at least one minute"
action: "address bottleneck or add resources"
impacts:
- id: "logstash:health:pipeline:flow:impact:blocked_processing"
severity: 2
description: "the pipeline is blocked"
impact_areas: ["pipeline_execution"]
details:
status:
state: "RUNNING"
flow:
worker_utilization:
last_1_minute: 100.0
39 changes: 39 additions & 0 deletions .buildkite/scripts/health-report-tests/tests/backpressure-5m.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: "Backpressured in 5min pipeline"
config:
- pipeline.id: backpressure-5m-pp
config.string: |
input { heartbeat { interval => 0.1 } }
filter { failure_injector { degrade_at => [filter] } }
output { stdout {} }
pipeline.workers: 1
pipeline.batch.size: 1
conditions:
full_start_required: true
wait_seconds: 310 # give more seconds to make sure time is over the threshold, 5m in this case
expectation:
status: "red"
symptom: "1 indicator is unhealthy (`pipelines`)"
indicators:
pipelines:
status: "red"
symptom: "1 indicator is unhealthy (`backpressure-5m-pp`)"
indicators:
backpressure-5m-pp:
status: "red"
symptom: "The pipeline is unhealthy; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- id: "logstash:health:pipeline:flow:worker_utilization:diagnosis:5m-blocked"
cause: "pipeline workers have been completely blocked for at least five minutes"
action: "address bottleneck or add resources"
impacts:
- id: "logstash:health:pipeline:flow:impact:blocked_processing"
severity: 1
description: "the pipeline is blocked"
impact_areas: ["pipeline_execution"]
details:
status:
state: "RUNNING"
flow:
worker_utilization:
last_1_minute: 100.0
last_5_minutes: 100.0
67 changes: 67 additions & 0 deletions .buildkite/scripts/health-report-tests/tests/multipipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
name: "Multi pipeline"
config:
- pipeline.id: slow-start-pp-multipipeline
config.string: |
input { heartbeat {} }
filter { failure_injector { degrade_at => [register] } }
output { stdout {} }
pipeline.workers: 1
pipeline.batch.size: 1
- pipeline.id: normally-terminated-pp-multipipeline
config.string: |
input { generator { count => 1 } }
output { stdout {} }
pipeline.workers: 1
pipeline.batch.size: 1
- pipeline.id: abnormally-terminated-pp-multipipeline
config.string: |
input { heartbeat { interval => 1 } }
filter { failure_injector { crash_at => filter } }
output { stdout {} }
pipeline.workers: 1
pipeline.batch.size: 1
conditions:
full_start_required: false
wait_seconds: 10
expectation:
status: "red"
symptom: "1 indicator is unhealthy (`pipelines`)"
indicators:
pipelines:
status: "red"
symptom: "1 indicator is unhealthy (`abnormally-terminated-pp-multipipeline`) and 2 indicators are concerning (`slow-start-pp-multipipeline`, `normally-terminated-pp-multipipeline`)"
indicators:
slow-start-pp-multipipeline:
status: "yellow"
symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- cause: "pipeline is loading"
action: "if pipeline does not come up quickly, you may need to check the logs to see if it is stalled"
impacts:
- impact_areas: ["pipeline_execution"]
details:
status:
state: "LOADING"
normally-terminated-pp-multipipeline:
status: "yellow"
symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- cause: "pipeline has finished running because its inputs have been closed and events have been processed"
action: "if you expect this pipeline to run indefinitely, you will need to configure its inputs to continue receiving or fetching events"
impacts:
- impact_areas: [ "pipeline_execution" ]
details:
status:
state: "FINISHED"
abnormally-terminated-pp-multipipeline:
status: "red"
symptom: "The pipeline is unhealthy; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- cause: "pipeline is not running, likely because it has encountered an error"
action: "view logs to determine the cause of abnormal pipeline shutdown"
impacts:
- description: "the pipeline is not currently processing"
impact_areas: [ "pipeline_execution" ]
details:
status:
state: "TERMINATED"
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ config:
pipeline.workers: 1
pipeline.batch.size: 1
conditions:
- full_start_required: true
full_start_required: true
wait_seconds: 5
expectation:
status: "yellow"
symptom: "1 indicator is concerning (`pipelines`)"
Expand All @@ -21,7 +22,7 @@ expectation:
symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- cause: "pipeline has finished running because its inputs have been closed and events have been processed"
- action: "if you expect this pipeline to run indefinitely, you will need to configure its inputs to continue receiving or fetching events"
action: "if you expect this pipeline to run indefinitely, you will need to configure its inputs to continue receiving or fetching events"
impacts:
- impact_areas: ["pipeline_execution"]
details:
Expand Down
5 changes: 3 additions & 2 deletions .buildkite/scripts/health-report-tests/tests/slow-start.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ config:
pipeline.workers: 1
pipeline.batch.size: 1
conditions:
- full_start_required: false
full_start_required: false
wait_seconds: 0
expectation:
status: "yellow"
symptom: "1 indicator is concerning (`pipelines`)"
Expand All @@ -22,7 +23,7 @@ expectation:
symptom: "The pipeline is concerning; 1 area is impacted and 1 diagnosis is available"
diagnosis:
- cause: "pipeline is loading"
- action: "if pipeline does not come up quickly, you may need to check the logs to see if it is stalled"
action: "if pipeline does not come up quickly, you may need to check the logs to see if it is stalled"
impacts:
- impact_areas: ["pipeline_execution"]
details:
Expand Down
2 changes: 1 addition & 1 deletion Gemfile.template
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ gem "logstash-output-elasticsearch", ">= 11.14.0"
gem "polyglot", require: false
gem "treetop", require: false
gem "faraday", "~> 1", :require => false # due elasticsearch-transport (elastic-transport) depending faraday '~> 1'
gem "minitar", :group => :build
gem "minitar", "~> 1", :group => :build
gem "childprocess", "~> 4", :group => :build
gem "fpm", "~> 1", ">= 1.14.1", :group => :build # compound due to bugfix https://github.com/jordansissel/fpm/pull/1856
gem "gems", "~> 1", :group => :build
Expand Down
2 changes: 1 addition & 1 deletion ci/branches.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"branch": "8.x"
},
{
"branch": "8.15"
"branch": "8.16"
},
{
"branch": "7.17"
Expand Down
1 change: 0 additions & 1 deletion docker/data/logstash/env2yaml/env2yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ var validSettings = []string{
"config.debug",
"config.support_escapes",
"config.field_reference.escape_style",
"event_api.tags.illegal",
"queue.type",
"path.queue",
"queue.page_capacity",
Expand Down
4 changes: 0 additions & 4 deletions docs/index.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,6 @@ include::static/modules.asciidoc[]

include::static/arcsight-module.asciidoc[]

include::static/netflow-module.asciidoc[]

include::static/azure-module.asciidoc[]

// Working with Filebeat Modules
include::static/filebeat-modules.asciidoc[]

Expand Down
6 changes: 5 additions & 1 deletion docs/static/arcsight-module.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
=== Logstash ArcSight Module

++++
<titleabbrev>ArcSight Module</titleabbrev>
<titleabbrev>ArcSight Module (deprecated)</titleabbrev>
++++

deprecated[8.16.0, Replaced by https://docs.elastic.co/integrations/cef[Common Event Format (CEF)] from Elastic {integrations}]

NOTE: The Logstash ArcSight module is an
https://www.elastic.co/products/x-pack[{xpack}] feature under the Basic License
and is therefore free to use. Please contact
Expand All @@ -17,6 +19,8 @@ With a single command, the module taps directly into the ArcSight Smart Connecto
parses and indexes the security events into Elasticsearch, and installs a suite of Kibana dashboards
to get you exploring your data immediately.

NOTE: The {ls} ArcSight Module has been deprecated and replaced by https://docs.elastic.co/integrations/cef[Common Event Format (CEF)] from Elastic {integrations}.

[[arcsight-prereqs]]
==== Prerequisites

Expand Down
2 changes: 0 additions & 2 deletions docs/static/modules.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ visualizing it with purpose-built dashboards.
These modules are available:

* <<arcsight-module>>
* <<netflow-module,Netflow Module (deprecated)>>
* <<azure-module, Microsoft Azure Module (deprecated)>>

Each module comes pre-packaged with Logstash configurations, Kibana dashboards,
and other meta files that make it easier for you to set up the Elastic Stack for
Expand Down
2 changes: 1 addition & 1 deletion docs/static/releasenotes.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ This new image flavor builds on top of a smaller and more secure base image, and
* Logstash {logstash-ref}/monitoring-with-ea.html[monitoring doc] improvements https://github.com/elastic/logstash/pull/16208[#16208]
* Add ecs and datastream requirement for {logstash-ref}/ea-integrations.html#es-tips[integration filter] https://github.com/elastic/logstash/pull/16268[#16268]
* Remove reference to puppet {ls} module https://github.com/elastic/logstash/pull/12356[#12356]
* Add section to describe intended usage of {logstash-ref}/jvm-settings.html#reducing-off-heap-usage[`pipeline.buffer.type`] https://github.com/elastic/logstash/pull/16083[#16083]
* Add section to describe intended usage of https://www.elastic.co/guide/en/logstash/8.15/jvm-settings.html#reducing-off-heap-usage[`pipeline.buffer.type`] https://github.com/elastic/logstash/pull/16083[#16083]
* Reposition {logstash-ref}/node-stats-api.html#pipeline-stats[`worker-utilization`] stat for better placement and flow https://github.com/elastic/logstash/pull/16337[#16337]
* Add {logstash-ref}/performance-troubleshooting.html[tuning guidance] based on Flow metrics https://github.com/elastic/logstash/pull/16289[#16289]

Expand Down
6 changes: 0 additions & 6 deletions docs/static/settings-file.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -361,12 +361,6 @@ separating each log lines per pipeline could be helpful in case you need to trou
| Setting to `true` to allow or `false` to block running Logstash as a superuser.
| `true`

| `event_api.tags.illegal`
| When set to `warn`, allow illegal value assignment to the reserved `tags` field.
When set to `rename`, Logstash events can't be created with an illegal value in `tags`. This value will be moved to `_tags` and a `_tagsparsefailure` tag is added to indicate the illegal operation. Doing `set` operation with illegal value will throw exception.
Setting this flag to `warn` is deprecated and will be removed in a future release.
| `rename`

| `pipeline.buffer.type`
| Determine where to allocate memory buffers, for plugins that leverage them.
Currently defaults to `direct` but can be switched to `heap` to select Java heap space, which will become the default in the future.
Expand Down
Loading

0 comments on commit 62d81f4

Please sign in to comment.