Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support autoscaling on broker and bookie #1183

Merged
merged 4 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ spec:
replicas: {{ .Values.bookkeeper.replicaCount }}
image: "{{ .Values.images.bookie.repository }}:{{ .Values.images.bookie.tag }}"
imagePullPolicy: {{ .Values.images.bookie.pullPolicy }}
{{- /* Emit autoScalingPolicy on the CR only when bookkeeper.autoScaling.enabled is set. */}}
{{- if .Values.bookkeeper.autoScaling.enabled }}
  autoScalingPolicy:
    minReplicas: {{ .Values.bookkeeper.autoScaling.minReplicas }}
    maxReplicas: {{ .Values.bookkeeper.autoScaling.maxReplicas }}
    {{- /* `with` omits the metrics key when bookkeeper.autoScaling.metrics is empty or unset. */}}
    {{- with .Values.bookkeeper.autoScaling.metrics }}
    metrics:
    {{- toYaml . | nindent 4 }}
    {{- end }}
{{- end }}
logConfig:
level: {{ .Values.bookkeeper.logConfig.level }}
format: {{ .Values.bookkeeper.logConfig.format }}
Expand Down
11 changes: 11 additions & 0 deletions charts/sn-platform-slim/templates/broker/broker-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ spec:
replicas: {{ .Values.broker.replicaCount }}
image: "{{ .Values.images.broker.repository }}:{{ .Values.images.broker.tag }}"
imagePullPolicy: {{ .Values.images.broker.pullPolicy }}
{{- /* Emit autoScalingPolicy on the CR only when broker.autoScaling.enabled is set. */}}
{{- if .Values.broker.autoScaling.enabled }}
  autoScalingPolicy:
    minReplicas: {{ .Values.broker.autoScaling.minReplicas }}
    maxReplicas: {{ .Values.broker.autoScaling.maxReplicas }}
    {{- /* `with` omits the metrics key when broker.autoScaling.metrics is empty or unset. */}}
    {{- with .Values.broker.autoScaling.metrics }}
    metrics:
    {{- toYaml . | nindent 4 }}
    {{- end }}
{{- end }}
logConfig:
level: {{ .Values.broker.logConfig.level }}
format: {{ .Values.broker.logConfig.format }}
Expand Down
14 changes: 14 additions & 0 deletions charts/sn-platform-slim/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,13 @@ bookkeeper:
# annotations that will be added on the BookKeeperCluster CR only.
annotations: {}

# The autoScaling field makes the scaling controller track the average metric usage of the Pulsar nodes
# and adjust the number of nodes to stay at the target usage level: it scales out when the average is
# above the target and scales in to save resources when the average is below the target.
autoScaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 4
  # List of metric targets to scale on (see examples/sn-platform/autoscaling-*.yaml).
  # The default is an empty list rather than a map so that a user-supplied list overrides it cleanly.
  metrics: []
# The field logConfig can be used to change the log level and log format of pods.
# The logConfig field is optional. If it is not specified, the component will use the default log configuration /pulsar/conf/log4j2.yaml.
# If it is specified, the log level and log format of the component are changed dynamically by changing the CR.
Expand Down Expand Up @@ -1025,6 +1032,13 @@ broker:
# annotations that will be added on the PulsarBroker CR only.
annotations: {}

# The autoScaling field makes the scaling controller track the average metric usage of the Pulsar nodes
# and adjust the number of nodes to stay at the target usage level: it scales out when the average is
# above the target and scales in to save resources when the average is below the target.
autoScaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 4
  # List of metric targets to scale on (see examples/sn-platform/autoscaling-*.yaml).
  # The default is an empty list rather than a map so that a user-supplied list overrides it cleanly.
  metrics: []
# The field logConfig can be used to change the log level and log format of pods.
# The logConfig field is optional. If it is not specified, the component will use the default log configuration /pulsar/conf/log4j2.yaml.
# If it is specified, the log level and log format of the component are changed dynamically by changing the CR.
Expand Down
11 changes: 11 additions & 0 deletions charts/sn-platform/templates/bookkeeper/bookkeeper-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ spec:
replicas: {{ .Values.bookkeeper.replicaCount }}
image: "{{ .Values.images.bookie.repository }}:{{ .Values.images.bookie.tag }}"
imagePullPolicy: {{ .Values.images.bookie.pullPolicy }}
{{- /* Emit autoScalingPolicy on the CR only when bookkeeper.autoScaling.enabled is set. */}}
{{- if .Values.bookkeeper.autoScaling.enabled }}
  autoScalingPolicy:
    minReplicas: {{ .Values.bookkeeper.autoScaling.minReplicas }}
    maxReplicas: {{ .Values.bookkeeper.autoScaling.maxReplicas }}
    {{- /* `with` omits the metrics key when bookkeeper.autoScaling.metrics is empty or unset. */}}
    {{- with .Values.bookkeeper.autoScaling.metrics }}
    metrics:
    {{- toYaml . | nindent 4 }}
    {{- end }}
{{- end }}
logConfig:
level: {{ .Values.bookkeeper.logConfig.level }}
format: {{ .Values.bookkeeper.logConfig.format }}
Expand Down
11 changes: 11 additions & 0 deletions charts/sn-platform/templates/broker/broker-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ spec:
replicas: {{ .Values.broker.replicaCount }}
image: "{{ .Values.images.broker.repository }}:{{ .Values.images.broker.tag }}"
imagePullPolicy: {{ .Values.images.broker.pullPolicy }}
{{- /* Emit autoScalingPolicy on the CR only when broker.autoScaling.enabled is set. */}}
{{- if .Values.broker.autoScaling.enabled }}
  autoScalingPolicy:
    minReplicas: {{ .Values.broker.autoScaling.minReplicas }}
    maxReplicas: {{ .Values.broker.autoScaling.maxReplicas }}
    {{- /* `with` omits the metrics key when broker.autoScaling.metrics is empty or unset. */}}
    {{- with .Values.broker.autoScaling.metrics }}
    metrics:
    {{- toYaml . | nindent 4 }}
    {{- end }}
{{- end }}
logConfig:
level: {{ .Values.broker.logConfig.level }}
format: {{ .Values.broker.logConfig.format }}
Expand Down
16 changes: 16 additions & 0 deletions charts/sn-platform/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,14 @@ bookkeeper:
# annotations that will be added on the BookKeeperCluster CR only.
annotations: {}

# The autoScaling field makes the scaling controller track the average metric usage of the Pulsar nodes
# and adjust the number of nodes to stay at the target usage level: it scales out when the average is
# above the target and scales in to save resources when the average is below the target.
autoScaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 4
  # List of metric targets to scale on (see examples/sn-platform/autoscaling-*.yaml).
  # The default is an empty list rather than a map so that a user-supplied list overrides it cleanly.
  metrics: []

# The field logConfig can be used to change the log level and log format of pods.
# The logConfig field is optional. If it is not specified, the component will use the default log configuration /pulsar/conf/log4j2.yaml.
# If it is specified, the log level and log format of the component are changed dynamically by changing the CR.
Expand Down Expand Up @@ -1099,6 +1107,14 @@ broker:
# annotations that will be added on the PulsarBroker CR only.
annotations: {}

# The autoScaling field makes the scaling controller track the average metric usage of the Pulsar nodes
# and adjust the number of nodes to stay at the target usage level: it scales out when the average is
# above the target and scales in to save resources when the average is below the target.
autoScaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 4
  # List of metric targets to scale on (see examples/sn-platform/autoscaling-*.yaml).
  # The default is an empty list rather than a map so that a user-supplied list overrides it cleanly.
  metrics: []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what are the available metrics here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be great if there is a list of the metric names that can be used for pulsar auto-scaling.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what are the available metrics here?
It depends on the metrics backend service used. If users deploy the Metrics Server then only resource metrics could be used. But with prometheus and prometheus adapter, all metrics from the prometheus could be used by the auto-scaling.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cpu_usage here is a custom metric implemented by prometheus-adapter other than the general prometheus metric, the user should know the catalog of the supported metrics of broker autoScaling.


# The field logConfig can be used to change the log level and log format of pods.
# The logConfig field is optional. If it is not specified, the component will use the default log configuration /pulsar/conf/log4j2.yaml.
# If it is specified, the log level and log format of the component are changed dynamically by changing the CR.
Expand Down
59 changes: 59 additions & 0 deletions examples/sn-platform/autoscaling-custom-metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
broker:
  autoScaling:
    enabled: true
    minReplicas: 1
    maxReplicas: 4
    # Custom metrics require Prometheus (https://prometheus.io/) and the
    # prometheus-adapter (https://github.com/kubernetes-sigs/prometheus-adapter).
    # Refer to prometheus-adapter.yaml for how to set up the custom metrics
    # used below (cpu_usage, network_in_rate_kb, network_out_rate_kb).
    metrics:
      - pods:
          metric:
            name: cpu_usage
          target:
            averageValue: "75"
            type: AverageValue
        type: Pods
      - pods:
          metric:
            name: network_in_rate_kb
          target:
            averageValue: "204800"
            type: AverageValue
        type: Pods
      - pods:
          metric:
            name: network_out_rate_kb
          target:
            averageValue: "204800"
            type: AverageValue
        type: Pods

bookkeeper:
  autoScaling:
    enabled: true
    minReplicas: 1
    maxReplicas: 4
    # Custom metrics require Prometheus (https://prometheus.io/) and the
    # prometheus-adapter (https://github.com/kubernetes-sigs/prometheus-adapter).
    # Refer to prometheus-adapter.yaml for how to set up the custom metrics
    # used below (cpu_usage, network_in_rate_kb, network_out_rate_kb).
    metrics:
      - pods:
          metric:
            name: cpu_usage
          target:
            averageValue: "75"
            type: AverageValue
        type: Pods
      - pods:
          metric:
            name: network_in_rate_kb
          target:
            averageValue: "204800"
            type: AverageValue
        type: Pods
      - pods:
          metric:
            name: network_out_rate_kb
          target:
            averageValue: "204800"
            type: AverageValue
        type: Pods
29 changes: 29 additions & 0 deletions examples/sn-platform/autoscaling-resource-metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
broker:
  autoScaling:
    enabled: true
    minReplicas: 1
    maxReplicas: 4
    # Deploy the https://github.com/kubernetes-sigs/metrics-server to support resource metrics.
    # Metrics server supports the cpu and memory PodMetrics.
    # metrics is a list of metric specs (same shape as in autoscaling-custom-metrics.yaml),
    # so the single resource target is written as a one-item list.
    metrics:
      - type: Resource
        resource:
          name: cpu
          target:
            type: Utilization
            averageUtilization: 70

bookkeeper:
  autoScaling:
    enabled: true
    minReplicas: 1
    maxReplicas: 4
    # Deploy the https://github.com/kubernetes-sigs/metrics-server to support resource metrics.
    # Metrics server supports the cpu and memory PodMetrics.
    # metrics is a list of metric specs (same shape as in autoscaling-custom-metrics.yaml),
    # so the single resource target is written as a one-item list.
    metrics:
      - type: Resource
        resource:
          name: cpu
          target:
            type: Utilization
            averageUtilization: 70
79 changes: 79 additions & 0 deletions examples/sn-platform/prometheus-adapter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Example values for the prometheus-adapter deployment that exposes the custom
# metrics referenced by autoscaling-custom-metrics.yaml (cpu_usage,
# network_in_rate_kb, network_out_rate_kb) plus disk read/write rates.
# NOTE(review): the key layout below assumes the prometheus-adapter Helm chart
# values schema (prometheus.url/port, listenPort, rules.default/custom) — confirm.
prometheus:
  url: http://prometheus-server.monitor.svc
  port: 80
listenPort: 8443
rules:
  default: false
  custom:
    # Generic rule: expose every non-container `*_total` series as a per-second rate,
    # with the `_total` suffix stripped from the metric name.
    - seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
      resources:
        template: "<<.Resource>>"
      name:
        matches: "^(.*)_total"
        as: ""
      metricsQuery: |
        sum by (<<.GroupBy>>) (
          irate (
            <<.Series>>{<<.LabelMatchers>>}[1m]
          )
        )
    # cpu_usage: 5m CPU usage rate divided by container_spec_cpu_shares/1000, times 100.
    - seriesQuery: 'container_cpu_usage_seconds_total{namespace!~"(sn-system|kube-system|olm|cert-manager)"}'
      seriesFilters: []
      resources:
        overrides:
          pod:
            resource: pod
          namespace:
            resource: namespace
      name:
        matches: "container_cpu_usage_seconds_total"
        as: "cpu_usage"
      metricsQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) / (sum(container_spec_cpu_shares{<<.LabelMatchers>>}/1000) by (<<.GroupBy>>)) * 100
    # network_in_rate_kb: 5m receive rate in KiB/s, summed per <<.GroupBy>> like the
    # cpu and disk rules so that each object yields exactly one series.
    - seriesQuery: 'container_network_receive_bytes_total{namespace!~"(sn-system|kube-system|olm|cert-manager)"}'
      seriesFilters: []
      resources:
        overrides:
          pod:
            resource: pod
          namespace:
            resource: namespace
      name:
        matches: "container_network_receive_bytes_total"
        as: "network_in_rate_kb"
      metricsQuery: sum(rate(container_network_receive_bytes_total{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) / 1024
    # network_out_rate_kb: 5m transmit rate in KiB/s, summed per <<.GroupBy>> like the
    # cpu and disk rules so that each object yields exactly one series.
    - seriesQuery: 'container_network_transmit_bytes_total{namespace!~"(sn-system|kube-system|olm|cert-manager)"}'
      seriesFilters: []
      resources:
        overrides:
          pod:
            resource: pod
          namespace:
            resource: namespace
      name:
        matches: "container_network_transmit_bytes_total"
        as: "network_out_rate_kb"
      metricsQuery: sum(rate(container_network_transmit_bytes_total{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) / 1024
    # disk_read_rate_kb: 5m filesystem read rate in KiB/s.
    - seriesQuery: 'container_fs_reads_bytes_total{namespace!~"(sn-system|kube-system|olm|cert-manager)"}'
      seriesFilters: []
      resources:
        overrides:
          pod:
            resource: pod
          namespace:
            resource: namespace
      name:
        matches: "container_fs_reads_bytes_total"
        as: "disk_read_rate_kb"
      metricsQuery: sum(rate(container_fs_reads_bytes_total{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) / 1024
    # disk_write_rate_kb: 5m filesystem write rate in KiB/s.
    - seriesQuery: 'container_fs_writes_bytes_total{namespace!~"(sn-system|kube-system|olm|cert-manager)"}'
      seriesFilters: []
      resources:
        overrides:
          pod:
            resource: pod
          namespace:
            resource: namespace
      name:
        matches: "container_fs_writes_bytes_total"
        as: "disk_write_rate_kb"
      metricsQuery: sum(rate(container_fs_writes_bytes_total{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) / 1024
Loading