diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..72e25147 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,19 @@ +--- +name: Bug Report +about: Create a report to help us improve +labels: bug +--- + + + + +#### Problem description + +[this should explain **why** the current behavior is a problem and why the expected output is a better solution.] + +#### Expected Output + + +#### Environment Information + +Os, Python version, ... diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml deleted file mode 100644 index 93064747..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: Bug report -description: Create a report to help us improve -title: "[BUG] " -labels: ["bug"] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this bug report! - - - type: checkboxes - id: labels - attributes: - label: Labels - description: Select labels that apply to this issue - options: - - label: bug - - label: help wanted - - label: good first issue - - - type: textarea - id: expected - attributes: - label: Expected behavior - description: A clear and concise description of what you expected to happen - placeholder: "Describe the expected behavior here..." - validations: - required: true - - - type: textarea - id: actual - attributes: - label: Actual behavior - description: A clear and concise description of what actually happened - placeholder: "Describe what actually happened here..." - validations: - required: true - - - type: textarea - id: environment - attributes: - label: Environment context - description: Add any other context about the environment you are using here - placeholder: "Add any other environmental context here such as cloud provider, k8s version, other versions, etc..." - diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..516403f3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +## Feature Request: [Title of the Feature] + + + +### Summary +A brief summary of the feature request and what it aims to accomplish. + +### Problem Statement +Describe the problem or pain point that this feature would address. Why is this feature important? What issues are users currently facing that this feature would solve? + +### Proposed Solution +Outline the proposed solution for the feature. Include any relevant details, such as how the feature would work, what changes it would involve, and any specific requirements or considerations. + +### Additional Details +Provide any additional information that might be helpful for understanding the feature request. This could include: + +- Examples of how the feature could be used +- Screenshots or diagrams illustrating the feature +- Potential limitations or edge cases +- Links to related discussions or prior issues \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml deleted file mode 100644 index 5d3c0550..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: Feature Request -description: Suggest a new feature or enhancement -title: "[FEATURE] " -labels: ["enhancement"] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to suggest a feature! 
- - - type: textarea - id: description - attributes: - label: Description - description: A clear and concise description of the feature you are requesting - placeholder: "Describe the feature here..." - validations: - required: true - - - type: checkboxes - id: labels - attributes: - label: Labels - description: Select labels that apply to this feature request - options: - - label: enhancement - - label: discussion - - label: help wanted diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2354af8c..efb4650e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -31,6 +31,7 @@ jobs: helm repo add eoapi https://devseed.com/eoapi-k8s/ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts helm repo add grafana https://grafana.github.io/helm-charts + helm repo add bitnami https://charts.bitnami.com/bitnami helm repo list - name: run chart-releaser diff --git a/.gitignore b/.gitignore index 51650b94..335b2c80 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .idea/ helm-chart/config.yaml -config_ingress.yaml \ No newline at end of file +config_ingress.yaml +helm-chart/eoapi/charts/*.tgz diff --git a/README.md b/README.md index 92093de3..5bca7d22 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ * [Getting Started](#gettingstarted) * [Helm Installation](#helminstall) * [Default Configuration and Options](#options) -* [Autoscaling](./docs/autoscaling.md) +* [Autoscaling / Monitoring / Observability](./docs/autoscaling.md) diff --git a/docs/autoscaling.md b/docs/autoscaling.md index ecded46f..d17b2168 100644 --- a/docs/autoscaling.md +++ b/docs/autoscaling.md @@ -1,38 +1,91 @@ -## Autoscaling +# Autoscaling / Monitoring / Observability Autoscaling is both art and science. To test out your application's autoscaling requirements you often need to consider -your data volume, data usage patterns, bottlenecks (such as the database) among many, many other things. +your data volume, data usage patterns, bottlenecks (such as the database) among many, many other things. Load testing, +metrics, monitoring and observability will help you explore what those needs are. -Load testing, metrics and observability will help you explore what those autoscaling needs are. This `eoapi-k8s` chart -includes some default autoscaling values as well as an `eoapi-support` chart to help you do this exploration. Below -we walk through how to set these things up. + +> ⓘ The `eoapi-support` chart in this repository (see `../helm-chart/eoapi-support`) is required to be installed to +enable any of the eoAPI service autoscaling. It cannot be listed as a dependecy of `eoapi` chart +b/c of the limitations in `prometheus-adapter` and `grafana` for constructing the Prometheus internal +service domains dynamically. + +If you are comfortable with k8s you probably only need to `helm install` the support chart and be on your way. Other folks +might want to read through the verbose walkthrough material below to familiarize yourself with how things work. --- -### Verify `metrics-server` is installed by default in EKS or GCP +## Helm Install `eoapi-support` -Newer versions of AWS EKS and GCP GKE "should" come with `metrics-server` (the default k8s metrics utility) pre-installed -in the `kube-system` namespace. You can verify this using: +The following instructions assume you've gone through the [AWS](./docs/aws-eks.md) or [GCP](./docs/gcp-gke.md) cluster set up +and installed the `eoapi` chart. 
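If the `eoapi` chart repository isn't already configured locally, a quick way to pull it in and see which `eoapi-support` versions are published — a minimal sketch, assuming you install from the same chart repo URL the release workflow publishes to:

```sh
# add and refresh the eoapi chart repository
helm repo add eoapi https://devseed.com/eoapi-k8s/
helm repo update

# list the published versions of the support chart
helm search repo eoapi/eoapi-support --versions
```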
- ```sh - kubectl get deployment metrics-server -n kube-system +1. Go to the [releases section](https://github.com/developmentseed/eoapi-k8s/releases) of this repository and find the latest +`eoapi-support-` version to install. The example below assumes we're working with `eoapi-support-0.1.4` + + +2. Decide on a release name and `namespace` for your support chart. The next steps assume we've +chosen a release name of `eoapi-support` and a similar namespace of `eoapi-support` + + +3. Then do a normal `helm install` but you'll want to parameterize and pass overrides for the prometheus URL to include +the release name and namespace chosen above. This allows other third-party dependencies used in the chart +(`prometheus-adpater` and `grafana`) know where to find the prometheus service internally. This is unfortunately a +manual step that cannot be automated + + ```bash + helm upgrade --install -n eoapi-support \ + --create-namespace eoapi-support eoapi/eoapi-support --version 0.1.4 \ + --set prometheus-adapter.prometheus.url='http://eoapi-support-prometheus-server.eoapi-support.svc.cluster.local' \ + --set grafana.datasources.datasources\\.yaml.datasources[0].url='http://eoapi-support-prometheus-server.eoapi-support.svc.cluster.local' ``` -If it's not there then you can install it with default configuration by doing: + +4. verify that everything is set up correctly and no deployments are not failing: ```sh - helm repo add bitnami https://charts.bitnami.com/bitnami - helm repo update - helm -n kube-system install metrics-server bitnami/metrics-server - # helm -n kube-system delete metrics-server + watch -n 1 "kubectl -n eoapi-support get deploy,pod,svc" + NAME READY STATUS RESTARTS AGE + pod/eoapi-support-grafana-7fdc9688dd-wkw7p 1/1 Running 0 79s + pod/eoapi-support-kube-state-metrics-54d75784db-ghgbd 1/1 Running 0 79s + pod/eoapi-support-prometheus-adapter-668b6bd89c-kb25q 1/1 Running 0 79s + pod/eoapi-support-prometheus-node-exporter-6f96z 1/1 Running 0 79s + pod/eoapi-support-prometheus-node-exporter-fr96x 1/1 Running 0 79s + pod/eoapi-support-prometheus-node-exporter-pdvvp 1/1 Running 0 79s + pod/eoapi-support-prometheus-server-76dcfc684b-wmk5c 2/2 Running 0 79s + + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + service/eoapi-support-grafana LoadBalancer 10.123.248.75 104.154.59.180 80:30821/TCP 79s + service/eoapi-support-kube-state-metrics ClusterIP 10.123.241.247 8080/TCP 79s + service/eoapi-support-prometheus-adapter ClusterIP 10.123.249.21 443/TCP 79s + service/eoapi-support-prometheus-node-exporter ClusterIP 10.123.249.90 9100/TCP 79s + service/eoapi-support-prometheus-server ClusterIP 10.123.247.255 80/TCP 79s ``` -After installing verify things are working by getting nodes stats: + - ```sh - kubectl get --raw /apis/metrics.k8s.io/v1beta1/nodes | jq '.items[] | {name:.metadata.name, cpu:.usage.cpu, memory:.usage.memory}' +5. 
If anything in steps 1 through 3 seems confusing then here is a quick bash script to clear it up: + + ```shell + export RELEASE_NAME=eoapi + export RELEASE_NS=eoapi + export SUPPORT_RELEASE_NAME=eoapi-support + export SUPPORT_RELEASE_NS=eoapi-support + + helm upgrade --install \ + -n $SUPPORT_RELEASE_NS --create-namespace $SUPPORT_RELEASE_NAME \ + eoapi/eoapi-support --version 0.1.4 \ + --set prometheus-adapter.prometheus.url='http://${SUPPORT_RELEASE_NAME}-prometheus-server.${SUPPORT_RELEASE_NS}.svc.cluster.local' \ + --set grafana.datasources.datasources\\.yaml.datasources[0].url='http://${SUPPORT_RELEASE_NAME}-prometheus-server.${SUPPORT_RELEASE_NS}.svc.cluster.local' \ + -f /tmp/values-overrides.yaml + + helm upgrade --install \ + -n $RELEASE_NS --create-namespace $RELEASE_NAME \ + eoapi/eoapi --version 0.4.6 \ + -f /tmp/support-values-overrides.yaml ``` + --- ### Review [Default Configuration and Options](configuration.md) @@ -59,62 +112,66 @@ After installing verify things are working by getting nodes stats: --- -### Review [Production Storage](aws-gpc-storage-walkthrough.md) Set Up - -The default `eoapi` helm chart installs an in-memory postgres/postgis database but most folks will want to -test autoscaling against something more production suitable - ---- - -### Install `eoapi-support` Chart - -This chart has the metrics, observability and visualization dependencies +### How Autoscaling Works -1. change into this repository's `/helm-chart/` folder +If you grok the default `eoapi-support` values in `values.yaml` you'll see we use custom metrics and prometheus queries +based on the nginx ingress controller's request rate under the `prometheus-adpater.prometheus:` key: -2. then download the dependencies for the `eoapi-support` chart - - ```sh - helm repo add grafana https://grafana.github.io/helm-charts - helm dependency build ./eoapi-support + ```yaml + prometheus-adapter: + prometheus: + # NOTE: the `url` below make some assumptions about the namespace where you released eoapi and prometheus + # 1) that you didn't change the default name of the `prometheus-server` or the port and installed in eoapi namespace + # 2) namely that you ran `helm install eoapi --create-namespace=eoapi` with the `eoapi` namespace + url: http://eoapi-support-prometheus-server.eoapi.svc.cluster.local + port: 80 + path: "" + rules: + default: false + # NOTE: the `name.as` values below make some assumptions about your release name + # namely that you have run `helm install eoapi eoapi/eoapi --create-namespace=eoapi` + custom: + - seriesQuery: '{__name__=~"^nginx_ingress_controller_requests$",namespace!=""}' + seriesFilters: [] + resources: + template: <<.Resource>> + name: + matches: "" + as: "nginx_ingress_controller_requests_rate_vector_eoapi" + metricsQuery: round(sum(rate(<<.Series>>{service="vector",path=~"/vector.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) + - seriesQuery: '{__name__=~"^nginx_ingress_controller_requests$",namespace!=""}' + seriesFilters: [] + resources: + template: <<.Resource>> + name: + matches: "" + as: "nginx_ingress_controller_requests_rate_raster_eoapi" + metricsQuery: round(sum(rate(<<.Series>>{service="raster",path=~"/raster.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) + - seriesQuery: '{__name__=~"^nginx_ingress_controller_requests$",namespace!=""}' + seriesFilters: [] + resources: + template: <<.Resource>> + name: + matches: "" + as: "nginx_ingress_controller_requests_rate_stac_eoapi" + metricsQuery: 
round(sum(rate(<<.Series>>{service="stac",path=~"/stac.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) ``` -3. then install those dependencies: +Prometheus adapter is a bridge for metrics between Prometheus (which scrapes nginx) and the k8s metrics server so it can autoscale deployments using these custom metrics. +If you've chosen `both` or `requestRate` as a autoscaling `type:` for those values then these custom metrics are used to template an `hpa.yaml` for each service - ```sh - helm upgrade --install \ - --namespace eoapi \ - --create-namespace \ - eoapi-support \ - ./eoapi-support - # if you need to delete the chart you can run: - # helm delete eoapi-support -n eoapi - ``` - -4. verify that everything is set up correctly and no deployments are failing: +### Log into Grafana - ```sh - watch -n 1 "kubectl -n eoapi get pod,svc" - # NAME READY STATUS RESTARTS AGE - # pod/eoapi-support-grafana-7fdc9688dd-wkw7p 1/1 Running 0 79s - # pod/eoapi-support-kube-state-metrics-54d75784db-ghgbd 1/1 Running 0 79s - # pod/eoapi-support-prometheus-adapter-668b6bd89c-kb25q 1/1 Running 0 79s - # pod/eoapi-support-prometheus-node-exporter-6f96z 1/1 Running 0 79s - # pod/eoapi-support-prometheus-node-exporter-fr96x 1/1 Running 0 79s - # pod/eoapi-support-prometheus-node-exporter-pdvvp 1/1 Running 0 79s - # pod/eoapi-support-prometheus-server-76dcfc684b-wmk5c 2/2 Running 0 79s - - # NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE - # service/eoapi-support-grafana LoadBalancer 10.123.248.75 104.154.59.180 80:30821/TCP 79s - # service/eoapi-support-kube-state-metrics ClusterIP 10.123.241.247 8080/TCP 79s - # service/eoapi-support-prometheus-adapter ClusterIP 10.123.249.21 443/TCP 79s - # service/eoapi-support-prometheus-node-exporter ClusterIP 10.123.249.90 9100/TCP 79s - # service/eoapi-support-prometheus-server ClusterIP 10.123.247.255 80/TCP 79s - ``` +When you `helm install` the support chart you by default get a Grafana dashboard set up with different default metrics charts +to help you load test and explore your service autoscaling. Grafana creates a new username `admin` and password for you +that you'll have to retrieve to login. -5. note that the `service/eoapi-support-grafana` has an EXTERNAL-IP that we can use to view it. This is just a quick way to work with it. You'll want to set it up with a ingress in the future +> ⓘ Note that the `service/eoapi-support-grafana` has an EXTERNAL-IP that we can use to view it. +This is just a quick way to work with it. You'll want to set it up with an ingress in the future -6. to log into Grafana you'll need to export the default username/password it came installed with: + +1. To log into Grafana you'll need to export the default username/password it came installed with. Note that secret names are prefixed +with the `release` name we installed the chart with below `-grafana`: ```sh kubectl get secret eoapi-support-grafana --template='{{index .data "admin-user"}}' -n eoapi | base64 -d @@ -122,65 +179,25 @@ This chart has the metrics, observability and visualization dependencies kubectl get secret eoapi-support-grafana --template='{{index .data "admin-password"}}' -n eoapi | base64 -d # ``` - -### Install Newest `eoapi` Chart - -1. The `autoscaling` key was added to the `values.yaml` in version in chart version `0.1.11`. So update your eoapi repo: + +2. 
To find the URL for the load balancer for where to log in with Grafana you can query the services: ```sh - helm repo add eoapi https://devseed.com/eoapi-k8s/ - helm repo update + kubectl get svc -n eoapi-support ``` + +### Install or Upgrade Autoscaling Changes to `eoapi` Chart -2. Add the required secret overrides and autoscaling changes you need to an arbitrarily named `.yaml` file (`config.yaml` below) -but the important part here is that we are enabling `autoscaling` and playing with `requestRate` metric +1. If you haven't already decide which services (`vector` || `raster` || `stac`) you want to enable `autoscaling` on change your values yaml for these and redeploy - ```sh - cat config.yaml - ``` - ```yaml - ingress: - enabled: true - className: "nginx" - db: - enabled: true - settings: - resources: - requests: - storage: "100Mi" - cpu: "256m" - memory: "1024Mi" - limits: - cpu: "512m" - memory: "1024Mi" - secrets: - PGUSER: "username" - POSTGRES_USER: "username" - PGPASSWORD: "password" - POSTGRES_PASSWORD: "password" - raster: - enabled: true - autoscaling: - enabled: true - type: "requestRate" - targets: - requestRate: 50 - settings: - resources: - limits: - cpu: "768m" - memory: "4096Mi" - requests: - cpu: "256m" - memory: "1024Mi" stac: enabled: true autoscaling: enabled: true type: "requestRate" targets: - requestRate: 50 + requestRate: 50000m settings: resources: limits: @@ -195,7 +212,7 @@ but the important part here is that we are enabling `autoscaling` and playing wi enabled: true type: "requestRate" targets: - requestRate: 50 + requestRate: 50000m settings: resources: limits: @@ -205,110 +222,35 @@ but the important part here is that we are enabling `autoscaling` and playing wi cpu: "256m" memory: "1024Mi" ``` + +2. Review what the heck the unit `m` means for your [autoscaling values in the k8s docs](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#quantities) -3. Then `helm install` the eoapi chart pointing to the path for the `config.yaml` above - - ```sh - helm upgrade --install -n eoapi --create-namespace eoapi eoapi/eoapi --version 0.1.11 -f config.yaml - # if you need to delete the chart then you can run: - # helm delete eoapi -n eoapi - ``` - -4. 
Make sure all pods and services are in `STATUS=Running`: - - ```sh - kubectl -n eoapi get pods,service - # NAME READY STATUS RESTARTS AGE - # pod/doc-server-6dd9c9c888-8l8tv 1/1 Running 0 87s - # pod/eoapi-support-grafana-865b7f49f5-6qkmj 1/1 Running 0 46m - # pod/eoapi-support-kube-state-metrics-54d75784db-d899f 1/1 Running 0 46m - # pod/eoapi-support-prometheus-adapter-6bd87848fd-glc46 1/1 Running 0 46m - # pod/eoapi-support-prometheus-node-exporter-d7vks 0/1 ContainerCreating 0 5s - # pod/eoapi-support-prometheus-node-exporter-np54q 1/1 Running 0 46m - # pod/eoapi-support-prometheus-node-exporter-rsgc5 1/1 Running 0 46m - # pod/eoapi-support-prometheus-node-exporter-tcqvb 1/1 Running 0 46m - # pod/eoapi-support-prometheus-server-76dcfc684b-f78k8 2/2 Running 0 46m - # pod/pgstac-6648b8cc89-v55fh 0/2 Pending 0 87s - # pod/raster-eoapi-b859dd849-7fvwn 0/1 ContainerCreating 0 87s - # pod/stac-eoapi-8c865f5cd-pjhx6 1/1 Running 1 (26s ago) 87s - # pod/vector-eoapi-9957c7469-qk8hn 1/1 Running 1 (26s ago) 87s - - # NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE - # service/doc-server ClusterIP 10.123.252.119 80/TCP 87s - # service/eoapi-support-grafana LoadBalancer 10.123.250.188 34.171.130.0 80:31513/TCP 46m - # service/eoapi-support-kube-state-metrics ClusterIP 10.123.251.118 8080/TCP 46m - # service/eoapi-support-prometheus-adapter ClusterIP 10.123.243.36 443/TCP 46m - # service/eoapi-support-prometheus-node-exporter ClusterIP 10.123.247.202 9100/TCP 46m - # service/eoapi-support-prometheus-server ClusterIP 10.123.249.238 80/TCP 46m - # service/pgstac ClusterIP 10.123.244.121 5432/TCP 87s - # service/raster ClusterIP 10.123.253.229 8080/TCP 87s - # service/stac ClusterIP 10.123.245.192 8080/TCP 87s - # service/vector ClusterIP 10.123.247.62 8080/TCP 87s - ``` - ---- - -### Enable a Prometheus to Scrape Nginx - -1. Now we need to tell the nginx ingress controller that it should allow prometheus to scrape it. This is a requirement to get our custom metrics. - -2. Get the values that `ingress-nginx` was deployed with so we can append our rules to them. (If you followed the cloud provider set up docs for [EKS](aws-eks.md) or [GKE](gcp-gke.md) then these configuration values should already be set ). Oftentimes this resource is in `ingress-nginx` namespace - - ```sh - # this assumes your release name is `ingress-nginx`, though you might've named it something else - helm get values ingress-nginx -n ingress-nginx - - # USER-SUPPLIED VALUES: - # If it is empty, this indicates that nothing has been applied, or no custom values were previously set. - ``` - -3. Create an empty `config_ingress.yaml` somewhere on your file system. Take everything from below `USER-SUPPLIED VALUES:` and make `ingress-nginx` scrapable - ```yaml - controller: - enableLatencyMetrics: true - metrics: - enabled: true - service: - annotations: - prometheus.io/scrape: "true" - prometheus.io/port: "10254" - ``` - -4. Redeploy your `ingress-nginx` release with the configuration from the last step: +3. Then `helm install` the eoapi chart with these changes ```sh - # this assumes your release name is `ingress-nginx` and that the repo was installed as `ingress-nginx` - # though you might've named them something else - helm -n ingress-nginx upgrade ingress-nginx ingress-nginx/ingress-nginx -f config_ingress.yaml + helm upgrade --install -n eoapi... ``` -5. Now go back to Grafana and hit the refresh button and wait a bit. 
You should see data in your graphs - --- -### Add a `nip.io` Host to Your Ingress +### Add Load Balancer Host as a Host to Your Ingress -1. Nginx will not expose metrics for ingresses without hosts or with wildcards. Since `eoapi-k8s` doesn't set up -hosts for you then you'll need to find the `EXTERNAL-IP` for your `ingress-nginx-controller` and use that +Unfortunately, nginx will not expose metrics for ingresses without hosts or hosts with wildcards. You'll either need to deploy +`eoapi-k8s` chart again with `ingress.tls.enabled` or need to find the `EXTERNAL-IP` for your `ingress-nginx-controller` and use that to set up a simple host -2. Find the IP that your `ingress-nginx-controller` exposes: +1. Find the IP that your `ingress-nginx-controller` service load balancer: ```sh - kubectl -n ingress-nginx get svc/ingress-nginx-controller -o=jsonpath='{.status.loadBalancer.ingress[0].ip}' - 35.239.254.21% + kubectl -n ingress-nginx get svc/ingress-nginx-controller -o=jsonpath='{.status.loadBalancer.ingress[0].hostname}' + http://abc5929f88f8c45c38f6cbab2faad43c-776419634.us-west-2.elb.amazonaws.com/ ``` - -3. Then live edit your shared ingress for eoapi services to build an arbitrary host name using `nip.io`. Since -one of the Grafana default charts filters on hostname it's probably best to keep the format to `eoapi-.nip.io`. -`nip.io` will proxy traffic with a full domain to your instance. Using `nip.io` isn't long-term solution but a way to test: + +2. Then live edit your shared ingress for eoapi services to add the host: ```sh kubectl edit ingress nginx-service-ingress-shared-eoapi -n eoapi - - ## In case if eks, replace the elb url without using nip.io - kubectl -n ingress-nginx get svc/ingress-nginx-controller -o=jsonpath='{.status.loadBalancer.ingress[0].hostname}' ``` ```yaml @@ -326,7 +268,7 @@ one of the Grafana default charts filters on hostname it's probably best to keep spec: ingressClassName: nginx rules: - - host: eoapi-35.239.254.92.nip.io + - host: abc5929f88f8c45c38f6cbab2faad43c-776419634.us-west-2.elb.amazonaws.com http: paths: ... @@ -341,4 +283,4 @@ And then finally roll out the deployment. --- -### Now move onto the [Load Testing](loaadtesting.md) document \ No newline at end of file +### Now move onto the [Load Testing](loadtesting.md) document \ No newline at end of file diff --git a/helm-chart/eoapi-support/Chart.yaml b/helm-chart/eoapi-support/Chart.yaml index 8da53b8c..222e3e8f 100644 --- a/helm-chart/eoapi-support/Chart.yaml +++ b/helm-chart/eoapi-support/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v2 name: eoapi-support -appVersion: "0.1.4" -version: "0.1.4" +appVersion: "0.1.5" +version: "0.1.5" dependencies: # Prometheus for collection of metrics. 
@@ -27,3 +27,8 @@ dependencies: # NOTE: configuration for this dependency is handled in `eoapi-support/values.yaml.grafana` values version: 7.3.3 repository: https://grafana.github.io/helm-charts + + + - name: metrics-server + version: 7.2.8 + repository: https://charts.bitnami.com/bitnami \ No newline at end of file diff --git a/helm-chart/eoapi-support/values.yaml b/helm-chart/eoapi-support/values.yaml index ab5b1b18..6258343f 100644 --- a/helm-chart/eoapi-support/values.yaml +++ b/helm-chart/eoapi-support/values.yaml @@ -20,7 +20,7 @@ prometheus-adapter: name: matches: "" as: "nginx_ingress_controller_requests_rate_vector_eoapi" - metricsQuery: round(sum(rate(<<.Series>>{service=~"vector.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) + metricsQuery: round(sum(rate(<<.Series>>{service="vector",path=~"/vector.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) - seriesQuery: '{__name__=~"^nginx_ingress_controller_requests$",namespace!=""}' seriesFilters: [] resources: @@ -28,7 +28,7 @@ prometheus-adapter: name: matches: "" as: "nginx_ingress_controller_requests_rate_raster_eoapi" - metricsQuery: round(sum(rate(<<.Series>>{service=~"raster.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) + metricsQuery: round(sum(rate(<<.Series>>{service="raster",path=~"/raster.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) - seriesQuery: '{__name__=~"^nginx_ingress_controller_requests$",namespace!=""}' seriesFilters: [] resources: @@ -36,7 +36,7 @@ prometheus-adapter: name: matches: "" as: "nginx_ingress_controller_requests_rate_stac_eoapi" - metricsQuery: round(sum(rate(<<.Series>>{service=~"stac.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) + metricsQuery: round(sum(rate(<<.Series>>{service="stac",path=~"/stac.*",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>), 0.001) prometheus: # alertmanager is an optional prometheus chart dependency that we opt-out from diff --git a/helm-chart/eoapi/Chart.yaml b/helm-chart/eoapi/Chart.yaml index d968953e..c32a79a4 100644 --- a/helm-chart/eoapi/Chart.yaml +++ b/helm-chart/eoapi/Chart.yaml @@ -15,13 +15,13 @@ kubeVersion: ">=1.23.0-0" # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: "0.4.6" +version: "0.4.7" # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.3.12" +appVersion: "0.3.13" dependencies: - name: postgrescluster diff --git a/helm-chart/eoapi/templates/_helpers.tpl b/helm-chart/eoapi/templates/_helpers.tpl index ac712858..299c19b0 100644 --- a/helm-chart/eoapi/templates/_helpers.tpl +++ b/helm-chart/eoapi/templates/_helpers.tpl @@ -217,4 +217,4 @@ that you cannot have db.enabled and (postgrescluster.enabled or pgstacBootstrap. 
{{- fail "you cannot use have both db.enabled and pgstacBootstrap.enabled" -}} {{- end -}} -{{- end -}} \ No newline at end of file +{{- end -}} diff --git a/helm-chart/eoapi/templates/services/ingress-nginx.yaml b/helm-chart/eoapi/templates/services/ingress-nginx.yaml index 08763da4..28028451 100644 --- a/helm-chart/eoapi/templates/services/ingress-nginx.yaml +++ b/helm-chart/eoapi/templates/services/ingress-nginx.yaml @@ -15,6 +15,8 @@ metadata: nginx.ingress.kubernetes.io/use-regex: "true" nginx.ingress.kubernetes.io/rewrite-target: /$2 nginx.ingress.kubernetes.io/enable-cors: "true" + # enable-access-log is required for nginx to dump metrics about path rewrites for prometheus to scrape + nginx.ingress.kubernetes.io/enable-access-log: "true" {{- if (and (.Values.ingress.tls.enabled) (.Values.ingress.tls.certManager)) }} cert-manager.io/issuer: {{ .Values.ingress.tls.certManagerIssuer }} {{- end }} diff --git a/helm-chart/eoapi/tests/hpa_tests.yaml b/helm-chart/eoapi/tests/hpa_tests.yaml index 3d8b0524..0629bcaa 100644 --- a/helm-chart/eoapi/tests/hpa_tests.yaml +++ b/helm-chart/eoapi/tests/hpa_tests.yaml @@ -9,6 +9,7 @@ tests: vector.enabled: true ingress.className: "testing123" vector.autoscaling.enabled: true + vector.autoscaling.type: "requestRate" asserts: - failedTemplate: errorMessage: When using an 'ingress.className' other than 'nginx' you cannot enable autoscaling by 'requestRate' at this time b/c it's solely an nginx metric diff --git a/helm-chart/eoapi/tests/ingress_tests_nginx.yaml b/helm-chart/eoapi/tests/ingress_tests_nginx.yaml index 0d88f61f..bee00d0d 100644 --- a/helm-chart/eoapi/tests/ingress_tests_nginx.yaml +++ b/helm-chart/eoapi/tests/ingress_tests_nginx.yaml @@ -17,6 +17,7 @@ tests: - equal: path: metadata.annotations value: + nginx.ingress.kubernetes.io/enable-access-log: "true" nginx.ingress.kubernetes.io/use-regex: "true" nginx.ingress.kubernetes.io/rewrite-target: /$2 nginx.ingress.kubernetes.io/enable-cors: "true" @@ -38,6 +39,7 @@ tests: - equal: path: metadata.annotations value: + nginx.ingress.kubernetes.io/enable-access-log: "true" nginx.ingress.kubernetes.io/use-regex: "true" nginx.ingress.kubernetes.io/rewrite-target: /$2 nginx.ingress.kubernetes.io/enable-cors: "true" @@ -59,6 +61,7 @@ tests: - equal: path: metadata.annotations value: + nginx.ingress.kubernetes.io/enable-access-log: "true" nginx.ingress.kubernetes.io/use-regex: "true" nginx.ingress.kubernetes.io/rewrite-target: /$2 nginx.ingress.kubernetes.io/enable-cors: "true" diff --git a/helm-chart/eoapi/values.yaml b/helm-chart/eoapi/values.yaml index e296fc41..0952c100 100644 --- a/helm-chart/eoapi/values.yaml +++ b/helm-chart/eoapi/values.yaml @@ -56,8 +56,8 @@ comment_db: > The `postgrescluster` specs below are pass-through values to configure those separate charts. For more information read https://access.crunchydata.com/documentation/postgres-operator/latest -# temporary solution for EOEPCA until we figure out how to get NFS mounts -# working with PGO below. Since disabled by default most people SHOULD NOT +# DEPRECATED: this is the backward compatible way we originally did things +# and a temporary solution for EOEPCA. 
Since disabled by default most people SHOULD NOT # use this option as it won't be talked about explicitly in the docs db: enabled: false @@ -173,11 +173,13 @@ pgstacBootstrap: raster: enabled: true autoscaling: + # NOTE: to have autoscaling working you'll need to install the `eoapi-support` chart + # see ../../../docs/autoscaling.md for more information enabled: false minReplicas: 1 maxReplicas: 10 # `type`: "cpu" || "requestRate" || "both" - type: "requestRate" + type: "both" behaviour: scaleDown: stabilizationWindowSeconds: 60 @@ -185,8 +187,12 @@ raster: stabilizationWindowSeconds: 0 targets: # matches `type` value above unless `type: "both"` is selected - cpu: 85 - requestRate: 10000 + cpu: 75 + # 'm' units here represents generic milli (one-thousandth) unit instead of 'decimal' + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#quantities + # so when the average unit among these pods is /1000 then scale + # you can watch the actual/target in real time using `kubectl get hpa/` + requestRate: 1000000m image: name: ghcr.io/stac-utils/titiler-pgstac tag: uvicorn-1.2.0 @@ -229,11 +235,13 @@ raster: stac: enabled: true autoscaling: + # NOTE: to have autoscaling working you'll need to install the `eoapi-support` chart + # see ../../../docs/autoscaling.md for more information enabled: false minReplicas: 1 maxReplicas: 10 # `type`: "cpu" || "requestRate" || "both" - type: "requestRate" + type: "both" behaviour: scaleDown: stabilizationWindowSeconds: 60 @@ -241,8 +249,12 @@ stac: stabilizationWindowSeconds: 0 targets: # matches `type` value above unless `type: "both"` is selected - cpu: 85 - requestRate: 15000 + cpu: 75 + # 'm' units here represents generic milli (one-thousandth) unit instead of 'decimal' + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#quantities + # so when the average unit among these pods is /1000 then scale + # you can watch the actual/target in real time using `kubectl get hpa/` + requestRate: 15000000m image: name: ghcr.io/stac-utils/stac-fastapi-pgstac tag: 2.4.9 @@ -270,13 +282,15 @@ stac: WEB_CONCURRENCY: "5" vector: - enabled: true + enabled: false autoscaling: + # NOTE: to have autoscaling working you'll need to install the `eoapi-support` chart + # see ../../../docs/autoscaling.md for more information enabled: false minReplicas: 1 maxReplicas: 10 # `type`: "cpu" || "requestRate" || "both" - type: "requestRate" + type: "both" behaviour: scaleDown: stabilizationWindowSeconds: 60 @@ -284,8 +298,12 @@ vector: stabilizationWindowSeconds: 0 targets: # matches `type` value above unless `type: "both"` is selected - cpu: 85 - requestRate: 15000 + cpu: 75 + # 'm' units here represents generic milli (one-thousandth) unit instead of 'decimal' + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#quantities + # so when the average unit among these pods is /1000 then scale + # you can watch the actual/target in real time using `kubectl get hpa/` + requestRate: 1000000m image: name: ghcr.io/developmentseed/tipg tag: uvicorn-0.6.1 @@ -319,6 +337,3 @@ vector: docServer: enabled: true - - - diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100644 index 00000000..a867fb2b --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +export RELEASE_NAME=eoapi +export RELEASE_NS=eoapi +export SUPPORT_RELEASE_NAME=eoapi-support +export SUPPORT_RELEASE_NS=eoapi + +helm upgrade --install \ + -n $SUPPORT_RELEASE_NS --create-namespace 
$SUPPORT_RELEASE_NAME \
+  eoapi/eoapi-support --version 0.1.5 \
+  --set prometheus-adapter.prometheus.url="http://${SUPPORT_RELEASE_NAME}-prometheus-server.${SUPPORT_RELEASE_NS}.svc.cluster.local" \
+  --set grafana.datasources.datasources\\.yaml.datasources[0].url="http://${SUPPORT_RELEASE_NAME}-prometheus-server.${SUPPORT_RELEASE_NS}.svc.cluster.local" \
+  -f /Users/ranchodeluxe/apps/eoapi-k8s/helm-chart/eoapi-support/values.yaml
+
+helm upgrade --install \
+  -n $RELEASE_NS --create-namespace $RELEASE_NAME \
+  eoapi/eoapi --version 0.4.7 \
+  -f /Users/ranchodeluxe/apps/eoapi-k8s/helm-chart/eoapi/values.yaml
+
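A quick sanity check once both charts are installed — a sketch assuming the release names and namespaces used in `scripts/release.sh` above (support chart and eoapi both released into the `eoapi` namespace) and that at least one service has `autoscaling.enabled: true` with `type: "requestRate"` or `"both"`:

```sh
# list the custom metrics prometheus-adapter serves to the HPA controller;
# expect names like nginx_ingress_controller_requests_rate_stac_eoapi
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq -r '.resources[].name'

# watch each HPA compare the live request rate against its requestRate target
kubectl -n eoapi get hpa -w
```

For reading those targets: the `m` suffix in values like `requestRate: 50000m` is the generic Kubernetes "milli" quantity, so `50000m` works out to 50000/1000 = 50 requests per second averaged across the pods behind that service.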