diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000000..d0fcee78c3 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,99 @@ +# NOTE: "we" in this file will refer to the Compute Infrastructure team at Yelp + +# prevent cheeky modifications :) +CODEOWNERS @Yelp/paasta + +## These impact the build process, so we probably want CI review on 'em +**/Makefile @Yelp/paasta +Makefile @Yelp/paasta +# this one is kinda misnamed since it's really a patch/ directory :p +contrib/ @Yelp/paasta +tox.ini @Yelp/paasta +# NOTE: we should probably CODEOWNERS requirements, but this would slow down ML +# Compute since they have a lot of logic inside service_configuration_lib and +# tend to make PaaSTA releases that just bump this library +# XXX: maybe CODEOWNERS to CI + ML Compute? + +## These contain entrypoints for PaaSTA or otherwise control entrypoint builds +setup.py @Yelp/paasta +debian/control @Yelp/paasta +debian/paasta-tools.links @Yelp/paasta +debian/paasta-tools.substvars @Yelp/paasta +debian/rules @Yelp/paasta +# this one is unfortunate since operators own the paasta status code for their +# workloads, but atm all this is in one large file +paasta_tools/cli/ @Yelp/paasta +# exclude these files from being codeownered to us +paasta_tools/cli/cmds/spark_run.py +paasta_tools/cli/cmds/security_check.py + + +# we don't own all the API code, but we do own most of it. if this turns into a +# problem, we can split it up further or see if we can specifically remove +# CODEOWNERS from the parts we don't own +paasta_tools/api/ @Yelp/paasta +paasta_tools/api/* @Yelp/paasta + +## common code used in many operators OR core code for paasta services +# at the time or writing, we only care/use pause_service_autoscaler.py and +# utils.py, but we might as well own the whole directory +paasta_tools/autoscaling/ @Yelp/paasta +# this might not always be true, but for now any file that starts with setup +# is pretty foundational (and do things like create k8s deployments, CRDs, +# etc.) 
+paasta_tools/setup* @Yelp/paasta
+# and same logic for files that start with cleanup
+paasta_tools/cleanup* @Yelp/paasta
+# ...too bad we didn't call this cleanup_completed_pods.py :p
+paasta_tools/prune_completed_pods.py @Yelp/paasta
+# atm, the only thing we care about/use here is the kubernetes.py file, but
+# just in case we refactor things out a bit
+paasta_tools/instance/ @Yelp/paasta
+# helpers or base classes used in many places
+paasta_tools/*utils.py @Yelp/paasta
+paasta_tools/kubernetes/ @Yelp/paasta
+paasta_tools/long_running_service_tools.py @Yelp/paasta
+# metadata generators - for now all of these are important enough that we
+# should be involved
+paasta_tools/generate* @Yelp/paasta
+# PaaSTA/Tron core code
+paasta_tools/adhoc_tools.py @Yelp/paasta
+paasta_tools/kubernetes_tools.py @Yelp/paasta
+paasta_tools/eks_tools.py @Yelp/paasta
+paasta_tools/paasta* @Yelp/paasta
+# mesh-related code
+paasta_tools/envoy_tools.py @Yelp/paasta
+paasta_tools/hacheck.py @Yelp/paasta
+paasta_tools/smartstack_tools.py @Yelp/paasta
+# same logic as other wildcards above - currently all of these are important
+# and owned by us
+paasta_tools/list* @Yelp/paasta
+paasta_tools/tron/ @Yelp/paasta
+paasta_tools/tron* @Yelp/paasta
+# secret management
+paasta_tools/secret_providers/ @Yelp/paasta
+paasta_tools/secret_tools.py @Yelp/paasta
+
+## files used for alerting
+# NOTE: we could codeowner check*, but there are enough scripts like
+# check_flink_services_health.py, check_spark_jobs.py, etc. that we
+# should break this down to just what we own
+paasta_tools/check_autoscaler_max_instances.py @Yelp/paasta
+paasta_tools/check_kubernetes_api.py @Yelp/paasta
+paasta_tools/check_kubernetes_services_replication.py @Yelp/paasta
+paasta_tools/check_oom_events.py @Yelp/paasta
+paasta_tools/check_services_replication_tools.py @Yelp/paasta
+paasta_tools/monitoring/ @Yelp/paasta
+paasta_tools/monitoring* @Yelp/paasta
+paasta_tools/oom_logger.py @Yelp/paasta
+# not technically alerting, but very useful for spot interruption handling
+# so we should be careful with it
+paasta_tools/broadcast_log_to_services.py @Yelp/paasta
diff --git a/debian/changelog b/debian/changelog
index e45f486549..c843a00528 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,184 @@
+paasta-tools (1.8.2) xenial; urgency=medium
+
+  * 1.8.2 tagged with 'make release'
+    Commit: Merge pull request #3989 from
+    Yelp/u/jfong/fix_paasta_status_bouncestatus Fix backwards "Bouncing
+    to" status
+
+ -- Jen Patague Mon, 02 Dec 2024 11:59:05 -0800
+
+paasta-tools (1.8.1) xenial; urgency=medium
+
+  * 1.8.1 tagged with 'make release'
+    Commit: Enable topology spread constraints by default for tron
+    (#3987) I debated getting rid of this toggle entirely, but I guess
+    it's nice to keep around just in case?
I"ll also follow this up + with a Puppet PR to clean things up there + + -- Luis Perez Thu, 14 Nov 2024 12:00:34 -0800 + +paasta-tools (1.8.0) xenial; urgency=medium + + * 1.8.0 tagged with 'make release' + Commit: Add tron topology_spread_constraints support to PaaSTA + (#3983) This adds support for configuring Tron-launched pods with a + default Topology Spread Constraint (and node affinities) that will + spread pods out across multiple AZs - otherwise, Karpenter will + overwhelmingly favor a single AZ due to our config + + -- Luis Perez Mon, 11 Nov 2024 12:36:11 -0800 + +paasta-tools (1.7.2) xenial; urgency=medium + + * 1.7.2 tagged with 'make release' + Commit: Allowlist eks files in config_utils (#3984) + updater.write_configs() is not just used for managing autotune files + - SSR shard creation uses it to manage creating/updating the non- + autotuned_defaults config files (which are now always eks- + CLUSTER.yaml). + + -- Luis Perez Tue, 29 Oct 2024 13:52:41 -0700 + +paasta-tools (1.7.1) xenial; urgency=medium + + * 1.7.1 tagged with 'make release' + Commit: Bump service-configuration-lib to v3 to stop mounting + /etc/pki/spark (#3982) This is no longer needed since we"ve been + running Spark on just EKS for a while. + + -- Luis Perez Wed, 16 Oct 2024 13:53:57 -0700 + +paasta-tools (1.7.0) xenial; urgency=medium + + * 1.7.0 tagged with 'make release' + Commit: Cleaning up Mesos from paasta readthedocs - PAASTA-18313 + (#3954) * Cleaning up Mesos from paasta readthedocs * Address + reviews * Address more reviews * Addressing yelpsoa files reviews + + -- Eman Elsabban Wed, 16 Oct 2024 11:32:36 -0700 + +paasta-tools (1.6.4) xenial; urgency=medium + + * 1.6.4 tagged with 'make release' + Commit: Merge pull request #3977 from + Yelp/u/calvinli/automated_redeploys_default_toggle let + enable_automated_redeploys default value be configurable + + -- Evan Krall Wed, 09 Oct 2024 15:15:13 -0700 + +paasta-tools (1.6.3) xenial; urgency=medium + + * 1.6.3 tagged with 'make release' + Commit: Remove vtgate lifecycle hooks (#3966) Remove vtgate + lifecycle hooks as we"re not proceeding with #3959 and would like to + have functional vtgate pods for testing + + -- Luis Perez Mon, 07 Oct 2024 12:24:40 -0700 + +paasta-tools (1.6.2) xenial; urgency=medium + + * 1.6.2 tagged with 'make release' + Commit: Sort dropped caps when caps are added (#3973) As sets are + unordered, not sorting the resulting list built from set operations + means that we"re constantly changing the order of metadata in the + final podspec, leading to bounces almost every time the s_k_j runs + + -- Luis Perez Wed, 25 Sep 2024 15:07:47 -0700 + +paasta-tools (1.6.1) xenial; urgency=medium + + * 1.6.1 tagged with 'make release' + Commit: Only drop capabilities that are not added (#3972) It + appears that containerd has changed the behavior around + adding/dropping linux capabilities and added caps no longer take + precedence over dropped ones + + -- Luis Perez Wed, 25 Sep 2024 09:39:23 -0700 + +paasta-tools (1.6.0) xenial; urgency=medium + + * 1.6.0 tagged with 'make release' + Commit: Add service and instance annotations to tron pods (#3967) + We can currently only figure out what service/instance a log belongs + to by looking at the k8s labels for the emitting pod, but label + values are quite limited in length and we"ve got some pretty large + job and/or action names, which means that the instance label for a + large chunk of tronjobs ends up getting truncated. Solution: + annotations! 
these have a significantly higher limit (256kb) and + they can still be read by our otel collector - the only downside is + that annotations cannot be used for filtering, but that"s fine :) + + -- Luis Perez Wed, 25 Sep 2024 07:59:52 -0700 + +paasta-tools (1.5.5) xenial; urgency=medium + + * 1.5.5 tagged with 'make release' + Commit: Fixing paasta clean up stale nodes (#3971) * Fixing paasta + clean up stale nodes * Fixing paasta clean up stale nodes + + -- Wilmer Bandres Wed, 25 Sep 2024 04:45:30 -0700 + +paasta-tools (1.5.4) xenial; urgency=medium + + * 1.5.4 tagged with 'make release' + Commit: Document minimum_error_rps in smartstack.yaml (#3970) + Update the paasta docs with minimum_error_rps for default error + alerting endpoints + + -- Luis Perez Tue, 24 Sep 2024 12:09:45 -0700 + +paasta-tools (1.5.3) xenial; urgency=medium + + * 1.5.3 tagged with 'make release' + Commit: Merge pull request #3968 from Yelp/u/mpiano/SEC-19486 local- + run: add option to authenticate as the calling user + + -- Eman Elsabban Mon, 23 Sep 2024 12:41:04 -0700 + +paasta-tools (1.5.2) xenial; urgency=medium + + * 1.5.2 tagged with 'make release' + Commit: Upgrading k8s lib (#3965) * Upgrading k8s lib * Fixing + tests * fixing setup crd * fixing setup CR * Removing dead code + + -- Wilmer Bandres Fri, 20 Sep 2024 08:57:21 -0700 + +paasta-tools (1.5.1) xenial; urgency=medium + + * 1.5.1 tagged with 'make release' + Commit: Merge pull request #3960 from Yelp/yaro/update_vector- + logs_filer Update vector-logs message filter FN usage + + -- Yaroslav Liakhovskyi Wed, 18 Sep 2024 00:46:54 -0700 + +paasta-tools (1.5.0) xenial; urgency=medium + + * 1.5.0 tagged with 'make release' + Commit: Attempt to batch config loading for tron deployments (#3956) + Right now we make at most 2N calls to the Tron API during config + deployments: N to get the current configs and at most N if all + services have changes. To start, I"d like to reduce this to N by + allowing GET /api/config to return all the configs so that the only + requests needed are POSTs for changed configs. Since I"m a little + worried about how tron-pnw-prod will fare with returning all the + configs in a single request, using this new endpoint is behind a + feature toggle. Hopefully this works out and we don"t need to add + pagination to the endpoint :) Depending on how this goes, we can + look into batching up the POSTs so that we can also do that in a + single request (or at least Tue, 17 Sep 2024 12:46:59 -0700 + +paasta-tools (1.4.34) xenial; urgency=medium + + * 1.4.34 tagged with 'make release' + Commit: Merge pull request #3958 from Yelp/jfong/PAASTA-18194-all- + namespaces-option PAASTA-18194: Stop querying all managed + namespaces by default for paasta status + + -- Jen Patague Mon, 16 Sep 2024 12:11:03 -0700 + paasta-tools (1.4.33) xenial; urgency=medium * 1.4.33 tagged with 'make release' diff --git a/docs/source/about/glossary.rst b/docs/source/about/glossary.rst index 1bfd27857c..1ab62159a0 100644 --- a/docs/source/about/glossary.rst +++ b/docs/source/about/glossary.rst @@ -1,37 +1,65 @@ Glossary ======== -**App** -~~~~~~~~ - -Marathon app. A unit of configuration in Marathon. During normal -operation, one service "instance" maps to one Marathon app, but during -deploys there may be more than one app. Apps contain Tasks. - **Docker** ~~~~~~~~~~ Container `technology `_ that PaaSTA uses. +**Kubernetes** +~~~~~~~~~~~~~~ + +`Kubernetes `_ (a.k.a. k8s) is the open-source system on which Yelp runs many compute workloads. 
+In Kubernetes, tasks are scheduled by the Kubernetes control plane onto worker servers known as Kubernetes nodes (a.k.a. kube nodes or Kubernetes agents), where the Kubelet agent runs them.
+
+**Kubernetes Deployment**
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A Kubernetes resource that represents a collection of Pods running the same application. A Deployment is responsible for creating and updating instances of your application.
+
+**Kubernetes Node**
+~~~~~~~~~~~~~~~~~~~
+
+A node is a worker machine in a Kubernetes cluster that runs Pods.
+In our case, it's usually a virtual machine provisioned via AWS EC2 Fleets or AutoScalingGroups.
+
+**Kubernetes Horizontal Pod Autoscaler (HPA)**
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A Kubernetes feature that automatically scales the number of Pods in a Deployment based on observed CPU utilization (or, with custom metrics support, on some other application-provided metrics).
+
 **clustername**
 ~~~~~~~~~~~~~~~
 A shortname used to describe a PaaSTA cluster. Use \`paasta list-clusters\` to see them all.
+
+**Kubernetes Pod**
+~~~~~~~~~~~~~~~~~~~
+
+The atomic deployment unit for PaaSTA workloads at Yelp, and for Kubernetes clusters in general: a collection of one or more related containers that share a network namespace.
+At Yelp, a Pod corresponds to a single running instance of one of our services, and many Pods can run on each node.
+
+**Kubernetes Namespace**
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+A mechanism for isolating groups of resources within a single cluster. Each Kubernetes Namespace can contain resources like
+Pods and Deployments, and it allows management and access controls to be applied at the Namespace level.
+
 **instancename**
 ~~~~~~~~~~~~~~~~
-Logical collection of Mesos tasks that comprise a Marathon app. service
-name + instancename = Marathon app name. Examples: main, canary.
+Logical collection of Kubernetes Pods that comprise an application (a Kubernetes Deployment). service
+name + instancename = Kubernetes Deployment name. Examples: main, canary. Each instance represents a running
+version of a service with its own configuration and resources.
 **namespace**
 ~~~~~~~~~~~~~
 An haproxy/SmartStack concept grouping backends that listen on a
-particular port. A namespace may route to many healthy Marathon
-instances. By default, the namespace in which a Marathon job appears is
+particular port. A namespace may route to many healthy PaaSTA
+instances. By default, the namespace in which a PaaSTA instance appears is
 its instancename.
 **Nerve**
@@ -40,32 +68,6 @@ its instancename.
 A service announcement `daemon `_ that registers services in zookeeper to be discovered.
-**Marathon**
-~~~~~~~~~~~~
-
-A `Mesos Framework `_
-designed to deploy stateless services.
-
-**Mesos**
-~~~~~~~~~
-
-A `Cluster/Scheduler `_ that interacts
-with other `Framework `_
-software to run things on nodes.
-
-**Mesos Master**
-~~~~~~~~~~~~~~~~
-
-A machine running a Mesos Master process, responsible for coordination
-but not responsible for actually running Marathon or Tron jobs. There
-are several Masters, coordinating as a quorum via Zookeeper.
-
-**Mesos Slave**
-~~~~~~~~~~~~~~~
-
-A machine running a Mesos Slave process, responsible for running
-Marathon or Tron jobs as assigned by the Mesos Master.
-
 **PaaSTA**
 ~~~~~~~~~~
@@ -87,12 +89,6 @@ The brand name for Airbnb’s Nerve + Synapse service discovery solution.
 A local haproxy daemon that runs on yocalhost
-**Task**
-~~~~~~~~
-
-Marathon task.
A process (usually inside a Docker container) running on -a machine (a Mesos Slave). One or more Tasks constitutes an App. - **soa-configs** ~~~~~~~~~~~~~~~ @@ -107,5 +103,5 @@ services. **Zookeeper** ~~~~~~~~~~~~~ -A distributed key/value store used by Mesos for coordination and +A distributed key/value store used by PaaSTA for coordination and persistence. diff --git a/docs/source/about/paasta_principles.rst b/docs/source/about/paasta_principles.rst index ee7fbe404c..7ad5baac39 100644 --- a/docs/source/about/paasta_principles.rst +++ b/docs/source/about/paasta_principles.rst @@ -54,7 +54,7 @@ a particular app in a theoretical PaaS: +=============================================+=====================================+ | :: | :: | | | | -| $ cat >marathon-cluster.yaml <kubernetes-cluster.yaml <`_ to deploy -long-running services. At Yelp, PaaSTA clusters are deployed at the -``superregion`` level. This means that a service could potentially be deployed -on any available host in that ``superregion`` that has resources to run it. If -PaaSTA were unaware of the Smartstack ``discover:`` settings, Marathon would -naively deploy tasks in a potentially "unbalanced" manner: - -.. image:: unbalanced_distribution.svg - :width: 700px - -With the naive approach, there is a total of six tasks for the superregion, but -four landed in ``region 1``, and two landed in ``region 2``. If -the ``discover`` setting were set to ``habitat``, there would be habitats -**without** tasks available to serve anything, likely causing an outage. - -In a world with configurable SmartStack discovery settings, the deployment -system (Marathon) must be aware of these and deploy accordingly. - -What A SmartStack-Aware Deployment Looks Like -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -By taking advantage of -`Marathon Constraint Language `_ -, specifically the -`GROUP_BY `_ -operator, Marathon can deploy tasks in such a way as to ensure a balanced number -of tasks in each latency zone. - -Example: Balanced deployment to every habitat -********************************************* - -For example, if the SmartStack setting -were ``discover: habitat`` [1]_, we Marathon could enforce the constraint -``["habitat", "GROUP_BY"]``, which will ask Marathon to distribute tasks -evenly between the habitats[2]_: - -.. image:: balanced_distribution.svg - :width: 700px - -Example: Deployment balanced to each region -******************************************* - -Similarly, if the ``discover`` setting were set to ``region``, the equivalent -Marathon constraint would ensure an equal number of tasks distributed to each region. - -.. image:: balanced_distribution_region.svg - :width: 700px - -Even though there some habitats in this diagram that lack the service, the -``discover: region`` setting allows clients to utilize *any* process as long -as it is in the local region. The Marathon constraint of ``["region", "GROUP_BY"]`` -ensures that tasks are distributed equally over the regions, in this case three -in each. - - -.. [1] Technically PaaSTA should be using the smallest value of the ``advertise`` - setting, tracked in `PAASTA-1253 `_. -.. [2] Currently the ``instances:`` count represents the total number of - instances in the cluster. Eventually with `PAASTA-1254 `_ - the instance count will be a per-discovery-location setting, meaning there - will always be an equal number of instances per location. (With ``instances: 6`` - and a ``discovery: habitat``, and three habitats, the total task count would be - 18, 6 in each habitat.) 
- - -How SmartStack Settings Influence Monitoring --------------------------------------------- - -If a service is in SmartStack, PaaSTA uses the same ``discover`` setting -referenced above to decide how the service should be monitored. When a service -author sets a particular setting, say ``discover: region``, it implies that the -system should enforce availability of that service in every region. If there -are regions that lack tasks to serve that service, then PaaSTA should alert. - -Example: Checking Each Habitat When ``discover: habitat`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If SmartStack is configured to ``discover: habitat``, PaaSTA configures -Marathon to balance tasks to each habitat. But what if it is unable to do that? - -.. image:: replication_alert_habitat.svg - :width: 700px - -In this case, there are no tasks in habitat F. This is a problem because -``discover: habitat`` implies that any clients in habitat F will not -be able to find the service. It is *down* in habitat F. - -To detect and alert on this, PaaSTA uses the ``discover`` setting to decide -which unique locations to look at (e.g. ``habitat``). Paasta iterates over -each unique location (e.g. habitats A-F) and inspects the replication levels -in each location. It finds that there is at least one habitat with too few -instances (habitat F, which has 0 out of 1) and alerts. - -The output of the alert or ``paasta status`` looks something like this:: - - Smartstack: - habitatA - Healthy - in haproxy with (1/1) total backends UP in this namespace. - habitatB - Healthy - in haproxy with (1/1) total backends UP in this namespace. - habitatC - Healthy - in haproxy with (1/1) total backends UP in this namespace. - habitatD - Healthy - in haproxy with (1/1) total backends UP in this namespace. - habitatE - Healthy - in haproxy with (1/1) total backends UP in this namespace. - habitatF - Critical - in haproxy with (0/1) total backends UP in this namespace. - -In this case the service authors have a few actions they can take: - -- Increase the total instance count to have more tasks per habitat. - (In this example, each habitat contains a single point of failure!) -- Change the ``discovery`` setting to ``region`` to increase availability - at the cost of latency. -- Investigate *why* tasks can't run in habitat F. - (Lack of resources? Improper configs? Missing service dependencies?) - -Example: Checking Each Region When ``discover: region`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If SmartStack is configured to ``discover: region``, PaaSTA configures -Marathon to balance tasks to each region. But what if it is unable to launch -all the tasks, but there were tasks running in that region? - -.. image:: replication_noalert_region.svg - :width: 700px - -The output of the alert or ``paasta status`` looks something like this:: - - Smartstack: - region1 - Healthy - in haproxy with (3/3) total backends UP in this namespace. - region2 - Warning - in haproxy with (2/3) total backends UP in this namespace. - -Assuming a threshold of 50%, an alert would not be sent to the team in this case. - -Even if some habitats do not have tasks for this service, ``discover: region`` -ensures that clients can be satisfied by tasks in the same region if not by -tasks in the same habitat. 
+PaaSTA's SmartStack Unawareness and Pod Spreading Strategy
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+PaaSTA is not natively aware of SmartStack locations; to make the Kubernetes scheduler aware of them, we use Pod Topology Spread Constraints.
+To balance Pods across Availability Zones (AZs) in Kubernetes, we use `topology spread constraints `_, which are
+assigned per instance of a service via the "topology_spread_constraints" key in soa-configs.

 The Relationship Between Nerve "namespaces" and PaaSTA "instances"
 ------------------------------------------------------------------
@@ -189,9 +56,9 @@ components of the same service on different ports. In PaaSTA we call these
    api:
      proxy_port: 20002

-The corresponding Marathon configuration in PaaSTA might look like this::
+The corresponding Kubernetes configuration in PaaSTA might look like this::

-    #marathon.yaml
+    #kubernetes.yaml
     main:
       instances: 10
       cmd: myserver.py
@@ -214,7 +81,7 @@ the same Nerve namespace. Consider this example::
     main:
       proxy_port: 20001

-    #marathon.yaml
+    #kubernetes.yaml
     main:
       instances: 10
       cmd: myserver.py
@@ -238,7 +105,7 @@ Sharding is another use case for using alternative namespaces::
     main:
       proxy_port: 20001

-    #marathon.yaml
+    #kubernetes.yaml
     shard1:
       instances: 10
       registrations: ['service.main']
diff --git a/docs/source/autoscaling.rst b/docs/source/autoscaling.rst
index 7b9e9c3ee1..11bc2db412 100644
--- a/docs/source/autoscaling.rst
+++ b/docs/source/autoscaling.rst
@@ -2,7 +2,7 @@ Autoscaling PaaSTA Instances
 ====================================
-PaaSTA allows programmatic control of the number of replicas (pods) a service has.
+PaaSTA allows programmatic control of the number of replicas (Pods) a service has.
 It uses Kubernetes' Horizontal Pod Autoscaler (HPA) to watch a service's load and scale up or down.

 How to use autoscaling
@@ -24,9 +24,9 @@ This behavior may mean that your service is scaled up unnecessarily when you fir
 Don't worry - the autoscaler will soon learn what the actual load on your service is, and will scale back down to the appropriate level.

 If you use autoscaling it is highly recommended that you make sure your service has a readiness probe.
-If your service is registered in Smartstack, each pod automatically gets a readiness probe that checks whether that pod is available in the service mesh.
+If your service is registered in Smartstack, each Pod automatically gets a readiness probe that checks whether that Pod is available in the service mesh.
 Non-smartstack services may want to configure a ``healthcheck_mode``, and either ``healthcheck_cmd`` or ``healthcheck_uri`` to ensure they have a readiness probe.
-The HPA will ignore the load on your pods between when they first start up and when they are ready.
+The HPA will ignore the load on your Pods between when they first start up and when they are ready.
 This ensures that the HPA doesn't incorrectly scale up due to this warm-up CPU usage.

 Autoscaling parameters are stored in an ``autoscaling`` attribute of your instances as a dictionary.
@@ -66,7 +66,7 @@ The currently available metrics providers are:
     Measures the CPU usage of your service's container.

 :uwsgi:
-  With the ``uwsgi`` metrics provider, Paasta will configure your pods to be scraped from your uWSGI master via its `stats server `_.
+  With the ``uwsgi`` metrics provider, Paasta will configure your Pods to be scraped from your uWSGI master via its `stats server `_.
Setpoint refers to the worker utilization, which is the percentage of workers that are busy. We currently only support uwsgi stats on port 8889, and Prometheus will attempt to scrape that port. @@ -78,7 +78,7 @@ The currently available metrics providers are: :gunicorn: - With the ``gunicorn`` metrics provider, Paasta will configure your pods to run an additional container with the `statsd_exporter `_ image. + With the ``gunicorn`` metrics provider, Paasta will configure your Pods to run an additional container with the `statsd_exporter `_ image. This sidecar will listen on port 9117 and receive stats from the gunicorn service. The ``statsd_exporter`` will translate the stats into Prometheus format, which Prometheus will scrape. You can specify ``moving_average_window_seconds`` (default ``1800``, or 30 minutes) to adjust how long of a time period your worker utilization is averaged over: set a smaller value to autoscale more quickly, or set a larger value to ignore spikes. diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 3264867a0b..b1d1133621 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -22,14 +22,14 @@ You can run ``make itest`` to execute them. Example Cluster ^^^^^^^^^^^^^^^^^ There is a docker compose configuration based on our itest containers that you -can use to run the paasta code against a semi-realistic cluster whilst you are +can use to run the PaaSTA code against a semi-realistic cluster whilst you are developing. More instructions `here <./installation/example_cluster.html>`_ System Package Building / itests ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PaaSTA is distributed as a debian package. This package can be built and tested -with ``make itest_xenial``. These tests make assertions about the +with ``make itest_``. These tests make assertions about the packaging implementation. @@ -71,12 +71,3 @@ it is a little tricky. * ``eval "$(.tox/py27/bin/register-python-argcomplete ./tox/py27/bin/paasta)"`` * There is a simple integration test. See the itest/ folder. - -Upgrading Components --------------------- - -As things progress, there will come a time that you will have to upgrade -PaaSTA components to new versions. - -* See `Upgrading Mesos `_ for how to upgrade Mesos safely. -* See `Upgrading Marathon `_ for how to upgrade Marathon safely. diff --git a/docs/source/index.rst b/docs/source/index.rst index 2227f36658..4e9152eabc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -34,7 +34,7 @@ PaaSTA Development .. toctree:: :maxdepth: 2 - paasta_development + PaaSTA_development contributing style_guide upgrading_marathon diff --git a/docs/source/installation/example_cluster.rst b/docs/source/installation/example_cluster.rst index a612783e22..2c7383da37 100644 --- a/docs/source/installation/example_cluster.rst +++ b/docs/source/installation/example_cluster.rst @@ -24,11 +24,6 @@ everything with ``docker-compose down && docker-compose run playground``. Getting Started --------------- -Mesos -~~~~~ -To launch a running Mesos cluster, then run ``docker-compose run playground`` -and you'll be dropped into a shell with the paasta\_tools package installed in development mode. - Kubernetes ~~~~~~~~~~ To instead launch a Kubernetes cluster, run @@ -47,9 +42,7 @@ Try it out The cluster includes a git remote and docker registry. The git remote contains an example repo but you can add more if you want. -The mesos and marathon webuis are exposed on your docker host -on port 5050, 8080, 8081. 
So load them up if you want to watch. Then in -the playground container: +In the playground container: :: @@ -63,9 +56,8 @@ the playground container: Scaling The Cluster ------------------- -If you want to add more capacity to the cluster, you can increase the number of Mesos agents/Kubernetes Nodes: +If you want to add more capacity to the cluster, you can increase the number of Kubernetes Nodes: -``docker-compose scale mesosslave=4`` or ``docker-compose scale kubernetes=4`` @@ -79,9 +71,8 @@ Some but not all of the paasta command line tools should work. Try: paasta status -s hello-world Scribe is not included with this example cluster. If you are looking for -logs, check ``/var/logs/paasta_logs`` and syslog on the mesosmaster for -the output from cron. Also note that all the slaves share the host's -docker daemon. +logs, check syslog on the Kubernetes node that the pod is running on for the output from cron. +You can get the host the pod is running on by adding "-v" to the command above. Cleanup ------- diff --git a/docs/source/installation/getting_started.rst b/docs/source/installation/getting_started.rst index 13d562de2d..0a2bfc555b 100644 --- a/docs/source/installation/getting_started.rst +++ b/docs/source/installation/getting_started.rst @@ -33,9 +33,7 @@ are currently not available, so one must build them and install them manually:: make itest_xenial sudo dpkg -i dist/paasta-tools*.deb -This package must be installed anywhere the PaaSTA CLI and on the Mesos/Marathon -masters. If you are using SmartStack for service discovery, then the package must -be installed on the Mesos Slaves as well so they can query the local API. +This package must be installed anywhere the PaaSTA CLI is needed and on the kube nodes. Once installed, ``paasta_tools`` reads global configuration from ``/etc/paasta/``. This configuration is in key/value form encoded as JSON. All files in ``/etc/paasta`` @@ -76,7 +74,7 @@ Docker and a Docker Registry PaaSTA uses `Docker `_ to build and distribute code for each service. PaaSTA assumes that a single registry is available and that the associated components -(Docker commands, unix users, mesos slaves, etc) have the correct credentials +(Docker commands, unix users, Kubernetes Nodes, etc) have the correct credentials to use it. The docker registry needs to be defined in a config file in ``/etc/paasta/``. @@ -91,34 +89,24 @@ filename is irrelevant, but here would be an example There are many registries available to use, or you can `host your own `_. -Mesos ------ - -PaaSTA uses Mesos to do the heavy lifting of running the actual services on -pools of machines. See the `official documentation `_ -on how to get started with Mesos. - -Marathon --------- +Kubernetes +---------- -PaaSTA uses `Marathon `_ for supervising long-running services running in Mesos. -See the `official documentation `__ for how to get started with Marathon. -Then, see the `PaaSTA documentation <../yelpsoa_configs.html#marathon-clustername-yaml>`_ for how to define Marathon -jobs. +PaaSTA uses `Kubernetes `_ to manage and orchestrate its containerized services. +See the `PaaSTA documentation <../yelpsoa_configs.html#kubernetes-clustername-yaml>`_ for how to define PaaSTA +services in Kubernetes. 
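For orientation, a minimal long-running instance definition might look something like the sketch below. This is only an illustrative sketch: the cluster name in the filename and all of the values are hypothetical, and the authoritative list of options lives in the `yelpsoa_configs <../yelpsoa_configs.html>`_ documentation::

    # kubernetes-<clustername>.yaml (hypothetical cluster name and values)
    main:
      instances: 3                              # number of Pods to run
      cpus: 1                                   # relative CPU shares per Pod
      mem: 2048                                 # memory limit in MB
      cmd: myserver.py                          # what to run inside the container
      registrations: ['example_service.main']   # optional SmartStack registration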
-Once Marathon jobs are defined in soa-configs, there are a few tools provided by PaaSTA
-that interact with the Marathon API:
+Once PaaSTA services are defined in soa-configs, there are a few tools provided by PaaSTA
+that interact with the Kubernetes API:

-* ``deploy_marathon_services``: Does the initial sync between soa-configs and the Marathon API.
-  This is the tool that handles "bouncing" to new version of code, and resizing Marathon applications when autoscaling
+* ``setup_kubernetes_job``: Does the initial sync between soa-configs and the Kubernetes API.
+  This is the tool that handles "bouncing" to a new version of code, and resizing Kubernetes deployments when autoscaling
   is enabled.
-  This is idempotent, and should be run periodically on a box with a ``marathon.json`` file in the
-  `system paasta config <../system_configs.html>`_ directory (Usually ``/etc/paasta``).
-  We recommend running this frequently - delays between runs of this command will limit how quickly new versions of
-  services or changes to soa-configs are picked up.
-* ``cleanup_marathon_jobs``: Cleans up lost or abandoned services. This tool
-  looks for Marathon jobs that are *not* defined in soa-configs and removes them.
-* ``check_marathon_services_replication``: Iterates over all Marathon services
+  This is idempotent, and is run periodically on a box with a ``deployments.json`` file in the
+  ``/nail/etc/services`` directory, updating or creating the Kubernetes Deployment object representing the modified service instance.
+* ``cleanup_kubernetes_jobs``: Cleans up lost or abandoned services. This tool
+  looks for Kubernetes instances that are *not* defined in soa-configs and removes them.
+* ``check_kubernetes_services_replication``: Iterates over all Kubernetes services
   and inspects their health. This tool integrates with the monitoring
   infrastructure and will alert the team responsible for the service if it
   becomes unhealthy to the point where manual intervention is required.
@@ -128,7 +116,7 @@ SmartStack and Hacheck
 `SmartStack `_ is a dynamic service discovery system
 that allows clients to find and route to
-healthy mesos tasks for a particular service.
+healthy Kubernetes Pods for a particular service.
 Smartstack consists of two agents: `nerve `_ and `synapse `_.
 Nerve is responsible for health-checking services and registering them in ZooKeeper. Synapse then reads that data from ZooKeeper and configures an HAProxy instance.
@@ -137,7 +125,7 @@ To manage the configuration of nerve (detecting which services are running on a
 we have a package called `nerve-tools `_. This repo builds a .deb package, and should be installed on all slaves.
 Each slave should run ``configure_nerve`` periodically.
-We recommend this runs quite frequently (we run it every 5s), since Marathon tasks created by Paasta are not available
+We recommend this runs quite frequently (we run it every 5s), since Kubernetes Pods created by PaaSTA are not available
 to clients until nerve is reconfigured.

 Similarly, to manage the configuration of synapse, we have a package called `synapse-tools `_.
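To make the SmartStack pieces above concrete, here is a hedged sketch of the kind of ``smartstack.yaml`` entry that nerve and synapse act on. The port and location values are made up purely for illustration, and only keys that appear elsewhere in these docs (``proxy_port``, ``discover``, ``advertise``) are shown::

    # smartstack.yaml (illustrative values only)
    main:
      proxy_port: 20001        # local port haproxy-synapse exposes for this namespace
      discover: region         # latency zone within which backends are discovered
      advertise: [region]      # latency zone(s) this instance is advertised in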
diff --git a/docs/source/isolation.rst b/docs/source/isolation.rst index f118361f19..1f692db17b 100644 --- a/docs/source/isolation.rst +++ b/docs/source/isolation.rst @@ -1,27 +1,25 @@ ============================================== -Resource Isolation in PaaSTA, Mesos and Docker +Resource Isolation in PaaSTA, Kubernetes and Docker ============================================== PaaSTA instance definitions include fields that specify the required resources -for your service. The reason for this is two-fold: firstly, so that whichever -Mesos framework can evaluate which Mesos agent making -offers have enough capacity to run the task (and pick one of the agents -accordingly); secondly, so that tasks can be protected from especially noisy -neighbours on a box. That is, if a task under-specifies the resources it +for your service. The reason for this is two-fold: firstly, so that the Kubernetes scheduler +can evaluate which Kubernetes nodes have enough capacity to schedule the Kubernetes Pods (representing PaaSTA instances) on, in the cluster specified; +secondly, so that the Pods can be protected from especially noisy +neighbours on a box. That is, if a Pod under-specifies the resources it requires to run, or in another case, has a bug that means that it consumes far -more resources than it *should* require, then the offending tasks can be +more resources than it *should* require, then the offending Pods can be isolated effectively, preventing them from having a negative impact on its neighbours. -This document is designed to give a more detailed review of how Mesos -Frameworks such as Marathon use these requirements to run tasks on -different Mesos agents, and how these isolation mechanisms are implemented. +This document is designed to give a more detailed review of how Kubernetes +use these requirements to run Pods on different Kubernetes nodes, and how these isolation mechanisms are implemented. Note: Knowing the details of these systems isn't a requirement of using PaaSTA; most service authors may never need to know the details of such things. In fact, one of PaaSTA's primary design goals is to *hide* the minutiae of schedulers and resource isolation. However, this may benefit administrators -of PaaSTA (and, more generally, Mesos clusters), and the simply curious. +of PaaSTA (and, more generally, Kubernetes clusters), and the simply curious. Final note: The details herein may, nay, will contain (unintended) inaccuracies. If you notice such a thing, we'd be super grateful if you could open a pull @@ -31,64 +29,51 @@ How Tasks are Scheduled on Hosts -------------------------------- To first understand how these resources are used, one must understand how -a task is run on a Mesos cluster. - -Mesos can run in two modes: Master and Agent. When a node is running Mesos in -Master mode, it is responsible for communicating between agent processes and -frameworks. A Framework is a program which wants to run tasks on the Mesos -cluster. - -A master is responsible for presenting frameworks with resource offers. -Resource offers are compute resource free for a framework to run a task. The -details of that compute resource comes from the agent nodes, which regularly -tell the Master agent the resources it has available for running tasks. Using -the correct parlance, Mesos agents make 'offers' to the master. - -Once a master node receives offers from an agent, it forwards it to -a framework. 
Resource offers are split between frameworks according to -the master's configuration - there may be particular priority given -to some frameworks. - -At Yelp, we treat the frameworks we run (at the time of writing, Marathon and -Tron) equally. That means that frameworks *should* have offers distributed -between them evenly, and all tasks are considered equal. - -It is then up to the framework to decide what it wants to do with an offer. -The framework may decide to: - - * Reject the offer, if the framework has no tasks to run. - * Reject the offer, if the resources included in the offer are not enough to - match those required by the application. - * Reject the offer, if attributes on the slave conflict with any constraints - set by the task. - * Accept the offer, if there is a task that requires resources less than or - equal to the resources offered by the Agent. - -When rejecting an offer, the framework may apply a 'filter' to the offer. This -filter is then used by the Mesos master to ensure that it does *not* resend -offers that are 'filtered' by a framework. The default filter applied includes -a timeout - a Master will not resend an offer to a framework for a period of 5 -seconds. - -If a framework decides it wants to accept a resource offer, it then tells the -master to run a task on the agent. The details of the 'acceptance' include a -detail of the task to be run, and the 'executor' used to run the task. - -By default, PaaSTA uses the 'Docker' executor everywhere. This means that *all* -tasks launched by Marathon and Tron are done so with a Docker container. - -How Tasks are isolated from each other. ---------------------------------------- - -Given that a slave may run multiple tasks, we need to ensure that tasks cannot +a Pod is run on a Kubernetes cluster. + +Kubernetes has two types of nodes: Master and worker nodes. The master nodes are +responsible for managing the cluster. + +The master node contains the following components: + + * API Server: Exposes the Kubernetes API. It is the front-end for the Kubernetes control plane. + * Scheduler: Responsible for distributing workloads across multiple nodes. + * Controller Manager: Responsible for regulating the state of the cluster. + +Worker nodes are the machines that run the workload. Each worker node runs the following components +to manage the execution and networking of containers: + + * Kubelet: An agent that runs on each node in the cluster. It makes sure that containers are running in a Pod. + * Kube-proxy: Maintains network rules on nodes. These network rules allow network communication to Pods from network sessions inside or outside of the cluster. + * Container runtime: The software that is responsible for running containers. Kubernetes supports several container runtimes: Docker, containerd, CRI-O, and any implementation of the Kubernetes CRI (Container Runtime Interface). + + +When a new Pod (representing a PaaSTA instance) is created, the Kubernetes scheduler (kube-scheduler) will assign it to the best node for it to run on. +The scheduler will take into account the resources required by the Pod, the resources available on the nodes, and any constraints that are specified. It takes the following +criteria into account when selecting a node to have the Pod run on: + + * Resource requirements: Checks if nodes have enough CPU, memory, and other resources requested by the Pod. + * Node affinity: Checks if the Pod should be scheduled on a node that has a specific label. 
+ * Inter-Pod affinity/anti-affinity: checks if the Pod should be scheduled near or far from another Pod. + * Taints and tolerations: Checks if the Pod should be scheduled on a node that has a specific taint. + * Node selectors: Checks if the Pod should be scheduled on a node that has a specific label. + * Custom Policies: any custom scheduling policies or priorities such as the Pod Topology Spread Constraints set by the key "topology_spread_constraint". + +The scheduler will then score each node that can host the Pod, based on the criteria above and any custom policies and then select the node +with the highest score to run the Pod on. If multiple nodes have the same highest score then one of them is chosen randomly. Once a node is selected, the scheduler assigns +the Pod to the node and the decision is then communicated back to the API server, which in turn notifies the Kubelet on the chosen node to start the Pod. +For more information on how the scheduler works, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling/scheduling-framework/). + +How PaaSTA services are isolated from each other +------------------------------------------------ + +Given that a node may run multiple Pods for PaaSTA services, we need to ensure that Pods cannot 'interfere' with one another. We do this on a file system level using Docker - processes launched in Docker containers are protected from each other and the host by using kernel namespaces. Note that the use of kernel namespaces is a -feature of Docker - PaaSTA doesn't do anything 'extra' to enable this. It's -also worth noting that there are other 'container' technologies that could -provide this - the native Mesos 'containerizer' included. +feature of Docker - PaaSTA doesn't do anything 'extra' to enable this. -However, these tasks are still running and consuming resources on the same +However, these Pods are still running and consuming resources on the same host. The next section aims to explain how PaaSTA services are protected from so-called 'noisy neighbours' that can starve others from resources. @@ -130,21 +115,20 @@ If the processes in the cgroup reaches the ``memsw.limit_in_bytes`` value , then the kernel will invoke the OOM killer, which in turn will kill off one of the processes in the cgroup (often, but not always, this is the biggest contributor to the memory usage). If this is the only process running in the -Docker container, then the container will die. The mesos framework which -launched the task may or may not decide to try and start the same task -elsewhere. +Docker container, then the container will die. Kubernetes will restart the container +as the RestartPolicy for the container is set to "Always". CPUs """" CPU enforcement is implemented slightly differently. Many people expect the value defined in the ``cpus`` field in a service's soa-configs to map to a -number of cores that are reserved for a task. However, isolating CPU time like +number of cores that are reserved for a Pod. However, isolating CPU time like this can be particularly wasteful; unless a task spends 100% of its time on -CPU (and thus has *no* I/O), then there is no need to prevent other tasks from +CPU (and thus has *no* I/O), then there is no need to prevent other Pods from running on the spare CPU time available. -Instead, the CPU value is used to give tasks a relative priority. This priority +Instead, the CPU value is used to give Pods a relative priority. 
This priority is used by the Linux Scheduler to decide the order in which to run
waiting threads.
@@ -170,17 +154,11 @@ Some notes on this:
   against the share available for another. The result of this may be that a
   higher number of 'skinny' containers may be preferable to 'fat' containers.

-This is different from how Mesos and Marathon use the CPU value when evaluating
-whether a task 'fits' on a host. Yelp configures agents to advertise the number
-of cores on the box, and Marathon will only schedule containers on agents where
-there is enough 'room' on the host, when in reality, there is no such limit.
-
 Disk
 """""
-Unfortunately, the isolator provided by Mesos does not support isolating disk
-space used by Docker containers; that is, we have no way of limiting the amount
-of disk space used by a task. Our best effort is to ensure that the disk space
-is part of the offer given by a given Mesos agent to frameworks, and ensure
-that any services we know to use high disk usage (such as search indexes) have
-the ``disk`` field set appropriately in their configuration.
+Kubernetes supports disk resource isolation through the use of storage quotas. Kubernetes
+will periodically poll for usage, so it is possible to temporarily exceed the configured
+limit. When Kubernetes sees that a container has exceeded its limit, it will evict (i.e., kill) the offending Pod, thereby deleting the container's filesystem and reclaiming the used disk.
+
+NOTE: this usage calculation takes into consideration node-level container logs (i.e., container logs for stdout/stderr stored on-host to power things like ``kubectl logs``) - if an application is particularly "chatty" with its output, the ``disk`` allocation in soa-configs will need to take this into account.
diff --git a/docs/source/paasta_development.rst b/docs/source/paasta_development.rst
index cf74c49bd3..a961fad452 100644
--- a/docs/source/paasta_development.rst
+++ b/docs/source/paasta_development.rst
@@ -107,7 +107,7 @@ If you didn't run ``setup_kubernetes_job`` to deploy ``compute-infra-test-servic
 1. Using ``launch.json`` file
-   1. From the ``Run and Debug`` tab in VS Code, press on ``paasta playground``. This will run all PaaSTA components.
+   1. From the ``Run and Debug`` tab in VS Code, press on ``PaaSTA playground``. This will run all PaaSTA components.
 2. Using make targets
diff --git a/docs/source/soa_configs.rst b/docs/source/soa_configs.rst
index 83054be5a4..932ea17777 100644
--- a/docs/source/soa_configs.rst
+++ b/docs/source/soa_configs.rst
@@ -22,15 +22,13 @@ directory. There is one folder per service. Here is an example tree::
     ├── api
     │   ├── adhoc-prod.yaml
     │   ├── deploy.yaml
-    │   ├── marathon-dev.yaml
-    │   ├── marathon-prod.yaml
     │   ├── monitoring.yaml
     │   ├── service.yaml
     │   ├── smartstack.yaml
     │   └── tron-prod.yaml
     ...
-See the `paasta-specific soa-configs documentation `_ for more information
+See the `PaaSTA-specific soa-configs documentation `_ for more information
 about the structure and contents of some example files in soa-configs that PaaSTA uses.
 For more information about why we chose this method of config distribution,
diff --git a/docs/source/style_guide.rst b/docs/source/style_guide.rst
index 0c0eb5941e..8157422f27 100644
--- a/docs/source/style_guide.rst
+++ b/docs/source/style_guide.rst
@@ -47,9 +47,9 @@ Bad:
    * Anything going to scribe should ALSO go to stdout.
Good: - * setup_marathon_job => general output to stdout, app-specific output to scribe + * setup_kubernetes_job => general output to stdout, app-specific output to scribe Bad: - * setup_marathon_job | stdint2scribe (no selective filtering, raw stdout dump) + * setup_kubernetes_job | stdint2scribe (no selective filtering, raw stdout dump) Good: * paasta itest => Sends summary of pass or fail to scribe event log. Sends full output of the run to the scribe debug log @@ -79,7 +79,7 @@ Event Level General Guidelines: * All event-level scribe logs should be as terse as possible while still providing a high level summary of the events occurring in the infrastructure. * All state changing events MUST have at least one event-level scribe log line emitted. -* It is not necessary to repeat redundant information, like service name, as all paasta log invocations already are service-specific anyway. +* It is not necessary to repeat redundant information, like service name, as all PaaSTA log invocations already are service-specific anyway. * All event level logs SHOULD use active verbs to indicate the action that took place. * Log lines SHOULD NOT contain the log level that they are using *in* the log line. Don't try to emulate syslog. * If an external URL with more context is available, the log line SHOULD reference it, but only if an error or warning is detected. @@ -104,7 +104,7 @@ Debug Level Debug Level General Guidelines: -* Viewing Debug level logs SHOULD NOT be necessary under normal paasta operation. +* Viewing Debug level logs SHOULD NOT be necessary under normal PaaSTA operation. * Debug logs are for providing additional context when things go wrong. * Debug logs should still use active verbs and not repeat redundant information if possible. * All debug-level logs should also go to stderr. diff --git a/docs/source/system_configs.rst b/docs/source/system_configs.rst index 9edfdf4e43..bca64f44b9 100644 --- a/docs/source/system_configs.rst +++ b/docs/source/system_configs.rst @@ -2,7 +2,7 @@ System Paasta Configs ===================== The "System Paasta Configs" inform Paasta about your environment and cluster setup, such as how to connect to -Marathon/hacheck/etc, what the cluster name is, etc. +Kubernetes/hacheck/etc, what the cluster name is, etc. Structure @@ -26,10 +26,7 @@ Configuration options These are the keys that may exist in system configs: - * ``zookeeper``: A zookeeper connection url, used for discovering where the Mesos leader is, and some locks. - Example: ``"zookeeper": "zk://zookeeper1:2181,zookeeper2:2181,zookeeper3:2181/mesos"``. - - * ``docker_registry``: The name of the docker registry where paasta images will be stored. This can optionally + * ``docker_registry``: The name of the docker registry where PaaSTA images will be stored. This can optionally be set on a per-service level as well, see `yelpsoa_configs `_ Example: ``"docker_registry": "docker-paasta.yelpcorp.com:443"`` @@ -44,9 +41,8 @@ These are the keys that may exist in system configs: Example:: "dashboard_links": { - "uswest1-prod": { - "Mesos": "http://mesos.paasta-uswest1-prod.yelpcorp.com", - "Cluster charts": "http://kibana.yelpcorp.com/something", + "norcal-devc": { + "Tron": "http://y/tron-norcal-devc", } } @@ -97,23 +93,13 @@ These are the keys that may exist in system configs: Example: ``"sensu_port": 3031`` - * ``dockercfg_location``: A URI of a .dockercfg file, to allow mesos slaves - to authenticate with the docker registry. - Defaults to ``file:///root/.dockercfg``. 
- While this must be set, this file can contain an empty JSON dictionary (``{}``) if your docker registry does not - require authentication. - May use any URL scheme supported by Mesos's `fetcher module. `_ - - Example: ``"dockercfg_location": "http://somehost/somepath"`` - * ``synapse_port``: The port that haproxy-synapse exposes its status on. Defaults to ``3212``. Example: ``"synapse_port": 3213`` - * ``synapse_host``: The default host that paasta should interrogate for haproxy-synapse state. + * ``synapse_host``: The default host that PaaSTA should interrogate for haproxy-synapse state. Defaults to ``localhost``. - Primarily used in `check_marathon_services_replication `_. Example: ``"synapse_host": 169.254.255.254`` diff --git a/docs/source/workflow.rst b/docs/source/workflow.rst index 5aae3605d1..21db17c935 100644 --- a/docs/source/workflow.rst +++ b/docs/source/workflow.rst @@ -7,9 +7,9 @@ Ways That PaaSTA Can Run Services Long Running Services ^^^^^^^^^^^^^^^^^^^^^ -Long running services are are processes that are expected to run continuously +Long running services are processes that are expected to run continuously and usually have the same process id throughout. PaaSTA uses -`Marathon `_ to configure how these +`Kubernetes `_ to configure how these services should run. These services often serve network traffic, usually HTTP. PaaSTA integrates with @@ -61,68 +61,12 @@ Deployment A yelpsoa-configs master runs `generate_deployments_for_service `_ frequently. The generated ``deployments.json`` appears in ``/nail/etc/services/service_name`` throughout the cluster. -Marathon masters run `deploy_marathon_services `_, -a thin wrapper around ``setup_marathon_job``. -These scripts parse ``deployments.json`` and the current cluster state, -then issue commands to Marathon to put the cluster into the right state --- cluster X should be running version Y of service Z. - How PaaSTA Runs Docker Containers --------------------------------- -Marathon launches the Docker containers that comprise a PaaSTA service. - -Docker images are run by Mesos's native Docker executor. PaaSTA composes the -configuration for the running image: - -* ``--attach``: stdout and stderr from running images are sent to logs that end - up in the Mesos sandbox (currently unavailable). - -* ``--cpu-shares``: This is the value set in ``marathon.yaml`` as "cpus". - -* ``--memory``: This is the value set in ``marathon.yaml`` as "mem". - -* ``--memory-swap``: Total memory limit (memory + swap). We set this to the same value - as "mem", rounded up to the nearest MB, to prevent containers being able to swap. - -* ``--net``: PaaSTA uses bridge mode to enable random port allocation. - -* ``--env``: Any environment variables specified in the ``env`` section will be here. Additional - ``PAASTA_``, ``MARATHON_``, and ``MESOS_`` environment variables will also be injected, see the - `related docs `_ for more information. - -* ``--publish``: Mesos picks a random port on the host that maps to and exposes - port 8888 inside the container. This random port is announced to Smartstack - so that it can be used for load balancing. +Kubernetes launches the Docker containers that comprise a PaaSTA service. Once a Pod is scheduled to start, the kubelet on the node running the Pod interacts with the container runtime +through the Container Runtime Interface (CRI) to start the container defined in the Pod specification. -* ``--privileged``: Containers run by PaaSTA are not privileged. 
- -* ``--restart``: No restart policy is set on PaaSTA containers. Restarting - tasks is left as a job for the Framework (Marathon). - -* ``--rm``: Mesos containers are rm'd after they finish. - -* ``--tty``: Mesos containers are *not* given a tty. - -* ``--volume``: Volume mapping is controlled via the paasta_tools - configuration. PaaSTA uses the volumes declared in ``/etc/paasta/volumes.json`` - as well as per-service volumes declared in ``extra_volumes`` declared - in the `soa-configs `_. - -* ``--workdir``: Mesos containers are launched in a temporary "workspace" - directory on disk. Use the workdir sparingly and try not to output files. - -Mesos is the actual system that runs the docker images. In Mesos land these are -called "TASKS". PaaSTA-configured tasks use exponential backoff to prevent -unhealthy tasks from continuously filling up disks and logs -- the more times -that your service has failed to start, the longer Mesos will wait before -trying to start it again. - -Mesos *will* healthcheck the task based on the same healthcheck that SmartStack -uses, in order to prune unhealthy tasks. This pruning is less aggressive than -SmartStack's checking, so a dead task will go DOWN in SmartStack before it is -reaped by Marathon. By default the healthchecks occur every 10 seconds, and a service -must fail 30 times before that task is pruned and a new one is launched in its place. -This means a task had 5 minutes by default to properly respond to its healthchecks. +Note: Kubernetes supports containerd as the Container Runtime. Time Zones In Docker Containers ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -180,15 +124,11 @@ Monitoring PaaSTA gives you a few `Sensu `_-powered monitoring checks for free: -* `setup_marathon_job `_: - Alerts when a Marathon service cannot be deployed or bounced for some reason. - It will resolve when a service has been successfully deployed/bounced. - -* `check_marathon_services_replication `_: +* **check_kubernetes_services_replication**: runs periodically and sends an alert if fewer than 50% of the requested instances are deployed on a cluster. If the service is registered in Smartstack it will look in Smartstack to count the available instances. Otherwise it - counts the number of healthy tasks in Mesos. + counts the number of healthy Pods in Kubernetes. The PaaSTA command line @@ -197,7 +137,7 @@ The PaaSTA command line The PaaSTA command line interface, ``paasta``, gives users of PaaSTA the ability to inspect the state of services, as well as stop and start existing services. See the man pages for a description and detail of options for any -individual paasta command. Some of the most frequently used commands are +individual PaaSTA command. Some of the most frequently used commands are listed below: * ``paasta start`` - sets the desired state of the service instance to @@ -214,4 +154,4 @@ listed below: **NB**: ``paasta stop`` is a temporary measure; that is, it's effect only lasts until you deploy a new version of your service. That means that if you run ``paasta stop`` and push a version of the docker image serving your service, then - paasta will reset the effect of ``paasta stop``. + PaaSTA will reset the effect of ``paasta stop``. diff --git a/docs/source/yelpsoa_configs.rst b/docs/source/yelpsoa_configs.rst index 1aade1783c..dfc80c2a53 100644 --- a/docs/source/yelpsoa_configs.rst +++ b/docs/source/yelpsoa_configs.rst @@ -14,7 +14,7 @@ so you are free to use them for YAML templates. 
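As a sketch of the "YAML templates" idea mentioned above, shared values can be declared once and merged into several instances with YAML anchors. The ``_template`` key name and the convention that such helper keys are ignored are assumptions for illustration; check the rest of this page for the exact rules::

    # kubernetes-<clustername>.yaml (illustrative only)
    _template: &defaults        # a helper key used purely as a YAML anchor
      cpus: 1
      mem: 1024

    main:
      <<: *defaults             # merge the shared defaults
      instances: 10

    canary:
      <<: *defaults
      instances: 1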
**Note** that service names (the name of the folder where your config file is located) should be no more than 63 characters. For kubernetes services (config files with kubernetes as prefix), the instance names should be no more than 63 characters as well. -_ is counted as two character. We convert _ to -- because underscore is not allowed in kubernetes pod names. +_ is counted as two characters. We convert _ to -- because underscores are not allowed in kubernetes Pod names. Example:: @@ -41,26 +41,21 @@ Example:: All configuration files that define something to launch on a PaaSTA Cluster can specify the following options: - * ``cpus``: Number of CPUs an instance needs. Defaults to 1. CPUs in Mesos - are "shares" and represent a minimal amount of a CPU to share with a task - relative to the other tasks on a host. A task can burst to use any - available free CPU, but is guaranteed to get the CPU shares specified. For - a more detailed read on how this works in practice, see the docs on `isolation `_. + * ``cpus``: Number of CPUs an instance needs. Defaults to 1. CPUs in Kubernetes + are "shares" and represent a minimal amount of a CPU to share with a Pod + relative to the other Pods on a host. For a more detailed read on + how this works in practice, see the docs on `isolation `_. * ``cpu_burst_add``: Maximum number of additional CPUs an instance may use while bursting; if unspecified, PaaSTA defaults to 1 for long-running services, and 0 for scheduled jobs (Tron). For example, if a service specifies that it needs 2 CPUs normally and 1 for burst, the service may go up to 3 CPUs, if needed. - * ``mem``: Memory (in MB) an instance needs. Defaults to 4096 (4GB). In Mesos - memory is constrained to the specified limit, and tasks will reach + * ``mem``: Memory (in MB) an instance needs. Defaults to 4096 (4GB). In Kubernetes + memory is constrained to the specified limit, and containers will reach out-of-memory (OOM) conditions if they attempt to exceed these limits, and - then be killed. There is currently not way to detect if this condition is - met, other than a ``TASK_FAILED`` message. For more a more detailed read on + then be killed. For a more detailed read on how this works, see the docs on `isolation `_ - * ``disk``: Disk (in MB) an instance needs. Defaults to 1024 (1GB). Disk limits - may or may not be enforced, but services should set their ``disk`` setting - regardless to ensure the scheduler has adequate information for distributing - tasks. + * ``disk``: Disk (in MB) an instance needs. Defaults to 1024 (1GB). * ``env``: A dictionary of environment variables that will be made available to the container. PaaSTA additionally will inject the following variables automatically (keep in mind all environment variables are strings in a shell): @@ -74,11 +69,13 @@ specify the following options: * ``PAASTA_GIT_SHA``: The short git sha of the code the container has * ``PAASTA_DEPLOY_GROUP``: The `deploy group `_ specified * ``PAASTA_MONITORING_TEAM``: The team that is configured to get alerts. - * ``PAASTA_LAUNCHED_BY``: May not be present. If present, will have the username of the user who launched the paasta container + * ``PAASTA_LAUNCHED_BY``: May not be present.
If present, will have the username of the user who launched the PaaSTA container * ``PAASTA_RESOURCE_CPUS``: Number of cpus allocated to a container * ``PAASTA_RESOURCE_MEM``: Amount of ram in MB allocated to a container * ``PAASTA_RESOURCE_DISK``: Amount of disk space in MB allocated to a container * ``PAASTA_RESOURCE_GPUS``: Number of GPUS (if requested) allocated to a container + * ``PAASTA_IMAGE_VERSION``: The version of the docker image + * ``PAASTA_INSTANCE_TYPE``: The instance type of the service (e.g: tron, kubernetes, eks, etc) * ``extra_volumes``: An array of dictionaries specifying extra bind-mounts @@ -112,7 +109,7 @@ Placement Options ----------------- Placement options provide control over how PaaSTA schedules a task, whether it -is scheduled by Marathon (on Mesos), Kubernetes, Tron, or ``paasta remote-run``. +is scheduled by Kubernetes, Tron, or ``paasta remote-run``. Most commonly, it is used to restrict tasks to specific locations. .. _general-placement-options: @@ -120,7 +117,7 @@ Most commonly, it is used to restrict tasks to specific locations. General ^^^^^^^ -These options are applicable to tasks scheduled through Mesos or Kubernetes. +These options are applicable to tasks scheduled through Kubernetes. * ``deploy_blacklist``: A list of lists indicating a set of locations to *not* deploy to. For example: @@ -256,7 +253,7 @@ For more information on selector operators, see the official Kubernetes documentation on `node affinities `_. - * ``pod_management_policy``: An option for applications managed with `StatefulSets `_ to determine if the pods are managed in parallel or in order. + * ``pod_management_policy``: An option for applications managed with `StatefulSets `_ to determine if the Pods are managed in parallel or in order. The default value is `OrderedReady `_. It can be set to `Parallel `_. For example:: @@ -264,31 +261,6 @@ documentation on `node affinities pod_management_policy: Parallel -.. _mesos-placement-options: - -Mesos -^^^^^ - -These options are applicable only to tasks scheduled on Mesos. - - * ``constraints``: Overrides the default placement constraints for services. - Should be defined as an array of arrays (E.g ``[["habitat", "GROUP_BY"]]`` - or ``[["habitat", "GROUP_BY"], ["hostname", "UNIQUE"]]``). Defaults to - ``[[", "GROUP_BY"], ["pool", "LIKE", ], - [, "UNLIKE", ], ...]`` - where ```` is defined by the ``discover`` attribute - in ``smartstack.yaml``, ```` is defined by the ``pool`` attribute in - ``marathon.yaml``, and ``deploy_blacklist_type`` and - ``deploy_blacklist_value`` are defined in the ``deploy_blacklist`` attribute - in marathon.yaml. For more details and other constraint types, see the - official `Marathon constraint documentation - `_. - - * ``extra_constraints``: Adds to the default placement constraints for - services. This acts the same as ``constraints``, but adds to the default - constraints instead of replacing them. See ``constraints`` for details on - format and the default constraints. - ``kubernetes-[clustername].yaml`` ------------------------------- @@ -401,7 +373,7 @@ instance MAY have: Default value is 0.8. * ``desired_active_requests_per_replica``: Only valid for the ``active-requests`` metrics provider. The - target number of requests per second each pod should be receiving. + target number of requests per second each Pod should be receiving. 
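To make the ``desired_active_requests_per_replica`` target above concrete, here is a rough sketch of the arithmetic an autoscaler could perform with it (a sketch under assumed behaviour, not PaaSTA's actual autoscaler code; the function and argument names are made up)::

    import math

    def desired_replicas(total_rps: float, target_rps_per_replica: float,
                         min_instances: int, max_instances: int) -> int:
        """Scale so each replica sees roughly the target requests per second."""
        wanted = math.ceil(total_rps / target_rps_per_replica)
        return max(min_instances, min(max_instances, wanted))

    # e.g. 900 req/s with a target of 100 req/s per Pod -> 9 replicas (within bounds)
    print(desired_replicas(900, 100, min_instances=2, max_instances=20))  # 9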
* ``max_instances_alert_threshold``: If the autoscaler has scaled your service to ``max_instances``, and the service's utilization (as measured by your ``metrics_provider``) is above this value, you'll get an alert. @@ -458,7 +430,7 @@ instance MAY have: A failing readiness probe will not restart the instance, it will however be removed from the mesh and not receive any new traffic. - To add an additional delay after the pod has started and before probes should + To add an additional delay after the Pod has started and before probes should start, see ``min_task_uptime``. * ``healthcheck_interval_seconds``: Kubernetes will wait this long between @@ -475,6 +447,10 @@ instance MAY have: Defaults to the same uri specified in ``smartstack.yaml``, but can be set to something different here. + * ``net``: Specify which kind of + `networking mode `_ + adhoc containers of this service should be launched using. Defaults to ``'bridge'``. + * ``prometheus_shard``: Optional name of Prometheus shard to be configured to scrape the service. This shard should already exist and will not be automatically created. @@ -490,12 +466,12 @@ instance MAY have: accessed externally. This option is implied when registered to smartstack or when specifying a ``prometheus_port``. Defaults to ``false`` - * ``weight``: Load balancer/service mesh weight to assign to pods belonging to this instance. - Pods should receive traffic proportional to their weight, i.e. a pod with - weight 20 should receive 2x as much traffic as a pod with weight 10. + * ``weight``: Load balancer/service mesh weight to assign to Pods belonging to this instance. + Pods should receive traffic proportional to their weight, i.e. a Pod with + weight 20 should receive 2x as much traffic as a Pod with weight 10. Defaults to 10. Must be an integer. - This only makes a difference when some pods in the same load balancer have different weights than others, such as when you have two or more instances with the same ``registration`` but different ``weight``. + This only makes a difference when some Pods in the same load balancer have different weights than others, such as when you have two or more instances with the same ``registration`` but different ``weight``. * ``lifecycle``: A dictionary of additional options that adjust the termination phase of the `pod lifecycle `_: This currently supports two sub-keys: @@ -506,205 +482,28 @@ instance MAY have: * ``termination_grace_period_seconds``: the number of seconds to allow before forcibly killing your instance. Note that the instance will be forcibly killed after this period, so your pre_stop_command should complete well within this time period! * ``namespace``: - **Currently in development, do not use.** The Kubernetes namespace where Paasta will create objects related to this service. Defaults to ``paastasvc-service--name`` (that is, the service name will have underscores replaced with ``--``.) -**Note**: Although many of these settings are inherited from ``smartstack.yaml``, -their thresholds are not the same. The reason for this has to do with control -loops and infrastructure stability. The load balancer tier can be pickier -about which copies of a service it can send requests to, compared to Mesos. - -A load balancer can take a container out of service and put it back in a few -seconds later. Minor flaps and transient errors are tolerated. 
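A small worked example of the ``weight`` behaviour described above (instance names and numbers are hypothetical)::

    def traffic_shares(weights: dict) -> dict:
        """Expected fraction of traffic per instance, proportional to its weight."""
        total = sum(weights.values())
        return {name: weight / total for name, weight in weights.items()}

    # A Pod with weight 20 receives twice the traffic of a Pod with weight 10.
    print(traffic_shares({"main": 20, "canary": 10}))  # {'main': ~0.67, 'canary': ~0.33}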
- -The healthchecks specified here in this file signal to the infrastructure that -a container is unhealthy, and the action to take is to completely destroy it and -launch it elsewhere. This is a more expensive operation than taking a container -out of the load balancer, so it justifies having less sensitive thresholds. - -``marathon-[clustername].yaml`` -------------------------------- - -e.g. ``marathon-pnw-prod.yaml``, ``marathon-mesosstage.yaml``. The -clustername is usually the same as the ``superregion`` in which the cluster -lives (``pnw-prod``), but not always (``mesosstage``). It MUST be all -lowercase. (non alphanumeric lowercase characters are ignored) - -**Note:** All values in this file except the following will cause PaaSTA to -`bounce `_ the service: - -* ``min_instances`` -* ``instances`` -* ``max_instances`` -* ``backoff_seconds`` - -Top level keys are instance names, e.g. ``main`` and ``canary``. Each -instance MAY have: - - * Anything in the `Common Settings`_. - - * Anything from :ref:`General Placement Options ` - and :ref:`Mesos Placement Options `. - - * ``cap_add``: List of capabilities that are passed to Docker. Defaults - to empty list. Example:: - - "cap_add": ["IPC_LOCK", "SYS_PTRACE"] - - * ``instances``: Marathon will attempt to run this many instances of the Service - - * ``min_instances``: When autoscaling, the minimum number of instances that - marathon will create for a service. Defaults to 1. - - * ``max_instances``: When autoscaling, the maximum number of instances that - marathon will create for a service - - * ``registrations``: A list of SmartStack registrations (service.namespace) - where instances of this PaaSTA service ought register in. In SmartStack, - each service has difference pools of backend servers that are listening on - a particular port. In PaaSTA we call these "Registrations". By default, the - Registration assigned to a particular instance in PaaSTA has the *same name*, - so a service ``foo`` with a ``main`` instance will correspond to the - ``foo.main`` Registration. This would correspond to the SmartStack - namespace defined in the Registration service's ``smartstack.yaml``. This - ``registrations`` option allows users to make PaaSTA instances appear - under an *alternative* namespace (or even service). For example - ``canary`` instances can have ``registrations: ['foo.main']`` to route - their traffic to the same pool as the other ``main`` instances. - - * ``backoff_factor``: PaaSTA will automatically calculate the duration of an - application's backoff period in case of a failed launch based on the number - of instances. For each consecutive failure that duration is multiplied by - ``backoff_factor`` and added to the previous value until it reaches - ``max_launch_delay_seconds``. See `Marathon's API docs `_ - for more information. Defaults to 2. - - * ``max_launch_delay_seconds``: The maximum time marathon will wait between attempts - to launch an app that previously failed to launch. See `Marathon's API docs - `_ for more information. Defaults to 300 seconds. - - .. _net: - - * ``net``: Specify which kind of - `networking mode `_ - instances of this service should be launched using. Defaults to ``'bridge'``. - - * ``container_port``: Specify the port to expose when in ``bridge`` mode. - Defaults to ``8888``. - - * ``bounce_method``: Controls the bounce method; see `bounce_lib `_ - - * ``bounce_health_params``: A dictionary of parameters for get_happy_tasks. 
- - * ``check_haproxy``: Boolean indicating if PaaSTA should check the local - haproxy to make sure this task has been registered and discovered - (Defaults to ``True`` if service is in SmartStack) - - * ``min_task_uptime``: Minimum number of seconds that a task must be - running before we consider it healthy (Disabled by default) - - * ``haproxy_min_fraction_up``: if ``check_haproxy`` is True, we check haproxy on up to 20 boxes to see whether a task is available. - This fraction of boxes must agree that the task is up for the bounce to treat a task as healthy. - Defaults to 1.0 -- haproxy on all queried boxes must agree that the task is up. - - * ``bounce_margin_factor``: proportionally increase the number of old instances - to be drained when the crossover bounce method is used. - 0 < bounce_margin_factor <= 1. Defaults to 1 (no influence). - This allows bounces to proceed in the face of a percentage of failures. - It doesn’t affect any other bounce method but crossover. - See `the bounce docs `_ for a more detailed description. - - * ``bounce_start_deadline``: a floating point number of seconds to add to the deadline when deployd notices a change - to soa-configs or the marked-for-deployment version of an instance. - Defaults to 0. (deadline = now) - When deployd has a queue of instances to process, it will choose to process instances with a lower deadline first. - Set this to a large positive number to allow deployd to process other instances before this one, even if their - soa-configs change or mark-for-deployment happened after this one. - This setting only affects the first time deployd processes an instance after a change -- - instances that need to be reprocessed will be reenqueued normally. - - * ``drain_method``: Controls the drain method; see `drain_lib - `_. Defaults to ``noop`` for - instances that are not in Smartstack, or ``hacheck`` if they are. - - * ``drain_method_params``: A dictionary of parameters for the specified - drain_method. Valid parameters are any of the kwargs defined for the - specified bounce_method in `drain_lib `_. - - * ``cmd``: The command that is executed. Can be used as an alternative to - args for containers without an `entrypoint - `_. This value is - wrapped by Mesos via ``/bin/sh -c ${app.cmd}``. Parsing the Marathon config - file will fail if both args and cmd are specified [#note]_. - - * ``args``: An array of docker args if you use the `"entrypoint" - `_ functionality. - Parsing the Marathon config file will fail if both args and cmd are - specified [#note]_. - - * ``monitoring``: See the `monitoring.yaml`_ section for details. - - * ``autoscaling``: See the `autoscaling docs `_ for valid options and how they work - - * ``metrics_provider``: Which method PaaSTA will use to determine a service's utilization. - - * ``decision_policy``: Which method PaaSTA will use to determine when to autoscale a service. - - * ``deploy_group``: A string identifying what deploy group this instance belongs - to. The ``step`` parameter in ``deploy.yaml`` references this value - to determine the order in which to build & deploy deploy groups. Defaults to - ``clustername.instancename``. See the deploy group doc_ for more information. - - * ``replication_threshold``: An integer representing the percentage of instances that - need to be available for monitoring purposes. If less than ``replication_threshold`` - percent instances of a service's backends are not available, the monitoring - scripts will send a CRITICAL alert. 
- -In addition, each instancename MAY configure additional Marathon healthcheck -options (Read the official -`mesos documentation `_ -for more low-level details: - - * ``healthcheck_mode``: One of ``cmd``, ``tcp``, ``http``, or ``https``. - If set to ``http`` or ``https``, a ``curl`` command will be executed - inside the container. + * ``autotune_limits``: Optionally set lower and upper-bounds for autotuned resources. Useful for services that have a known range of resource usage, but that have usage patterns that don't play well with our autotune system. + This is a dictionary with the following keys: - If set to ``cmd`` then PaaSTA will execute ``healthcheck_cmd`` and - examine the return code. It must return 0 to be considered healthy. + * ``cpu``: A dictionary with the keys ``min`` and ``max``. These are the lower and upper bounds for the CPU limit, respectively. + * ``memory``: A dictionary with the keys ``min`` and ``max``. These are the lower and upper bounds for the memory limit, respectively. + * ``disk``: A dictionary with the keys ``min`` and ``max``. These are the lower and upper bounds for the disk limit, respectively. - If the service is registered in SmartStack, the healthcheck_mode will - automatically use the same setings specified by ``smartstack.yaml``. + Example:: - If not in smartstack, the default healthcheck is "None", which means - the container is considered healthy unless it crashes. + autotune_limits: + cpu: + min: 0.1 + memory: + max: 1024 + disk: + min: 1000 + max: 10000 - A http healthcheck is considered healthy if it returns a 2xx or 3xx - response code. - - * ``healthcheck_cmd``: If ``healthcheck_mode`` is set to ``cmd``, then this - command is executed inside the container as a healthcheck. It must exit - with status code 0 to signify a successful healthcheck. Any other exit code - is treated as a failure. This is a required field if ``healthcheck_mode`` - is ``cmd``. - - * ``healthcheck_grace_period_seconds``: Marathon will wait this long for a - service to come up before counting failed healthchecks. Defaults to 60 - seconds. - - * ``healthcheck_interval_seconds``: Marathon will wait this long between - healthchecks. Defaults to 10 seconds. - - * ``healthcheck_timeout_seconds``: Marathon will wait this long for a - healthcheck to return before considering it a failure. Defaults to 10 - seconds. - - * ``healthcheck_max_consecutive_failures``: Marathon will kill the current - task if this many healthchecks fail consecutively. Defaults to 30 attempts. - - * ``healthcheck_uri``: The url of the service to healthcheck if using http. - Defaults to the same uri specified in ``smartstack.yaml``, but can be - set to something different here. + NOTE: it's possible to set only one of the bounds, if you only want to set a lower or upper bound (i.e., setting both min/max is not required). **Note**: Although many of these settings are inherited from ``smartstack.yaml``, their thresholds are not the same. The reason for this has to do with control @@ -719,29 +518,6 @@ a container is unhealthy, and the action to take is to completely destroy it and launch it elsewhere. This is a more expensive operation than taking a container out of the load balancer, so it justifies having less sensitive thresholds. -**Footnotes**: - -.. [#note] The Marathon docs and the Docker docs are inconsistent in their - explanation of args/cmd: - - The `Marathon docs - `_ - state that it is invalid to supply both cmd and args in the same app. 
- - The `Docker docs `_ - do not state that it's incorrect to specify both args and cmd. Furthermore, - they state that "Command line arguments to docker run will be - appended after all elements in an exec form ENTRYPOINT, and will override - all elements specified using CMD" which implies that both cmd and args can - be provided, but cmd will be silently ignored. - - To avoid issues resulting from this discrepancy, we abide by the stricter - requirements from Marathon and check that no more than one of cmd and args - is specified. If both are specified, an exception is thrown with an - explanation of the problem, and the program terminates. - -.. _doc: deploy_groups.html - ``tron-[clustername].yaml`` -------------------------------- @@ -797,8 +573,6 @@ Each Tron **action** of a job MAY specify the following: * Anything in the `Common Settings`_. * Anything from :ref:`General Placement Options ` - and :ref:`Mesos Placement Options ` (currently, Tron - only supports Mesos workloads). * ``service``: Uses a docker image from different service. When ``service`` is set for an action, that setting takes precedence over what is set for the job. @@ -827,7 +601,7 @@ If a Tron **action** of a job is of executor type ``spark``, it MAY specify the * ``spark_args``: Dictionary of spark configurations documented in https://spark.apache.org/docs/latest/configuration.html. Note some configurations are non- - user-editable as they will be populated by paasta tools. See + user-editable as they will be populated by PaaSTA. See https://github.com/Yelp/service_configuration_lib/blob/master/service_configuration_lib/spark_config.py#L9 for a complete list of such configurations. @@ -852,7 +626,22 @@ Each instance MAY have: * ``deploy_group`` -See the `marathon-[clustername].yaml`_ section for details for each of these parameters. +See the `kubernetes-[clustername].yaml`_ section for details for each of these parameters. + +**Footnotes**: + +.. [#note] The Docker docs explanation on using both args and cmd: + The `Docker docs `_ + do not state that it's incorrect to specify both args and cmd. Furthermore, + they state that "Command line arguments to docker run will be + appended after all elements in an exec form ENTRYPOINT, and will override + all elements specified using CMD" which implies that both cmd and args can + be provided, but cmd will be silently ignored. + + To avoid issues resulting from this discrepancy, we abide by the stricter + requirements and check that no more than one of cmd and args + is specified. If both are specified, an exception is thrown with an + explanation of the problem, and the program terminates. ``smartstack.yaml`` ------------------- @@ -873,7 +662,7 @@ Here is an example smartstack.yaml:: The ``main`` key is the service namespace. Namespaces were introduced for PaaSTA services in order to support running multiple daemons from a single -service codebase. In PaaSTA, each instance in your marathon.yaml maps to a +service codebase. In PaaSTA, each instance in your kubernetes.yaml maps to a smartstack namespace of the same name, unless you specify a different ``registrations``. @@ -891,7 +680,7 @@ Basic HTTP and TCP options it will generate synapse discovery files on every host, but no listening port will be allocated. This must be unique across all environments where PaaSTA (or synapse) runs. At Yelp, we pick from the range [19000, 21000]. 
- Feel free to pick the next available value -- paasta fsm will do this for + Feel free to pick the next available value -- ``paasta fsm`` will do this for you automatically! * ``mode``: string of value ``http`` or ``tcp``, specifying whether the service @@ -1089,6 +878,7 @@ These keys provide optional overrides for the default alerting behaviour. page_nonprod: true error_threshold_ratio: 0.02 minimum_error_rps: 10 + default_endpoint_alerting: true endpoints: - name: GET /something - name: GET /something/else @@ -1108,11 +898,15 @@ These keys provide optional overrides for the default alerting behaviour. respects the paging behaviour set in the monitoring.yaml file. Override that here if required. - ``page_nonprod``: Override the default paging behaviour for non-production environments. Defaults to **false**. - - ``error_threshold_ratio``: Error threshold ratio (0-1). Defaults to **0.01**. - - ``minimum_error_rps``: Minimum error rate per second, minimum is zero. Defaults to **5**. - - ``endpoints``: List of endpoints to create alerts for. + - ``error_threshold_ratio``: Error threshold ratio (0-1) for errors under this namespace. Defaults to **0.01**. + - ``minimum_error_rps``: Minimum error rate per second for errors under this namespace before an alert can be triggered, minimum is zero. Defaults to **5**. + - ``default_endpoint_alerting``: Turn on alerts for all endpoints in this namespace. Defaults to **false**. + - ``endpoint_error_threshold_ratio``: Error threshold ratio (0-1) for errors to any singular endpoint. Defaults to the namespace ``error_threshold_ratio`` if specified, or **0.01**. + - ``endpoint_minimum_error_rps``: Minimum error rate per second for errors to any singular endpoint before an alert can be triggered for errors to any singular endpoint. Defaults to the namespace ``minimum_error_rps`` if specified, or **5**. + - ``endpoints``: List of endpoints to create specific alerts for. - ``name``: The name of the endpoint. - - ``error_threshold_ratio``: Error threshold ratio (0-1). If not specified the threshold will be inherited from the parent. + - ``error_threshold_ratio``: Error threshold ratio (0-1). If not specified the threshold will be inherited from the ``endpoint_error_threshold_ratio``; if that is not specified then the namespace's ``error_threshold_ratio``; otherwise **0.01**. + - ``minimum_error_rps``: Minimum error rate per second for the endpoint. Minimum is zero. If not specified the threshold will be inherited from the ``endpoint_minimum_error_rps``; if that is not specified then the namespace's ``minimum_error_rps``; otherwise **5**. Moving a Service to a different location type ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1128,12 +922,6 @@ An example of switching from region to superregion discovery: - advertise: [region] + advertise: [region, superregion] -1b. When moving from a large grouping to a smaller grouping (like -moving from superregion => region) you must add an additional constraint -to ensure Marathon balances the tasks evenly:: - - extra_constraints: [['region', 'GROUP_BY', 2]] - 2. (Optional) Use zkCli.sh to monitor your new registrations for each superregion you are changing:: @@ -1143,7 +931,7 @@ superregion you are changing:: [host1-uswest1adevc_0000015910, host2-uswest1cdevc_0000015898, host3-uswest1cdevc_0000015893] [zk: 10.40.5.6:22181(CONNECTED) 2] -2b. Run ``paasta status -v`` to verify that Marathon has balanced services +2b. 
Run ``paasta status -v`` to verify that PaaSTA has balanced services across the infrastructure as expected. 3. Once zookeeper shows the proper servers, switch the discovery key:: @@ -1252,7 +1040,7 @@ An example of a service that only pages on a cluster called "prod":: team: devs page: false - # marathon-prod.yaml + # kubernetes-prod.yaml main: instances: 3 monitoring: @@ -1271,13 +1059,13 @@ A service that pages everywhere, but only makes a ticket for a tron job:: page: false ticket: true -A marathon/kubernetes service that overrides options on different instances (canary):: +A kubernetes service that overrides options on different instances (canary):: # monitoring.yaml team: frontend page: false - # marathon-prod.yaml or kubernetes-prod.yaml + # kubernetes-prod.yaml main: instances: 20 monitoring: diff --git a/example_cluster/dind-cluster-v1.13.sh b/example_cluster/dind-cluster-v1.13.sh index 0f571f1c5b..3ae8f4515a 100755 --- a/example_cluster/dind-cluster-v1.13.sh +++ b/example_cluster/dind-cluster-v1.13.sh @@ -1703,7 +1703,7 @@ spec: - key: CriticalAddonsOnly operator: Exists - effect: NoSchedule - key: node-role.kubernetes.io/master + key: node-role.kubernetes.io/control-plane operator: Exists volumes: - name: lib-modules @@ -1819,7 +1819,7 @@ function dind::up { else # FIXME: this may fail depending on k8s/kubeadm version # FIXME: check for taint & retry if it's there - "${kubectl}" --context "$ctx" taint nodes $(dind::master-name) node-role.kubernetes.io/master- || true + "${kubectl}" --context "$ctx" taint nodes $(dind::master-name) node-role.kubernetes.io/control-plane- || true fi case "${CNI_PLUGIN}" in bridge | ptp) diff --git a/paasta_tools/__init__.py b/paasta_tools/__init__.py index 0a818691dd..e809f32fcc 100644 --- a/paasta_tools/__init__.py +++ b/paasta_tools/__init__.py @@ -17,4 +17,4 @@ # setup phase, the dependencies may not exist on disk yet. # # Don't bump version manually. 
See `make release` docs in ./Makefile -__version__ = "1.4.33" +__version__ = "1.8.2" diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index a7ffb68434..1518b6b65d 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1480,6 +1480,12 @@ paths: required: false schema: type: boolean + - description: Search all namespaces for running copies + in: query + name: all_namespaces + required: false + schema: + type: boolean responses: "200": content: diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json index 7b0a30b051..9caa3c8cac 100644 --- a/paasta_tools/api/api_docs/swagger.json +++ b/paasta_tools/api/api_docs/swagger.json @@ -540,6 +540,13 @@ "name": "new", "required": false, "type": "boolean" + }, + { + "in": "query", + "description": "Search all namespaces for running copies", + "name": "all_namespaces", + "required": false, + "type": "boolean" } ] } diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 2a9c3af0b5..945a1d5187 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -136,6 +136,7 @@ def instance_status(request): instance = request.swagger_data.get("instance") verbose = request.swagger_data.get("verbose") or 0 use_new = request.swagger_data.get("new") or False + all_namespaces = request.swagger_data.get("all_namespaces") or False include_envoy = request.swagger_data.get("include_envoy") if include_envoy is None: include_envoy = True @@ -199,6 +200,7 @@ def instance_status(request): use_new=use_new, instance_type=instance_type, settings=settings, + all_namespaces=all_namespaces, ) ) elif instance_type == "tron": diff --git a/paasta_tools/cli/cmds/get_image_version.py b/paasta_tools/cli/cmds/get_image_version.py index 4d2e0b7efa..4bc7d36134 100644 --- a/paasta_tools/cli/cmds/get_image_version.py +++ b/paasta_tools/cli/cmds/get_image_version.py @@ -27,6 +27,7 @@ from paasta_tools.utils import get_pipeline_deploy_group_configs from paasta_tools.utils import list_services from paasta_tools.utils import load_v2_deployments_json +from paasta_tools.utils import optionally_load_system_paasta_config from paasta_tools.utils import parse_timestamp @@ -71,9 +72,16 @@ def add_subparser(subparsers: argparse._SubParsersAction) -> None: def check_enable_automated_redeploys(service: str, soa_dir: str) -> bool: - # TODO: Handle global flag deploy_steps = get_pipeline_deploy_group_configs(service, soa_dir) - return any([step.get("enable_automated_redeploys", False) for step in deploy_steps]) + enabled_by_default = ( + optionally_load_system_paasta_config().get_enable_automated_redeploys_default() + ) + return any( + [ + step.get("enable_automated_redeploys", enabled_by_default) + for step in deploy_steps + ] + ) def extract_timestamp(image_version: str) -> Optional[datetime.datetime]: diff --git a/paasta_tools/cli/cmds/local_run.py b/paasta_tools/cli/cmds/local_run.py index 3905b6921e..da3b88a0f3 100755 --- a/paasta_tools/cli/cmds/local_run.py +++ b/paasta_tools/cli/cmds/local_run.py @@ -39,6 +39,7 @@ from paasta_tools.cli.utils import figure_out_service_name from paasta_tools.cli.utils import get_instance_config from paasta_tools.cli.utils import get_service_auth_token +from paasta_tools.cli.utils import get_sso_service_auth_token from paasta_tools.cli.utils import lazy_choices_completer from paasta_tools.cli.utils import list_instances from paasta_tools.cli.utils import pick_random_port @@ -507,17 +508,6 @@ 
def add_subparser(subparsers): required=False, default=False, ) - list_parser.add_argument( - "--use-service-auth-token", - help=( - "Acquire service authentication token for the underlying instance," - " and set it in the container environment" - ), - action="store_true", - dest="use_service_auth_token", - required=False, - default=False, - ) list_parser.add_argument( "--sha", help=( @@ -540,6 +530,29 @@ def add_subparser(subparsers): "Same as the -v / --volume parameter to docker run: hostPath:containerPath[:mode]" ), ) + service_auth_group = list_parser.add_mutually_exclusive_group() + service_auth_group.add_argument( + "--use-service-auth-token", + help=( + "Acquire service authentication token for the underlying instance," + " and set it in the container environment" + ), + action="store_true", + dest="use_service_auth_token", + required=False, + default=False, + ) + service_auth_group.add_argument( + "--use-sso-service-auth-token", + help=( + "Acquire service authentication token from SSO provider," + " and set it in the container environment" + ), + action="store_true", + dest="use_sso_service_auth_token", + required=False, + default=False, + ) list_parser.set_defaults(command=paasta_local_run) @@ -830,6 +843,7 @@ def run_docker_container( use_okta_role=False, assume_role_aws_account: Optional[str] = None, use_service_auth_token: bool = False, + use_sso_service_auth_token: bool = False, ): """docker-py has issues running a container with a TTY attached, so for consistency we execute 'docker run' directly in both interactive and @@ -921,6 +935,8 @@ def run_docker_container( if use_service_auth_token: environment["YELP_SVC_AUTHZ_TOKEN"] = get_service_auth_token() + elif use_sso_service_auth_token: + environment["YELP_SVC_AUTHZ_TOKEN"] = get_sso_service_auth_token() local_run_environment = get_local_run_environment_vars( instance_config=instance_config, port0=chosen_port, framework=framework @@ -1271,6 +1287,7 @@ def configure_and_run_docker_container( assume_role_aws_account=assume_role_aws_account, use_okta_role=args.use_okta_role, use_service_auth_token=args.use_service_auth_token, + use_sso_service_auth_token=args.use_sso_service_auth_token, ) diff --git a/paasta_tools/cli/cmds/logs.py b/paasta_tools/cli/cmds/logs.py index 51b76a1298..f73f02286a 100644 --- a/paasta_tools/cli/cmds/logs.py +++ b/paasta_tools/cli/cmds/logs.py @@ -329,6 +329,12 @@ def paasta_app_output_passes_filter( # https://github.com/gweis/isodate/issues/53 except ValueError: return True + except AttributeError: + # Timestamp might be missing. We had an issue where OTel was splitting overly long log lines + # and not including timestamps in the resulting log records (OBSPLAT-2216). + # Although this was then fixed in OTel, we should not rely on timestamps being present, + # as the format cannot be guaranteed. 
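+        # If we cannot parse a timestamp at all, err on the side of excluding the line from time-filtered results.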
+ return False if not check_timestamp_in_range(timestamp, start_time, end_time): return False return ( @@ -1209,7 +1215,7 @@ def print_logs_by_time( for line in reader.get_log_reader( log_name=stream_name, start_datetime=start_time, end_datetime=end_time ): - if paasta_log_line_passes_filter( + if paasta_app_output_passes_filter( line, levels, service, @@ -1266,7 +1272,7 @@ async def tail_logs_from_nats() -> None: msg = await sub.next_msg(timeout=None) decoded_data = msg.data.decode("utf-8") - if paasta_log_line_passes_filter( + if paasta_app_output_passes_filter( decoded_data, levels, service, diff --git a/paasta_tools/cli/cmds/spark_run.py b/paasta_tools/cli/cmds/spark_run.py index 6da09265d9..3b9300ebbf 100644 --- a/paasta_tools/cli/cmds/spark_run.py +++ b/paasta_tools/cli/cmds/spark_run.py @@ -239,9 +239,9 @@ def add_subparser(subparsers): "default_pool" ) except PaastaNotConfiguredError: - default_spark_cluster = "pnw-devc" + default_spark_cluster = "pnw-devc-spark" default_spark_pool = "batch" - valid_clusters = ["spark-pnw-prod", "pnw-devc"] + valid_clusters = ["pnw-devc-spark", "pnw-prod-spark"] list_parser.add_argument( "-c", diff --git a/paasta_tools/cli/cmds/status.py b/paasta_tools/cli/cmds/status.py index 32f1324619..a831c38fb2 100644 --- a/paasta_tools/cli/cmds/status.py +++ b/paasta_tools/cli/cmds/status.py @@ -170,6 +170,14 @@ def add_subparser( default=DEFAULT_SOA_DIR, help="define a different soa config directory", ) + status_parser.add_argument( + "-A", + "--all-namespaces", + dest="all_namespaces", + action="store_true", + default=False, + help="Search all PaaSTA-managed namespaces for possible running versions (Will search only your currently-configured namespace by default). Useful if you are moving your instance(s) to a new namespace", + ) version = status_parser.add_mutually_exclusive_group() @@ -292,6 +300,7 @@ def paasta_status_on_api_endpoint( verbose: int, new: bool = False, is_eks: bool = False, + all_namespaces: bool = False, ) -> int: output = [ "", @@ -310,6 +319,7 @@ def paasta_status_on_api_endpoint( instance=instance, verbose=verbose, new=new, + all_namespaces=all_namespaces, ) except client.api_error as exc: output.append(PaastaColors.red(exc.reason)) @@ -709,8 +719,11 @@ def should_job_info_be_shown(cluster_state): def get_pod_uptime(pod_deployed_timestamp: str): - pod_creation_time = datetime.strptime(pod_deployed_timestamp, "%Y-%m-%dT%H:%M:%SZ") - pod_uptime = datetime.utcnow() - pod_creation_time + # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC + pod_creation_time = datetime.strptime( + pod_deployed_timestamp, "%Y-%m-%dT%H:%M:%SZ" + ).replace(tzinfo=timezone.utc) + pod_uptime = datetime.now(timezone.utc) - pod_creation_time pod_uptime_total_seconds = pod_uptime.total_seconds() pod_uptime_days = divmod(pod_uptime_total_seconds, 86400) pod_uptime_hours = divmod(pod_uptime_days[1], 3600) @@ -720,7 +733,7 @@ def get_pod_uptime(pod_deployed_timestamp: str): def append_pod_status(pod_status, output: List[str]): - output.append(f" Pods:") + output.append(" Pods:") rows: List[Union[str, Tuple[str, str, str, str]]] = [ ("Pod Name", "Host", "Phase", "Uptime") ] @@ -834,7 +847,7 @@ def _print_flink_status_from_job_manager( # So that paasta status -v and kubectl get pods show the same consistent result. 
if verbose and len(status["pod_status"]) > 0: append_pod_status(status["pod_status"], output) - output.append(f" No other information available in non-running state") + output.append(" No other information available in non-running state") return 0 if status["state"] == "running": @@ -844,7 +857,7 @@ def _print_flink_status_from_job_manager( service=service, instance=instance, client=client ) except Exception as e: - output.append(PaastaColors.red(f"Exception when talking to the API:")) + output.append(PaastaColors.red("Exception when talking to the API:")) output.append(str(e)) return 1 @@ -869,7 +882,7 @@ def _print_flink_status_from_job_manager( service=service, instance=instance, client=client ) except Exception as e: - output.append(PaastaColors.red(f"Exception when talking to the API:")) + output.append(PaastaColors.red("Exception when talking to the API:")) output.append(str(e)) return 1 @@ -880,7 +893,7 @@ def _print_flink_status_from_job_manager( try: jobs = a_sync.block(get_flink_job_details, service, instance, job_ids, client) except Exception as e: - output.append(PaastaColors.red(f"Exception when talking to the API:")) + output.append(PaastaColors.red("Exception when talking to the API:")) output.append(str(e)) return 1 @@ -896,7 +909,7 @@ def _print_flink_status_from_job_manager( max(10, shutil.get_terminal_size().columns - 52), max_job_name_length ) - output.append(f" Jobs:") + output.append(" Jobs:") if verbose > 1: output.append( f' {"Job Name": <{allowed_max_job_name_length}} State Job ID Started' @@ -1087,7 +1100,9 @@ def get_instance_state(status: InstanceStatusKubernetesV2) -> str: else: return PaastaColors.green("Running") else: - versions = sorted(status.versions, key=lambda x: x.create_timestamp) + versions = sorted( + status.versions, key=lambda x: x.create_timestamp, reverse=True + ) git_shas = {r.git_sha for r in versions} config_shas = {r.config_sha for r in versions} bouncing_to = [] @@ -1303,10 +1318,10 @@ def get_replica_state(pod: KubernetesPodV2) -> ReplicaState: # This logic likely needs refining main_container = get_main_container(pod) if main_container: - # K8s API is returning timestamps in YST, so we use now() instead of utcnow() + # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC warming_up = ( pod.create_timestamp + main_container.healthcheck_grace_period - > datetime.now().timestamp() + > datetime.now(timezone.utc).timestamp() ) if pod.mesh_ready is False: if main_container.state != "running": @@ -1408,14 +1423,17 @@ def create_replica_table( ) if state == ReplicaState.WARMING_UP: if verbose > 0: - warmup_duration = datetime.now().timestamp() - pod.create_timestamp + # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC + warmup_duration = ( + datetime.now(timezone.utc).timestamp() - pod.create_timestamp + ) humanized_duration = humanize.naturaldelta( timedelta(seconds=warmup_duration) ) grace_period_remaining = ( pod.create_timestamp + main_container.healthcheck_grace_period - - datetime.now().timestamp() + - datetime.now(timezone.utc).timestamp() ) humanized_remaining = humanize.naturaldelta( timedelta(seconds=grace_period_remaining) @@ -1780,6 +1798,7 @@ def node_property_to_str(prop: Dict[str, Any], verbose: int) -> str: parsed_time = datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ").replace( tzinfo=timezone.utc ) + # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC now = datetime.now(timezone.utc) return ( humanize.naturaldelta( @@ -1815,7 +1834,7 @@ 
def print_kafka_status( desired_state = annotations.get(paasta_prefixed("desired_state")) if desired_state is None: raise ValueError( - f"expected desired state in kafka annotation, but received none" + "expected desired state in kafka annotation, but received none" ) output.append(f" State: {desired_state}") @@ -1841,7 +1860,7 @@ def print_kafka_status( ) brokers = status["brokers"] - output.append(f" Brokers:") + output.append(" Brokers:") if verbose: headers = ["Id", "Phase", "IP", "Pod Name", "Started"] @@ -1854,10 +1873,11 @@ def print_kafka_status( PaastaColors.green if broker["phase"] == "Running" else PaastaColors.red ) + # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC start_time = datetime.strptime( broker["deployed_timestamp"], "%Y-%m-%dT%H:%M:%SZ" - ) - delta = datetime.utcnow() - start_time + ).replace(tzinfo=timezone.utc) + delta = datetime.now(timezone.utc) - start_time formatted_start_time = f"{str(start_time)} ({humanize.naturaltime(delta)})" if verbose: @@ -2140,6 +2160,7 @@ def report_status_for_cluster( lock: Lock, verbose: int = 0, new: bool = False, + all_namespaces: bool = False, ) -> Tuple[int, Sequence[str]]: """With a given service and cluster, prints the status of the instances in that cluster""" @@ -2193,6 +2214,7 @@ def report_status_for_cluster( lock=lock, verbose=verbose, new=new, + all_namespaces=all_namespaces, is_eks=(instance_config_class in EKS_DEPLOYMENT_CONFIGS), ) ) @@ -2416,6 +2438,7 @@ def paasta_status(args) -> int: lock=lock, verbose=args.verbose, new=new, + all_namespaces=args.all_namespaces, ), ) ) diff --git a/paasta_tools/cli/schemas/smartstack_schema.json b/paasta_tools/cli/schemas/smartstack_schema.json index 0774c5f88f..5314b0909f 100644 --- a/paasta_tools/cli/schemas/smartstack_schema.json +++ b/paasta_tools/cli/schemas/smartstack_schema.json @@ -255,7 +255,19 @@ "maximum": 1 }, "minimum_error_rps": { - "type": "integer", + "type": "number", + "minimum": 0 + }, + "default_endpoint_alerting": { + "type": "boolean" + }, + "endpoint_error_threshold_ratio": { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "endpoint_minimum_error_rps": { + "type": "number", "minimum": 0 }, "endpoints": { @@ -271,6 +283,10 @@ "minimum": 0, "maximum": 1 }, + "minimum_error_rps": { + "type": "number", + "minimum": 0 + }, "team": { "type": "string" }, diff --git a/paasta_tools/cli/schemas/tron_schema.json b/paasta_tools/cli/schemas/tron_schema.json index 0a3f148180..5c56833aea 100644 --- a/paasta_tools/cli/schemas/tron_schema.json +++ b/paasta_tools/cli/schemas/tron_schema.json @@ -663,10 +663,6 @@ }, "deploy_group": { "type": "string" - }, - "uses_bulkdata": { - "type": "boolean", - "$comment": "XXX: this entry should be deleted once we've refactored existing configs to use the correct toggle location" } } } diff --git a/paasta_tools/cli/utils.py b/paasta_tools/cli/utils.py index 979fbff996..b05240e1e4 100644 --- a/paasta_tools/cli/utils.py +++ b/paasta_tools/cli/utils.py @@ -82,6 +82,7 @@ from vault_tools.paasta_secret import get_client as get_vault_client from vault_tools.paasta_secret import get_vault_url from vault_tools.paasta_secret import get_vault_ca + from okta_auth import get_and_cache_jwt_default except ImportError: def get_vault_client(url: str, capath: str) -> None: @@ -93,6 +94,9 @@ def get_vault_url(ecosystem: str) -> str: def get_vault_ca(ecosystem: str) -> str: return "" + def get_and_cache_jwt_default(client_id: str) -> str: + return "" + log = logging.getLogger(__name__) @@ -1132,3 +1136,9 @@ def 
get_service_auth_token() -> str: ) response = vault_client.secrets.identity.generate_signed_id_token(name=vault_role) return response["data"]["token"] + + +def get_sso_service_auth_token() -> str: + """Generate an authentication token for the calling user from the Single Sign On provider""" + client_id = load_system_paasta_config().get_service_auth_sso_oidc_client_id() + return get_and_cache_jwt_default(client_id) diff --git a/paasta_tools/config_utils.py b/paasta_tools/config_utils.py index 05aa027ed7..a0a0a58a4d 100644 --- a/paasta_tools/config_utils.py +++ b/paasta_tools/config_utils.py @@ -25,6 +25,7 @@ "deploy", "smartstack", "cassandracluster", + "eks", ) # this could use a better name - but basically, this is for pairs of instance types @@ -70,7 +71,14 @@ def write_auto_config_data( if comment: content = ( yaml.round_trip_load( - comment.format(regular_filename=f"{service}/{extra_info}.yaml") + comment.format( + # this is a bit of a hack, but we've decided to not rename files back to kubernetes-* + # files. while we still need to update things to reference the eks files directly, there's + # still a couple of places where we still need kubernetes-* files (e.g., unmigrated operators) + # so for now let's just assume that autotuned things will always actually have their human-managed + # config in eks-* files + regular_filename=f"{service}/{extra_info.replace('kubernetes-', 'eks-')}.yaml", + ) ) if comment else {} diff --git a/paasta_tools/contrib/service_shard_update.py b/paasta_tools/contrib/service_shard_update.py index 9ec91c9858..a48031c68d 100644 --- a/paasta_tools/contrib/service_shard_update.py +++ b/paasta_tools/contrib/service_shard_update.py @@ -39,6 +39,13 @@ def parse_args(): action="store_true", dest="verbose", ) + parser.add_argument( + "-d", + "--dry-run", + help="Do not commit changes to git", + action="store_true", + dest="dry_run", + ) parser.add_argument( "--source-id", help="String to attribute the changes in the commit message.", @@ -115,6 +122,48 @@ def parse_args(): type=int, dest="timeout_server_ms", ) + parser.add_argument( + "--autotune-min-cpus", + help="Minimum number of CPUs Autotune should give the shard", + required=False, + type=float, + dest="autotune_min_cpus", + ) + parser.add_argument( + "--autotune-max-cpus", + help="Maximum number of CPUs Autotune should give the shard", + required=False, + type=float, + dest="autotune_max_cpus", + ) + parser.add_argument( + "--autotune-min-mem", + help="Minimum amount of memory Autotune should give the shard", + required=False, + type=int, + dest="autotune_min_mem", + ) + parser.add_argument( + "--autotune-max-mem", + help="Maximum amount of memory Autotune should give the shard", + required=False, + type=int, + dest="autotune_max_mem", + ) + parser.add_argument( + "--autotune-min-disk", + help="Minimum amount of disk Autotune should give the shard", + required=False, + type=int, + dest="autotune_min_disk", + ) + parser.add_argument( + "--autotune-max-disk", + help="Maximum amount of disk Autotune should give the shard", + required=False, + type=int, + dest="autotune_max_disk", + ) return parser.parse_args() @@ -195,9 +244,11 @@ def main(args): instance_config = { "deploy_group": f"{deploy_prefix}.{args.shard_name}", "min_instances": args.min_instance_count, - "max_instances": args.prod_max_instance_count - if deploy_prefix == "prod" - else args.non_prod_max_instance_count, + "max_instances": ( + args.prod_max_instance_count + if deploy_prefix == "prod" + else args.non_prod_max_instance_count + ), "env": { 
"PAASTA_SECRET_BUGSNAG_API_KEY": "SECRET(bugsnag_api_key)", }, @@ -217,6 +268,40 @@ def main(args): instance_config["cpus"] = args.cpus if args.mem is not None: instance_config["mem"] = args.mem + if any( + ( + args.autotune_min_cpus, + args.autotune_max_cpus, + args.autotune_min_mem, + args.autotune_max_mem, + args.autotune_min_disk, + args.autotune_max_disk, + ) + ): + limit_config = {} + limit_config["cpus"] = { + "min": args.autotune_min_cpus, + "max": args.autotune_max_cpus, + } + limit_config["mem"] = { + "min": args.autotune_min_mem, + "max": args.autotune_max_mem, + } + limit_config["disk"] = { + "min": args.autotune_min_disk, + "max": args.autotune_max_disk, + } + + # remove any None values to keep the config clean + for resource in list(limit_config): + for key in list(limit_config[resource]): + if limit_config[resource][key] is None: + del limit_config[resource][key] + if len(limit_config[resource]) == 0: + del limit_config[resource] + + if len(limit_config) > 0: + instance_config["autotune_limits"] = limit_config # If the service config does not contain definitions for the shard in each ecosystem # Add the missing definition and write to the corresponding config if args.shard_name not in config_file.keys(): @@ -247,7 +332,7 @@ def main(args): log.info(f"{args.shard_name} is in smartstack config already, skipping.") # Only commit to remote if changes were made - if changes_made: + if changes_made and not args.dry_run: updater.commit_to_remote() trigger_deploys(args.service) else: diff --git a/paasta_tools/instance/kubernetes.py b/paasta_tools/instance/kubernetes.py index 0a90db4bd9..7f57b8c380 100644 --- a/paasta_tools/instance/kubernetes.py +++ b/paasta_tools/instance/kubernetes.py @@ -606,6 +606,7 @@ async def kubernetes_status_v2( include_envoy: bool, instance_type: str, settings: Any, + all_namespaces: bool = False, ) -> Dict[str, Any]: status: Dict[str, Any] = {} config_loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader @@ -620,9 +621,12 @@ async def kubernetes_status_v2( if kube_client is None: return status - relevant_namespaces = await a_sync.to_async(find_all_relevant_namespaces)( - service, instance, kube_client, job_config - ) + if all_namespaces: + relevant_namespaces = await a_sync.to_async(find_all_relevant_namespaces)( + service, instance, kube_client, job_config + ) + else: + relevant_namespaces = {job_config.get_kubernetes_namespace()} tasks: List["asyncio.Future[Dict[str, Any]]"] = [] @@ -1240,6 +1244,7 @@ def instance_status( use_new: bool, instance_type: str, settings: Any, + all_namespaces: bool, ) -> Mapping[str, Any]: status = {} @@ -1267,6 +1272,7 @@ def instance_status( verbose=verbose, include_envoy=include_envoy, settings=settings, + all_namespaces=all_namespaces, ) else: status["kubernetes"] = kubernetes_status( diff --git a/paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py b/paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py index e67da0391c..14abc65c1c 100644 --- a/paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py +++ b/paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py @@ -59,7 +59,7 @@ def nodes_for_cleanup(ec2_client: Client, nodes: Sequence[V1Node]) -> List[V1Nod node for node in nodes if not is_node_ready(node) - and "node-role.kubernetes.io/master" not in node.metadata.labels + and "node-role.kubernetes.io/control-plane" not in node.metadata.labels ] terminated = terminated_nodes(ec2_client, not_ready) return terminated diff --git a/paasta_tools/kubernetes_tools.py 
b/paasta_tools/kubernetes_tools.py index e6fdf1cc31..ba88a5a8c1 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -20,6 +20,7 @@ import os import re from datetime import datetime +from datetime import timezone from enum import Enum from functools import lru_cache from inspect import currentframe @@ -51,8 +52,6 @@ from kubernetes.client import models from kubernetes.client import V1Affinity from kubernetes.client import V1AWSElasticBlockStoreVolumeSource -from kubernetes.client import V1beta1CustomResourceDefinition -from kubernetes.client import V1beta1CustomResourceDefinitionList from kubernetes.client import V1beta1PodDisruptionBudget from kubernetes.client import V1beta1PodDisruptionBudgetSpec from kubernetes.client import V1Capabilities @@ -70,12 +69,12 @@ from kubernetes.client import V1EnvVar from kubernetes.client import V1EnvVarSource from kubernetes.client import V1ExecAction -from kubernetes.client import V1Handler from kubernetes.client import V1HostPathVolumeSource from kubernetes.client import V1HTTPGetAction from kubernetes.client import V1KeyToPath from kubernetes.client import V1LabelSelector from kubernetes.client import V1Lifecycle +from kubernetes.client import V1LifecycleHandler from kubernetes.client import V1LimitRange from kubernetes.client import V1LimitRangeItem from kubernetes.client import V1LimitRangeSpec @@ -607,12 +606,6 @@ def __init__( self.policy = kube_client.PolicyV1beta1Api(self.api_client) self.apiextensions = kube_client.ApiextensionsV1Api(self.api_client) - # We need to support apiextensions /v1 and /v1beta1 in order - # to make our upgrade to k8s 1.22 smooth, otherwise - # updating the CRDs make this script fail - self.apiextensions_v1_beta1 = kube_client.ApiextensionsV1beta1Api( - self.api_client - ) self.custom = kube_client.CustomObjectsApi(self.api_client) self.autoscaling = kube_client.AutoscalingV2beta2Api(self.api_client) self.rbac = kube_client.RbacAuthorizationV1Api(self.api_client) @@ -1115,7 +1108,7 @@ def get_hacheck_sidecar_container( return V1Container( image=system_paasta_config.get_hacheck_sidecar_image_url(), lifecycle=V1Lifecycle( - pre_stop=V1Handler( + pre_stop=V1LifecycleHandler( _exec=V1ExecAction( command=[ "/bin/sh", @@ -1158,7 +1151,7 @@ def get_gunicorn_exporter_sidecar_container( env=self.get_kubernetes_environment(), ports=[V1ContainerPort(container_port=9117)], lifecycle=V1Lifecycle( - pre_stop=V1Handler( + pre_stop=V1LifecycleHandler( _exec=V1ExecAction( command=[ "/bin/sh", @@ -1406,7 +1399,17 @@ def get_security_context(self) -> Optional[V1SecurityContext]: return V1SecurityContext(capabilities=V1Capabilities(drop=CAPS_DROP)) else: return V1SecurityContext( - capabilities=V1Capabilities(add=cap_add, drop=CAPS_DROP) + # XXX: we should probably generally work in sets, but V1Capabilities is typed as accepting + # lists of string only + capabilities=V1Capabilities( + add=cap_add, + # NOTE: this is necessary as containerd differs in behavior from dockershim: in dockershim + # dropped capabilities were overriden if the same capability was added - but in containerd + # the dropped capabilities appear to have higher priority. 
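+                        # e.g. with cap_add=["SYS_PTRACE"] we drop (CAPS_DROP - {"SYS_PTRACE"}), so the
+                        # added capability is not clobbered by the drop list under containerd.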
+ # WARNING: this must be sorted - otherwise the order of the capabilities will be different + # on every setup_kubernetes_job run and cause unnecessary redeployments + drop=sorted(list(set(CAPS_DROP) - set(cap_add))), + ) ) def get_kubernetes_containers( @@ -1467,20 +1470,20 @@ def get_readiness_probe( else: return self.get_liveness_probe(service_namespace_config) - def get_kubernetes_container_termination_action(self) -> V1Handler: + def get_kubernetes_container_termination_action(self) -> V1LifecycleHandler: command = self.config_dict.get("lifecycle", KubeLifecycleDict({})).get( "pre_stop_command", [] ) # default pre stop hook for the container if not command: - return V1Handler( + return V1LifecycleHandler( _exec=V1ExecAction( command=["/bin/sh", "-c", f"sleep {DEFAULT_PRESTOP_SLEEP_SECONDS}"] ) ) if isinstance(command, str): command = [command] - return V1Handler(_exec=V1ExecAction(command=command)) + return V1LifecycleHandler(_exec=V1ExecAction(command=command)) def get_pod_volumes( self, @@ -2950,7 +2953,7 @@ def recent_container_restart( last_timestamp: Optional[int], time_window_s: int = 900, # 15 mins ) -> bool: - min_timestamp = datetime.now().timestamp() - time_window_s + min_timestamp = datetime.now(timezone.utc).timestamp() - time_window_s return ( restart_count > 0 and last_state == "terminated" @@ -3672,7 +3675,8 @@ async def get_events_for_object( ) events = events.items if events else [] if max_age_in_seconds and max_age_in_seconds > 0: - min_timestamp = datetime.now().timestamp() - max_age_in_seconds + # NOTE: the k8s API returns timestamps in UTC, so we make sure to always work in UTC + min_timestamp = datetime.now(timezone.utc).timestamp() - max_age_in_seconds events = [ evt for evt in events @@ -4158,12 +4162,8 @@ def mode_to_int(mode: Optional[Union[str, int]]) -> Optional[int]: def update_crds( kube_client: KubeClient, - desired_crds: Collection[ - Union[V1CustomResourceDefinition, V1beta1CustomResourceDefinition] - ], - existing_crds: Union[ - V1CustomResourceDefinitionList, V1beta1CustomResourceDefinitionList - ], + desired_crds: Collection[Union[V1CustomResourceDefinition]], + existing_crds: Union[V1CustomResourceDefinitionList], ) -> bool: for desired_crd in desired_crds: existing_crd = None @@ -4173,10 +4173,7 @@ def update_crds( break try: - if "apiextensions.k8s.io/v1beta1" == desired_crd.api_version: - apiextensions = kube_client.apiextensions_v1_beta1 - else: - apiextensions = kube_client.apiextensions + apiextensions = kube_client.apiextensions if existing_crd: desired_crd.metadata[ diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py index 19d19e7684..63901f7cee 100644 --- a/paasta_tools/paastaapi/api/service_api.py +++ b/paasta_tools/paastaapi/api/service_api.py @@ -1322,6 +1322,7 @@ def __status_instance( include_envoy (bool): Include Envoy information. [optional] include_mesos (bool): Include Mesos information. [optional] new (bool): Use new version of paasta status for services. [optional] + all_namespaces (bool): Search all namespaces for running copies. [optional] _return_http_data_only (bool): response data without head status code and headers. Default is True. 
_preload_content (bool): if False, the urllib3.HTTPResponse object @@ -1389,6 +1390,7 @@ def __status_instance( 'include_envoy', 'include_mesos', 'new', + 'all_namespaces', ], 'required': [ 'service', @@ -1419,6 +1421,8 @@ def __status_instance( (bool,), 'new': (bool,), + 'all_namespaces': + (bool,), }, 'attribute_map': { 'service': 'service', @@ -1427,6 +1431,7 @@ def __status_instance( 'include_envoy': 'include_envoy', 'include_mesos': 'include_mesos', 'new': 'new', + 'all_namespaces': 'all_namespaces', }, 'location_map': { 'service': 'path', @@ -1435,6 +1440,7 @@ def __status_instance( 'include_envoy': 'query', 'include_mesos': 'query', 'new': 'query', + 'all_namespaces': 'query', }, 'collection_format_map': { } diff --git a/paasta_tools/setup_kubernetes_cr.py b/paasta_tools/setup_kubernetes_cr.py index 5972b4da72..0d0e810368 100644 --- a/paasta_tools/setup_kubernetes_cr.py +++ b/paasta_tools/setup_kubernetes_cr.py @@ -166,7 +166,6 @@ def setup_all_custom_resources( # we need to try both possibilities for apiextension in [ kube_client.apiextensions, - kube_client.apiextensions_v1_beta1, ]: try: @@ -175,7 +174,7 @@ def setup_all_custom_resources( ).items except ApiException: log.debug( - "Listing CRDs with apiextensions/v1 not supported on this cluster, falling back to v1beta1" + "Listing CRDs with apiextensions/v1 not supported on this cluster" ) crds_list = [] diff --git a/paasta_tools/setup_kubernetes_crd.py b/paasta_tools/setup_kubernetes_crd.py index eabc667c06..9c8b1e0494 100644 --- a/paasta_tools/setup_kubernetes_crd.py +++ b/paasta_tools/setup_kubernetes_crd.py @@ -27,9 +27,7 @@ from typing import Sequence import service_configuration_lib -from kubernetes.client import V1beta1CustomResourceDefinition from kubernetes.client import V1CustomResourceDefinition -from kubernetes.client.exceptions import ApiException from paasta_tools.kubernetes_tools import KubeClient from paasta_tools.kubernetes_tools import paasta_prefixed @@ -106,22 +104,7 @@ def setup_kube_crd( label_selector=paasta_prefixed("service") ) - # This step can fail in k8s 1.22 since this version is not existing anymore - # we need to support this for the transition - try: - existing_crds_v1_beta1 = ( - kube_client.apiextensions_v1_beta1.list_custom_resource_definition( - label_selector=paasta_prefixed("service") - ) - ) - except ApiException: - existing_crds_v1_beta1 = [] - log.debug( - "Listing CRDs with apiextensions/v1beta1 not supported on this cluster, falling back to v1" - ) - desired_crds = [] - desired_crds_v1_beta1 = [] for service in services: crd_config = service_configuration_lib.read_extra_service_information( service, f"crd-{cluster}", soa_dir=soa_dir @@ -136,31 +119,18 @@ def setup_kube_crd( metadata["labels"]["yelp.com/paasta_service"] = service metadata["labels"][paasta_prefixed("service")] = service - if "apiextensions.k8s.io/v1beta1" == crd_config["apiVersion"]: - desired_crd = V1beta1CustomResourceDefinition( - api_version=crd_config.get("apiVersion"), - kind=crd_config.get("kind"), - metadata=metadata, - spec=crd_config.get("spec"), - ) - desired_crds_v1_beta1.append(desired_crd) - else: - desired_crd = V1CustomResourceDefinition( - api_version=crd_config.get("apiVersion"), - kind=crd_config.get("kind"), - metadata=metadata, - spec=crd_config.get("spec"), - ) - desired_crds.append(desired_crd) + desired_crd = V1CustomResourceDefinition( + api_version=crd_config.get("apiVersion"), + kind=crd_config.get("kind"), + metadata=metadata, + spec=crd_config.get("spec"), + ) + desired_crds.append(desired_crd) 
return update_crds( kube_client=kube_client, desired_crds=desired_crds, existing_crds=existing_crds, - ) and update_crds( - kube_client=kube_client, - desired_crds=desired_crds_v1_beta1, - existing_crds=existing_crds_v1_beta1, ) diff --git a/paasta_tools/setup_tron_namespace.py b/paasta_tools/setup_tron_namespace.py index be5112707e..d4ed73cbab 100755 --- a/paasta_tools/setup_tron_namespace.py +++ b/paasta_tools/setup_tron_namespace.py @@ -23,6 +23,7 @@ import argparse import logging import sys +from typing import Dict from typing import List import ruamel.yaml as yaml @@ -62,6 +63,13 @@ def parse_args(): ) parser.add_argument("-v", "--verbose", action="store_true", default=False) parser.add_argument("--dry-run", action="store_true", default=False) + parser.add_argument( + "--bulk-config-fetch", + dest="bulk_config_fetch", + action="store_true", + default=False, + help="Attempt to fetch all configs in bulk rather than one by one", + ) parser.add_argument( "--cluster", help="Cluster to read configs for. Defaults to the configuration in /etc/paasta", @@ -162,6 +170,7 @@ def main(): k8s_enabled_for_cluster = ( yaml.safe_load(master_config).get("k8s_options", {}).get("enabled", False) ) + new_configs: Dict[str, str] = {} # service -> new_config for service in sorted(services): try: new_config = tron_tools.create_complete_config( @@ -171,6 +180,7 @@ def main(): k8s_enabled=k8s_enabled_for_cluster, dry_run=args.dry_run, ) + new_configs[service] = new_config if args.dry_run: log.info(f"Would update {service} to:") log.info(f"{new_config}") @@ -187,17 +197,44 @@ def main(): for_validation=False, ) ensure_service_accounts(job_configs) + if not args.bulk_config_fetch: + if client.update_namespace(service, new_config): + updated.append(service) + log.debug(f"Updated {service}") + else: + skipped.append(service) + log.debug(f"Skipped {service}") - if client.update_namespace(service, new_config): - updated.append(service) - log.debug(f"Updated {service}") - else: - skipped.append(service) - log.debug(f"Skipped {service}") except Exception: - log.exception(f"Update for {service} failed:") + if args.bulk_config_fetch: + # service account creation should be the only action that can throw if this flag is true, + # so we can safely assume that's what happened here in the log message + log.exception( + f"Failed to create service account for {service} (will skip reconfiguring):" + ) + + # since service account creation failed, we want to skip reconfiguring this service + # as the new config will likely fail due to the missing service account - even though + # the rest of the config is valid + new_configs.pop(service, None) + else: + log.exception(f"Update for {service} failed:") + + # NOTE: this happens for both ways of updating (bulk fetch and JIT fetch) + # since we need to print out what failed in either case failed.append(service) + if not args.dry_run and args.bulk_config_fetch: + updated_namespaces = client.update_namespaces(new_configs) + + if updated_namespaces: + updated = list(updated_namespaces.keys()) + log.debug(f"Updated {updated}") + + if updated_namespaces.keys() != new_configs.keys(): + skipped = set(new_configs.keys()) - set(updated_namespaces.keys()) + log.debug(f"Skipped {skipped}") + skipped_report = skipped if args.verbose else len(skipped) log.info( f"Updated following namespaces: {updated}, " diff --git a/paasta_tools/spark_tools.py b/paasta_tools/spark_tools.py index 553c0cf319..aa581a826c 100644 --- a/paasta_tools/spark_tools.py +++ b/paasta_tools/spark_tools.py @@ -20,7 +20,7 @@
SPARK_AWS_CREDS_PROVIDER = "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" SPARK_EXECUTOR_NAMESPACE = "paasta-spark" SPARK_DRIVER_POOL = "stable" -SPARK_JOB_USER = "TRON" +SPARK_TRON_JOB_USER = "TRON" SPARK_PROMETHEUS_SHARD = "ml-compute" SPARK_DNS_POD_TEMPLATE = "/nail/srv/configs/spark_dns_pod_template.yaml" MEM_MULTIPLIER = {"k": 1024, "m": 1024**2, "g": 1024**3, "t": 1024**4} @@ -176,7 +176,9 @@ def inject_spark_conf_str(original_cmd: str, spark_conf_str: str) -> str: return original_cmd -def auto_add_timeout_for_spark_job(cmd: str, timeout_job_runtime: str) -> str: +def auto_add_timeout_for_spark_job( + cmd: str, timeout_job_runtime: str, silent: bool = False +) -> str: # Timeout only to be added for spark-submit commands # TODO: Add timeout for jobs using mrjob with spark-runner if "spark-submit" not in cmd: @@ -189,16 +191,17 @@ def auto_add_timeout_for_spark_job(cmd: str, timeout_job_runtime: str) -> str: split_cmd = cmd.split("spark-submit") # split_cmd[0] will always be an empty string or end with a space cmd = f"{split_cmd[0]}timeout {timeout_job_runtime} spark-submit{split_cmd[1]}" - log.info( - PaastaColors.blue( - f"NOTE: Job will exit in given time {timeout_job_runtime}. " - f"Adjust timeout value using --timeout-job-timeout. " - f"New Updated Command with timeout: {cmd}" - ), - ) + if not silent: + log.info( + PaastaColors.blue( + f"NOTE: Job will exit in given time {timeout_job_runtime}. " + f"Adjust timeout value using --timeout-job-runtime. " + f"New Updated Command with timeout: {cmd}" + ), + ) except Exception as e: err_msg = ( - f"'timeout' could not be added to command: '{cmd}' due to error '{e}'. " + f"'timeout' could not be added to spark command: '{cmd}' due to error '{e}'. " "Please report to #spark." ) log.warn(err_msg) @@ -211,9 +214,12 @@ def build_spark_command( spark_config_dict: Dict[str, Any], is_mrjob: bool, timeout_job_runtime: str, + silent: bool = False, ) -> str: - command = f"{inject_spark_conf_str(original_cmd, create_spark_config_str(spark_config_dict, is_mrjob=is_mrjob))}" - return auto_add_timeout_for_spark_job(command, timeout_job_runtime) + command = inject_spark_conf_str( + original_cmd, create_spark_config_str(spark_config_dict, is_mrjob=is_mrjob) + ) + return auto_add_timeout_for_spark_job(command, timeout_job_runtime, silent=silent) def get_spark_ports_from_config(spark_conf: Dict[str, str]) -> List[int]: @@ -238,6 +244,7 @@ def get_spark_driver_monitoring_annotations( def get_spark_driver_monitoring_labels( spark_config: Dict[str, str], + user: str, ) -> Dict[str, str]: """ Returns Spark driver pod labels - generally for Prometheus metric relabeling. @@ -245,7 +252,7 @@ def get_spark_driver_monitoring_labels( ui_port_str = str(spark_config.get("spark.ui.port", "")) labels = { "paasta.yelp.com/prometheus_shard": SPARK_PROMETHEUS_SHARD, - "spark.yelp.com/user": SPARK_JOB_USER, + "spark.yelp.com/user": user, "spark.yelp.com/driver_ui_port": ui_port_str, } return labels diff --git a/paasta_tools/tron/client.py b/paasta_tools/tron/client.py index 84df93c3f9..90cd34c109 100644 --- a/paasta_tools/tron/client.py +++ b/paasta_tools/tron/client.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +from typing import Dict from urllib.parse import urljoin import requests @@ -96,6 +97,38 @@ def update_namespace(self, namespace, new_config, skip_if_unchanged=True): }, ) + def update_namespaces( + self, new_configs: Dict[str, str], skip_if_unchanged: bool = True + ): + """Updates the configuration for multiple namespaces in one pass. + + :param new_configs: Dict mapping each namespace (str) to its new config (str, should be valid YAML). + :param skip_if_unchanged: boolean. If False, will send the update + even if the current config matches the new config. + :returns: Dict mapping each namespace that was actually updated to the Tron API response. + """ + current_configs: Dict[str, Dict[str, str]] = self._get("/api/config") # type: ignore # we don't have a good way to share types between tron/paasta + responses: Dict[str, str] = {} + for namespace, new_config in new_configs.items(): + current_config = current_configs.get(namespace, {}) + if skip_if_unchanged: + if yaml.safe_load(new_config) == yaml.safe_load( + current_config["config"] + ): + log.debug("No change in config, skipping update.") + continue + + responses[namespace] = self._post( + "/api/config", + data={ + "name": namespace, + "config": new_config, + "hash": current_config["hash"], + "check": 0, + }, + ) + return responses + def list_namespaces(self): """Gets the namespaces that are currently configured.""" response = self._get("/api") diff --git a/paasta_tools/tron_tools.py b/paasta_tools/tron_tools.py index 13163a345a..dcde970af1 100644 --- a/paasta_tools/tron_tools.py +++ b/paasta_tools/tron_tools.py @@ -62,7 +62,9 @@ from paasta_tools import spark_tools from paasta_tools.kubernetes_tools import ( + NodeSelectorConfig, allowlist_denylist_to_requirements, + contains_zone_label, get_service_account_name, limit_size_with_hash, raw_selectors_to_requirements, @@ -248,6 +250,7 @@ class TronActionConfigDict(InstanceConfigDict, total=False): # maneuvering to unify command: str service_account_name: str + node_selectors: Dict[str, NodeSelectorConfig] # the values for this dict can be anything since it's whatever # spark accepts @@ -347,7 +350,7 @@ def build_spark_config(self) -> Dict[str, str]: docker_img_url = self.get_docker_url(system_paasta_config) - spark_conf_builder = SparkConfBuilder() + spark_conf_builder = SparkConfBuilder(is_driver_on_k8s_tron=True) spark_conf = spark_conf_builder.get_spark_conf( cluster_manager="kubernetes", spark_app_base_name=spark_app_name, @@ -366,7 +369,7 @@ def build_spark_config(self) -> Dict[str, str]: force_spark_resource_configs=self.config_dict.get( "force_spark_resource_configs", False ), - user=spark_tools.SPARK_JOB_USER, + user=spark_tools.SPARK_TRON_JOB_USER, ) # delete the dynamically generated spark.app.id to prevent frequent config updates in Tron. # spark.app.id will be generated later by yelp spark-submit wrapper or Spark itself.
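For reference, a hedged usage sketch of the new bulk path (TronClient.update_namespaces above, driven by the --bulk-config-fetch flag in setup_tron_namespace.py). The Tron master URL, service names, and configs below are made-up placeholders, not values from this diff, and the TronClient constructor argument is an assumption.

# Hedged usage sketch (not part of this diff): pass a mapping of namespace -> rendered
# config YAML; current configs are fetched once via /api/config, unchanged namespaces
# are skipped, and only namespaces that were actually pushed appear in the return value.
from paasta_tools.tron.client import TronClient

client = TronClient("http://tron-master.example.com:8089")  # illustrative URL
new_configs = {
    "my_service": "jobs: {}\n",
    "other_service": "jobs: {}\n",
}
responses = client.update_namespaces(new_configs)
updated = sorted(responses.keys())                       # namespaces that were pushed
skipped = sorted(new_configs.keys() - responses.keys())  # unchanged namespaces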
@@ -380,16 +383,17 @@ def build_spark_config(self) -> Dict[str, str]: if "spark.app.name" not in stringified_spark_args else stringified_spark_args["spark.app.name"] ) - # TODO: Remove this once dynamic pod template is generated inside the driver using spark-submit wrapper + + # TODO(MLCOMPUTE-1220): Remove this once dynamic pod template is generated inside the driver using spark-submit wrapper if "spark.kubernetes.executor.podTemplateFile" in spark_conf: - print( + log.info( f"Replacing spark.kubernetes.executor.podTemplateFile=" f"{spark_conf['spark.kubernetes.executor.podTemplateFile']} with " f"spark.kubernetes.executor.podTemplateFile={spark_tools.SPARK_DNS_POD_TEMPLATE}" ) - spark_conf[ - "spark.kubernetes.executor.podTemplateFile" - ] = spark_tools.SPARK_DNS_POD_TEMPLATE + spark_conf[ + "spark.kubernetes.executor.podTemplateFile" + ] = spark_tools.SPARK_DNS_POD_TEMPLATE spark_conf.update( { @@ -593,18 +597,39 @@ def get_node_selectors(self) -> Dict[str, str]: def get_node_affinities(self) -> Optional[List[Dict[str, Union[str, List[str]]]]]: """Converts deploy_whitelist and deploy_blacklist in node affinities. - note: At the time of writing, `kubectl describe` does not show affinities, + NOTE: At the time of writing, `kubectl describe` does not show affinities, only selectors. To see affinities, use `kubectl get pod -o json` instead. + + WARNING: At the time of writing, we only used requiredDuringSchedulingIgnoredDuringExecution node affinities in Tron as we currently have + no use case for preferredDuringSchedulingIgnoredDuringExecution node affinities. """ requirements = allowlist_denylist_to_requirements( allowlist=self.get_deploy_whitelist(), denylist=self.get_deploy_blacklist(), ) + node_selectors = self.config_dict.get("node_selectors", {}) requirements.extend( raw_selectors_to_requirements( - raw_selectors=self.config_dict.get("node_selectors", {}), # type: ignore + raw_selectors=node_selectors, ) ) + + system_paasta_config = load_system_paasta_config() + if system_paasta_config.get_enable_tron_tsc(): + # PAASTA-18198: To improve AZ balance with Karpenter, we temporarily allow specifying zone affinities per pool + pool_node_affinities = system_paasta_config.get_pool_node_affinities() + if pool_node_affinities and self.get_pool() in pool_node_affinities: + current_pool_node_affinities = pool_node_affinities[self.get_pool()] + # If the service already has a node selector for a zone, we don't want to override it + if current_pool_node_affinities and not contains_zone_label( + node_selectors + ): + requirements.extend( + raw_selectors_to_requirements( + raw_selectors=current_pool_node_affinities, + ) + ) + if not requirements: return None @@ -962,6 +987,9 @@ def format_tron_action_dict(action_config: TronActionConfig): "service_account_name": action_config.get_service_account_name(), } + # we need this loaded in several branches, so we'll load it once at the start to simplify things + system_paasta_config = load_system_paasta_config() + if executor in KUBERNETES_EXECUTOR_NAMES: # we'd like Tron to be able to distinguish between spark and normal actions # even though they both run on k8s @@ -983,6 +1011,26 @@ def format_tron_action_dict(action_config: TronActionConfig): result["node_selectors"] = action_config.get_node_selectors() result["node_affinities"] = action_config.get_node_affinities() + if system_paasta_config.get_enable_tron_tsc(): + # XXX: this is currently hardcoded since we should only really need TSC for zone-aware scheduling + result["topology_spread_constraints"] = 
[ + { + # try to evenly spread pods across specified topology + "max_skew": 1, + # narrow down what pods to consider when spreading + "label_selector": { + # only consider pods that are managed by tron + "app.kubernetes.io/managed-by": "tron", + # and in the same pool + "paasta.yelp.com/pool": action_config.get_pool(), + }, + # now, spread across AZs + "topology_key": "topology.kubernetes.io/zone", + # but if not possible, schedule even with a zonal imbalance + "when_unsatisfiable": "ScheduleAnyway", + }, + ] + # XXX: once we're off mesos we can make get_cap_* return just the cap names as a list result["cap_add"] = [cap["value"] for cap in action_config.get_cap_add()] result["cap_drop"] = [cap["value"] for cap in action_config.get_cap_drop()] @@ -1000,10 +1048,15 @@ def format_tron_action_dict(action_config: TronActionConfig): "app.kubernetes.io/managed-by": "tron", } - # we can hardcode this for now as batches really shouldn't - # need routable IPs and we know that Spark probably does. result["annotations"] = { + # we can hardcode this for now as batches really shouldn't + # need routable IPs and we know that Spark does. "paasta.yelp.com/routable_ip": "true" if executor == "spark" else "false", + # we have a large amount of tron pods whose instance names are too long for a k8s label + # ...so let's toss them into an annotation so that tooling can read them (since the length + # limit is much higher (256kb)) + "paasta.yelp.com/service": action_config.get_service(), + "paasta.yelp.com/instance": action_config.get_instance(), } result["labels"]["yelp.com/owner"] = "compute_infra_platform_experience" @@ -1023,14 +1076,12 @@ def format_tron_action_dict(action_config: TronActionConfig): # XXX: now that we're actually passing through extra_volumes correctly (e.g., using get_volumes()), # we can get rid of the default_volumes from the Tron master config - system_paasta_config = load_system_paasta_config() extra_volumes = action_config.get_volumes( system_paasta_config.get_volumes(), uses_bulkdata_default=system_paasta_config.get_uses_bulkdata_default(), ) if executor == "spark": is_mrjob = action_config.config_dict.get("mrjob", False) - system_paasta_config = load_system_paasta_config() # inject additional Spark configs in case of Spark commands result["command"] = spark_tools.build_spark_command( result["command"], @@ -1039,6 +1090,7 @@ def format_tron_action_dict(action_config: TronActionConfig): action_config.config_dict.get( "max_runtime", spark_tools.DEFAULT_SPARK_RUNTIME_TIMEOUT ), + silent=True, ) # point to the KUBECONFIG needed by Spark driver result["env"]["KUBECONFIG"] = system_paasta_config.get_spark_kubeconfig() @@ -1069,7 +1121,8 @@ def format_tron_action_dict(action_config: TronActionConfig): ) ) monitoring_labels = spark_tools.get_spark_driver_monitoring_labels( - action_config.action_spark_config + action_config.action_spark_config, + user=spark_tools.SPARK_TRON_JOB_USER, ) result["annotations"].update(monitoring_annotations) result["labels"].update(monitoring_labels) diff --git a/paasta_tools/utils.py b/paasta_tools/utils.py index a5d585c8ca..fe71a280fe 100644 --- a/paasta_tools/utils.py +++ b/paasta_tools/utils.py @@ -2044,6 +2044,7 @@ class SystemPaastaConfigDict(TypedDict, total=False): use_multiple_log_readers: Optional[List[str]] service_auth_token_settings: ProjectedSAVolume service_auth_vault_role: str + service_auth_sso_oidc_client_id: str always_authenticating_services: List[str] vitess_images: Dict superregion_to_region_mapping: Dict @@ -2051,6 +2052,8 @@ class 
SystemPaastaConfigDict(TypedDict, total=False): vitess_tablet_pool_type_mapping: Dict vitess_throttling_config: Dict uses_bulkdata_default: bool + enable_automated_redeploys_default: bool + enable_tron_tsc: bool def load_system_paasta_config( @@ -2764,6 +2767,9 @@ def get_service_auth_token_volume_config(self) -> ProjectedSAVolume: def get_service_auth_vault_role(self) -> str: return self.config_dict.get("service_auth_vault_role", "service_authz") + def get_service_auth_sso_oidc_client_id(self) -> str: + return self.config_dict.get("service_auth_sso_oidc_client_id", "") + def get_always_authenticating_services(self) -> List[str]: return self.config_dict.get("always_authenticating_services", []) @@ -2813,6 +2819,12 @@ def get_vitess_throttling_config(self) -> Dict: def get_uses_bulkdata_default(self) -> bool: return self.config_dict.get("uses_bulkdata_default", True) + def get_enable_automated_redeploys_default(self) -> bool: + return self.config_dict.get("enable_automated_redeploys_default", False) + + def get_enable_tron_tsc(self) -> bool: + return self.config_dict.get("enable_tron_tsc", True) + def _run( command: Union[str, List[str]], diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index 7d0676334f..527cc84f9c 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -104,6 +104,7 @@ "throttle_check_as_check_self": "true", "db_charset": "utf8mb4", "disable_active_reparents": "true", + "init_shard": "0", } @@ -134,6 +135,8 @@ class GatewayConfigDict(TypedDict, total=False): extraEnv: List[Union[KVEnvVar, KVEnvVarValueFrom]] extraFlags: Dict[str, str] extraLabels: Dict[str, str] + extraVolumeMounts: List[Dict[str, Any]] + extraVolumes: List[Dict[str, Any]] lifecycle: Dict[str, Dict[str, Dict[str, List[str]]]] replicas: int resources: Dict[str, Any] @@ -258,26 +261,6 @@ def get_cell_config( config = CellConfigDict( name=cell, gateway=GatewayConfigDict( - lifecycle={ - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - f"/cloudmap/scripts/deregister_from_cloudmap.sh vtgate-{cell} {aws_region}", - ] - } - }, - "postStart": { - "exec": { - "command": [ - "/bin/sh", - "-c", - f"/cloudmap/scripts/register_to_cloudmap.sh vtgate-{cell} {aws_region}", - ] - } - }, - }, affinity={"nodeAffinity": node_affinity}, extraEnv=updated_vtgate_extra_env, extraFlags={ @@ -287,6 +270,28 @@ def get_cell_config( "mysql_auth_vault_tls_ca": f"/etc/vault/all_cas/acm-privateca-{region}.crt", "mysql_auth_vault_ttl": "60s", }, + extraVolumeMounts=[ + { + "mountPath": "/nail/srv", + "name": "srv-configs", + "readOnly": True, + }, + { + "mountPath": "/nail/etc/srv-configs", + "name": "etc-srv-configs", + "readOnly": True, + }, + ], + extraVolumes=[ + { + "name": "srv-configs", + "hostPath": {"path": "/nail/srv"}, + }, + { + "name": "etc-srv-configs", + "hostPath": {"path": "/nail/etc/srv-configs"}, + }, + ], extraLabels=labels, replicas=replicas, resources={ diff --git a/requirements-minimal.txt b/requirements-minimal.txt index 9e981f1bb0..7b6ea9a398 100644 --- a/requirements-minimal.txt +++ b/requirements-minimal.txt @@ -29,7 +29,7 @@ kazoo >= 2.0.0 # that we can use across our different clusters (e.g, if X.0.0 removes an API version that we use # in any cluster, this upper-bound should be < X.0.0) # we should probably also be better at setting a correct lower-bound, but that's less likely to cause issues. 
-kubernetes >= 18.20.0, < 22.0.0 +kubernetes >= 18.20.0, < 26.0.0 ldap3 manhole mypy-extensions >= 0.3.0 @@ -56,7 +56,7 @@ requests-cache >= 0.4.10 retry ruamel.yaml sensu-plugin -service-configuration-lib >= 2.18.21 +service-configuration-lib >= 3.0.0 signalfx slackclient >= 1.2.1 sticht >= 1.1.0 diff --git a/requirements.txt b/requirements.txt index fc86a1d482..9887bf9df6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,7 +45,7 @@ jmespath==0.9.3 jsonref==0.1 jsonschema==2.5.1 kazoo==2.8.0 -kubernetes==21.7.0 +kubernetes==24.2.0 ldap3==2.6 manhole==1.5.0 MarkupSafe==1.1.1 @@ -91,7 +91,7 @@ rsa==4.7.2 ruamel.yaml==0.15.96 s3transfer==0.10.0 sensu-plugin==0.3.1 -service-configuration-lib==2.18.21 +service-configuration-lib==3.0.0 setuptools==39.0.1 signalfx==1.0.17 simplejson==3.10.0 diff --git a/tests/cli/test_cmds_local_run.py b/tests/cli/test_cmds_local_run.py index 6cf7c23693..d9871c1167 100644 --- a/tests/cli/test_cmds_local_run.py +++ b/tests/cli/test_cmds_local_run.py @@ -395,6 +395,7 @@ def test_configure_and_run_command_uses_cmd_from_config( args.assume_pod_identity = False args.use_okta_role = False args.use_service_auth_token = False + args.use_sso_service_auth_token = False mock_secret_provider_kwargs = { "vault_cluster_config": {}, @@ -438,6 +439,7 @@ def test_configure_and_run_command_uses_cmd_from_config( assume_role_aws_account=None, use_okta_role=False, use_service_auth_token=False, + use_sso_service_auth_token=False, ) @@ -473,6 +475,7 @@ def test_configure_and_run_uses_bash_by_default_when_interactive( args.assume_pod_identity = False args.use_okta_role = False args.use_service_auth_token = False + args.use_sso_service_auth_token = False return_code = configure_and_run_docker_container( docker_client=mock_docker_client, @@ -515,6 +518,7 @@ def test_configure_and_run_uses_bash_by_default_when_interactive( assume_pod_identity=False, use_okta_role=False, use_service_auth_token=False, + use_sso_service_auth_token=False, ) @@ -556,6 +560,7 @@ def test_configure_and_run_pulls_image_when_asked( args.assume_pod_identity = False args.use_okta_role = False args.use_service_auth_token = False + args.use_sso_service_auth_token = False return_code = configure_and_run_docker_container( docker_client=mock_docker_client, @@ -600,6 +605,7 @@ def test_configure_and_run_pulls_image_when_asked( assume_role_aws_account="dev", use_okta_role=False, use_service_auth_token=False, + use_sso_service_auth_token=False, ) @@ -637,6 +643,7 @@ def test_configure_and_run_docker_container_defaults_to_interactive_instance( args.assume_pod_identity = False args.use_okta_role = False args.use_service_auth_token = False + args.use_sso_service_auth_token = False mock_config = mock.create_autospec(AdhocJobConfig) mock_get_default_interactive_config.return_value = mock_config @@ -681,6 +688,7 @@ def test_configure_and_run_docker_container_defaults_to_interactive_instance( assume_role_aws_account="dev", use_okta_role=False, use_service_auth_token=False, + use_sso_service_auth_token=False, ) diff --git a/tests/cli/test_cmds_status.py b/tests/cli/test_cmds_status.py index 9f612278b7..cb4457837e 100644 --- a/tests/cli/test_cmds_status.py +++ b/tests/cli/test_cmds_status.py @@ -340,6 +340,7 @@ def test_status_calls_sergeants( args.registration = None args.service_instance = None args.new = False + args.all_namespaces = False return_value = paasta_status(args) assert return_value == 1776 @@ -355,6 +356,7 @@ def test_status_calls_sergeants( lock=mock.ANY, verbose=False, new=False, + all_namespaces=False, ) 
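These status tests (together with the test_all_namespaces case in tests/instance/test_kubernetes.py further down) pin the intended semantics of the new all_namespaces flag. The helper below is a minimal hypothetical sketch of that behavior; namespaces_to_search is not a real paasta function, and find_all_relevant_namespaces is injected as a parameter so the sketch stays self-contained.

# Hypothetical sketch of the all_namespaces semantics exercised by these tests: by
# default, status only inspects the namespace the instance is configured to deploy
# into; with all_namespaces=True it searches every relevant namespace instead.
from typing import Callable, Collection, List


def namespaces_to_search(
    configured_namespace: str,
    all_namespaces: bool,
    find_all_relevant_namespaces: Callable[[], List[str]],
) -> Collection[str]:
    if all_namespaces:
        # more expensive: list every namespace that could hold running copies
        return find_all_relevant_namespaces()
    # cheap default: just the instance's own namespace
    return {configured_namespace}


assert namespaces_to_search("paastasvc-service", False, lambda: ["paasta"]) == {"paastasvc-service"}
assert namespaces_to_search("paastasvc-service", True, lambda: ["paasta"]) == ["paasta"]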
@@ -390,6 +392,7 @@ def __init__( service_instance=None, new=False, old=False, + all_namespaces=False, ): self.service = service self.soa_dir = soa_dir @@ -402,6 +405,7 @@ def __init__( self.service_instance = service_instance self.new = new self.old = old + self.all_namespaces = all_namespaces @patch("paasta_tools.cli.cmds.status.get_instance_configs_for_service", autospec=True) @@ -890,6 +894,7 @@ def test_status_with_registration( verbose=False, service_instance=None, new=False, + all_namespaces=True, # Bonus all_namespaces test ) return_value = paasta_status(args) @@ -908,6 +913,7 @@ def test_status_with_registration( lock=mock.ANY, verbose=args.verbose, new=False, + all_namespaces=True, ) @@ -1814,8 +1820,10 @@ def test_running(self, mock_kubernetes_status_v2): assert remove_ansi_escape_sequences(instance_state) == "Running" def test_bouncing(self, mock_kubernetes_status_v2): + old_version = mock_kubernetes_status_v2.versions[0] new_version = paastamodels.KubernetesVersion( - create_timestamp=1.0, + # ensure creation is after current version + create_timestamp=old_version.create_timestamp + 1000, git_sha="bbb111", config_sha="config111", ready_replicas=0, @@ -1826,9 +1834,29 @@ def test_bouncing(self, mock_kubernetes_status_v2): instance_state = remove_ansi_escape_sequences(instance_state) assert instance_state == "Bouncing to bbb111, config111" + def test_bouncing_ordering(self, mock_kubernetes_status_v2): + old_version = mock_kubernetes_status_v2.versions[0] + new_version = paastamodels.KubernetesVersion( + # ensure creation is _before_ current version + create_timestamp=old_version.create_timestamp - 1000, + git_sha="bbb111", + config_sha="config111", + ready_replicas=0, + ) + mock_kubernetes_status_v2.versions.append(new_version) + + instance_state = get_instance_state(mock_kubernetes_status_v2) + instance_state = remove_ansi_escape_sequences(instance_state) + assert instance_state != "Bouncing to bbb111, config111" + assert ( + instance_state + == f"Bouncing to {old_version.git_sha[:8]}, {old_version.config_sha}" + ) + def test_bouncing_git_sha_change_only(self, mock_kubernetes_status_v2): + old_version = mock_kubernetes_status_v2.versions[0] new_version = paastamodels.KubernetesVersion( - create_timestamp=1.0, + create_timestamp=old_version.create_timestamp + 1000, git_sha="bbb111", config_sha=mock_kubernetes_status_v2.versions[0].config_sha, ready_replicas=0, @@ -2405,16 +2433,16 @@ def test_output( expected_output = [ f" Kafka View Url: {status['kafka_view_url']}", f" Zookeeper: {status['zookeeper']}", - f" State: testing", + " State: testing", f" Ready: {str(status['cluster_ready']).lower()}", f" Health: {PaastaColors.red('unhealthy')}", f" Reason: {status['health']['message']}", f" Offline Partitions: {status['health']['offline_partitions']}", f" Under Replicated Partitions: {status['health']['under_replicated_partitions']}", - f" Brokers:", - f" Id Phase Started", - f" 0 {PaastaColors.green('Running')} 2020-03-25 16:24:21 ({mock_naturaltime.return_value})", - f" 1 {PaastaColors.red('Pending')} 2020-03-25 16:24:21 ({mock_naturaltime.return_value})", + " Brokers:", + " Id Phase Started", + f" 0 {PaastaColors.green('Running')} 2020-03-25 16:24:21+00:00 ({mock_naturaltime.return_value})", + f" 1 {PaastaColors.red('Pending')} 2020-03-25 16:24:21+00:00 ({mock_naturaltime.return_value})", ] assert expected_output == output diff --git a/tests/instance/test_kubernetes.py b/tests/instance/test_kubernetes.py index a86284870b..702e0b0f32 100644 --- a/tests/instance/test_kubernetes.py 
+++ b/tests/instance/test_kubernetes.py @@ -120,6 +120,7 @@ def instance_status_kwargs(): include_envoy=False, settings=mock.Mock(), use_new=False, + all_namespaces=False, ) @@ -603,6 +604,78 @@ def test_pod_timeout( assert status assert "Could not fetch instance data" in status["error_message"] + def test_all_namespaces( + self, + mock_replicasets_for_service_instance, + mock_LONG_RUNNING_INSTANCE_TYPE_HANDLERS, + mock_load_service_namespace_config, + mock_pods_for_service_instance, + mock_mesh_status, + mock_get_pod_event_messages, + mock_pod, + mock_find_all_relevant_namespaces, + ): + mock_find_all_relevant_namespaces.return_value = ["paasta"] + mock_job_config = mock.Mock( + get_persistent_volumes=mock.Mock(return_value=[]), + get_kubernetes_namespace=mock.Mock(return_value="paastasvc-service"), + ) + mock_LONG_RUNNING_INSTANCE_TYPE_HANDLERS[ + "kubernetes" + ].loader.return_value = mock_job_config + mock_replicasets_for_service_instance.return_value = [ + Struct( + spec=Struct(replicas=1), + metadata=Struct( + name="replicaset_1", + creation_timestamp=datetime.datetime(2021, 3, 5), + deletion_timestamp=None, + labels={ + "paasta.yelp.com/git_sha": "aaa000", + "paasta.yelp.com/config_sha": "config000", + }, + ), + ), + ] + mock_load_service_namespace_config.return_value = {} + mock_job_config.get_registrations.return_value = ["service.instance"] + mock_get_pod_event_messages.return_value = [] + + with asynctest.patch( + "paasta_tools.instance.kubernetes.get_versions_for_replicasets", + autospec=True, + ) as mock_get_versions_for_replicasets: + pik.kubernetes_status_v2( + service="service", + instance="instance", + verbose=0, + include_envoy=False, + instance_type="kubernetes", + settings=mock.Mock(), + ) + + # We are only testing that we stick to the instance's own namespace when all_namespaces isn't set + assert not mock_find_all_relevant_namespaces.called + _, _, get_rs_kwargs = mock_get_versions_for_replicasets.mock_calls[0] + assert get_rs_kwargs["namespaces"] == {"paastasvc-service"} + + with asynctest.patch( + "paasta_tools.instance.kubernetes.get_versions_for_replicasets", + autospec=True, + ) as mock_get_versions_for_replicasets: + pik.kubernetes_status_v2( + service="service", + instance="instance", + verbose=0, + include_envoy=False, + instance_type="kubernetes", + settings=mock.Mock(), + all_namespaces=True, + ) + assert mock_find_all_relevant_namespaces.called + _, _, get_rs_kwargs = mock_get_versions_for_replicasets.mock_calls[0] + assert get_rs_kwargs["namespaces"] == ["paasta"] + @mock.patch("paasta_tools.kubernetes_tools.get_kubernetes_app_by_name", autospec=True) def test_job_status_include_replicaset_non_verbose(mock_get_kubernetes_app_by_name): diff --git a/tests/kubernetes/bin/test_paasta_cleanup_stale_nodes.py b/tests/kubernetes/bin/test_paasta_cleanup_stale_nodes.py index 13954a8ed4..b7bfd374ba 100644 --- a/tests/kubernetes/bin/test_paasta_cleanup_stale_nodes.py +++ b/tests/kubernetes/bin/test_paasta_cleanup_stale_nodes.py @@ -16,7 +16,7 @@ def test_nodes_for_cleanup(): ) as mock_terminated_nodes: m1, m2, m3 = mock.MagicMock(), mock.MagicMock(), mock.MagicMock() m4 = mock.MagicMock() - m4.metadata.labels = {"node-role.kubernetes.io/master": ""} + m4.metadata.labels = {"node-role.kubernetes.io/control-plane": ""} mock_ec2_client = mock.Mock() mock_terminated_nodes.return_value = [m2, m3] for_cleanup = nodes_for_cleanup(mock_ec2_client, [m1, m2, m3, m4]) @@ -133,7 +133,7 @@ def test_main(): else: m.metadata.labels = { "failure-domain.beta.kubernetes.io/region": "us-west-1", - "node-role.kubernetes.io/master": "", +
"node-role.kubernetes.io/control-plane": "", } mock_get_all_nodes.return_value = [m1, m2, m3, m4] diff --git a/tests/test_config_utils.py b/tests/test_config_utils.py index 17a76f3d53..2ce961a3b2 100644 --- a/tests/test_config_utils.py +++ b/tests/test_config_utils.py @@ -232,7 +232,7 @@ def test_auto_config_updater_validate(mock_validate_file, all_valid, updater): mock_validate_file.side_effect = [True, all_valid, True] updater.write_configs("foo", "kubernetes-norcal-devc", {"a": 2}) - updater.write_configs("foo", "kubernetes-pnw-devc", {"a": 2}) + updater.write_configs("foo", "eks-pnw-devc", {"a": 2}) assert updater.validate() == all_valid assert mock_validate_file.call_count == 2 diff --git a/tests/test_kubernetes_tools.py b/tests/test_kubernetes_tools.py index a4444627a3..dd1e67c268 100644 --- a/tests/test_kubernetes_tools.py +++ b/tests/test_kubernetes_tools.py @@ -26,12 +26,12 @@ from kubernetes.client import V1EnvVar from kubernetes.client import V1EnvVarSource from kubernetes.client import V1ExecAction -from kubernetes.client import V1Handler from kubernetes.client import V1HostPathVolumeSource from kubernetes.client import V1HTTPGetAction from kubernetes.client import V1KeyToPath from kubernetes.client import V1LabelSelector from kubernetes.client import V1Lifecycle +from kubernetes.client import V1LifecycleHandler from kubernetes.client import V1NodeAffinity from kubernetes.client import V1NodeSelector from kubernetes.client import V1NodeSelectorRequirement @@ -603,7 +603,7 @@ def test_get_sidecar_containers(self): ], image="some-docker-image", lifecycle=V1Lifecycle( - pre_stop=V1Handler( + pre_stop=V1LifecycleHandler( _exec=V1ExecAction( command=[ "/bin/sh", @@ -649,7 +649,7 @@ def test_get_sidecar_containers(self): ], image="some-docker-image", lifecycle=V1Lifecycle( - pre_stop=V1Handler( + pre_stop=V1LifecycleHandler( _exec=V1ExecAction( command=[ "/bin/sh", @@ -929,7 +929,7 @@ def test_get_kubernetes_containers(self, prometheus_port, expected_ports): resources=mock_get_resource_requirements.return_value, image=mock_get_docker_url.return_value, lifecycle=V1Lifecycle( - pre_stop=V1Handler( + pre_stop=V1LifecycleHandler( _exec=V1ExecAction(command=["/bin/sh", "-c", "sleep 30"]) ) ), @@ -1067,8 +1067,9 @@ def test_get_security_context_without_cap_add(self): def test_get_security_context_with_cap_add(self): self.deployment.config_dict["cap_add"] = ["SETGID"] + expected_dropped_caps = sorted(list(set(CAPS_DROP) - {"SETGID"})) expected_security_context = V1SecurityContext( - capabilities=V1Capabilities(add=["SETGID"], drop=CAPS_DROP) + capabilities=V1Capabilities(add=["SETGID"], drop=expected_dropped_caps) ) assert self.deployment.get_security_context() == expected_security_context @@ -2201,7 +2202,7 @@ def test_kubernetes_container_termination_action( self.deployment.config_dict["lifecycle"] = { "pre_stop_command": termination_action } - handler = V1Handler(_exec=V1ExecAction(command=expected)) + handler = V1LifecycleHandler(_exec=V1ExecAction(command=expected)) assert self.deployment.get_kubernetes_container_termination_action() == handler @pytest.mark.parametrize( @@ -4239,7 +4240,7 @@ def test_load_custom_resources(): ] -def test_warning_big_bounce(): +def test_warning_big_bounce_default_config(): job_config = kubernetes_tools.KubernetesDeploymentConfig( service="service", instance="instance", @@ -4279,7 +4280,7 @@ def test_warning_big_bounce(): job_config.format_kubernetes_app().spec.template.metadata.labels[ "paasta.yelp.com/config_sha" ] - == "configd2fd7b15" + == 
"config3bd814d2" ), "If this fails, just change the constant in this test, but be aware that deploying this change will cause every service to bounce!" @@ -4325,10 +4326,57 @@ def test_warning_big_bounce_routable_pod(): job_config.format_kubernetes_app().spec.template.metadata.labels[ "paasta.yelp.com/config_sha" ] - == "configa2ea39be" + == "configf23a3edb" ), "If this fails, just change the constant in this test, but be aware that deploying this change will cause every smartstack-registered service to bounce!" +def test_warning_big_bounce_common_config(): + job_config = kubernetes_tools.KubernetesDeploymentConfig( + service="service", + instance="instance", + cluster="cluster", + config_dict={ + # XXX: this should include other common options that are used + "cap_add": ["SET_GID"], + }, + branch_dict={ + "docker_image": "abcdef", + "git_sha": "deadbeef", + "image_version": None, + "force_bounce": None, + "desired_state": "start", + }, + ) + + with mock.patch( + "paasta_tools.utils.load_system_paasta_config", + return_value=SystemPaastaConfig( + { + "volumes": [], + "hacheck_sidecar_volumes": [], + "expected_slave_attributes": [{"region": "blah"}], + "docker_registry": "docker-registry.local", + }, + "/fake/dir/", + ), + autospec=True, + ) as mock_load_system_paasta_config, mock.patch( + "paasta_tools.kubernetes_tools.load_system_paasta_config", + new=mock_load_system_paasta_config, + autospec=False, + ), mock.patch( + "paasta_tools.kubernetes_tools.load_service_namespace_config", + return_value=ServiceNamespaceConfig(), + autospec=True, + ): + assert ( + job_config.format_kubernetes_app().spec.template.metadata.labels[ + "paasta.yelp.com/config_sha" + ] + == "configb24f9dd2" + ), "If this fails, just change the constant in this test, but be aware that deploying this change will cause every service to bounce!" 
+ + @pytest.mark.parametrize( "pod_node_name,node,expected", [ diff --git a/tests/test_tron_tools.py b/tests/test_tron_tools.py index a53c605024..11a08270ee 100644 --- a/tests/test_tron_tools.py +++ b/tests/test_tron_tools.py @@ -24,6 +24,7 @@ "volumes": [], "dockercfg_location": "/mock/dockercfg", "spark_k8s_role": "spark", + "enable_tron_tsc": True, } ), "/mock/system/configs", @@ -39,6 +40,7 @@ "tron_k8s_cluster_overrides": { "paasta-dev-test": "paasta-dev", }, + "enable_tron_tsc": True, } ), "/mock/system/configs", @@ -111,7 +113,9 @@ def test_get_env( "paasta_tools.utils.get_service_docker_registry", autospec=True, ), mock.patch( - "paasta_tools.tron_tools.load_system_paasta_config", autospec=True + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, ): env = action_config.get_env() assert not any([env.get("SPARK_OPTS"), env.get("CLUSTERMAN_RESOURCES")]) @@ -287,20 +291,10 @@ def test_get_action_config( "my_job", job_dict, cluster, soa_dir=soa_dir ) - mock_paasta_system_config = utils.SystemPaastaConfig( - config=utils.SystemPaastaConfigDict( - { - "tron_k8s_cluster_overrides": { - "paasta-dev-test": "paasta-dev", - } - } - ), - directory="/mock/system/configs", - ) with mock.patch( "paasta_tools.tron_tools.load_system_paasta_config", autospec=True, - return_value=mock_paasta_system_config, + return_value=MOCK_SYSTEM_PAASTA_CONFIG_OVERRIDES, ): action_config = job_config._get_action_config( "normal", action_dict=action_dict @@ -341,7 +335,11 @@ def test_get_action_config( cluster=expected_cluster, ) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) @mock.patch("paasta_tools.tron_tools.load_v2_deployments_json", autospec=True) def test_get_action_config_load_deployments_false( self, mock_load_deployments, mock_load_system_paasta_config @@ -362,9 +360,6 @@ def test_get_action_config_load_deployments_false( "my_job", job_dict, cluster, load_deployments=False, soa_dir=soa_dir ) mock_load_deployments.side_effect = NoDeploymentsAvailable - mock_load_system_paasta_config.return_value.get_tron_k8s_cluster_overrides.return_value = ( - {} - ) action_config = job_config._get_action_config("normal", action_dict) @@ -530,7 +525,11 @@ def test_format_tron_job_dict_with_cleanup_action( } @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_all_actions( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -552,7 +551,11 @@ def test_validate_all_actions( assert len(errors) == 3 @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_invalid_deploy_group( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -572,7 +575,11 @@ def test_validate_invalid_deploy_group( assert len(errors) == 1 @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", 
autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_valid_deploy_group( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -592,7 +599,11 @@ def test_validate_valid_deploy_group( assert len(errors) == 0 @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_invalid_action_deploy_group( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -616,7 +627,11 @@ def test_validate_invalid_action_deploy_group( assert len(errors) == 1 @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_action_valid_deploy_group( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -641,7 +656,11 @@ def test_validate_action_valid_deploy_group( "paasta_tools.tron_tools.TronActionConfig.build_spark_config", autospec=True ) @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_invalid_cpus_in_executor_spark_action( self, mock_load_system_paasta_config, @@ -673,7 +692,11 @@ def test_validate_invalid_cpus_in_executor_spark_action( "paasta_tools.tron_tools.TronActionConfig.build_spark_config", autospec=True ) @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_invalid_mem_in_executor_spark_action( self, mock_load_system_paasta_config, @@ -705,7 +728,11 @@ def test_validate_invalid_mem_in_executor_spark_action( "paasta_tools.tron_tools.TronActionConfig.build_spark_config", autospec=True ) @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_valid_executor_spark_action( self, mock_load_system_paasta_config, @@ -733,7 +760,11 @@ def test_validate_valid_executor_spark_action( assert len(errors) == 0 @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_monitoring( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -750,7 +781,11 @@ def test_validate_monitoring( assert len(errors) == 0 @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - 
@mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_monitoring_without_team( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -768,7 +803,11 @@ def test_validate_monitoring_without_team( assert job_config.get_monitoring()["team"] == "default_team" @mock.patch("paasta_tools.utils.get_pipeline_deploy_groups", autospec=True) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) def test_validate_monitoring_with_invalid_team( self, mock_load_system_paasta_config, mock_get_pipeline_deploy_groups ): @@ -798,12 +837,15 @@ def test_get_monitoring(self, tronfig_monitoring): class TestTronTools: - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) - def test_load_tron_config(self, mock_system_paasta_config): + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) + def test_load_tron_config(self, mock_load_system_paasta_config): result = tron_tools.load_tron_config() - assert mock_system_paasta_config.return_value.get_tron_config.call_count == 1 assert result == tron_tools.TronConfig( - mock_system_paasta_config.return_value.get_tron_config.return_value + mock_load_system_paasta_config().get_tron_config() ) @mock.patch("paasta_tools.tron_tools.load_tron_config", autospec=True) @@ -908,10 +950,13 @@ def test_format_tron_action_dict_default_executor(self): with mock.patch.object( action_config, "get_docker_registry", return_value="docker-registry.com:400" ), mock.patch( - "paasta_tools.utils.load_system_paasta_config", autospec=True + "paasta_tools.utils.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, ), mock.patch( "paasta_tools.tron_tools.load_system_paasta_config", autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, ), mock.patch( "paasta_tools.tron_tools.add_volumes_for_authenticating_services", autospec=True, @@ -977,6 +1022,7 @@ def test_format_tron_action_dict_paasta(self): ), mock.patch( "paasta_tools.tron_tools.load_system_paasta_config", autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, ), mock.patch( "paasta_tools.tron_tools.add_volumes_for_authenticating_services", autospec=True, @@ -997,6 +1043,17 @@ def test_format_tron_action_dict_paasta(self): "mem": 1200, "disk": 42, "env": mock.ANY, + "topology_spread_constraints": [ + { + "label_selector": { + "app.kubernetes.io/managed-by": "tron", + "paasta.yelp.com/pool": "special_pool", + }, + "max_skew": 1, + "topology_key": "topology.kubernetes.io/zone", + "when_unsatisfiable": "ScheduleAnyway", + }, + ], "secret_volumes": [ { "secret_volume_name": "tron-secret-my--service-secret1", @@ -1027,7 +1084,11 @@ def test_format_tron_action_dict_paasta(self): "yelp.com/owner": "compute_infra_platform_experience", "app.kubernetes.io/managed-by": "tron", }, - "annotations": {"paasta.yelp.com/routable_ip": "false"}, + "annotations": { + "paasta.yelp.com/routable_ip": "false", + "paasta.yelp.com/service": "my_service", + "paasta.yelp.com/instance": "my_job.do_something", + }, "cap_drop": CAPS_DROP, "cap_add": [], "secret_env": {}, @@ -1048,7 +1109,11 @@ def test_format_tron_action_dict_paasta(self): @mock.patch( 
"paasta_tools.kubernetes_tools.kube_config.load_kube_config", autospec=True ) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) @mock.patch("paasta_tools.tron_tools.get_k8s_url_for_cluster", autospec=True) @mock.patch( "service_configuration_lib.spark_config._get_k8s_docker_volumes_conf", @@ -1271,7 +1336,6 @@ def test_format_tron_action_dict_spark( "--conf spark.kubernetes.executor.label.yelp.com/pool=special_pool " "--conf spark.kubernetes.executor.label.paasta.yelp.com/pool=special_pool " "--conf spark.kubernetes.executor.label.yelp.com/owner=core_ml " - "--conf spark.kubernetes.executor.podTemplateFile=/nail/srv/configs/spark_dns_pod_template.yaml " "--conf spark.kubernetes.executor.volumes.hostPath.0.mount.path=/nail/bulkdata " "--conf spark.kubernetes.executor.volumes.hostPath.0.options.path=/nail/bulkdata " "--conf spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly=true " @@ -1303,6 +1367,7 @@ def test_format_tron_action_dict_spark( "--conf spark.kubernetes.allocation.batch.size=512 " "--conf spark.kubernetes.decommission.script=/opt/spark/kubernetes/dockerfiles/spark/decom.sh " "--conf spark.logConf=true " + "--conf spark.kubernetes.executor.podTemplateFile=/nail/srv/configs/spark_dns_pod_template.yaml " "--conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.WebIdentityTokenCredentialsProvider " "--conf spark.driver.host=$PAASTA_POD_IP " "--conf spark.kubernetes.authenticate.executor.serviceAccountName=paasta--arn-aws-iam-000000000000-role-some-role " @@ -1343,6 +1408,17 @@ def test_format_tron_action_dict_spark( "KUBECONFIG": "/etc/kubernetes/spark.conf", "AWS_DEFAULT_REGION": "us-west-2", }, + "topology_spread_constraints": [ + { + "label_selector": { + "app.kubernetes.io/managed-by": "tron", + "paasta.yelp.com/pool": "stable", + }, + "max_skew": 1, + "topology_key": "topology.kubernetes.io/zone", + "when_unsatisfiable": "ScheduleAnyway", + }, + ], "node_selectors": {"yelp.com/pool": "stable"}, "cap_add": [], "cap_drop": [ @@ -1374,6 +1450,8 @@ def test_format_tron_action_dict_spark( }, "annotations": { "paasta.yelp.com/routable_ip": "true", + "paasta.yelp.com/service": "my_service", + "paasta.yelp.com/instance": "my_job.do_something", "prometheus.io/port": "39091", "prometheus.io/path": "/metrics/prometheus", }, @@ -1437,6 +1515,7 @@ def test_format_tron_action_dict_paasta_k8s_service_account(self): ), mock.patch( "paasta_tools.tron_tools.load_system_paasta_config", autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, ), mock.patch( "paasta_tools.tron_tools.add_volumes_for_authenticating_services", autospec=True, @@ -1463,9 +1542,22 @@ def test_format_tron_action_dict_paasta_k8s_service_account(self): }, "annotations": { "paasta.yelp.com/routable_ip": "false", + "paasta.yelp.com/service": "my_service", + "paasta.yelp.com/instance": "job_name.instance_name", }, "node_selectors": {"yelp.com/pool": "default"}, "env": mock.ANY, + "topology_spread_constraints": [ + { + "label_selector": { + "app.kubernetes.io/managed-by": "tron", + "paasta.yelp.com/pool": "default", + }, + "max_skew": 1, + "topology_key": "topology.kubernetes.io/zone", + "when_unsatisfiable": "ScheduleAnyway", + }, + ], "secret_env": {}, "field_selector_env": {"PAASTA_POD_IP": {"field_path": "status.podIP"}}, "secret_volumes": [], @@ -1568,6 +1660,7 @@ def test_format_tron_action_dict_paasta_k8s( ), mock.patch( 
"paasta_tools.tron_tools.load_system_paasta_config", autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, ), mock.patch( "paasta_tools.secret_tools.is_shared_secret_from_secret_name", autospec=True, @@ -1601,6 +1694,8 @@ def test_format_tron_action_dict_paasta_k8s( }, "annotations": { "paasta.yelp.com/routable_ip": "false", + "paasta.yelp.com/service": "my_service", + "paasta.yelp.com/instance": instance_name, }, "node_selectors": {"yelp.com/pool": "special_pool"}, "node_affinities": [ @@ -1611,6 +1706,17 @@ def test_format_tron_action_dict_paasta_k8s( } ], "env": mock.ANY, + "topology_spread_constraints": [ + { + "label_selector": { + "app.kubernetes.io/managed-by": "tron", + "paasta.yelp.com/pool": "special_pool", + }, + "max_skew": 1, + "topology_key": "topology.kubernetes.io/zone", + "when_unsatisfiable": "ScheduleAnyway", + }, + ], "secret_env": { "SOME_SECRET": { "secret_name": "tron-secret-my--service-secret--name", @@ -1693,6 +1799,7 @@ def test_format_tron_action_dict_paasta_no_branch_dict(self): ), mock.patch( "paasta_tools.tron_tools.load_system_paasta_config", autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, ), mock.patch( "paasta_tools.tron_tools.add_volumes_for_authenticating_services", autospec=True, @@ -1709,6 +1816,17 @@ def test_format_tron_action_dict_paasta_no_branch_dict(self): "mem": 1200, "disk": 42, "env": mock.ANY, + "topology_spread_constraints": [ + { + "label_selector": { + "app.kubernetes.io/managed-by": "tron", + "paasta.yelp.com/pool": "special_pool", + }, + "max_skew": 1, + "topology_key": "topology.kubernetes.io/zone", + "when_unsatisfiable": "ScheduleAnyway", + }, + ], "secret_volumes": [ { "secret_volume_name": "tron-secret-my--service-secret1", @@ -1736,7 +1854,11 @@ def test_format_tron_action_dict_paasta_no_branch_dict(self): "yelp.com/owner": "compute_infra_platform_experience", "app.kubernetes.io/managed-by": "tron", }, - "annotations": {"paasta.yelp.com/routable_ip": "false"}, + "annotations": { + "paasta.yelp.com/routable_ip": "false", + "paasta.yelp.com/service": "my_service", + "paasta.yelp.com/instance": "my_job.do_something", + }, "cap_drop": CAPS_DROP, "cap_add": [], "secret_env": {}, @@ -1783,7 +1905,11 @@ def test_load_tron_service_config_empty(self, mock_read_extra_service_informatio service_name="service", extra_info="tron-test-cluster", soa_dir="fake" ) - @mock.patch("paasta_tools.tron_tools.load_system_paasta_config", autospec=True) + @mock.patch( + "paasta_tools.tron_tools.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ) @mock.patch("paasta_tools.tron_tools.load_tron_config", autospec=True) @mock.patch("paasta_tools.tron_tools.load_tron_service_config", autospec=True) @mock.patch("paasta_tools.tron_tools.format_tron_job_dict", autospec=True) @@ -1882,7 +2008,7 @@ def test_create_complete_config_e2e(self, tmpdir): # that are not static, this will cause continuous reconfiguration, which # will add significant load to the Tron API, which happened in DAR-1461. # but if this is intended, just change the hash. 
- assert hasher.hexdigest() == "26dae1d70ae0b937706e3de597cc07e8" + assert hasher.hexdigest() == "31634ab048abe9b40b71851797d48e4d" def test_override_default_pool_override(self, tmpdir): soa_dir = tmpdir.mkdir("test_create_complete_config_soa") diff --git a/tests/test_vitesscluster_tools.py b/tests/test_vitesscluster_tools.py index 19438a2bb8..58237eb9cf 100644 --- a/tests/test_vitesscluster_tools.py +++ b/tests/test_vitesscluster_tools.py @@ -117,26 +117,21 @@ "mysql_auth_vault_ttl": "60s", }, "extraLabels": {"tablet_type": "fake_keyspaces_migration"}, - "lifecycle": { - "postStart": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "/cloudmap/scripts/register_to_cloudmap.sh vtgate-fake_cell mo-ck_r-e", - ] - } + "extraVolumeMounts": [ + {"mountPath": "/nail/srv", "name": "srv-configs", "readOnly": True}, + { + "mountPath": "/nail/etc/srv-configs", + "name": "etc-srv-configs", + "readOnly": True, }, - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "/cloudmap/scripts/deregister_from_cloudmap.sh vtgate-fake_cell mo-ck_r-e", - ] - } + ], + "extraVolumes": [ + {"hostPath": {"path": "/nail/srv"}, "name": "srv-configs"}, + { + "hostPath": {"path": "/nail/etc/srv-configs"}, + "name": "etc-srv-configs", }, - }, + ], "replicas": 1, "resources": { "limits": {"cpu": "100m", "memory": "256Mi"}, @@ -334,6 +329,7 @@ "enforce-tableacl-config": "true", "grpc_max_message_size": "134217728", "init_tablet_type": "replica", + "init_shard": "0", "keep_logs": "72h", "log_err_stacks": "true", "queryserver-config-schema-reload-time": "1800", @@ -522,6 +518,7 @@ "enforce-tableacl-config": "true", "grpc_max_message_size": "134217728", "init_tablet_type": "replica", + "init_shard": "0", "keep_logs": "72h", "log_err_stacks": "true", "queryserver-config-schema-reload-time": "1800", diff --git a/tox.ini b/tox.ini index 40e3123e84..d1902ea0ba 100644 --- a/tox.ini +++ b/tox.ini @@ -24,7 +24,7 @@ commands = [testenv:dev-api] envdir = .tox/py38-linux/ -passenv = PAASTA_TEST_CLUSTER KUBECONFIG PAASTA_SYSTEM_CONFIG_DIR +passenv = PAASTA_TEST_CLUSTER KUBECONFIG PAASTA_SYSTEM_CONFIG_DIR KUBECONTEXT AWS_PROFILE deps = --only-binary=grpcio --requirement={toxinidir}/requirements.txt diff --git a/yelp_package/Makefile b/yelp_package/Makefile index 795e95359d..81b79e9e48 100644 --- a/yelp_package/Makefile +++ b/yelp_package/Makefile @@ -13,7 +13,7 @@ # limitations under the License. # Edit this release and run "make release" -RELEASE=1.4.33 +RELEASE=1.8.2 SHELL=/bin/bash diff --git a/yelp_package/extra_requirements_yelp.txt b/yelp_package/extra_requirements_yelp.txt index 25c77c5fa5..3774624bf4 100644 --- a/yelp_package/extra_requirements_yelp.txt +++ b/yelp_package/extra_requirements_yelp.txt @@ -16,11 +16,14 @@ monk==1.3.0 # yelp-clog dependency mrjob==0.7.4 # scribereader dependency named-decorator==0.1.4 # yelp-profiling dependency ndg-httpsclient==0.4.3 # vault-tools dependency +okta-auth==1.0.1 pycparser==2.20 # vault-tools dependency pygpgme==0.3 # vault-tools dependency +pyjwt==2.9.0 # okta-auth dependency pyopenssl==19.0.0 # vault-tools dependency PySubnetTree==0.34 # yelp-lib dependency python-jsonschema-objects==0.3.1 # slo-transcoder dependency +saml-helper==2.3.3 # okta-auth dependency scribereader==1.1.1 signalform-tools==0.0.16 # slo-transcoder dependency slo-transcoder==3.3.0
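For context on the topology_spread_constraints block hardcoded in tron_tools.format_tron_action_dict earlier in this diff, the sketch below shows roughly the Kubernetes object that dict is expected to map onto. The actual translation happens inside Tron, not in this repo, so treat this as an assumption rather than a description of Tron's implementation.

# Hedged sketch of the pod-level constraint implied by the Tron-side dict above.
from kubernetes.client import V1LabelSelector, V1TopologySpreadConstraint


def tsc_for_pool(pool: str) -> V1TopologySpreadConstraint:
    return V1TopologySpreadConstraint(
        # try to spread pods evenly across zones...
        max_skew=1,
        topology_key="topology.kubernetes.io/zone",
        # ...but still schedule if a zonal imbalance can't be avoided
        when_unsatisfiable="ScheduleAnyway",
        # only measure skew against other tron-managed pods in the same pool
        label_selector=V1LabelSelector(
            match_labels={
                "app.kubernetes.io/managed-by": "tron",
                "paasta.yelp.com/pool": pool,
            }
        ),
    )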