diff --git a/.activate.sh b/.activate.sh index 5d40028f79..5f6cfd1bc1 120000 --- a/.activate.sh +++ b/.activate.sh @@ -1 +1 @@ -.tox/py37-linux/bin/activate \ No newline at end of file +.tox/py38-linux/bin/activate \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1e1e843e68..3732c0107b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,10 @@ jobs: fail-fast: false matrix: toxenv: - - py38-linux,docs,mypy,tests + - py38-linux + - docs + - mypy + - tests - general_itests env: DOCKER_REGISTRY: "" diff --git a/.gitignore b/.gitignore index 660fe9e3e0..44cc926814 100644 --- a/.gitignore +++ b/.gitignore @@ -43,12 +43,14 @@ paasta_itests/fake_etc_paasta/marathon.json yelp_package/gopath .mypy_cache/ .pytest_cache/ +.hypothesis/ debian/.debhelper example_cluster/paasta/docker_registry.json general_itests/fake_etc_paasta/clusters.json pip-wheel-metadata debian/debhelper-build-stamp unique-run +.vault-token # Coverage artifacts .coverage diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000000..dc3c22889d --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,35 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# RTD defaults as of 2023-11-08 +build: + os: ubuntu-22.04 + tools: + python: "3.8" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: requirements-docs.txt diff --git a/Makefile b/Makefile index c79ca995ab..cb9ded6341 100644 --- a/Makefile +++ b/Makefile @@ -23,8 +23,10 @@ endif ifeq ($(PAASTA_ENV),YELP) export DOCKER_REGISTRY ?= docker-dev.yelpcorp.com/ + export DOCKER_OPT_ARGS ?= else - export DOCKER_REGISTRY ?= "" + export DOCKER_REGISTRY ?= docker.io/ + export DOCKER_OPT_ARGS ?= --user `id -u`:`id -g` export INDEX_URL_BUILD_ARG ?= PIP_INDEX_URL endif @@ -50,7 +52,7 @@ test-not-yelpy: .paasta/bin/activate .paasta/bin/tox -e tests quick-test: .tox/py38-linux - TZ=UTC .tox/py38-linux/bin/py.test --last-failed -x -- tests + TZ=UTC .tox/py38-linux/bin/py.test --failed-first -x --disable-warnings -- tests .tox/py38-linux: .paasta/bin/activate .paasta/bin/tox @@ -115,8 +117,8 @@ k8s_clean: .paasta/bin/activate # in paasta repo: java -jar ~/openapi-generator/modules/openapi-generator-cli/target/openapi-generator-cli.jar openapi-codegen: rm -rf paasta_tools/paastaapi - docker run --rm -i --user `id -u`:`id -g` -v `pwd`:/src -w /src \ - yelp/openapi-generator-cli:20201026 \ + docker run --rm -i ${DOCKER_OPT_ARGS} -v `pwd`:/src -w /src \ + ${DOCKER_REGISTRY}yelp/openapi-generator-cli:20201026 \ generate \ -i paasta_tools/api/api_docs/oapi.yaml \ -g python-experimental \ @@ -127,8 +129,8 @@ openapi-codegen: rm -rf temp-openapi-client swagger-validate: - docker run --rm -i --user `id -u`:`id -g` -v 
`pwd`:/src -w /src \ - yelp/openapi-generator-cli:20201026 \ + docker run --rm -i ${DOCKER_OPT_ARGS} -v `pwd`:/src -w /src \ + ${DOCKER_REGISTRY}yelp/openapi-generator-cli:20201026 \ validate \ -i paasta_tools/api/api_docs/swagger.json @@ -158,6 +160,13 @@ setup-kubernetes-job: k8s_fake_cluster generate_deployments_for_service export PAASTA_TEST_CLUSTER=kind-${USER}-k8s-test;\ .tox/py38-linux/bin/python -m paasta_tools.list_kubernetes_service_instances -d ./soa_config_playground --shuffle --group-lines 1 | xargs --no-run-if-empty .tox/py38-linux/bin/python -m paasta_tools.setup_kubernetes_job -d ./soa_config_playground -c kind-${USER}-k8s-test +.PHONY: cleanup-kubernetes-jobs +cleanup-kubernetes-jobs: + export KUBECONFIG=./k8s_itests/kubeconfig;\ + export PAASTA_SYSTEM_CONFIG_DIR=./etc_paasta_playground/;\ + export PAASTA_TEST_CLUSTER=kind-${USER}-k8s-test;\ + .tox/py38-linux/bin/python -m paasta_tools.cleanup_kubernetes_jobs -d ./soa_config_playground -c kind-${USER}-k8s-test --force + .PHONY: paasta-secrets-sync paasta-secrets-sync: setup-kubernetes-job .vault-token export KUBECONFIG=./k8s_itests/kubeconfig;\ @@ -165,6 +174,19 @@ paasta-secrets-sync: setup-kubernetes-job .vault-token export PAASTA_TEST_CLUSTER=kind-${USER}-k8s-test;\ { .tox/py38-linux/bin/python -m paasta_tools.list_kubernetes_service_instances -d ./soa_config_playground ; echo -n \ _shared; } | cut -f1 -d"." | uniq | shuf | xargs .tox/py38-linux/bin/python -m paasta_tools.kubernetes.bin.paasta_secrets_sync -v -d ./soa_config_playground -t ./.vault-token +define ANNOUNCE_CRONS_BODY +The following PaaSTA cron jobs will run on an infinite loop using the PaaSTA Playground k8s cluster: +- setup-kubernetes-job +- cleanup-kubernetes-jobs +- paasta-secrets-sync +- generate_deployments_for_service +endef +export ANNOUNCE_CRONS_BODY +.PHONY: paasta-cronjobs +paasta-cronjobs: + @echo "$$ANNOUNCE_CRONS_BODY" + while true; do make paasta-secrets-sync && make cleanup-kubernetes-jobs; sleep 5; done + .vault-token: export VAULT_ADDR=https://vault-devc.yelpcorp.com:8200 ;\ export VAULT_SKIP_VERIFY=true ;\ diff --git a/README.md b/README.md index ee19b25396..eb5b5b58d5 100644 --- a/README.md +++ b/README.md @@ -5,17 +5,23 @@ ![PaaSTA Logo](http://engineeringblog.yelp.com/images/previews/paasta_preview.png) PaaSTA is a highly-available, distributed system for building, deploying, and -running services using containers and Apache Mesos! +running services using containers and Kubernetes. + +PaaSTA has been running production services at Yelp since 2016. It was +originally designed to run on top of Apache Mesos but has subsequently been +updated to use Kubernetes. Over time the features and functionality that +PaaSTA provides have increased but the principal design remains the same. + +PaaSTA aims to take a declarative description of the services that teams need +to run and then ensures that those services are deployed safely, efficiently, +and in a manner that is easy for the teams to maintain. Rather than managing +Kubernetes YAML files, PaaSTA provides a simplified schema to describe your service +and in addition to configuring Kubernetes it can also configure other infrastructure +tools to provide monitoring, logging, cost management, etc. Want to know more about the opinions behind what makes PaaSTA special? Check out the [PaaSTA Principles](http://paasta.readthedocs.io/en/latest/about/paasta_principles.html). 
-*Note*: PaaSTA has been running in production at Yelp for years, -and has a number of "Yelpisms" still lingering in the codebase. We have made -efforts to excise them, but there are bound to be lingering issues. Please help us -by opening an [issue](https://github.com/Yelp/paasta/issues/new) or -better yet a [pull request](https://github.com/Yelp/paasta/pulls). - ## Components *Note*: PaaSTA is an opinionated platform that uses a few un-opinionated @@ -23,27 +29,37 @@ tools. It requires a non-trivial amount of infrastructure to be in place before it works completely: * [Docker](http://www.docker.com/) for code delivery and containment - * [Mesos](http://mesos.apache.org/) / [Kubernetes](https://kubernetes.io/) for code execution and scheduling (runs Docker containers) - * [Marathon](https://mesosphere.github.io/marathon/) for managing long-running services + * [Kubernetes](https://kubernetes.io/) for code execution and scheduling (runs Docker containers) * [Tron](https://tron.readthedocs.io/en/latest/) for running things on a timer (nightly batches) - * [SmartStack](http://nerds.airbnb.com/smartstack-service-discovery-cloud/) / [Envoy](https://www.envoyproxy.io/) for service registration and discovery - * [Sensu](https://sensuapp.org/) for monitoring/alerting + * [SmartStack](http://nerds.airbnb.com/smartstack-service-discovery-cloud/) and [Envoy](https://www.envoyproxy.io/) for service registration and discovery + * [Sensu](https://sensu.io/) for monitoring/alerting * [Jenkins](https://jenkins-ci.org/) (optionally) for continuous deployment + * [Prometheus](https://prometheus.io/) and [HPA](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) for autoscaling services + +One advantage to having a PaaS composed of components like these is you +get to reuse them for other purposes. For example, at Yelp Sensu is not just for +PaaSTA, it can be used to monitor all sorts of things. We also use Kubernetes to run +other more complex workloads like [Jolt](https://dcos.io/events/2017/jolt-distributed-fault-tolerant-tests-at-scale-on-mesos/) and [Cassandra](https://engineeringblog.yelp.com/2020/11/orchestrating-cassandra-on-kubernetes-with-operators.html). Our service mesh, which +is a heavily customised version of SmartStack and Envoy, allows many systems at Yelp +to communicate with PaaSTA services and each other. + +On the other hand, requiring lots of components means lots of infrastructure to +set up before PaaSTA can work effectively! Realistically, running PaaSTA outside of Yelp +would not be sensible, because in addition to the integrations mentioned above we also +have strong opinions encoded in other tooling that you would need to replicate. Nevertheless, +we code PaaSTA in the open because we think it is useful to share our approach and hope that +the code can at least help others understand or solve similar problems. -The main advantage to having a PaaS composed of components like these is you -get to reuse them for other purposes. For example at Yelp Sensu is not just for -PaaSTA, it can be used to monitor all sorts of things. Also Mesos can be -re-used for things like custom frameworks. For example at Yelp we use the Mesos -infrastructure to run our large-scale testing framework: -[Jolt](https://dcos.io/events/2017/jolt-distributed-fault-tolerant-tests-at-scale-on-mesos/). -SmartStack is used at Yelp for service discovery for Non-PaaSTA things as well, -like databases, legacy apps, and Puppet-defined apps. 
Most PaaS's do not -allow for this type of component re-use. - -On the other hand, requiring lots of components means lots of infrastructure to -setup before PaaSTA is fully baked. If you are looking for a project that -doesn't require external components, we encourage you to look at the doc -[comparing PaaSTA to other tools](https://github.com/Yelp/paasta/blob/master/comparison.md). +## Integrations and Features + +In addition to the direct integrations above PaaSTA also relies on other components +to provide PaaSTA users with other features and to manage compute capacity at Yelp. + +* We use [Karpenter](https://karpenter.sh/) to autoscale pools of EC2 instances to run PaaSTA. Formerly we used our own autoscaler [Clusterman](https://engineeringblog.yelp.com/2019/11/open-source-clusterman.html) +* We bake AMIs using [Packer](https://www.packer.io/) +* We collect logs from services and send them via [Monk](https://engineeringblog.yelp.com/2020/01/streams-and-monk-how-yelp-approaches-kafka-in-2020.html) to [Kafka](https://kafka.apache.org/) +* We use [StatefulSets](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) to run a few stateful PaaSTA services +* We autotune the resources needed by each service by monitoring usage (similar to [VPA](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler)) ## Design Goals @@ -54,16 +70,11 @@ doesn't require external components, we encourage you to look at the doc * No single points of failure * Pleasant interface -PaaSTA is an opinionated platform, and it is not designed to interoperate with -every possible backend service out there. - -Think of it as an example of how we have integrated these technologies together -to build a cohesive PaaS. It is not a turn-key PaaS solution. - ## Getting Started See the [getting started](http://paasta.readthedocs.io/en/latest/installation/getting_started.html) -documentation for how to deploy PaaSTA. +documentation for how to deploy PaaSTA. This reference is intended to help understand how PaaSTA +works but we don't advise that you use PaaSTA in production. ## Debugging PaaSTA (in VS Code) diff --git a/contrib/python-k8s-client.diff b/contrib/python-k8s-client.diff new file mode 100644 index 0000000000..1ee45ccca7 --- /dev/null +++ b/contrib/python-k8s-client.diff @@ -0,0 +1,11 @@ +--- a/debian/paasta/opt/venvs/paasta/lib/python3.8/site-packages/kubernetes/client/api_client.py ++++ b/debian/paasta/opt/venvs/paasta/lib/python3.8/site-packages/kubernetes/client/api_client.py +@@ -629,7 +629,7 @@ + 'get_real_child_model'): + return data + +- kwargs = {} ++ kwargs = {"local_vars_configuration": self.configuration} + if (data is not None and + klass.openapi_types is not None and + isinstance(data, (list, dict))): diff --git a/debian/changelog b/debian/changelog index 6fd31a4a94..40ba0a47fa 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,698 @@ +paasta-tools (0.218.6) xenial; urgency=medium + + * 0.218.6 tagged with 'make release' + Commit: Speed up secret syncing (#3803) It turns out that the + slowness here was due to our hardcoded .3s sleep - we"re now syncing + a lot more secrets (and generally doing more work) because of our + namespace sharding, so this sleep ends up taking quite a bit of + time. We generally haven"t had to throttle much (outside of Pod + creation, but that"s generally due to other constraints: e.g., + cluster autoscaling and whatnot), so it"s probably fine to remove + this sleep entirely for the time being. 
That said, in the interest + of safety, I"ve made this delay configurable - so if this ends up + causing issues after release (or in the far future), we can easily + tweak the delay. + + -- Luis Perez Tue, 20 Feb 2024 11:59:32 -0800 + +paasta-tools (0.218.5) xenial; urgency=medium + + * 0.218.5 tagged with 'make release' + Commit: PAASTA-18005: Add default timeout to paasta-api (#3800) + + -- Jon Lee Thu, 15 Feb 2024 06:09:53 -0800 + +paasta-tools (0.218.4) xenial; urgency=medium + + * 0.218.4 tagged with 'make release' + Commit: Scale with ready pods count rather current replicas for + uwsgi (#3802) We had some surprising behavior recently where an + autoscaled app was crashlooping and the HPA did not keep the desired + scale as-is - we believe that this is due to a mismatch in how we + calculate the value we send to k8s: The existing PromQL divides by + current replica to turn things into a utilization percentage such + that when the HPA multiplies later on by the number of current + replicas, we reach our target replicas. However, the HPA will + actually multiply by the number of ready replicas, leading to a + mismatch. (h/t to @krall for wording things much better than I + could) + + -- Luis Perez Wed, 14 Feb 2024 10:06:51 -0800 + +paasta-tools (0.218.3) xenial; urgency=medium + + * 0.218.3 tagged with 'make release' + Commit: Fixes local tests and openapi-codegen (#3801) + + -- Jon Lee Tue, 13 Feb 2024 13:49:03 -0800 + +paasta-tools (0.218.2) xenial; urgency=medium + + * 0.218.2 tagged with 'make release' + Commit: Remove spark mesos logic (#3799) + + -- Chi Chang Thu, 08 Feb 2024 09:07:55 -0800 + +paasta-tools (0.218.1) xenial; urgency=medium + + * 0.218.1 tagged with 'make release' + Commit: Merge branch "u/mpiano/SECNESS-1911" + + -- Matteo Piano Wed, 07 Feb 2024 02:03:23 -0800 + +paasta-tools (0.218.0) xenial; urgency=medium + + * 0.218.0 tagged with 'make release' + Commit: Call ensure_namespace() in paasta_secrets_sync (#3792) At + the moment, you cannot pre-create secrets on a new dual-name cluster + since the namespaces for services will not yet have been created by + setup_kubernetes_job (and they may not have been created by Flux yet + either). We can fix this by calling ensure_namespace() in the + secret sync code - the caching we have on + ensure_namespace()/get_all_namespaces() means that this *should* be + pretty fast as it should be grabbing cached data most of the time. 
+ + -- Luis Perez Tue, 06 Feb 2024 09:25:50 -0800 + +paasta-tools (0.217.3) xenial; urgency=medium + + * 0.217.3 tagged with 'make release' + Commit: Displays EKS in PaaSTA Status output (#3788) + + -- Jon Lee Tue, 30 Jan 2024 11:11:19 -0800 + +paasta-tools (0.217.2) xenial; urgency=medium + + * 0.217.2 tagged with 'make release' + Commit: Merge pull request #3784 from + Yelp/jfong/fix_noconfiguration_race_condition Catch NoConfiguration + exceptions in bounce_status + + -- Jen Patague Thu, 25 Jan 2024 12:59:59 -0800 + +paasta-tools (0.217.1) xenial; urgency=medium + + * 0.217.1 tagged with 'make release' + Commit: Merge pull request #3782 from Yelp/u/vit/SEC-18515-bump- + boto3 SEC-18515: Bump boto3 to enable IMDSv2 usage + + -- Vincent Thibault Wed, 24 Jan 2024 13:02:25 -0800 + +paasta-tools (0.217.0) xenial; urgency=medium + + * 0.217.0 tagged with 'make release' + Commit: Merge pull request #3758 from Yelp/u/gonabavi/DREIMP- + 10204_vitess_deployment_poc Setup scaffolding for Vitess deployment + PoC + + -- Luis Perez Tue, 23 Jan 2024 09:08:49 -0800 + +paasta-tools (0.216.0) xenial; urgency=medium + + * 0.216.0 tagged with 'make release' + Commit: Merge pull request #3776 from + Yelp/u/jfong/remove_uwsgi_sidecar Stop using uwsgi sidecars and + supporting custom stats port (Again) + + -- Jen Patague Tue, 16 Jan 2024 14:00:16 -0800 + +paasta-tools (0.215.1) xenial; urgency=medium + + * 0.215.1 tagged with 'make release' + Commit: Add placeholder value for non-existent boto_keys (#3773) + Previously, using an invalid value here would cause the service to + fail to launch. Now that we"re migrating away from using AWS users, + people mess this up more often, and it may be desirable to let the + pod launch with placeholder values. Placeholder values instead just + ignoring them and not placing keys was much simpler to implement, + and only very marginally less desirable IMO. We still get a log + line that we can alert on when this happens. + + -- Luis Perez Tue, 16 Jan 2024 09:56:23 -0800 + +paasta-tools (0.215.0) xenial; urgency=medium + + * 0.215.0 tagged with 'make release' + Commit: Make get_running_task_allocation include all namespaces by + default (#3774) For the most part, we"re not really excluding many + namespaces (most of our namespaces start with paasta). That said, + now that we allow folks to use any arbitrary namespace, we can run + into issues where a namespace is not in our allowlist and then + autotune silently stops working. I don"t foresee this adding too + much additional data (and if it does, we can filter out noisy + namespaces). 
Co-authored-by: tzhu + + -- Luis Perez Fri, 12 Jan 2024 08:18:55 -0800 + +paasta-tools (0.214.0) xenial; urgency=medium + + * 0.214.0 tagged with 'make release' + Commit: Merge pull request #3761 from Yelp/u/cuza/patching-python- + k8s-client Patching python-k8s-client at build time + + -- Luis Perez Wed, 10 Jan 2024 12:24:51 -0800 + +paasta-tools (0.213.1) xenial; urgency=medium + + * 0.213.1 tagged with 'make release' + Commit: Merge pull request #3768 from Yelp/luisp/i-am-very-sorry + Load eks- and kubernetes- files in autotune merging + + -- Jen Patague Mon, 08 Jan 2024 17:04:39 -0800 + +paasta-tools (0.213.0) xenial; urgency=medium + + * 0.213.0 tagged with 'make release' + Commit: Remove paasta performance-check (#3750) This was never + really utilized as intended and the necessary infra to run this was + torn down long ago - let"s delete this until someone has the desire + and will to bring it back :) + + -- Luis Perez Mon, 08 Jan 2024 11:43:25 -0800 + +paasta-tools (0.212.1) xenial; urgency=medium + + * 0.212.1 tagged with 'make release' + Commit: Merge pull request #3767 from rockdog/master Fix + NO_DESCRIPTION_MESSAGE used in paasta info cli command + + -- Luis Perez Mon, 08 Jan 2024 08:08:15 -0800 + +paasta-tools (0.212.0) xenial; urgency=medium + + * 0.212.0 tagged with 'make release' + Commit: Merge pull request #3763 from + Yelp/u/jfong/no_include_smartstack Remove include_smartstack + + -- Jen Patague Fri, 05 Jan 2024 09:40:59 -0800 + +paasta-tools (0.211.2) xenial; urgency=medium + + * 0.211.2 tagged with 'make release' + Commit: Guess the correct context (or allow override) in + paasta_habitat_fixer (#3766) Turns out that I didn"t read the admin + kubeconf closely enough and missed that the context names there are + prefixed with kubernetes-admin@ + + -- Luis Perez Thu, 04 Jan 2024 15:46:52 -0800 + +paasta-tools (0.211.1) xenial; urgency=medium + + * 0.211.1 tagged with 'make release' + Commit: Fix debian link (#3765) This is what I get for naming the + file differently from the command - `paasta_habitat_fixer` does not + work as bin/ does not contain a paasta_habitat_fixer.py + + -- Luis Perez Thu, 04 Jan 2024 13:49:39 -0800 + +paasta-tools (0.211.0) xenial; urgency=medium + + * 0.211.0 tagged with 'make release' + Commit: Add workaround script to fix habitat labels (#3762) We"re + seeing some weirdness with Karpenter where one of our custom labels + (yelp.com/habitat) is sometimes being set as a random integer. + While we debug this, we have enough information to write a script to + fix these. 
+ + -- Luis Perez Thu, 04 Jan 2024 10:18:00 -0800 + +paasta-tools (0.210.0) xenial; urgency=medium + + * 0.210.0 tagged with 'make release' + Commit: Merge branch "u/krall/fix_local_run_docker_hostname" + + -- Evan Krall Wed, 03 Jan 2024 11:14:38 -0800 + +paasta-tools (0.209.3) xenial; urgency=medium + + * 0.209.3 tagged with 'make release' + Commit: Merge branch "u/krall/optimize_paasta_validate" + + -- Evan Krall Fri, 15 Dec 2023 13:36:26 -0800 + +paasta-tools (0.209.2) xenial; urgency=medium + + * 0.209.2 tagged with 'make release' + Commit: remove use_k8s deprecated flag (#3749) + + -- Jon Lee Fri, 15 Dec 2023 05:53:59 -0800 + +paasta-tools (0.209.1) xenial; urgency=medium + + * 0.209.1 tagged with 'make release' + Commit: Merge pull request #3751 from + Yelp/fix_autoscaler_check_symlink Add missing symlink for + check_autoscaler_max_instances + + -- Jen Patague Thu, 14 Dec 2023 16:31:41 -0800 + +paasta-tools (0.209.0) xenial; urgency=medium + + * 0.209.0 tagged with 'make release' + Commit: Merge branch "u/krall/podtopologyspread" + + -- Evan Krall Thu, 14 Dec 2023 13:08:44 -0800 + +paasta-tools (0.208.0) xenial; urgency=medium + + * 0.208.0 tagged with 'make release' + Commit: Add support for setting lower/upper-bounds for autotuned + resources (#3744) At the moment this only supports a specific + subset of top-level resource keys: * cpus * mem * disk Supporting + {min,max}_instances should be pretty trivial (but I"m not quite sure + when we"d want to use that yet, so I left those out of the + allowlist). I"m explicitly not supporting these values for sidecars + since: a) it seemed like it would complicate things a bit and, b) + I"m not sure we have a usecase for this :p + + -- Luis Perez Thu, 14 Dec 2023 11:46:59 -0800 + +paasta-tools (0.207.12) xenial; urgency=medium + + * 0.207.12 tagged with 'make release' + Commit: Merge pull request #3746 from Yelp/jfong/PAASTA-18122 Stop + deleting brutal deployments + + -- Jen Patague Thu, 14 Dec 2023 10:52:01 -0800 + +paasta-tools (0.207.11) xenial; urgency=medium + + * 0.207.11 tagged with 'make release' + Commit: Merge pull request #3743 from Yelp/u/vit/PAASTA-18111-fix- + secret-validation Secret validation now take into account service: + override. 
+ + -- Vincent Thibault Thu, 30 Nov 2023 06:30:56 -0800 + +paasta-tools (0.207.10) xenial; urgency=medium + + * 0.207.10 tagged with 'make release' + Commit: MLCOMPUTE-1035 Update dnsPolicy (#3742) + + -- Li Rong Fri, 24 Nov 2023 09:17:41 -0800 + +paasta-tools (0.207.9) xenial; urgency=medium + + * 0.207.9 tagged with 'make release' + Commit: Merge pull request #3741 from Yelp/MLCOMPUTE- + 1098_fix_parsing_error MLCOMPUTE-1098 | fix error in parsing user + spark args + + -- Sameer Sharma Wed, 22 Nov 2023 10:12:30 -0800 + +paasta-tools (0.207.8) xenial; urgency=medium + + * 0.207.8 tagged with 'make release' + Commit: Merge branch "u/mpiano/SECNESS-1815" + + -- Matteo Piano Mon, 20 Nov 2023 01:22:55 -0800 + +paasta-tools (0.207.7) xenial; urgency=medium + + * 0.207.7 tagged with 'make release' + Commit: Released 0.207.6 via make release + + -- Luis Perez Thu, 16 Nov 2023 12:23:29 -0800 + +paasta-tools (0.207.6) xenial; urgency=medium + + * 0.207.6 tagged with 'make release' + Commit: Merge pull request #3738 from + Yelp/jfong/rollback_conditions_fix Fix conditions for displaying + disable auto rollbacks button + + -- Jen Patague Thu, 16 Nov 2023 12:02:54 -0800 + +paasta-tools (0.207.5) xenial; urgency=medium + + * 0.207.5 tagged with 'make release' + Commit: Merge pull request #3737 from + Yelp/u/jfong/fix_paasta_secrets_eks Fix paasta secrets sync + namespace checking for eks instances + + -- Jen Patague Tue, 31 Oct 2023 10:45:47 -0700 + +paasta-tools (0.207.4) xenial; urgency=medium + + * 0.207.4 tagged with 'make release' + Commit: Merge pull request #3736 from + Yelp/d/manpreet/limit_spark_app_name_to_63_characters Upgrade + service-configuration-lib to limit character count in spark a… + + -- Manpreet Singh Tue, 31 Oct 2023 09:21:36 -0700 + +paasta-tools (0.207.3) xenial; urgency=medium + + * 0.207.3 tagged with 'make release' + Commit: Merge remote-tracking branch "origin/u/mpiano/SECNESS-1787" + + -- Matteo Piano Tue, 31 Oct 2023 06:48:08 -0700 + +paasta-tools (0.207.2) xenial; urgency=medium + + * 0.207.2 tagged with 'make release' + Commit: Merge branch "u/krall/limitrange_paastasvc-" + + -- Evan Krall Fri, 27 Oct 2023 11:29:04 -0700 + +paasta-tools (0.207.1) xenial; urgency=medium + + * 0.207.1 tagged with 'make release' + Commit: Merge pull request #3733 from + Yelp/jfong/bounce_status_relay_404 Catch statefulset missing as 404 + in API bounce_status + + -- Jen Patague Fri, 27 Oct 2023 09:49:30 -0700 + +paasta-tools (0.207.0) xenial; urgency=medium + + * 0.207.0 tagged with 'make release' + Commit: Merge branch "u/krall/allow_non_paastasvc" + + -- Evan Krall Thu, 26 Oct 2023 11:18:27 -0700 + +paasta-tools (0.206.0) xenial; urgency=medium + + * 0.206.0 tagged with 'make release' + Commit: Merge pull request #3729 from Yelp/u/cuza/making-skj-and-ckj- + aware-of-downthenup-bounces-across-namespaces making skj and ckj + aware of downthenup bounces across namespaces + + -- Dave Cuza Wed, 25 Oct 2023 10:18:25 -0700 + +paasta-tools (0.205.1) xenial; urgency=medium + + * 0.205.1 tagged with 'make release' + Commit: Merge pull request #3730 from + Yelp/u/jfong/fix_spark_run_docker_reg Use the correct registry_uri + to check if we need to sudo + + -- Jen Patague Mon, 23 Oct 2023 17:08:53 -0700 + +paasta-tools (0.205.0) xenial; urgency=medium + + * 0.205.0 tagged with 'make release' + Commit: Respect a service"s docker_registry for adhoc spark-runs + (#3728) There are some services where we want to ensure that a + specialized docker registry is always used - even for adhoc + development 
runs. This change is loosely based on what we do in + push-to-registry, where we read service.yaml to see if a specialized + registry needs to be used. + + -- Luis Perez Mon, 23 Oct 2023 13:45:05 -0700 + +paasta-tools (0.204.2) xenial; urgency=medium + + * 0.204.2 tagged with 'make release' + Commit: Bump service-configuration-lib to v2.18.10 (#3725) + + -- Chi Chang Fri, 20 Oct 2023 03:20:32 -0700 + +paasta-tools (0.204.1) xenial; urgency=medium + + * 0.204.1 tagged with 'make release' + Commit: Merge pull request #3724 from Yelp/sina/try-remote-branch- + first DREIMP-10150: Base new commits on remote branch, if it exists + + -- Sina Siadat Mon, 16 Oct 2023 03:21:50 -0700 + +paasta-tools (0.204.0) xenial; urgency=medium + + * 0.204.0 tagged with 'make release' + Commit: Add support for autotuned type aliases (#3693) For some + instance types that are autotuned (e.g., `kubernetes`), we may have + a largely-similar instance type (e.g., `eks`) and the ability to + trivially move instances back and forth between these. For these, + there"s a couple different options for how to handle autotune: 1) + have tooling/documentation to migrate data between + `autotuned_defaults/` files on instance moves + teach the autotune + machinery what the correct cluster/filename to update should be (as + well as teach autotune how to gather data correctly) 2) have + tooling/documentation to temporarily pin the new instance type at + the old autotuned request until autotune has updated with the new + instance type data 3) add some form of aliasing and pretend (at the + autotune level) that there"s a single instance type for these + largely-similar instance types This PR goes for option 3 as it + is the least complex and has the fewest moving parts/places for + things to horribly blow up. + + -- Luis Perez Thu, 12 Oct 2023 12:18:40 -0700 + +paasta-tools (0.203.0) xenial; urgency=medium + + * 0.203.0 tagged with 'make release' + Commit: Merge branch "u/krall/PAASTA-18016_change_default_namespace" + + -- Evan Krall Wed, 11 Oct 2023 10:45:59 -0700 + +paasta-tools (0.202.4) xenial; urgency=medium + + * 0.202.4 tagged with 'make release' + Commit: Sync paasta secrets for eks instances as well (#3718) We + could also teach this to only sync secrets for self-managed OR EKS + clusters individually - but that seems like more effort (and there"s + no real harm to syncing these to both: in fact, it makes rolling + back even easier since there"s no need to wait for secrets to re- + sync) + + -- Luis Perez Tue, 10 Oct 2023 14:24:18 -0700 + +paasta-tools (0.202.3) xenial; urgency=medium + + * 0.202.3 tagged with 'make release' + Commit: [COREJAVA-998] Use registration name for paasta_instance + name if a single custom registration is used (#3717) * [COREJAVA- + 998] Use registration name for paasta_instance name if a single + custom registration is used * Add rules to active-requests validate + * Add comment inline * Update + paasta_tools/setup_prometheus_adapter_config.py Simplify + envoy_filter_terms Co-authored-by: Luis Pérez * + Update tests and add registration length check * Update + tests/test_setup_prometheus_adapter_config.py Remove debug print. 
+ Co-authored-by: Luis Pérez --------- Co-authored- + by: Luis Pérez + + -- Charan Gangaraju Mon, 09 Oct 2023 10:13:40 -0700 + +paasta-tools (0.202.2) xenial; urgency=medium + + * 0.202.2 tagged with 'make release' + Commit: [COREJAVA-995] Refactor to pass instance_config to all + scaling rules (#3716) * [COREJAVA-995] Refactor to pass + instance_config to all scaling rules * Update the type of + instance_config * Remove namespace as parameter for active-requests + scaling * remove default value as get_namespace() will always + return a value + + -- Charan Gangaraju Fri, 06 Oct 2023 02:16:10 -0700 + +paasta-tools (0.202.1) xenial; urgency=medium + + * 0.202.1 tagged with 'make release' + Commit: Merge pull request #3715 from + Yelp/jfong/uppercase_iam_support Support uppercase IAM role names + + -- Jen Patague Tue, 03 Oct 2023 14:45:46 -0700 + +paasta-tools (0.202.0) xenial; urgency=medium + + * 0.202.0 tagged with 'make release' + Commit: Merge pull request #3714 from Yelp/jfong/PAASTA-18066 + Validate iam_role and stop supporting kiam + + -- Jen Patague Mon, 02 Oct 2023 10:04:47 -0700 + +paasta-tools (0.201.5) xenial; urgency=medium + + * 0.201.5 tagged with 'make release' + Commit: Remove sfx autoscaling link (#3710) We haven"t used SFX for + autoscaling in quite some time and this confuses people that see it + referenced in the output of `paasta status` At some point we"ll + want to replace this with a dashboard that visualizes how the + Prometheus-based autoscaling works - but that"s for future-us to do + :) + + -- Luis Perez Wed, 27 Sep 2023 14:37:27 -0700 + +paasta-tools (0.201.4) xenial; urgency=medium + + * 0.201.4 tagged with 'make release' + Commit: Ensure secret_volume volumes are not optional (#3708) We + noticed some logging from a newly added use of secret_volume that + makes us think that the default for these is that + V1SecretVolumeSources are optional - let"s clear up any ambiguity + and explicitly set a value for this NOTE: I"m not quite sure if + this will require a big bounce of anything using secret_volume - but + I guess that"s also less of a concern than a big bounce of + *everything* + + -- Luis Perez Mon, 25 Sep 2023 13:01:01 -0700 + +paasta-tools (0.201.3) xenial; urgency=medium + + * 0.201.3 tagged with 'make release' + Commit: Merge pull request #3709 from + Yelp/u/msurnin/add_cassandracluster_crd_autotune_spec_schema + COMPINFRA-3079: Add cassandracluster crd autotune spec schema + + -- Mark Surnin Mon, 25 Sep 2023 02:58:12 -0700 + +paasta-tools (0.201.2) xenial; urgency=medium + + * 0.201.2 tagged with 'make release' + Commit: Merge pull request #3703 from siadat/sina/add- + cassandracluster-to-known-config-types COMPINFRA-3079: Add + cassandracluster to known autotuned config types + + -- Sina Siadat Tue, 19 Sep 2023 08:34:48 -0700 + +paasta-tools (0.201.1) xenial; urgency=medium + + * 0.201.1 tagged with 'make release' + Commit: [COREJAVA-901] Remove calculation of missing instances as it + can"t be calculated with data in HPA (#3706) * [COREJAVA-901] + Remove calculation of missing instances as it can"t be calculated + with data in HPA * Remove unused ready pods calculation + + -- Charan Gangaraju Fri, 15 Sep 2023 07:44:20 -0700 + +paasta-tools (0.201.0) xenial; urgency=medium + + * 0.201.0 tagged with 'make release' + Commit: Merge pull request #3702 from Yelp/u/emanelsabban/PAASTA- + 17988 Add eks support for cleanup kubernetes job - PAASTA-17988 + + -- Eman Elsabban Fri, 15 Sep 2023 06:12:01 -0700 + +paasta-tools (0.200.4) xenial; urgency=medium + + * 
0.200.4 tagged with 'make release' + Commit: [COREJAVA-897] Avoid using label_replace in series query + (#3704) + + -- Charan Gangaraju Wed, 13 Sep 2023 13:40:45 -0700 + +paasta-tools (0.200.3) xenial; urgency=medium + + * 0.200.3 tagged with 'make release' + Commit: [COREJAVA-888] Add kube_deployment and kube_namespace to + series and metrics query for hpa (#3701) * [COREJAVA-888] Add + kube_deployment and kube_namespace to series and metrics query for + hpa * add a comment * Format * PR Feedback - rename variable and + minify promql + + -- Charan Gangaraju Tue, 12 Sep 2023 06:53:28 -0700 + +paasta-tools (0.200.2) xenial; urgency=medium + + * 0.200.2 tagged with 'make release' + Commit: Add quotes around new label name in label_replace (#3700) * + [COREJAVA-888] Add kube deployment label to envoy metrics + aggregation * Format * Add missing quotes * PR Feedback - update + regex in label_replace and add comment * Add quotes around new + label name in label_replace + + -- Charan Gangaraju Mon, 11 Sep 2023 15:12:29 -0700 + +paasta-tools (0.200.1) xenial; urgency=medium + + * 0.200.1 tagged with 'make release' + Commit: [COREJAVA-888] Add kube deployment label to envoy metrics + aggregation (#3699) * [COREJAVA-888] Add kube deployment label to + envoy metrics aggregation * Format * Add missing quotes * PR + Feedback - update regex in label_replace and add comment + + -- Charan Gangaraju Mon, 11 Sep 2023 13:54:00 -0700 + +paasta-tools (0.200.0) xenial; urgency=medium + + * 0.200.0 tagged with 'make release' + Commit: Merge pull request #3696 from Yelp/u/emanelsabban/PAASTA- + 17987 Adding support for SKJ on EKS - PAASTA-17987 + + -- Eman Elsabban Mon, 11 Sep 2023 07:40:31 -0700 + +paasta-tools (0.199.1) xenial; urgency=medium + + * 0.199.1 tagged with 'make release' + Commit: [COREJAVA-883] Update labels in filter terms to match k8 + labels (#3697) * [COREJAVA-883] Update labels in filter terms to + match k8 labels * Update the metric according to the label change + * Remove accidental push of test file + + -- Charan Gangaraju Fri, 08 Sep 2023 06:44:06 -0700 + +paasta-tools (0.199.0) xenial; urgency=medium + + * 0.199.0 tagged with 'make release' + Commit: Add API support for paasta ing eks-* instances (#3685) + We do a tiny bit of lying here in order to keep the same PaaSTA + cluster name in the soaconfigs filenames (and CLI), but introduce + the concept of an API cluster so that we can direct queries for + things running from eks-* files to the correct PaaSTA API + + -- Luis Perez Thu, 07 Sep 2023 10:00:44 -0700 + +paasta-tools (0.198.3) xenial; urgency=medium + + * 0.198.3 tagged with 'make release' + Commit: Merge pull request #3694 from Yelp/u/vit/fix-local-run- + secret PAASTA-18017: Read secret where instance service is defined, + rather than service defined in CLI args. 
+ + -- Vincent Thibault Wed, 06 Sep 2023 10:25:37 -0700 + +paasta-tools (0.198.2) xenial; urgency=medium + + * 0.198.2 tagged with 'make release' + Commit: MLCOMPUTE-1008 | update regex for Spark volume names to be + alphanumeric (#3690) * MLCOMPUTE-1008 | update regex for Spark + volume names to be alphanumeric * MLCOMPUTE-1008 | update regex + according to k8s standard, fail fast on incorrect volume names * + MLCOMPUTE-1008 | fix mypy tests * MLCOMPUTE-1008 | add unit tests - + -------- Co-authored-by: Sameer Sharma + + -- Sameer Sharma Wed, 06 Sep 2023 09:18:30 -0700 + +paasta-tools (0.198.1) xenial; urgency=medium + + * 0.198.1 tagged with 'make release' + Commit: [COREJAVA-869] Add active requests to kubernetes schema + (#3692) + + -- Charan Gangaraju Tue, 05 Sep 2023 07:11:23 -0700 + +paasta-tools (0.198.0) xenial; urgency=medium + + * 0.198.0 tagged with 'make release' + Commit: [COREJAVA-800] Implement active requests autoscaler (#3688) + * [COREJAVA-800] Implement active requests autoscaler * Update test + * Reformat code * Add validation for active-requests * Add tests + for autoscaling config validate * Incorporate PR feedback * Move + default threshold to a shared variable and update schema * Update + paasta_tools/cli/cmds/validate.py Co-authored-by: Luis Pérez + * Setup HPA for active-requests --------- Co- + authored-by: Luis Pérez + + -- Charan Gangaraju Fri, 01 Sep 2023 09:11:07 -0700 + +paasta-tools (0.197.1) xenial; urgency=medium + + * 0.197.1 tagged with 'make release' + Commit: Bump service-configuration-lib to v2.18.6 (#3682) * Bump + service-configuration-lib to v2.18.5 * Add an option for explicitly + specifying spark app id + + -- Chi Chang Thu, 24 Aug 2023 03:46:20 -0700 + +paasta-tools (0.197.0) xenial; urgency=medium + + * 0.197.0 tagged with 'make release' + Commit: Merge pull request #3680 from Yelp/u/emanelsabban/PAASTA- + 17985 Setup eks-$clustername schema files - PAASTA-17985 + + -- Eman Elsabban Tue, 22 Aug 2023 05:20:27 -0700 + paasta-tools (0.196.0) xenial; urgency=medium * 0.196.0 tagged with 'make release' diff --git a/debian/paasta-tools.links b/debian/paasta-tools.links index 4f2e1769ff..42e04acb7f 100644 --- a/debian/paasta-tools.links +++ b/debian/paasta-tools.links @@ -8,6 +8,7 @@ opt/venvs/paasta-tools/bin/check_marathon_has_apps.py usr/bin/check_marathon_has opt/venvs/paasta-tools/bin/check_marathon_services_frontends.py usr/bin/check_marathon_services_frontends opt/venvs/paasta-tools/bin/check_kubernetes_api.py usr/bin/check_kubernetes_api opt/venvs/paasta-tools/bin/check_kubernetes_services_replication.py usr/bin/check_kubernetes_services_replication +opt/venvs/paasta-tools/bin/check_autoscaler_max_instances.py usr/bin/check_autoscaler_max_instances opt/venvs/paasta-tools/bin/check_mesos_active_frameworks.py usr/bin/check_mesos_active_frameworks opt/venvs/paasta-tools/bin/check_mesos_duplicate_frameworks.py usr/bin/check_mesos_duplicate_frameworks opt/venvs/paasta-tools/bin/check_mesos_quorum.py usr/bin/check_mesos_quorum @@ -56,3 +57,4 @@ opt/venvs/paasta-tools/bin/setup_kubernetes_cr.py usr/bin/setup_kubernetes_cr opt/venvs/paasta-tools/bin/setup_prometheus_adapter_config.py usr/bin/setup_prometheus_adapter_config opt/venvs/paasta-tools/bin/synapse_srv_namespaces_fact.py usr/bin/synapse_srv_namespaces_fact opt/venvs/paasta-tools/bin/paasta_update_soa_memcpu.py usr/bin/paasta_update_soa_memcpu +opt/venvs/paasta-tools/bin/habitat_fixer.py usr/bin/paasta_habitat_fixer diff --git a/debian/rules b/debian/rules index ceb60ed69d..609fb0aff1 100755 
--- a/debian/rules +++ b/debian/rules @@ -24,3 +24,5 @@ override_dh_virtualenv: --preinstall no-manylinux1 \ --preinstall=-rrequirements-bootstrap.txt cp yelp_package/gopath/paasta_go $(DH_VENV_DIR)/bin/paasta_go + @echo patching k8s client lib + patch $(DH_VENV_DIR)/lib/python3.8/site-packages/kubernetes/client/api_client.py contrib/python-k8s-client.diff diff --git a/docs/source/autoscaling.rst b/docs/source/autoscaling.rst index c3f6865e5f..82d0da8577 100644 --- a/docs/source/autoscaling.rst +++ b/docs/source/autoscaling.rst @@ -65,20 +65,13 @@ The currently available metrics providers are: Measures the CPU usage of your service's container. :uwsgi: - With the ``uwsgi`` metrics provider, Paasta will configure your pods to run an additional container with the `uwsgi_exporter `_ image. - This sidecar will listen on port 9117, and will request metrics from your uWSGI master via its `stats server `_. - The uwsgi_exporter container needs to know what port your uWSGI master's stats server is on - you can configure this with the ``uwsgi_stats_port`` key in the ``autoscaling`` dictionary. - ``uwsgi_exporter`` will translate the uWSGI stats into Prometheus format, which Prometheus will scrape. + With the ``uwsgi`` metrics provider, Paasta will configure your pods to be scraped from your uWSGI master via its `stats server `_. + We currently only support uwsgi stats on port 8889, and Prometheus will attempt to scrape that port. .. note:: - If you have configured your service to use a non-default stats port (8889), you need to explicity set ``uwsgi_stats_port`` in your autoscaling config with the same value to ensure that metrics are being exported. + If you have configured your service to use a non-default stats port (8889), PaaSTA will not scale your service correctly! - Extra parameters: - - :uwsgi_stats_port: - the port that your uWSGI master process will respond to with stats. - Defaults to 8889. :gunicorn: With the ``gunicorn`` metrics provider, Paasta will configure your pods to run an additional container with the `statsd_exporter `_ image. @@ -100,6 +93,17 @@ The currently available decicion policies are: :offset: Float between 0.0 and 1.0, representing expected baseline load for each container. Defaults to 0.0. + + **DEPRECATED** - while it was previously more complicated, offset is now simply subtracted from your setpoint. + For example, ``setpoint: 0.6`` with ``offset: 0.25`` is equivalent to ``setpoint: 0.35`` with no ``offset``. + We recommend you just lower your setpoint by the same amount and remove the ``offset``. + + Previously, offset was used to counteract the fake utilization that would be seen by our old uWSGI metrics provider. + Under the old system, the uWSGI metrics provider would always see 1 extra worker busy, because the metrics query was proxied through the actual uWSGI workers. + Having the autoscaler understand how much load was fake and how much was real helped it converge faster to your target load. + Nowadays, we measure uWSGI utilization in a different way that does not use a uWSGI worker, so this is no longer necessary. + Support for ``offset`` was only retained to provide a smooth transition from the old system to the new system. + :good_enough_window: **Not currently supported** An array of two utilization values [low, high]. @@ -133,3 +137,18 @@ of instances PaaSTA thinks your service should have. 
Finally, remember to set the ``decision_policy`` of the ``autoscaling`` parameter for each service instance to ``"bespoke"`` or else PaaSTA will attempt to autoscale your service with the default autoscaling method. + + +``max_instances`` alerting +-------------------------- + +In order to make you aware of when your ``max_instances`` may be too low, causing issues with your service, paasta will send you alerts if all of the following conditions are true: + + * The autoscaler has scaled your service to ``max_instances``. + + * The load on your service (as measured by the ``metrics_provider`` you specified, e.g. your worker utilization or CPU utilization) is above ``max_instances_alert_threshold``. + +The default value for ``max_instances_alert_threshold`` is whatever your ``setpoint`` is. +This means by default the alert will trigger when the autoscaler wants to scale up but is prevented from doing so by your ``max_instances`` setting. +If this alert is noisy, you can try setting ``max_instances_alert_threshold`` to something a little higher than your ``setpoint``. +Setting a very high value (a utilization value your metrics_provider would never measure) will effectively disable this alert. diff --git a/docs/source/deploy_groups.rst b/docs/source/deploy_groups.rst index 9d6d28d80a..3783c9e0a0 100644 --- a/docs/source/deploy_groups.rst +++ b/docs/source/deploy_groups.rst @@ -20,7 +20,6 @@ As an example, consider a service with the following deploy.yaml: - step: itest - step: security-check - step: push-to-registry - - step: performance-check - step: dev-stage.everything trigger_next_step_manually: true - step: prod.canary @@ -29,7 +28,7 @@ As an example, consider a service with the following deploy.yaml: This pipeline will: -1. Run ``itest``, ``security-check``, ``push-to-registry``, and ``performance-check`` steps, which are build and testing steps. +1. Run ``itest``, ``security-check``, and ``push-to-registry`` steps, which are build and testing steps. During ``itest`` phase, a new container image is built (per the `Paasta Contract `_). This image is pushed to Paasta's Docker registry in the ``push-to-registry`` step. 2. Deploy the new container image to all instances with ``deploy_group: dev-stage.everything``, and wait for someone to click a button in Jenkins before continuing. @@ -125,3 +124,23 @@ String interpolation -------------------- Deploy groups support string interpolation for the following variables: ``cluster``, ``instance`` and ``service``. String interpolation works by surrounding the variable's name with braces (``{}``) in the ``deploy_group`` field -- this is python's ``str.format`` syntax. E.g. ``deploy_group: '{cluster}.all'``. You must still specify explicit deploy groups in your ``deploy.yaml`` however. + +Parallel steps +-------------------- + +Parallel steps are supported in ``deploy.yaml`` to allow steps that aren't reliant on each other to be executed at the same time. The parallel block also supports waiting before moving on to the next step. + +As an example the following deploy.yaml will execute steps ``security-check`` & ``command-test`` together. It will then wait for user input before moving on to the ``performance-check`` step. + +.. 
sourcecode:: yaml + + --- + pipeline: + - parallel: + - step: security-check + - step: command-test + trigger_next_step_manually: true + - step: performance-check + - step: prod.canary + trigger_next_step_manually: true + - step: prod.non_canary diff --git a/docs/source/generated/paasta_tools.check_autoscaler_max_instances.rst b/docs/source/generated/paasta_tools.check_autoscaler_max_instances.rst new file mode 100644 index 0000000000..cb6b39db8d --- /dev/null +++ b/docs/source/generated/paasta_tools.check_autoscaler_max_instances.rst @@ -0,0 +1,7 @@ +paasta\_tools.check\_autoscaler\_max\_instances module +====================================================== + +.. automodule:: paasta_tools.check_autoscaler_max_instances + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/paasta_tools.cli.cmds.performance_check.rst b/docs/source/generated/paasta_tools.cli.cmds.performance_check.rst deleted file mode 100644 index 23a860c6f1..0000000000 --- a/docs/source/generated/paasta_tools.cli.cmds.performance_check.rst +++ /dev/null @@ -1,7 +0,0 @@ -paasta\_tools.cli.cmds.performance\_check module -================================================ - -.. automodule:: paasta_tools.cli.cmds.performance_check - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/paasta_tools.cli.cmds.rst b/docs/source/generated/paasta_tools.cli.cmds.rst index f475fd99a6..e676079277 100644 --- a/docs/source/generated/paasta_tools.cli.cmds.rst +++ b/docs/source/generated/paasta_tools.cli.cmds.rst @@ -24,7 +24,6 @@ Submodules paasta_tools.cli.cmds.mesh_status paasta_tools.cli.cmds.metastatus paasta_tools.cli.cmds.pause_service_autoscaler - paasta_tools.cli.cmds.performance_check paasta_tools.cli.cmds.push_to_registry paasta_tools.cli.cmds.remote_run paasta_tools.cli.cmds.rollback diff --git a/docs/source/generated/paasta_tools.contrib.habitat_fixer.rst b/docs/source/generated/paasta_tools.contrib.habitat_fixer.rst new file mode 100644 index 0000000000..953e064283 --- /dev/null +++ b/docs/source/generated/paasta_tools.contrib.habitat_fixer.rst @@ -0,0 +1,7 @@ +paasta\_tools.contrib.habitat\_fixer module +=========================================== + +.. 
automodule:: paasta_tools.contrib.habitat_fixer + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/paasta_tools.contrib.rst b/docs/source/generated/paasta_tools.contrib.rst index d2193d5888..b80741b8ba 100644 --- a/docs/source/generated/paasta_tools.contrib.rst +++ b/docs/source/generated/paasta_tools.contrib.rst @@ -14,6 +14,7 @@ Submodules paasta_tools.contrib.emit_allocated_cpu_metrics paasta_tools.contrib.get_running_task_allocation paasta_tools.contrib.graceful_container_drain + paasta_tools.contrib.habitat_fixer paasta_tools.contrib.ide_helper paasta_tools.contrib.is_pod_healthy_in_proxy paasta_tools.contrib.is_pod_healthy_in_smartstack diff --git a/docs/source/generated/paasta_tools.rst b/docs/source/generated/paasta_tools.rst index 63273645b5..e5bbb977e0 100644 --- a/docs/source/generated/paasta_tools.rst +++ b/docs/source/generated/paasta_tools.rst @@ -33,6 +33,7 @@ Submodules paasta_tools.bounce_lib paasta_tools.broadcast_log_to_services paasta_tools.cassandracluster_tools + paasta_tools.check_autoscaler_max_instances paasta_tools.check_cassandracluster_services_replication paasta_tools.check_flink_services_health paasta_tools.check_kubernetes_api @@ -107,7 +108,7 @@ Submodules paasta_tools.synapse_srv_namespaces_fact paasta_tools.tron_tools paasta_tools.utils - paasta_tools.vitess_tools + paasta_tools.vitesscluster_tools Module contents --------------- diff --git a/docs/source/generated/paasta_tools.vitess_tools.rst b/docs/source/generated/paasta_tools.vitess_tools.rst deleted file mode 100644 index af379915df..0000000000 --- a/docs/source/generated/paasta_tools.vitess_tools.rst +++ /dev/null @@ -1,7 +0,0 @@ -paasta\_tools.vitess\_tools module -================================== - -.. automodule:: paasta_tools.vitess_tools - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/paasta_tools.vitesscluster_tools.rst b/docs/source/generated/paasta_tools.vitesscluster_tools.rst new file mode 100644 index 0000000000..9567a92252 --- /dev/null +++ b/docs/source/generated/paasta_tools.vitesscluster_tools.rst @@ -0,0 +1,7 @@ +paasta\_tools.vitesscluster\_tools module +========================================= + +.. automodule:: paasta_tools.vitesscluster_tools + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst index 7911b2da28..2227f36658 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -16,6 +16,7 @@ Directions (For PaaSTA Users) autoscaling hpa deploy_groups + persistent_volumes About PaaSTA's Architecture --------------------------- diff --git a/docs/source/persistent_volumes.rst b/docs/source/persistent_volumes.rst new file mode 100644 index 0000000000..f728283a6a --- /dev/null +++ b/docs/source/persistent_volumes.rst @@ -0,0 +1,36 @@ +================== +Persistent Volumes +================== + +What are Persistent Volumes? +---------------------------- +Persistent Volumes are a Kubernetes feature that allows you to attach stateful storage (like an EBS volume) to Pods (i.e., your PaaSTA instance). + +Caveats +------- +In general, we discourage the use of Persistent Volumes in favor of totally stateless services (i.e., where the state is separated from the service itself in a DB, S3, etc.) 
+ +That said, there are several things to keep in mind before deciding to use Persistent Volumes: + - PaaSTA does not provide monitoring for Persistent Volumes - you are responsible for staying on top of your usage (i.e., there is no alerting for full or almost full volumes) + - Persistent Volumes cannot be resized online - if you run out of space and need to expand your volume: there *will* be downtime + - Additionally, PaaSTA cannot automatically handle this resize: you *will* need to find an engineer on the Compute Infrastructure team to do this + - If you need to delete the Persistent Volume for your service for whatever reason, you will need an engineer on the Compute Infrastructure team to do this for you + - As of this writing (2024-02-16), we've occasionally noticed some large (double-digit minute) delays where Kubernetes is unable to attach the EBS volume backing a Persistent Volume because of AWS errors. + - In other words, until the cause of this issue is identified, it's possible that a Spot interruption (or other sort of disruption) could potentially cause downtime for your service + +How do I use Persistent Volumes in PaaSTA? +------------------------------------------ +If the above is acceptable, adding a block like: + +.. sourcecode:: yaml + + persistent_volumes: + - container_path: /path/to/mount + # if you're a power-user, know what you're doing, and need something more specific than a bog-standard GP3 EBS volume - come talk to us in #paasta + storage_class_name: ebs-retain-gp3 + size: 10 # in GB + mode: RW # unless you're populating the EBS volume externally, you likely want to be able to write to the volume :) + +to your instance config will attach a Persistent Volume with 10GB of storage to every replica of your PaaSTA instance at ``/path/to/mount``. + +NOTE: a Persistent Volume will be created *per-replica* - they are *not* shared between replicas. diff --git a/docs/source/yelpsoa_configs.rst b/docs/source/yelpsoa_configs.rst index c1457dbbc1..83dab2c0bb 100644 --- a/docs/source/yelpsoa_configs.rst +++ b/docs/source/yelpsoa_configs.rst @@ -392,6 +392,13 @@ instance MAY have: * ``decision_policy``: Which method the autoscaler will use to determine when to autoscale a service. Should be ``proportional`` or ``bespoke``. + * ``setpoint``: The target utilization (as measured by your ``metrics_provider``) that the autoscaler will try to achieve. + Default value is 0.8. + + * ``max_instances_alert_threshold``: If the autoscaler has scaled your service to ``max_instances``, + and the service's utilization (as measured by your ``metrics_provider``) is above this value, you'll get an alert. + The default is the same as your ``setpoint``. + * ``deploy_group``: A string identifying what deploy group this instance belongs to. The ``step`` parameter in ``deploy.yaml`` references this value to determine the order in which to build & deploy deploy groups. Defaults to @@ -483,8 +490,7 @@ instance MAY have: * ``namespace``: **Currently in development, do not use.** The Kubernetes namespace where Paasta will create objects related to this service. - Defaults to ``paasta``. - Currently, only ``paasta`` and namespaces starting with ``paastasvc-`` are permitted. + Defaults to ``paastasvc-service--name`` (that is, the service name will have underscores replaced with ``--``.) **Note**: Although many of these settings are inherited from ``smartstack.yaml``, their thresholds are not the same. 
The reason for this has to do with control diff --git a/general_itests/fake_soa_configs/fake_deployments_json_service/eks-test-cluster.yaml b/general_itests/fake_soa_configs/fake_deployments_json_service/eks-test-cluster.yaml new file mode 100644 index 0000000000..d23e8270fa --- /dev/null +++ b/general_itests/fake_soa_configs/fake_deployments_json_service/eks-test-cluster.yaml @@ -0,0 +1,11 @@ +--- +test_instance: + cpus: 0.1 + ram: 100 + disk: 512.3 + +test_instance_2: + cpus: 0.1 + ram: 250 + disk: 256.7 + deploy_group: test-cluster.test_instance diff --git a/general_itests/fake_soa_configs_validate/fake_invalid_service/marathon-test-cluster.yaml b/general_itests/fake_soa_configs_validate/fake_invalid_service/kubernetes-test-cluster.yaml similarity index 100% rename from general_itests/fake_soa_configs_validate/fake_invalid_service/marathon-test-cluster.yaml rename to general_itests/fake_soa_configs_validate/fake_invalid_service/kubernetes-test-cluster.yaml diff --git a/general_itests/fake_soa_configs_validate/fake_valid_service/marathon-test-cluster.yaml b/general_itests/fake_soa_configs_validate/fake_valid_service/marathon-test-cluster.yaml deleted file mode 100644 index cacffd9021..0000000000 --- a/general_itests/fake_soa_configs_validate/fake_valid_service/marathon-test-cluster.yaml +++ /dev/null @@ -1,9 +0,0 @@ ---- -main: - cpus: .1 - mem: 100 - disk: 200.0 - instances: 1 - env: - FOO: BAR - deploy_group: fake_deploy_group diff --git a/general_itests/steps/paasta_execute_docker_command.py b/general_itests/steps/paasta_execute_docker_command.py index f39c0e8eda..92fd6e4190 100644 --- a/general_itests/steps/paasta_execute_docker_command.py +++ b/general_itests/steps/paasta_execute_docker_command.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os + from behave import given from behave import then from behave import when @@ -30,6 +32,7 @@ def docker_is_available(context): @given("a running docker container with task id {task_id} and image {image_name}") def create_docker_container(context, task_id, image_name): container_name = "paasta-itest-execute-in-containers" + image_name = os.getenv("DOCKER_REGISTRY", "docker-dev.yelpcorp.com/") + image_name try: context.docker_client.remove_container(container_name, force=True) except APIError: diff --git a/paasta_tools/__init__.py b/paasta_tools/__init__.py index a3cf5a0046..883099aa33 100644 --- a/paasta_tools/__init__.py +++ b/paasta_tools/__init__.py @@ -17,4 +17,4 @@ # setup phase, the dependencies may not exist on disk yet. # # Don't bump version manually. 
See `make release` docs in ./Makefile -__version__ = "0.196.0" +__version__ = "0.218.6" diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index 58ad9f55e7..e0d064a555 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1771,12 +1771,6 @@ paths: schema: format: int32 type: integer - - description: Include Smartstack information - in: query - name: include_smartstack - required: false - schema: - type: boolean - description: Include Envoy information in: query name: include_envoy @@ -1825,13 +1819,6 @@ paths: required: true schema: type: string - - description: Include Smartstack information - in: query - name: include_smartstack - required: false - schema: - type: boolean - default: true - description: Include Envoy information in: query name: include_envoy diff --git a/paasta_tools/api/client.py b/paasta_tools/api/client.py index 2708db508f..d7391a1fe9 100644 --- a/paasta_tools/api/client.py +++ b/paasta_tools/api/client.py @@ -60,6 +60,10 @@ def get_paasta_oapi_client_by_url( config.ssl_ca_cert = ssl_ca_cert client = paastaapi.ApiClient(configuration=config) + # PAASTA-18005: Adds default timeout to paastaapi client + client.rest_client.pool_manager.connection_pool_kw[ + "timeout" + ] = load_system_paasta_config().get_api_client_timeout() return PaastaOApiClient( autoscaler=paastaapis.AutoscalerApi(client), default=paastaapis.DefaultApi(client), diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 9783f0d648..34edfc4724 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -75,6 +75,7 @@ from paasta_tools.utils import get_image_version_from_dockerurl from paasta_tools.utils import NoConfigurationForServiceError from paasta_tools.utils import NoDockerImageError +from paasta_tools.utils import PAASTA_K8S_INSTANCE_TYPES from paasta_tools.utils import TimeoutError from paasta_tools.utils import validate_service_instance @@ -150,7 +151,6 @@ def marathon_instance_status( service: str, instance: str, verbose: int, - include_smartstack: bool, include_envoy: bool, include_mesos: bool, ) -> Mapping[str, Any]: @@ -174,7 +174,7 @@ def marathon_instance_status( ) ) - if include_smartstack or include_envoy: + if include_envoy: service_namespace_config = marathon_tools.load_service_namespace_config( service=service, namespace=job_config.get_nerve_namespace(), @@ -184,26 +184,15 @@ def marathon_instance_status( tasks = [ task for app, _ in matching_apps_with_clients for task in app.tasks ] - if include_smartstack: - mstatus["smartstack"] = marathon_service_mesh_status( - service, - pik.ServiceMesh.SMARTSTACK, - instance, - job_config, - service_namespace_config, - tasks, - should_return_individual_backends=verbose > 0, - ) - if include_envoy: - mstatus["envoy"] = marathon_service_mesh_status( - service, - pik.ServiceMesh.ENVOY, - instance, - job_config, - service_namespace_config, - tasks, - should_return_individual_backends=verbose > 0, - ) + mstatus["envoy"] = marathon_service_mesh_status( + service, + pik.ServiceMesh.ENVOY, + instance, + job_config, + service_namespace_config, + tasks, + should_return_individual_backends=verbose > 0, + ) if include_mesos: mstatus["mesos"] = marathon_mesos_status(service, instance, verbose) @@ -640,9 +629,6 @@ def instance_status(request): instance = request.swagger_data.get("instance") verbose = request.swagger_data.get("verbose") or 0 use_new = request.swagger_data.get("new") or False - include_smartstack = 
request.swagger_data.get("include_smartstack") - if include_smartstack is None: - include_smartstack = True include_envoy = request.swagger_data.get("include_envoy") if include_envoy is None: include_envoy = True @@ -698,7 +684,6 @@ def instance_status(request): service, instance, verbose, - include_smartstack=include_smartstack, include_envoy=include_envoy, include_mesos=include_mesos, ) @@ -712,7 +697,6 @@ def instance_status(request): service=service, instance=instance, verbose=verbose, - include_smartstack=include_smartstack, include_envoy=include_envoy, use_new=use_new, instance_type=instance_type, @@ -864,7 +848,7 @@ def bounce_status(request): error_message = traceback.format_exc() raise ApiFailure(error_message, 500) - if instance_type != "kubernetes": + if instance_type not in PAASTA_K8S_INSTANCE_TYPES: # We are using HTTP 204 to indicate that the instance exists but has # no bounce status to be returned. The client should just mark the # instance as bounced. @@ -873,13 +857,28 @@ def bounce_status(request): return response try: - return pik.bounce_status(service, instance, settings) + return pik.bounce_status( + service, instance, settings, is_eks=(instance_type == "eks") + ) + except NoConfigurationForServiceError: + # Handle race condition where instance has been removed since the above validation + error_message = no_configuration_for_service_message( + settings.cluster, + service, + instance, + ) + raise ApiFailure(error_message, 404) except asyncio.TimeoutError: raise ApiFailure( "Temporary issue fetching bounce status. Please try again.", 599 ) - except Exception: + except Exception as e: error_message = traceback.format_exc() + if getattr(e, "status", None) == 404: + # some bounces delete the app & recreate + # in this case, we relay the 404 and cli handles gracefully + raise ApiFailure(error_message, 404) + # for all others, treat as a 500 raise ApiFailure(error_message, 500) @@ -925,7 +924,6 @@ def get_deployment_version( def instance_mesh_status(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") - include_smartstack = request.swagger_data.get("include_smartstack") include_envoy = request.swagger_data.get("include_envoy") instance_mesh: Dict[str, Any] = {} @@ -953,7 +951,6 @@ def instance_mesh_status(request): instance=instance, instance_type=instance_type, settings=settings, - include_smartstack=include_smartstack, include_envoy=include_envoy, ) ) diff --git a/paasta_tools/check_autoscaler_max_instances.py b/paasta_tools/check_autoscaler_max_instances.py new file mode 100755 index 0000000000..e1cd4c8c0f --- /dev/null +++ b/paasta_tools/check_autoscaler_max_instances.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +import argparse +import asyncio +import logging +from typing import Type + +import pysensu_yelp + +from paasta_tools.eks_tools import EksDeploymentConfig +from paasta_tools.instance import kubernetes as pik +from paasta_tools.kubernetes_tools import get_kubernetes_app_name +from paasta_tools.kubernetes_tools import KubeClient +from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig +from paasta_tools.metrics.metastatus_lib import suffixed_number_value +from paasta_tools.monitoring_tools import send_event +from paasta_tools.paasta_service_config_loader import PaastaServiceConfigLoader +from paasta_tools.utils import DEFAULT_SOA_DIR +from paasta_tools.utils import list_services +from paasta_tools.utils import load_system_paasta_config +from paasta_tools.utils import SystemPaastaConfig + +log = 
logging.getLogger(__name__) + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Check all autoscaled services to see if they're at their max_instances. If" + " so, send an alert if their utilization is above" + " max_instances_alert_threshold." + ) + ) + parser.add_argument( + "-d", + "--soa-dir", + dest="soa_dir", + default=DEFAULT_SOA_DIR, + help="Use a different soa config directory", + ) + parser.add_argument( + "--dry-run", + dest="dry_run", + action="store_true", + help="Print Sensu alert events instead of sending them", + ) + return parser.parse_args() + + +async def check_max_instances( + soa_dir: str, + cluster: str, + instance_type_class: Type[KubernetesDeploymentConfig], + system_paasta_config: SystemPaastaConfig, + dry_run: bool = False, +): + kube_client = KubeClient() + for service in list_services(soa_dir=soa_dir): + service_config = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir) + for job_config in service_config.instance_configs( + cluster=cluster, instance_type_class=instance_type_class + ): + instance = job_config.get_instance() + if not job_config.get_autoscaling_metric_spec( + name=get_kubernetes_app_name(service, instance), + cluster=cluster, + kube_client=kube_client, + namespace=job_config.get_namespace(), + ): + # Not an instance that uses HPA, don't check. + # TODO: should we send status=0 here, in case someone disables autoscaling for their service / changes + # to bespoke autoscaler? + continue + + if not job_config.get_docker_image(): + # skip services that haven't been marked for deployment yet. + continue + + autoscaling_status = await pik.autoscaling_status( + kube_client=kube_client, + job_config=job_config, + namespace=job_config.get_namespace(), + ) + if autoscaling_status["min_instances"] == -1: + log.warning( + f"HPA {job_config.get_sanitised_deployment_name()} not found." + ) + continue + + if ( + autoscaling_status["min_instances"] + == autoscaling_status["max_instances"] + ) and "canary" in instance: + status = pysensu_yelp.Status.OK + output = ( + f"Not checking {service}.{instance} as the instance name contains" + ' "canary" and min_instances == max_instances.' + ) + elif ( + autoscaling_status["desired_replicas"] + >= autoscaling_status["max_instances"] + ): + threshold = job_config.get_autoscaling_max_instances_alert_threshold() + setpoint = job_config.get_autoscaling_params()["setpoint"] + metric_threshold_target_ratio = threshold / setpoint + + status = pysensu_yelp.Status.UNKNOWN + output = "how are there no metrics for this thing?" + for metric in autoscaling_status["metrics"]: + current_value = suffixed_number_value(metric["current_value"]) + target_value = suffixed_number_value(metric["target_value"]) + + if current_value / target_value > metric_threshold_target_ratio: + status = pysensu_yelp.Status.CRITICAL + output = ( + f"{service}.{instance}: Service is at max_instances, and" + " ratio of current value to target value" + f" ({current_value} / {target_value}) is greater than the" + " ratio of max_instances_alert_threshold to setpoint" + f" ({threshold} / {setpoint})" + ) + else: + status = pysensu_yelp.Status.OK + output = ( + f"{service}.{instance}: Service is at max_instances, but" + " ratio of current value to target value" + f" ({current_value} / {target_value}) is below the ratio of" + f" max_instances_alert_threshold to setpoint ({threshold} /" + f" {setpoint})" + ) + else: + status = pysensu_yelp.Status.OK + output = f"{service}.{instance} is below max_instances." 
+ + monitoring_overrides = job_config.get_monitoring() + monitoring_overrides.update( + { + "page": False, # TODO: remove this line once this alert has been deployed for a little while. + "runbook": "y/check-autoscaler-max-instances", + "tip": ( + "The autoscaler wants to scale up to handle additional load" + " because your service is overloaded, but cannot scale any" + " higher because of max_instances. You may want to bump" + " max_instances. To make this alert quieter, adjust" + " autoscaling.max_instances_alert_threshold in yelpsoa-configs." + ), + } + ) + send_event( + service, + check_name=f"check_autoscaler_max_instances.{service}.{instance}", + overrides=monitoring_overrides, + status=status, + output=output, + soa_dir=soa_dir, + ttl=None, + cluster=cluster, + system_paasta_config=system_paasta_config, + dry_run=dry_run, + ) + + +def main(): + args = parse_args() + system_paasta_config = load_system_paasta_config() + + for instance_type_class in [KubernetesDeploymentConfig, EksDeploymentConfig]: + asyncio.run( + check_max_instances( + soa_dir=args.soa_dir, + cluster=system_paasta_config.get_cluster(), + instance_type_class=instance_type_class, + system_paasta_config=system_paasta_config, + dry_run=args.dry_run, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/paasta_tools/check_kubernetes_services_replication.py b/paasta_tools/check_kubernetes_services_replication.py index 9605751587..f239b5d9f2 100755 --- a/paasta_tools/check_kubernetes_services_replication.py +++ b/paasta_tools/check_kubernetes_services_replication.py @@ -33,10 +33,14 @@ import logging from typing import Optional from typing import Sequence +from typing import Union +from paasta_tools import eks_tools from paasta_tools import kubernetes_tools from paasta_tools import monitoring_tools from paasta_tools.check_services_replication_tools import main +from paasta_tools.check_services_replication_tools import parse_args +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.kubernetes_tools import filter_pods_by_service_instance from paasta_tools.kubernetes_tools import is_pod_ready from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig @@ -50,7 +54,7 @@ def check_healthy_kubernetes_tasks_for_service_instance( - instance_config: KubernetesDeploymentConfig, + instance_config: Union[KubernetesDeploymentConfig, EksDeploymentConfig], expected_count: int, all_pods: Sequence[V1Pod], dry_run: bool = False, @@ -73,7 +77,7 @@ def check_healthy_kubernetes_tasks_for_service_instance( def check_kubernetes_pod_replication( - instance_config: KubernetesDeploymentConfig, + instance_config: Union[KubernetesDeploymentConfig, EksDeploymentConfig], all_tasks_or_pods: Sequence[V1Pod], replication_checker: KubeSmartstackEnvoyReplicationChecker, dry_run: bool = False, @@ -81,7 +85,7 @@ def check_kubernetes_pod_replication( """Checks a service's replication levels based on how the service's replication should be monitored. 
(smartstack/envoy or k8s) - :param instance_config: an instance of KubernetesDeploymentConfig + :param instance_config: an instance of KubernetesDeploymentConfig or EksDeploymentConfig :param replication_checker: an instance of KubeSmartstackEnvoyReplicationChecker """ default_alert_after = DEFAULT_ALERT_AFTER @@ -129,7 +133,10 @@ def check_kubernetes_pod_replication( if __name__ == "__main__": + args = parse_args() main( - instance_type_class=kubernetes_tools.KubernetesDeploymentConfig, + instance_type_class=eks_tools.EksDeploymentConfig + if args.eks + else kubernetes_tools.KubernetesDeploymentConfig, check_service_replication=check_kubernetes_pod_replication, ) diff --git a/paasta_tools/check_services_replication_tools.py b/paasta_tools/check_services_replication_tools.py index 27c286a94a..14f352bd26 100644 --- a/paasta_tools/check_services_replication_tools.py +++ b/paasta_tools/check_services_replication_tools.py @@ -30,10 +30,9 @@ from mypy_extensions import Arg from mypy_extensions import NamedArg -from paasta_tools.kubernetes_tools import get_all_namespaces +from paasta_tools.kubernetes_tools import get_all_managed_namespaces from paasta_tools.kubernetes_tools import get_all_nodes from paasta_tools.kubernetes_tools import get_all_pods -from paasta_tools.kubernetes_tools import get_matching_namespaces from paasta_tools.kubernetes_tools import KubeClient from paasta_tools.kubernetes_tools import V1Node from paasta_tools.kubernetes_tools import V1Pod @@ -110,13 +109,6 @@ def parse_args() -> argparse.Namespace: dest="dry_run", help="Print Sensu alert events and metrics instead of sending them", ) - parser.add_argument( - "--namespace-prefix", - help="prefix of the namespace to check services replication for" - "Used only when service is kubernetes", - dest="namespace_prefix", - default="paastasvc-", - ) parser.add_argument( "--additional-namespaces", help="full names of namespaces to check services replication for that don't match --namespace-prefix" @@ -127,6 +119,13 @@ def parse_args() -> argparse.Namespace: # to avoid having two cron jobs running with two different namespace-prefix default=["paasta"], ) + parser.add_argument( + "--eks", + help="This flag checks k8 services running on EKS", + dest="eks", + action="store_true", + default=False, + ) options = parser.parse_args() return options @@ -209,9 +208,6 @@ def main( replication_checker: ReplicationChecker if namespace: - # Note: we will have by default namespace_prefix always set to paastasvc - # which means we could have namespace and namespace_prefix set at the same time - # what differentiate between which one we will use, will be this if statement tasks_or_pods, nodes = get_kubernetes_pods_and_nodes(namespace=namespace) replication_checker = KubeSmartstackEnvoyReplicationChecker( nodes=nodes, @@ -219,7 +215,6 @@ def main( ) else: tasks_or_pods, nodes = get_kubernetes_pods_and_nodes( - namespace_prefix=args.namespace_prefix, additional_namespaces=args.additional_namespaces, ) replication_checker = KubeSmartstackEnvoyReplicationChecker( @@ -273,7 +268,6 @@ def get_mesos_tasks_and_slaves( def get_kubernetes_pods_and_nodes( - namespace_prefix: Optional[str] = None, namespace: Optional[str] = None, additional_namespaces: Optional[Container[str]] = None, ) -> Tuple[List[V1Pod], List[V1Node]]: @@ -283,12 +277,10 @@ def get_kubernetes_pods_and_nodes( if namespace: all_pods = get_all_pods(kube_client=kube_client, namespace=namespace) else: - all_namespaces = get_all_namespaces(kube_client) - for matching_namespace in 
get_matching_namespaces( - all_namespaces, namespace_prefix, additional_namespaces - ): + all_managed_namespaces = get_all_managed_namespaces(kube_client) + for managed_namespace in all_managed_namespaces: all_pods.extend( - get_all_pods(kube_client=kube_client, namespace=matching_namespace) + get_all_pods(kube_client=kube_client, namespace=managed_namespace) ) all_nodes = get_all_nodes(kube_client) diff --git a/paasta_tools/cleanup_kubernetes_jobs.py b/paasta_tools/cleanup_kubernetes_jobs.py index 715224ed20..15e42b3a44 100644 --- a/paasta_tools/cleanup_kubernetes_jobs.py +++ b/paasta_tools/cleanup_kubernetes_jobs.py @@ -30,6 +30,8 @@ - -t , --kill-threshold: The decimal fraction of apps we think is sane to kill when this job runs - -f, --force: Force the killing of apps if we breach the threshold +- -c, --cluster: Specifies the paasta cluster to check +- --eks: This flag cleans up only k8 services that shouldn't be running on EKS leaving instances from eks-*.yaml files """ import argparse import logging @@ -41,11 +43,14 @@ from typing import List from typing import Set from typing import Tuple +from typing import Union from kubernetes.client import V1Deployment from kubernetes.client import V1StatefulSet from pysensu_yelp import Status +from paasta_tools.eks_tools import EksDeploymentConfig +from paasta_tools.eks_tools import load_eks_service_config from paasta_tools.kubernetes.application.controller_wrappers import DeploymentWrapper from paasta_tools.kubernetes.application.controller_wrappers import StatefulSetWrapper from paasta_tools.kubernetes.application.tools import Application @@ -106,12 +111,12 @@ def alert_state_change(application: Application, cluster: str) -> Generator: def instance_is_not_bouncing( - instance_config: KubernetesDeploymentConfig, + instance_config: Union[KubernetesDeploymentConfig, EksDeploymentConfig], applications: List[Application], ) -> bool: """ - :param instance_config: a KubernetesDeploymentConfig with the configuration of the instance + :param instance_config: a KubernetesDeploymentConfig or an EksDeploymentConfig with the configuration of the instance :param applications: a list of all deployments or stateful sets on the cluster that match the service and instance of provided instance_config """ @@ -119,10 +124,16 @@ def instance_is_not_bouncing( if isinstance(application, DeploymentWrapper): existing_app = application.item if ( - existing_app.metadata.namespace == instance_config.get_namespace() - and ( - instance_config.get_instances() - <= (existing_app.status.ready_replicas or 0) + ( + existing_app.metadata.namespace != instance_config.get_namespace() + and (instance_config.get_bounce_method() == "downthenup") + ) + or ( + existing_app.metadata.namespace == instance_config.get_namespace() + and ( + instance_config.get_instances() + <= (existing_app.status.ready_replicas or 0) + ) ) ) or instance_config.get_desired_state() == "stop": return True @@ -144,6 +155,7 @@ def get_applications_to_kill( cluster: str, valid_services: Set[Tuple[str, str]], soa_dir: str, + eks: bool = False, ) -> List[Application]: """ @@ -161,9 +173,21 @@ def get_applications_to_kill( if (service, instance) not in valid_services: applications_to_kill.extend(applications) else: - instance_config = load_kubernetes_service_config( - cluster=cluster, service=service, instance=instance, soa_dir=soa_dir - ) + instance_config: Union[KubernetesDeploymentConfig, EksDeploymentConfig] + if eks: + instance_config = load_eks_service_config( + cluster=cluster, + service=service, + 
instance=instance, + soa_dir=soa_dir, + ) + else: + instance_config = load_kubernetes_service_config( + cluster=cluster, + service=service, + instance=instance, + soa_dir=soa_dir, + ) try: not_bouncing = instance_is_not_bouncing( instance_config, applications @@ -200,6 +224,7 @@ def cleanup_unused_apps( cluster: str, kill_threshold: float = 0.5, force: bool = False, + eks: bool = False, ) -> None: """Clean up old or invalid jobs/apps from kubernetes. Retrieves both a list of apps currently in kubernetes and a list of valid @@ -217,11 +242,13 @@ def cleanup_unused_apps( applications_dict = list_all_applications(kube_client, APPLICATION_TYPES) log.info("Retrieving valid apps from yelpsoa_configs") valid_services = set( - get_services_for_cluster(instance_type="kubernetes", soa_dir=soa_dir) + get_services_for_cluster( + instance_type="eks" if eks else "kubernetes", soa_dir=soa_dir + ) ) applications_to_kill: List[Application] = get_applications_to_kill( - applications_dict, cluster, valid_services, soa_dir + applications_dict, cluster, valid_services, soa_dir, eks ) log.debug("Running apps: %s" % list(applications_dict)) @@ -280,6 +307,13 @@ def parse_args(argv): default=False, help="Force the cleanup if we are above the " "kill_threshold", ) + parser.add_argument( + "--eks", + help="This flag cleans up only k8 services that shouldn't be running on EKS leaving instances from eks-*.yaml files", + dest="eks", + action="store_true", + default=False, + ) return parser.parse_args(argv) @@ -289,13 +323,18 @@ def main(argv=None) -> None: kill_threshold = args.kill_threshold force = args.force cluster = args.cluster + eks = args.eks if args.verbose: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.WARNING) try: cleanup_unused_apps( - soa_dir, cluster=cluster, kill_threshold=kill_threshold, force=force + soa_dir, + cluster=cluster, + kill_threshold=kill_threshold, + force=force, + eks=eks, ) except DontKillEverythingError: sys.exit(1) diff --git a/paasta_tools/cli/cli.py b/paasta_tools/cli/cli.py index 47b675fa11..46d99cbffc 100755 --- a/paasta_tools/cli/cli.py +++ b/paasta_tools/cli/cli.py @@ -118,7 +118,6 @@ def add_subparser(command, subparsers): "mesh-status": "mesh_status", "metastatus": "metastatus", "pause_service_autoscaler": "pause_service_autoscaler", - "performance-check": "performance_check", "push-to-registry": "push_to_registry", "remote-run": "remote_run", "rollback": "rollback", diff --git a/paasta_tools/cli/cmds/autoscale.py b/paasta_tools/cli/cmds/autoscale.py index de45236375..4f01d8e336 100644 --- a/paasta_tools/cli/cmds/autoscale.py +++ b/paasta_tools/cli/cmds/autoscale.py @@ -17,9 +17,13 @@ import paasta_tools.paastaapi.models as paastamodels from paasta_tools.api import client from paasta_tools.cli.utils import figure_out_service_name +from paasta_tools.cli.utils import get_instance_configs_for_service +from paasta_tools.cli.utils import get_paasta_oapi_api_clustername from paasta_tools.cli.utils import lazy_choices_completer from paasta_tools.cli.utils import list_instances +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.utils import _log_audit +from paasta_tools.utils import DEFAULT_SOA_DIR from paasta_tools.utils import list_clusters from paasta_tools.utils import list_services from paasta_tools.utils import PaastaColors @@ -52,13 +56,42 @@ def add_subparser(subparsers): autoscale_parser.add_argument( "--set", help="Set the number to scale to. 
Must be an Int.", type=int ) + autoscale_parser.add_argument( + "-d", + "--soa-dir", + dest="soa_dir", + metavar="SOA_DIR", + default=DEFAULT_SOA_DIR, + help="define a different soa config directory", + ) autoscale_parser.set_defaults(command=paasta_autoscale) def paasta_autoscale(args): log.setLevel(logging.DEBUG) service = figure_out_service_name(args) - api = client.get_paasta_oapi_client(cluster=args.cluster, http_res=True) + instance_config = next( + get_instance_configs_for_service( + service=service, + soa_dir=args.soa_dir, + clusters=[args.cluster], + instances=[args.instance], + ), + None, + ) + if not instance_config: + print( + "Could not find config files for this service instance in soaconfigs. Maybe you mispelled an argument?" + ) + return 1 + + api = client.get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername( + cluster=args.cluster, + is_eks=(instance_config.__class__ == EksDeploymentConfig), + ), + http_res=True, + ) if not api: print("Could not connect to paasta api. Maybe you misspelled the cluster?") return 1 diff --git a/paasta_tools/cli/cmds/info.py b/paasta_tools/cli/cmds/info.py index e61fe8ab5d..e612ca032b 100644 --- a/paasta_tools/cli/cmds/info.py +++ b/paasta_tools/cli/cmds/info.py @@ -30,7 +30,7 @@ from paasta_tools.utils import NoDeploymentsAvailable from paasta_tools.utils import PaastaColors -NO_DESCRIPTION_MESSAGE = "No 'description' entry in service.yaml. Please a one line sentence that describes this service" +NO_DESCRIPTION_MESSAGE = "No 'description' entry in service.yaml. Please add a one line sentence that describes this service" NO_EXTERNAL_LINK_MESSAGE = ( "No 'external_link' entry in service.yaml. " "Please add one that points to a reference doc for your service" diff --git a/paasta_tools/cli/cmds/local_run.py b/paasta_tools/cli/cmds/local_run.py index 979b96765e..dde20796bf 100755 --- a/paasta_tools/cli/cmds/local_run.py +++ b/paasta_tools/cli/cmds/local_run.py @@ -25,6 +25,7 @@ import uuid from os import execlpe from random import randint +from typing import Optional from urllib.parse import urlparse import boto3 @@ -472,6 +473,12 @@ def add_subparser(subparsers): action="store_true", default=False, ) + list_parser.add_argument( + "--assume-role-aws-account", + "--aws-account", + "-a", + help="Specify AWS account from which to source credentials", + ) list_parser.add_argument( "--assume-role-arn", help=( @@ -688,12 +695,31 @@ def check_if_port_free(port): return True +def resolve_aws_account_from_runtimeenv() -> str: + try: + with open("/nail/etc/runtimeenv") as runtimeenv_file: + runtimeenv = runtimeenv_file.read() + except FileNotFoundError: + print( + "Unable to determine environment for AWS account name. 
Using 'dev'", + file=sys.stderr, + ) + runtimeenv = "dev" + + runtimeenv_to_account_overrides = { + "stage": "dev", + "corp": "corpprod", + } + return runtimeenv_to_account_overrides.get(runtimeenv, runtimeenv) + + def assume_aws_role( instance_config: InstanceConfig, service: str, assume_role_arn: str, assume_pod_identity: bool, use_okta_role: bool, + aws_account: str, ) -> AWSSessionCreds: """Runs AWS cli to assume into the correct role, then extract and return the ENV variables from that session""" pod_identity = instance_config.get_iam_role() @@ -705,20 +731,7 @@ def assume_aws_role( file=sys.stderr, ) sys.exit(1) - try: - with open("/nail/etc/runtimeenv") as runtimeenv_file: - aws_account = runtimeenv_file.read() - # Map runtimeenv in special cases to proper aws account name - if aws_account == "stage": - aws_account = "dev" - elif aws_account == "corp": - aws_account = "corpprod" - except FileNotFoundError: - print( - "Unable to determine environment for AWS account name. Using 'dev'", - file=sys.stderr, - ) - aws_account = "dev" + if pod_identity and (assume_pod_identity or assume_role_arn): print( "Calling aws-okta to assume role {} using account {}".format( @@ -727,10 +740,26 @@ def assume_aws_role( ) elif use_okta_role: print(f"Calling aws-okta using account {aws_account}") + elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + # Get a session using the current pod identity + print( + f"Found Pod Identity token in env. Assuming into role {os.environ['AWS_ROLE_ARN']}." + ) + boto_session = boto3.Session() + credentials = boto_session.get_credentials() + assumed_creds_dict: AWSSessionCreds = { + "AWS_ACCESS_KEY_ID": credentials.access_key, + "AWS_SECRET_ACCESS_KEY": credentials.secret_key, + "AWS_SESSION_TOKEN": credentials.token, + "AWS_SECURITY_TOKEN": credentials.token, + } + return assumed_creds_dict else: - # use_okta_role, assume_pod_identity, and assume_role are all empty. This shouldn't happen + # use_okta_role, assume_pod_identity, and assume_role are all empty, and there's no + # pod identity (web identity token) in the env. 
This shouldn't happen print( - "Error: assume_aws_role called without required arguments", file=sys.stderr + "Error: assume_aws_role called without required arguments and no pod identity env", + file=sys.stderr, ) sys.exit(1) # local-run will sometimes run as root - make sure that we get the actual @@ -803,6 +832,7 @@ def run_docker_container( assume_pod_identity=False, assume_role_arn="", use_okta_role=False, + assume_role_aws_account: Optional[str] = None, ): """docker-py has issues running a container with a TTY attached, so for consistency we execute 'docker run' directly in both interactive and @@ -826,7 +856,7 @@ def run_docker_container( else: chosen_port = pick_random_port(service) environment = instance_config.get_env_dictionary() - secret_volumes = {} + secret_volumes = {} # type: ignore if not skip_secrets: # if secrets_for_owner_team enabled in yelpsoa for service if is_secrets_for_teams_enabled(service, soa_dir): @@ -857,7 +887,7 @@ def run_docker_container( secret_provider_name=secret_provider_name, environment=environment, soa_dir=soa_dir, - service_name=service, + service_name=instance_config.get_service(), cluster_name=instance_config.cluster, secret_provider_kwargs=secret_provider_kwargs, ) @@ -865,7 +895,7 @@ def run_docker_container( secret_provider_name=secret_provider_name, secret_volumes_config=instance_config.get_secret_volumes(), soa_dir=soa_dir, - service_name=service, + service_name=instance_config.get_service(), cluster_name=instance_config.cluster, secret_provider_kwargs=secret_provider_kwargs, ) @@ -876,13 +906,19 @@ def run_docker_container( ) sys.exit(1) environment.update(secret_environment) - if assume_role_arn or assume_pod_identity or use_okta_role: + if ( + assume_role_arn + or assume_pod_identity + or use_okta_role + or "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): aws_creds = assume_aws_role( instance_config, service, assume_role_arn, assume_pod_identity, use_okta_role, + assume_role_aws_account, ) environment.update(aws_creds) @@ -929,8 +965,8 @@ def run_docker_container( except TypeError: # If that fails, try to write it as bytes # This is for binary files like TLS keys - with open(temp_secret_filename, "wb") as f: - f.write(secret_content) + with open(temp_secret_filename, "wb") as fb: + fb.write(secret_content) # Append this to the list of volumes passed to docker run volumes.append(f"{temp_secret_filename}:{container_mount_path}:ro") @@ -1029,8 +1065,10 @@ def _output_exit_code(): running = docker_client.inspect_container(container_id)["State"]["Running"] if running: print("Your service is now running! 
Tailing stdout and stderr:") - for line in docker_client.attach( - container_id, stderr=True, stream=True, logs=True + for line in docker_client.logs( + container_id, + stderr=True, + stream=True, ): # writing to sys.stdout.buffer lets us write the raw bytes we # get from the docker client without having to convert them to @@ -1072,6 +1110,7 @@ def configure_and_run_docker_container( cluster, system_paasta_config, args, + assume_role_aws_account, pull_image=False, dry_run=False, ): @@ -1226,6 +1265,7 @@ def configure_and_run_docker_container( skip_secrets=args.skip_secrets, assume_pod_identity=args.assume_pod_identity, assume_role_arn=args.assume_role_arn, + assume_role_aws_account=assume_role_aws_account, use_okta_role=args.use_okta_role, ) @@ -1270,6 +1310,7 @@ def paasta_local_run(args): local_run_config = system_paasta_config.get_local_run_config() service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root) + if args.cluster: cluster = args.cluster else: @@ -1285,6 +1326,12 @@ def paasta_local_run(args): file=sys.stderr, ) return 1 + assume_role_aws_account = args.assume_role_aws_account or ( + system_paasta_config.get_kube_clusters() + .get(cluster, {}) + .get("aws_account", resolve_aws_account_from_runtimeenv()) + ) + instance = args.instance docker_client = get_docker_client() @@ -1322,6 +1369,7 @@ def paasta_local_run(args): pull_image=pull_image, system_paasta_config=system_paasta_config, dry_run=args.action == "dry_run", + assume_role_aws_account=assume_role_aws_account, ) except errors.APIError as e: print("Can't run Docker container. Error: %s" % str(e), file=sys.stderr) diff --git a/paasta_tools/cli/cmds/logs.py b/paasta_tools/cli/cmds/logs.py index 663283c07a..abb316e7cf 100644 --- a/paasta_tools/cli/cmds/logs.py +++ b/paasta_tools/cli/cmds/logs.py @@ -637,6 +637,15 @@ def __init__(self, cluster_map: Mapping[str, Any]) -> None: ) self.cluster_map = cluster_map + def get_scribereader_selector(self, scribe_env: str) -> str: + # this is kinda silly, but until the scribereader cli becomes more ergonomic + # we'll need to do a little bit of string munging to let humans use scribereader + # in the same way we are (tl;dr: scribereader has sorta confusing behavior between + # what can be use for --ecosystem, --region, and --superregion and the fastest/least + # hacky thing to figure out which we wanna use is that any env with a - in it is a region + # and any without one is an ecosystem) + return "-e" if "-" in scribe_env else "-r" + def run_code_over_scribe_envs( self, clusters: Sequence[str], @@ -745,8 +754,10 @@ def callback( else: kw["stream_name"] = stream_info.stream_name_fn(service) log.debug( - "Running the equivalent of 'scribereader -e {} {}'".format( - scribe_env, kw["stream_name"] + "Running the equivalent of 'scribereader {} {} {}'".format( + self.get_scribereader_selector(scribe_env), + scribe_env, + kw["stream_name"], ) ) process = Process(target=self.scribe_tail, kwargs=kw) @@ -1036,8 +1047,14 @@ def scribe_get_from_time( end_date_yst = end_time.astimezone(pytz.timezone("America/Los_Angeles")).date() log.debug( - "Running the equivalent of 'scribereader -e %s %s --min-date %s --max-date %s" - % (scribe_env, stream_name, start_date_yst, end_date_yst) + "Running the equivalent of 'scribereader %s %s %s --min-date %s --max-date %s" + % ( + self.get_scribereader_selector(scribe_env), + scribe_env, + stream_name, + start_date_yst, + end_date_yst, + ) ) return scribereader.get_stream_reader( stream_name=stream_name, @@ -1064,7 +1081,7 @@ def 
scribe_get_last_n_lines( @contextmanager def fake_context(): log.debug( - f"Running the equivalent of 'scribereader -e {scribe_env} {stream_name}'" + f"Running the equivalent of 'scribereader -n {line_count} {self.get_scribereader_selector(scribe_env)} {scribe_env} {stream_name}'" ) yield scribereader.get_stream_tailer( stream_name=stream_name, diff --git a/paasta_tools/cli/cmds/mark_for_deployment.py b/paasta_tools/cli/cmds/mark_for_deployment.py index 8c7b2df2ed..58b7f2cc68 100644 --- a/paasta_tools/cli/cmds/mark_for_deployment.py +++ b/paasta_tools/cli/cmds/mark_for_deployment.py @@ -59,6 +59,7 @@ from paasta_tools.cli.cmds.status import get_version_table_entry from paasta_tools.cli.cmds.status import recent_container_restart from paasta_tools.cli.utils import get_jenkins_build_output_url +from paasta_tools.cli.utils import get_paasta_oapi_api_clustername from paasta_tools.cli.utils import lazy_choices_completer from paasta_tools.cli.utils import list_deploy_groups from paasta_tools.cli.utils import trigger_deploys @@ -68,6 +69,7 @@ from paasta_tools.cli.utils import validate_short_git_sha from paasta_tools.deployment_utils import get_currently_deployed_sha from paasta_tools.deployment_utils import get_currently_deployed_version +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig from paasta_tools.long_running_service_tools import LongRunningServiceConfig from paasta_tools.marathon_tools import MarathonServiceConfig @@ -984,8 +986,7 @@ def valid_transitions(self) -> Iterator[state_machine.TransitionDefinition]: "dest": None, # Don't actually change state, just call the before function. "trigger": "disable_auto_rollbacks_button_clicked", "conditions": [ - self.any_slo_failing, - self.any_metric_failing, + self.any_rollback_condition_failing, self.auto_rollbacks_enabled, ], "before": self.disable_auto_rollbacks, @@ -1496,12 +1497,16 @@ def diagnose_why_instance_is_stuck( should_ping_for_unhealthy_pods: bool, notify_fn: Optional[Callable[[str], None]] = None, ) -> None: - api = client.get_paasta_oapi_client(cluster=cluster) + api = client.get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername( + cluster=cluster, + is_eks=(instance_config.get_instance_type() == "eks"), + ), + ) try: status = api.service.status_instance( service=service, instance=instance, - include_smartstack=False, include_envoy=False, include_mesos=False, new=True, @@ -1622,7 +1627,12 @@ def check_if_instance_is_done( api: Optional[client.PaastaOApiClient] = None, ) -> bool: if api is None: - api = client.get_paasta_oapi_client(cluster=cluster) + api = client.get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername( + cluster=cluster, + is_eks=(instance_config.get_instance_type() == "eks"), + ), + ) if not api: log.warning( "Couldn't reach the PaaSTA api for {}! 
Assuming it is not " @@ -1732,6 +1742,7 @@ def check_if_instance_is_done( WAIT_FOR_INSTANCE_CLASSES = [ MarathonServiceConfig, KubernetesDeploymentConfig, + EksDeploymentConfig, CassandraClusterDeploymentConfig, ] diff --git a/paasta_tools/cli/cmds/mesh_status.py b/paasta_tools/cli/cmds/mesh_status.py index 0d26c1cd56..3ddecd5eb3 100644 --- a/paasta_tools/cli/cmds/mesh_status.py +++ b/paasta_tools/cli/cmds/mesh_status.py @@ -19,8 +19,11 @@ from paasta_tools.cli.cmds.status import get_envoy_status_human from paasta_tools.cli.cmds.status import get_smartstack_status_human from paasta_tools.cli.utils import figure_out_service_name +from paasta_tools.cli.utils import get_instance_configs_for_service +from paasta_tools.cli.utils import get_paasta_oapi_api_clustername from paasta_tools.cli.utils import lazy_choices_completer from paasta_tools.cli.utils import verify_instances +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.utils import DEFAULT_SOA_DIR from paasta_tools.utils import list_clusters from paasta_tools.utils import list_services @@ -75,8 +78,30 @@ def paasta_mesh_status_on_api_endpoint( service: str, instance: str, system_paasta_config: SystemPaastaConfig, + soa_dir: str = DEFAULT_SOA_DIR, ) -> Tuple[int, List[str]]: - client = get_paasta_oapi_client(cluster, system_paasta_config) + instance_config = next( + get_instance_configs_for_service( + service=service, + soa_dir=soa_dir, + clusters=[cluster], + instances=[instance], + ), + None, + ) + if not instance_config: + print( + "ERROR: Could not find config files for this service instance in soaconfigs. Maybe you mispelled an argument?" + ) + exit(1) + + client = get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername( + cluster, + is_eks=(instance_config.__class__ == EksDeploymentConfig), + ), + system_paasta_config=system_paasta_config, + ) if not client: print("ERROR: Cannot get a paasta-api client") exit(1) @@ -85,7 +110,6 @@ def paasta_mesh_status_on_api_endpoint( mesh_status = client.service.mesh_instance( service=service, instance=instance, - include_smartstack=False, ) except client.api_error as exc: # 405 (method not allowed) is returned for instances that are not configured @@ -128,7 +152,7 @@ def paasta_mesh_status(args) -> int: # validate args, funcs have their own error output service = figure_out_service_name(args, args.soa_dir) - if verify_instances(args.instance, service, [args.cluster]): + if verify_instances(args.instance, service, [args.cluster], args.soa_dir): return 1 return_code, mesh_output = paasta_mesh_status_on_api_endpoint( @@ -136,6 +160,7 @@ def paasta_mesh_status(args) -> int: service=service, instance=args.instance, system_paasta_config=system_paasta_config, + soa_dir=args.soa_dir, ) output = [ diff --git a/paasta_tools/cli/cmds/performance_check.py b/paasta_tools/cli/cmds/performance_check.py deleted file mode 100644 index a70e7b24ee..0000000000 --- a/paasta_tools/cli/cmds/performance_check.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python -# Copyright 2015-2016 Yelp Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import argparse - -import requests -from service_configuration_lib import read_extra_service_information - -from paasta_tools.cli.utils import validate_service_name -from paasta_tools.utils import DEFAULT_SOA_DIR -from paasta_tools.utils import timeout - - -def add_subparser(subparsers): - list_parser = subparsers.add_parser( - "performance-check", - description="Performs a performance check", - help="Performs a performance check", - ) - list_parser.add_argument( - "-s", - "--service", - help='Name of service for which you wish to check. Leading "services-", as included in a ' - "Jenkins job name, will be stripped.", - ) - list_parser.add_argument("-k", "--commit", help=argparse.SUPPRESS) - list_parser.add_argument( - "-d", - "--soa-dir", - dest="soa_dir", - metavar="SOA_DIR", - default=DEFAULT_SOA_DIR, - help="Define a different soa config directory", - ) - list_parser.set_defaults(command=perform_performance_check) - - -def load_performance_check_config(service, soa_dir): - return read_extra_service_information( - service_name=service, extra_info="performance-check", soa_dir=soa_dir - ) - - -def submit_performance_check_job(service, soa_dir): - performance_check_config = load_performance_check_config(service, soa_dir) - - if not performance_check_config: - print("No performance-check.yaml. Skipping performance-check.") - return - - endpoint = performance_check_config.pop("endpoint") - r = requests.post(url=endpoint, params=performance_check_config) - r.raise_for_status() - print("Posted a submission to the PaaSTA performance-check service.") - print(f"Endpoint: {endpoint}") - print(f"Parameters: {performance_check_config}") - - -@timeout() -def perform_performance_check(args): - service = args.service - if service.startswith("services-"): - service = service.split("services-", 1)[1] - validate_service_name(service, args.soa_dir) - - try: - submit_performance_check_job(service=service, soa_dir=args.soa_dir) - except Exception as e: - print( - "Something went wrong with the performance check. Safely bailing. No need to panic." 
- ) - print("Here was the error:") - print(str(e)) diff --git a/paasta_tools/cli/cmds/secret.py b/paasta_tools/cli/cmds/secret.py index 408d7b8e37..7ada4a4325 100644 --- a/paasta_tools/cli/cmds/secret.py +++ b/paasta_tools/cli/cmds/secret.py @@ -423,7 +423,8 @@ def paasta_secret(args): get_secret( kube_client, get_paasta_secret_name(namespace, service, args.secret_name), - namespace, + key_name=args.secret_name, + namespace=namespace, ) ) # fallback to default in case mapping fails @@ -432,7 +433,8 @@ def paasta_secret(args): get_secret( kube_client, get_paasta_secret_name("paasta", service, args.secret_name), - "paasta", + key_name=args.secret_name, + namespace="paasta", ) ) return diff --git a/paasta_tools/cli/cmds/spark_run.py b/paasta_tools/cli/cmds/spark_run.py index db89624ab8..7f233091ba 100644 --- a/paasta_tools/cli/cmds/spark_run.py +++ b/paasta_tools/cli/cmds/spark_run.py @@ -16,14 +16,14 @@ from typing import Union import yaml -from boto3.exceptions import Boto3Error +from service_configuration_lib import read_service_configuration +from service_configuration_lib import read_yaml_file from service_configuration_lib import spark_config from service_configuration_lib.spark_config import get_aws_credentials from service_configuration_lib.spark_config import get_grafana_url from service_configuration_lib.spark_config import get_resources_requested from service_configuration_lib.spark_config import get_signalfx_url from service_configuration_lib.spark_config import get_spark_hourly_cost -from service_configuration_lib.spark_config import send_and_calculate_resources_cost from service_configuration_lib.spark_config import UnsupportedClusterManagerException from paasta_tools.cli.cmds.check import makefile_responds_to @@ -35,12 +35,11 @@ from paasta_tools.kubernetes_tools import limit_size_with_hash from paasta_tools.spark_tools import DEFAULT_SPARK_SERVICE from paasta_tools.spark_tools import get_volumes_from_spark_k8s_configs -from paasta_tools.spark_tools import get_volumes_from_spark_mesos_configs from paasta_tools.spark_tools import get_webui_url from paasta_tools.spark_tools import inject_spark_conf_str from paasta_tools.utils import _run from paasta_tools.utils import DEFAULT_SOA_DIR -from paasta_tools.utils import get_docker_client +from paasta_tools.utils import filter_templates_from_config from paasta_tools.utils import get_possible_launched_by_user_variable_from_env from paasta_tools.utils import get_username from paasta_tools.utils import InstanceConfig @@ -60,10 +59,10 @@ DEFAULT_SPARK_DOCKER_REGISTRY = "docker-dev.yelpcorp.com" SENSITIVE_ENV = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN"] clusterman_metrics, CLUSTERMAN_YAML_FILE_PATH = get_clusterman_metrics() -CLUSTER_MANAGER_MESOS = "mesos" CLUSTER_MANAGER_K8S = "kubernetes" CLUSTER_MANAGER_LOCAL = "local" -CLUSTER_MANAGERS = {CLUSTER_MANAGER_MESOS, CLUSTER_MANAGER_K8S, CLUSTER_MANAGER_LOCAL} +CLUSTER_MANAGERS = {CLUSTER_MANAGER_K8S, CLUSTER_MANAGER_LOCAL} +DEFAULT_DOCKER_SHM_SIZE = "64m" # Reference: https://spark.apache.org/docs/latest/configuration.html#application-properties DEFAULT_DRIVER_CORES_BY_SPARK = 1 DEFAULT_DRIVER_MEMORY_BY_SPARK = "1g" @@ -73,6 +72,7 @@ POD_TEMPLATE_DIR = "/nail/tmp" POD_TEMPLATE_PATH = "/nail/tmp/spark-pt-{file_uuid}.yaml" DEFAULT_RUNTIME_TIMEOUT = "12h" +DEFAILT_AWS_PROFILE = "default" POD_TEMPLATE = """ apiVersion: v1 @@ -81,6 +81,7 @@ labels: spark: {spark_pod_label} spec: + dnsPolicy: Default affinity: podAffinity: preferredDuringSchedulingIgnoredDuringExecution: 
@@ -173,7 +174,7 @@ def add_subparser(subparsers): "--docker-memory-limit", help=( "Set docker memory limit. Should be greater than driver memory. Defaults to 2x spark.driver.memory. Example: 2g, 500m, Max: 64g" - "Note: If memory limit provided is greater than associated with the batch instance, it will default to max memory of the box." + " Note: If memory limit provided is greater than associated with the batch instance, it will default to max memory of the box." ), default=None, ) @@ -181,7 +182,27 @@ def add_subparser(subparsers): "--docker-cpu-limit", help=( "Set docker cpus limit. Should be greater than driver cores. Defaults to 1x spark.driver.cores." - "Note: The job will fail if the limit provided is greater than number of cores present on batch box (8 for production batch boxes)." + " Note: The job will fail if the limit provided is greater than number of cores present on batch box (8 for production batch boxes)." + ), + default=None, + ) + + list_parser.add_argument( + "--docker-shm-size", + help=( + "Set docker shared memory size limit for the driver's container. This is the same as setting docker run --shm-size and the shared" + " memory is mounted to /dev/shm in the container. Anything written to the shared memory mount point counts towards the docker memory" + " limit for the driver's container. Therefore, this should be less than --docker-memory-limit." + f" Defaults to {DEFAULT_DOCKER_SHM_SIZE}. Example: 8g, 256m" + " Note: this option is mainly useful when training TensorFlow models in the driver, with multiple GPUs using NCCL. The shared memory" + f" space is used to sync gradient updates between GPUs during training. The default value of {DEFAULT_DOCKER_SHM_SIZE} is typically not large enough for" + " this inter-gpu communication to run efficiently. We recommend a starting value of 8g to ensure that the entire set of model parameters" + " can fit in the shared memory. This can be less if you are training a smaller model (<1g parameters) or more if you are using a larger model (>2.5g parameters)" + " If you are observing low, average GPU utilization during epoch training (<65-70 percent) you can also try increasing this value; you may be" + " resource constrained when GPUs sync training weights between mini-batches (there are other potential bottlenecks that could cause this as well)." + " A tool such as nvidia-smi can be use to check GPU utilization." + " This option also adds the --ulimit memlock=-1 to the docker run command since this is recommended for TensorFlow applications that use NCCL." + " Please refer to docker run documentation for more details on --shm-size and --ulimit memlock=-1." ), default=None, ) @@ -197,7 +218,7 @@ def add_subparser(subparsers): list_parser.add_argument( "--docker-registry", help="Docker registry to push the Spark image built.", - default=DEFAULT_SPARK_DOCKER_REGISTRY, + default=None, ) list_parser.add_argument( @@ -282,9 +303,8 @@ def add_subparser(subparsers): list_parser.add_argument( "--spark-args", - help="Spark configurations documented in https://spark.apache.org/docs/latest/configuration.html. " - r'For example, --spark-args "spark.mesos.constraints=pool:default\;instance_type:m4.10xlarge ' - 'spark.executor.cores=4".', + help="Spark configurations documented in https://spark.apache.org/docs/latest/configuration.html, separated by space. 
" + 'For example, --spark-args "spark.executor.cores=1 spark.executor.memory=7g spark.executor.instances=2".', ) list_parser.add_argument( @@ -319,6 +339,20 @@ def add_subparser(subparsers): default=False, ) + list_parser.add_argument( + "--tronfig", + help="Load the Tron config yaml. Use with --job-id.", + type=str, + default=None, + ) + + list_parser.add_argument( + "--job-id", + help="Tron job id . in the Tronfig to run. Use wuth --tronfig.", + type=str, + default=None, + ) + k8s_target_cluster_type_group = list_parser.add_mutually_exclusive_group() k8s_target_cluster_type_group.add_argument( "--force-use-eks", @@ -339,14 +373,6 @@ def add_subparser(subparsers): default=None, ) - if clusterman_metrics: - list_parser.add_argument( - "--suppress-clusterman-metrics-errors", - help="Continue even if sending resource requirements to Clusterman fails. This may result in the job " - "failing to acquire resources.", - action="store_true", - ) - list_parser.add_argument( "-j", "--jars", help=argparse.SUPPRESS, action=DeprecatedAction ) @@ -397,7 +423,7 @@ def add_subparser(subparsers): "--aws-credentials-yaml is not specified and --service is either " "not specified or the service does not have credentials in " "/etc/boto_cfg", - default="default", + default=DEFAILT_AWS_PROFILE, ) aws_group.add_argument( @@ -508,13 +534,18 @@ def get_docker_run_cmd( docker_cmd, nvidia, docker_memory_limit, + docker_shm_size, docker_cpu_limit, ): print( - f"Setting docker memory and cpu limits as {docker_memory_limit}, {docker_cpu_limit} core(s) respectively." + f"Setting docker memory, shared memory, and cpu limits as {docker_memory_limit}, {docker_shm_size}, and {docker_cpu_limit} core(s) respectively." ) cmd = ["paasta_docker_wrapper", "run"] cmd.append(f"--memory={docker_memory_limit}") + if docker_shm_size is not None: + cmd.append(f"--shm-size={docker_shm_size}") + cmd.append("--ulimit") + cmd.append("memlock=-1") cmd.append(f"--cpus={docker_cpu_limit}") cmd.append("--rm") cmd.append("--net=host") @@ -671,21 +702,20 @@ def _parse_user_spark_args( enable_compact_bin_packing: bool = False, enable_spark_dra: bool = False, ) -> Dict[str, str]: - if not spark_args: - return {} user_spark_opts = {} - for spark_arg in spark_args.split(): - fields = spark_arg.split("=", 1) - if len(fields) != 2: - print( - PaastaColors.red( - "Spark option %s is not in format option=value." % spark_arg - ), - file=sys.stderr, - ) - sys.exit(1) - user_spark_opts[fields[0]] = fields[1] + if spark_args: + for spark_arg in spark_args.split(): + fields = spark_arg.split("=", 1) + if len(fields) != 2: + print( + PaastaColors.red( + "Spark option %s is not in format option=value." % spark_arg + ), + file=sys.stderr, + ) + sys.exit(1) + user_spark_opts[fields[0]] = fields[1] if enable_compact_bin_packing: user_spark_opts["spark.kubernetes.executor.podTemplateFile"] = pod_template_path @@ -738,6 +768,7 @@ def run_docker_container( dry_run, nvidia, docker_memory_limit, + docker_shm_size, docker_cpu_limit, ) -> int: @@ -749,6 +780,7 @@ def run_docker_container( docker_cmd=docker_cmd, nvidia=nvidia, docker_memory_limit=docker_memory_limit, + docker_shm_size=docker_shm_size, docker_cpu_limit=docker_cpu_limit, ) docker_run_cmd = get_docker_run_cmd(**docker_run_args) @@ -819,6 +851,17 @@ def _calculate_docker_memory_limit( return docker_memory_limit +def _calculate_docker_shared_memory_size(shm_size: Optional[str]) -> str: + """In Order of preference: + 1. Argument: --docker-shm-size + 3. 
Default + """ + if shm_size: + return shm_size + + return DEFAULT_DOCKER_SHM_SIZE + + def _calculate_docker_cpu_limit( spark_conf: Mapping[str, str], cpu_limit: Optional[str] ) -> str: @@ -843,18 +886,18 @@ def configure_and_run_docker_container( aws_creds: Tuple[Optional[str], Optional[str], Optional[str]], cluster_manager: str, pod_template_path: str, + extra_driver_envs: Dict[str, str] = dict(), ) -> int: docker_memory_limit = _calculate_docker_memory_limit( spark_conf, args.docker_memory_limit ) + docker_shm_size = _calculate_docker_shared_memory_size(args.docker_shm_size) docker_cpu_limit = _calculate_docker_cpu_limit( spark_conf, args.docker_cpu_limit, ) - if cluster_manager == CLUSTER_MANAGER_MESOS: - volumes = get_volumes_from_spark_mesos_configs(spark_conf) - elif cluster_manager in {CLUSTER_MANAGER_K8S, CLUSTER_MANAGER_LOCAL}: + if cluster_manager in {CLUSTER_MANAGER_K8S, CLUSTER_MANAGER_LOCAL}: # service_configuration_lib puts volumes into the k8s # configs for local mode volumes = get_volumes_from_spark_k8s_configs(spark_conf) @@ -883,6 +926,7 @@ def configure_and_run_docker_container( system_paasta_config=system_paasta_config, ) ) # type:ignore + environment.update(extra_driver_envs) webui_url = get_webui_url(spark_conf["spark.ui.port"]) webui_url_msg = PaastaColors.green(f"\nSpark monitoring URL: ") + f"{webui_url}\n" @@ -915,40 +959,21 @@ def configure_and_run_docker_container( print(f"Selected cluster manager: {cluster_manager}\n") if clusterman_metrics and _should_get_resource_requirements(docker_cmd, args.mrjob): - try: - if cluster_manager == CLUSTER_MANAGER_MESOS: - print("Sending resource request metrics to Clusterman") - hourly_cost, resources = send_and_calculate_resources_cost( - clusterman_metrics, spark_conf, webui_url, args.pool - ) - else: - resources = get_resources_requested(spark_conf) - hourly_cost = get_spark_hourly_cost( - clusterman_metrics, - resources, - spark_conf["spark.executorEnv.PAASTA_CLUSTER"], - args.pool, - ) - message = ( - f"Resource request ({resources['cpus']} cpus and {resources['mem']} MB memory total)" - f" is estimated to cost ${hourly_cost} per hour" - ) - if clusterman_metrics.util.costs.should_warn(hourly_cost): - print(PaastaColors.red(f"WARNING: {message}")) - else: - print(message) - except Boto3Error as e: - print( - PaastaColors.red( - f"Encountered {e} while attempting to send resource requirements to Clusterman." 
- ) - ) - if args.suppress_clusterman_metrics_errors: - print( - "Continuing anyway since --suppress-clusterman-metrics-errors was passed" - ) - else: - raise + resources = get_resources_requested(spark_conf) + hourly_cost = get_spark_hourly_cost( + clusterman_metrics, + resources, + spark_conf["spark.executorEnv.PAASTA_CLUSTER"], + args.pool, + ) + message = ( + f"Resource request ({resources['cpus']} cpus and {resources['mem']} MB memory total)" + f" is estimated to cost ${hourly_cost} per hour" + ) + if clusterman_metrics.util.costs.should_warn(hourly_cost): + print(PaastaColors.red(f"WARNING: {message}")) + else: + print(message) return run_docker_container( container_name=spark_conf["spark.app.name"], @@ -959,6 +984,7 @@ def configure_and_run_docker_container( dry_run=args.dry_run, nvidia=args.nvidia, docker_memory_limit=docker_memory_limit, + docker_shm_size=docker_shm_size, docker_cpu_limit=docker_cpu_limit, ) @@ -995,6 +1021,14 @@ def get_docker_cmd( return inject_spark_conf_str(original_docker_cmd, spark_conf_str) +def _get_adhoc_docker_registry(service: str, soa_dir: str = DEFAULT_SOA_DIR) -> str: + if service is None: + raise NotImplementedError('"None" is not a valid service') + + service_configuration = read_service_configuration(service, soa_dir) + return service_configuration.get("docker_registry", DEFAULT_SPARK_DOCKER_REGISTRY) + + def build_and_push_docker_image(args: argparse.Namespace) -> Optional[str]: """ Build an image if the default Spark service image is not preferred. @@ -1018,24 +1052,61 @@ def build_and_push_docker_image(args: argparse.Namespace) -> Optional[str]: if cook_return != 0: return None - docker_url = f"{args.docker_registry}/{docker_tag}" + registry_uri = args.docker_registry or _get_adhoc_docker_registry( + service=args.service, + soa_dir=args.yelpsoa_config_root, + ) + + docker_url = f"{registry_uri}/{docker_tag}" command = f"docker tag {docker_tag} {docker_url}" print(PaastaColors.grey(command)) retcode, _ = _run(command, stream=True) if retcode != 0: return None - if args.docker_registry != DEFAULT_SPARK_DOCKER_REGISTRY: + if registry_uri != DEFAULT_SPARK_DOCKER_REGISTRY: command = "sudo -H docker push %s" % docker_url else: command = "docker push %s" % docker_url print(PaastaColors.grey(command)) - retcode, _ = _run(command, stream=True) + retcode, output = _run(command, stream=False) if retcode != 0: return None - return docker_url + # With unprivileged docker, the digest on the remote registry may not match the digest + # in the local environment. 
Because of this, we have to parse the digest message from the + # server response and use downstream when launching spark executors + + # Output from `docker push` with unprivileged docker looks like + # Using default tag: latest + # The push refers to repository [docker-dev.yelpcorp.com/paasta-spark-run-dpopes:latest] + # latest: digest: sha256:0a43aa65174a400bd280d48d460b73eb49b0ded4072c9e173f919543bf693557 + + # With privileged docker, the last line has an extra "size: 123" + # latest: digest: sha256:0a43aa65174a400bd280d48d460b73eb49b0ded4072c9e173f919543bf693557 size: 52 + + digest_line = output.split("\n")[-1] + digest_match = re.match(r"[^:]*: [^:]*: (?P<digest>[^\s]*)", digest_line) + if not digest_match: + raise ValueError(f"Could not determine digest from output: {output}") + digest = digest_match.group("digest") + + image_url = f"{docker_url}@{digest}" + + # If the local digest doesn't match the remote digest AND the registry is + # non-default (which requires authentication, and consequently sudo), + # downstream `docker run` commands will fail trying to authenticate. + # To work around this, we can proactively `sudo docker pull` here so that + # the image exists locally and can be `docker run` without sudo + if registry_uri != DEFAULT_SPARK_DOCKER_REGISTRY: + command = f"sudo -H docker pull {image_url}" + print(PaastaColors.grey(command)) + retcode, output = _run(command, stream=False) + if retcode != 0: + raise NoDockerImageError(f"Could not pull {image_url}: {output}") + + return image_url def validate_work_dir(s): @@ -1129,7 +1200,110 @@ def _get_k8s_url_for_cluster(cluster: str) -> Optional[str]: ) -def paasta_spark_run(args): +def parse_tronfig(tronfig_path: str, job_id: str) -> Optional[Dict[str, Any]]: + splitted = job_id.split(".") + if len(splitted) != 2: + return None + job_name, action_name = splitted + + file_content = read_yaml_file(tronfig_path) + jobs = filter_templates_from_config(file_content) + if job_name not in jobs or action_name not in jobs[job_name].get("actions", {}): + return None + return jobs[job_name]["actions"][action_name] + + +def update_args_from_tronfig(args: argparse.Namespace) -> Optional[Dict[str, str]]: + """ + Load and check the following config fields from the provided Tronfig. + - executor + - pool + - iam_role + - iam_role_provider + - command + - env + - spark_args + + Returns: environment variables dictionary or None if failed.
+ """ + action_dict = parse_tronfig(args.tronfig, args.job_id) + if action_dict is None: + print( + PaastaColors.red(f"Unable to get configs from job-id: {args.job_id}"), + file=sys.stderr, + ) + return None + + # executor === spark + if action_dict.get("executor", "") != "spark": + print( + PaastaColors.red("Invalid Tronfig: executor should be 'spark'"), + file=sys.stderr, + ) + return None + + # iam_role / aws_profile + if "iam_role" in action_dict and action_dict.get("iam_role_provider", "") != "aws": + print( + PaastaColors.red("Invalid Tronfig: iam_role_provider should be 'aws'"), + file=sys.stderr, + ) + return None + + # Other args + fields_to_args = { + "pool": "pool", + "iam_role": "assume_aws_role", + "command": "cmd", + "spark_args": "spark_args", + } + for field_name, arg_name in fields_to_args.items(): + if field_name in action_dict: + value = action_dict[field_name] + + # Convert spark_args values from dict to a string "k1=v1 k2=v2" + if field_name == "spark_args": + value = " ".join([f"{k}={v}" for k, v in dict(value).items()]) + + # Befutify for printing + arg_name_str = (f"--{arg_name.replace('_', '-')}").ljust(20, " ") + field_name_str = field_name.ljust(12) + + # Only load iam_role value if --aws-profile is not set + if field_name == "iam_role" and args.aws_profile != DEFAILT_AWS_PROFILE: + print( + PaastaColors.yellow( + f"Overwriting args with Tronfig: {arg_name_str} => {field_name_str} : IGNORE, " + "since --aws-profile is provided" + ), + ) + continue + + if hasattr(args, arg_name): + print( + PaastaColors.yellow( + f"Overwriting args with Tronfig: {arg_name_str} => {field_name_str} : {value}" + ), + ) + setattr(args, arg_name, value) + + # env (currently paasta spark-run does not support Spark driver secrets environment variables) + return action_dict.get("env", dict()) + + +def paasta_spark_run(args: argparse.Namespace) -> int: + driver_envs_from_tronfig: Dict[str, str] = dict() + if args.tronfig is not None: + if args.job_id is None: + print( + PaastaColors.red("Missing --job-id when --tronfig is provided"), + file=sys.stderr, + ) + return False + driver_envs_from_tronfig = update_args_from_tronfig(args) + if driver_envs_from_tronfig is None: + return False + # argparse does not work as expected with both default and # type=validate_work_dir. 
validate_work_dir(args.work_dir) @@ -1204,18 +1378,10 @@ def paasta_spark_run(args): assume_aws_role_arn=args.assume_aws_role, session_duration=args.aws_role_duration, ) - docker_image = get_docker_image(args, instance_config) - if docker_image is None: + docker_image_digest = get_docker_image(args, instance_config) + if docker_image_digest is None: return 1 - # Get image digest - docker_client = get_docker_client() - image_details = docker_client.inspect_image(docker_image) - if len(image_details["RepoDigests"]) < 1: - print("Failed to get docker image digest", file=sys.stderr) - return None - docker_image_digest = image_details["RepoDigests"][0] - pod_template_path = generate_pod_template_path() args.enable_compact_bin_packing = should_enable_compact_bin_packing( args.disable_compact_bin_packing, args.cluster_manager @@ -1294,4 +1460,5 @@ def paasta_spark_run(args): aws_creds=aws_creds, cluster_manager=args.cluster_manager, pod_template_path=pod_template_path, + extra_driver_envs=driver_envs_from_tronfig, ) diff --git a/paasta_tools/cli/cmds/status.py b/paasta_tools/cli/cmds/status.py index 8da41723fb..88870b0864 100644 --- a/paasta_tools/cli/cmds/status.py +++ b/paasta_tools/cli/cmds/status.py @@ -52,11 +52,13 @@ from paasta_tools.cassandracluster_tools import CassandraClusterDeploymentConfig from paasta_tools.cli.utils import figure_out_service_name from paasta_tools.cli.utils import get_instance_configs_for_service +from paasta_tools.cli.utils import get_paasta_oapi_api_clustername from paasta_tools.cli.utils import lazy_choices_completer from paasta_tools.cli.utils import list_deploy_groups from paasta_tools.cli.utils import NoSuchService from paasta_tools.cli.utils import validate_service_name from paasta_tools.cli.utils import verify_instances +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.flink_tools import FlinkDeploymentConfig from paasta_tools.flink_tools import get_flink_config_from_paasta_api_client from paasta_tools.flink_tools import get_flink_jobs_from_paasta_api_client @@ -101,6 +103,7 @@ CassandraClusterDeploymentConfig, KafkaClusterDeploymentConfig, KubernetesDeploymentConfig, + EksDeploymentConfig, AdhocJobConfig, MarathonServiceConfig, TronActionConfig, @@ -112,6 +115,7 @@ CassandraClusterDeploymentConfig, KafkaClusterDeploymentConfig, KubernetesDeploymentConfig, + EksDeploymentConfig, AdhocJobConfig, MarathonServiceConfig, ] @@ -278,9 +282,16 @@ def paasta_status_on_api_endpoint( lock: Lock, verbose: int, new: bool = False, + is_eks: bool = False, ) -> int: - output = ["", f"\n{service}.{PaastaColors.cyan(instance)} in {cluster}"] - client = get_paasta_oapi_client(cluster, system_paasta_config) + output = [ + "", + f"\n{service}.{PaastaColors.cyan(instance)} in {cluster}{' (EKS)' if is_eks else ''}", + ] + client = get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks), + system_paasta_config=system_paasta_config, + ) if not client: print("Cannot get a paasta-api client") exit(1) @@ -290,7 +301,6 @@ def paasta_status_on_api_endpoint( instance=instance, verbose=verbose, new=new, - include_smartstack=False, ) except client.api_error as exc: output.append(PaastaColors.red(exc.reason)) @@ -1716,7 +1726,6 @@ def get_autoscaling_table( f" Desired instances: {autoscaling_status['desired_replicas']}" ) table.append(f" Last scale time: {autoscaling_status['last_scale_time']}") - table.append(f" Dashboard: y/sfx-autoscaling") NA = PaastaColors.red("N/A") if len(autoscaling_status["metrics"]) > 0: 
table.append(f" Metrics:") @@ -2138,7 +2147,7 @@ def report_status_for_cluster( output = ["", "service: %s" % service, "cluster: %s" % cluster] deployed_instances = [] instances = [ - instance + (instance, instance_config_class) for instance, instance_config_class in instance_whitelist.items() if instance_config_class in ALLOWED_INSTANCE_CONFIG ] @@ -2175,7 +2184,7 @@ def report_status_for_cluster( return_code = 0 return_codes = [] - for deployed_instance in instances: + for deployed_instance, instance_config_class in instances: return_codes.append( paasta_status_on_api_endpoint( cluster=cluster, @@ -2185,6 +2194,7 @@ def report_status_for_cluster( lock=lock, verbose=verbose, new=new, + is_eks=(instance_config_class == EksDeploymentConfig), ) ) @@ -2192,7 +2202,11 @@ def report_status_for_cluster( return_code = 1 output.append( - report_invalid_whitelist_values(instances, seen_instances, "instance") + report_invalid_whitelist_values( + whitelist=[instance[0] for instance in instances], + items=seen_instances, + item_type="instance", + ) ) return return_code, output @@ -2569,6 +2583,7 @@ def _use_new_paasta_status(args, system_paasta_config) -> bool: marathon=print_marathon_status, kubernetes=print_kubernetes_status, kubernetes_v2=print_kubernetes_status_v2, + eks=print_kubernetes_status, tron=print_tron_status, adhoc=print_adhoc_status, flink=print_flink_status, diff --git a/paasta_tools/cli/cmds/validate.py b/paasta_tools/cli/cmds/validate.py index b591ccf930..f13b8eb28b 100644 --- a/paasta_tools/cli/cmds/validate.py +++ b/paasta_tools/cli/cmds/validate.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import functools import json import os import pkgutil @@ -26,6 +27,7 @@ from typing import Dict from typing import List from typing import Optional +from typing import Tuple from typing import Union import pytz @@ -49,6 +51,9 @@ from paasta_tools.cli.utils import PaastaColors from paasta_tools.cli.utils import success from paasta_tools.kubernetes_tools import sanitise_kubernetes_name +from paasta_tools.long_running_service_tools import ( + DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA, +) from paasta_tools.secret_tools import get_secret_name_from_ref from paasta_tools.secret_tools import is_secret_ref from paasta_tools.secret_tools import is_shared_secret @@ -58,6 +63,8 @@ from paasta_tools.tron_tools import TronJobConfig from paasta_tools.tron_tools import validate_complete_config from paasta_tools.utils import get_service_instance_list +from paasta_tools.utils import InstanceConfig +from paasta_tools.utils import InstanceConfigDict from paasta_tools.utils import list_all_instances_for_service from paasta_tools.utils import list_clusters from paasta_tools.utils import list_services @@ -96,10 +103,11 @@ SCHEMA_TYPES = { "adhoc", "kubernetes", # long-running services - "marathon", # long-running services on mesos - no longer used "rollback", # automatic rollbacks during deployments "tron", # batch workloads "eks", # eks workloads + "autotuned_defaults/kubernetes", + "autotuned_defaults/cassandracluster", } # we expect a comment that looks like # override-cpu-setting PROJ-1234 # but we don't have a $ anchor in case users want to add an additional @@ -115,6 +123,8 @@ # this to the autotune cap (i.e., 1) CPU_BURST_THRESHOLD = 2 +K8S_TYPES = {"eks", "kubernetes"} + class ConditionConfig(TypedDict, total=False): """ @@ -131,6 +141,26 @@ class ConditionConfig(TypedDict, total=False): dry_run: bool +@functools.lru_cache() +def load_all_instance_configs_for_service( + service: str, cluster: str, soa_dir: str +) -> Tuple[Tuple[str, InstanceConfig], ...]: + ret = [] + for instance in list_all_instances_for_service( + service=service, clusters=[cluster], soa_dir=soa_dir + ): + instance_config = get_instance_config( + service=service, + instance=instance, + cluster=cluster, + load_deployments=False, + soa_dir=soa_dir, + ) + ret.append((instance, instance_config)) + + return tuple(ret) + + def invalid_tron_namespace(cluster, output, filename): return failure( "%s is invalid:\n %s\n " "More info:" % (filename, output), @@ -283,7 +313,7 @@ def validate_schema(file_path: str, file_type: str) -> bool: config_file_object = get_config_file_dict(file_path) try: validator.validate(config_file_object) - if file_type == "kubernetes" and not validate_instance_names( + if file_type in K8S_TYPES and not validate_instance_names( config_file_object, file_path ): return False @@ -315,15 +345,16 @@ def validate_all_schemas(service_path: str) -> bool: :param service_path: path to location of configuration files """ - path = os.path.join(service_path, "*.yaml") + path = os.path.join(service_path, "**/*.yaml") returncode = True - for file_name in glob(path): + for file_name in glob(path, recursive=True): if os.path.islink(file_name): continue - basename = os.path.basename(file_name) + + filename_without_service_path = os.path.relpath(file_name, start=service_path) for file_type in SCHEMA_TYPES: - if basename.startswith(file_type): + if filename_without_service_path.startswith(file_type): if not validate_schema(file_name, file_type): returncode = False return returncode @@ -466,16 +497,9 @@ def 
validate_paasta_objects(service_path): returncode = True messages = [] for cluster in list_clusters(service, soa_dir): - for instance in list_all_instances_for_service( - service=service, clusters=[cluster], soa_dir=soa_dir + for instance, instance_config in load_all_instance_configs_for_service( + service=service, cluster=cluster, soa_dir=soa_dir ): - instance_config = get_instance_config( - service=service, - instance=instance, - cluster=cluster, - load_deployments=False, - soa_dir=soa_dir, - ) messages.extend(instance_config.validate()) returncode = len(messages) == 0 @@ -560,26 +584,46 @@ def validate_autoscaling_configs(service_path): ) for cluster in list_clusters(service, soa_dir): - for instance in list_all_instances_for_service( - service=service, clusters=[cluster], soa_dir=soa_dir + for instance, instance_config in load_all_instance_configs_for_service( + service=service, cluster=cluster, soa_dir=soa_dir ): - instance_config = get_instance_config( - service=service, - instance=instance, - cluster=cluster, - load_deployments=False, - soa_dir=soa_dir, - ) if ( - instance_config.get_instance_type() == "kubernetes" + instance_config.get_instance_type() in K8S_TYPES and instance_config.is_autoscaling_enabled() # we should eventually make the python templates add the override comment # to the correspoding YAML line, but until then we just opt these out of that validation - and __is_templated(service, soa_dir, cluster, workload="kubernetes") + and __is_templated( + service, + soa_dir, + cluster, + workload=instance_config.get_instance_type(), + ) is False ): autoscaling_params = instance_config.get_autoscaling_params() + if autoscaling_params["metrics_provider"] == "active-requests": + desired_active_requests_per_replica = autoscaling_params.get( + "desired_active_requests_per_replica", + DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA, + ) + if desired_active_requests_per_replica <= 0: + returncode = False + print( + failure( + msg="Autoscaling configuration is invalid: desired_active_requests_per_replica must be " + "greater than zero", + link="", + ) + ) + if len(instance_config.get_registrations()) > 1: + returncode = False + print( + failure( + msg="Autoscaling configuration is invalid: active-requests autoscaler doesn't support instances with multiple registrations.", + link="", + ) + ) if autoscaling_params["metrics_provider"] in { "uwsgi", "piscina", @@ -611,7 +655,11 @@ def validate_autoscaling_configs(service_path): # we need access to the comments, so we need to read the config with ruamel to be able # to actually get them in a "nice" automated fashion config = get_config_file_dict( - os.path.join(soa_dir, service, f"kubernetes-{cluster}.yaml"), + os.path.join( + soa_dir, + service, + f"{instance_config.get_instance_type()}-{cluster}.yaml", + ), use_ruamel=True, ) if config[instance].get("cpus") is None: @@ -649,16 +697,9 @@ def validate_min_max_instances(service_path): returncode = True for cluster in list_clusters(service, soa_dir): - for instance in list_all_instances_for_service( - service=service, clusters=[cluster], soa_dir=soa_dir + for instance, instance_config in load_all_instance_configs_for_service( + service=service, cluster=cluster, soa_dir=soa_dir ): - instance_config = get_instance_config( - service=service, - instance=instance, - cluster=cluster, - load_deployments=False, - soa_dir=soa_dir, - ) if instance_config.get_instance_type() != "tron": min_instances = instance_config.get_min_instances() max_instances = instance_config.get_max_instances() @@ -676,15 
+717,21 @@ def validate_min_max_instances(service_path): return returncode -def check_secrets_for_instance(instance_config_dict, soa_dir, service_path, vault_env): +def check_secrets_for_instance( + instance_config_dict: InstanceConfigDict, soa_dir: str, service: str, vault_env: str +) -> bool: return_value = True + # If the service: directive is used, look for the secret there, rather than where the instance config is defined. + service_containing_secret = instance_config_dict.get("service", service) for env_value in instance_config_dict.get("env", {}).values(): if is_secret_ref(env_value): secret_name = get_secret_name_from_ref(env_value) if is_shared_secret(env_value): secret_file_name = f"{soa_dir}/_shared/secrets/{secret_name}.json" else: - secret_file_name = f"{service_path}/secrets/{secret_name}.json" + secret_file_name = ( + f"{soa_dir}/{service_containing_secret}/secrets/{secret_name}.json" + ) if os.path.isfile(secret_file_name): secret_json = get_config_file_dict(secret_file_name) if "ciphertext" not in secret_json["environments"].get(vault_env, {}): @@ -720,18 +767,11 @@ def validate_secrets(service_path): return_value = False continue - for instance in list_all_instances_for_service( - service=service, clusters=[cluster], soa_dir=soa_dir + for instance, instance_config in load_all_instance_configs_for_service( + service=service, cluster=cluster, soa_dir=soa_dir ): - instance_config = get_instance_config( - service=service, - instance=instance, - cluster=cluster, - load_deployments=False, - soa_dir=soa_dir, - ) if not check_secrets_for_instance( - instance_config.config_dict, soa_dir, service_path, vault_env + instance_config.config_dict, soa_dir, service, vault_env ): return_value = False if return_value: @@ -747,27 +787,29 @@ def validate_cpu_burst(service_path: str) -> bool: returncode = True for cluster in list_clusters(service, soa_dir): - if __is_templated(service, soa_dir, cluster, workload="kubernetes"): + if __is_templated( + service, soa_dir, cluster, workload="kubernetes" + ) or __is_templated(service, soa_dir, cluster, workload="eks"): # we should eventually make the python templates add the override comment # to the correspoding YAML line, but until then we just opt these out of that validation continue - for instance in list_all_instances_for_service( - service=service, clusters=[cluster], soa_dir=soa_dir + for instance, instance_config in load_all_instance_configs_for_service( + service=service, cluster=cluster, soa_dir=soa_dir ): - instance_config = get_instance_config( - service=service, - instance=instance, - cluster=cluster, - load_deployments=False, - soa_dir=soa_dir, + is_k8s_service = ( + instance_config.get_instance_type() == "kubernetes" + or instance_config.get_instance_type() == "eks" ) - is_k8s_service = instance_config.get_instance_type() == "kubernetes" should_skip_cpu_burst_validation = service in skip_cpu_burst_validation_list if is_k8s_service and not should_skip_cpu_burst_validation: # we need access to the comments, so we need to read the config with ruamel to be able # to actually get them in a "nice" automated fashion config = get_config_file_dict( - os.path.join(soa_dir, service, f"kubernetes-{cluster}.yaml"), + os.path.join( + soa_dir, + service, + f"{instance_config.get_instance_type()}-{cluster}.yaml", + ), use_ruamel=True, ) diff --git a/paasta_tools/cli/schemas/autotuned_defaults/marathon_schema.json b/paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json similarity index 62% rename from 
paasta_tools/cli/schemas/autotuned_defaults/marathon_schema.json rename to paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json index 89464e4f92..0725f3e3b3 100644 --- a/paasta_tools/cli/schemas/autotuned_defaults/marathon_schema.json +++ b/paasta_tools/cli/schemas/autotuned_defaults/cassandracluster_schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-04/schema#", - "description": "Properties that can be set by automated processes for http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html#marathon-clustername-yaml", + "description": "Properties that can be set by automated processes for cassandracluster spec files", "type": "object", "additionalProperties": false, "minProperties": 1, @@ -15,29 +15,20 @@ "minimum": 0, "exclusiveMinimum": true }, - "cpu_burst_add": { + "cpu_burst_percent": { "type": "number", - "minimum": 0.0, - "exclusiveMinimum": false - }, - "disk": { - "type": "number", - "minimum": 128, - "exclusiveMinimum": true - }, - "min_instances": { - "type": "integer", "minimum": 0, "exclusiveMinimum": false }, - "max_instances": { - "type": "integer", - "minimum": 0, - "exclusiveMinimum": false + "disk": { + "type": "string" }, "mem": { + "type": "string" + }, + "replicas": { "type": "number", - "minimum": 32, + "minimum": 0, "exclusiveMinimum": true } } diff --git a/paasta_tools/cli/schemas/eks_schema.json b/paasta_tools/cli/schemas/eks_schema.json index 80d8fdfd37..b2ce87c6b5 120000 --- a/paasta_tools/cli/schemas/eks_schema.json +++ b/paasta_tools/cli/schemas/eks_schema.json @@ -1 +1 @@ -paasta_tools/cli/schemas/kubernetes_schema.json \ No newline at end of file +kubernetes_schema.json \ No newline at end of file diff --git a/paasta_tools/cli/schemas/kubernetes_schema.json b/paasta_tools/cli/schemas/kubernetes_schema.json index a2f4174cdc..005add2832 100644 --- a/paasta_tools/cli/schemas/kubernetes_schema.json +++ b/paasta_tools/cli/schemas/kubernetes_schema.json @@ -198,12 +198,16 @@ "cpu", "piscina", "gunicorn", - "arbitrary_promql" + "arbitrary_promql", + "active-requests" ] }, "decision_policy": { "type": "string" }, + "desired_active_requests_per_replica": { + "type": "number" + }, "setpoint": { "type": "number" }, @@ -225,9 +229,6 @@ "use_resource_metrics": { "type": "boolean" }, - "uwsgi_stats_port": { - "type": "integer" - }, "scaledown_policies": { "type": "object" }, @@ -273,6 +274,9 @@ "required": [ "metricsQuery" ] + }, + "max_instances_alert_threshold": { + "type": "number" } }, "allOf": [ @@ -287,6 +291,7 @@ "cpu", "piscina", "gunicorn", + "active-requests", "if metrics_provider is arbitrary_promql, the prometheus_adapter_config parameter is required" ] } @@ -424,6 +429,101 @@ } } }, + "node_selectors_preferred": { + "type": "array", + "items": { + "type": "object", + "properties": { + "weight": { + "type": "integer" + }, + "preferences": { + "type": "object", + "additionalProperties": false, + "patternProperties": { + "^[a-zA-Z0-9]+[a-zA-Z0-9-_./]*[a-zA-Z0-9]+$": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + { + "type": "array", + "items": { + "anyOf": [ + { + "type": "object", + "additionalProperties": false, + "properties": { + "operator": { + "enum": [ + "In", + "NotIn" + ] + }, + "values": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + } + }, + "required": [ + "operator", + "values" + ] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "operator": { + "enum": [ + 
"Exists", + "DoesNotExist" + ] + } + }, + "required": [ + "operator" + ] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "operator": { + "enum": [ + "Gt", + "Lt" + ] + }, + "value": { + "type": "integer" + } + }, + "required": [ + "operator", + "value" + ] + } + ] + } + } + ] + } + } + } + } + } + }, "pool": { "type": "string" }, @@ -623,11 +723,12 @@ "type": "array" }, "iam_role": { - "type": "string" + "type": "string", + "pattern": "^arn:aws:iam::[0-9]+:role/[a-zA-Z0-9+=,.@_-]+$", + "$comment": "This should be a valid AWS IAM role ARN, see https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_iam-quotas.html#reference_iam-quotas-names" }, "iam_role_provider": { "enum": [ - "kiam", "aws" ] }, @@ -858,7 +959,81 @@ }, "namespace": { "type": "string", - "pattern": "^(paasta|paastasvc-.*)$" + "pattern": "^[a-z0-9]([-a-z0-9]*[a-z0-9])?$", + "maxLength": 63 + }, + "autotune_limits": { + "type": "object", + "properties": { + "cpus": { + "type": "object", + "properties": { + "min": { + "type": "number", + "minimum": 0, + "exclusiveMinimum": true + }, + "max": { + "type": "number", + "minimum": 0, + "exclusiveMinimum": true + } + } + }, + "mem": { + "type": "object", + "properties": { + "min": { + "type": "integer", + "minimum": 32, + "exclusiveMinimum": false + }, + "max": { + "type": "integer", + "minimum": 32, + "exclusiveMinimum": false + } + } + }, + "disk": { + "type": "object", + "properties": { + "min": { + "type": "integer", + "minimum": 128, + "exclusiveMinimum": false + }, + "max": { + "type": "integer", + "minimum": 128, + "exclusiveMinimum": false + } + } + } + } + }, + "topology_spread_constraints": { + "type": "array", + "items": { + "type": "object", + "properties": { + "topology_key": { + "type": "string" + }, + "when_unsatisfiable": { + "type": "string", + "enum": [ + "ScheduleAnyway", + "DoNotSchedule" + ] + }, + "max_skew": { + "type": "integer" + } + }, + "required": [] + }, + "uniqueItems": true } } } diff --git a/paasta_tools/cli/schemas/marathon_schema.json b/paasta_tools/cli/schemas/marathon_schema.json deleted file mode 100644 index b8ccaa820c..0000000000 --- a/paasta_tools/cli/schemas/marathon_schema.json +++ /dev/null @@ -1,371 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "description": "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html#marathon-clustername-yaml", - "type": "object", - "additionalProperties": false, - "minProperties": 1, - "patternProperties": { - "^_.*$": { - "type": "object", - "additionalProperties": true - }, - "^([a-z0-9]|[a-z0-9][a-z0-9_-]*[a-z0-9])*$": { - "type": "object", - "additionalProperties": false, - "minProperties": 1, - "allOf": [ - { - "oneOf": [ - { - "properties": { - "healthcheck_mode": { - "enum": [ - "tcp", - "http", - "https" - ] - } - } - }, - { - "properties": { - "healthcheck_mode": { - "enum": [ - "cmd" - ] - }, - "healthcheck_cmd": { - "type": "string" - } - }, - "required": [ - "healthcheck_cmd" - ] - } - ] - }, - { - "oneOf": [ - { - "properties": { - "drain_method": { - "enum": [ - "noop", - "hacheck", - "test" - ] - } - } - }, - { - "properties": { - "drain_method": { - "enum": [ - "http" - ] - }, - "drain_method_params": { - "type": "object", - "properties": { - "drain": { - "type": "object" - }, - "stop_draining": { - "type": "object" - }, - "is_draining": { - "type": "object" - }, - "is_safe_to_kill": { - "type": "object" - } - }, - "required": [ - "drain", - "stop_draining", - "is_draining", - "is_safe_to_kill" - ] - } - }, - "required": [ - 
"drain_method_params" - ] - } - ] - } - ], - "properties": { - "cpus": { - "type": "number", - "minimum": 0, - "exclusiveMinimum": true, - "default": 0.25 - }, - "mem": { - "type": "number", - "minimum": 32, - "exclusiveMinimum": true, - "default": 1024 - }, - "disk": { - "type": "number", - "minimum": 0, - "exclusiveMinimum": true, - "default": 1024 - }, - "gpus": { - "type": "integer", - "minimum": 0, - "exclusiveMinimum": false - }, - "instances": { - "type": "integer", - "minimum": 0, - "exclusiveMinimum": false - }, - "min_instances": { - "type": "integer", - "minimum": 0, - "exclusiveMinimum": false - }, - "max_instances": { - "type": "integer", - "minimum": 0, - "exclusiveMinimum": false - }, - "backoff_factor": { - "type": "integer", - "default": 2 - }, - "max_launch_delay_seconds": { - "type": "integer", - "default": 300 - }, - "registrations": { - "type": "array", - "items": { - "type": "string" - }, - "uniqueItems": true - }, - "bounce_method": { - "type": "string" - }, - "bounce_health_params": { - "type": "object", - "properties": { - "check_haproxy": { - "type": "boolean", - "default": true - }, - "min_task_uptime": { - "type": "number" - }, - "haproxy_min_fraction_up": { - "type": "number", - "minimum": 0.0, - "maximum": 1.0, - "exclusiveMinimum": true, - "exclusiveMaximum": false - } - } - }, - "bounce_margin_factor": { - "type": "number", - "default": 1, - "minimum": 0, - "maximum": 1, - "exclusiveMinimum": true, - "exclusiveMaximum": false - }, - "bounce_priority": { - "type": "integer" - }, - "bounce_start_deadline": { - "type": "number" - }, - "deploy_group": { - "type": "string" - }, - "autoscaling": { - "type": "object" - }, - "sfn_autoscaling": { - "type": "object" - }, - "drain_method": { - "enum": [ - "noop", - "hacheck", - "http", - "test" - ], - "default": "noop" - }, - "drain_method_params": { - "type": "object" - }, - "constraints": { - "type": "array", - "items": { - "type": "array" - }, - "uniqueItems": true - }, - "extra_constraints": { - "type": "array", - "items": { - "type": "array" - }, - "uniqueItems": true - }, - "pool": { - "type": "string" - }, - "cmd": { - "type": "string" - }, - "args": { - "type": "array", - "items": { - "type": "string" - } - }, - "env": { - "type": "object", - "patternProperties": { - "^[a-zA-Z_]+[a-zA-Z0-9_]*$": { - "type": "string" - } - }, - "additionalProperties": false - }, - "extra_volumes": { - "type": "array", - "items": { - "type": "object" - }, - "uniqueItems": true - }, - "monitoring": { - "type": "object", - "properties": { - "team": { - "type": "string" - }, - "page": { - "type": "boolean" - } - }, - "additionalProperties": true - }, - "net": { - "type": "string" - }, - "container_port": { - "type": "number" - }, - "deploy_blacklist": { - "type": "array" - }, - "deploy_whitelist": { - "type": "array" - }, - "healthcheck_mode": { - "enum": [ - "cmd", - "tcp", - "http", - "https" - ] - }, - "healthcheck_cmd": { - "type": "string", - "default": "/bin/true" - }, - "healthcheck_grace_period_seconds": { - "type": "number", - "default": 60 - }, - "healthcheck_interval_seconds": { - "type": "number", - "default": 10 - }, - "healthcheck_timeout_seconds": { - "type": "number", - "default": 10 - }, - "healthcheck_max_consecutive_failures": { - "type": "integer", - "default": 30 - }, - "healthcheck_uri": { - "type": "string", - "default": "/status" - }, - "marathon_shard": { - "type": "integer", - "minimum": 0 - }, - "previous_marathon_shards": { - "type": "array" - }, - "replication_threshold": { - "type": "integer", - 
"minimum": 0 - }, - "cap_add": { - "type": "array", - "items": { - "type": "string" - } - }, - "cfs_period_us": { - "type": "integer", - "minimum": 1000, - "maximum": 1000000, - "exclusiveMinimum": false - }, - "cpu_burst_add": { - "type": "number", - "minimum": 0.0, - "exclusiveMinimum": false - }, - "host_port": { - "type": "integer", - "default": 0, - "minimum": 0, - "maximum": 65535, - "exclusiveMinimum": false - }, - "dependencies_reference": { - "type": "string" - }, - "extra_docker_args": { - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "security": { - "type": "object", - "properties": { - "inbound_firewall": { - "enum": [ - "accept", - "reject" - ] - }, - "outbound_firewall": { - "enum": [ - "block", - "monitor" - ] - } - } - } - } - } - } -} diff --git a/paasta_tools/cli/schemas/tron_schema.json b/paasta_tools/cli/schemas/tron_schema.json index 9134bfc934..3f1ad3095c 100644 --- a/paasta_tools/cli/schemas/tron_schema.json +++ b/paasta_tools/cli/schemas/tron_schema.json @@ -101,7 +101,9 @@ "type": "string" }, "iam_role": { - "type": "string" + "type": "string", + "pattern": "^arn:aws:iam::[0-9]+:role/[a-zA-Z0-9+=,.@_-]+$", + "$comment": "This should be a valid AWS IAM role ARN, see https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_iam-quotas.html#reference_iam-quotas-names" }, "iam_role_provider": { "enum": [ @@ -467,9 +469,6 @@ ], "additionalProperties": false, "properties": { - "use_k8s": { - "type": "boolean" - }, "name": { "$ref": "#definitions/name" }, diff --git a/paasta_tools/cli/utils.py b/paasta_tools/cli/utils.py index b5af5762eb..9cbca4bcf3 100644 --- a/paasta_tools/cli/utils.py +++ b/paasta_tools/cli/utils.py @@ -26,6 +26,7 @@ from shlex import quote from typing import Callable from typing import Collection +from typing import Generator from typing import Iterable from typing import List from typing import Mapping @@ -41,6 +42,8 @@ from paasta_tools import remote_git from paasta_tools.adhoc_tools import load_adhoc_job_config from paasta_tools.cassandracluster_tools import load_cassandracluster_instance_config +from paasta_tools.eks_tools import EksDeploymentConfig +from paasta_tools.eks_tools import load_eks_service_config from paasta_tools.flink_tools import load_flink_instance_config from paasta_tools.kafkacluster_tools import load_kafkacluster_instance_config from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig @@ -65,9 +68,11 @@ from paasta_tools.utils import list_clusters from paasta_tools.utils import list_services from paasta_tools.utils import load_system_paasta_config +from paasta_tools.utils import PAASTA_K8S_INSTANCE_TYPES from paasta_tools.utils import PaastaColors from paasta_tools.utils import SystemPaastaConfig from paasta_tools.utils import validate_service_instance +from paasta_tools.vitesscluster_tools import load_vitess_instance_config log = logging.getLogger(__name__) @@ -772,6 +777,7 @@ class LongRunningInstanceTypeHandler(NamedTuple): kubernetes=InstanceTypeHandler( get_service_instance_list, load_kubernetes_service_config ), + eks=InstanceTypeHandler(get_service_instance_list, load_eks_service_config), tron=InstanceTypeHandler(get_service_instance_list, load_tron_instance_config), flink=InstanceTypeHandler(get_service_instance_list, load_flink_instance_config), cassandracluster=InstanceTypeHandler( @@ -780,6 +786,9 @@ class LongRunningInstanceTypeHandler(NamedTuple): kafkacluster=InstanceTypeHandler( get_service_instance_list, load_kafkacluster_instance_config ), + 
vitesscluster=InstanceTypeHandler( + get_service_instance_list, load_vitess_instance_config + ), nrtsearchservice=InstanceTypeHandler( get_service_instance_list, load_nrtsearchservice_instance_config ), @@ -807,12 +816,18 @@ class LongRunningInstanceTypeHandler(NamedTuple): kafkacluster=LongRunningInstanceTypeHandler( get_service_instance_list, load_kafkacluster_instance_config ), + vitesscluster=LongRunningInstanceTypeHandler( + get_service_instance_list, load_vitess_instance_config + ), nrtsearchservice=LongRunningInstanceTypeHandler( get_service_instance_list, load_nrtsearchservice_instance_config ), monkrelays=LongRunningInstanceTypeHandler( get_service_instance_list, load_monkrelaycluster_instance_config ), + eks=LongRunningInstanceTypeHandler( + get_service_instance_list, load_eks_service_config + ), ) @@ -852,11 +867,16 @@ def get_namespaces_for_secret( ) -> Set[str]: secret_to_k8s_namespace = set() + k8s_instance_type_classes = { + "kubernetes": KubernetesDeploymentConfig, + "eks": EksDeploymentConfig, + } for instance_type in INSTANCE_TYPES: - if instance_type == "kubernetes": + if instance_type in PAASTA_K8S_INSTANCE_TYPES: config_loader = PaastaServiceConfigLoader(service, soa_dir) for service_instance_config in config_loader.instance_configs( - cluster=cluster, instance_type_class=KubernetesDeploymentConfig + cluster=cluster, + instance_type_class=k8s_instance_type_classes[instance_type], ): secret_to_k8s_namespace.add(service_instance_config.get_namespace()) else: @@ -1046,14 +1066,14 @@ def get_instance_configs_for_service( type_filter: Optional[Iterable[str]] = None, clusters: Optional[Sequence[str]] = None, instances: Optional[Sequence[str]] = None, -) -> Iterable[InstanceConfig]: +) -> Generator[InstanceConfig, None, None]: if not clusters: clusters = list_clusters(service=service, soa_dir=soa_dir) if type_filter is None: type_filter = INSTANCE_TYPE_HANDLERS.keys() - for cluster in list_clusters(service=service, soa_dir=soa_dir): + for cluster in clusters: for instance_type, instance_handlers in INSTANCE_TYPE_HANDLERS.items(): if instance_type not in type_filter: continue @@ -1178,3 +1198,15 @@ def verify_instances( print(" %s" % instance) return misspelled_instances + + +def get_paasta_oapi_api_clustername(cluster: str, is_eks: bool) -> str: + """ + We'll be doing a tiny bit of lying while we have both EKS and non-EKS + clusters: these will generally share the same PaaSTA name (i.e., the + soaconfigs suffix will stay the same) - but we'll need a way to route API + requests to the correct place. 
To do so, we'll add "fake" entries to our + api_endpoints SystemPaastaConfig that are the PaaSTA clustername with an + "eks-" prefix + """ + return f"eks-{cluster}" if is_eks else cluster diff --git a/paasta_tools/config_utils.py b/paasta_tools/config_utils.py index ad6ead5f0c..4e43d05280 100644 --- a/paasta_tools/config_utils.py +++ b/paasta_tools/config_utils.py @@ -1,3 +1,4 @@ +import copy import logging import os import subprocess @@ -7,17 +8,30 @@ from typing import List from typing import Optional from typing import Set +from typing import Tuple import ruamel.yaml as yaml from paasta_tools.cli.cmds.validate import validate_schema +from paasta_tools.utils import AUTO_SOACONFIG_SUBDIR from paasta_tools.utils import DEFAULT_SOA_DIR log = logging.getLogger(__name__) # Must have a schema defined -KNOWN_CONFIG_TYPES = ("marathon", "kubernetes", "deploy", "smartstack") +KNOWN_CONFIG_TYPES = ( + "marathon", + "kubernetes", + "deploy", + "smartstack", + "cassandracluster", +) + +# this could use a better name - but basically, this is for pairs of instance types +# where you generally want to check both types (i.e.,g a status-quo and migration +# instance type) +INSTANCE_TYPE_COUNTERPARTS = {"eks": "kubernetes", "kubernetes": "eks"} def my_represent_none(self, data): @@ -174,7 +188,13 @@ def __enter__(self): self.pwd = os.getcwd() os.chdir(self.working_dir) if self.branch != "master": - subprocess.check_call(["git", "checkout", "-b", self.branch]) + if self._remote_branch_exists(): + subprocess.check_call(["git", "fetch", "origin", self.branch]) + subprocess.check_call( + ["git", "checkout", "-b", self.branch, f"origin/{self.branch}"] + ) + else: + subprocess.check_call(["git", "checkout", "-b", self.branch]) return self def __exit__(self, type, value, traceback): @@ -182,6 +202,14 @@ def __exit__(self, type, value, traceback): if self.tmp_dir: self.tmp_dir.cleanup() + def _remote_branch_exists(self) -> bool: + return ( + subprocess.run( + ["git", "ls-remote", "--exit-code", "--heads", "origin", self.branch], + ).returncode + == 0 + ) + def write_configs( self, service: str, @@ -239,3 +267,106 @@ def commit_to_remote(self, extra_message: str = ""): _push_to_remote(self.branch) else: log.info("No files changed, no push required.") + + def _clamp_recommendations( + self, merged_recommendation: Dict[str, Any], config: Dict[str, Any] + ) -> Dict[str, Any]: + clamped_recomendation = copy.deepcopy(merged_recommendation) + for limit_type, limits in config.get("autotune_limits", {}).items(): + log.debug(f"Processing {limit_type} autotune limits...") + min_value = limits.get("min") + max_value = limits.get("max") + unclamped_resource_value = clamped_recomendation.get(limit_type) + + # no autotune data present, but min value present + if not unclamped_resource_value and min_value: + # use the min value since this is likely an autogenerated service where we know we have a given minimum CPU + # that we'd like to allocate + log.debug( + f"No {limit_type} autotune data found, using autotune limit lower bound ({min_value})." + ) + clamped_recomendation[limit_type] = min_value + + # otherwise, we can do some pretty rote clamping of resource values + elif unclamped_resource_value is not None: + if min_value and unclamped_resource_value < min_value: + log.debug( + f"{limit_type} autotune config under configured limit ({min_value}), using autotune limit lower bound." 
+ ) + clamped_recomendation[limit_type] = min_value + if max_value and unclamped_resource_value > max_value: + log.debug( + f"{limit_type} autotune config over configured limit ({max_value}), using autotune limit upper bound." + ) + clamped_recomendation[limit_type] = max_value + else: + log.debug( + f"No {limit_type} autotune data or limits found, will continue using PaaSTA defaults." + ) + + return clamped_recomendation + + def merge_recommendations( + self, recs: Dict[Tuple[str, str], Dict[str, Any]] + ) -> Dict[Tuple[str, str], Dict[str, Any]]: + """ + :param recs: Dictionary of (service, instance_type_cluster) -> recommendations. + NOTE: instance_type_cluster is something like "kubernetes-pnw-prod". + :returns: a dictionary of the same format, with the previous recommendations merged in and autotune_limits applied. + """ + merged_recs = {} + for ( + service, + instance_type_cluster, + ), recommendations_by_instance in recs.items(): + log.info(f"Starting to process {service}/{instance_type_cluster}.yaml...") + log.debug( + f"Getting current autotune configs for {service}/{instance_type_cluster}.yaml" + ) + existing_recommendations = self.get_existing_configs( + service=service, + file_name=instance_type_cluster, + sub_dir=AUTO_SOACONFIG_SUBDIR, + ) + + log.debug( + f"Getting current configs for {service}/{instance_type_cluster}.yaml..." + ) + # i'm so sorry. + # basically, we need to make sure that for every autotuned service, we load both kubernetes- + # and eks- files for the existing configs, as there are services that at any given time will + # only exist on one of these or may have a mix (and the csv file that we get fakes the current + # cluster type) + # NOTE: if an instance appears in both files, the counterpart will always "win" - this + # should only be possible while an instance is being migrated from one instance type to + # another + instance_type, _ = instance_type_cluster.split("-", maxsplit=1) + existing_configs = { + # if we upgrade to py3.9 before getting rid of this code, this should use PEP-584-style dict merging + **self.get_existing_configs( + service=service, + file_name=instance_type_cluster, + ), + **self.get_existing_configs( + service=service, + file_name=instance_type_cluster.replace( + instance_type, INSTANCE_TYPE_COUNTERPARTS.get(instance_type, "") + ), + ), + } + + for instance_name, recommendation in recommendations_by_instance.items(): + log.debug( + f"Merging recommendations for {instance_name} in {service}/{AUTO_SOACONFIG_SUBDIR}/{instance_type_cluster}.yaml..."
+ ) + existing_recommendations.setdefault(instance_name, {}) + existing_recommendations[instance_name].update(recommendation) + + existing_recommendations[instance_name] = self._clamp_recommendations( + merged_recommendation=existing_recommendations[instance_name], + config=existing_configs[instance_name], + ) + merged_recs[(service, instance_type_cluster)] = existing_recommendations + log.info(f"Done processing {service}/{instance_type_cluster}.yaml.") + + return merged_recs diff --git a/paasta_tools/contrib/create_paasta_playground.py b/paasta_tools/contrib/create_paasta_playground.py index 660d3a7c68..9672e80d5c 100644 --- a/paasta_tools/contrib/create_paasta_playground.py +++ b/paasta_tools/contrib/create_paasta_playground.py @@ -46,6 +46,7 @@ def main(): src="./k8s_itests/deployments/paasta/fake_soa_config", dst="soa_config_playground", values=values_path, + overwrite=False, ) diff --git a/paasta_tools/contrib/get_running_task_allocation.py b/paasta_tools/contrib/get_running_task_allocation.py index be9854aadd..f776ae9e67 100644 --- a/paasta_tools/contrib/get_running_task_allocation.py +++ b/paasta_tools/contrib/get_running_task_allocation.py @@ -275,54 +275,55 @@ def parse_args() -> argparse.Namespace: "--scheduler", help="Scheduler to get task info from", dest="scheduler", - default="mesos", + default="kubernetes", choices=["mesos", "kubernetes"], ) + parser.add_argument( + "--additional-namespaces-exclude", + help="full names of namespaces to exclude when fetching allocation info", + dest="additional_namespaces_exclude", + nargs="+", + default=[], + ) parser.add_argument( "--namespace-prefix", - help="prefix of the namespace to fetch the logs for" - "Used only when scheduler is kubernetes", + help=argparse.SUPPRESS, dest="namespace_prefix", default="paasta", ) parser.add_argument( "--additional-namespaces", - help="full names of namespaces to fetch allocation info for that don't match --namespace-prefix" - "Used only when scheduler is kubernetes", + help=argparse.SUPPRESS, dest="additional_namespaces", nargs="+", # we default this to tron since this is really the only non-paasta-prefix namespaced that is part of paasta # and we'd like to not run two cronjobs to get this information :p default=["tron"], ) - return parser.parse_args() + args = parser.parse_args() + + args.additional_namespaces_exclude = set(args.additional_namespaces_exclude) + return args -def get_matching_namespaces( - namespaces: List[str], namespace_prefix: str, additional_namespaces: List[str] + +def get_unexcluded_namespaces( + namespaces: List[str], excluded_namespaces: List[str] ) -> List[str]: - return [ - n - for n in namespaces - if n.startswith(namespace_prefix) or n in additional_namespaces - ] + return [n for n in namespaces if n not in excluded_namespaces] def main(args: argparse.Namespace) -> None: cluster = load_system_paasta_config().get_cluster() - if args.scheduler == "mesos": + kube_client = KubeClient() + all_namespaces = kubernetes_tools.get_all_namespaces(kube_client) + for matching_namespace in get_unexcluded_namespaces( + all_namespaces, + args.additional_namespaces_exclude, + ): display_task_allocation_info( - cluster, args.scheduler, args.namespace_prefix, kube_client=None + cluster, args.scheduler, matching_namespace, kube_client ) - else: - kube_client = KubeClient() - all_namespaces = kubernetes_tools.get_all_namespaces(kube_client) - for matching_namespace in get_matching_namespaces( - all_namespaces, args.namespace_prefix, 
args.additional_namespaces - ): - display_task_allocation_info( - cluster, args.scheduler, matching_namespace, kube_client - ) def display_task_allocation_info( diff --git a/paasta_tools/contrib/habitat_fixer.py b/paasta_tools/contrib/habitat_fixer.py new file mode 100755 index 0000000000..63fdee22c3 --- /dev/null +++ b/paasta_tools/contrib/habitat_fixer.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +import argparse +from pathlib import Path + +from kubernetes.client import V1Node + +from paasta_tools.kubernetes_tools import KUBE_CONFIG_PATH +from paasta_tools.kubernetes_tools import KUBE_CONFIG_USER_PATH +from paasta_tools.kubernetes_tools import KubeClient + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Karpenter Habitat Corruption Workaround" + ) + parser.add_argument("-c", "--cluster", required=True) + parser.add_argument( + "-k", + "--kubeconfig", + default=KUBE_CONFIG_PATH + if Path(KUBE_CONFIG_PATH).exists() + else KUBE_CONFIG_USER_PATH, + ) + parser.add_argument( + "-t", "--context", default=None # -c is taken, so lets use the last letter :p + ) + parser.add_argument( + "--for-real", + action="store_true", + ) + parsed_args = parser.parse_args() + + if not parsed_args.context: + if parsed_args.kubeconfig == KUBE_CONFIG_USER_PATH: + # in the user kubeconfig, context names are just the cluster names + parsed_args.context = parsed_args.cluster + else: + print( + f"NOTE: no context specified - will use the current context selected in {parsed_args.kubeconfig} " + "(or the KUBECONTEXT environment variable if set)." + ) + + return parsed_args + + +def is_affected_node(node: V1Node) -> bool: + try: + int(node.metadata.labels["yelp.com/habitat"]) + return True + except ValueError: + return False + + +def get_desired_habitat(node: V1Node) -> str: + zone = node.metadata.labels["topology.kubernetes.io/zone"].replace("-", "") + ecosystem = node.metadata.labels["yelp.com/ecosystem"] + + return f"{zone}{ecosystem}" + + +def main(): + args = parse_args() + client = KubeClient(config_file=args.kubeconfig, context=args.context) + for node in client.core.list_node().items: + if not is_affected_node(node): + continue + + if args.for_real: + client.core.patch_node( + name=node.metadata.name, + body={ + "metadata": { + "labels": { + "yelp.com/habitat": get_desired_habitat(node), + }, + } + }, + ) + else: + print( + f"Would have edited {node.metadata.name} in pool={node.metadata.labels['yelp.com/pool']} to have habitat={get_desired_habitat(node)} (from {node.metadata.labels['yelp.com/habitat']})", + ) + + +if __name__ == "__main__": + main() diff --git a/paasta_tools/contrib/render_template.py b/paasta_tools/contrib/render_template.py index 5c9ddf4cf6..dde7fea595 100755 --- a/paasta_tools/contrib/render_template.py +++ b/paasta_tools/contrib/render_template.py @@ -31,15 +31,17 @@ def render_file(src, dst, values): new.write(replace(old.read(), values)) -def render(src, dst, values={}, exclude={}): +def render(src, dst, values={}, exclude={}, overwrite=True): if os.path.isfile(src): - render_file(src, dst, values) + if overwrite: + render_file(src, dst, values) return for f in os.scandir(src): if f.name.startswith(".") or f.path in exclude: continue if os.path.isfile(f.path): - render_file(f.path, dst, values) + if overwrite: + render_file(f.path, dst, values) else: new_dst = replace(f"{dst}/{f.name}", values) try: @@ -47,7 +49,7 @@ def render(src, dst, values={}, exclude={}): except OSError as e: if e.errno != os.errno.EEXIST: raise - render(f.path, new_dst, 
values, exclude) + render(f.path, new_dst, values, exclude, overwrite) def parse_args(): @@ -82,7 +84,7 @@ def parse_args(): return args -def render_values(src: str, dst: str, values: str) -> None: +def render_values(src: str, dst: str, values: str, overwrite=True) -> None: if values is not None: values = os.path.abspath(values) # Validate src and values. Dst needs to be a directory. src can be either a valid folder of directory. values need to be valid file if provided. @@ -108,7 +110,7 @@ def render_values(src: str, dst: str, values: str) -> None: ), v, ) - render(src, dst, config_dict, {values}) + render(src, dst, config_dict, {values}, overwrite) def main(): diff --git a/paasta_tools/contrib/rightsizer_soaconfigs_update.py b/paasta_tools/contrib/rightsizer_soaconfigs_update.py index 594f800037..b30a80ce36 100644 --- a/paasta_tools/contrib/rightsizer_soaconfigs_update.py +++ b/paasta_tools/contrib/rightsizer_soaconfigs_update.py @@ -1,15 +1,27 @@ import argparse import logging from collections import defaultdict +from typing import Any +from typing import cast +from typing import Dict +from typing import List +from typing import Literal +from typing import Optional from typing import Set +from typing import TypedDict +from typing import Union from paasta_tools.config_utils import AutoConfigUpdater from paasta_tools.contrib.paasta_update_soa_memcpu import get_report_from_splunk +from paasta_tools.kubernetes_tools import SidecarResourceRequirements from paasta_tools.utils import AUTO_SOACONFIG_SUBDIR from paasta_tools.utils import DEFAULT_SOA_CONFIGS_GIT_URL from paasta_tools.utils import format_git_url from paasta_tools.utils import load_system_paasta_config + +log = logging.getLogger(__name__) + NULL = "null" SUPPORTED_CSV_KEYS = ( "cpus", @@ -136,10 +148,66 @@ def get_default_git_remote(): return default_git_remote -def get_recommendation_from_result(result, keys_to_apply): - rec = {} +SupportedInstanceType = Literal["kubernetes", "eks", "cassandracluster"] + + +class CassandraRightsizerResult(TypedDict): + current_cpus: str + suggested_cpus: str + + current_disk: str + suggested_disk: str + + current_mem: str + suggested_mem: str + + current_replicas: str + suggested_replicas: str + + +class CassandraRecommendation(TypedDict, total=False): + disk: str + mem: str + cpus: float + replicas: int + cpu_burst_percent: float + + +class KubernetesRightsizerResult(TypedDict): + current_cpus: str + suggested_cpus: str + + current_disk: str + suggested_disk: str + + current_mem: str + suggested_mem: str + + suggested_hacheck_cpus: float + + suggested_cpu_burst_add: float + + suggested_min_instances: int + + suggested_max_instances: int + + +class KubernetesRecommendation(TypedDict, total=False): + disk: float + mem: float + cpus: float + cpu_burst_add: float + max_instances: int + min_instances: int + sidecar_resource_requirements: Dict[str, SidecarResourceRequirements] + + +def get_kubernetes_recommendation_from_result( + result: KubernetesRightsizerResult, keys_to_apply: List[str] +) -> KubernetesRecommendation: + rec: KubernetesRecommendation = {} for key in keys_to_apply: - val = result.get(key) + val: Optional[str] = cast(Optional[str], result.get(key)) if not val or val == NULL: continue if key == "cpus": @@ -169,12 +237,33 @@ def get_recommendation_from_result(result, keys_to_apply): return rec +def get_cassandra_recommendation_from_result( + result: CassandraRightsizerResult, keys_to_apply: List[str] +) -> CassandraRecommendation: + rec: CassandraRecommendation = {} + for key in 
keys_to_apply: + val: Optional[str] = cast(Optional[str], result.get(key)) + if not val or val == NULL: + continue + if key == "cpus": + rec["cpus"] = float(val) + elif key == "cpu_burst_percent": + rec["cpu_burst_percent"] = float(val) + elif key == "mem": + rec["mem"] = val + elif key == "disk": + rec["disk"] = val + elif key == "replicas": + rec["replicas"] = int(val) + return rec + + def get_recommendations_by_service_file( results, keys_to_apply, exclude_clusters: Set[str], ): - results_by_service_file = defaultdict(dict) + results_by_service_file: Dict[tuple, Dict[str, Any]] = defaultdict(dict) for result in results.values(): # we occasionally want to disable autotune for a cluster (or set of clusters) # to do so, we can simply skip getting recommendations for any (service, cluster) @@ -189,7 +278,12 @@ def get_recommendations_by_service_file( result["service"], result["cluster"], ) # e.g. (foo, marathon-norcal-stagef) - rec = get_recommendation_from_result(result, keys_to_apply) + instance_type = result["cluster"].split("-", 1)[0] + rec: Union[KubernetesRecommendation, CassandraRecommendation] = {} + if instance_type == "cassandracluster": + rec = get_cassandra_recommendation_from_result(result, keys_to_apply) + elif instance_type == "kubernetes": + rec = get_kubernetes_recommendation_from_result(result, keys_to_apply) if not rec: continue results_by_service_file[key][result["instance"]] = rec @@ -226,17 +320,17 @@ def main(args): validation_schema_path=AUTO_SOACONFIG_SUBDIR, ) with updater: - for (service, extra_info), instance_recommendations in results.items(): - existing_recommendations = updater.get_existing_configs( - service, extra_info, AUTO_SOACONFIG_SUBDIR + for ( + service, + instance_type_cluster, + ), instance_recommendations in updater.merge_recommendations(results).items(): + log.info( + f"Writing configs for {service} to {AUTO_SOACONFIG_SUBDIR}/{instance_type_cluster}.yaml..." 
) - for instance_name, recommendation in instance_recommendations.items(): - existing_recommendations.setdefault(instance_name, {}) - existing_recommendations[instance_name].update(recommendation) updater.write_configs( service, - extra_info, - existing_recommendations, + instance_type_cluster, + instance_recommendations, AUTO_SOACONFIG_SUBDIR, HEADER_COMMENT, ) diff --git a/paasta_tools/delete_kubernetes_deployments.py b/paasta_tools/delete_kubernetes_deployments.py index 944954fd70..b04574b317 100755 --- a/paasta_tools/delete_kubernetes_deployments.py +++ b/paasta_tools/delete_kubernetes_deployments.py @@ -73,7 +73,11 @@ def main(args=None) -> None: for deployment_name in deployment_names: try: log.debug(f"Deleting {deployment_name}") - delete_deployment(kube_client=kube_client, deployment_name=deployment_name) + delete_deployment( + kube_client=kube_client, + deployment_name=deployment_name, + namespace="paasta", + ) except Exception as err: log.error(f"Unable to delete {deployment_name}: {err}") sys.exit(1) diff --git a/paasta_tools/docker_wrapper.py b/paasta_tools/docker_wrapper.py index f7867e8277..72f9901bf0 100755 --- a/paasta_tools/docker_wrapper.py +++ b/paasta_tools/docker_wrapper.py @@ -340,15 +340,14 @@ def main(argv=None): # Marathon sets MESOS_TASK_ID mesos_task_id = env_args.get("MESOS_TASK_ID") - hostname = socket.getfqdn() + fqdn = socket.getfqdn() + hostname = fqdn.partition(".")[0] if mesos_task_id and can_add_hostname(argv): - argv = add_argument(argv, f"-e=PAASTA_HOST={hostname}") - hostname_task_id = generate_hostname_task_id( - hostname.partition(".")[0], mesos_task_id - ) + argv = add_argument(argv, f"-e=PAASTA_HOST={fqdn}") + hostname_task_id = generate_hostname_task_id(hostname, mesos_task_id) argv = add_argument(argv, f"--hostname={hostname_task_id }") elif can_add_hostname(argv): - argv = add_argument(argv, f"-e=PAASTA_HOST={hostname}") + argv = add_argument(argv, f"-e=PAASTA_HOST={fqdn}") argv = add_argument(argv, f"--hostname={hostname}") paasta_firewall = env_args.get("PAASTA_FIREWALL") diff --git a/paasta_tools/instance/kubernetes.py b/paasta_tools/instance/kubernetes.py index 28f37a2604..fa825a676e 100644 --- a/paasta_tools/instance/kubernetes.py +++ b/paasta_tools/instance/kubernetes.py @@ -12,6 +12,7 @@ from typing import Sequence from typing import Set from typing import Tuple +from typing import Union import a_sync import pytz @@ -24,6 +25,7 @@ from mypy_extensions import TypedDict from paasta_tools import cassandracluster_tools +from paasta_tools import eks_tools from paasta_tools import envoy_tools from paasta_tools import flink_tools from paasta_tools import kafkacluster_tools @@ -32,6 +34,7 @@ from paasta_tools import monkrelaycluster_tools from paasta_tools import nrtsearchservice_tools from paasta_tools import smartstack_tools +from paasta_tools import vitesscluster_tools from paasta_tools.cli.utils import LONG_RUNNING_INSTANCE_TYPE_HANDLERS from paasta_tools.instance.hpa_metrics_parser import HPAMetricsDict from paasta_tools.instance.hpa_metrics_parser import HPAMetricsParser @@ -46,8 +49,12 @@ from paasta_tools.utils import calculate_tail_lines -INSTANCE_TYPES_CR = {"flink", "cassandracluster", "kafkacluster"} -INSTANCE_TYPES_K8S = {"kubernetes", "cassandracluster"} +INSTANCE_TYPES_CR = {"flink", "cassandracluster", "kafkacluster", "vitesscluster"} +INSTANCE_TYPES_K8S = { + "cassandracluster", + "eks", + "kubernetes", +} INSTANCE_TYPES = INSTANCE_TYPES_K8S.union(INSTANCE_TYPES_CR) INSTANCE_TYPES_WITH_SET_STATE = {"flink"} @@ -55,6 +62,7 @@ 
flink=flink_tools.cr_id, cassandracluster=cassandracluster_tools.cr_id, kafkacluster=kafkacluster_tools.cr_id, + vitesscluster=vitesscluster_tools.cr_id, nrtsearchservice=nrtsearchservice_tools.cr_id, monkrelaycluster=monkrelaycluster_tools.cr_id, ) @@ -215,6 +223,8 @@ async def pod_info( } +# TODO: Cleanup +# Only used in old kubernetes_status async def job_status( kstatus: MutableMapping[str, Any], client: kubernetes_tools.KubeClient, @@ -459,18 +469,29 @@ def filter_actually_running_replicasets( def bounce_status( - service: str, - instance: str, - settings: Any, + service: str, instance: str, settings: Any, is_eks: bool = False ) -> Dict[str, Any]: status: Dict[str, Any] = {} - job_config = kubernetes_tools.load_kubernetes_service_config( - service=service, - instance=instance, - cluster=settings.cluster, - soa_dir=settings.soa_dir, - load_deployments=True, - ) + # this should be the only place where it matters that we use eks_tools. + # apart from loading config files, we should be using kubernetes_tools + # everywhere. + job_config: Union[KubernetesDeploymentConfig, eks_tools.EksDeploymentConfig] + if is_eks: + job_config = eks_tools.load_eks_service_config( + service=service, + instance=instance, + cluster=settings.cluster, + soa_dir=settings.soa_dir, + load_deployments=True, + ) + else: + job_config = kubernetes_tools.load_kubernetes_service_config( + service=service, + instance=instance, + cluster=settings.cluster, + soa_dir=settings.soa_dir, + load_deployments=True, + ) expected_instance_count = job_config.get_instances() status["expected_instance_count"] = expected_instance_count desired_state = job_config.get_desired_state() @@ -574,7 +595,6 @@ async def kubernetes_status_v2( service: str, instance: str, verbose: int, - include_smartstack: bool, include_envoy: bool, instance_type: str, settings: Any, @@ -1091,12 +1111,12 @@ async def get_version_for_controller_revision( } +# TODO: Cleanup old kubernetes status @a_sync.to_blocking async def kubernetes_status( service: str, instance: str, verbose: int, - include_smartstack: bool, include_envoy: bool, instance_type: str, settings: Any, @@ -1184,35 +1204,23 @@ async def kubernetes_status( evicted_count += 1 kstatus["evicted_count"] = evicted_count - if include_smartstack or include_envoy: + if include_envoy: service_namespace_config = kubernetes_tools.load_service_namespace_config( service=service, namespace=job_config.get_nerve_namespace(), soa_dir=settings.soa_dir, ) if "proxy_port" in service_namespace_config: - if include_smartstack: - kstatus["smartstack"] = await mesh_status( - service=service, - service_mesh=ServiceMesh.SMARTSTACK, - instance=job_config.get_nerve_namespace(), - job_config=job_config, - service_namespace_config=service_namespace_config, - pods_task=pods_task, - should_return_individual_backends=verbose > 0, - settings=settings, - ) - if include_envoy: - kstatus["envoy"] = await mesh_status( - service=service, - service_mesh=ServiceMesh.ENVOY, - instance=job_config.get_nerve_namespace(), - job_config=job_config, - service_namespace_config=service_namespace_config, - pods_task=pods_task, - should_return_individual_backends=verbose > 0, - settings=settings, - ) + kstatus["envoy"] = await mesh_status( + service=service, + service_mesh=ServiceMesh.ENVOY, + instance=job_config.get_nerve_namespace(), + job_config=job_config, + service_namespace_config=service_namespace_config, + pods_task=pods_task, + should_return_individual_backends=verbose > 0, + settings=settings, + ) return kstatus @@ -1220,7 +1228,6 @@ def 
instance_status( service: str, instance: str, verbose: int, - include_smartstack: bool, include_envoy: bool, use_new: bool, instance_type: str, @@ -1250,7 +1257,6 @@ def instance_status( instance=instance, instance_type=instance_type, verbose=verbose, - include_smartstack=include_smartstack, include_envoy=include_envoy, settings=settings, ) @@ -1260,7 +1266,6 @@ def instance_status( instance=instance, instance_type=instance_type, verbose=verbose, - include_smartstack=include_smartstack, include_envoy=include_envoy, settings=settings, ) @@ -1285,11 +1290,10 @@ async def kubernetes_mesh_status( instance: str, instance_type: str, settings: Any, - include_smartstack: bool = True, include_envoy: bool = True, ) -> Mapping[str, Any]: - if not include_smartstack and not include_envoy: + if not include_envoy: raise RuntimeError("No mesh types specified when requesting mesh status") if instance_type not in LONG_RUNNING_INSTANCE_TYPE_HANDLERS: raise RuntimeError( @@ -1334,11 +1338,6 @@ async def kubernetes_mesh_status( should_return_individual_backends=True, settings=settings, ) - if include_smartstack: - kmesh["smartstack"] = await mesh_status( - service_mesh=ServiceMesh.SMARTSTACK, - **mesh_status_kwargs, - ) if include_envoy: kmesh["envoy"] = await mesh_status( service_mesh=ServiceMesh.ENVOY, diff --git a/paasta_tools/kubernetes/application/controller_wrappers.py b/paasta_tools/kubernetes/application/controller_wrappers.py index 83d025931a..ac9f2fa433 100644 --- a/paasta_tools/kubernetes/application/controller_wrappers.py +++ b/paasta_tools/kubernetes/application/controller_wrappers.py @@ -1,8 +1,6 @@ import logging -import threading from abc import ABC from abc import abstractmethod -from time import sleep from typing import Optional from typing import Union @@ -13,14 +11,13 @@ from kubernetes.client.rest import ApiException from paasta_tools.autoscaling.autoscaling_service_lib import autoscaling_is_paused +from paasta_tools.eks_tools import load_eks_service_config_no_cache from paasta_tools.kubernetes_tools import create_deployment from paasta_tools.kubernetes_tools import create_pod_disruption_budget from paasta_tools.kubernetes_tools import create_stateful_set -from paasta_tools.kubernetes_tools import force_delete_pods from paasta_tools.kubernetes_tools import KubeClient from paasta_tools.kubernetes_tools import KubeDeployment from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig -from paasta_tools.kubernetes_tools import list_all_deployments from paasta_tools.kubernetes_tools import load_kubernetes_service_config_no_cache from paasta_tools.kubernetes_tools import paasta_prefixed from paasta_tools.kubernetes_tools import pod_disruption_budget_for_service_instance @@ -68,15 +65,23 @@ def __init__( self.logging = logging def load_local_config( - self, soa_dir: str, cluster: str + self, soa_dir: str, cluster: str, eks: bool = False ) -> Optional[KubernetesDeploymentConfig]: if not self.soa_config: - self.soa_config = load_kubernetes_service_config_no_cache( - service=self.kube_deployment.service, - instance=self.kube_deployment.instance, - cluster=cluster, - soa_dir=soa_dir, - ) + if eks: + self.soa_config = load_eks_service_config_no_cache( + service=self.kube_deployment.service, + instance=self.kube_deployment.instance, + cluster=cluster, + soa_dir=soa_dir, + ) + else: + self.soa_config = load_kubernetes_service_config_no_cache( + service=self.kube_deployment.service, + instance=self.kube_deployment.instance, + cluster=cluster, + soa_dir=soa_dir, + ) return self.soa_config def 
__str__(self): @@ -141,7 +146,7 @@ def delete_pod_disruption_budget(self, kube_client: KubeClient) -> None: ) def ensure_pod_disruption_budget( - self, kube_client: KubeClient, namespace: str = "paasta" + self, kube_client: KubeClient, namespace: str ) -> V1beta1PodDisruptionBudget: max_unavailable: Union[str, int] if "bounce_margin_factor" in self.soa_config.config_dict: @@ -239,65 +244,9 @@ def create(self, kube_client: KubeClient) -> None: self.ensure_pod_disruption_budget(kube_client, self.soa_config.get_namespace()) self.sync_horizontal_pod_autoscaler(kube_client) - def deep_delete_and_create(self, kube_client: KubeClient) -> None: - self.deep_delete(kube_client) - timer = 0 - while ( - self.kube_deployment - in set(list_all_deployments(kube_client, self.soa_config.get_namespace())) - and timer < 60 - ): - sleep(1) - timer += 1 - - if timer >= 60 and self.kube_deployment in set( - list_all_deployments(kube_client, self.soa_config.get_namespace()) - ): - # When deleting then immediately creating, we need to use Background - # deletion to ensure we can create the deployment immediately - self.deep_delete(kube_client, propagation_policy="Background") - - try: - force_delete_pods( - self.item.metadata.name, - self.kube_deployment.service, - self.kube_deployment.instance, - self.item.metadata.namespace, - kube_client, - ) - except ApiException as e: - if e.status == 404: - # Pod(s) may have been deleted by GC before we got to it - # We can consider this a success - self.logging.debug( - "pods already deleted for {} from namespace/{}. Continuing.".format( - self.kube_deployment.service, self.item.metadata.namespace - ) - ) - else: - raise - - if self.kube_deployment in set( - list_all_deployments(kube_client, self.soa_config.get_namespace()) - ): - # deployment deletion failed, we cannot continue - raise Exception(f"Could not delete deployment {self.item.metadata.name}") - else: - self.logging.info( - "deleted deploy/{} from namespace/{}".format( - self.kube_deployment.service, self.item.metadata.namespace - ) - ) - self.create(kube_client=kube_client) - def update(self, kube_client: KubeClient) -> None: # If HPA is enabled, do not update replicas. 
# In all other cases, replica is set to max(instances, min_instances) - if self.soa_config.config_dict.get("bounce_method", "") == "brutal": - threading.Thread( - target=self.deep_delete_and_create, args=[KubeClient()] - ).start() - return update_deployment( kube_client=kube_client, formatted_deployment=self.item, diff --git a/paasta_tools/kubernetes/bin/paasta_secrets_sync.py b/paasta_tools/kubernetes/bin/paasta_secrets_sync.py index 58b5016224..ef6b8a4939 100755 --- a/paasta_tools/kubernetes/bin/paasta_secrets_sync.py +++ b/paasta_tools/kubernetes/bin/paasta_secrets_sync.py @@ -35,8 +35,10 @@ from kubernetes.client.rest import ApiException from typing_extensions import Literal +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.kubernetes_tools import create_secret from paasta_tools.kubernetes_tools import create_secret_signature +from paasta_tools.kubernetes_tools import ensure_namespace from paasta_tools.kubernetes_tools import get_paasta_secret_name from paasta_tools.kubernetes_tools import get_paasta_secret_signature_name from paasta_tools.kubernetes_tools import get_secret_signature @@ -55,11 +57,18 @@ from paasta_tools.utils import INSTANCE_TYPE_TO_K8S_NAMESPACE from paasta_tools.utils import INSTANCE_TYPES from paasta_tools.utils import load_system_paasta_config +from paasta_tools.utils import PAASTA_K8S_INSTANCE_TYPES from paasta_tools.utils import SHARED_SECRETS_K8S_NAMESPACES log = logging.getLogger(__name__) +K8S_INSTANCE_TYPE_CLASSES = ( + KubernetesDeploymentConfig, + EksDeploymentConfig, +) + + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Sync paasta secrets into k8s") parser.add_argument( @@ -212,31 +221,32 @@ def get_services_to_k8s_namespaces_to_allowlist( continue config_loader = PaastaServiceConfigLoader(service, soa_dir) - for service_instance_config in config_loader.instance_configs( - cluster=cluster, instance_type_class=KubernetesDeploymentConfig - ): - secrets_used, shared_secrets_used = get_secrets_used_by_instance( - service_instance_config - ) - allowlist = services_to_k8s_namespaces_to_allowlist[service].setdefault( - service_instance_config.get_namespace(), - set(), - ) - if allowlist is not None: - allowlist.update(secrets_used) - - if "_shared" in service_list: - shared_allowlist = services_to_k8s_namespaces_to_allowlist[ - "_shared" - ].setdefault( + for instance_type_class in K8S_INSTANCE_TYPE_CLASSES: + for service_instance_config in config_loader.instance_configs( + cluster=cluster, instance_type_class=instance_type_class + ): + secrets_used, shared_secrets_used = get_secrets_used_by_instance( + service_instance_config + ) + allowlist = services_to_k8s_namespaces_to_allowlist[service].setdefault( service_instance_config.get_namespace(), set(), ) - if shared_allowlist is not None: - shared_allowlist.update(shared_secrets_used) + if allowlist is not None: + allowlist.update(secrets_used) + + if "_shared" in service_list: + shared_allowlist = services_to_k8s_namespaces_to_allowlist[ + "_shared" + ].setdefault( + service_instance_config.get_namespace(), + set(), + ) + if shared_allowlist is not None: + shared_allowlist.update(shared_secrets_used) for instance_type in INSTANCE_TYPES: - if instance_type == "kubernetes": + if instance_type in PAASTA_K8S_INSTANCE_TYPES: continue # handled above. 
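# Illustrative sketch, not part of this diff (names below are assumptions): the loops above now run once
# per config class in K8S_INSTANCE_TYPE_CLASSES so that EKS instances get their secrets synced exactly
# like plain Kubernetes ones. The shape of that iteration, assuming only imports already present in this file:
from paasta_tools.eks_tools import EksDeploymentConfig
from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig

K8S_INSTANCE_TYPE_CLASSES = (KubernetesDeploymentConfig, EksDeploymentConfig)

def iter_k8s_like_instance_configs(config_loader, cluster):
    # yields the merged stream of Kubernetes and EKS instance configs for one cluster
    for instance_type_class in K8S_INSTANCE_TYPE_CLASSES:
        yield from config_loader.instance_configs(
            cluster=cluster, instance_type_class=instance_type_class
        )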
instances = get_service_instance_list( @@ -309,6 +319,7 @@ def sync_all_secrets( else namespaces_to_allowlist.get(overwrite_namespace, set()), } for namespace, secret_allowlist in namespaces_to_allowlist.items(): + ensure_namespace(kube_client, namespace) sync_service_secrets["paasta-secret"].append( partial( sync_secrets, @@ -463,64 +474,67 @@ def sync_datastore_credentials( system_paasta_config.get_datastore_credentials_vault_overrides() ) - for instance_config in config_loader.instance_configs( - cluster=cluster, instance_type_class=KubernetesDeploymentConfig - ): - namespace = ( - overwrite_namespace - if overwrite_namespace is not None - else instance_config.get_namespace() - ) - datastore_credentials = instance_config.get_datastore_credentials() - with set_temporary_environment_variables(datastore_credentials_vault_overrides): - # expects VAULT_ADDR_OVERRIDE, VAULT_CA_OVERRIDE, and VAULT_TOKEN_OVERRIDE to be set - # in order to use a custom vault shard. overriden temporarily in this context - provider = get_secret_provider( - secret_provider_name=secret_provider_name, - soa_dir=soa_dir, - service_name=service, - cluster_names=[cluster], - # overridden by env variables but still needed here for spec validation - secret_provider_kwargs={ - "vault_cluster_config": vault_cluster_config, - "vault_auth_method": "token", - "vault_token_file": vault_token_file, - }, + for instance_type_class in K8S_INSTANCE_TYPE_CLASSES: + for instance_config in config_loader.instance_configs( + cluster=cluster, instance_type_class=instance_type_class + ): + namespace = ( + overwrite_namespace + if overwrite_namespace is not None + else instance_config.get_namespace() ) + datastore_credentials = instance_config.get_datastore_credentials() + with set_temporary_environment_variables( + datastore_credentials_vault_overrides + ): + # expects VAULT_ADDR_OVERRIDE, VAULT_CA_OVERRIDE, and VAULT_TOKEN_OVERRIDE to be set + # in order to use a custom vault shard. overriden temporarily in this context + provider = get_secret_provider( + secret_provider_name=secret_provider_name, + soa_dir=soa_dir, + service_name=service, + cluster_names=[cluster], + # overridden by env variables but still needed here for spec validation + secret_provider_kwargs={ + "vault_cluster_config": vault_cluster_config, + "vault_auth_method": "token", + "vault_token_file": vault_token_file, + }, + ) - secret_data = {} - for datastore, credentials in datastore_credentials.items(): - # mypy loses type hints on '.items' and throws false positives. unfortunately have to type: ignore - # https://github.com/python/mypy/issues/7178 - for credential in credentials: # type: ignore - vault_path = f"secrets/datastore/{datastore}/{credential}" - secrets = provider.get_data_from_vault_path(vault_path) - if not secrets: - # no secrets found at this path. skip syncing - log.debug( - f"Warning: no secrets found at requested path {vault_path}." - ) - continue + secret_data = {} + for datastore, credentials in datastore_credentials.items(): + # mypy loses type hints on '.items' and throws false positives. unfortunately have to type: ignore + # https://github.com/python/mypy/issues/7178 + for credential in credentials: # type: ignore + vault_path = f"secrets/datastore/{datastore}/{credential}" + secrets = provider.get_data_from_vault_path(vault_path) + if not secrets: + # no secrets found at this path. skip syncing + log.debug( + f"Warning: no secrets found at requested path {vault_path}." 
+ ) + continue + + # decrypt and save in secret_data + vault_key_path = get_vault_key_secret_name(vault_path) + + # kubernetes expects data to be base64 encoded binary in utf-8 when put into secret maps + # may look like: + # {'master': {'passwd': '****', 'user': 'v-approle-mysql-serv-nVcYexH95A2'}, 'reporting': {'passwd': '****', 'user': 'v-approle-mysql-serv-GgCpRIh9Ut7'}, 'slave': {'passwd': '****', 'user': 'v-approle-mysql-serv-PzjPwqNMbqu'} + secret_data[vault_key_path] = base64.b64encode( + json.dumps(secrets).encode("utf-8") + ).decode("utf-8") - # decrypt and save in secret_data - vault_key_path = get_vault_key_secret_name(vault_path) - - # kubernetes expects data to be base64 encoded binary in utf-8 when put into secret maps - # may look like: - # {'master': {'passwd': '****', 'user': 'v-approle-mysql-serv-nVcYexH95A2'}, 'reporting': {'passwd': '****', 'user': 'v-approle-mysql-serv-GgCpRIh9Ut7'}, 'slave': {'passwd': '****', 'user': 'v-approle-mysql-serv-PzjPwqNMbqu'} - secret_data[vault_key_path] = base64.b64encode( - json.dumps(secrets).encode("utf-8") - ).decode("utf-8") - - create_or_update_k8s_secret( - service=service, - signature_name=instance_config.get_datastore_credentials_signature_name(), - secret_name=instance_config.get_datastore_credentials_secret_name(), - get_secret_data=(lambda: secret_data), - secret_signature=_get_dict_signature(secret_data), - kube_client=kube_client, - namespace=namespace, - ) + create_or_update_k8s_secret( + service=service, + signature_name=instance_config.get_datastore_credentials_signature_name(), + secret_name=instance_config.get_datastore_credentials_secret_name(), + get_secret_data=(lambda: secret_data), + secret_signature=_get_dict_signature(secret_data), + kube_client=kube_client, + namespace=namespace, + ) return True @@ -543,50 +557,51 @@ def sync_crypto_secrets( So each replica of a service instance gets the same key, thereby reducing requests to Vault API as we only talk to vault during secret syncing """ config_loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir) - for instance_config in config_loader.instance_configs( - cluster=cluster, instance_type_class=KubernetesDeploymentConfig - ): - crypto_keys = instance_config.get_crypto_keys_from_config() - if not crypto_keys: - continue - secret_data = {} - provider = get_secret_provider( - secret_provider_name=secret_provider_name, - soa_dir=soa_dir, - service_name=service, - cluster_names=[cluster], - secret_provider_kwargs={ - "vault_cluster_config": vault_cluster_config, - "vault_auth_method": "token", - "vault_token_file": vault_token_file, - }, - ) - for key in crypto_keys: - key_versions = provider.get_key_versions(key) - if not key_versions: - log.error( - f"No key versions found for {key} on {instance_config.get_sanitised_deployment_name()}" - ) + for instance_type_class in K8S_INSTANCE_TYPE_CLASSES: + for instance_config in config_loader.instance_configs( + cluster=cluster, instance_type_class=instance_type_class + ): + crypto_keys = instance_config.get_crypto_keys_from_config() + if not crypto_keys: continue + secret_data = {} + provider = get_secret_provider( + secret_provider_name=secret_provider_name, + soa_dir=soa_dir, + service_name=service, + cluster_names=[cluster], + secret_provider_kwargs={ + "vault_cluster_config": vault_cluster_config, + "vault_auth_method": "token", + "vault_token_file": vault_token_file, + }, + ) + for key in crypto_keys: + key_versions = provider.get_key_versions(key) + if not key_versions: + log.error( + f"No key versions 
found for {key} on {instance_config.get_sanitised_deployment_name()}" + ) + continue - secret_data[get_vault_key_secret_name(key)] = base64.b64encode( - json.dumps(key_versions).encode("utf-8") - ).decode("utf-8") + secret_data[get_vault_key_secret_name(key)] = base64.b64encode( + json.dumps(key_versions).encode("utf-8") + ).decode("utf-8") - if not secret_data: - continue + if not secret_data: + continue - create_or_update_k8s_secret( - service=service, - signature_name=instance_config.get_crypto_secret_signature_name(), - # the secret name here must match the secret name given in the secret volume config, - # i.e. `kubernetes.client.V1SecretVolumeSource`'s `secret_name` must match below - secret_name=instance_config.get_crypto_secret_name(), - get_secret_data=(lambda: secret_data), - secret_signature=_get_dict_signature(secret_data), - kube_client=kube_client, - namespace=instance_config.get_namespace(), - ) + create_or_update_k8s_secret( + service=service, + signature_name=instance_config.get_crypto_secret_signature_name(), + # the secret name here must match the secret name given in the secret volume config, + # i.e. `kubernetes.client.V1SecretVolumeSource`'s `secret_name` must match below + secret_name=instance_config.get_crypto_secret_name(), + get_secret_data=(lambda: secret_data), + secret_signature=_get_dict_signature(secret_data), + kube_client=kube_client, + namespace=instance_config.get_namespace(), + ) return True @@ -598,40 +613,46 @@ def sync_boto_secrets( soa_dir: str, ) -> bool: config_loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir) - for instance_config in config_loader.instance_configs( - cluster=cluster, instance_type_class=KubernetesDeploymentConfig - ): - boto_keys = instance_config.config_dict.get("boto_keys", []) - if not boto_keys: - continue - boto_keys.sort() - secret_data = {} - for key in boto_keys: - for filetype in ["sh", "yaml", "json", "cfg"]: - this_key = key + "." + filetype - sanitised_key = this_key.replace(".", "-").replace("_", "--") - try: - with open(f"/etc/boto_cfg_private/{this_key}") as f: + for instance_type_class in K8S_INSTANCE_TYPE_CLASSES: + for instance_config in config_loader.instance_configs( + cluster=cluster, instance_type_class=instance_type_class + ): + boto_keys = instance_config.config_dict.get("boto_keys", []) + if not boto_keys: + continue + boto_keys.sort() + secret_data = {} + for key in boto_keys: + for filetype in ["sh", "yaml", "json", "cfg"]: + this_key = key + "." + filetype + sanitised_key = this_key.replace(".", "-").replace("_", "--") + try: + with open(f"/etc/boto_cfg_private/{this_key}") as f: + secret_data[sanitised_key] = base64.b64encode( + f.read().encode("utf-8") + ).decode("utf-8") + except IOError: + log.warning( + f"Boto key {this_key} required for {service} could not be found." + ) secret_data[sanitised_key] = base64.b64encode( - f.read().encode("utf-8") + "This user no longer exists. Remove it from boto_keys.".encode( + "utf-8" + ) ).decode("utf-8") - except IOError: - log.warning( - f"Boto key {this_key} required for {service} could not be found." 
- ) - if not secret_data: - continue + if not secret_data: + continue - create_or_update_k8s_secret( - service=service, - signature_name=instance_config.get_boto_secret_signature_name(), - secret_name=instance_config.get_boto_secret_name(), - get_secret_data=(lambda: secret_data), - secret_signature=_get_dict_signature(secret_data), - kube_client=kube_client, - namespace=instance_config.get_namespace(), - ) + create_or_update_k8s_secret( + service=service, + signature_name=instance_config.get_boto_secret_signature_name(), + secret_name=instance_config.get_boto_secret_name(), + get_secret_data=(lambda: secret_data), + secret_signature=_get_dict_signature(secret_data), + kube_client=kube_client, + namespace=instance_config.get_namespace(), + ) return True @@ -654,7 +675,9 @@ def create_or_update_k8s_secret( :param get_secret_data: is a function to postpone fetching data in order to reduce service load, e.g. Vault API """ # In order to prevent slamming the k8s API, add some artificial delay here - time.sleep(0.3) + delay = load_system_paasta_config().get_secret_sync_delay_seconds() + if delay: + time.sleep(delay) kubernetes_signature = get_secret_signature( kube_client=kube_client, diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index 0be1c92101..ba34e37c68 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import base64 +import functools import hashlib import itertools import json @@ -23,11 +24,13 @@ from inspect import currentframe from pathlib import Path from typing import Any +from typing import cast from typing import Collection from typing import Container from typing import Dict from typing import Iterable from typing import List +from typing import Literal from typing import Mapping from typing import MutableMapping from typing import NamedTuple @@ -72,6 +75,9 @@ from kubernetes.client import V1KeyToPath from kubernetes.client import V1LabelSelector from kubernetes.client import V1Lifecycle +from kubernetes.client import V1LimitRange +from kubernetes.client import V1LimitRangeItem +from kubernetes.client import V1LimitRangeSpec from kubernetes.client import V1Namespace from kubernetes.client import V1Node from kubernetes.client import V1NodeAffinity @@ -89,6 +95,7 @@ from kubernetes.client import V1PodSecurityContext from kubernetes.client import V1PodSpec from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1PreferredSchedulingTerm from kubernetes.client import V1Probe from kubernetes.client import V1ReplicaSet from kubernetes.client import V1ResourceRequirements @@ -158,6 +165,7 @@ from paasta_tools.utils import SecretVolume from paasta_tools.utils import SystemPaastaConfig from paasta_tools.utils import time_cache +from paasta_tools.utils import TopologySpreadConstraintDict from paasta_tools.utils import VolumeWithMode @@ -173,11 +181,9 @@ "brutal": "RollingUpdate", } HACHECK_POD_NAME = "hacheck" -UWSGI_EXPORTER_POD_NAME = "uwsgi--exporter" GUNICORN_EXPORTER_POD_NAME = "gunicorn--exporter" SIDECAR_CONTAINER_NAMES = [ HACHECK_POD_NAME, - UWSGI_EXPORTER_POD_NAME, GUNICORN_EXPORTER_POD_NAME, ] KUBERNETES_NAMESPACE = "paasta" @@ -333,7 +339,6 @@ def _set_disrupted_pods(self: Any, disrupted_pods: Mapping[str, datetime]) -> No "paasta.yelp.com/image_version": str, "paasta.yelp.com/instance": str, "paasta.yelp.com/prometheus_shard": str, - 
"paasta.yelp.com/scrape_uwsgi_prometheus": str, "paasta.yelp.com/scrape_piscina_prometheus": str, "paasta.yelp.com/scrape_gunicorn_prometheus": str, "paasta.yelp.com/service": str, @@ -357,11 +362,45 @@ class CryptoKeyConfig(TypedDict): decrypt: List[str] +class NodeSelectorInNotIn(TypedDict): + operator: Literal["In", "NotIn"] + values: List[str] + + +class NodeSelectorExistsDoesNotExist(TypedDict): + operator: Literal["Exists", "DoesNotExist"] + + +class NodeSelectorGtLt(TypedDict): + operator: Literal["Gt", "Lt"] + value: int + + +NodeSelectorOperator = Union[ + NodeSelectorInNotIn, + NodeSelectorExistsDoesNotExist, + NodeSelectorGtLt, +] + + +NodeSelectorConfig = Union[ + str, + List[str], + List[NodeSelectorOperator], +] + + +class NodeSelectorsPreferredConfigDict(TypedDict): + weight: int + preferences: Dict[str, NodeSelectorConfig] + + class KubernetesDeploymentConfigDict(LongRunningServiceConfigDict, total=False): bounce_method: str bounce_health_params: Dict[str, Any] service_account_name: str - node_selectors: Dict[str, Union[str, Dict[str, Any]]] + node_selectors: Dict[str, NodeSelectorConfig] + node_selectors_preferred: List[NodeSelectorsPreferredConfigDict] sidecar_resource_requirements: Dict[str, SidecarResourceRequirements] lifecycle: KubeLifecycleDict anti_affinity: Union[KubeAffinityCondition, List[KubeAffinityCondition]] @@ -377,6 +416,7 @@ class KubernetesDeploymentConfigDict(LongRunningServiceConfigDict, total=False): boto_keys: List[str] crypto_keys: CryptoKeyConfig datastore_credentials: DatastoreCredentialsConfig + topology_spread_constraints: List[TopologySpreadConstraintDict] def load_kubernetes_service_config_no_cache( @@ -496,6 +536,19 @@ def __init__(self, exception: Exception, service: str, instance: str) -> None: class KubeClient: + @functools.lru_cache() # type: ignore + def __new__( + cls, + component: Optional[str] = None, + config_file: Optional[str] = None, + context: Optional[str] = None, + ) -> "KubeClient": + """By @lru_cache'ing this function, repeated instantiations of KubeClient with the same arguments will return the + exact same object. This makes it possible to effectively cache function calls that take a KubeClient as an + argument.""" + return super().__new__(cls) + + @functools.lru_cache() # type: ignore def __init__( self, component: Optional[str] = None, @@ -575,28 +628,40 @@ def allowlist_denylist_to_requirements( def raw_selectors_to_requirements( - raw_selectors: Mapping[str, Any] + raw_selectors: Mapping[str, NodeSelectorConfig] ) -> List[Tuple[str, str, List[str]]]: """Converts certain node_selectors into requirements, which can be converted to node affinities. 
""" - requirements = [] + requirements: List[Tuple[str, str, List[str]]] = [] for label, configs in raw_selectors.items(): + operator_configs: List[NodeSelectorOperator] = [] + if type(configs) is not list or len(configs) == 0: continue elif type(configs[0]) is str: # specifying an array/list of strings for a label is shorthand # for the "In" operator - configs = [{"operator": "In", "values": configs}] + operator_configs = [ + NodeSelectorInNotIn( + {"operator": "In", "values": cast(List[str], configs)} + ) + ] + else: + # configs should already be a List[NodeSelectorOperator] + operator_configs = cast(List[NodeSelectorOperator], configs) label = to_node_label(label) - for config in configs: + for config in operator_configs: if config["operator"] in {"In", "NotIn"}: + config = cast(NodeSelectorInNotIn, config) values = config["values"] elif config["operator"] in {"Exists", "DoesNotExist"}: + config = cast(NodeSelectorExistsDoesNotExist, config) values = [] elif config["operator"] in {"Gt", "Lt"}: + config = cast(NodeSelectorGtLt, config) # config["value"] is validated by jsonschema to be an int. but, # k8s expects singleton list of the int represented as a str # for these operators. @@ -723,7 +788,7 @@ def get_autoscaling_metric_spec( name: str, cluster: str, kube_client: KubeClient, - namespace: str = "paasta", + namespace: str, ) -> Optional[Union[V2beta2HorizontalPodAutoscaler, Dict]]: # Returns None if an HPA should not be attached based on the config, # or the config is invalid. @@ -742,7 +807,7 @@ def get_autoscaling_metric_spec( max_replicas = self.get_max_instances() if min_replicas == 0 or max_replicas == 0: log.error( - f"Invalid value for min or max_instances: {min_replicas}, {max_replicas}" + f"Invalid value for min or max_instances on {name}: {min_replicas}, {max_replicas}" ) return None @@ -796,7 +861,7 @@ def get_autoscaling_metric_spec( ), ) ) - elif metrics_provider in {"uwsgi", "piscina", "gunicorn"}: + elif metrics_provider in {"uwsgi", "piscina", "gunicorn", "active-requests"}: metrics.append( V2beta2MetricSpec( type="Object", @@ -972,9 +1037,6 @@ def get_sidecar_containers( service_namespace_config, hacheck_sidecar_volumes, ) - uwsgi_exporter_container = self.get_uwsgi_exporter_sidecar_container( - system_paasta_config - ) gunicorn_exporter_container = self.get_gunicorn_exporter_sidecar_container( system_paasta_config ) @@ -982,8 +1044,6 @@ def get_sidecar_containers( sidecars = [] if hacheck_container: sidecars.append(hacheck_container) - if uwsgi_exporter_container: - sidecars.append(uwsgi_exporter_container) if gunicorn_exporter_container: sidecars.append(gunicorn_exporter_container) return sidecars @@ -1072,57 +1132,14 @@ def get_hacheck_sidecar_container( ) return None - def get_uwsgi_exporter_sidecar_container( - self, - system_paasta_config: SystemPaastaConfig, - ) -> Optional[V1Container]: - - if self.should_run_uwsgi_exporter_sidecar(system_paasta_config): - stats_port_env = V1EnvVar( - name="STATS_PORT", - value=str(self.get_autoscaling_params().get("uwsgi_stats_port", 8889)), - ) - - return V1Container( - image=system_paasta_config.get_uwsgi_exporter_sidecar_image_url(), - resources=self.get_sidecar_resource_requirements( - "uwsgi_exporter", - system_paasta_config, - ), - name=UWSGI_EXPORTER_POD_NAME, - env=self.get_kubernetes_environment() + [stats_port_env], - ports=[V1ContainerPort(container_port=9117)], - lifecycle=V1Lifecycle( - pre_stop=V1Handler( - _exec=V1ExecAction( - command=[ - "/bin/sh", - "-c", - # we sleep for the same amount of time as we do 
after an hadown to ensure that we have accurate - # metrics up until our Pod dies - f"sleep {DEFAULT_HADOWN_PRESTOP_SLEEP_SECONDS}", - ] - ) - ) - ), - ) - - return None - - def should_run_uwsgi_exporter_sidecar( + def should_use_uwsgi_exporter( self, system_paasta_config: SystemPaastaConfig, ) -> bool: - if self.is_autoscaling_enabled(): - autoscaling_params = self.get_autoscaling_params() - if autoscaling_params["metrics_provider"] == "uwsgi": - if autoscaling_params.get( - "use_prometheus", - DEFAULT_USE_PROMETHEUS_UWSGI - or system_paasta_config.default_should_run_uwsgi_exporter_sidecar(), - ): - return True - return False + return ( + self.is_autoscaling_enabled() + and self.get_autoscaling_params()["metrics_provider"] == "uwsgi" + ) def get_gunicorn_exporter_sidecar_container( self, @@ -1535,6 +1552,7 @@ def get_pod_volumes( ), default_mode=mode_to_int(secret_volume.get("default_mode")), items=items, + optional=False, ), ) ) @@ -1984,7 +2002,9 @@ def get_enable_envoy_readiness_check( def get_namespace(self) -> str: """Get namespace from config, default to 'paasta'""" - return self.config_dict.get("namespace", "paasta") + return self.config_dict.get( + "namespace", f"paastasvc-{self.get_sanitised_service_name()}" + ) def get_pod_management_policy(self) -> str: """Get sts pod_management_policy from config, default to 'OrderedReady'""" @@ -2093,7 +2113,7 @@ def has_routable_ip( self.config_dict.get("routable_ip", False) or service_namespace_config.is_in_smartstack() or self.get_prometheus_port() is not None - or self.should_run_uwsgi_exporter_sidecar(system_paasta_config) + or self.should_use_uwsgi_exporter(system_paasta_config) or self.should_run_gunicorn_exporter_sidecar() ): return "true" @@ -2161,7 +2181,9 @@ def get_pod_template_spec( pod_topology_spread_constraints = create_pod_topology_spread_constraints( service=self.get_service(), instance=self.get_instance(), - topology_spread_constraints=system_paasta_config.get_topology_spread_constraints(), + topology_spread_constraints=self.get_topology_spread_constraints( + system_paasta_config.get_topology_spread_constraints() + ), ) if pod_topology_spread_constraints: constraints = pod_spec_kwargs.get("topology_spread_constraints", []) @@ -2248,16 +2270,13 @@ def get_pod_template_spec( if self.is_istio_sidecar_injection_enabled(): labels["sidecar.istio.io/inject"] = "true" - # not all services use uwsgi autoscaling, so we label those that do in order to have + # not all services use autoscaling, so we label those that do in order to have # prometheus selectively discover/scrape them - if self.should_run_uwsgi_exporter_sidecar( - system_paasta_config=system_paasta_config - ): - # this is kinda silly, but k8s labels must be strings - labels["paasta.yelp.com/scrape_uwsgi_prometheus"] = "true" - + if self.should_use_uwsgi_exporter(system_paasta_config=system_paasta_config): + # UWSGI no longer needs a label to indicate it needs to be scraped as all pods are checked for the uwsgi stats port by our centralized uwsgi-exporter + # But we do still need deploy_group for relabeling properly # this should probably eventually be made into a default label, - # but for now we're fine with it being behind this feature toggle. + # but for now we're fine with it being behind these feature toggles. 
# ideally, we'd also have the docker image here for ease-of-use # in Prometheus relabeling, but that information is over the # character limit for k8s labels (63 chars) @@ -2307,26 +2326,60 @@ def get_node_affinity(self) -> Optional[V1NodeAffinity]: raw_selectors=self.config_dict.get("node_selectors", {}), ) ) + + preferred_terms = [] + for node_selectors_prefered_config_dict in self.config_dict.get( + "node_selectors_preferred", [] + ): + preferred_terms.append( + V1PreferredSchedulingTerm( + weight=node_selectors_prefered_config_dict["weight"], + preference=V1NodeSelectorTerm( + match_expressions=[ + V1NodeSelectorRequirement( + key=key, + operator=op, + values=vs, + ) + for key, op, vs in raw_selectors_to_requirements( + raw_selectors=node_selectors_prefered_config_dict[ + "preferences" + ] + ) + ] + ), + ) + ) + # package everything into a node affinity - lots of layers :P - if len(requirements) == 0: + if len(requirements) == 0 and len(preferred_terms) == 0: return None - term = V1NodeSelectorTerm( - match_expressions=[ - V1NodeSelectorRequirement( - key=key, - operator=op, - values=vs, - ) - for key, op, vs in requirements - ] + + required_term = ( + V1NodeSelectorTerm( + match_expressions=[ + V1NodeSelectorRequirement( + key=key, + operator=op, + values=vs, + ) + for key, op, vs in requirements + ] + ) + if requirements + else None ) - selector = V1NodeSelector(node_selector_terms=[term]) + + if not preferred_terms: + preferred_terms = None + return V1NodeAffinity( - # this means that the selectors are only used during scheduling. - # changing it while the pod is running will not cause an eviction. - # this should be fine since if there are whitelist/blacklist config - # changes, we will bounce anyway. - required_during_scheduling_ignored_during_execution=selector, + required_during_scheduling_ignored_during_execution=V1NodeSelector( + node_selector_terms=[required_term] + ) + if required_term + else None, + preferred_during_scheduling_ignored_during_execution=preferred_terms, ) def get_pod_required_anti_affinity_terms( @@ -2460,9 +2513,17 @@ def get_prometheus_path(self) -> Optional[str]: def get_prometheus_port(self) -> Optional[int]: return self.config_dict.get("prometheus_port") + def get_topology_spread_constraints( + self, + default_pod_topology_spread_constraints: List[TopologySpreadConstraintDict], + ) -> List[TopologySpreadConstraintDict]: + return self.config_dict.get( + "topology_spread_constraints", default_pod_topology_spread_constraints + ) + def get_kubernetes_secret_hashes( - environment_variables: Mapping[str, str], service: str, namespace: str = "paasta" + environment_variables: Mapping[str, str], service: str, namespace: str ) -> Mapping[str, str]: hashes = {} to_get_hash = [] @@ -2620,6 +2681,7 @@ def force_delete_pods( paasta_service, instance, kube_client, + namespace=namespace, ) delete_options = V1DeleteOptions() for pod in pods_to_delete: @@ -2628,6 +2690,7 @@ def force_delete_pods( ) +@time_cache(ttl=60) def get_all_namespaces( kube_client: KubeClient, label_selector: Optional[str] = None ) -> List[str]: @@ -2654,6 +2717,7 @@ def get_matching_namespaces( ] +@functools.lru_cache() def ensure_namespace(kube_client: KubeClient, namespace: str) -> None: paasta_namespace = V1Namespace( metadata=V1ObjectMeta( @@ -2665,13 +2729,21 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None: }, ) ) - namespaces = kube_client.core.list_namespace() - namespace_names = [item.metadata.name for item in namespaces.items] + namespace_names = 
get_all_namespaces(kube_client) if namespace not in namespace_names: log.warning(f"Creating namespace: {namespace} as it does not exist") - kube_client.core.create_namespace(body=paasta_namespace) + try: + kube_client.core.create_namespace(body=paasta_namespace) + except ApiException as e: + if e.status == 409: + log.warning( + "Got HTTP 409 when creating namespace; it must already exist. Continuing." + ) + else: + raise ensure_paasta_api_rolebinding(kube_client, namespace) + ensure_paasta_namespace_limits(kube_client, namespace) def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> None: @@ -2679,7 +2751,7 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> No rolebinding_names = [item.metadata.name for item in rolebindings] if "paasta-api-server-per-namespace" not in rolebinding_names: log.warning( - f"Creating rolebinding paasta-api-server-per-namespace as it does not exist" + f"Creating rolebinding paasta-api-server-per-namespace on {namespace} namespace as it does not exist" ) role_binding = V1RoleBinding( metadata=V1ObjectMeta( @@ -2703,6 +2775,45 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> No ) +def ensure_paasta_namespace_limits(kube_client: KubeClient, namespace: str) -> None: + if not namespace.startswith("paastasvc-"): + log.debug( + f"Not creating LimitRange because {namespace} does not start with paastasvc-" + ) + return + + limits = get_all_limit_ranges(kube_client, namespace=namespace) + limits_names = {item.metadata.name for item in limits} + if "limit-mem-cpu-disk-per-container" not in limits_names: + log.warning( + f"Creating limit: limit-mem-cpu-disk-per-container on {namespace} namespace as it does not exist" + ) + limit = V1LimitRange( + metadata=V1ObjectMeta( + name="limit-mem-cpu-disk-per-container", + namespace=namespace, + ), + spec=V1LimitRangeSpec( + limits=[ + V1LimitRangeItem( + type="Container", + default={ + "cpu": "1", + "memory": "1024Mi", + "ephemeral-storage": "1Gi", + }, + default_request={ + "cpu": "1", + "memory": "1024Mi", + "ephemeral-storage": "1Gi", + }, + ) + ] + ), + ) + kube_client.core.create_namespaced_limit_range(namespace=namespace, body=limit) + + def list_deployments_in_all_namespaces( kube_client: KubeClient, label_selector: str ) -> List[KubeDeployment]: @@ -2733,8 +2844,9 @@ def list_deployments_in_all_namespaces( def list_deployments( kube_client: KubeClient, + *, + namespace: str, label_selector: str = "", - namespace: str = "paasta", ) -> Sequence[KubeDeployment]: deployments = kube_client.deployments.list_namespaced_deployment( @@ -3029,7 +3141,7 @@ def pod_disruption_budget_for_service_instance( service: str, instance: str, max_unavailable: Union[str, int], - namespace: str = "paasta", + namespace: str, ) -> V1beta1PodDisruptionBudget: return V1beta1PodDisruptionBudget( metadata=V1ObjectMeta( @@ -3051,7 +3163,7 @@ def pod_disruption_budget_for_service_instance( def create_pod_disruption_budget( kube_client: KubeClient, pod_disruption_budget: V1beta1PodDisruptionBudget, - namespace: str = "paasta", + namespace: str, ) -> None: return kube_client.policy.create_namespaced_pod_disruption_budget( namespace=namespace, body=pod_disruption_budget @@ -3126,7 +3238,7 @@ def list_all_paasta_deployments(kube_client: KubeClient) -> Sequence[KubeDeploym def list_all_deployments( - kube_client: KubeClient, namespace: str = "paasta" + kube_client: KubeClient, namespace: str ) -> Sequence[KubeDeployment]: return list_deployments(kube_client=kube_client, 
namespace=namespace) @@ -3134,12 +3246,13 @@ def list_all_deployments( def list_matching_deployments( service: str, instance: str, + *, + namespace: str, kube_client: KubeClient, - namespace: str = "paasta", ) -> Sequence[KubeDeployment]: return list_deployments( kube_client, - f"paasta.yelp.com/service={service},paasta.yelp.com/instance={instance}", + label_selector=f"paasta.yelp.com/service={service},paasta.yelp.com/instance={instance}", namespace=namespace, ) @@ -3157,7 +3270,7 @@ def list_matching_deployments_in_all_namespaces( @async_timeout() async def replicasets_for_service_instance( - service: str, instance: str, kube_client: KubeClient, namespace: str = "paasta" + service: str, instance: str, kube_client: KubeClient, namespace: str ) -> Sequence[V1ReplicaSet]: async_list_replica_set = a_sync.to_async( kube_client.deployments.list_namespaced_replica_set @@ -3171,7 +3284,7 @@ async def replicasets_for_service_instance( @async_timeout() async def controller_revisions_for_service_instance( - service: str, instance: str, kube_client: KubeClient, namespace: str = "paasta" + service: str, instance: str, kube_client: KubeClient, namespace: str ) -> Sequence[V1ControllerRevision]: async_list_controller_revisions = a_sync.to_async( kube_client.deployments.list_namespaced_controller_revision @@ -3185,7 +3298,7 @@ async def controller_revisions_for_service_instance( @async_timeout(15) async def pods_for_service_instance( - service: str, instance: str, kube_client: KubeClient, namespace: str = "paasta" + service: str, instance: str, kube_client: KubeClient, namespace: str ) -> Sequence[V1Pod]: async_list_pods = a_sync.to_async(kube_client.core.list_namespaced_pod) response = await async_list_pods( @@ -3201,14 +3314,12 @@ def get_pods_by_node(kube_client: KubeClient, node: V1Node) -> Sequence[V1Pod]: ).items -def get_all_pods(kube_client: KubeClient, namespace: str = "paasta") -> List[V1Pod]: +def get_all_pods(kube_client: KubeClient, namespace: str) -> List[V1Pod]: return kube_client.core.list_namespaced_pod(namespace=namespace).items @time_cache(ttl=300) -def get_all_pods_cached( - kube_client: KubeClient, namespace: str = "paasta" -) -> Sequence[V1Pod]: +def get_all_pods_cached(kube_client: KubeClient, namespace: str) -> Sequence[V1Pod]: pods: Sequence[V1Pod] = get_all_pods(kube_client, namespace) return pods @@ -3335,7 +3446,7 @@ def get_all_nodes( return kube_client.core.list_node().items -@time_cache(ttl=300) +@time_cache(ttl=60) def get_all_nodes_cached(kube_client: KubeClient) -> Sequence[V1Node]: nodes: Sequence[V1Node] = get_all_nodes(kube_client) return nodes @@ -3400,7 +3511,7 @@ def get_kubernetes_app_name(service: str, instance: str) -> str: def get_kubernetes_app_by_name( - name: str, kube_client: KubeClient, namespace: str = "paasta" + name: str, kube_client: KubeClient, namespace: str ) -> Union[V1Deployment, V1StatefulSet]: try: app = kube_client.deployments.read_namespaced_deployment_status( @@ -3420,7 +3531,7 @@ def get_kubernetes_app_by_name( def create_deployment( kube_client: KubeClient, formatted_deployment: V1Deployment, - namespace: str = "paasta", + namespace: str, ) -> None: return kube_client.deployments.create_namespaced_deployment( namespace=namespace, body=formatted_deployment @@ -3430,7 +3541,7 @@ def create_deployment( def update_deployment( kube_client: KubeClient, formatted_deployment: V1Deployment, - namespace: str = "paasta", + namespace: str, ) -> None: return kube_client.deployments.replace_namespaced_deployment( name=formatted_deployment.metadata.name, 
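# Illustrative sketch, not part of this diff: ensure_namespace above is now memoized per process with
# functools.lru_cache, reads the time-cached namespace list, and tolerates HTTP 409 so concurrent syncers
# cannot fail while racing to create the same namespace. A minimal standalone version of that
# create-if-missing pattern using the raw kubernetes client (ensure_namespace_exists is an illustrative
# name; the real helper also attaches the rolebinding and LimitRange shown above):
import functools
import logging

from kubernetes import client
from kubernetes.client.rest import ApiException

log = logging.getLogger(__name__)

@functools.lru_cache()  # bootstrap each (client, namespace) pair at most once per process
def ensure_namespace_exists(core: client.CoreV1Api, namespace: str) -> None:
    existing = {ns.metadata.name for ns in core.list_namespace().items}
    if namespace in existing:
        return
    log.warning("Creating namespace %s as it does not exist", namespace)
    try:
        core.create_namespace(
            body=client.V1Namespace(metadata=client.V1ObjectMeta(name=namespace))
        )
    except ApiException as e:
        if e.status == 409:
            # another sync process won the race; the namespace exists, carry on
            log.warning("Namespace %s already exists; continuing", namespace)
        else:
            raise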
@@ -3442,7 +3553,7 @@ def update_deployment( def patch_deployment( kube_client: KubeClient, formatted_deployment: V1Deployment, - namespace: str = "paasta", + namespace: str, ) -> None: return kube_client.deployments.patch_namespaced_deployment( name=formatted_deployment.metadata.name, @@ -3454,7 +3565,7 @@ def patch_deployment( def delete_deployment( kube_client: KubeClient, deployment_name: str, - namespace: str = "paasta", + namespace: str, ) -> None: return kube_client.deployments.delete_namespaced_deployment( name=deployment_name, @@ -3465,7 +3576,7 @@ def delete_deployment( def create_stateful_set( kube_client: KubeClient, formatted_stateful_set: V1StatefulSet, - namespace: str = "paasta", + namespace: str, ) -> None: return kube_client.deployments.create_namespaced_stateful_set( namespace=namespace, body=formatted_stateful_set @@ -3475,7 +3586,7 @@ def create_stateful_set( def update_stateful_set( kube_client: KubeClient, formatted_stateful_set: V1StatefulSet, - namespace: str = "paasta", + namespace: str, ) -> None: return kube_client.deployments.replace_namespaced_stateful_set( name=formatted_stateful_set.metadata.name, @@ -3653,6 +3764,7 @@ def update_secret( ) +@time_cache(ttl=300) def get_secret_signature( kube_client: KubeClient, signature_name: str, @@ -3684,7 +3796,7 @@ def update_secret_signature( service_name: str, signature_name: str, secret_signature: str, - namespace: str = "paasta", + namespace: str, ) -> None: """ :param service_name: Expect unsanitised service_name @@ -3713,7 +3825,7 @@ def create_secret_signature( service_name: str, signature_name: str, secret_signature: str, - namespace: str = "paasta", + namespace: str, ) -> None: """ :param service_name: Expect unsanitised service_name @@ -3765,7 +3877,7 @@ def load_custom_resource_definitions( def create_pod_topology_spread_constraints( service: str, instance: str, - topology_spread_constraints: List[Dict[str, Any]], + topology_spread_constraints: List[TopologySpreadConstraintDict], ) -> List[V1TopologySpreadConstraint]: """ Applies cluster-level topology spread constraints to every Pod template. @@ -3889,12 +4001,19 @@ def get_all_role_bindings( return kube_client.rbac.list_namespaced_role_binding(namespace=namespace).items +def get_all_limit_ranges( + kube_client: KubeClient, + namespace: str, +) -> Sequence[V1LimitRange]: + return kube_client.core.list_namespaced_limit_range(namespace).items + + _RE_NORMALIZE_IAM_ROLE = re.compile(r"[^0-9a-zA-Z]+") def create_or_find_service_account_name( iam_role: str, - namespace: str = "paasta", + namespace: str, k8s_role: Optional[str] = None, dry_run: bool = False, ) -> str: @@ -3909,9 +4028,9 @@ def create_or_find_service_account_name( # to support these two usecases, we'll suffix the name of a Service Account with the # Kubernetes Role name to disambiguate between the two. 
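# Illustrative sketch, not part of this diff: the comment above describes the Service Account naming
# scheme, and the patched lines below additionally lowercase the IAM role before normalizing it.
# A tiny illustration of that normalization with a made-up ARN (sa_name_for is a hypothetical helper,
# not a real function in this module):
import re
from typing import Optional

_RE_NORMALIZE_IAM_ROLE = re.compile(r"[^0-9a-zA-Z]+")

def sa_name_for(iam_role: str, k8s_role: Optional[str] = None) -> str:
    # lowercase first so differently-cased role names map to one Service Account,
    # then squash every run of non-alphanumeric characters to a single dash
    normalized = _RE_NORMALIZE_IAM_ROLE.sub("-", iam_role.lower())
    return f"paasta--{normalized}--{k8s_role}" if k8s_role else f"paasta--{normalized}"

# sa_name_for("arn:aws:iam::123456789012:role/My_Role")
#   == "paasta--arn-aws-iam-123456789012-role-my-role"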
if k8s_role: - sa_name = f"paasta--{_RE_NORMALIZE_IAM_ROLE.sub('-', iam_role)}--{k8s_role}" + sa_name = f"paasta--{_RE_NORMALIZE_IAM_ROLE.sub('-', iam_role.lower())}--{k8s_role}" else: - sa_name = f"paasta--{_RE_NORMALIZE_IAM_ROLE.sub('-', iam_role)}" + sa_name = f"paasta--{_RE_NORMALIZE_IAM_ROLE.sub('-', iam_role.lower())}" # until Core ML migrates Spark to use Pod Identity, we need to support starting Spark drivers with a Service Account # that only has k8s access elif not iam_role and k8s_role: @@ -4144,7 +4263,8 @@ def get_secret( kube_client: KubeClient, secret_name: str, key_name: str, - namespace: str = "paasta", + *, + namespace: str, decode: bool = True, ) -> Union[str, bytes]: """ @@ -4167,7 +4287,7 @@ def get_kubernetes_secret_env_variables( kube_client: KubeClient, environment: Dict[str, str], service_name: str, - namespace: str = "paasta", + namespace: str, ) -> Dict[str, str]: decrypted_secrets = {} for k, v in environment.items(): @@ -4195,7 +4315,7 @@ def get_kubernetes_secret_volumes( kube_client: KubeClient, secret_volumes_config: Sequence[SecretVolume], service_name: str, - namespace: str = "paasta", + namespace: str, ) -> Dict[str, Union[str, bytes]]: secret_volumes = {} # The config might look one of two ways: diff --git a/paasta_tools/long_running_service_tools.py b/paasta_tools/long_running_service_tools.py index 2322ee5a58..41685cd935 100644 --- a/paasta_tools/long_running_service_tools.py +++ b/paasta_tools/long_running_service_tools.py @@ -29,6 +29,8 @@ DEFAULT_CONTAINER_PORT = 8888 DEFAULT_AUTOSCALING_SETPOINT = 0.8 +DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA = 1 +DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800 DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800 DEFAULT_PISCINA_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800 DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800 @@ -42,15 +44,16 @@ class AutoscalingParamsDict(TypedDict, total=False): metrics_provider: str decision_policy: str setpoint: float + desired_active_requests_per_replica: int forecast_policy: Optional[str] offset: Optional[float] moving_average_window_seconds: Optional[int] use_prometheus: bool use_resource_metrics: bool - uwsgi_stats_port: int scaledown_policies: Optional[dict] good_enough_window: List[float] prometheus_adapter_config: Optional[dict] + max_instances_alert_threshold: float class LongRunningServiceConfigDict(InstanceConfigDict, total=False): @@ -354,6 +357,12 @@ def get_autoscaling_params(self) -> AutoscalingParamsDict: defaults=default_params, ) + def get_autoscaling_max_instances_alert_threshold(self) -> float: + autoscaling_params = self.get_autoscaling_params() + return autoscaling_params.get( + "max_instances_alert_threshold", autoscaling_params["setpoint"] + ) + def validate( self, params: Optional[List[str]] = None, diff --git a/paasta_tools/metrics/metastatus_lib.py b/paasta_tools/metrics/metastatus_lib.py index bf9abfe450..3bb6d75faf 100755 --- a/paasta_tools/metrics/metastatus_lib.py +++ b/paasta_tools/metrics/metastatus_lib.py @@ -461,7 +461,7 @@ def assert_mesos_tasks_running( def assert_kube_pods_running( - kube_client: KubeClient, namespace: str = "paasta" + kube_client: KubeClient, namespace: str ) -> HealthCheckResult: statuses = [ get_pod_status(pod) for pod in get_all_pods_cached(kube_client, namespace) @@ -884,9 +884,10 @@ def get_resource_utilization_by_grouping( def get_resource_utilization_by_grouping_kube( grouping_func: _GenericNodeGroupingFunctionT, kube_client: KubeClient, + *, + namespace: str, filters: 
Sequence[_GenericNodeFilterFunctionT] = [], sort_func: _GenericNodeSortFunctionT = None, - namespace: str = "paasta", ) -> Mapping[_KeyFuncRetT, ResourceUtilizationDict]: """Given a function used to group nodes, calculate resource utilization for each value of a given attribute. @@ -1045,7 +1046,7 @@ def assert_marathon_deployments( def assert_kube_deployments( - kube_client: KubeClient, namespace: str = "paasta" + kube_client: KubeClient, namespace: str ) -> HealthCheckResult: num_deployments = len(list_all_deployments(kube_client, namespace)) return HealthCheckResult( @@ -1065,7 +1066,7 @@ def get_marathon_status( def get_kube_status( - kube_client: KubeClient, namespace: str = "paasta" + kube_client: KubeClient, namespace: str ) -> Sequence[HealthCheckResult]: """Gather information about Kubernetes. :param kube_client: the KUbernetes client diff --git a/paasta_tools/paasta_execute_docker_command.py b/paasta_tools/paasta_execute_docker_command.py index 212d9717a8..0b74abd40e 100755 --- a/paasta_tools/paasta_execute_docker_command.py +++ b/paasta_tools/paasta_execute_docker_command.py @@ -32,6 +32,7 @@ from paasta_tools.mesos_tools import get_container_id_for_mesos_id from paasta_tools.utils import get_docker_client +from paasta_tools.utils import is_using_unprivileged_containers def parse_args(): @@ -68,7 +69,11 @@ def signal_handler(signum, frame): def execute_in_container(docker_client, container_id, cmd, timeout): container_info = docker_client.inspect_container(container_id) - if container_info["ExecIDs"] and len(container_info["ExecIDs"]) > 0: + if ( + container_info["ExecIDs"] + and len(container_info["ExecIDs"]) > 0 + and not is_using_unprivileged_containers() + ): for possible_exec_id in container_info["ExecIDs"]: exec_info = docker_client.exec_inspect(possible_exec_id)["ProcessConfig"] if exec_info["entrypoint"] == "/bin/sh" and exec_info["arguments"] == [ diff --git a/paasta_tools/paasta_metastatus.py b/paasta_tools/paasta_metastatus.py index a0e79cfaa2..f05323d9ca 100755 --- a/paasta_tools/paasta_metastatus.py +++ b/paasta_tools/paasta_metastatus.py @@ -230,8 +230,9 @@ def utilization_table_by_grouping_from_kube( groupings: Sequence[str], threshold: float, kube_client: KubeClient, + *, + namespace: str, service_instance_stats: Optional[ServiceInstanceStats] = None, - namespace: str = "paasta", ) -> Tuple[Sequence[MutableSequence[str]], bool]: grouping_function = metastatus_lib.key_func_for_attribute_multi_kube(groupings) @@ -317,7 +318,7 @@ def get_service_instance_stats( def _run_kube_checks( - kube_client: KubeClient, namespace: str = "paasta" + kube_client: KubeClient, namespace: str ) -> Sequence[HealthCheckResult]: kube_status = metastatus_lib.get_kube_status(kube_client, namespace) kube_metrics_status = metastatus_lib.get_kube_resource_utilization_health( diff --git a/paasta_tools/paastaapi/api/service_api.py b/paasta_tools/paastaapi/api/service_api.py index 1bd8d94943..19d19e7684 100644 --- a/paasta_tools/paastaapi/api/service_api.py +++ b/paasta_tools/paastaapi/api/service_api.py @@ -1186,7 +1186,6 @@ def __mesh_instance( instance (str): Instance name Keyword Args: - include_smartstack (bool): Include Smartstack information. [optional] if omitted the server will use the default value of True include_envoy (bool): Include Envoy information. [optional] if omitted the server will use the default value of True _return_http_data_only (bool): response data without head status code and headers. Default is True. 
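# Illustrative sketch, not part of this diff: a pattern repeated throughout this change is dropping the
# old namespace="paasta" default and, for several helpers, putting namespace behind a bare "*" so it must
# be passed by keyword. A minimal illustration of that calling convention (list_pod_names and kube_client
# are assumptions for the example, not real paasta_tools names):
from typing import List

def list_pod_names(kube_client, *, namespace: str) -> List[str]:
    # everything after "*" is keyword-only; with no default, callers must now
    # state which namespace they mean instead of silently getting "paasta"
    return [
        pod.metadata.name
        for pod in kube_client.core.list_namespaced_pod(namespace=namespace).items
    ]

# list_pod_names(kube_client, "paasta")                        -> TypeError
# list_pod_names(kube_client, namespace="paastasvc-example")   -> OK, explicit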
@@ -1251,7 +1250,6 @@ def __mesh_instance( 'all': [ 'service', 'instance', - 'include_smartstack', 'include_envoy', ], 'required': [ @@ -1275,21 +1273,17 @@ def __mesh_instance( (str,), 'instance': (str,), - 'include_smartstack': - (bool,), 'include_envoy': (bool,), }, 'attribute_map': { 'service': 'service', 'instance': 'instance', - 'include_smartstack': 'include_smartstack', 'include_envoy': 'include_envoy', }, 'location_map': { 'service': 'path', 'instance': 'path', - 'include_smartstack': 'query', 'include_envoy': 'query', }, 'collection_format_map': { @@ -1325,7 +1319,6 @@ def __status_instance( Keyword Args: verbose (int): Include verbose status information. [optional] - include_smartstack (bool): Include Smartstack information. [optional] include_envoy (bool): Include Envoy information. [optional] include_mesos (bool): Include Mesos information. [optional] new (bool): Use new version of paasta status for services. [optional] @@ -1393,7 +1386,6 @@ def __status_instance( 'service', 'instance', 'verbose', - 'include_smartstack', 'include_envoy', 'include_mesos', 'new', @@ -1421,8 +1413,6 @@ def __status_instance( (str,), 'verbose': (int,), - 'include_smartstack': - (bool,), 'include_envoy': (bool,), 'include_mesos': @@ -1434,7 +1424,6 @@ def __status_instance( 'service': 'service', 'instance': 'instance', 'verbose': 'verbose', - 'include_smartstack': 'include_smartstack', 'include_envoy': 'include_envoy', 'include_mesos': 'include_mesos', 'new': 'new', @@ -1443,7 +1432,6 @@ def __status_instance( 'service': 'path', 'instance': 'path', 'verbose': 'query', - 'include_smartstack': 'query', 'include_envoy': 'query', 'include_mesos': 'query', 'new': 'query', diff --git a/paasta_tools/secret_tools.py b/paasta_tools/secret_tools.py index bfbefd00e6..b0fffe2bd4 100644 --- a/paasta_tools/secret_tools.py +++ b/paasta_tools/secret_tools.py @@ -219,7 +219,7 @@ def decrypt_secret_volumes( # This ^ should result in 2 files (/nail/foo/bar.yaml, /nail/foo/baz.yaml) # We need to support both cases for secret_volume in secret_volumes_config: - if "items" not in secret_volume: + if not secret_volume.get("items"): secret_contents = decrypt_secret( secret_provider_name=secret_provider_name, soa_dir=soa_dir, diff --git a/paasta_tools/setup_kubernetes_job.py b/paasta_tools/setup_kubernetes_job.py index 9f7a61f036..486957a421 100755 --- a/paasta_tools/setup_kubernetes_job.py +++ b/paasta_tools/setup_kubernetes_job.py @@ -23,11 +23,15 @@ import argparse import logging import sys +import traceback from typing import List from typing import Optional from typing import Sequence from typing import Tuple +from typing import Union +from paasta_tools.eks_tools import EksDeploymentConfig +from paasta_tools.eks_tools import load_eks_service_config_no_cache from paasta_tools.kubernetes.application.controller_wrappers import Application from paasta_tools.kubernetes.application.controller_wrappers import ( get_application_wrapper, @@ -88,6 +92,13 @@ def parse_args() -> argparse.Namespace: type=int, help="Update or create up to this number of service instances. 
Default is 0 (no limit).", ) + parser.add_argument( + "--eks", + help="This flag deploys only k8 services that should run on EKS", + dest="eks", + action="store_true", + default=False, + ) args = parser.parse_args() return args @@ -112,11 +123,12 @@ def main() -> None: service_instances=args.service_instance_list ) - # returns a list of pairs of (No error?, KubernetesDeploymentConfig) for every service_instance + # returns a list of pairs of (No error?, KubernetesDeploymentConfig | EksDeploymentConfig) for every service_instance service_instance_configs_list = get_kubernetes_deployment_config( service_instances_with_valid_names=service_instances_with_valid_names, cluster=args.cluster or load_system_paasta_config().get_cluster(), soa_dir=soa_dir, + eks=args.eks, ) if ((False, None) in service_instance_configs_list) or ( @@ -138,6 +150,7 @@ def main() -> None: rate_limit=args.rate_limit, soa_dir=soa_dir, metrics_interface=deploy_metrics, + eks=args.eks, ) else: setup_kube_succeeded = False @@ -170,16 +183,28 @@ def get_kubernetes_deployment_config( service_instances_with_valid_names: list, cluster: str, soa_dir: str = DEFAULT_SOA_DIR, -) -> List[Tuple[bool, KubernetesDeploymentConfig]]: + eks: bool = False, +) -> List[Tuple[bool, Union[KubernetesDeploymentConfig, EksDeploymentConfig]]]: service_instance_configs_list = [] for service_instance in service_instances_with_valid_names: try: - service_instance_config = load_kubernetes_service_config_no_cache( - service=service_instance[0], - instance=service_instance[1], - cluster=cluster, - soa_dir=soa_dir, - ) + service_instance_config: Union[ + KubernetesDeploymentConfig, EksDeploymentConfig + ] + if eks: + service_instance_config = load_eks_service_config_no_cache( + service=service_instance[0], + instance=service_instance[1], + cluster=cluster, + soa_dir=soa_dir, + ) + else: + service_instance_config = load_kubernetes_service_config_no_cache( + service=service_instance[0], + instance=service_instance[1], + cluster=cluster, + soa_dir=soa_dir, + ) service_instance_configs_list.append((True, service_instance_config)) except NoDeploymentsAvailable: log.debug( @@ -200,24 +225,30 @@ def get_kubernetes_deployment_config( def setup_kube_deployments( kube_client: KubeClient, cluster: str, - service_instance_configs_list: List[Tuple[bool, KubernetesDeploymentConfig]], + service_instance_configs_list: List[ + Tuple[bool, Union[KubernetesDeploymentConfig, EksDeploymentConfig]] + ], rate_limit: int = 0, soa_dir: str = DEFAULT_SOA_DIR, metrics_interface: metrics_lib.BaseMetrics = metrics_lib.NoMetrics("paasta"), + eks: bool = False, ) -> bool: - if service_instance_configs_list: - existing_kube_deployments = set(list_all_paasta_deployments(kube_client)) - existing_apps = { - (deployment.service, deployment.instance, deployment.namespace) - for deployment in existing_kube_deployments - } + if not service_instance_configs_list: + return True + + existing_kube_deployments = set(list_all_paasta_deployments(kube_client)) + existing_apps = { + (deployment.service, deployment.instance, deployment.namespace) + for deployment in existing_kube_deployments + } applications = [ create_application_object( cluster=cluster, soa_dir=soa_dir, service_instance_config=service_instance, + eks=eks, ) if service_instance else (_, None) @@ -238,6 +269,19 @@ def setup_kube_deployments( app.kube_deployment.instance, app.kube_deployment.namespace, ) not in existing_apps: + if app.soa_config.get_bounce_method() == "downthenup": + if any( + ( + existing_app[:2] + == ( + 
app.kube_deployment.service, + app.kube_deployment.instance, + ) + ) + for existing_app in existing_apps + ): + # For downthenup, we don't want to create until cleanup_kubernetes_job has cleaned up the instance in the other namespace. + continue log.info(f"Creating {app} because it does not exist yet.") app.create(kube_client) app_dimensions["deploy_event"] = "create" @@ -273,16 +317,17 @@ def setup_kube_deployments( def create_application_object( cluster: str, soa_dir: str, - service_instance_config: KubernetesDeploymentConfig, + service_instance_config: Union[KubernetesDeploymentConfig, EksDeploymentConfig], + eks: bool = False, ) -> Tuple[bool, Optional[Application]]: try: formatted_application = service_instance_config.format_kubernetes_app() - except InvalidKubernetesConfig as e: - log.error(str(e)) + except InvalidKubernetesConfig: + log.error(traceback.format_exc()) return False, None app = get_application_wrapper(formatted_application) - app.load_local_config(soa_dir, cluster) + app.load_local_config(soa_dir, cluster, eks) return True, app diff --git a/paasta_tools/setup_prometheus_adapter_config.py b/paasta_tools/setup_prometheus_adapter_config.py index 4539763192..ee2ad09111 100755 --- a/paasta_tools/setup_prometheus_adapter_config.py +++ b/paasta_tools/setup_prometheus_adapter_config.py @@ -31,6 +31,7 @@ from kubernetes.client.rest import ApiException from mypy_extensions import TypedDict +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.kubernetes_tools import DEFAULT_USE_PROMETHEUS_CPU from paasta_tools.kubernetes_tools import DEFAULT_USE_PROMETHEUS_UWSGI from paasta_tools.kubernetes_tools import ensure_namespace @@ -40,9 +41,15 @@ from paasta_tools.kubernetes_tools import sanitise_kubernetes_name from paasta_tools.kubernetes_tools import V1Pod from paasta_tools.long_running_service_tools import AutoscalingParamsDict +from paasta_tools.long_running_service_tools import ( + DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW, +) from paasta_tools.long_running_service_tools import ( DEFAULT_CPU_AUTOSCALING_MOVING_AVERAGE_WINDOW, ) +from paasta_tools.long_running_service_tools import ( + DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA, +) from paasta_tools.long_running_service_tools import ( DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW, ) @@ -75,6 +82,11 @@ CPU_METRICS_PROVIDER = "cpu" +K8S_INSTANCE_TYPE_CLASSES = ( + KubernetesDeploymentConfig, + EksDeploymentConfig, +) + class PrometheusAdapterResourceConfig(TypedDict, total=False): """ @@ -228,25 +240,37 @@ def should_create_piscina_scaling_rule( return False, "did not request piscina autoscaling" -def create_instance_uwsgi_scaling_rule( - service: str, - instance: str, +def should_create_active_requests_scaling_rule( autoscaling_config: AutoscalingParamsDict, +) -> Tuple[bool, Optional[str]]: + """ + Determines whether we should configure the prometheus adapter for a given service. + Returns a 2-tuple of (should_create, reason_to_skip) + """ + if autoscaling_config["metrics_provider"] == "active-requests": + return True, None + return False, "did not request active-requests autoscaling" + + +def create_instance_active_requests_scaling_rule( + service: str, + instance_config: KubernetesDeploymentConfig, paasta_cluster: str, - namespace: str = "paasta", ) -> PrometheusAdapterRule: """ Creates a Prometheus adapter rule config for a given service instance. 
""" - setpoint = autoscaling_config["setpoint"] + autoscaling_config = instance_config.get_autoscaling_params() + instance = instance_config.instance + namespace = instance_config.get_namespace() + desired_active_requests_per_replica = autoscaling_config.get( + "desired_active_requests_per_replica", + DEFAULT_DESIRED_ACTIVE_REQUESTS_PER_REPLICA, + ) moving_average_window = autoscaling_config.get( - "moving_average_window_seconds", DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW + "moving_average_window_seconds", + DEFAULT_ACTIVE_REQUESTS_AUTOSCALING_MOVING_AVERAGE_WINDOW, ) - # this should always be set, but we default to 0 for safety as the worst thing that would happen - # is that we take a couple more iterations than required to hit the desired setpoint - offset = autoscaling_config.get("offset", 0) - offset_multiplier = load_system_paasta_config().get_uwsgi_offset_multiplier() - deployment_name = get_kubernetes_app_name(service=service, instance=instance) # In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to @@ -275,6 +299,104 @@ def create_instance_uwsgi_scaling_rule( ) ) by (kube_deployment) """ + + # Envoy tracks metrics at the smartstack namespace level. In most cases the paasta instance name matches the smartstack namespace. + # In rare cases, there are custom registration added to instance configs. + # If there is no custom registration the envoy and instance names match and no need to update the worker_filter_terms. + # If there is a single custom registration for an instance, we will process the registration value and extract the value to be used. + # The registrations usually follow the format of {service_name}.{smartstack_name}. Hence we split the string by dot and extract the last token. + # More than one custom registrations are not supported and config validation takes care of rejecting such configs. + registrations = instance_config.get_registrations() + + mesh_instance = registrations[0].split(".")[-1] if len(registrations) == 1 else None + envoy_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{mesh_instance or instance}'" + + # envoy-based metrics have no labels corresponding to the k8s resources that they + # front, but we can trivially add one in since our deployment names are of the form + # {service_name}-{instance_name} - which are both things in `worker_filter_terms` so + # it's safe to unconditionally add. + # This is necessary as otherwise the HPA/prometheus adapter does not know what these + # metrics are for. + total_load = f""" + ( + sum( + label_replace( + paasta_instance:envoy_cluster__egress_cluster_upstream_rq_active{{{envoy_filter_terms}}}, + "kube_deployment", "{deployment_name}", "", "" + ) + ) by (kube_deployment) + ) + """ + desired_instances_at_each_point_in_time = f""" + {total_load} / {desired_active_requests_per_replica} + """ + desired_instances = f""" + avg_over_time( + ( + {desired_instances_at_each_point_in_time} + )[{moving_average_window}s:] + ) + """ + + # The prometheus HPA adapter needs kube_deployment and kube_namespace labels attached to the metrics its scaling on. + # The envoy-based metrics have no labels corresponding to the k8s resources, so we can add them in. 
+ metrics_query = f""" + label_replace( + label_replace( + {desired_instances} / {current_replicas}, + "kube_deployment", "{deployment_name}", "", "" + ), + "kube_namespace", "{namespace}", "", "" + ) + """ + series_query = f""" + k8s:deployment:pods_status_ready{{{worker_filter_terms}}} + """ + + metric_name = f"{deployment_name}-active-requests-prom" + + return { + "name": {"as": metric_name}, + "seriesQuery": _minify_promql(series_query), + "resources": {"template": "kube_<<.Resource>>"}, + "metricsQuery": _minify_promql(metrics_query), + } + + +def create_instance_uwsgi_scaling_rule( + service: str, + instance_config: KubernetesDeploymentConfig, + paasta_cluster: str, +) -> PrometheusAdapterRule: + """ + Creates a Prometheus adapter rule config for a given service instance. + """ + autoscaling_config = instance_config.get_autoscaling_params() + instance = instance_config.instance + namespace = instance_config.get_namespace() + setpoint = autoscaling_config["setpoint"] + moving_average_window = autoscaling_config.get( + "moving_average_window_seconds", DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW + ) + # this should always be set, but we default to 0 for safety as the worst thing that would happen + # is that we take a couple more iterations than required to hit the desired setpoint + offset = autoscaling_config.get("offset", 0) + offset_multiplier = load_system_paasta_config().get_uwsgi_offset_multiplier() + + deployment_name = get_kubernetes_app_name(service=service, instance=instance) + + # In order for autoscaling to work safely while a service migrates from one namespace to another, the HPA needs to + # make sure that the deployment in the new namespace is scaled up enough to handle _all_ the load. + # This is because once the new deployment is 100% healthy, cleanup_kubernetes_job will delete the deployment out of + # the old namespace all at once, suddenly putting all the load onto the deployment in the new namespace. + # To ensure this, we must: + # - DO NOT filter on namespace in worker_filter_terms (which is used when calculating desired_instances). + # - DO filter on namespace in replica_filter_terms (which is used to calculate current_replicas). + # This makes sure that desired_instances includes load from all namespaces, but that the scaling ratio calculated + # by (desired_instances / current_replicas) is meaningful for each namespace. + worker_filter_terms = f"paasta_cluster='{paasta_cluster}',paasta_service='{service}',paasta_instance='{instance}'" + replica_filter_terms = f"paasta_cluster='{paasta_cluster}',kube_deployment='{deployment_name}',namespace='{namespace}'" + # k8s:deployment:pods_status_ready is a metric created by summing kube_pod_status_ready # over paasta service/instance/cluster. it counts the number of ready pods in a paasta # deployment. 
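As a quick reference for the namespace-migration comment above, here is a minimal sketch of the two label-filter strings the uwsgi rule builds (variable names taken from this diff, values hypothetical): the worker filter deliberately omits the namespace so load is summed across every namespace the service runs in, while the replica filter pins the namespace so the resulting ratio is meaningful for the deployment being scaled.

    # hypothetical values for illustration only
    paasta_cluster = "example-cluster"
    service, instance = "example_service", "main"
    deployment_name, namespace = "example-service-main", "paasta"

    # no namespace label here: desired_instances should count load from all namespaces
    worker_filter_terms = (
        f"paasta_cluster='{paasta_cluster}',"
        f"paasta_service='{service}',paasta_instance='{instance}'"
    )
    # namespace pinned here: current/ready replicas are counted per target namespace
    replica_filter_terms = (
        f"paasta_cluster='{paasta_cluster}',"
        f"kube_deployment='{deployment_name}',namespace='{namespace}'"
    )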
@@ -287,6 +409,17 @@ def create_instance_uwsgi_scaling_rule( ) ) by (kube_deployment)) """ + # as mentioned above: we want to get the overload by counting load across namespces - but we need + # to divide by the ready pods in the target namespace - which is done by using a namespace filter here + ready_pods_namespaced = f""" + (sum( + k8s:deployment:pods_status_ready{{{replica_filter_terms}}} >= 0 + or + max_over_time( + k8s:deployment:pods_status_ready{{{replica_filter_terms}}}[{DEFAULT_EXTRAPOLATION_TIME}s] + ) + ) by (kube_deployment)) + """ load_per_instance = f""" avg( uwsgi_worker_busy{{{worker_filter_terms}}} @@ -317,8 +450,15 @@ def create_instance_uwsgi_scaling_rule( )[{moving_average_window}s:] ) """ + + # our Prometheus query is calculating a desired number of replicas, and then k8s wants that expressed as an average utilization + # so as long as we divide by the number that k8s ends up multiplying by, we should be able to convince k8s to run any arbitrary + # number of replicas. + # k8s happens to multiply by the # of ready pods - so we divide by that rather than by the amount of current replicas (which may + # include non-ready pods) + # ref: https://github.com/kubernetes/kubernetes/blob/7ec1a89a509906dad9fd6a4635d7bfc157b47790/pkg/controller/podautoscaler/replica_calculator.go#L278 metrics_query = f""" - {desired_instances} / {current_replicas} + {desired_instances} / {ready_pods_namespaced} """ metric_name = f"{deployment_name}-uwsgi-prom" @@ -333,14 +473,15 @@ def create_instance_uwsgi_scaling_rule( def create_instance_piscina_scaling_rule( service: str, - instance: str, - autoscaling_config: AutoscalingParamsDict, + instance_config: KubernetesDeploymentConfig, paasta_cluster: str, - namespace: str = "paasta", ) -> PrometheusAdapterRule: """ Creates a Prometheus adapter rule config for a given service instance. """ + autoscaling_config = instance_config.get_autoscaling_params() + instance = instance_config.instance + namespace = instance_config.get_namespace() setpoint = autoscaling_config["setpoint"] moving_average_window = autoscaling_config.get( "moving_average_window_seconds", @@ -444,14 +585,15 @@ def should_create_cpu_scaling_rule( def create_instance_cpu_scaling_rule( service: str, - instance: str, - autoscaling_config: AutoscalingParamsDict, + instance_config: KubernetesDeploymentConfig, paasta_cluster: str, - namespace: str = "paasta", ) -> PrometheusAdapterRule: """ Creates a Prometheus adapter rule config for a given service instance. """ + autoscaling_config = instance_config.get_autoscaling_params() + instance = instance_config.instance + namespace = instance_config.get_namespace() deployment_name = get_kubernetes_app_name(service=service, instance=instance) sanitized_instance_name = sanitise_kubernetes_name(instance) metric_name = f"{deployment_name}-cpu-prom" @@ -592,14 +734,15 @@ def create_instance_cpu_scaling_rule( def create_instance_gunicorn_scaling_rule( service: str, - instance: str, - autoscaling_config: AutoscalingParamsDict, + instance_config: KubernetesDeploymentConfig, paasta_cluster: str, - namespace: str = "paasta", ) -> PrometheusAdapterRule: """ Creates a Prometheus adapter rule config for a given service instance. 
""" + autoscaling_config = instance_config.get_autoscaling_params() + instance = instance_config.instance + namespace = instance_config.get_namespace() setpoint = autoscaling_config["setpoint"] moving_average_window = autoscaling_config.get( "moving_average_window_seconds", @@ -704,11 +847,12 @@ def should_create_arbitrary_promql_scaling_rule( def create_instance_arbitrary_promql_scaling_rule( service: str, - instance: str, - autoscaling_config: AutoscalingParamsDict, + instance_config: KubernetesDeploymentConfig, paasta_cluster: str, - namespace: str, ) -> PrometheusAdapterRule: + autoscaling_config = instance_config.get_autoscaling_params() + instance = instance_config.instance + namespace = instance_config.get_namespace() prometheus_adapter_config = autoscaling_config["prometheus_adapter_config"] deployment_name = get_kubernetes_app_name(service=service, instance=instance) @@ -764,10 +908,8 @@ def create_instance_arbitrary_promql_scaling_rule( def get_rules_for_service_instance( service_name: str, - instance_name: str, - autoscaling_config: AutoscalingParamsDict, + instance_config: KubernetesDeploymentConfig, paasta_cluster: str, - namespace: str, ) -> List[PrometheusAdapterRule]: """ Returns a list of Prometheus Adapter rules for a given service instance. For now, this @@ -777,29 +919,31 @@ def get_rules_for_service_instance( rules: List[PrometheusAdapterRule] = [] for should_create_scaling_rule, create_instance_scaling_rule in ( + ( + should_create_active_requests_scaling_rule, + create_instance_active_requests_scaling_rule, + ), (should_create_uwsgi_scaling_rule, create_instance_uwsgi_scaling_rule), (should_create_piscina_scaling_rule, create_instance_piscina_scaling_rule), (should_create_cpu_scaling_rule, create_instance_cpu_scaling_rule), (should_create_gunicorn_scaling_rule, create_instance_gunicorn_scaling_rule), ): should_create, skip_reason = should_create_scaling_rule( - autoscaling_config=autoscaling_config, + autoscaling_config=instance_config.get_autoscaling_params(), ) if should_create: rules.append( create_instance_scaling_rule( service=service_name, - instance=instance_name, - autoscaling_config=autoscaling_config, + instance_config=instance_config, paasta_cluster=paasta_cluster, - namespace=namespace, ) ) else: log.debug( "Skipping %s.%s - %s.", service_name, - instance_name, + instance_config.instance, skip_reason, ) @@ -827,23 +971,30 @@ def create_prometheus_adapter_config( cluster=paasta_cluster, instance_type="kubernetes", soa_dir=str(soa_dir) ) } + services.update( + { + service_name + for service_name, _ in get_services_for_cluster( + cluster=paasta_cluster, instance_type="eks", soa_dir=str(soa_dir) + ) + } + ) for service_name in services: config_loader = PaastaServiceConfigLoader( service=service_name, soa_dir=str(soa_dir) ) - for instance_config in config_loader.instance_configs( - cluster=paasta_cluster, - instance_type_class=KubernetesDeploymentConfig, - ): - rules.extend( - get_rules_for_service_instance( - service_name=service_name, - instance_name=instance_config.instance, - autoscaling_config=instance_config.get_autoscaling_params(), - paasta_cluster=paasta_cluster, - namespace=instance_config.get_namespace(), + for instance_type_class in K8S_INSTANCE_TYPE_CLASSES: + for instance_config in config_loader.instance_configs( + cluster=paasta_cluster, + instance_type_class=instance_type_class, + ): + rules.extend( + get_rules_for_service_instance( + service_name=service_name, + instance_config=instance_config, + paasta_cluster=paasta_cluster, + ) ) - ) return 
{ # we sort our rules so that we can easily compare between two different configmaps diff --git a/paasta_tools/spark_tools.py b/paasta_tools/spark_tools.py index edd339fe1b..b1f8df744f 100644 --- a/paasta_tools/spark_tools.py +++ b/paasta_tools/spark_tools.py @@ -2,6 +2,7 @@ import logging import re import socket +import sys from functools import lru_cache from typing import cast from typing import Dict @@ -169,13 +170,27 @@ def get_volumes_from_spark_mesos_configs(spark_conf: Mapping[str, str]) -> List[ def get_volumes_from_spark_k8s_configs(spark_conf: Mapping[str, str]) -> List[str]: - volume_names = [ - re.match( - r"spark.kubernetes.executor.volumes.hostPath.(\d+).mount.path", key - ).group(1) - for key in spark_conf.keys() - if "spark.kubernetes.executor.volumes.hostPath." in key and ".mount.path" in key - ] + volume_names = [] + for key in list(spark_conf.keys()): + if ( + "spark.kubernetes.executor.volumes.hostPath." in key + and ".mount.path" in key + ): + v_name = re.match( + r"spark.kubernetes.executor.volumes.hostPath.([a-z0-9]([-a-z0-9]*[a-z0-9])?).mount.path", + key, + ) + if v_name: + volume_names.append(v_name.group(1)) + else: + log.error( + f"Volume names must consist of lower case alphanumeric characters or '-', " + f"and must start and end with an alphanumeric character. Config -> '{key}' must be fixed." + ) + # Failing here because the k8s pod fails to start if the volume names + # don't follow the lowercase RFC 1123 standard. + sys.exit(1) + volumes = [] for volume_name in volume_names: read_only = ( diff --git a/paasta_tools/tron_tools.py b/paasta_tools/tron_tools.py index cdc1c44ffc..f69b823369 100644 --- a/paasta_tools/tron_tools.py +++ b/paasta_tools/tron_tools.py @@ -210,10 +210,6 @@ def parse_time_variables(command: str, parse_time: datetime.datetime = None) -> return StringFormatter(job_context).format(command) -def _use_k8s_default() -> bool: - return load_system_paasta_config().get_tron_use_k8s_default() - - def _get_tron_k8s_cluster_override(cluster: str) -> Optional[str]: """ Return the name of a compute cluster if there's a different compute cluster that should be used to run a Tronjob. @@ -665,9 +661,6 @@ def __init__( # Indicate whether this config object is created for validation self.for_validation = for_validation - def get_use_k8s(self) -> bool: - return self.config_dict.get("use_k8s", _use_k8s_default()) - def get_name(self): return self.name @@ -738,7 +731,7 @@ def get_cluster(self): def get_expected_runtime(self): return self.config_dict.get("expected_runtime") - def _get_action_config(self, action_name, action_dict): + def _get_action_config(self, action_name, action_dict) -> TronActionConfig: action_service = action_dict.setdefault("service", self.get_service()) action_deploy_group = action_dict.setdefault( "deploy_group", self.get_deploy_group() @@ -781,24 +774,17 @@ def _get_action_config(self, action_name, action_dict): action_dict["monitoring"] = self.get_monitoring() cluster_override = _get_tron_k8s_cluster_override(self.get_cluster()) - # technically, we should also be checking if k8s is enabled, but at this stage - # of our migration we're not expecting any issues and when we clean up all the - # Mesos remnants on-completion we can also rip out all the code that fallsback - # to Mesos and just do this unconditionally. 
- use_k8s_cluster_override = cluster_override is not None and self.get_use_k8s() return TronActionConfig( service=action_service, instance=compose_instance(self.get_name(), action_name), - cluster=cluster_override - if use_k8s_cluster_override - else self.get_cluster(), + cluster=cluster_override or self.get_cluster(), config_dict=action_dict, branch_dict=branch_dict, soa_dir=self.soa_dir, for_validation=self.for_validation, ) - def get_actions(self): + def get_actions(self) -> List[TronActionConfig]: actions = self.config_dict.get("actions") return [ self._get_action_config(name, action_dict) @@ -893,7 +879,7 @@ def format_master_config(master_config, default_volumes, dockercfg_location): return master_config -def format_tron_action_dict(action_config: TronActionConfig, use_k8s: bool = False): +def format_tron_action_dict(action_config: TronActionConfig): """Generate a dict of tronfig for an action, from the TronActionConfig. :param job_config: TronActionConfig @@ -921,12 +907,7 @@ def format_tron_action_dict(action_config: TronActionConfig, use_k8s: bool = Fal "service_account_name": action_config.get_service_account_name(), } - # while we're tranisitioning, we want to be able to cleanly fallback to Mesos - # so we'll default to Mesos unless k8s usage is enabled for both the cluster - # and job. - # there are slight differences between k8s and Mesos configs, so we'll translate - # whatever is in soaconfigs to the k8s equivalent here as well. - if executor in KUBERNETES_EXECUTOR_NAMES and use_k8s: + if executor in KUBERNETES_EXECUTOR_NAMES: # we'd like Tron to be able to distinguish between spark and normal actions # even though they both run on k8s result["executor"] = EXECUTOR_NAME_TO_TRON_EXECUTOR_TYPE.get( @@ -1046,11 +1027,9 @@ def format_tron_job_dict(job_config: TronJobConfig, k8s_enabled: bool = False): :param job_config: TronJobConfig """ - # TODO: this use_k8s flag should be removed once we've fully migrated off of mesos - use_k8s = job_config.get_use_k8s() and k8s_enabled action_dict = { action_config.get_action_name(): format_tron_action_dict( - action_config=action_config, use_k8s=use_k8s + action_config=action_config, ) for action_config in job_config.get_actions() } @@ -1069,16 +1048,11 @@ def format_tron_job_dict(job_config: TronJobConfig, k8s_enabled: bool = False): "time_zone": job_config.get_time_zone(), "expected_runtime": job_config.get_expected_runtime(), } - # TODO: this should be directly inlined, but we need to update tron everywhere first so it'll - # be slightly less tedious to just conditionally send this now until we clean things up on the - # removal of all the Mesos code - if job_config.get_use_k8s(): - result["use_k8s"] = job_config.get_use_k8s() cleanup_config = job_config.get_cleanup_action() if cleanup_config: cleanup_action = format_tron_action_dict( - action_config=cleanup_config, use_k8s=use_k8s + action_config=cleanup_config, ) result["cleanup_action"] = cleanup_action @@ -1093,23 +1067,41 @@ def load_tron_instance_config( load_deployments: bool = True, soa_dir: str = DEFAULT_SOA_DIR, ) -> TronActionConfig: - jobs = load_tron_service_config( + for action in load_tron_instance_configs( service=service, cluster=cluster, load_deployments=load_deployments, soa_dir=soa_dir, - ) - requested_job, requested_action = instance.split(".") - for job in jobs: - if job.get_name() == requested_job: - for action in job.get_actions(): - if action.get_action_name() == requested_action: - return action + ): + if action.get_instance() == instance: + return action raise 
NoConfigurationForServiceError( f"No tron configuration found for {service} {instance}" ) +@time_cache(ttl=5) +def load_tron_instance_configs( + service: str, + cluster: str, + load_deployments: bool = True, + soa_dir: str = DEFAULT_SOA_DIR, +) -> Tuple[TronActionConfig, ...]: + ret: List[TronActionConfig] = [] + + jobs = load_tron_service_config( + service=service, + cluster=cluster, + load_deployments=load_deployments, + soa_dir=soa_dir, + ) + + for job in jobs: + ret.extend(job.get_actions()) + + return tuple(ret) + + @time_cache(ttl=5) def load_tron_service_config( service, diff --git a/paasta_tools/utils.py b/paasta_tools/utils.py index 2e61c016e2..2afcf98ab4 100644 --- a/paasta_tools/utils.py +++ b/paasta_tools/utils.py @@ -57,6 +57,7 @@ from typing import Iterable from typing import Iterator from typing import List +from typing import Literal from typing import Mapping from typing import NamedTuple from typing import Optional @@ -98,7 +99,6 @@ "itest", "itest-and-push-to-registry", "security-check", - "performance-check", "push-to-registry", ) # Default values for _log @@ -129,14 +129,21 @@ "paasta_native", "adhoc", "kubernetes", + "eks", "tron", "flink", "cassandracluster", "kafkacluster", + "vitesscluster", "monkrelays", "nrtsearchservice", ) +PAASTA_K8S_INSTANCE_TYPES = { + "kubernetes", + "eks", +} + INSTANCE_TYPE_TO_K8S_NAMESPACE = { "marathon": "paasta", "adhoc": "paasta", @@ -144,6 +151,7 @@ "flink": "paasta-flinks", "cassandracluster": "paasta-cassandraclusters", "kafkacluster": "paasta-kafkaclusters", + "vitesscluster": "paasta-vitessclusters", "nrtsearchservice": "paasta-nrtsearchservices", } @@ -330,6 +338,7 @@ class InstanceConfigDict(TypedDict, total=False): branch: str iam_role: str iam_role_provider: str + service: str class BranchDictV1(TypedDict, total=False): @@ -536,6 +545,19 @@ def get_cap_drop(self) -> Iterable[DockerParameter]: for cap in CAPS_DROP: yield {"key": "cap-drop", "value": cap} + def get_cap_args(self) -> Iterable[DockerParameter]: + """Generate all --cap-add/--cap-drop parameters, ensuring not to have overlapping settings""" + cap_adds = list(self.get_cap_add()) + if cap_adds and is_using_unprivileged_containers(): + log.warning( + "Unprivileged containerizer detected, adding capabilities will not work properly" + ) + yield from cap_adds + added_caps = [cap["value"] for cap in cap_adds] + for cap in self.get_cap_drop(): + if cap["value"] not in added_caps: + yield cap + def format_docker_parameters( self, with_labels: bool = True, @@ -570,9 +592,8 @@ def format_docker_parameters( if extra_docker_args: for key, value in extra_docker_args.items(): parameters.extend([{"key": key, "value": value}]) - parameters.extend(self.get_cap_add()) parameters.extend(self.get_docker_init()) - parameters.extend(self.get_cap_drop()) + parameters.extend(self.get_cap_args()) return parameters def use_docker_disk_quota( @@ -918,7 +939,7 @@ def check_deploy_group(self) -> Tuple[bool, str]: if deploy_group not in pipeline_deploy_groups: return ( False, - f"{self.service}.{self.instance} uses deploy_group {deploy_group}, but it is not deploy.yaml", + f"{self.service}.{self.instance} uses deploy_group {deploy_group}, but {deploy_group} is not deployed to in deploy.yaml", ) # noqa: E501 return True, "" @@ -939,7 +960,7 @@ def get_iam_role(self) -> str: return self.config_dict.get("iam_role", "") def get_iam_role_provider(self) -> str: - return self.config_dict.get("iam_role_provider", "kiam") + return self.config_dict.get("iam_role_provider", "aws") def get_role(self) -> 
Optional[str]: """Which mesos role of nodes this job should run on.""" @@ -1910,12 +1931,20 @@ class KubeStateMetricsCollectorConfigDict(TypedDict, total=False): label_renames: Dict[str, str] +class TopologySpreadConstraintDict(TypedDict, total=False): + topology_key: str + when_unsatisfiable: Literal["ScheduleAnyway", "DoNotSchedule"] + max_skew: int + + class SystemPaastaConfigDict(TypedDict, total=False): allowed_pools: Dict[str, List[str]] + api_client_timeout: int api_endpoints: Dict[str, str] api_profiling_config: Dict auth_certificate_ttl: str auto_config_instance_types_enabled: Dict[str, bool] + auto_config_instance_type_aliases: Dict[str, str] auto_hostname_unique_size: int boost_regions: List[str] cluster_autoscaler_max_decrease: float @@ -1930,7 +1959,7 @@ class SystemPaastaConfigDict(TypedDict, total=False): dashboard_links: Dict[str, Dict[str, str]] datastore_credentials_vault_env_overrides: Dict[str, str] default_push_groups: List - default_should_run_uwsgi_exporter_sidecar: bool + default_should_use_uwsgi_exporter: bool deploy_blacklist: UnsafeDeployBlacklist deployd_big_bounce_deadline: float deployd_log_level: str @@ -1988,7 +2017,7 @@ class SystemPaastaConfigDict(TypedDict, total=False): pdb_max_unavailable: Union[str, int] pki_backend: str pod_defaults: Dict[str, Any] - topology_spread_constraints: List[Dict[str, Any]] + topology_spread_constraints: List[TopologySpreadConstraintDict] previous_marathon_servers: List[MarathonConfigDict] readiness_check_prefix_template: List[str] register_k8s_pods: bool @@ -2009,13 +2038,11 @@ class SystemPaastaConfigDict(TypedDict, total=False): synapse_port: int taskproc: Dict tron: Dict - uwsgi_exporter_sidecar_image_url: str gunicorn_exporter_sidecar_image_url: str vault_cluster_map: Dict vault_environment: str volumes: List[DockerVolume] zookeeper: str - tron_use_k8s: bool tron_k8s_cluster_overrides: Dict[str, str] skip_cpu_override_validation: List[str] spark_k8s_role: str @@ -2033,6 +2060,7 @@ class SystemPaastaConfigDict(TypedDict, total=False): spark_use_eks_default: bool sidecar_requirements_config: Dict[str, KubeContainerResourceRequest] eks_cluster_aliases: Dict[str, str] + secret_sync_delay_seconds: float def load_system_paasta_config( @@ -2109,6 +2137,9 @@ def __eq__(self, other: Any) -> bool: def __repr__(self) -> str: return f"SystemPaastaConfig({self.config_dict!r}, {self.directory!r})" + def get_secret_sync_delay_seconds(self) -> float: + return self.config_dict.get("secret_sync_delay_seconds", 0) + def get_spark_use_eks_default(self) -> bool: return self.config_dict.get("spark_use_eks_default", False) @@ -2213,6 +2244,20 @@ def get_auto_hostname_unique_size(self) -> int: def get_auto_config_instance_types_enabled(self) -> Dict[str, bool]: return self.config_dict.get("auto_config_instance_types_enabled", {}) + def get_auto_config_instance_type_aliases(self) -> Dict[str, str]: + """ + Allow re-using another instance type's autotuned data. This is useful when an instance can be trivially moved around + type-wise as it allows us to avoid data races/issues with the autotuned recommendations generator/updater. + """ + return self.config_dict.get("auto_config_instance_type_aliases", {}) + + def get_api_client_timeout(self) -> int: + """ + We've seen the Paasta API get hung up sometimes and the client not realizing this will sit idle forever. 
+ This will be used to specify the default timeout + """ + return self.config_dict.get("api_client_timeout", 120) + def get_api_endpoints(self) -> Mapping[str, str]: return self.config_dict["api_endpoints"] @@ -2583,7 +2628,7 @@ def get_taskproc(self) -> Dict: def get_disabled_watchers(self) -> List: return self.config_dict.get("disabled_watchers", []) - def get_topology_spread_constraints(self) -> List[Dict[str, Any]]: + def get_topology_spread_constraints(self) -> List[TopologySpreadConstraintDict]: """List of TopologySpreadConstraints that will be applied to all Pods in the cluster""" return self.config_dict.get("topology_spread_constraints", []) @@ -2690,15 +2735,8 @@ def get_git_repo_config(self, repo_name: str) -> Dict: """ return self.get_git_config().get("repos", {}).get(repo_name, {}) - def get_uwsgi_exporter_sidecar_image_url(self) -> str: - """Get the docker image URL for the uwsgi_exporter sidecar container""" - return self.config_dict.get( - "uwsgi_exporter_sidecar_image_url", - "docker-paasta.yelpcorp.com:443/uwsgi_exporter-k8s-sidecar:v1.3.0-yelp0", - ) - - def default_should_run_uwsgi_exporter_sidecar(self) -> bool: - return self.config_dict.get("default_should_run_uwsgi_exporter_sidecar", False) + def default_should_use_uwsgi_exporter(self) -> bool: + return self.config_dict.get("default_should_use_uwsgi_exporter", False) def get_gunicorn_exporter_sidecar_image_url(self) -> str: """Get the docker image URL for the gunicorn_exporter sidecar container""" @@ -2728,9 +2766,6 @@ def get_mark_for_deployment_should_ping_for_unhealthy_pods(self) -> bool: "mark_for_deployment_should_ping_for_unhealthy_pods", True ) - def get_tron_use_k8s_default(self) -> bool: - return self.config_dict.get("tron_use_k8s", False) - def get_spark_k8s_role(self) -> str: return self.config_dict.get("spark_k8s_role", "spark") @@ -3405,8 +3440,18 @@ def load_service_instance_auto_configs( soa_dir: str = DEFAULT_SOA_DIR, ) -> Dict[str, Dict[str, Any]]: enabled_types = load_system_paasta_config().get_auto_config_instance_types_enabled() - conf_file = f"{instance_type}-{cluster}" - if enabled_types.get(instance_type): + # this looks a little funky: but what we're generally trying to do here is ensure that + # certain types of instances can be moved between instance types without having to worry + # about any sort of data races (or data weirdness) in autotune. + # instead, what we do is map certain instance types to whatever we've picked as the "canonical" + # instance type in autotune and always merge from there. 
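+    # Illustrative example (hypothetical config, not part of this change): with
+    #   auto_config_instance_type_aliases = {"eks": "kubernetes"}
+    # an "eks" instance reads its autotuned values from the same
+    # f"{AUTO_SOACONFIG_SUBDIR}/kubernetes-{cluster}" file that the equivalent
+    # "kubernetes" instance uses, so moving an instance between the two types does
+    # not reset (or race with) the autotune recommendations for it.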
+ realized_type = ( + load_system_paasta_config() + .get_auto_config_instance_type_aliases() + .get(instance_type, instance_type) + ) + conf_file = f"{realized_type}-{cluster}" + if enabled_types.get(realized_type): return service_configuration_lib.read_extra_service_information( service, f"{AUTO_SOACONFIG_SUBDIR}/{conf_file}", @@ -4230,3 +4275,8 @@ def get_runtimeenv() -> str: # we could also just crash or return None, but this seems a little easier to find # should we somehow run into this at Yelp return "unknown" + + +@lru_cache(maxsize=1) +def is_using_unprivileged_containers() -> bool: + return "podman" in os.getenv("DOCKER_HOST", "") diff --git a/paasta_tools/vitess_tools.py b/paasta_tools/vitesscluster_tools.py similarity index 64% rename from paasta_tools/vitess_tools.py rename to paasta_tools/vitesscluster_tools.py index 9cd47d66af..3337f2d7cc 100644 --- a/paasta_tools/vitess_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -5,20 +5,16 @@ import service_configuration_lib -from paasta_tools.kubernetes_tools import sanitise_kubernetes_name from paasta_tools.kubernetes_tools import sanitised_cr_name from paasta_tools.long_running_service_tools import LongRunningServiceConfig from paasta_tools.long_running_service_tools import LongRunningServiceConfigDict from paasta_tools.utils import BranchDictV2 -from paasta_tools.utils import compose_job_id -from paasta_tools.utils import decompose_job_id from paasta_tools.utils import deep_merge_dictionaries from paasta_tools.utils import DEFAULT_SOA_DIR -from paasta_tools.utils import InvalidJobNameError from paasta_tools.utils import load_service_instance_config from paasta_tools.utils import load_v2_deployments_json -KUBERNETES_NAMESPACE = "paasta-vitess" +KUBERNETES_NAMESPACE = "paasta-vitessclusters" log = logging.getLogger(__name__) log.addHandler(logging.NullHandler()) @@ -31,7 +27,7 @@ class VitessDeploymentConfigDict(LongRunningServiceConfigDict, total=False): class VitessDeploymentConfig(LongRunningServiceConfig): config_dict: VitessDeploymentConfigDict - config_filename_prefix = "vitess" + config_filename_prefix = "vitesscluster" def __init__( self, @@ -52,61 +48,9 @@ def __init__( branch_dict=branch_dict, ) - def get_service_name_smartstack(self) -> str: - """ - We register in vitess.main - """ - return "vitess_" + self.get_instance() - - def get_nerve_namespace(self) -> str: - """ - We register in vitess.main - """ - return "main" - - def get_registrations(self) -> List[str]: - """ - We register in vitess.main - """ - registrations = self.config_dict.get("registrations", []) - for registration in registrations: - try: - decompose_job_id(registration) - except InvalidJobNameError: - log.error( - "Provided registration {} for service " - "{} is invalid".format(registration, self.service) - ) - - return registrations or [ - compose_job_id(self.get_service_name_smartstack(), "main") - ] - - def get_kubernetes_namespace(self) -> str: - return KUBERNETES_NAMESPACE - - def get_namespace(self) -> str: - """Get namespace from config, default to 'paasta'""" - return self.config_dict.get("namespace", KUBERNETES_NAMESPACE) - def get_instances(self, with_limit: bool = True) -> int: return self.config_dict.get("replicas", 1) - def get_bounce_method(self) -> str: - """ - Need to map to a paasta bounce method and crossover is the closest - """ - return "crossover" - - def get_sanitised_service_name(self) -> str: - return sanitise_kubernetes_name(self.get_service()) - - def get_sanitised_instance_name(self) -> str: - return 
sanitise_kubernetes_name(self.get_instance()) - - def get_sanitised_deployment_name(self) -> str: - return self.get_sanitised_instance_name() - def validate( self, params: List[str] = [ @@ -139,7 +83,7 @@ def load_vitess_instance_config( service, soa_dir=soa_dir ) instance_config = load_service_instance_config( - service, instance, " vitesscluster", cluster, soa_dir=soa_dir + service, instance, "vitesscluster", cluster, soa_dir=soa_dir ) general_config = deep_merge_dictionaries( overrides=instance_config, defaults=general_config @@ -176,6 +120,6 @@ def cr_id(service: str, instance: str) -> Mapping[str, str]: group="yelp.com", version="v1alpha1", namespace=KUBERNETES_NAMESPACE, - plural="vitess", + plural="vitessclusters", name=sanitised_cr_name(service, instance), ) diff --git a/requirements-dev-minimal.txt b/requirements-dev-minimal.txt index f586bdfef8..1c46473aa1 100644 --- a/requirements-dev-minimal.txt +++ b/requirements-dev-minimal.txt @@ -1,6 +1,9 @@ astroid asynctest coverage +# VSCode debugging requirement +# See https://code.visualstudio.com/docs/python/debugging#_local-script-debugging +debugpy docutils flake8 freezegun diff --git a/requirements-dev.txt b/requirements-dev.txt index a926dc7482..dbe193a3be 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,7 +5,9 @@ asynctest==0.12.0 Babel==2.9.1 cfgv==2.0.1 coverage==6.5.0 +debugpy==1.8.0 distlib==0.3.4 +docutils==0.12 exceptiongroup==1.1.2 filelock==3.0.12 flake8==3.5.0 diff --git a/requirements-docs.txt b/requirements-docs.txt index f765b4d555..d585e6aa06 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -11,6 +11,7 @@ freezegun==0.3.7 identify==1.0.6 imagesize==0.7.1 isort==4.2.5 +jinja2==3.0.3 lazy-object-proxy==1.4.3 mccabe==0.6.1 mock==2.0.0 diff --git a/requirements-minimal.txt b/requirements-minimal.txt index dfffab112c..a42e3006c9 100644 --- a/requirements-minimal.txt +++ b/requirements-minimal.txt @@ -54,7 +54,7 @@ requests-cache >= 0.4.10 retry ruamel.yaml sensu-plugin -service-configuration-lib >= 2.18.0 +service-configuration-lib >= 2.18.11 signalfx slackclient >= 1.2.1 sticht >= 1.1.0 diff --git a/requirements.txt b/requirements.txt index f6b6bfc997..aa6201a9bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,9 +8,9 @@ async-timeout==3.0.0 attrs==19.2.0 binaryornot==0.4.4 boto==2.48.0 -boto3==1.11.11 +boto3==1.34.22 boto3-type-annotations==0.3.1 -botocore==1.14.11 +botocore==1.34.22 bravado==10.4.1 bravado-core==5.12.1 cachetools==2.0.1 @@ -22,7 +22,6 @@ cookiecutter==1.4.0 croniter==1.3.4 decorator==4.1.2 docker-py==1.2.3 -docutils==0.12 dulwich==0.17.3 ephemeral-port-reserve==1.1.0 future==0.16.0 @@ -89,9 +88,9 @@ retry==0.9.2 rfc3987==1.3.7 rsa==4.7.2 ruamel.yaml==0.15.96 -s3transfer==0.3.3 +s3transfer==0.10.0 sensu-plugin==0.3.1 -service-configuration-lib==2.18.0 +service-configuration-lib==2.18.11 setuptools==39.0.1 signalfx==1.0.17 simplejson==3.10.0 @@ -111,7 +110,7 @@ translationstring==1.3 typing-extensions==4.3.0 tzlocal==1.2 url-normalize==1.4.2 -urllib3==1.24.3 +urllib3==1.26.18 utaw==0.2.0 venusian==1.1.0 webcolors==1.7 diff --git a/setup.py b/setup.py index 1942cbb3a0..20a2544732 100644 --- a/setup.py +++ b/setup.py @@ -43,8 +43,9 @@ def get_install_requires(): "paasta_tools/am_i_mesos_leader.py", "paasta_tools/apply_external_resources.py", "paasta_tools/autoscale_all_services.py", - "paasta_tools/check_flink_services_health.py", + "paasta_tools/check_autoscaler_max_instances.py", "paasta_tools/check_cassandracluster_services_replication.py", + 
"paasta_tools/check_flink_services_health.py", "paasta_tools/check_kubernetes_api.py", "paasta_tools/check_kubernetes_services_replication.py", "paasta_tools/check_oom_events.py", @@ -52,16 +53,16 @@ def get_install_requires(): "paasta_tools/cleanup_kubernetes_cr.py", "paasta_tools/cleanup_kubernetes_crd.py", "paasta_tools/cleanup_kubernetes_jobs.py", + "paasta_tools/cli/paasta_tabcomplete.sh", "paasta_tools/delete_kubernetes_deployments.py", - "paasta_tools/paasta_deploy_tron_jobs", "paasta_tools/generate_all_deployments", "paasta_tools/generate_deployments_for_service.py", "paasta_tools/generate_services_file.py", "paasta_tools/generate_services_yaml.py", "paasta_tools/get_mesos_leader.py", - "paasta_tools/kubernetes/bin/paasta_secrets_sync.py", - "paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py", "paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py", + "paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py", + "paasta_tools/kubernetes/bin/paasta_secrets_sync.py", "paasta_tools/list_marathon_service_instances.py", "paasta_tools/log_task_lifecycle_events.py", "paasta_tools/marathon_dashboard.py", @@ -69,20 +70,20 @@ def get_install_requires(): "paasta_tools/monitoring/check_marathon_has_apps.py", "paasta_tools/monitoring/check_mesos_active_frameworks.py", "paasta_tools/monitoring/check_mesos_duplicate_frameworks.py", - "paasta_tools/monitoring/check_mesos_quorum.py", "paasta_tools/monitoring/check_mesos_outdated_tasks.py", + "paasta_tools/monitoring/check_mesos_quorum.py", "paasta_tools/monitoring/kill_orphaned_docker_containers.py", - "paasta_tools/cli/paasta_tabcomplete.sh", "paasta_tools/paasta_cluster_boost.py", + "paasta_tools/paasta_deploy_tron_jobs", "paasta_tools/paasta_execute_docker_command.py", "paasta_tools/paasta_maintenance.py", "paasta_tools/paasta_metastatus.py", "paasta_tools/paasta_remote_run.py", - "paasta_tools/setup_kubernetes_job.py", - "paasta_tools/setup_kubernetes_crd.py", + "paasta_tools/setup_istio_mesh.py", "paasta_tools/setup_kubernetes_cr.py", + "paasta_tools/setup_kubernetes_crd.py", "paasta_tools/setup_kubernetes_internal_crd.py", - "paasta_tools/setup_istio_mesh.py", + "paasta_tools/setup_kubernetes_job.py", "paasta_tools/setup_prometheus_adapter_config.py", "paasta_tools/synapse_srv_namespaces_fact.py", ] @@ -106,6 +107,7 @@ def get_install_requires(): "paasta_oom_logger=paasta_tools.oom_logger:main", "paasta_broadcast_log=paasta_tools.broadcast_log_to_services:main", "paasta_dump_locally_running_services=paasta_tools.dump_locally_running_services:main", + "paasta_habitat_fixer=paasta_tools.contrib.habitat_fixer:main", ], "paste.app_factory": ["paasta-api-config=paasta_tools.api.api:make_app"], }, diff --git a/tests/api/test_autoscaler.py b/tests/api/test_autoscaler.py index c78cc63220..058ca85dc7 100644 --- a/tests/api/test_autoscaler.py +++ b/tests/api/test_autoscaler.py @@ -12,21 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import mock +import pytest from pyramid import testing from paasta_tools.api.views import autoscaler +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig from paasta_tools.marathon_tools import MarathonServiceConfig @mock.patch("paasta_tools.api.views.autoscaler.get_instance_config", autospec=True) -def test_get_autoscaler_count(mock_get_instance_config): +@pytest.mark.parametrize( + "instance_type_class", + ( + KubernetesDeploymentConfig, + EksDeploymentConfig, + ), +) +def test_get_autoscaler_count(mock_get_instance_config, instance_type_class): request = testing.DummyRequest() request.swagger_data = {"service": "fake_service", "instance": "fake_instance"} mock_get_instance_config.return_value = mock.MagicMock( get_instances=mock.MagicMock(return_value=123), - spec=KubernetesDeploymentConfig, + spec=instance_type_class, ) response = autoscaler.get_autoscaler_count(request) assert response.json_body["desired_instances"] == 123 @@ -54,7 +63,16 @@ def test_update_autoscaler_count_marathon(mock_get_instance_config): @mock.patch("paasta_tools.api.views.autoscaler.get_instance_config", autospec=True) -def test_update_autoscaler_count_kubernetes(mock_get_instance_config): +@pytest.mark.parametrize( + "instance_type_class", + ( + KubernetesDeploymentConfig, + EksDeploymentConfig, + ), +) +def test_update_autoscaler_count_kubernetes( + mock_get_instance_config, instance_type_class +): request = testing.DummyRequest() request.swagger_data = { "service": "fake_kubernetes_service", @@ -65,7 +83,7 @@ def test_update_autoscaler_count_kubernetes(mock_get_instance_config): mock_get_instance_config.return_value = mock.MagicMock( get_min_instances=mock.MagicMock(return_value=100), get_max_instances=mock.MagicMock(return_value=200), - spec=KubernetesDeploymentConfig, + spec=instance_type_class, ) response = autoscaler.update_autoscaler_count(request) @@ -74,7 +92,14 @@ def test_update_autoscaler_count_kubernetes(mock_get_instance_config): @mock.patch("paasta_tools.api.views.autoscaler.get_instance_config", autospec=True) -def test_update_autoscaler_count_warning(mock_get_instance_config): +@pytest.mark.parametrize( + "instance_type_class", + ( + KubernetesDeploymentConfig, + EksDeploymentConfig, + ), +) +def test_update_autoscaler_count_warning(mock_get_instance_config, instance_type_class): request = testing.DummyRequest() request.swagger_data = { "service": "fake_service", @@ -85,7 +110,7 @@ def test_update_autoscaler_count_warning(mock_get_instance_config): mock_get_instance_config.return_value = mock.MagicMock( get_min_instances=mock.MagicMock(return_value=10), get_max_instances=mock.MagicMock(return_value=100), - spec=KubernetesDeploymentConfig, + spec=instance_type_class, ) response = autoscaler.update_autoscaler_count(request) diff --git a/tests/api/test_instance.py b/tests/api/test_instance.py index 829fc10f94..a089b13c1b 100644 --- a/tests/api/test_instance.py +++ b/tests/api/test_instance.py @@ -18,11 +18,13 @@ import mock import pytest from kubernetes.client import V1Pod +from kubernetes.client.rest import ApiException from marathon.models.app import MarathonApp from marathon.models.app import MarathonTask from pyramid import testing from requests.exceptions import ReadTimeout +from paasta_tools import eks_tools from paasta_tools import kubernetes_tools from paasta_tools import marathon_tools from paasta_tools.api import settings @@ -48,7 +50,6 @@ @pytest.mark.parametrize("include_mesos", [False, True]) 
@pytest.mark.parametrize("include_envoy", [False, True]) -@pytest.mark.parametrize("include_smartstack", [False, True]) @mock.patch("paasta_tools.api.views.instance.marathon_mesos_status", autospec=True) @mock.patch( "paasta_tools.api.views.instance.marathon_service_mesh_status", autospec=True @@ -82,7 +83,6 @@ def test_instance_status_marathon( mock_load_service_namespace_config, mock_marathon_service_mesh_status, mock_marathon_mesos_status, - include_smartstack, include_envoy, include_mesos, ): @@ -121,7 +121,6 @@ def test_instance_status_marathon( "service": "fake_service", "instance": "fake_instance", "verbose": 2, - "include_smartstack": include_smartstack, "include_envoy": include_envoy, "include_mesos": include_mesos, } @@ -131,8 +130,6 @@ def test_instance_status_marathon( "marathon_job_status_field1": "field1_value", "marathon_job_status_field2": "field2_value", } - if include_smartstack: - expected_response["smartstack"] = mock_marathon_service_mesh_status.return_value if include_envoy: expected_response["envoy"] = mock_marathon_service_mesh_status.return_value if include_mesos: @@ -151,18 +148,6 @@ def test_instance_status_marathon( "fake_service", "fake_instance", 2 ) expected_marathon_service_mesh_status_calls = [] - if include_smartstack: - expected_marathon_service_mesh_status_calls.append( - mock.call( - "fake_service", - ServiceMesh.SMARTSTACK, - "fake_instance", - mock_service_config, - mock_load_service_namespace_config.return_value, - mock_app.tasks, - should_return_individual_backends=True, - ), - ) if include_envoy: expected_marathon_service_mesh_status_calls.append( mock.call( @@ -530,7 +515,26 @@ def test_marathon_service_mesh_status( @pytest.mark.asyncio -async def test_kubernetes_smartstack_status(): +@pytest.mark.parametrize( + "mock_job_config", + ( + kubernetes_tools.KubernetesDeploymentConfig( + service="fake_service", + cluster="fake_cluster", + instance="fake_instance", + config_dict={"bounce_method": "fake_bounce"}, + branch_dict=None, + ), + eks_tools.EksDeploymentConfig( + service="fake_service", + cluster="fake_cluster", + instance="fake_instance", + config_dict={"bounce_method": "fake_bounce"}, + branch_dict=None, + ), + ), +) +async def test_kubernetes_smartstack_status(mock_job_config): with asynctest.patch( "paasta_tools.api.views.instance.pik.match_backends_and_pods", autospec=True ) as mock_match_backends_and_pods, asynctest.patch( @@ -568,13 +572,6 @@ async def test_kubernetes_smartstack_status(): mock_pod = mock.create_autospec(V1Pod) mock_match_backends_and_pods.return_value = [(mock_backend, mock_pod)] - mock_job_config = kubernetes_tools.KubernetesDeploymentConfig( - service="fake_service", - cluster="fake_cluster", - instance="fake_instance", - config_dict={"bounce_method": "fake_bounce"}, - branch_dict=None, - ) mock_service_namespace_config = ServiceNamespaceConfig() mock_settings = mock.Mock() @@ -1100,7 +1097,6 @@ def test_kubernetes_instance_status_bounce_method(): instance=inst, instance_type="kubernetes", verbose=0, - include_smartstack=False, include_envoy=False, settings=settings, ) @@ -1135,7 +1131,6 @@ def test_kubernetes_instance_status_evicted_nodes(): instance="fake-inst", instance_type="kubernetes", verbose=0, - include_smartstack=False, include_envoy=False, settings=mock_settings, ) @@ -1196,7 +1191,6 @@ def test_instance_mesh_status( request.swagger_data = { "service": "fake_service", "instance": "fake_inst", - "include_smartstack": False, } instance_mesh = instance.instance_mesh_status(request) @@ -1212,7 +1206,6 @@ def 
test_instance_mesh_status( instance="fake_inst", instance_type="flink", settings=settings, - include_smartstack=False, include_envoy=None, # default of true in api specs ), ] @@ -1249,7 +1242,6 @@ def test_instance_mesh_status_error( request.swagger_data = { "service": "fake_service", "instance": "fake_inst", - "include_smartstack": False, } with pytest.raises(ApiFailure) as excinfo: @@ -1279,13 +1271,21 @@ def mock_request(self): } return request + @pytest.mark.parametrize( + "instance_type", + ( + "kubernetes", + "eks", + ), + ) def test_success( self, mock_pik_bounce_status, mock_validate_service_instance, mock_request, + instance_type, ): - mock_validate_service_instance.return_value = "kubernetes" + mock_validate_service_instance.return_value = instance_type response = instance.bounce_status(mock_request) assert response == mock_pik_bounce_status.return_value @@ -1300,6 +1300,18 @@ def test_not_found( instance.bounce_status(mock_request) assert excinfo.value.err == 404 + def test_app_not_found( + self, + mock_pik_bounce_status, + mock_validate_service_instance, + mock_request, + ): + mock_validate_service_instance.return_value = "kubernetes" + mock_pik_bounce_status.side_effect = [ApiException(status=404)] + with pytest.raises(ApiFailure) as excinfo: + instance.bounce_status(mock_request) + assert excinfo.value.err == 404 + def test_not_kubernetes( self, mock_pik_bounce_status, diff --git a/tests/cli/test_cmds_autoscale.py b/tests/cli/test_cmds_autoscale.py index 6b87fe4926..bfa706d6bb 100644 --- a/tests/cli/test_cmds_autoscale.py +++ b/tests/cli/test_cmds_autoscale.py @@ -12,8 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import mock +import pytest from paasta_tools.cli.cmds.autoscale import paasta_autoscale +from paasta_tools.eks_tools import EksDeploymentConfig +from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig @mock.patch("paasta_tools.cli.cmds.autoscale.figure_out_service_name", autospec=True) @@ -21,8 +24,18 @@ "paasta_tools.cli.cmds.autoscale.client.get_paasta_oapi_client", autospec=True ) @mock.patch("paasta_tools.cli.cmds.autoscale._log_audit", autospec=True) +@pytest.mark.parametrize( + "instance_type_class", + ( + EksDeploymentConfig, + KubernetesDeploymentConfig, + ), +) def test_paasta_autoscale( - mock__log_audit, mock_get_paasta_oapi_client, mock_figure_out_service_name + mock__log_audit, + mock_get_paasta_oapi_client, + mock_figure_out_service_name, + instance_type_class, ): service = "fake_service" instance = "fake_instance" @@ -45,5 +58,42 @@ def test_paasta_autoscale( ) mock__log_audit.return_value = None - paasta_autoscale(args) + with mock.patch( + "paasta_tools.cli.cmds.autoscale.get_instance_configs_for_service", + return_value=iter([mock.Mock(__class__=instance_type_class)]), + autospec=True, + ): + paasta_autoscale(args) assert mock_api.update_autoscaler_count.call_count == 1 + + +@mock.patch("paasta_tools.cli.cmds.autoscale.figure_out_service_name", autospec=True) +@mock.patch( + "paasta_tools.cli.cmds.autoscale.client.get_paasta_oapi_client", autospec=True +) +@mock.patch("paasta_tools.cli.cmds.autoscale._log_audit", autospec=True) +def test_paasta_autoscale_no_config( + mock__log_audit, + mock_get_paasta_oapi_client, + mock_figure_out_service_name, +): + service = "fake_service" + instance = "fake_instance" + cluster = "fake_cluster" + + mock_figure_out_service_name.return_value = service + mock_api = mock.Mock() + mock_get_paasta_oapi_client.return_value = 
mock.Mock(autoscaler=mock_api) + + args = mock.MagicMock() + args.service = service + args.clusters = cluster + args.instances = instance + args.set = 14 + + with mock.patch( + "paasta_tools.cli.cmds.autoscale.get_instance_configs_for_service", + return_value=iter(()), + autospec=True, + ): + assert paasta_autoscale(args) == 1 diff --git a/tests/cli/test_cmds_local_run.py b/tests/cli/test_cmds_local_run.py index c5a77ed402..393af3039d 100644 --- a/tests/cli/test_cmds_local_run.py +++ b/tests/cli/test_cmds_local_run.py @@ -17,6 +17,7 @@ import docker import mock +import pytest from pytest import mark from pytest import raises @@ -137,6 +138,7 @@ def test_dry_run_json_dict( assert ret == 0 # Ensure it's a dict and check some keys + print("Output", out) expected_out = json.loads(out) assert isinstance(expected_out, dict) assert "docker_hash" in expected_out @@ -407,6 +409,7 @@ def test_configure_and_run_command_uses_cmd_from_config( cluster="fake_cluster", system_paasta_config=system_paasta_config, args=args, + assume_role_aws_account=None, ) assert return_code == 0 mock_run_docker_container.assert_called_once_with( @@ -430,6 +433,7 @@ def test_configure_and_run_command_uses_cmd_from_config( skip_secrets=False, assume_role_arn="", assume_pod_identity=False, + assume_role_aws_account=None, use_okta_role=False, ) @@ -475,6 +479,7 @@ def test_configure_and_run_uses_bash_by_default_when_interactive( cluster="fake_cluster", system_paasta_config=system_paasta_config, args=args, + assume_role_aws_account="dev", ) assert return_code == 0 mock_secret_provider_kwargs = { @@ -502,6 +507,7 @@ def test_configure_and_run_uses_bash_by_default_when_interactive( secret_provider_kwargs=mock_secret_provider_kwargs, skip_secrets=False, assume_role_arn="", + assume_role_aws_account="dev", assume_pod_identity=False, use_okta_role=False, ) @@ -555,6 +561,7 @@ def test_configure_and_run_pulls_image_when_asked( args=args, system_paasta_config=system_paasta_config, pull_image=True, + assume_role_aws_account="dev", ) assert return_code == 0 mock_docker_pull_image.assert_called_once_with("fake_registry/fake_image") @@ -584,6 +591,7 @@ def test_configure_and_run_pulls_image_when_asked( skip_secrets=False, assume_role_arn="", assume_pod_identity=False, + assume_role_aws_account="dev", use_okta_role=False, ) @@ -633,6 +641,7 @@ def test_configure_and_run_docker_container_defaults_to_interactive_instance( cluster="fake_cluster", args=args, system_paasta_config=system_paasta_config, + assume_role_aws_account="dev", ) assert return_code == 0 mock_secret_provider_kwargs = { @@ -661,6 +670,7 @@ def test_configure_and_run_docker_container_defaults_to_interactive_instance( skip_secrets=False, assume_role_arn="", assume_pod_identity=False, + assume_role_aws_account="dev", use_okta_role=False, ) @@ -718,6 +728,7 @@ def test_configure_and_run_docker_container_respects_docker_sha( cluster="fake_cluster", args=args, system_paasta_config=system_paasta_config, + assume_role_aws_account="dev", ) expected = "fake_registry/services-fake_service:paasta-abcdefg" assert mock_run_docker_container.call_args[1]["docker_url"] == expected @@ -762,6 +773,43 @@ def test_run_success( assert paasta_local_run(args) is None +@pytest.mark.parametrize( + "cluster, aws_account, expected_aws_account", + [ + ("pnw-devc", None, "dev"), + ("pnw-devc", "prod", "prod"), + ("pnw-prod", None, "prod"), + ("pnw-prod", "dev", "dev"), + ], +) +@mock.patch("paasta_tools.cli.cmds.local_run.load_system_paasta_config", autospec=True) 
+@mock.patch("paasta_tools.cli.cmds.local_run.figure_out_service_name", autospec=True) +@mock.patch("paasta_tools.cli.cmds.cook_image.validate_service_name", autospec=True) +@mock.patch( + "paasta_tools.cli.cmds.local_run.configure_and_run_docker_container", autospec=True +) +def test_assume_role_aws_account( + mock_run_docker_container, + mock_validate_service_name, + mock_figure_out_service_name, + mock_system_paasta_config, + cluster, + aws_account, + expected_aws_account, + system_paasta_config, +): + mock_system_paasta_config.return_value = system_paasta_config + + args = mock.MagicMock() + args.cluster = cluster + args.assume_role_aws_account = aws_account + + paasta_local_run(args) + + _, kwargs = mock_run_docker_container.call_args + assert kwargs.get("assume_role_aws_account", "") == expected_aws_account + + @mock.patch("paasta_tools.cli.cmds.local_run.figure_out_service_name", autospec=True) @mock.patch( "paasta_tools.cli.cmds.local_run.configure_and_run_docker_container", autospec=True @@ -1925,6 +1973,7 @@ def test_volumes_are_deduped(mock_exists): "/etc/paasta", ), args=mock.Mock(yelpsoa_config_root="/blurp/durp", volumes=[]), + assume_role_aws_account="dev", ) args, kwargs = mock_run_docker_container.call_args assert kwargs["volumes"] == ["/hostPath:/containerPath:ro"] @@ -1980,6 +2029,7 @@ def test_missing_volumes_skipped(mock_exists): "/etc/paasta", ), args=mock.Mock(yelpsoa_config_root="/blurp/durp", volumes=[]), + assume_role_aws_account="dev", ) args, kwargs = mock_run_docker_container.call_args assert kwargs["volumes"] == [] @@ -2113,6 +2163,13 @@ def test_run_docker_container_assume_aws_role( autospec=None, ) @mock.patch("os.makedirs", autospec=True) +@mark.parametrize( + "original_service_name, override_service_name", + ( + ("fake_service", "fake_service"), # no service override + ("fake_service", "super_fake_service"), # service override + ), +) def test_run_docker_container_secret_volumes( mock_os_makedirs, mock_open, @@ -2123,6 +2180,8 @@ def test_run_docker_container_secret_volumes( mock_execlpe, mock_get_docker_run_cmd, mock_pick_random_port, + original_service_name, + override_service_name, ): mock_docker_client = mock.MagicMock(spec_set=docker.Client) mock_docker_client.attach = mock.MagicMock(spec_set=docker.Client.attach) @@ -2130,8 +2189,12 @@ def test_run_docker_container_secret_volumes( mock_docker_client.remove_container = mock.MagicMock( spec_set=docker.Client.remove_container ) + mock_service_manifest = mock.MagicMock(spec=MarathonServiceConfig) mock_service_manifest.cluster = "fake_cluster" + mock_service_manifest.get_service = mock.MagicMock( + return_value=override_service_name + ) # Coverage for binary file vs non-binary file mock_text_io_wrapper = mock.Mock(name="text_io_wrapper", autospec=True) @@ -2149,7 +2212,7 @@ def test_run_docker_container_secret_volumes( os.environ["TMPDIR"] = "/tmp/" return_code = run_docker_container( docker_client=mock_docker_client, - service="fake_service", + service=original_service_name, instance="fake_instance", docker_url="fake_hash", volumes=[], @@ -2174,6 +2237,9 @@ def test_run_docker_container_secret_volumes( the_kwargs["volumes"][1], ), "Did not find the expected secret file volume mount" + _, decrypt_kwargs = mock_decrypt_secret_volumes.call_args_list[0] + assert decrypt_kwargs["service_name"] == override_service_name + assert 0 == return_code @@ -2503,12 +2569,18 @@ def test_assume_aws_role( assume_role, assume_pod_identity, use_okta_role, + "dev", ) assert sys_exit.value.code == 1 return else: env = 
assume_aws_role( - mock_config, mock_service, assume_role, assume_pod_identity, use_okta_role + mock_config, + mock_service, + assume_role, + assume_pod_identity, + use_okta_role, + "dev", ) if as_root: @@ -2526,3 +2598,33 @@ def test_assume_aws_role( assert env["AWS_ACCESS_KEY_ID"] == "AKIAFOOBAR" else: assert env["AWS_ACCESS_KEY_ID"] == "AKIAFOOBAR2" + + +@mock.patch("paasta_tools.cli.cmds.local_run.subprocess.run", autospec=True) +@mock.patch("paasta_tools.cli.cmds.local_run.boto3.Session", autospec=True) +def test_assume_aws_role_with_web_identity( + mock_boto, + mock_subprocess_run, +): + mock_config = mock.MagicMock() + mock_config.get_iam_role.return_value = None + mock_service = "mockservice" + + mock_credentials = mock.MagicMock() + mock_credentials.access_key = "AKIAFOOBAR" + mock_credentials.secret_key = "SECRETKEY" + mock_credentials.token = "SESSION_TOKEN" + mock_boto.return_value.get_credentials.return_value = mock_credentials + + os.environ["AWS_ROLE_ARN"] = "arn:aws:iam::123456789:role/mock_role" + os.environ["AWS_WEB_IDENTITY_TOKEN_FILE"] = "/tokenfile" + + env = assume_aws_role(mock_config, mock_service, False, False, False, "dev") + + os.environ.pop("AWS_ROLE_ARN") + os.environ.pop("AWS_WEB_IDENTITY_TOKEN_FILE") + + assert "AWS_ACCESS_KEY_ID" in env + assert "AWS_SECRET_ACCESS_KEY" in env + assert "AWS_SESSION_TOKEN" in env + assert env["AWS_ACCESS_KEY_ID"] == "AKIAFOOBAR" diff --git a/tests/cli/test_cmds_mark_for_deployment.py b/tests/cli/test_cmds_mark_for_deployment.py index 9d523708e3..0deb476e00 100644 --- a/tests/cli/test_cmds_mark_for_deployment.py +++ b/tests/cli/test_cmds_mark_for_deployment.py @@ -43,6 +43,10 @@ class FakeArgs: auto_abandon_delay = 1.0 auto_rollback_delay = 1.0 authors = None + warn = 17 + polling_interval = None + diagnosis_interval = None + time_before_first_diagnosis = None @fixture @@ -137,31 +141,6 @@ class FakeArgsRollback(FakeArgs): mock_is_docker_image_already_in_registry.return_value = False with raises(ValueError): mark_for_deployment.paasta_mark_for_deployment(FakeArgsRollback) - - -@patch("paasta_tools.cli.cmds.mark_for_deployment.validate_service_name", autospec=True) -@patch( - "paasta_tools.cli.cmds.mark_for_deployment.is_docker_image_already_in_registry", - autospec=True, -) -@patch( - "paasta_tools.cli.cmds.mark_for_deployment.get_currently_deployed_version", - autospec=True, -) -@patch("paasta_tools.cli.cmds.mark_for_deployment.list_deploy_groups", autospec=True) -def test_paasta_mark_for_deployment_when_verify_image_succeeds( - mock_list_deploy_groups, - mock_get_currently_deployed_version, - mock_is_docker_image_already_in_registry, - mock_validate_service_name, -): - class FakeArgsRollback(FakeArgs): - verify_image = True - - mock_list_deploy_groups.return_value = ["test_deploy_groups"] - mock_is_docker_image_already_in_registry.return_value = False - with raises(ValueError): - mark_for_deployment.paasta_mark_for_deployment(FakeArgsRollback) mock_is_docker_image_already_in_registry.assert_called_with( "test_service", "fake_soa_dir", @@ -858,3 +837,65 @@ def test_MarkForDeployProcess_happy_path_skips_complete_if_no_auto_rollback( assert mfdp.run() == 0 assert mfdp.trigger_history == ["start_deploy", "mfd_succeeded", "deploy_finished"] assert mfdp.state_history == ["start_deploy", "deploying", "deployed"] + + +@patch( + "paasta_tools.cli.cmds.mark_for_deployment.get_instance_configs_for_service_in_deploy_group_all_clusters", + autospec=True, +) +@patch( + 
"paasta_tools.cli.cmds.mark_for_deployment.MarkForDeploymentProcess.any_slo_failing", + autospec=True, +) +def test_MarkForDeployProcess_get_available_buttons_failing_slos_show_disable_rollback( + mock_any_slo_failing, + mock_get_instance_configs, +): + mock_any_slo_failing.return_value = True + mfdp = WrappedMarkForDeploymentProcess( + service="service", + deploy_info=MagicMock(), + deploy_group="deploy_group", + commit="commit", + old_git_sha="old_git_sha", + git_url="git_url", + auto_rollback=True, + block=True, + soa_dir="soa_dir", + timeout=3600, + warn_pct=50, + auto_certify_delay=None, + auto_abandon_delay=600, + auto_rollback_delay=30, + authors=None, + ) + + # Test only get_available_buttons + mfdp.run_timeout = 1 + mfdp.state = "deploying" + assert "disable_auto_rollbacks" in mfdp.get_available_buttons() + assert "enable_auto_rollbacks" not in mfdp.get_available_buttons() + + mock_any_slo_failing.return_value = True + mfdp = WrappedMarkForDeploymentProcess( + service="service", + deploy_info=MagicMock(), + deploy_group="deploy_group", + commit="commit", + old_git_sha="old_git_sha", + git_url="git_url", + auto_rollback=False, + block=True, + soa_dir="soa_dir", + timeout=3600, + warn_pct=50, + auto_certify_delay=None, + auto_abandon_delay=600, + auto_rollback_delay=30, + authors=None, + ) + + mfdp.run_timeout = 1 + mfdp.state = "deploying" + assert "disable_auto_rollbacks" not in mfdp.get_available_buttons() + assert "enable_auto_rollbacks" in mfdp.get_available_buttons() diff --git a/tests/cli/test_cmds_mesh_status.py b/tests/cli/test_cmds_mesh_status.py index 17820510e1..1cf905ced8 100644 --- a/tests/cli/test_cmds_mesh_status.py +++ b/tests/cli/test_cmds_mesh_status.py @@ -3,6 +3,8 @@ import paasta_tools.paastaapi.models as paastamodels from paasta_tools.cli.cmds import mesh_status +from paasta_tools.eks_tools import EksDeploymentConfig +from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig from paasta_tools.paastaapi import ApiException @@ -50,22 +52,35 @@ def mock_get_oapi_client(fake_backend_location): @mock.patch( "paasta_tools.cli.cmds.mesh_status.get_smartstack_status_human", autospec=True ) +@pytest.mark.parametrize( + "instance_type_class", + ( + EksDeploymentConfig, + KubernetesDeploymentConfig, + ), +) def test_paasta_mesh_status_on_api_endpoint( mock_smtstk_status_human, mock_envoy_status_human, mock_get_oapi_client, fake_backend_location, system_paasta_config, + instance_type_class, ): envoy_output = mock.Mock() mock_envoy_status_human.return_value = [envoy_output] - - code, output = mesh_status.paasta_mesh_status_on_api_endpoint( - cluster="fake_cluster", - service="fake_service", - instance="fake_instance", - system_paasta_config=system_paasta_config, - ) + with mock.patch( + "paasta_tools.cli.cmds.mesh_status.get_instance_configs_for_service", + return_value=iter([mock.Mock(__class__=instance_type_class)]), + autospec=True, + ): + code, output = mesh_status.paasta_mesh_status_on_api_endpoint( + cluster="fake_cluster", + service="fake_service", + instance="fake_instance", + system_paasta_config=system_paasta_config, + soa_dir="/fake/path", + ) assert code == 0 assert output == [envoy_output] @@ -79,12 +94,20 @@ def test_paasta_mesh_status_on_api_endpoint( @mock.patch( "paasta_tools.cli.cmds.mesh_status.get_smartstack_status_human", autospec=True ) +@pytest.mark.parametrize( + "instance_type_class", + ( + EksDeploymentConfig, + KubernetesDeploymentConfig, + ), +) def test_paasta_mesh_status_on_api_endpoint_error( mock_smtstk_status_human, 
mock_envoy_status_human, mock_get_oapi_client, fake_backend_location, system_paasta_config, + instance_type_class, ): client = mock_get_oapi_client.return_value api_error = ApiException( @@ -100,11 +123,17 @@ def test_paasta_mesh_status_on_api_endpoint_error( for exc, expected_code, expected_msg in test_cases: client.service.mesh_instance.side_effect = [exc] + with mock.patch( + "paasta_tools.cli.cmds.mesh_status.get_instance_configs_for_service", + return_value=iter([mock.Mock(__class__=instance_type_class)]), + autospec=True, + ): code, output = mesh_status.paasta_mesh_status_on_api_endpoint( cluster="fake_cluster", service="fake_service", instance="fake_instance", system_paasta_config=system_paasta_config, + soa_dir="/fake/path", ) assert expected_code == code @@ -112,3 +141,32 @@ def test_paasta_mesh_status_on_api_endpoint_error( assert mock_smtstk_status_human.call_args_list == [] assert mock_envoy_status_human.call_args_list == [] + + +@mock.patch("paasta_tools.cli.cmds.mesh_status.get_envoy_status_human", autospec=True) +@mock.patch( + "paasta_tools.cli.cmds.mesh_status.get_smartstack_status_human", autospec=True +) +def test_paasta_mesh_status_on_api_endpoint_error_no_config( + mock_smtstk_status_human, + mock_envoy_status_human, + mock_get_oapi_client, + fake_backend_location, + system_paasta_config, +): + with mock.patch( + "paasta_tools.cli.cmds.mesh_status.get_instance_configs_for_service", + return_value=iter(()), + autospec=True, + ): + with pytest.raises(SystemExit): + mesh_status.paasta_mesh_status_on_api_endpoint( + cluster="fake_cluster", + service="fake_service", + instance="fake_instance", + system_paasta_config=system_paasta_config, + soa_dir="/fake/path", + ) + + assert mock_smtstk_status_human.call_args_list == [] + assert mock_envoy_status_human.call_args_list == [] diff --git a/tests/cli/test_cmds_performance_check.py b/tests/cli/test_cmds_performance_check.py deleted file mode 100644 index 57d7385d5d..0000000000 --- a/tests/cli/test_cmds_performance_check.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2015-2016 Yelp Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import mock -from pytest import raises - -from paasta_tools.cli.cmds import performance_check - - -@mock.patch( - "paasta_tools.cli.cmds.performance_check.validate_service_name", autospec=True -) -@mock.patch("requests.post", autospec=True) -@mock.patch( - "paasta_tools.cli.cmds.performance_check.load_performance_check_config", - autospec=True, -) -def test_submit_performance_check_job_happy( - mock_load_performance_check_config, mock_requests_post, mock_validate_service_name -): - fake_endpoint = "http://foo:1234/submit" - mock_load_performance_check_config.return_value = { - "endpoint": fake_endpoint, - "fake_param": "fake_value", - } - mock_validate_service_name.return_value = True - performance_check.submit_performance_check_job("fake_service", "fake_soa_dir") - mock_requests_post.assert_called_once_with( - url=fake_endpoint, params={"fake_param": "fake_value"} - ) - - -@mock.patch( - "paasta_tools.cli.cmds.performance_check.validate_service_name", autospec=True -) -@mock.patch( - "paasta_tools.cli.cmds.performance_check.submit_performance_check_job", - autospec=True, -) -def test_main_safely_returns_when_exceptions( - mock_submit_performance_check_job, mock_validate_service_name -): - mock_validate_service_name.return_value = True - fake_args = mock.Mock() - fake_args.service = "services-fake_service" - fake_args.soa_dir = "fake_soa_dir" - mock_submit_performance_check_job.side_effect = raises(Exception) - performance_check.perform_performance_check(fake_args) - mock_submit_performance_check_job.assert_called_once_with( - service="fake_service", soa_dir="fake_soa_dir" - ) diff --git a/tests/cli/test_cmds_secret.py b/tests/cli/test_cmds_secret.py index 0f00a18b37..a32abe1a7d 100644 --- a/tests/cli/test_cmds_secret.py +++ b/tests/cli/test_cmds_secret.py @@ -149,7 +149,7 @@ def test_paasta_secret(): "paasta_tools.cli.cmds.secret.is_secrets_for_teams_enabled", autospec=True ) as mock_is_secrets_for_teams_enabled, mock.patch( "paasta_tools.cli.cmds.secret.get_secret", autospec=True - ) as mock_get_kubernetes_secret, mock.patch( + ) as mock_get_secret, mock.patch( "paasta_tools.cli.cmds.secret.KubeClient", autospec=True ) as mock_kube_client, mock.patch( "paasta_tools.cli.cmds.secret.get_namespaces_for_secret", autospec=True @@ -242,8 +242,8 @@ def test_paasta_secret(): ) kube_client = mock.Mock() mock_is_secrets_for_teams_enabled.return_value = True - mock_get_namespaces_for_secret.return_value = {"paasta"} - mock_select_k8s_secret_namespace.return_value = "paasta" + mock_get_namespaces_for_secret.return_value = {"paastasvc-middleearth"} + mock_select_k8s_secret_namespace.return_value = "paastasvc-middleearth" mock_kube_client.return_value = kube_client secret.paasta_secret(mock_args) @@ -252,20 +252,24 @@ def test_paasta_secret(): mock_kube_client.assert_called_with( config_file=KUBE_CONFIG_USER_PATH, context="mesosstage" ) - mock_get_kubernetes_secret.assert_called_with( + mock_get_secret.assert_called_with( kube_client, - get_paasta_secret_name("paasta", "middleearth", "theonering"), - "paasta", + get_paasta_secret_name( + "paastasvc-middleearth", "middleearth", "theonering" + ), + key_name="theonering", + namespace="paastasvc-middleearth", ) # empty namespace list mock_get_namespaces_for_secret.return_value = set() mock_select_k8s_secret_namespace.return_value = None secret.paasta_secret(mock_args) - mock_get_kubernetes_secret.assert_called_with( + mock_get_secret.assert_called_with( kube_client, get_paasta_secret_name("paasta", "middleearth", "theonering"), - "paasta", + 
key_name="theonering", + namespace="paasta", ) mock_args = mock.Mock( diff --git a/tests/cli/test_cmds_spark_run.py b/tests/cli/test_cmds_spark_run.py index 18ed0f3b4b..6723a4d296 100644 --- a/tests/cli/test_cmds_spark_run.py +++ b/tests/cli/test_cmds_spark_run.py @@ -15,15 +15,15 @@ import mock import pytest -from boto3.exceptions import Boto3Error -from mock import Mock +from service_configuration_lib import spark_config from paasta_tools.cli.cmds import spark_run from paasta_tools.cli.cmds.spark_run import _should_get_resource_requirements +from paasta_tools.cli.cmds.spark_run import build_and_push_docker_image from paasta_tools.cli.cmds.spark_run import CLUSTER_MANAGER_K8S -from paasta_tools.cli.cmds.spark_run import CLUSTER_MANAGER_MESOS from paasta_tools.cli.cmds.spark_run import configure_and_run_docker_container from paasta_tools.cli.cmds.spark_run import decide_final_eks_toggle_state +from paasta_tools.cli.cmds.spark_run import DEFAULT_DOCKER_SHM_SIZE from paasta_tools.cli.cmds.spark_run import DEFAULT_DRIVER_CORES_BY_SPARK from paasta_tools.cli.cmds.spark_run import DEFAULT_DRIVER_MEMORY_BY_SPARK from paasta_tools.cli.cmds.spark_run import get_docker_run_cmd @@ -51,6 +51,7 @@ def test_get_docker_run_cmd(mock_getegid, mock_geteuid): docker_cmd = "pyspark" nvidia = False docker_memory_limit = "2g" + docker_shm_size = "1g" docker_cpu_limit = "2" actual = get_docker_run_cmd( @@ -61,10 +62,10 @@ def test_get_docker_run_cmd(mock_getegid, mock_geteuid): docker_cmd, nvidia, docker_memory_limit, + docker_shm_size, docker_cpu_limit, ) - - assert actual[7:] == [ + assert actual[-12:] == [ "--user=1234:100", "--name=fake_name", "--env", @@ -96,9 +97,7 @@ def test_sanitize_container_name(container_name, expected): @pytest.mark.parametrize( "disable_compact_bin_packing,cluster_manager,dir_access,expected", [ - (False, CLUSTER_MANAGER_MESOS, True, False), (False, CLUSTER_MANAGER_K8S, True, True), - (True, CLUSTER_MANAGER_MESOS, True, False), (True, CLUSTER_MANAGER_K8S, True, False), (True, CLUSTER_MANAGER_K8S, False, False), ], @@ -136,23 +135,6 @@ def mock_run(): yield m -@pytest.fixture -def mock_get_docker_client(): - fake_image_info = { - "RepoDigests": [ - DUMMY_DOCKER_IMAGE_DIGEST, - ], - } - docker_client = Mock(inspect_image=Mock(return_value=fake_image_info)) - - with mock.patch( - "paasta_tools.cli.cmds.spark_run.get_docker_client", - return_value=docker_client, - autospec=True, - ) as m: - yield m - - @pytest.mark.parametrize( "args,expected_output", [ @@ -340,6 +322,7 @@ def test_get_spark_env( ), ("spark.cores.max", False, None), (None, False, {}), + (None, True, {"spark.dynamicAllocation.enabled": "true"}), ], ) def test_parse_user_spark_args(spark_args, enable_spark_dra, expected, capsys): @@ -472,6 +455,7 @@ def test_run_docker_container( dry_run=dry_run, nvidia=nvidia, docker_memory_limit=DEFAULT_DRIVER_MEMORY_BY_SPARK, + docker_shm_size=DEFAULT_DOCKER_SHM_SIZE, docker_cpu_limit=DEFAULT_DRIVER_CORES_BY_SPARK, ) mock_get_docker_run_cmd.assert_called_once_with( @@ -482,6 +466,7 @@ def test_run_docker_container( docker_cmd=docker_cmd, nvidia=nvidia, docker_memory_limit=DEFAULT_DRIVER_MEMORY_BY_SPARK, + docker_shm_size=DEFAULT_DOCKER_SHM_SIZE, docker_cpu_limit=DEFAULT_DRIVER_CORES_BY_SPARK, ) if dry_run: @@ -500,11 +485,6 @@ def test_run_docker_container( @mock.patch("paasta_tools.cli.cmds.spark_run.get_username", autospec=True) @mock.patch("paasta_tools.cli.cmds.spark_run.run_docker_container", autospec=True) -@mock.patch( - 
"paasta_tools.cli.cmds.spark_run.send_and_calculate_resources_cost", - autospec=True, - return_value=(10, {"cpus": 10, "mem": 1024}), -) @mock.patch("paasta_tools.cli.cmds.spark_run.get_webui_url", autospec=True) @mock.patch("paasta_tools.cli.cmds.spark_run.create_spark_config_str", autospec=True) @mock.patch("paasta_tools.cli.cmds.spark_run.get_docker_cmd", autospec=True) @@ -539,13 +519,6 @@ def mock_create_spark_config_str(self): @pytest.mark.parametrize( ["cluster_manager", "spark_args_volumes", "expected_volumes"], [ - ( - spark_run.CLUSTER_MANAGER_MESOS, - { - "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw" - }, - ["/mesos/volume:/mesos/volume:rw"], - ), ( spark_run.CLUSTER_MANAGER_K8S, { @@ -579,7 +552,6 @@ def test_configure_and_run_docker_container( mock_get_docker_cmd, mock_create_spark_config_str, mock_get_webui_url, - mock_send_and_calculate_resources_cost, mock_run_docker_container, mock_get_username, cluster_manager, @@ -606,7 +578,10 @@ def test_configure_and_run_docker_container( args.cluster_manager = cluster_manager args.docker_cpu_limit = False args.docker_memory_limit = False + args.docker_shm_size = False args.use_eks_override = False + args.tronfig = None + args.job_id = None with mock.patch.object( self.instance_config, "get_env_dictionary", return_value={"env1": "val1"} ): @@ -643,19 +618,13 @@ def test_configure_and_run_docker_container( dry_run=True, nvidia=False, docker_memory_limit="2g", + docker_shm_size=DEFAULT_DOCKER_SHM_SIZE, docker_cpu_limit="1", ) @pytest.mark.parametrize( ["cluster_manager", "spark_args_volumes", "expected_volumes"], [ - ( - spark_run.CLUSTER_MANAGER_MESOS, - { - "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw" - }, - ["/mesos/volume:/mesos/volume:rw"], - ), ( spark_run.CLUSTER_MANAGER_K8S, { @@ -689,7 +658,6 @@ def test_configure_and_run_docker_driver_resource_limits_config( mock_get_docker_cmd, mock_create_spark_config_str, mock_get_webui_url, - mock_send_and_calculate_resources_cost, mock_run_docker_container, mock_get_username, cluster_manager, @@ -718,6 +686,7 @@ def test_configure_and_run_docker_driver_resource_limits_config( args.cluster_manager = cluster_manager args.docker_cpu_limit = 3 args.docker_memory_limit = "4g" + args.docker_shm_size = "1g" args.use_eks_override = False with mock.patch.object( self.instance_config, "get_env_dictionary", return_value={"env1": "val1"} @@ -755,19 +724,13 @@ def test_configure_and_run_docker_driver_resource_limits_config( dry_run=True, nvidia=False, docker_memory_limit="4g", + docker_shm_size="1g", docker_cpu_limit=3, ) @pytest.mark.parametrize( ["cluster_manager", "spark_args_volumes", "expected_volumes"], [ - ( - spark_run.CLUSTER_MANAGER_MESOS, - { - "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw" - }, - ["/mesos/volume:/mesos/volume:rw"], - ), ( spark_run.CLUSTER_MANAGER_K8S, { @@ -801,7 +764,6 @@ def test_configure_and_run_docker_driver_resource_limits( mock_get_docker_cmd, mock_create_spark_config_str, mock_get_webui_url, - mock_send_and_calculate_resources_cost, mock_run_docker_container, mock_get_username, cluster_manager, @@ -830,6 +792,7 @@ def test_configure_and_run_docker_driver_resource_limits( args.cluster_manager = cluster_manager args.docker_cpu_limit = False args.docker_memory_limit = False + args.docker_shm_size = False args.use_eks_override = False with mock.patch.object( self.instance_config, "get_env_dictionary", return_value={"env1": "val1"} @@ -867,6 +830,7 @@ def 
test_configure_and_run_docker_driver_resource_limits( dry_run=True, nvidia=False, docker_memory_limit="2g", + docker_shm_size=DEFAULT_DOCKER_SHM_SIZE, docker_cpu_limit="2", ) @@ -877,7 +841,6 @@ def test_configure_and_run_docker_container_nvidia( mock_get_docker_cmd, mock_create_spark_config_str, mock_get_webui_url, - mock_send_and_calculate_resources_cost, mock_run_docker_container, mock_get_username, ): @@ -886,9 +849,12 @@ def test_configure_and_run_docker_container_nvidia( ): spark_conf = { "spark.cores.max": "5", + "spark.executor.cores": 1, + "spark.executor.memory": "2g", "spark.master": "mesos://spark.master", "spark.ui.port": "1234", "spark.app.name": "fake app", + "spark.executorEnv.PAASTA_CLUSTER": "test-cluster", } args = mock.MagicMock(cmd="pyspark", nvidia=True) @@ -899,13 +865,12 @@ def test_configure_and_run_docker_container_nvidia( system_paasta_config=self.system_paasta_config, aws_creds=("id", "secret", "token"), spark_conf=spark_conf, - cluster_manager=spark_run.CLUSTER_MANAGER_MESOS, + cluster_manager=spark_run.CLUSTER_MANAGER_K8S, pod_template_path="unique-run", ) args, kwargs = mock_run_docker_container.call_args assert kwargs["nvidia"] - assert mock_send_and_calculate_resources_cost.called def test_configure_and_run_docker_container_mrjob( self, @@ -914,7 +879,6 @@ def test_configure_and_run_docker_container_mrjob( mock_get_docker_cmd, mock_create_spark_config_str, mock_get_webui_url, - mock_send_and_calculate_resources_cost, mock_run_docker_container, mock_get_username, ): @@ -923,9 +887,12 @@ def test_configure_and_run_docker_container_mrjob( ): spark_conf = { "spark.cores.max": 5, + "spark.executor.cores": 1, + "spark.executor.memory": "2g", "spark.master": "mesos://spark.master", "spark.ui.port": "1234", "spark.app.name": "fake_app", + "spark.executorEnv.PAASTA_CLUSTER": "test-cluster", } args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True) @@ -936,64 +903,13 @@ def test_configure_and_run_docker_container_mrjob( system_paasta_config=self.system_paasta_config, aws_creds=("id", "secret", "token"), spark_conf=spark_conf, - cluster_manager=spark_run.CLUSTER_MANAGER_MESOS, + cluster_manager=spark_run.CLUSTER_MANAGER_K8S, pod_template_path="unique-run", ) args, kwargs = mock_run_docker_container.call_args assert kwargs["docker_cmd"] == mock_get_docker_cmd.return_value - assert mock_send_and_calculate_resources_cost.called - - def test_suppress_clusterman_metrics_errors( - self, - mock_get_history_url, - mock_et_signalfx_url, - mock_get_docker_cmd, - mock_create_spark_config_str, - mock_get_webui_url, - mock_send_and_calculate_resources_cost, - mock_run_docker_container, - mock_get_username, - ): - with mock.patch( - "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True - ): - mock_send_and_calculate_resources_cost.side_effect = Boto3Error - mock_create_spark_config_str.return_value = "--conf spark.cores.max=5" - spark_conf = { - "spark.cores.max": 5, - "spark.ui.port": "1234", - "spark.app.name": "fake app", - } - args = mock.MagicMock( - suppress_clusterman_metrics_errors=False, cmd="pyspark" - ) - with pytest.raises(Boto3Error): - configure_and_run_docker_container( - args=args, - docker_img="fake-registry/fake-service", - instance_config=self.instance_config, - system_paasta_config=self.system_paasta_config, - aws_creds=("id", "secret", "token"), - spark_conf=spark_conf, - cluster_manager=spark_run.CLUSTER_MANAGER_MESOS, - pod_template_path="unique-run", - ) - - # make sure we don't blow up when this setting is True - 
args.suppress_clusterman_metrics_errors = True - configure_and_run_docker_container( - args=args, - docker_img="fake-registry/fake-service", - instance_config=self.instance_config, - system_paasta_config=self.system_paasta_config, - aws_creds=("id", "secret", "token"), - spark_conf=spark_conf, - cluster_manager=spark_run.CLUSTER_MANAGER_MESOS, - pod_template_path="unique-run", - ) - def test_dont_emit_metrics_for_inappropriate_commands( self, mock_get_history_url, @@ -1001,7 +917,6 @@ def test_dont_emit_metrics_for_inappropriate_commands( mock_get_docker_cmd, mock_create_spark_config_str, mock_get_webui_url, - mock_send_and_calculate_resources_cost, mock_run_docker_container, mock_get_username, ): @@ -1018,10 +933,9 @@ def test_dont_emit_metrics_for_inappropriate_commands( system_paasta_config=self.system_paasta_config, aws_creds=("id", "secret", "token"), spark_conf={"spark.ui.port": "1234", "spark.app.name": "fake_app"}, - cluster_manager=spark_run.CLUSTER_MANAGER_MESOS, + cluster_manager=spark_run.CLUSTER_MANAGER_K8S, pod_template_path="unique-run", ) - assert not mock_send_and_calculate_resources_cost.called @pytest.mark.parametrize( @@ -1111,7 +1025,6 @@ def test_paasta_spark_run_bash( mock_load_system_paasta_config, mock_validate_work_dir, mock_generate_pod_template_path, - mock_get_docker_client, ): args = argparse.Namespace( work_dir="/tmp/local", @@ -1138,12 +1051,15 @@ def test_paasta_spark_run_bash( aws_role_duration=3600, use_eks_override=False, k8s_server_address=None, + tronfig=None, + job_id=None, ) mock_load_system_paasta_config.return_value.get_cluster_aliases.return_value = {} mock_load_system_paasta_config.return_value.get_cluster_pools.return_value = { "test-cluster": ["test-pool"] } mock_should_enable_compact_bin_packing.return_value = True + mock_get_docker_image.return_value = DUMMY_DOCKER_IMAGE_DIGEST spark_run.paasta_spark_run(args) mock_validate_work_dir.assert_called_once_with("/tmp/local") assert args.cmd == "/bin/bash" @@ -1195,6 +1111,7 @@ def test_paasta_spark_run_bash( aws_creds=mock_get_aws_credentials.return_value, cluster_manager=spark_run.CLUSTER_MANAGER_K8S, pod_template_path="unique-run", + extra_driver_envs=dict(), ) mock_generate_pod_template_path.assert_called_once() @@ -1225,7 +1142,6 @@ def test_paasta_spark_run( mock_load_system_paasta_config, mock_validate_work_dir, mock_generate_pod_template_path, - mock_get_docker_client, ): args = argparse.Namespace( work_dir="/tmp/local", @@ -1252,12 +1168,15 @@ def test_paasta_spark_run( aws_role_duration=3600, use_eks_override=False, k8s_server_address=None, + tronfig=None, + job_id=None, ) mock_load_system_paasta_config.return_value.get_cluster_aliases.return_value = {} mock_load_system_paasta_config.return_value.get_cluster_pools.return_value = { "test-cluster": ["test-pool"] } mock_should_enable_compact_bin_packing.return_value = True + mock_get_docker_image.return_value = DUMMY_DOCKER_IMAGE_DIGEST spark_run.paasta_spark_run(args) mock_validate_work_dir.assert_called_once_with("/tmp/local") assert args.cmd == "USER=test timeout 1m spark-submit test.py" @@ -1308,6 +1227,7 @@ def test_paasta_spark_run( aws_creds=mock_get_aws_credentials.return_value, cluster_manager=spark_run.CLUSTER_MANAGER_K8S, pod_template_path="unique-run", + extra_driver_envs=dict(), ) mock_generate_pod_template_path.assert_called_once() @@ -1338,7 +1258,6 @@ def test_paasta_spark_run_pyspark( mock_load_system_paasta_config, mock_validate_work_dir, mock_generate_pod_template_path, - mock_get_docker_client, ): args = 
argparse.Namespace( work_dir="/tmp/local", @@ -1365,6 +1284,8 @@ def test_paasta_spark_run_pyspark( aws_role_duration=3600, use_eks_override=False, k8s_server_address=None, + tronfig=None, + job_id=None, ) mock_load_system_paasta_config.return_value.get_spark_use_eks_default.return_value = ( False @@ -1374,6 +1295,7 @@ def test_paasta_spark_run_pyspark( "test-cluster": ["test-pool"] } + mock_get_docker_image.return_value = DUMMY_DOCKER_IMAGE_DIGEST spark_run.paasta_spark_run(args) mock_validate_work_dir.assert_called_once_with("/tmp/local") assert args.cmd == "pyspark" @@ -1430,6 +1352,7 @@ def test_paasta_spark_run_pyspark( aws_creds=mock_get_aws_credentials.return_value, cluster_manager=spark_run.CLUSTER_MANAGER_K8S, pod_template_path="unique-run", + extra_driver_envs=dict(), ) mock_generate_pod_template_path.assert_called_once() @@ -1472,3 +1395,99 @@ def test_decide_final_eks_toggle_state(override, default, expected): ) assert decide_final_eks_toggle_state(override) is expected + + +@mock.patch.object(spark_run, "makefile_responds_to", autospec=True) +@mock.patch.object(spark_run, "paasta_cook_image", autospec=True) +@mock.patch.object(spark_run, "get_username", autospec=True) +def test_build_and_push_docker_image_unprivileged_output_format( + mock_get_username, + mock_paasta_cook_image, + mock_makefile_responds_to, + mock_run, +): + args = mock.MagicMock( + docker_registry="MOCK-docker-dev.yelpcorp.com", + autospec=True, + ) + mock_makefile_responds_to.return_value = True + mock_paasta_cook_image.return_value = 0 + mock_run.side_effect = [ + (0, None), + ( + 0, + ( + "Using default tag: latest\n" + "The push refers to repository [MOCK-docker-dev.yelpcorp.com/paasta-spark-run-user:latest]\n" + "latest: digest: sha256:103ce91c65d42498ca61cdfe8d799fab8ab1c37dac58b743b49ced227bc7bc06" + ), + ), + (0, None), + ] + mock_get_username.return_value = "user" + docker_image_digest = build_and_push_docker_image(args) + assert DUMMY_DOCKER_IMAGE_DIGEST == docker_image_digest + + +@mock.patch.object(spark_run, "makefile_responds_to", autospec=True) +@mock.patch.object(spark_run, "paasta_cook_image", autospec=True) +@mock.patch.object(spark_run, "get_username", autospec=True) +def test_build_and_push_docker_image_privileged_output_format( + mock_get_username, + mock_paasta_cook_image, + mock_makefile_responds_to, + mock_run, +): + args = mock.MagicMock( + docker_registry="MOCK-docker-dev.yelpcorp.com", + autospec=True, + ) + mock_makefile_responds_to.return_value = True + mock_paasta_cook_image.return_value = 0 + mock_run.side_effect = [ + (0, None), + ( + 0, + ( + "Using default tag: latest\n" + "The push refers to repository [MOCK-docker-dev.yelpcorp.com/paasta-spark-run-user:latest]\n" + "latest: digest: sha256:103ce91c65d42498ca61cdfe8d799fab8ab1c37dac58b743b49ced227bc7bc06 size: 1337" + ), + ), + (0, None), + ] + mock_get_username.return_value = "user" + docker_image_digest = build_and_push_docker_image(args) + assert DUMMY_DOCKER_IMAGE_DIGEST == docker_image_digest + + +@mock.patch.object(spark_run, "makefile_responds_to", autospec=True) +@mock.patch.object(spark_run, "paasta_cook_image", autospec=True) +@mock.patch.object(spark_run, "get_username", autospec=True) +def test_build_and_push_docker_image_unexpected_output_format( + mock_get_username, + mock_paasta_cook_image, + mock_makefile_responds_to, + mock_run, +): + args = mock.MagicMock( + docker_registry="MOCK-docker-dev.yelpcorp.com", + autospec=True, + ) + mock_makefile_responds_to.return_value = True + 
mock_paasta_cook_image.return_value = 0 + mock_run.side_effect = [ + (0, None), + ( + 0, + ( + "Using default tag: latest\n" + "The push refers to repository [MOCK-docker-dev.yelpcorp.com/paasta-spark-run-user:latest]\n" + "the regex will not match this" + ), + ), + (0, None), + ] + with pytest.raises(ValueError) as e: + build_and_push_docker_image(args) + assert "Could not determine digest from output" in str(e.value) diff --git a/tests/cli/test_cmds_status.py b/tests/cli/test_cmds_status.py index 2f485d1a24..8e91cc09e2 100644 --- a/tests/cli/test_cmds_status.py +++ b/tests/cli/test_cmds_status.py @@ -920,7 +920,7 @@ def test_status_with_registration( @pytest.fixture -def mock_marathon_status(include_envoy=True, include_smartstack=True): +def mock_marathon_status(include_envoy=True): kwargs = dict( desired_state="start", desired_app_id="abc.def", @@ -937,12 +937,6 @@ def mock_marathon_status(include_envoy=True, include_smartstack=True): non_running_tasks=[], ), ) - if include_smartstack: - kwargs["smartstack"] = paastamodels.SmartstackStatus( - registration="fake_service.fake_instance", - expected_backends_per_location=1, - locations=[], - ) if include_envoy: kwargs["envoy"] = paastamodels.EnvoyStatus( registration="fake_service.fake_instance", @@ -2491,12 +2485,15 @@ def test_output( class TestPrintFlinkStatus: @patch("paasta_tools.cli.cmds.status.load_system_paasta_config", autospec=True) + @patch("paasta_tools.api.client.load_system_paasta_config", autospec=True) def test_error_no_flink( self, + mock_load_system_paasta_config_api, mock_load_system_paasta_config, mock_flink_status, system_paasta_config, ): + mock_load_system_paasta_config_api.return_value = system_paasta_config mock_load_system_paasta_config.return_value = system_paasta_config mock_flink_status["status"] = None output = [] diff --git a/tests/cli/test_cmds_validate.py b/tests/cli/test_cmds_validate.py index 854ca07a85..4350b574f9 100644 --- a/tests/cli/test_cmds_validate.py +++ b/tests/cli/test_cmds_validate.py @@ -99,15 +99,16 @@ def test_paasta_validate_calls_everything( assert mock_validate_cpu_burst.called -@patch("paasta_tools.cli.cmds.validate.get_instance_config", autospec=True) +@patch( + "paasta_tools.cli.cmds.validate.load_all_instance_configs_for_service", + autospec=True, +) @patch("paasta_tools.cli.cmds.validate.list_clusters", autospec=True) -@patch("paasta_tools.cli.cmds.validate.list_all_instances_for_service", autospec=True) @patch("paasta_tools.cli.cmds.validate.path_to_soa_dir_service", autospec=True) def test_validate_paasta_objects( mock_path_to_soa_dir_service, - mock_list_all_instances_for_service, mock_list_clusters, - mock_get_instance_config, + mock_load_all_instance_configs_for_service, capsys, ): @@ -120,8 +121,9 @@ def test_validate_paasta_objects( mock_path_to_soa_dir_service.return_value = ("fake_soa_dir", fake_service) mock_list_clusters.return_value = [fake_cluster] - mock_list_all_instances_for_service.return_value = [fake_instance] - mock_get_instance_config.return_value = mock_paasta_instance + mock_load_all_instance_configs_for_service.return_value = [ + (fake_instance, mock_paasta_instance) + ] assert validate_paasta_objects("fake-service-path") is False, capsys captured = capsys.readouterr() @@ -159,26 +161,31 @@ def test_validate_unknown_service_service_path(): assert not paasta_validate_soa_configs(service, service_path) -@patch("paasta_tools.cli.cmds.validate.get_instance_config", autospec=True) -@patch("paasta_tools.cli.cmds.validate.list_all_instances_for_service", 
autospec=True) +@patch( + "paasta_tools.cli.cmds.validate.load_all_instance_configs_for_service", + autospec=True, +) @patch("paasta_tools.cli.cmds.validate.list_clusters", autospec=True) @patch("paasta_tools.cli.cmds.validate.path_to_soa_dir_service", autospec=True) def test_validate_min_max_instances_success( mock_path_to_soa_dir_service, mock_list_clusters, - mock_list_all_instances_for_service, - mock_get_instance_config, + mock_load_all_instance_configs_for_service, capsys, ): mock_path_to_soa_dir_service.return_value = ("fake_soa_dir", "fake_service") mock_list_clusters.return_value = ["fake_cluster"] - mock_list_all_instances_for_service.return_value = {"fake_instance1"} - mock_get_instance_config.return_value = mock.Mock( - get_instance=mock.Mock(return_value="fake_instance1"), - get_instance_type=mock.Mock(return_value="fake_type"), - get_min_instances=mock.Mock(return_value=3), - get_max_instances=mock.Mock(return_value=1), - ) + mock_load_all_instance_configs_for_service.return_value = [ + ( + "fake_instance1", + mock.Mock( + get_instance=mock.Mock(return_value="fake_instance1"), + get_instance_type=mock.Mock(return_value="fake_type"), + get_min_instances=mock.Mock(return_value=3), + get_max_instances=mock.Mock(return_value=1), + ), + ) + ] assert validate_min_max_instances("fake-service-path") is False output, _ = capsys.readouterr() @@ -226,8 +233,8 @@ def is_schema(schema): assert "$schema" in schema -def test_get_schema_marathon_found(): - schema = get_schema("marathon") +def test_get_schema_eks_found(): + schema = get_schema("eks") is_schema(schema) @@ -241,8 +248,34 @@ def test_get_schema_missing(): @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_schema_list_hashes_good(mock_get_file_contents, capsys): - marathon_content = """ +def test_k8s_namespace_schema_good(mock_get_file_contents, capsys): + mock_get_file_contents.return_value = """ +main: + namespace: this-is-good +""" + for schema_type in ["kubernetes", "eks"]: + assert validate_schema("unused_service_path.yaml", schema_type) + + output, _ = capsys.readouterr() + assert SCHEMA_VALID in output + + +@patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) +def test_k8s_namespace_schema_bad(mock_get_file_contents, capsys): + mock_get_file_contents.return_value = """ +main: + namspace: bad_namespace +""" + for schema_type in ["kubernetes", "eks"]: + assert not validate_schema("unused_service_path.yaml", schema_type) + + output, _ = capsys.readouterr() + assert SCHEMA_INVALID in output + + +@patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) +def test_kubernetes_validate_schema_list_hashes_good(mock_get_file_contents, capsys): + kubernetes_content = """ --- main_worker: cpus: 0.1 @@ -259,8 +292,8 @@ def test_marathon_validate_schema_list_hashes_good(mock_get_file_contents, capsy disk: 512 registrations: ['foo.bar', 'bar.baz'] """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() assert SCHEMA_VALID in output @@ -308,8 +341,8 @@ def test_validate_rollback_bounds(mock_config, expected): @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_understands_underscores(mock_get_file_contents, capsys): - marathon_content = """ +def 
test_kubernetes_validate_understands_underscores(mock_get_file_contents, capsys): + kubernetes_content = """ --- _template: &template foo: bar @@ -320,16 +353,16 @@ def test_marathon_validate_understands_underscores(mock_get_file_contents, capsy env: <<: *template """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() assert SCHEMA_VALID in output @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_schema_healthcheck_non_cmd(mock_get_file_contents, capsys): - marathon_content = """ +def test_kubernetes_validate_schema_healthcheck_non_cmd(mock_get_file_contents, capsys): + kubernetes_content = """ --- main_worker: cpus: 0.1 @@ -339,12 +372,12 @@ def test_marathon_validate_schema_healthcheck_non_cmd(mock_get_file_contents, ca cmd: virtualenv_run/bin/python adindexer/adindex_worker.py healthcheck_mode: tcp """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() assert SCHEMA_VALID in output - marathon_content = """ + kubernetes_content = """ --- main_worker: cpus: 0.1 @@ -353,16 +386,16 @@ def test_marathon_validate_schema_healthcheck_non_cmd(mock_get_file_contents, ca disk: 512 cmd: virtualenv_run/bin/python adindexer/adindex_worker.py """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() assert SCHEMA_VALID in output @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_id(mock_get_file_contents, capsys): - marathon_content = """ +def test_kubernetes_validate_id(mock_get_file_contents, capsys): + kubernetes_content = """ --- valid: cpus: 0.1 @@ -371,13 +404,13 @@ def test_marathon_validate_id(mock_get_file_contents, capsys): disk: 512 cmd: virtualenv_run/bin/python adindexer/adindex_worker.py """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() assert SCHEMA_VALID in output - marathon_content = """ + kubernetes_content = """ --- this_is_okay_too_1: cpus: 0.1 @@ -386,13 +419,13 @@ def test_marathon_validate_id(mock_get_file_contents, capsys): disk: 512 cmd: virtualenv_run/bin/python adindexer/adindex_worker.py """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() assert SCHEMA_VALID in output - marathon_content = """ + kubernetes_content = """ --- dashes-are-okay-too: cpus: 0.1 @@ -401,14 +434,14 @@ def 
test_marathon_validate_id(mock_get_file_contents, capsys): disk: 512 cmd: virtualenv_run/bin/python adindexer/adindex_worker.py """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) get_config_file_dict.cache_clear() # HACK: ensure cache is cleared for future calls output, _ = capsys.readouterr() assert SCHEMA_VALID in output - marathon_content = """ + kubernetes_content = """ --- main_worker_CAPITALS_INVALID: cpus: 0.1 @@ -417,14 +450,14 @@ def test_marathon_validate_id(mock_get_file_contents, capsys): disk: 512 cmd: virtualenv_run/bin/python adindexer/adindex_worker.py """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert not validate_schema("unused_service_path.yaml", schema_type) get_config_file_dict.cache_clear() # HACK: ensure cache is cleared for future calls output, _ = capsys.readouterr() assert SCHEMA_INVALID in output - marathon_content = """ + kubernetes_content = """ --- $^&*()(&*^%&definitely_not_okay: cpus: 0.1 @@ -433,18 +466,18 @@ def test_marathon_validate_id(mock_get_file_contents, capsys): disk: 512 cmd: virtualenv_run/bin/python adindexer/adindex_worker.py """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert not validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() assert SCHEMA_INVALID in output @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_schema_healthcheck_cmd_has_cmd( +def test_kubernetes_validate_schema_healthcheck_cmd_has_cmd( mock_get_file_contents, capsys ): - marathon_content = """ + kubernetes_content = """ --- main_worker: cpus: 0.1 @@ -454,13 +487,13 @@ def test_marathon_validate_schema_healthcheck_cmd_has_cmd( cmd: virtualenv_run/bin/python adindexer/adindex_worker.py healthcheck_mode: cmd """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert not validate_schema("unused_service_path.yaml", schema_type) get_config_file_dict.cache_clear() # HACK: ensure cache is cleared for future calls output, _ = capsys.readouterr() assert SCHEMA_INVALID in output - marathon_content = """ + kubernetes_content = """ --- main_worker: cpus: 0.1 @@ -471,8 +504,8 @@ def test_marathon_validate_schema_healthcheck_cmd_has_cmd( healthcheck_mode: cmd healthcheck_cmd: '/bin/true' """ - mock_get_file_contents.return_value = marathon_content - for schema_type in ["marathon", "kubernetes"]: + mock_get_file_contents.return_value = kubernetes_content + for schema_type in ["kubernetes", "eks"]: assert validate_schema("unused_service_path.yaml", schema_type) get_config_file_dict.cache_clear() # HACK: ensure cache is cleared for future calls output, _ = capsys.readouterr() @@ -480,7 +513,7 @@ def test_marathon_validate_schema_healthcheck_cmd_has_cmd( @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_schema_keys_outside_instance_blocks_bad( 
+def test_kubernetes_validate_schema_keys_outside_instance_blocks_bad( mock_get_file_contents, capsys ): mock_get_file_contents.return_value = """ @@ -491,7 +524,7 @@ def test_marathon_validate_schema_keys_outside_instance_blocks_bad( "page": false } """ - for schema_type in ["marathon", "kubernetes"]: + for schema_type in ["kubernetes", "eks"]: assert not validate_schema("unused_service_path.json", schema_type) get_config_file_dict.cache_clear() # HACK: ensure cache is cleared for future calls @@ -500,28 +533,28 @@ def test_marathon_validate_schema_keys_outside_instance_blocks_bad( @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_schema_security_good(mock_get_file_contents, capsys): +def test_kubernetes_validate_schema_security_good(mock_get_file_contents, capsys): mock_get_file_contents.return_value = """ main: dependencies_reference: main security: outbound_firewall: block """ - assert validate_schema("unused_service_path.yaml", "marathon") + assert validate_schema("unused_service_path.yaml", "kubernetes") output, _ = capsys.readouterr() assert SCHEMA_VALID in output @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_schema_security_bad(mock_get_file_contents, capsys): +def test_kubernetes_validate_schema_security_bad(mock_get_file_contents, capsys): mock_get_file_contents.return_value = """ main: dependencies_reference: main security: outbound_firewall: bblock """ - for schema_type in ["marathon", "kubernetes"]: + for schema_type in ["kubernetes", "eks"]: assert not validate_schema("unused_service_path.yaml", schema_type) output, _ = capsys.readouterr() @@ -529,7 +562,7 @@ def test_marathon_validate_schema_security_bad(mock_get_file_contents, capsys): @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -def test_marathon_validate_invalid_key_bad(mock_get_file_contents, capsys): +def test_kubernetes_validate_invalid_key_bad(mock_get_file_contents, capsys): mock_get_file_contents.return_value = """ { "main": { @@ -537,13 +570,46 @@ def test_marathon_validate_invalid_key_bad(mock_get_file_contents, capsys): } } """ - for schema_type in ["marathon", "kubernetes"]: + for schema_type in ["kubernetes", "eks"]: assert not validate_schema("unused_service_path.json", schema_type) output, _ = capsys.readouterr() assert SCHEMA_INVALID in output +@pytest.mark.parametrize( + "iam_role, expected, instance_type", + [ + ("not_an_arn", False, "kubernetes"), + ("not_an_arn", False, "eks"), + ("arn:aws:iam::12345678:role/some_role", True, "kubernetes"), + ("arn:aws:iam::12345678:role/some_role", True, "eks"), + ("arn:aws:iam::12345678:role/Some_Capitalized_Role", True, "kubernetes"), + ("arn:aws:iam::12345678:role/Some_Capitalized_Role", True, "eks"), + ("arn:aws:iam::12345678::role/malformed_role", False, "kubernetes"), + ("arn:aws:iam::12345678::role/malformed_role", False, "eks"), + ], +) +def test_instance_validate_schema_iam_role( + iam_role, + expected, + instance_type, + capsys, +): + instance_content = f""" +test_instance: + iam_role: {iam_role} +""" + with patch( + "paasta_tools.cli.cmds.validate.get_file_contents", autospec=True + ) as mock_get_file_contents: + mock_get_file_contents.return_value = instance_content + assert validate_schema("unused_service_path.yaml", instance_type) == expected + expected_output = SCHEMA_VALID if expected else SCHEMA_INVALID + output, _ = capsys.readouterr() + assert expected_output in output + + 
@patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) def test_tron_validate_schema_understands_underscores(mock_get_file_contents, capsys): tron_content = """ @@ -622,6 +688,35 @@ def test_tron_validate_schema_cleanup_action_extra_properties_bad( assert SCHEMA_INVALID in output +@pytest.mark.parametrize( + "iam_role, expected", + [ + ("not_an_arn", False), + ("arn:aws:iam::12345678:role/some_role", True), + ("arn:aws:iam::12345678:role/Some_Capitalized_Role", True), + ("arn:aws:iam::12345678::role/malformed_role", False), + ], +) +def test_tron_validate_schema_iam_role(iam_role, expected, capsys): + tron_content = f""" +test_job: + node: paasta + schedule: "daily 04:00:00" + actions: + first: + iam_role: {iam_role} + command: echo hello world +""" + with patch( + "paasta_tools.cli.cmds.validate.get_file_contents", autospec=True + ) as mock_get_file_contents: + mock_get_file_contents.return_value = tron_content + assert validate_schema("unused_service_path.yaml", "tron") == expected + output, _ = capsys.readouterr() + expected_output = SCHEMA_VALID if expected else SCHEMA_INVALID + assert expected_output in output + + @pytest.mark.parametrize( "mock_content", ( @@ -834,9 +929,11 @@ def test_validate_unique_service_name_failure( assert "instance_1" in output -@patch("paasta_tools.cli.cmds.validate.get_instance_config", autospec=True) +@patch( + "paasta_tools.cli.cmds.validate.load_all_instance_configs_for_service", + autospec=True, +) @patch("paasta_tools.cli.cmds.validate.list_clusters", autospec=True) -@patch("paasta_tools.cli.cmds.validate.list_all_instances_for_service", autospec=True) @patch("paasta_tools.cli.cmds.validate.path_to_soa_dir_service", autospec=True) @patch("paasta_tools.cli.cmds.validate.load_system_paasta_config", autospec=True) @patch("paasta_tools.cli.cmds.validate.check_secrets_for_instance", autospec=True) @@ -844,9 +941,8 @@ def test_validate_secrets( mock_check_secrets_for_instance, mock_load_system_paasta_config, mock_path_to_soa_dir_service, - mock_list_all_instances_for_service, mock_list_clusters, - mock_get_instance_config, + mock_load_all_instance_configs_for_service, capsys, ): mock_path_to_soa_dir_service.return_value = ("fake_soa_dir", "fake_service") @@ -856,17 +952,16 @@ def test_validate_secrets( return_value={"fake_cluster": "fake_vault_env"} ) ) - mock_list_all_instances_for_service.return_value = [ - "fake_instance", - "fake_instance2", - ] mock_paasta_instance = mock.Mock( config_dict={"env": {"SUPER_SECRET1": "SECRET(secret1)"}} ) mock_paasta_instance2 = mock.Mock( config_dict={"env": {"SUPER_SECRET1": "SHARED_SECRET(secret1)"}} ) - mock_get_instance_config.side_effect = [mock_paasta_instance, mock_paasta_instance2] + mock_load_all_instance_configs_for_service.return_value = [ + ("fake_instance", mock_paasta_instance), + ("fake_instance", mock_paasta_instance2), + ] mock_check_secrets_for_instance.return_value = True assert validate_secrets("fake-service-path"), capsys captured = capsys.readouterr() @@ -878,8 +973,9 @@ def test_validate_secrets( @patch("paasta_tools.cli.cmds.validate.os.path.isfile", autospec=True) def test_check_secrets_for_instance(mock_isfile, mock_get_file_contents): instance_config_dict = {"env": {"SUPER_SECRET1": "SECRET(secret1)"}} + overriding_service = "fake-other-service-name" soa_dir = "fake_soa_dir" - service_path = "fake-service-path" + service = "fake-service-name" vault_env = "fake_vault_env" secret_content = """ { @@ -892,18 +988,28 @@ def test_check_secrets_for_instance(mock_isfile, 
mock_get_file_contents): """ mock_get_file_contents.return_value = secret_content mock_isfile.return_value = True - assert check_secrets_for_instance( - instance_config_dict, soa_dir, service_path, vault_env + assert check_secrets_for_instance(instance_config_dict, soa_dir, service, vault_env) + mock_get_file_contents.assert_called_with( + "fake_soa_dir/fake-service-name/secrets/secret1.json" ) - mock_get_file_contents.assert_called_with("fake-service-path/secrets/secret1.json") instance_config_dict = {"env": {"SUPER_SECRET1": "SHARED_SECRET(secret1)"}} - assert check_secrets_for_instance( - instance_config_dict, soa_dir, service_path, vault_env - ) + assert check_secrets_for_instance(instance_config_dict, soa_dir, service, vault_env) mock_get_file_contents.assert_called_with( "fake_soa_dir/_shared/secrets/secret1.json" ) + # validation should also work on instances with service: override. + instance_config_dict = { + "env": {"SUPER_SECRET1": "SECRET(secret1)"}, + "service": overriding_service, + } + mock_get_file_contents.return_value = secret_content + mock_isfile.return_value = True + assert check_secrets_for_instance(instance_config_dict, soa_dir, service, vault_env) + mock_get_file_contents.assert_called_with( + f"{soa_dir}/{overriding_service}/secrets/secret1.json" + ) + @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) @patch("paasta_tools.cli.cmds.validate.os.path.isfile", autospec=True) @@ -912,7 +1018,7 @@ def test_check_secrets_for_instance_missing_secret( ): instance_config_dict = {"env": {"SUPER_SECRET1": "SECRET(secret1)"}} soa_dir = "fake_soa_dir" - service_path = "fake-service-path" + service = "fake-service-name" vault_env = "even_more_fake_vault_env" secret_content = """ { @@ -926,51 +1032,60 @@ def test_check_secrets_for_instance_missing_secret( mock_get_file_contents.return_value = secret_content mock_isfile.return_value = True assert not check_secrets_for_instance( - instance_config_dict, soa_dir, service_path, vault_env + instance_config_dict, soa_dir, service, vault_env ), capsys captured = capsys.readouterr() assert ( - "Secret secret1 not defined for ecosystem even_more_fake_vault_env on secret file fake-service-path/secrets/secret1.json" + "Secret secret1 not defined for ecosystem even_more_fake_vault_env on secret file fake_soa_dir/fake-service-name/secrets/secret1.json" in captured.out ) @pytest.mark.parametrize( - "setpoint,offset,expected", + "setpoint,offset,expected,instance_type", [ - (0.5, 0.5, False), - (0.5, 0.6, False), - (0.8, 0.25, True), + (0.5, 0.5, False, "kubernetes"), + (0.5, 0.6, False, "kubernetes"), + (0.8, 0.25, True, "kubernetes"), + (0.5, 0.5, False, "eks"), + (0.5, 0.6, False, "eks"), + (0.8, 0.25, True, "eks"), ], ) -@patch("paasta_tools.cli.cmds.validate.get_instance_config", autospec=True) -@patch("paasta_tools.cli.cmds.validate.list_all_instances_for_service", autospec=True) +@patch( + "paasta_tools.cli.cmds.validate.load_all_instance_configs_for_service", + autospec=True, +) @patch("paasta_tools.cli.cmds.validate.list_clusters", autospec=True) @patch("paasta_tools.cli.cmds.validate.path_to_soa_dir_service", autospec=True) def test_validate_autoscaling_configs( mock_path_to_soa_dir_service, mock_list_clusters, - mock_list_all_instances_for_service, - mock_get_instance_config, + mock_load_all_instance_configs_for_service, setpoint, offset, expected, + instance_type, ): mock_path_to_soa_dir_service.return_value = ("fake_soa_dir", "fake_service") mock_list_clusters.return_value = ["fake_cluster"] - 
mock_list_all_instances_for_service.return_value = {"fake_instance1"} - mock_get_instance_config.return_value = mock.Mock( - get_instance=mock.Mock(return_value="fake_instance1"), - get_instance_type=mock.Mock(return_value="kubernetes"), - is_autoscaling_enabled=mock.Mock(return_value=True), - get_autoscaling_params=mock.Mock( - return_value={ - "metrics_provider": "uwsgi", - "setpoint": setpoint, - "offset": offset, - } - ), - ) + mock_load_all_instance_configs_for_service.return_value = [ + ( + "fake_instance1", + mock.Mock( + get_instance=mock.Mock(return_value="fake_instance1"), + get_instance_type=mock.Mock(return_value=instance_type), + is_autoscaling_enabled=mock.Mock(return_value=True), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "uwsgi", + "setpoint": setpoint, + "offset": offset, + } + ), + ), + ) + ] with mock.patch( "paasta_tools.cli.cmds.validate.load_system_paasta_config", @@ -983,30 +1098,40 @@ def test_validate_autoscaling_configs( assert validate_autoscaling_configs("fake-service-path") is expected -@patch("paasta_tools.cli.cmds.validate.get_instance_config", autospec=True) -@patch("paasta_tools.cli.cmds.validate.list_all_instances_for_service", autospec=True) +@pytest.mark.parametrize( + "instance_type", + [("kubernetes"), ("eks")], +) +@patch( + "paasta_tools.cli.cmds.validate.load_all_instance_configs_for_service", + autospec=True, +) @patch("paasta_tools.cli.cmds.validate.list_clusters", autospec=True) @patch("paasta_tools.cli.cmds.validate.path_to_soa_dir_service", autospec=True) def test_validate_autoscaling_configs_no_offset_specified( mock_path_to_soa_dir_service, mock_list_clusters, - mock_list_all_instances_for_service, - mock_get_instance_config, + mock_load_all_instance_configs_for_service, + instance_type, ): mock_path_to_soa_dir_service.return_value = ("fake_soa_dir", "fake_service") mock_list_clusters.return_value = ["fake_cluster"] - mock_list_all_instances_for_service.return_value = {"fake_instance1"} - mock_get_instance_config.return_value = mock.Mock( - get_instance=mock.Mock(return_value="fake_instance1"), - get_instance_type=mock.Mock(return_value="kubernetes"), - is_autoscaling_enabled=mock.Mock(return_value=True), - get_autoscaling_params=mock.Mock( - return_value={ - "metrics_provider": "uwsgi", - "setpoint": 0.8, - } - ), - ) + mock_load_all_instance_configs_for_service.return_value = [ + ( + "fake_instance1", + mock.Mock( + get_instance=mock.Mock(return_value="fake_instance1"), + get_instance_type=mock.Mock(return_value=instance_type), + is_autoscaling_enabled=mock.Mock(return_value=True), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "uwsgi", + "setpoint": 0.8, + } + ), + ), + ) + ] with mock.patch( "paasta_tools.cli.cmds.validate.load_system_paasta_config", @@ -1019,43 +1144,137 @@ def test_validate_autoscaling_configs_no_offset_specified( assert validate_autoscaling_configs("fake-service-path") is True +@patch( + "paasta_tools.cli.cmds.validate.load_all_instance_configs_for_service", + autospec=True, +) +@patch("paasta_tools.cli.cmds.validate.list_clusters", autospec=True) +@patch("paasta_tools.cli.cmds.validate.path_to_soa_dir_service", autospec=True) @pytest.mark.parametrize( - "filecontents,expected", + "autoscaling_config,registrations,expected", [ - ("# overridexxx-cpu-setting", False), - ("# override-cpu-setting", False), - ("", False), - ("# override-cpu-setting (PAASTA-17522)", True), + ( + { + "metrics_provider": "active-requests", + }, + [], + True, + ), + ( + { + 
"metrics_provider": "active-requests", + "desired_active_requests_per_replica": -5, + }, + [], + False, + ), + ( + { + "metrics_provider": "active-requests", + "desired_active_requests_per_replica": 5, + }, + [], + True, + ), + ( + { + "metrics_provider": "active-requests", + "desired_active_requests_per_replica": 5, + }, + ["fake_service.abc"], + True, + ), + ( + { + "metrics_provider": "active-requests", + "desired_active_requests_per_replica": 5, + }, + ["fake_service.abc", "fake_service.def"], + False, + ), + ], +) +def test_validate_autoscaling_configs_active_requests( + mock_path_to_soa_dir_service, + mock_list_clusters, + mock_load_all_instance_configs_for_service, + autoscaling_config, + registrations, + expected, +): + mock_path_to_soa_dir_service.return_value = ("fake_soa_dir", "fake_service") + mock_list_clusters.return_value = ["fake_cluster"] + mock_load_all_instance_configs_for_service.return_value = [ + ( + "fake_instance1", + mock.Mock( + get_instance=mock.Mock(return_value="fake_instance1"), + get_instance_type=mock.Mock(return_value="kubernetes"), + is_autoscaling_enabled=mock.Mock(return_value=True), + get_autoscaling_params=mock.Mock(return_value=autoscaling_config), + get_registrations=mock.Mock(return_value=registrations), + ), + ) + ] + + with mock.patch( + "paasta_tools.cli.cmds.validate.load_system_paasta_config", + autospec=True, + return_value=SystemPaastaConfig( + config={"skip_cpu_override_validation": ["not-a-real-service"]}, + directory="/some/test/dir", + ), + ): + assert validate_autoscaling_configs("fake-service-path") is expected + + +@pytest.mark.parametrize( + "filecontents,expected, instance_type", + [ + ("# overridexxx-cpu-setting", False, "kubernetes"), + ("# override-cpu-setting", False, "kubernetes"), + ("", False, "kubernetes"), + ("# override-cpu-setting (PAASTA-17522)", True, "kubernetes"), + ("# overridexxx-cpu-setting", False, "eks"), + ("# override-cpu-setting", False, "eks"), + ("", False, "eks"), + ("# override-cpu-setting (PAASTA-17522)", True, "eks"), ], ) @patch("paasta_tools.cli.cmds.validate.get_file_contents", autospec=True) -@patch("paasta_tools.cli.cmds.validate.get_instance_config", autospec=True) -@patch("paasta_tools.cli.cmds.validate.list_all_instances_for_service", autospec=True) +@patch( + "paasta_tools.cli.cmds.validate.load_all_instance_configs_for_service", + autospec=True, +) @patch("paasta_tools.cli.cmds.validate.list_clusters", autospec=True) @patch("paasta_tools.cli.cmds.validate.path_to_soa_dir_service", autospec=True) def test_validate_cpu_autotune_override( mock_path_to_soa_dir_service, mock_list_clusters, - mock_list_all_instances_for_service, - mock_get_instance_config, + mock_load_all_instance_configs_for_service, mock_get_file_contents, filecontents, expected, + instance_type, ): mock_path_to_soa_dir_service.return_value = ("fake_soa_dir", "fake_service") mock_list_clusters.return_value = ["fake_cluster"] - mock_list_all_instances_for_service.return_value = {"fake_instance1"} - mock_get_instance_config.return_value = mock.Mock( - get_instance=mock.Mock(return_value="fake_instance1"), - get_instance_type=mock.Mock(return_value="kubernetes"), - is_autoscaling_enabled=mock.Mock(return_value=True), - get_autoscaling_params=mock.Mock( - return_value={ - "metrics_provider": "cpu", - "setpoint": 0.8, - } - ), - ) + mock_load_all_instance_configs_for_service.return_value = [ + ( + "fake_instance1", + mock.Mock( + get_instance=mock.Mock(return_value="fake_instance1"), + 
get_instance_type=mock.Mock(return_value=instance_type), + is_autoscaling_enabled=mock.Mock(return_value=True), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "cpu", + "setpoint": 0.8, + } + ), + ), + ) + ] mock_get_file_contents.return_value = f""" --- fake_instance1: @@ -1114,22 +1333,25 @@ def test_list_upcoming_runs(schedule, starting_from, num_runs, expected): @pytest.mark.parametrize( - "burst, comment, expected", + "burst, comment, expected, instance_type", [ - (3, "# overridexxx-cpu-burst", False), - (4, "# override-cpu-burst", False), - (5, "", False), - (6, "# override-cpu-burst (MAGIC-42)", True), - (7, "# override-cpu-burst (SECURE-1234#some comment)", True), - (1, "# override-cpu-burst (HWAT-789)", True), - (1, "# override-cpu-burst", True), + (3, "# overridexxx-cpu-burst", False, "kubernetes"), + (4, "# override-cpu-burst", False, "kubernetes"), + (5, "", False, "kubernetes"), + (6, "# override-cpu-burst (MAGIC-42)", True, "kubernetes"), + (7, "# override-cpu-burst (SECURE-1234#some comment)", True, "kubernetes"), + (1, "# override-cpu-burst (HWAT-789)", True, "kubernetes"), + (1, "# override-cpu-burst", True, "kubernetes"), + (3, "# overridexxx-cpu-burst", False, "eks"), + (4, "# override-cpu-burst", False, "eks"), + (5, "", False, "eks"), + (6, "# override-cpu-burst (MAGIC-42)", True, "eks"), + (7, "# override-cpu-burst (SECURE-1234#some comment)", True, "eks"), + (1, "# override-cpu-burst (HWAT-789)", True, "eks"), + (1, "# override-cpu-burst", True, "eks"), ], ) -def test_validate_cpu_burst_override( - burst, - comment, - expected, -): +def test_validate_cpu_burst_override(burst, comment, expected, instance_type): instance_config = f""" --- fake_instance1: @@ -1152,7 +1374,7 @@ def test_validate_cpu_burst_override( autospec=True, return_value=mock.Mock( get_instance=mock.Mock(return_value="fake_instance1"), - get_instance_type=mock.Mock(return_value="kubernetes"), + get_instance_type=mock.Mock(return_value=instance_type), ), ), mock.patch( "paasta_tools.cli.cmds.validate.list_all_instances_for_service", diff --git a/tests/conftest.py b/tests/conftest.py index ae3b3cd939..b62bd7d8e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,12 @@ import asyncio +import os import sys import time import mock import pytest +from paasta_tools.kubernetes_tools import KubeClient from paasta_tools.utils import SystemPaastaConfig @@ -26,6 +28,7 @@ def system_paasta_config(): { "cluster": "fake_cluster", "api_endpoints": {"fake_cluster": "http://fake_cluster:5054"}, + "api_client_timeout": 120, "docker_registry": "fake_registry", "volumes": [ { @@ -35,11 +38,29 @@ def system_paasta_config(): } ], "service_discovery_providers": {"smartstack": {}, "envoy": {}}, + "kube_clusters": { + "pnw-prod": {"aws_account": "prod"}, + "pnw-devc": {"aws_account": "dev"}, + }, }, "/fake_dir/", ) +@pytest.fixture(scope="function", autouse=True) +def remove_pod_identity_env_vars(): + with mock.patch.dict( + os.environ, + { + k: v + for k, v in os.environ.items() + if k not in ["AWS_ROLE_ARN", "AWS_WEB_IDENTITY_TOKEN_FILE"] + }, + clear=True, + ): + yield + + @pytest.fixture(autouse=True) def mock_read_soa_metadata(): with mock.patch( @@ -60,6 +81,12 @@ def mock_ktools_read_soa_metadata(mock_read_soa_metadata): yield mock_read_soa_metadata +@pytest.fixture(autouse=True) +def cache_clear_KubeClient(): + KubeClient.__new__.cache_clear() + KubeClient.__init__.cache_clear() + + class Struct: """ convert a dictionary to an object diff --git 
a/tests/contrib/test_get_running_task_allocation.py b/tests/contrib/test_get_running_task_allocation.py index 25208adb52..98c0467b0f 100644 --- a/tests/contrib/test_get_running_task_allocation.py +++ b/tests/contrib/test_get_running_task_allocation.py @@ -4,7 +4,7 @@ from paasta_tools.contrib.get_running_task_allocation import ( get_kubernetes_resource_request_limit, ) -from paasta_tools.contrib.get_running_task_allocation import get_matching_namespaces +from paasta_tools.contrib.get_running_task_allocation import get_unexcluded_namespaces def test_get_kubernetes_resource_request_limit(): @@ -26,25 +26,26 @@ def test_get_kubernetes_resource_request_limit(): @pytest.mark.parametrize( - "namespaces, namespace_prefix, additional_namespaces, expected", + "namespaces, namespaces_to_exclude, expected", ( ( ["paasta", "paasta-flink", "paasta-spark", "luisp-was-here", "tron"], - "paasta", ["tron"], - ["paasta", "paasta-flink", "paasta-spark", "tron"], + ["paasta", "paasta-flink", "paasta-spark", "luisp-was-here"], ), ( ["paasta", "paasta-flink", "paasta-spark", "luisp-was-here", "tron"], - "paasta", - [""], - ["paasta", "paasta-flink", "paasta-spark"], + [], + ["paasta", "paasta-flink", "paasta-spark", "luisp-was-here", "tron"], + ), + ( + ["paasta", "paasta-flink", "paasta-spark", "luisp-was-here", "tron"], + ["tron", "paasta"], + ["paasta-flink", "paasta-spark", "luisp-was-here"], ), ), ) -def test_get_matching_namespaces( - namespaces, namespace_prefix, additional_namespaces, expected -): +def test_get_matching_namespaces(namespaces, namespaces_to_exclude, expected): assert sorted( - get_matching_namespaces(namespaces, namespace_prefix, additional_namespaces) + get_unexcluded_namespaces(namespaces, namespaces_to_exclude) ) == sorted(expected) diff --git a/tests/instance/test_kubernetes.py b/tests/instance/test_kubernetes.py index 5c192d55d0..a86284870b 100644 --- a/tests/instance/test_kubernetes.py +++ b/tests/instance/test_kubernetes.py @@ -117,7 +117,6 @@ def instance_status_kwargs(): instance="", instance_type="", verbose=0, - include_smartstack=False, include_envoy=False, settings=mock.Mock(), use_new=False, @@ -189,7 +188,6 @@ def test_kubernetes_status(): service="", instance="", verbose=0, - include_smartstack=False, include_envoy=False, instance_type="flink", settings=mock.Mock(), @@ -306,7 +304,6 @@ def test_replicaset( service="service", instance="instance", verbose=0, - include_smartstack=False, include_envoy=False, instance_type="kubernetes", settings=mock.Mock(), @@ -421,7 +418,6 @@ def test_statefulset( service="service", instance="instance", verbose=0, - include_smartstack=False, include_envoy=False, instance_type="kubernetes", settings=mock.Mock(), @@ -484,7 +480,6 @@ def test_statefulset_with_image_version( service="service", instance="instance", verbose=0, - include_smartstack=False, include_envoy=False, instance_type="kubernetes", settings=mock.Mock(), @@ -546,7 +541,6 @@ def test_event_timeout( service="service", instance="instance", verbose=0, - include_smartstack=False, include_envoy=False, instance_type="kubernetes", settings=mock.Mock(), @@ -600,7 +594,6 @@ def test_pod_timeout( service="service", instance="instance", verbose=0, - include_smartstack=False, include_envoy=False, instance_type="kubernetes", settings=mock.Mock(), @@ -628,52 +621,6 @@ def test_job_status_include_replicaset_non_verbose(mock_get_kubernetes_app_by_na assert len(kstatus["replicasets"]) == 3 -def test_kubernetes_status_include_smartstack(): - with asynctest.patch( - 
"paasta_tools.instance.kubernetes.job_status", - autospec=True, - ), asynctest.patch( - "paasta_tools.kubernetes_tools.load_service_namespace_config", autospec=True - ) as mock_load_service_namespace_config, asynctest.patch( - "paasta_tools.instance.kubernetes.mesh_status", - autospec=True, - ) as mock_mesh_status, asynctest.patch( - "paasta_tools.kubernetes_tools.replicasets_for_service_instance", autospec=True - ) as mock_replicasets_for_service_instance, asynctest.patch( - "paasta_tools.kubernetes_tools.pods_for_service_instance", - autospec=True, - ) as mock_pods_for_service_instance, asynctest.patch( - "paasta_tools.kubernetes_tools.get_kubernetes_app_by_name", - autospec=True, - ), asynctest.patch( - "paasta_tools.instance.kubernetes.LONG_RUNNING_INSTANCE_TYPE_HANDLERS", - autospec=True, - ) as mock_LONG_RUNNING_INSTANCE_TYPE_HANDLERS: - mock_load_service_namespace_config.return_value = {"proxy_port": 1234} - mock_LONG_RUNNING_INSTANCE_TYPE_HANDLERS["flink"] = mock.Mock() - mock_pods_for_service_instance.return_value = [] - mock_replicasets_for_service_instance.return_value = [] - mock_service = mock.Mock() - status = pik.kubernetes_status( - service=mock_service, - instance="", - verbose=0, - include_smartstack=True, - include_envoy=False, - instance_type="flink", - settings=mock.Mock(), - ) - assert ( - mock_load_service_namespace_config.mock_calls[0][2]["service"] - is mock_service - ) - assert mock_mesh_status.mock_calls[0][2]["service"] is mock_service - assert "app_count" in status - assert "evicted_count" in status - assert "bounce_method" in status - assert "desired_state" in status - - def test_cr_status_bad_instance_type(): with pytest.raises(RuntimeError) as excinfo: pik.cr_status( @@ -794,19 +741,7 @@ async def test_get_pod_status_mesh_ready(event_loop): assert not status["mesh_ready"] -@pytest.mark.parametrize( - "include_smartstack,include_envoy,expected", - [ - (True, True, ("smartstack", "envoy")), - (True, False, ("smartstack",)), - (False, True, ("envoy",)), - ], -) -def test_kubernetes_mesh_status( - include_smartstack, - include_envoy, - expected, -): +def test_kubernetes_mesh_status_include_envoy(): with asynctest.patch( "paasta_tools.kubernetes_tools.load_service_namespace_config", autospec=True ) as mock_load_service_namespace_config, asynctest.patch( @@ -830,26 +765,34 @@ def test_kubernetes_mesh_status( instance="fake_instance", instance_type="flink", settings=mock_settings, - include_smartstack=include_smartstack, - include_envoy=include_envoy, + include_envoy=True, ) - assert len(kmesh) == len(expected) - for i in range(len(expected)): - mesh_type = expected[i] - assert kmesh.get(mesh_type) == mock_mesh_status.return_value - assert mock_mesh_status.call_args_list[i] == mock.call( + assert len(kmesh) == 1 + assert kmesh.get("envoy") == mock_mesh_status.return_value + assert mock_mesh_status.call_args_list[0] == mock.call( + service="fake_service", + instance=mock_job_config.get_nerve_namespace.return_value, + job_config=mock_job_config, + service_namespace_config={"proxy_port": 1234}, + pods_task=mock.ANY, + should_return_individual_backends=True, + settings=mock_settings, + service_mesh=getattr(pik.ServiceMesh, "ENVOY"), + ) + _, kwargs = mock_mesh_status.call_args_list[0] + assert kwargs["pods_task"].result() == ["pod_1"] + + # include_envoy = False should error + with pytest.raises(RuntimeError) as excinfo: + kmesh = pik.kubernetes_mesh_status( service="fake_service", - instance=mock_job_config.get_nerve_namespace.return_value, - 
job_config=mock_job_config, - service_namespace_config={"proxy_port": 1234}, - pods_task=mock.ANY, - should_return_individual_backends=True, + instance="fake_instance", + instance_type="flink", settings=mock_settings, - service_mesh=getattr(pik.ServiceMesh, mesh_type.upper()), + include_envoy=False, ) - _, kwargs = mock_mesh_status.call_args_list[i] - assert kwargs["pods_task"].result() == ["pod_1"] + assert "No mesh types specified" in str(excinfo.value) @mock.patch( @@ -891,7 +834,6 @@ def test_kubernetes_mesh_status_error( instance="fake_instance", instance_type=inst_type, settings=mock_settings, - include_smartstack=include_mesh, include_envoy=include_mesh, ) diff --git a/tests/kubernetes/application/test_controller_wrapper.py b/tests/kubernetes/application/test_controller_wrapper.py index 081c3ad09e..d0c0970e8a 100644 --- a/tests/kubernetes/application/test_controller_wrapper.py +++ b/tests/kubernetes/application/test_controller_wrapper.py @@ -1,7 +1,5 @@ -import kubernetes.client import mock import pytest -from kubernetes.client import V1DeleteOptions from kubernetes.client.rest import ApiException from paasta_tools.kubernetes.application.controller_wrappers import Application @@ -27,75 +25,6 @@ def mock_load_system_paasta_config(): yield mock_load_system_paasta_config -def test_brutal_bounce(mock_load_system_paasta_config): - # mock the new client used to brutal bounce in the background using threading. - mock_cloned_client = mock.MagicMock() - - with mock.patch( - "paasta_tools.kubernetes.application.controller_wrappers.KubeClient", - return_value=mock_cloned_client, - autospec=True, - ): - with mock.patch( - "paasta_tools.kubernetes.application.controller_wrappers.threading.Thread", - autospec=True, - ) as mock_deep_delete_and_create: - mock_client = mock.MagicMock() - - app = mock.MagicMock() - app.item.metadata.name = "fake_name" - app.item.metadata.namespace = "faasta" - - # we do NOT call deep_delete_and_create - app = setup_app({}, True) - DeploymentWrapper.update(self=app, kube_client=mock_client) - - assert mock_deep_delete_and_create.call_count == 0 - - # we call deep_delete_and_create: when bounce_method is brutal - config_dict = {"instances": 1, "bounce_method": "brutal"} - - app = setup_app(config_dict, True) - app.update(kube_client=mock_client) - - mock_deep_delete_and_create.assert_called_once_with( - target=app.deep_delete_and_create, args=[mock_cloned_client] - ) - - -def test_deep_delete_and_create(mock_load_system_paasta_config): - with mock.patch( - "paasta_tools.kubernetes.application.controller_wrappers.sleep", autospec=True - ), mock.patch( - "paasta_tools.kubernetes.application.controller_wrappers.list_all_deployments", - autospec=True, - ) as mock_list_deployments, mock.patch( - "paasta_tools.kubernetes.application.controller_wrappers.force_delete_pods", - autospec=True, - ) as mock_force_delete_pods: - mock_kube_client = mock.MagicMock() - mock_kube_client.deployments = mock.Mock(spec=kubernetes.client.AppsV1Api) - config_dict = {"instances": 1, "bounce_method": "brutal"} - app = setup_app(config_dict, True) - # This mocks being unable to delete the deployment - mock_list_deployments.return_value = [app.kube_deployment] - delete_options = V1DeleteOptions(propagation_policy="Background") - - with pytest.raises(Exception): - # test deep_delete_and_create makes kubeclient calls correctly - app.deep_delete_and_create(mock_kube_client) - mock_force_delete_pods.assert_called_with( - app.item.metadata.name, - app.kube_deployment.service, - 
app.kube_deployment.instance, - app.item.metadata.namespace, - mock_kube_client, - ) - mock_kube_client.deployments.delete_namespaced_deployment.assert_called_with( - app.item.metadata.name, app.item.metadata.namespace, body=delete_options - ) - - @pytest.mark.parametrize("bounce_margin_factor_set", [True, False]) def test_ensure_pod_disruption_budget_create( bounce_margin_factor_set, @@ -120,7 +49,9 @@ def test_ensure_pod_disruption_budget_create( app.soa_config.get_bounce_margin_factor.return_value = 0.1 app.kube_deployment.service.return_value = "fake_service" app.kube_deployment.instance.return_value = "fake_instance" - Application.ensure_pod_disruption_budget(self=app, kube_client=mock_client) + Application.ensure_pod_disruption_budget( + self=app, kube_client=mock_client, namespace="paasta" + ) mock_client.policy.create_namespaced_pod_disruption_budget.assert_called_once_with( body=mock_req_pdr, namespace=mock_req_pdr.metadata.namespace ) @@ -145,7 +76,9 @@ def test_ensure_pod_disruption_budget_replaces_outdated( app.soa_config.get_bounce_margin_factor.return_value = 0.1 app.kube_deployment.service.return_value = "fake_service" app.kube_deployment.instance.return_value = "fake_instance" - Application.ensure_pod_disruption_budget(self=app, kube_client=mock_client) + Application.ensure_pod_disruption_budget( + self=app, kube_client=mock_client, namespace="paasta" + ) mock_client.policy.patch_namespaced_pod_disruption_budget.assert_called_once_with( name=mock_req_pdr.metadata.name, @@ -173,7 +106,9 @@ def test_ensure_pod_disruption_budget_noop_when_min_available_is_set( app.soa_config.get_bounce_margin_factor.return_value = 0.1 app.kube_deployment.service.return_value = "fake_service" app.kube_deployment.instance.return_value = "fake_instance" - Application.ensure_pod_disruption_budget(self=app, kube_client=mock_client) + Application.ensure_pod_disruption_budget( + self=app, kube_client=mock_client, namespace="paasta" + ) mock_client.policy.patch_namespaced_pod_disruption_budget.assert_not_called() diff --git a/tests/kubernetes/bin/test_paasta_secrets_sync.py b/tests/kubernetes/bin/test_paasta_secrets_sync.py index ab08becd7c..43809b94c7 100644 --- a/tests/kubernetes/bin/test_paasta_secrets_sync.py +++ b/tests/kubernetes/bin/test_paasta_secrets_sync.py @@ -64,6 +64,9 @@ def test_sync_all_secrets(): ) as mock_sync_secrets, mock.patch( "paasta_tools.kubernetes.bin.paasta_secrets_sync.PaastaServiceConfigLoader", autospec=True, + ), mock.patch( + "paasta_tools.kubernetes.bin.paasta_secrets_sync.ensure_namespace", + autospec=True, ): services_to_k8s_namespaces_to_allowlist = { "foo": {"paastasvc-foo": None}, @@ -261,7 +264,11 @@ def paasta_secrets_patches(): "paasta_tools.kubernetes.bin.paasta_secrets_sync.json.load", autospec=True ), mock.patch( "os.path.isdir", autospec=True, return_value=True + ), mock.patch( + "paasta_tools.kubernetes.bin.paasta_secrets_sync.load_system_paasta_config", + autospec=True, ): + yield ( mock_get_secret_provider, mock_scandir, @@ -620,7 +627,10 @@ def boto_keys_patches(): ) as mock_update_kubernetes_secret_signature, mock.patch( "paasta_tools.kubernetes.bin.paasta_secrets_sync.PaastaServiceConfigLoader", autospec=True, - ) as mock_config_loader: + ) as mock_config_loader, mock.patch( + "paasta_tools.kubernetes.bin.paasta_secrets_sync.load_system_paasta_config", + autospec=True, + ): yield ( mock_open, mock_open.return_value.__enter__.return_value, @@ -665,7 +675,16 @@ def test_sync_boto_secrets_create(boto_keys_patches): "scribereader-cfg": "ZmlsZTQ=", } - 
mock_open_handle.read.side_effect = ["file1", "file2", "file3", "file4"] + mock_open_handle.read.side_effect = [ + "file1", + "file2", + "file3", + "file4", + "eksfile1", + "eksfile2", + "eksfile3", + "eksfile4", + ] mock_get_kubernetes_secret_signature.return_value = None assert sync_boto_secrets( kube_client=mock.Mock(), @@ -711,7 +730,16 @@ def test_sync_boto_secrets_update(boto_keys_patches): ) mock_config_loader_instances.return_value = [deployment] - mock_open_handle.read.side_effect = ["file1", "file2", "file3", "file4"] + mock_open_handle.read.side_effect = [ + "file1", + "file2", + "file3", + "file4", + "eksfile1", + "eksfile2", + "eksfile3", + "eksfile4", + ] mock_get_kubernetes_secret_signature.return_value = "1235abc" assert sync_boto_secrets( kube_client=mock.Mock(), @@ -738,7 +766,16 @@ def test_sync_boto_secrets_noop(boto_keys_patches): mock_config_loader_instances, ) = boto_keys_patches - mock_open_handle.read.side_effect = ["file1", "file2", "file3", "file4"] + mock_open_handle.read.side_effect = [ + "file1", + "file2", + "file3", + "file4", + "eksfile1", + "eksfile2", + "eksfile3", + "eksfile4", + ] mock_get_kubernetes_secret_signature.return_value = ( "4c3da4da5d97294f69527dc92c2b930ce127522c" ) @@ -778,7 +815,16 @@ def test_sync_boto_secrets_exists_but_no_signature(boto_keys_patches): ) mock_config_loader_instances.return_value = [deployment] - mock_open_handle.read.side_effect = ["file1", "file2", "file3", "file4"] + mock_open_handle.read.side_effect = [ + "file1", + "file2", + "file3", + "file4", + "eksfile1", + "eksfile2", + "eksfile3", + "eksfile4", + ] mock_get_kubernetes_secret_signature.return_value = None mock_create_secret.side_effect = ApiException(409) @@ -818,7 +864,10 @@ def crypto_keys_patches(): ) as mock_update_kubernetes_secret_signature, mock.patch( "paasta_tools.kubernetes.bin.paasta_secrets_sync.PaastaServiceConfigLoader", autospec=True, - ) as mock_config_loader: + ) as mock_config_loader, mock.patch( + "paasta_tools.kubernetes.bin.paasta_secrets_sync.load_system_paasta_config", + autospec=True, + ): yield ( provider, mock_get_kubernetes_secret_signature, diff --git a/tests/metrics/test_metastatus_lib.py b/tests/metrics/test_metastatus_lib.py index d380ae6f65..65fa6d72ae 100644 --- a/tests/metrics/test_metastatus_lib.py +++ b/tests/metrics/test_metastatus_lib.py @@ -265,7 +265,7 @@ def test_assert_kube_deployments(): ) as mock_list_all_deployments: client = Mock() mock_list_all_deployments.return_value = ["KubeDeployment:1"] - output, ok = metastatus_lib.assert_kube_deployments(client) + output, ok = metastatus_lib.assert_kube_deployments(client, namespace="paasta") assert re.match("Kubernetes deployments: 1", output) assert ok @@ -283,7 +283,7 @@ def test_assert_kube_pods_running(): V1Pod(status=V1PodStatus(phase="Failed")), V1Pod(status=V1PodStatus(phase="Failed")), ] - output, ok = metastatus_lib.assert_kube_pods_running(client) + output, ok = metastatus_lib.assert_kube_pods_running(client, namespace="paasta") assert re.match("Pods: running: 1 pending: 2 failed: 3", output) assert ok diff --git a/tests/test_cleanup_kubernetes_jobs.py b/tests/test_cleanup_kubernetes_jobs.py index ac66d41504..96a0e82305 100644 --- a/tests/test_cleanup_kubernetes_jobs.py +++ b/tests/test_cleanup_kubernetes_jobs.py @@ -15,6 +15,7 @@ from copy import deepcopy import mock +import pytest from kubernetes.client import V1Deployment from kubernetes.client import V1StatefulSet from pytest import fixture @@ -23,6 +24,7 @@ from paasta_tools.cleanup_kubernetes_jobs import 
cleanup_unused_apps from paasta_tools.cleanup_kubernetes_jobs import DontKillEverythingError from paasta_tools.cleanup_kubernetes_jobs import main +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.kubernetes.application.controller_wrappers import DeploymentWrapper from paasta_tools.kubernetes.application.controller_wrappers import StatefulSetWrapper from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig @@ -32,7 +34,7 @@ def fake_deployment(): fake_deployment = V1Deployment( metadata=mock.Mock( - namespace="paasta", + namespace="paastasvc-service", labels={ "yelp.com/paasta_service": "service", "yelp.com/paasta_instance": "instance-1", @@ -121,6 +123,39 @@ def fake_instance_config( return fake_instance_config +def fake_eks_instance_config( + cluster, service, instance, soa_dir="soa_dir", load_deployments=False +): + fake_eks_instance_config = EksDeploymentConfig( + service, + instance, + cluster, + { + "port": None, + "monitoring": {}, + "deploy": {"pipeline": [{"step": "default"}]}, + "data": {}, + "smartstack": {}, + "dependencies": {}, + "cpus": 0.1, + "mem": 100, + "min_instances": 1, + "max_instances": 10, + "deploy_group": "prod.main", + "autoscaling": {"setpoint": 0.7}, + }, + { + "docker_image": "services-compute-infra-test-service:paasta-5b861b3bd42ef9674d3ca04a1259c79eddb71694", + "git_sha": "5b861b3bd42ef9674d3ca04a1259c79eddb71694", + "image_version": None, + "desired_state": "start", + "force_bounce": None, + }, + soa_dir, + ) + return fake_eks_instance_config + + def get_fake_instances(self, with_limit: bool = True) -> int: return self.config_dict.get("max_instances", None) @@ -136,7 +171,7 @@ def test_main(fake_deployment, fake_stateful_set, invalid_app): load_config_patch.return_value.get_cluster.return_value = "fake_cluster" main(("--soa-dir", soa_dir, "--cluster", cluster)) cleanup_patch.assert_called_once_with( - soa_dir, cluster, kill_threshold=0.5, force=False + soa_dir, cluster, kill_threshold=0.5, force=False, eks=False ) @@ -166,7 +201,14 @@ def test_list_apps(fake_deployment, fake_stateful_set, invalid_app): ) -def test_cleanup_unused_apps(fake_deployment, fake_stateful_set, invalid_app): +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) +def test_cleanup_unused_apps(eks_flag, fake_deployment, fake_stateful_set, invalid_app): mock_kube_client = mock.MagicMock() with mock.patch( "paasta_tools.cleanup_kubernetes_jobs.KubeClient", @@ -180,6 +222,10 @@ def test_cleanup_unused_apps(fake_deployment, fake_stateful_set, invalid_app): "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.KubernetesDeploymentConfig.get_instances", side_effect=get_fake_instances, @@ -193,12 +239,21 @@ def test_cleanup_unused_apps(fake_deployment, fake_stateful_set, invalid_app): ) as mock_alert_state_change: mock_alert_state_change.__enter__ = mock.Mock(return_value=(mock.Mock(), None)) mock_alert_state_change.__exit__ = mock.Mock(return_value=None) - cleanup_unused_apps("soa_dir", "fake cluster", kill_threshold=1, force=False) + cleanup_unused_apps( + "soa_dir", "fake cluster", kill_threshold=1, force=False, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 1 +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) 
def test_cleanup_unused_apps_in_multiple_namespaces( - fake_deployment, fake_stateful_set, invalid_app + eks_flag, fake_deployment, fake_stateful_set, invalid_app ): mock_kube_client = mock.MagicMock() fake_deployment2 = deepcopy(fake_deployment) @@ -222,6 +277,10 @@ def test_cleanup_unused_apps_in_multiple_namespaces( "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.get_services_for_cluster", return_value={("service", "instance-1")}, @@ -235,12 +294,21 @@ def test_cleanup_unused_apps_in_multiple_namespaces( ) as mock_alert_state_change: mock_alert_state_change.__enter__ = mock.Mock(return_value=(mock.Mock(), None)) mock_alert_state_change.__exit__ = mock.Mock(return_value=None) - cleanup_unused_apps("soa_dir", "fake cluster", kill_threshold=2, force=False) + cleanup_unused_apps( + "soa_dir", "fake cluster", kill_threshold=2, force=False, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 1 +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) def test_cleanup_unused_apps_does_not_delete( - fake_deployment, fake_stateful_set, invalid_app + eks_flag, fake_deployment, fake_stateful_set, invalid_app ): mock_kube_client = mock.MagicMock() with mock.patch( @@ -251,6 +319,10 @@ def test_cleanup_unused_apps_does_not_delete( "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.list_all_applications", return_value={("service", "instance-1"): [DeploymentWrapper(fake_deployment)]}, @@ -268,12 +340,21 @@ def test_cleanup_unused_apps_does_not_delete( ) as mock_alert_state_change: mock_alert_state_change.__enter__ = mock.Mock(return_value=(mock.Mock(), None)) mock_alert_state_change.__exit__ = mock.Mock(return_value=None) - cleanup_unused_apps("soa_dir", "fake cluster", kill_threshold=1, force=False) + cleanup_unused_apps( + "soa_dir", "fake cluster", kill_threshold=1, force=False, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 0 +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) def test_cleanup_unused_apps_does_not_delete_bouncing_apps( - fake_deployment, fake_stateful_set, invalid_app + eks_flag, fake_deployment, fake_stateful_set, invalid_app ): mock_kube_client = mock.MagicMock() fake_deployment2 = deepcopy(fake_deployment) @@ -297,6 +378,10 @@ def test_cleanup_unused_apps_does_not_delete_bouncing_apps( "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.get_services_for_cluster", return_value={("service", "instance-1")}, @@ -310,12 +395,21 @@ def test_cleanup_unused_apps_does_not_delete_bouncing_apps( ) as mock_alert_state_change: mock_alert_state_change.__enter__ = mock.Mock(return_value=(mock.Mock(), None)) mock_alert_state_change.__exit__ = mock.Mock(return_value=None) - 
cleanup_unused_apps("soa_dir", "fake cluster", kill_threshold=2, force=False) + cleanup_unused_apps( + "soa_dir", "fake cluster", kill_threshold=2, force=False, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 0 +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) def test_cleanup_unused_apps_does_not_delete_recently_created_apps( - fake_deployment, fake_stateful_set, invalid_app + eks_flag, fake_deployment, fake_stateful_set, invalid_app ): mock_kube_client = mock.MagicMock() fake_deployment.status.ready_replicas = 10 @@ -336,6 +430,10 @@ def test_cleanup_unused_apps_does_not_delete_recently_created_apps( "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.get_services_for_cluster", return_value={("service", "instance-1")}, @@ -349,12 +447,21 @@ def test_cleanup_unused_apps_does_not_delete_recently_created_apps( ) as mock_alert_state_change: mock_alert_state_change.__enter__ = mock.Mock(return_value=(mock.Mock(), None)) mock_alert_state_change.__exit__ = mock.Mock(return_value=None) - cleanup_unused_apps("soa_dir", "fake cluster", kill_threshold=2, force=False) + cleanup_unused_apps( + "soa_dir", "fake cluster", kill_threshold=2, force=False, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 0 +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) def test_cleanup_unused_apps_dont_kill_everything( - fake_deployment, fake_stateful_set, invalid_app + eks_flag, fake_deployment, fake_stateful_set, invalid_app ): mock_kube_client = mock.MagicMock() with mock.patch( @@ -365,6 +472,10 @@ def test_cleanup_unused_apps_dont_kill_everything( "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.list_all_applications", return_value={("service", "instance-1"): [DeploymentWrapper(fake_deployment)]}, @@ -384,13 +495,20 @@ def test_cleanup_unused_apps_dont_kill_everything( mock_alert_state_change.__exit__ = mock.Mock(return_value=None) with raises(DontKillEverythingError): cleanup_unused_apps( - "soa_dir", "fake_cluster", kill_threshold=0, force=False + "soa_dir", "fake_cluster", kill_threshold=0, force=False, eks=eks_flag ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 0 +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) def test_cleanup_unused_apps_dont_kill_statefulsets( - fake_deployment, fake_stateful_set, invalid_app + eks_flag, fake_deployment, fake_stateful_set, invalid_app ): mock_kube_client = mock.MagicMock() with mock.patch( @@ -401,6 +519,10 @@ def test_cleanup_unused_apps_dont_kill_statefulsets( "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.list_all_applications", return_value={ @@ -419,11 +541,22 @@ def 
test_cleanup_unused_apps_dont_kill_statefulsets( ) as mock_alert_state_change: mock_alert_state_change.__enter__ = mock.Mock(return_value=(mock.Mock(), None)) mock_alert_state_change.__exit__ = mock.Mock(return_value=None) - cleanup_unused_apps("soa_dir", "fake_cluster", kill_threshold=0.5, force=False) + cleanup_unused_apps( + "soa_dir", "fake_cluster", kill_threshold=0.5, force=False, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 0 -def test_cleanup_unused_apps_force(fake_deployment, fake_stateful_set, invalid_app): +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) +def test_cleanup_unused_apps_force( + eks_flag, fake_deployment, fake_stateful_set, invalid_app +): mock_kube_client = mock.MagicMock() with mock.patch( "paasta_tools.cleanup_kubernetes_jobs.KubeClient", @@ -433,6 +566,10 @@ def test_cleanup_unused_apps_force(fake_deployment, fake_stateful_set, invalid_a "paasta_tools.kubernetes_tools.load_kubernetes_service_config_no_cache", autospec=True, side_effect=fake_instance_config, + ), mock.patch( + "paasta_tools.eks_tools.load_eks_service_config_no_cache", + autospec=True, + side_effect=fake_eks_instance_config, ), mock.patch( "paasta_tools.cleanup_kubernetes_jobs.list_all_applications", return_value={("service", "instance-1"): [DeploymentWrapper(fake_deployment)]}, @@ -450,12 +587,21 @@ def test_cleanup_unused_apps_force(fake_deployment, fake_stateful_set, invalid_a ) as mock_alert_state_change: mock_alert_state_change.__enter__ = mock.Mock(return_value=(mock.Mock(), None)) mock_alert_state_change.__exit__ = mock.Mock(return_value=None) - cleanup_unused_apps("soa_dir", "fake_cluster", kill_threshold=0, force=True) + cleanup_unused_apps( + "soa_dir", "fake_cluster", kill_threshold=0, force=True, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 1 +@pytest.mark.parametrize( + "eks_flag", + [ + (False), + (True), + ], +) def test_cleanup_unused_apps_ignore_invalid_apps( - fake_deployment, fake_stateful_set, invalid_app + eks_flag, fake_deployment, fake_stateful_set, invalid_app ): mock_kube_client = mock.MagicMock() with mock.patch( @@ -474,5 +620,7 @@ def test_cleanup_unused_apps_ignore_invalid_apps( mock_kube_client.deployments.list_namespaced_deployment.return_value = ( mock.MagicMock(items=[invalid_app]) ) - cleanup_unused_apps("soa_dir", "fake_cluster", kill_threshold=0, force=True) + cleanup_unused_apps( + "soa_dir", "fake_cluster", kill_threshold=0, force=True, eks=eks_flag + ) assert mock_kube_client.deployments.delete_namespaced_deployment.call_count == 0 diff --git a/tests/test_config_utils.py b/tests/test_config_utils.py index 0efcb79d7b..17a76f3d53 100644 --- a/tests/test_config_utils.py +++ b/tests/test_config_utils.py @@ -36,7 +36,7 @@ def test_write_auto_config_data_service_dne(tmpdir): ) as mock_open: result = config_utils.write_auto_config_data( service="something", - extra_info="marathon-norcal-devc", + extra_info="kubernetes-norcal-devc", data={"a": 1}, soa_dir=tmpdir, ) @@ -46,7 +46,7 @@ def test_write_auto_config_data_service_dne(tmpdir): def test_write_auto_config_data_new_file(tmpdir): service = "foo" - conf_file = "marathon-norcal-devc" + conf_file = "kubernetes-norcal-devc" data = {"a": 1} tmpdir.mkdir(service) @@ -65,7 +65,7 @@ def test_write_auto_config_data_new_file(tmpdir): def test_write_auto_config_data_file_exists(tmpdir): service = "foo" - conf_file = "marathon-norcal-devc" + conf_file = "kubernetes-norcal-devc" tmpdir.mkdir(service) 
config_utils.write_auto_config_data( @@ -91,9 +91,14 @@ def test_write_auto_config_data_file_exists(tmpdir): @mock.patch("paasta_tools.config_utils.validate_schema", autospec=True) def test_validate_auto_config_file_config_types(mock_validate, tmpdir): - for config_type in config_utils.KNOWN_CONFIG_TYPES: + for config_type in ( + "kubernetes", + "deploy", + "smartstack", + "cassandracluster", + ): filepath = f"service/{config_type}-cluster.yaml" - config_utils.validate_auto_config_file(filepath, AUTO_SOACONFIG_SUBDIR) + assert config_utils.validate_auto_config_file(filepath, AUTO_SOACONFIG_SUBDIR) mock_validate.assert_called_with(filepath, f"autotuned_defaults/{config_type}") @@ -120,7 +125,7 @@ def test_validate_auto_config_file_unknown_type(mock_validate, tmpdir): ) def test_validate_auto_config_file_e2e(data, is_valid, tmpdir): service = "foo" - conf_file = "marathon-norcal-devc" + conf_file = "kubernetes-norcal-devc" tmpdir.mkdir(service) filepath = config_utils.write_auto_config_data( @@ -135,12 +140,20 @@ def test_validate_auto_config_file_e2e(data, is_valid, tmpdir): ) -@pytest.mark.parametrize("branch", ["master", "other_test"]) -def test_auto_config_updater_context(branch, tmpdir, mock_subprocess): +@pytest.mark.parametrize( + "branch, remote_branch_exists", + [("master", True), ("other_test", True), ("other_test", False)], +) +def test_auto_config_updater_context( + branch, remote_branch_exists, tmpdir, mock_subprocess +): remote = "git_remote" updater = config_utils.AutoConfigUpdater( "test_source", remote, branch=branch, working_dir=tmpdir ) + updater._remote_branch_exists = mock.MagicMock( + autospec=True, return_value=remote_branch_exists + ) initial_wd = os.getcwd() with updater: @@ -148,9 +161,19 @@ def test_auto_config_updater_context(branch, tmpdir, mock_subprocess): assert os.path.isdir(clone_dir) expected_calls = [mock.call.check_call(["git", "clone", remote, clone_dir])] if branch != "master": - expected_calls.append( - mock.call.check_call(["git", "checkout", "-b", branch]) - ) + if remote_branch_exists: + expected_calls.extend( + [ + mock.call.check_call(["git", "fetch", "origin", branch]), + mock.call.check_call( + ["git", "checkout", "-b", branch, f"origin/{branch}"] + ), + ] + ) + else: + expected_calls.append( + mock.call.check_call(["git", "checkout", "-b", branch]) + ) assert mock_subprocess.mock_calls == expected_calls assert os.getcwd() == clone_dir @@ -159,8 +182,13 @@ def test_auto_config_updater_context(branch, tmpdir, mock_subprocess): assert os.getcwd() == initial_wd -@pytest.mark.parametrize("branch", ["master", "other_test"]) -def test_auto_config_updater_context_no_clone(branch, tmpdir, mock_subprocess): +@pytest.mark.parametrize( + "branch, remote_branch_exists", + [("master", True), ("other_test", True), ("other_test", False)], +) +def test_auto_config_updater_context_no_clone( + branch, remote_branch_exists, tmpdir, mock_subprocess +): remote = "git_remote" working_dir = tmpdir.mkdir("testing") updater = config_utils.AutoConfigUpdater( @@ -170,13 +198,26 @@ def test_auto_config_updater_context_no_clone(branch, tmpdir, mock_subprocess): working_dir=working_dir, do_clone=False, ) + updater._remote_branch_exists = mock.MagicMock( + autospec=True, return_value=remote_branch_exists + ) initial_wd = os.getcwd() with updater: if branch == "master": expected_calls = [] else: - expected_calls = [mock.call.check_call(["git", "checkout", "-b", branch])] + if remote_branch_exists: + expected_calls = [ + mock.call.check_call(["git", "fetch", "origin", branch]), 
+ mock.call.check_call( + ["git", "checkout", "-b", branch, f"origin/{branch}"] + ), + ] + else: + expected_calls = [ + mock.call.check_call(["git", "checkout", "-b", branch]) + ] assert mock_subprocess.mock_calls == expected_calls assert os.getcwd() == working_dir @@ -190,11 +231,10 @@ def test_auto_config_updater_context_no_clone(branch, tmpdir, mock_subprocess): def test_auto_config_updater_validate(mock_validate_file, all_valid, updater): mock_validate_file.side_effect = [True, all_valid, True] - updater.write_configs("foo", "marathon-norcal-devc", {"a": 2}) updater.write_configs("foo", "kubernetes-norcal-devc", {"a": 2}) updater.write_configs("foo", "kubernetes-pnw-devc", {"a": 2}) assert updater.validate() == all_valid - assert mock_validate_file.call_count == 3 + assert mock_validate_file.call_count == 2 def test_auto_config_updater_read_write(updater): @@ -275,3 +315,71 @@ def test_auto_config_updater_commit(mock_push, mock_commit, did_commit, updater) mock_push.assert_called_once_with(updater.branch) else: assert mock_push.call_count == 0 + + +def test_auto_config_updater_merge_recommendations_limits(updater): + service = "foo" + conf_file = "notk8s-euwest-prod" + limited_instance = "some_instance" + noop_instance = "other_instance" + autotune_data = { + limited_instance: {"cpus": 0.1, "mem": 167, "disk": 256, "cpu_burst_add": 0.5} + } + user_data = { + limited_instance: { + "cmd": "ls", + "autotune_limits": { + "cpus": {"min": 1, "max": 2}, + "mem": {"min": 1024, "max": 2048}, + "disk": {"min": 512, "max": 1024}, + }, + }, + noop_instance: {"cmd": "ls"}, + } + + recs = { + (service, conf_file): { + limited_instance: { + "mem": 1, + "disk": 700, + "cpus": 3, + }, + noop_instance: { + "cpus": 100, + "mem": 10000, + "disk": 2048, + }, + } + } + + with mock.patch.object( + updater, + "get_existing_configs", + autospec=True, + side_effect=[ + # first get the autotune data + autotune_data, + # then we get both the eks- and kuberentes- data + user_data, + # there could be data in both of these, but for a + # simpler test, we just assume that we're looking + # at something that's 100% on Yelp-managed k8s + {}, + ], + ): + assert updater.merge_recommendations(recs) == { + (service, conf_file): { + limited_instance: { + "mem": 1024, # use lower bound + "disk": 700, # unchanged + "cpus": 2, # use upper bound + "cpu_burst_add": 0.5, # no updated rec or limit, leave alone + }, + # this instances recommendations should be left untouched + noop_instance: { + "cpus": 100, + "mem": 10000, + "disk": 2048, + }, + } + } diff --git a/tests/test_delete_kubernetes_deployments.py b/tests/test_delete_kubernetes_deployments.py index d3370505ca..73d3034a87 100644 --- a/tests/test_delete_kubernetes_deployments.py +++ b/tests/test_delete_kubernetes_deployments.py @@ -28,6 +28,7 @@ def test_main(): mock_delete_deployment.assert_called_with( kube_client=mock_kube_client.return_value, deployment_name="fake_pcm_deployment", + namespace="paasta", ) # Test main() failed diff --git a/tests/test_docker_wrapper.py b/tests/test_docker_wrapper.py index a5c2fe1e39..cb79eb5ba3 100644 --- a/tests/test_docker_wrapper.py +++ b/tests/test_docker_wrapper.py @@ -324,7 +324,7 @@ def test_env_not_present(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "foobar", ) @@ -364,7 +364,7 @@ def test_numa_string_value(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + 
f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", '--env=PIN_TO_NUMA_NODE="true"', ) @@ -378,7 +378,7 @@ def test_numa_bogus_node(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--env=PIN_TO_NUMA_NODE=True", ) @@ -407,7 +407,7 @@ def test_numa_unsupported(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--env=PIN_TO_NUMA_NODE=1", "--env=MARATHON_APP_RESOURCE_CPUS=1.5", @@ -438,7 +438,7 @@ def test_marathon_bogus_value(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--cpuset-mems=1", "--cpuset-cpus=1,3", @@ -471,7 +471,7 @@ def test_numa_OK(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--cpuset-mems=1", "--cpuset-cpus=1,3", @@ -505,7 +505,7 @@ def test_cpuset_already_set(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--cpuset-cpus=0,2", "--env=PIN_TO_NUMA_NODE=1", @@ -538,7 +538,7 @@ def test_numa_req_bogus_mem_value(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--cpuset-mems=1", "--cpuset-cpus=1,3", @@ -573,7 +573,7 @@ def test_numa_req_more_mem_than_available(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--env=PIN_TO_NUMA_NODE=1", "--env=MARATHON_APP_RESOURCE_CPUS=2", @@ -606,7 +606,7 @@ def test_numa_req_less_mem_than_available(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--cpuset-mems=1", "--cpuset-cpus=1,3", @@ -640,7 +640,7 @@ def test_numa_req_exact_amount_of_cores(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--cpuset-mems=1", "--cpuset-cpus=1,3", @@ -673,7 +673,7 @@ def test_numa_too_many_cores_requested(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--env=PIN_TO_NUMA_NODE=1", "--env=MARATHON_APP_RESOURCE_CPUS=3.0", @@ -699,7 +699,7 @@ def test_numa_enabled_unknown_cpu_requirement_skips_cpusets(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--cpuset-mems=1", "--cpuset-cpus=1,3", @@ -726,7 +726,7 @@ def test_numa_wrong_cpu(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--env=PIN_TO_NUMA_NODE=2", ) @@ -746,7 +746,7 @@ def test_numa_single_cpu_doesnt_bother_with_cpusets(self, mock_execlp): "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--env=PIN_TO_NUMA_NODE=1", ) @@ -801,7 +801,7 @@ def test_mac_address( 
"docker", "run", "--mac-address=00:00:00:00:00:00", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", *mock_firewall_env_args, ) @@ -846,7 +846,7 @@ def test_mac_address_already_set( "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", "--mac-address=12:34:56:78:90:ab", *mock_firewall_env_args, @@ -866,7 +866,7 @@ def test_mac_address_no_lockdir( "docker", "docker", "run", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", *mock_firewall_env_args, ) @@ -901,7 +901,7 @@ def test_prepare_new_container_error( "docker", "run", "--mac-address=00:00:00:00:00:00", - f"--hostname={socket.getfqdn()}", + f"--hostname={socket.gethostname()}", f"-e=PAASTA_HOST={socket.getfqdn()}", *mock_firewall_env_args, ) diff --git a/tests/test_kubernetes_tools.py b/tests/test_kubernetes_tools.py index 100b59efab..7ff63f9009 100644 --- a/tests/test_kubernetes_tools.py +++ b/tests/test_kubernetes_tools.py @@ -2,6 +2,7 @@ from base64 import b64encode from typing import Any from typing import Dict +from typing import List from typing import Sequence import asynctest @@ -41,6 +42,7 @@ from kubernetes.client import V1PodAntiAffinity from kubernetes.client import V1PodSpec from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1PreferredSchedulingTerm from kubernetes.client import V1Probe from kubernetes.client import V1ResourceRequirements from kubernetes.client import V1RoleBinding @@ -89,6 +91,7 @@ from paasta_tools.kubernetes_tools import create_stateful_set from paasta_tools.kubernetes_tools import ensure_namespace from paasta_tools.kubernetes_tools import ensure_paasta_api_rolebinding +from paasta_tools.kubernetes_tools import ensure_paasta_namespace_limits from paasta_tools.kubernetes_tools import filter_nodes_by_blacklist from paasta_tools.kubernetes_tools import filter_pods_by_service_instance from paasta_tools.kubernetes_tools import force_delete_pods @@ -149,6 +152,7 @@ from paasta_tools.utils import SecretVolume from paasta_tools.utils import SecretVolumeItem from paasta_tools.utils import SystemPaastaConfig +from paasta_tools.utils import TopologySpreadConstraintDict def test_force_delete_pods(): @@ -353,26 +357,54 @@ def test_get_bounce_method(self): with pytest.raises(Exception): self.deployment.get_bounce_method() - def test_get_deployment_strategy(self): + @pytest.mark.parametrize( + "bounce_method,bounce_margin_factor,expected_strategy,expected_rolling_update_deploy", + [ + ( + "crossover", + 1, + "RollingUpdate", + V1RollingUpdateDeployment(max_surge="100%", max_unavailable="0%"), + ), + ( + "crossover", + 0.3, + "RollingUpdate", + V1RollingUpdateDeployment(max_surge="100%", max_unavailable="70%"), + ), + # b_m_f does not actually contribute to settings for brutal + ( + "brutal", + 0.5, + "RollingUpdate", + V1RollingUpdateDeployment(max_surge="100%", max_unavailable="100%"), + ), + ("downthenup", 1, "Recreate", None), + ], + ) + def test_get_deployment_strategy( + self, + bounce_method, + bounce_margin_factor, + expected_strategy, + expected_rolling_update_deploy, + ): with mock.patch( "paasta_tools.kubernetes_tools.KubernetesDeploymentConfig.get_bounce_method", autospec=True, - return_value="crossover", - ) as mock_get_bounce_method: + return_value=bounce_method, + ), mock.patch( + "paasta_tools.kubernetes_tools.KubernetesDeploymentConfig.get_bounce_margin_factor", 
+ autospec=True, + return_value=bounce_margin_factor, + ): assert ( self.deployment.get_deployment_strategy_config() == V1DeploymentStrategy( - type="RollingUpdate", - rolling_update=V1RollingUpdateDeployment( - max_surge="100%", max_unavailable="0%" - ), + type=expected_strategy, + rolling_update=expected_rolling_update_deploy, ) ) - mock_get_bounce_method.return_value = "downthenup" - assert ( - self.deployment.get_deployment_strategy_config() - == V1DeploymentStrategy(type="Recreate") - ) def test_get_sanitised_volume_name(self): assert ( @@ -714,11 +746,6 @@ def test_get_sidecar_resource_requirements_default_requirements(self): "memory": "512Mi", "ephemeral-storage": "256Mi", }, - "uwsgi_exporter": { - "cpu": 0.1, - "memory": "512Mi", - "ephemeral-storage": "256Mi", - }, } ) ) @@ -746,11 +773,6 @@ def test_get_sidecar_resource_requirements_limits_override_default_requirements( "memory": "1024Mi", "ephemeral-storage": "256Mi", }, - "uwsgi_exporter": { - "cpu": 0.1, - "memory": "1024Mi", - "ephemeral-storage": "256Mi", - }, } ) ) @@ -761,72 +783,7 @@ def test_get_sidecar_resource_requirements_limits_override_default_requirements( requests={"cpu": 0.1, "memory": "1024Mi", "ephemeral-storage": "256Mi"}, ) - def test_get_uwsgi_exporter_sidecar_container_should_run(self): - system_paasta_config = mock.Mock( - get_uwsgi_exporter_sidecar_image_url=mock.Mock( - return_value="uwsgi_exporter_image" - ) - ) - with mock.patch.object( - self.deployment, "should_run_uwsgi_exporter_sidecar", return_value=True - ): - ret = self.deployment.get_uwsgi_exporter_sidecar_container( - system_paasta_config - ) - assert ret is not None - assert ret.image == "uwsgi_exporter_image" - assert ret.ports[0].container_port == 9117 - - @pytest.mark.parametrize( - "uwsgi_stats_port,expected_port", - [ - (None, "8889"), - (31337, "31337"), - ], - ) - def test_get_uwsgi_exporter_sidecar_container_stats_port( - self, uwsgi_stats_port, expected_port - ): - system_paasta_config = mock.Mock( - get_uwsgi_exporter_sidecar_image_url=mock.Mock( - return_value="uwsgi_exporter_image" - ) - ) - self.deployment.config_dict.update( - { - "max_instances": 5, - "autoscaling": { - "metrics_provider": "uwsgi", - "use_prometheus": True, - }, - } - ) - if uwsgi_stats_port is not None: - self.deployment.config_dict["autoscaling"][ - "uwsgi_stats_port" - ] = uwsgi_stats_port - - ret = self.deployment.get_uwsgi_exporter_sidecar_container(system_paasta_config) - expected_env_var = V1EnvVar(name="STATS_PORT", value=expected_port) - assert expected_env_var in ret.env - - def test_get_uwsgi_exporter_sidecar_container_shouldnt_run(self): - system_paasta_config = mock.Mock( - get_uwsgi_exporter_sidecar_image_url=mock.Mock( - return_value="uwsgi_exporter_image" - ) - ) - with mock.patch.object( - self.deployment, "should_run_uwsgi_exporter_sidecar", return_value=False - ): - assert ( - self.deployment.get_uwsgi_exporter_sidecar_container( - system_paasta_config - ) - is None - ) - - def test_should_run_uwsgi_exporter_sidecar_explicit(self): + def test_should_use_uwsgi_exporter_explicit(self): self.deployment.config_dict.update( { "max_instances": 5, @@ -839,71 +796,10 @@ def test_should_run_uwsgi_exporter_sidecar_explicit(self): system_paasta_config = mock.Mock() - assert ( - self.deployment.should_run_uwsgi_exporter_sidecar(system_paasta_config) - is True - ) + assert self.deployment.should_use_uwsgi_exporter(system_paasta_config) is True - self.deployment.config_dict["autoscaling"]["use_prometheus"] = False - assert ( - 
self.deployment.should_run_uwsgi_exporter_sidecar(system_paasta_config) - is False - ) - - def test_should_run_uwsgi_exporter_sidecar_defaults(self): - self.deployment.config_dict.update( - { - "max_instances": 5, - "autoscaling": { - "metrics_provider": "uwsgi", - }, - } - ) - - system_paasta_config_enabled = mock.Mock( - default_should_run_uwsgi_exporter_sidecar=mock.Mock(return_value=True) - ) - system_paasta_config_disabled = mock.Mock( - default_should_run_uwsgi_exporter_sidecar=mock.Mock(return_value=False) - ) - - with mock.patch( - "paasta_tools.kubernetes_tools.DEFAULT_USE_PROMETHEUS_UWSGI", - autospec=False, - new=False, - ): - assert ( - self.deployment.should_run_uwsgi_exporter_sidecar( - system_paasta_config_enabled - ) - is True - ) - assert ( - self.deployment.should_run_uwsgi_exporter_sidecar( - system_paasta_config_disabled - ) - is False - ) - - # If the default for use_prometheus is True and config_dict doesn't specify use_prometheus, we should run - # uwsgi_exporter regardless of default_should_run_uwsgi_exporter_sidecar. - with mock.patch( - "paasta_tools.kubernetes_tools.DEFAULT_USE_PROMETHEUS_UWSGI", - autospec=False, - new=True, - ): - assert ( - self.deployment.should_run_uwsgi_exporter_sidecar( - system_paasta_config_enabled - ) - is True - ) - assert ( - self.deployment.should_run_uwsgi_exporter_sidecar( - system_paasta_config_disabled - ) - is True - ) + self.deployment.config_dict["autoscaling"]["metrics_provider"] = "cpu" + assert self.deployment.should_use_uwsgi_exporter(system_paasta_config) is False def test_get_gunicorn_exporter_sidecar_container_should_run(self): system_paasta_config = mock.Mock( @@ -1299,22 +1195,27 @@ def test_get_pod_volumes(self): ), V1Volume( name="secret--waldo", - secret=V1SecretVolumeSource(secret_name="paasta-secret-kurupt-waldo"), + secret=V1SecretVolumeSource( + secret_name="paastasvc-kurupt-secret-kurupt-waldo", optional=False + ), ), V1Volume( name="secret--waldo", secret=V1SecretVolumeSource( - secret_name="paasta-secret-kurupt-waldo", default_mode=0o765 + secret_name="paastasvc-kurupt-secret-kurupt-waldo", + default_mode=0o765, + optional=False, ), ), V1Volume( name="secret--waldo", secret=V1SecretVolumeSource( - secret_name="paasta-secret-kurupt-waldo", + secret_name="paastasvc-kurupt-secret-kurupt-waldo", items=[ V1KeyToPath(key="aaa", mode=0o567, path="bbb"), V1KeyToPath(key="ccc", path="ddd"), ], + optional=False, ), ), ] @@ -1389,14 +1290,14 @@ def test_get_volume_mounts(self): "zuora_integration", "sync_ads_settings_post_budget_edit_batch_daemon", "paasta-boto-key-zuora--integration-sync--ads--settings--po-4xbg", - "paasta-secret-zuora--integration-paasta-boto-key-zuora--integration-sync--ads--settings--po-4xbg-signature", + "paastasvc-zuora--integration-secret-zuora--integration-paasta-boto-key-zuora--integration-sync--ads--settings--po-4xbg-signature", ), ( {"boto_keys": ["few"]}, "zuora_integration", "reprocess_zuora_amend_callouts_batch_daemon", "paasta-boto-key-zuora--integration-reprocess--zuora--amend-jztw", - "paasta-secret-zuora--integration-paasta-boto-key-zuora--integration-reprocess--zuora--amend-jztw-signature", + "paastasvc-zuora--integration-secret-zuora--integration-paasta-boto-key-zuora--integration-reprocess--zuora--amend-jztw-signature", ), ( { @@ -1405,14 +1306,14 @@ def test_get_volume_mounts(self): "kafka_discovery", "main", "paasta-boto-key-kafka--discovery-main", - "paasta-secret-kafka--discovery-paasta-boto-key-kafka--discovery-main-signature", + 
"paastasvc-kafka--discovery-secret-kafka--discovery-paasta-boto-key-kafka--discovery-main-signature", ), ( {"boto_keys": ["pew"]}, "yelp-main", "lives_data_action_content_ingester_worker", "paasta-boto-key-yelp-main-lives--data--action--content--in-4pxl", - "paasta-secret-yelp-main-paasta-boto-key-yelp-main-lives--data--action--content--in-4pxl-signature", + "paastasvc-yelp-main-secret-yelp-main-paasta-boto-key-yelp-main-lives--data--action--content--in-4pxl-signature", ), ( { @@ -1906,9 +1807,10 @@ def test_get_pod_template_spec( if autoscaling_metric_provider: expected_labels["paasta.yelp.com/deploy_group"] = "fake_group" - expected_labels[ - f"paasta.yelp.com/scrape_{autoscaling_metric_provider}_prometheus" - ] = "true" + if autoscaling_metric_provider != "uwsgi": + expected_labels[ + f"paasta.yelp.com/scrape_{autoscaling_metric_provider}_prometheus" + ] = "true" if autoscaling_metric_provider in ("uwsgi", "gunicorn"): routable_ip = "true" @@ -1935,7 +1837,7 @@ def test_get_pod_template_spec( autospec=True, ) @mock.patch( - "paasta_tools.kubernetes_tools.KubernetesDeploymentConfig.should_run_uwsgi_exporter_sidecar", + "paasta_tools.kubernetes_tools.KubernetesDeploymentConfig.should_use_uwsgi_exporter", autospec=True, ) @mock.patch( @@ -1943,7 +1845,7 @@ def test_get_pod_template_spec( autospec=True, ) @pytest.mark.parametrize( - "ip_configured,in_smtstk,prometheus_port,should_run_uwsgi_exporter_sidecar_retval,should_run_gunicorn_exporter_sidecar_retval,expected", + "ip_configured,in_smtstk,prometheus_port,should_use_uwsgi_exporter_retval,should_run_gunicorn_exporter_sidecar_retval,expected", [ (False, True, 8888, False, False, "true"), (False, False, 8888, False, False, "true"), @@ -1956,20 +1858,18 @@ def test_get_pod_template_spec( ) def test_routable_ip( self, - mock_should_run_uwsgi_exporter_sidecar, + mock_should_use_uwsgi_exporter, mock_should_run_gunicorn_exporter_sidecar, mock_get_prometheus_port, ip_configured, in_smtstk, prometheus_port, - should_run_uwsgi_exporter_sidecar_retval, + should_use_uwsgi_exporter_retval, should_run_gunicorn_exporter_sidecar_retval, expected, ): mock_get_prometheus_port.return_value = prometheus_port - mock_should_run_uwsgi_exporter_sidecar.return_value = ( - should_run_uwsgi_exporter_sidecar_retval - ) + mock_should_use_uwsgi_exporter.return_value = should_use_uwsgi_exporter_retval mock_should_run_gunicorn_exporter_sidecar.return_value = ( should_run_gunicorn_exporter_sidecar_retval ) @@ -1985,7 +1885,7 @@ def test_routable_ip( assert ret == expected def test_create_pod_topology_spread_constraints(self): - configured_constraints = [ + configured_constraints: List[TopologySpreadConstraintDict] = [ { "topology_key": "kubernetes.io/hostname", "max_skew": 1, @@ -2049,47 +1949,102 @@ def test_get_node_selectors(self, raw_selectors, expected): assert self.deployment.get_node_selector() == expected def test_get_node_affinity_with_reqs(self): - with mock.patch( - "paasta_tools.kubernetes_tools.allowlist_denylist_to_requirements", - return_value=[("habitat", "In", ["habitat_a"])], - autospec=True, - ), mock.patch( - "paasta_tools.kubernetes_tools.raw_selectors_to_requirements", - return_value=[("instance_type", "In", ["a1.1xlarge"])], - autospec=True, - ): - assert self.deployment.get_node_affinity() == V1NodeAffinity( - required_during_scheduling_ignored_during_execution=V1NodeSelector( - node_selector_terms=[ - V1NodeSelectorTerm( - match_expressions=[ - V1NodeSelectorRequirement( - key="habitat", - operator="In", - values=["habitat_a"], - ), - 
V1NodeSelectorRequirement( - key="instance_type", - operator="In", - values=["a1.1xlarge"], - ), - ] - ) - ], - ), - ) + deployment = KubernetesDeploymentConfig( + service="kurupt", + instance="fm", + cluster="brentford", + config_dict={ + "deploy_whitelist": ["habitat", ["habitat_a"]], + "node_selectors": { + "instance_type": ["a1.1xlarge"], + }, + }, + branch_dict=None, + soa_dir="/nail/blah", + ) + + assert deployment.get_node_affinity() == V1NodeAffinity( + required_during_scheduling_ignored_during_execution=V1NodeSelector( + node_selector_terms=[ + V1NodeSelectorTerm( + match_expressions=[ + V1NodeSelectorRequirement( + key="yelp.com/habitat", + operator="In", + values=["habitat_a"], + ), + V1NodeSelectorRequirement( + key="node.kubernetes.io/instance-type", + operator="In", + values=["a1.1xlarge"], + ), + ] + ) + ], + ), + ) def test_get_node_affinity_no_reqs(self): - with mock.patch( - "paasta_tools.kubernetes_tools.allowlist_denylist_to_requirements", - return_value=[], - autospec=True, - ), mock.patch( - "paasta_tools.kubernetes_tools.raw_selectors_to_requirements", - return_value=[], - autospec=True, - ): - assert self.deployment.get_node_affinity() is None + deployment = KubernetesDeploymentConfig( + service="kurupt", + instance="fm", + cluster="brentford", + config_dict={}, + branch_dict=None, + soa_dir="/nail/blah", + ) + + assert deployment.get_node_affinity() is None + + def test_get_node_affinity_with_preferences(self): + deployment = KubernetesDeploymentConfig( + service="kurupt", + instance="fm", + cluster="brentford", + config_dict={ + "deploy_whitelist": ["habitat", ["habitat_a"]], + "node_selectors_preferred": [ + { + "weight": 1, + "preferences": { + "instance_type": ["a1.1xlarge"], + }, + } + ], + }, + branch_dict=None, + soa_dir="/nail/blah", + ) + + assert deployment.get_node_affinity() == V1NodeAffinity( + required_during_scheduling_ignored_during_execution=V1NodeSelector( + node_selector_terms=[ + V1NodeSelectorTerm( + match_expressions=[ + V1NodeSelectorRequirement( + key="yelp.com/habitat", + operator="In", + values=["habitat_a"], + ), + ] + ) + ], + ), + preferred_during_scheduling_ignored_during_execution=[ + V1PreferredSchedulingTerm( + weight=1, + preference=V1NodeSelectorTerm( + match_expressions=[ + V1NodeSelectorRequirement( + key="node.kubernetes.io/instance-type", + operator="In", + values=["a1.1xlarge"], + ), + ] + ), + ) + ], + ) @pytest.mark.parametrize( "anti_affinity,expected", @@ -2221,7 +2176,7 @@ def test_raw_selectors_to_requirements_error(self): "error_key": [{"operator": "BadOperator"}], # type: ignore } with pytest.raises(ValueError): - raw_selectors_to_requirements(node_selectors) + raw_selectors_to_requirements(node_selectors) # type: ignore @pytest.mark.parametrize( "is_autoscaled, autoscaled_label", @@ -2261,7 +2216,7 @@ def test_get_kubernetes_metadata(self, is_autoscaled, autoscaled_label): "paasta.yelp.com/managed": "true", }, name="kurupt-fm", - namespace="paasta", + namespace="paastasvc-kurupt", ) @pytest.mark.parametrize( @@ -2289,6 +2244,7 @@ def test_get_autoscaling_metric_spec_cpu(self, metrics_provider): "fake_name", "cluster", KubeClient(), + "paasta", ) annotations: Dict[Any, Any] = {} expected_res = V2beta2HorizontalPodAutoscaler( @@ -2352,6 +2308,7 @@ def test_get_autoscaling_metric_spec_cpu_prometheus(self, metrics_provider): "fake_name", "cluster", KubeClient(), + "paasta", ) annotations: Dict[Any, Any] = {} expected_res = V2beta2HorizontalPodAutoscaler( @@ -2438,6 +2395,7 @@ def 
test_get_autoscaling_metric_spec_uwsgi_prometheus( "fake_name", "cluster", KubeClient(), + "paasta", ) expected_res = V2beta2HorizontalPodAutoscaler( kind="HorizontalPodAutoscaler", @@ -2515,6 +2473,7 @@ def test_get_autoscaling_metric_spec_gunicorn_prometheus( "fake_name", "cluster", KubeClient(), + "paasta", ) expected_res = V2beta2HorizontalPodAutoscaler( kind="HorizontalPodAutoscaler", @@ -2585,6 +2544,7 @@ def test_override_scaledown_policies(self): "fake_name", "cluster", KubeClient(), + "paasta", ) assert hpa_dict["spec"]["behavior"]["scaleDown"] == { "stabilizationWindowSeconds": 123, @@ -2612,6 +2572,7 @@ def test_get_autoscaling_metric_spec_bespoke(self): "fake_name", "cluster", KubeClient(), + "paasta", ) expected_res = None assert expected_res == return_value @@ -2684,7 +2645,9 @@ def test_get_kubernetes_secret_env_vars(self): name="SOME", value_from=V1EnvVarSource( secret_key_ref=V1SecretKeySelector( - name="paasta-secret-kurupt-a--ref", key="a_ref", optional=False + name="paastasvc-kurupt-secret-kurupt-a--ref", + key="a_ref", + optional=False, ) ), ), @@ -2692,7 +2655,7 @@ def test_get_kubernetes_secret_env_vars(self): name="A", value_from=V1EnvVarSource( secret_key_ref=V1SecretKeySelector( - name="paasta-secret-underscore-shared-underscore-ref1", + name="paastasvc-kurupt-secret-underscore-shared-underscore-ref1", key="_ref1", optional=False, ) @@ -3009,6 +2972,8 @@ def test_KubeClient(): def test_ensure_namespace_doesnt_create_if_namespace_exists(): with mock.patch( "paasta_tools.kubernetes_tools.ensure_paasta_api_rolebinding", autospec=True + ), mock.patch( + "paasta_tools.kubernetes_tools.ensure_paasta_namespace_limits", autospec=True ): mock_metadata = mock.Mock() type(mock_metadata).name = "paasta" @@ -3023,6 +2988,8 @@ def test_ensure_namespace_doesnt_create_if_namespace_exists(): def test_ensure_namespace_kube_system(): with mock.patch( "paasta_tools.kubernetes_tools.ensure_paasta_api_rolebinding", autospec=True + ), mock.patch( + "paasta_tools.kubernetes_tools.ensure_paasta_namespace_limits", autospec=True ): mock_metadata = mock.Mock() type(mock_metadata).name = "kube-system" @@ -3037,6 +3004,8 @@ def test_ensure_namespace_kube_system(): def test_ensure_namespace_creates_namespace_if_doesnt_exist(): with mock.patch( "paasta_tools.kubernetes_tools.ensure_paasta_api_rolebinding", autospec=True + ), mock.patch( + "paasta_tools.kubernetes_tools.ensure_paasta_namespace_limits", autospec=True ): mock_namespaces = mock.Mock(items=[]) mock_client = mock.Mock( @@ -3072,6 +3041,28 @@ def test_ensure_paasta_api_rolebinding_doesnt_create_if_exists(): assert not mock_client.rbac.create_namespaced_role_binding.called +def test_ensure_paasta_namespace_limits_creates_if_not_exist(): + mock_limits = mock.Mock(items=[]) + mock_client = mock.Mock( + core=mock.Mock(list_namespaced_limit_range=mock.Mock(return_value=mock_limits)), + ) + + ensure_paasta_namespace_limits(mock_client, namespace="paastasvc-cool-service-name") + assert mock_client.core.create_namespaced_limit_range.called + + +def test_ensure_paasta_namespace_limits_doesnt_create_if_exists(): + mock_metadata = mock.Mock() + type(mock_metadata).name = "limit-mem-cpu-disk-per-container" + mock_limits = mock.Mock(items=[mock.Mock(metadata=mock_metadata)]) + mock_client = mock.Mock( + core=mock.Mock(list_namespaced_limit_range=mock.Mock(return_value=mock_limits)), + ) + + ensure_paasta_namespace_limits(mock_client, namespace="paastasvc-cool-service-name") + assert not mock_client.core.create_namespaced_role_binding.called + + 
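# A minimal sketch of the behaviour the two ensure_paasta_namespace_limits tests
# above pin down, assuming only what the mocks show: the helper lists LimitRanges
# in the target namespace via kube_client.core and creates one named
# "limit-mem-cpu-disk-per-container" when it is missing. The function body and
# the LimitRange contents below are illustrative assumptions, not the real
# paasta_tools.kubernetes_tools implementation.
from kubernetes.client import (
    V1LimitRange,
    V1LimitRangeItem,
    V1LimitRangeSpec,
    V1ObjectMeta,
)


def ensure_paasta_namespace_limits_sketch(kube_client, namespace: str) -> None:
    # Skip creation if the per-container limit already exists in this namespace.
    existing = kube_client.core.list_namespaced_limit_range(namespace=namespace)
    if any(
        lr.metadata.name == "limit-mem-cpu-disk-per-container"
        for lr in existing.items
    ):
        return

    # Otherwise create it; the concrete limit values are placeholders here.
    kube_client.core.create_namespaced_limit_range(
        namespace=namespace,
        body=V1LimitRange(
            metadata=V1ObjectMeta(
                name="limit-mem-cpu-disk-per-container", namespace=namespace
            ),
            spec=V1LimitRangeSpec(limits=[V1LimitRangeItem(type="Container")]),
        ),
    )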
@pytest.mark.parametrize( "addl_labels,replicas", ( @@ -3181,7 +3172,7 @@ def test_list_all_deployments(addl_labels, replicas): list_namespaced_stateful_set=mock.Mock(return_value=mock_stateful_sets), ) ) - assert list_all_deployments(kube_client=mock_client) == [] + assert list_all_deployments(kube_client=mock_client, namespace="paasta") == [] mock_items = [ mock.Mock( @@ -3230,7 +3221,7 @@ def test_list_all_deployments(addl_labels, replicas): list_namespaced_stateful_set=mock.Mock(return_value=mock_stateful_sets), ) ) - assert list_all_deployments(mock_client) == [ + assert list_all_deployments(mock_client, namespace="paasta") == [ KubeDeployment( service="kurupt", instance="fm", @@ -3702,7 +3693,10 @@ def test_get_kubernetes_app_by_name(): mock_client.deployments.read_namespaced_deployment_status.return_value = ( mock_deployment ) - assert get_kubernetes_app_by_name("someservice", mock_client) == mock_deployment + assert ( + get_kubernetes_app_by_name("someservice", mock_client, namespace="paasta") + == mock_deployment + ) assert mock_client.deployments.read_namespaced_deployment_status.called assert not mock_client.deployments.read_namespaced_stateful_set_status.called @@ -3714,7 +3708,10 @@ def test_get_kubernetes_app_by_name(): mock_client.deployments.read_namespaced_stateful_set_status.return_value = ( mock_stateful_set ) - assert get_kubernetes_app_by_name("someservice", mock_client) == mock_stateful_set + assert ( + get_kubernetes_app_by_name("someservice", mock_client, namespace="paasta") + == mock_stateful_set + ) assert mock_client.deployments.read_namespaced_deployment_status.called assert mock_client.deployments.read_namespaced_stateful_set_status.called @@ -3723,7 +3720,7 @@ def test_get_kubernetes_app_by_name(): async def test_pods_for_service_instance(): mock_client = mock.Mock() assert ( - await pods_for_service_instance("kurupt", "fm", mock_client) + await pods_for_service_instance("kurupt", "fm", mock_client, namespace="paasta") == mock_client.core.list_namespaced_pod.return_value.items ) @@ -3782,7 +3779,7 @@ def test_get_active_versions_for_service(): def test_get_all_pods(): mock_client = mock.Mock() assert ( - get_all_pods(mock_client) + get_all_pods(mock_client, namespace="paasta") == mock_client.core.list_namespaced_pod.return_value.items ) @@ -4622,6 +4619,100 @@ def test_create_or_find_service_account_name_existing_create_rb_only(): assert mock_client.rbac.create_namespaced_role_binding.called is True +def test_create_or_find_service_account_name_caps(): + iam_role = "arn:aws:iam::000000000000:role/Some_Role" + namespace = "test_namespace" + expected_sa_name = "paasta--arn-aws-iam-000000000000-role-some-role" + with mock.patch( + "paasta_tools.kubernetes_tools.kube_config.load_kube_config", autospec=True + ), mock.patch( + "paasta_tools.kubernetes_tools.KubeClient", + autospec=False, + ) as mock_kube_client: + mock_client = mock.Mock() + mock_client.core = mock.Mock(spec=kube_client.CoreV1Api) + mock_client.core.list_namespaced_service_account.return_value = mock.Mock( + spec=V1ServiceAccountList + ) + mock_client.core.list_namespaced_service_account.return_value.items = [ + V1ServiceAccount( + kind="ServiceAccount", + metadata=V1ObjectMeta( + name=expected_sa_name, + namespace=namespace, + annotations={"eks.amazonaws.com/role-arn": iam_role}, + ), + ) + ] + mock_kube_client.return_value = mock_client + + assert expected_sa_name == create_or_find_service_account_name( + iam_role, + namespace=namespace, + ) + 
mock_client.core.create_namespaced_service_account.assert_not_called() + + +def test_create_or_find_service_account_name_caps_with_k8s(): + iam_role = "arn:aws:iam::000000000000:role/Some_Role" + namespace = "test_namespace" + k8s_role = "mega-admin" + expected_sa_name = "paasta--arn-aws-iam-000000000000-role-some-role--mega-admin" + with mock.patch( + "paasta_tools.kubernetes_tools.kube_config.load_kube_config", autospec=True + ), mock.patch( + "paasta_tools.kubernetes_tools.KubeClient", + autospec=False, + ) as mock_kube_client: + mock_client = mock.Mock() + mock_client.core = mock.Mock(spec=kube_client.CoreV1Api) + mock_client.rbac = mock.Mock(spec=kube_client.RbacAuthorizationV1Api) + mock_client.core.list_namespaced_service_account.return_value = mock.Mock( + spec=V1ServiceAccountList + ) + mock_client.core.list_namespaced_service_account.return_value.items = [ + V1ServiceAccount( + kind="ServiceAccount", + metadata=V1ObjectMeta( + name=expected_sa_name, + namespace=namespace, + annotations={"eks.amazonaws.com/role-arn": iam_role}, + ), + ) + ] + mock_client.rbac.list_namespaced_role_binding.return_value = mock.Mock( + spec=V1RoleBinding, + ) + mock_client.rbac.list_namespaced_role_binding.return_value.items = [ + V1RoleBinding( + kind="ServiceAccount", + metadata=V1ObjectMeta( + name=expected_sa_name, + namespace=namespace, + ), + role_ref=V1RoleRef( + api_group="rbac.authorization.k8s.io", + kind="Role", + name=k8s_role, + ), + subjects=[ + V1Subject( + kind="ServiceAccount", + namespace=namespace, + name=expected_sa_name, + ) + ], + ) + ] + mock_kube_client.return_value = mock_client + + assert expected_sa_name == create_or_find_service_account_name( + iam_role, namespace=namespace, k8s_role=k8s_role + ) + mock_client.core.create_namespaced_service_account.assert_not_called() + mock_client.rbac.create_namespaced_role_binding.assert_not_called() + + @pytest.mark.parametrize("decode", [(True), (False)]) def test_get_kubernetes_secret(decode): with mock.patch( @@ -4651,8 +4742,8 @@ def test_get_kubernetes_secret(decode): mock_client, get_paasta_secret_name(mock_namespace, service_name, secret_name), secret_name, - mock_namespace, - decode, + namespace=mock_namespace, + decode=decode, ) mock_client.core.read_namespaced_secret.assert_called_with( name="paasta-secret-example--service-example--secret", namespace="paasta" @@ -4689,6 +4780,7 @@ def test_get_kubernetes_secret_env_variables(): kube_client=mock_client, environment=mock_environment, service_name="universe", + namespace="paasta", ) assert ret == { "SECRET_NAME1": "123", @@ -4755,6 +4847,7 @@ def test_get_kubernetes_secret_volumes_multiple_files(): kube_client=mock_client, secret_volumes_config=mock_secret_volumes_config, service_name="universe", + namespace="paasta", ) assert ret == { "/the/container/path/the_secret_filename1": "secret_contents1", @@ -4784,6 +4877,7 @@ def test_get_kubernetes_secret_volumes_single_file(): kube_client=mock_client, secret_volumes_config=mock_secret_volumes_config, service_name="universe", + namespace="paasta", ) assert ret == { "/the/container/path/the_secret_name": "secret_contents", diff --git a/tests/test_list_kubernetes_service_instances.py b/tests/test_list_kubernetes_service_instances.py index 43560ccd13..94cd85d0d2 100644 --- a/tests/test_list_kubernetes_service_instances.py +++ b/tests/test_list_kubernetes_service_instances.py @@ -26,6 +26,8 @@ def test_parse_args(): ("kubernetes", False, "service_1.instance1\nservice_2.instance1", False, None), ("kubernetes", True, 
"service--1-instance1\nservice--2-instance1", False, None), ("flink", True, "service--1-instance1\nservice--2-instance1", False, None), + ("eks", False, "service_1.instance1\nservice_2.instance1", False, None), + ("eks", True, "service--1-instance1\nservice--2-instance1", False, None), ], ) def test_main( diff --git a/tests/test_paasta_execute_docker_command.py b/tests/test_paasta_execute_docker_command.py index 3b8108511f..01338eead3 100644 --- a/tests/test_paasta_execute_docker_command.py +++ b/tests/test_paasta_execute_docker_command.py @@ -38,6 +38,11 @@ def test_execute_in_container(): ) +@mock.patch( + "paasta_tools.paasta_execute_docker_command.is_using_unprivileged_containers", + lambda: False, + autospec=None, +) def test_execute_in_container_reuses_exec(): fake_container_id = "fake_container_id" fake_execid = "fake_execid" @@ -59,6 +64,11 @@ def test_execute_in_container_reuses_exec(): mock_docker_client.exec_start.assert_called_once_with(fake_execid, stream=False) +@mock.patch( + "paasta_tools.paasta_execute_docker_command.is_using_unprivileged_containers", + lambda: False, + autospec=None, +) def test_execute_in_container_reuses_only_valid_exec(): fake_container_id = "fake_container_id" fake_execid = "fake_execid" diff --git a/tests/test_setup_kubernetes_job.py b/tests/test_setup_kubernetes_job.py index 2a02a73e3a..a6f4c4188d 100644 --- a/tests/test_setup_kubernetes_job.py +++ b/tests/test_setup_kubernetes_job.py @@ -1,13 +1,19 @@ -# from typing import Sequence +from typing import List +from typing import Tuple +from typing import Union + import mock +import pytest from kubernetes.client import V1Deployment from kubernetes.client import V1StatefulSet from pytest import raises +from paasta_tools.eks_tools import EksDeploymentConfig from paasta_tools.kubernetes.application.controller_wrappers import Application from paasta_tools.kubernetes_tools import InvalidKubernetesConfig from paasta_tools.kubernetes_tools import KubeDeployment from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig +from paasta_tools.kubernetes_tools import KubernetesDeploymentConfigDict from paasta_tools.setup_kubernetes_job import create_application_object from paasta_tools.setup_kubernetes_job import get_kubernetes_deployment_config from paasta_tools.setup_kubernetes_job import get_service_instances_with_valid_names @@ -49,7 +55,7 @@ def test_main_logging(): service="my-service", instance="my-instance", cluster="cluster", - config_dict={}, + config_dict=KubernetesDeploymentConfigDict(), branch_dict=None, ) mock_service_instance_configs_list.return_value = [ @@ -70,7 +76,32 @@ def test_main_logging(): mock_logging.getLogger.assert_called_with("kazoo") -def test_main(): +@pytest.mark.parametrize( + "mock_kube_deploy_config, eks_flag", + [ + ( + KubernetesDeploymentConfig( + service="my-service", + instance="my-instance", + cluster="cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + False, + ), + ( + EksDeploymentConfig( + service="my-service", + instance="my-instance", + cluster="cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + True, + ), + ], +) +def test_main(mock_kube_deploy_config, eks_flag): with mock.patch( "paasta_tools.setup_kubernetes_job.metrics_lib.get_metrics_interface", autospec=True, @@ -88,13 +119,7 @@ def test_main(): ) as mock_setup_kube_deployments: mock_setup_kube_deployments.return_value = True mock_metrics_interface = mock_get_metrics_interface.return_value - mock_kube_deploy_config = 
KubernetesDeploymentConfig( - service="my-service", - instance="my-instance", - cluster="cluster", - config_dict={}, - branch_dict=None, - ) + mock_parse_args.return_value.eks = eks_flag mock_service_instance_configs_list.return_value = [ (True, mock_kube_deploy_config) ] @@ -109,6 +134,7 @@ def test_main(): rate_limit=mock_parse_args.return_value.rate_limit, service_instance_configs_list=mock_service_instance_configs_list.return_value, metrics_interface=mock_metrics_interface, + eks=mock_parse_args.return_value.eks, ) mock_setup_kube_deployments.return_value = False with raises(SystemExit) as e: @@ -191,7 +217,7 @@ def test_get_kubernetes_deployment_config(): instance="instance", cluster="fake_cluster", soa_dir="nail/blah", - config_dict={}, + config_dict=KubernetesDeploymentConfigDict(), branch_dict=None, ) mock_load_kubernetes_service_config_no_cache.side_effect = None @@ -210,14 +236,107 @@ def test_get_kubernetes_deployment_config(): instance="instance", cluster="fake_cluster", soa_dir="nail/blah", - config_dict={}, + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + ) + ] + + +def test_get_eks_deployment_config(): + with mock.patch( + "paasta_tools.setup_kubernetes_job.load_eks_service_config_no_cache", + autospec=True, + ) as mock_load_eks_service_config_no_cache: + + mock_get_service_instances_with_valid_names = [ + ("kurupt", "instance", None, None) + ] + + # Testing NoDeploymentsAvailable exception + mock_load_eks_service_config_no_cache.side_effect = NoDeploymentsAvailable + ret = get_kubernetes_deployment_config( + service_instances_with_valid_names=mock_get_service_instances_with_valid_names, + cluster="fake_cluster", + soa_dir="nail/blah", + eks=True, + ) + assert ret == [(True, None)] + + # Testing NoConfigurationForServiceError exception + mock_load_eks_service_config_no_cache.side_effect = ( + NoConfigurationForServiceError + ) + + ret = get_kubernetes_deployment_config( + service_instances_with_valid_names=mock_get_service_instances_with_valid_names, + cluster="fake_cluster", + soa_dir="nail/blah", + eks=True, + ) + assert ret == [(False, None)] + + # Testing returning a KubernetesDeploymentConfig + mock_kube_deploy = EksDeploymentConfig( + service="kurupt", + instance="instance", + cluster="fake_cluster", + soa_dir="nail/blah", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ) + mock_load_eks_service_config_no_cache.side_effect = None + mock_load_eks_service_config_no_cache.return_value = mock_kube_deploy + ret = get_kubernetes_deployment_config( + service_instances_with_valid_names=mock_get_service_instances_with_valid_names, + cluster="fake_cluster", + soa_dir="nail/blah", + eks=True, + ) + + assert ret == [ + ( + True, + EksDeploymentConfig( + service="kurupt", + instance="instance", + cluster="fake_cluster", + soa_dir="nail/blah", + config_dict=KubernetesDeploymentConfigDict(), branch_dict=None, ), ) ] -def test_create_application_object(): +@pytest.mark.parametrize( + "eks_flag, mock_service_config", + [ + ( + "False", + KubernetesDeploymentConfig( + service="kurupt", + instance="instance", + cluster="fake_cluster", + soa_dir="nail/blah", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + ), + ( + "True", + EksDeploymentConfig( + service="kurupt", + instance="instance", + cluster="fake_cluster", + soa_dir="nail/blah", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + ), + ], +) +def test_create_application_object(eks_flag, mock_service_config): with mock.patch( 
"paasta_tools.setup_kubernetes_job.load_system_paasta_config", autospec=True ), mock.patch( @@ -231,7 +350,7 @@ def test_create_application_object(): autospec=True, ) as mock_stateful_set_wrapper: mock_deploy = mock.MagicMock(spec=V1Deployment) - service_config = mock.MagicMock() + service_config = mock.MagicMock(spec=mock_service_config) service_config.format_kubernetes_app.return_value = mock_deploy # Create DeploymentWrapper @@ -239,6 +358,7 @@ def test_create_application_object(): cluster="fake_cluster", soa_dir="/nail/blah", service_instance_config=service_config, + eks=eks_flag, ) mock_deployment_wrapper.assert_called_with(mock_deploy) @@ -251,6 +371,7 @@ def test_create_application_object(): cluster="fake_cluster", soa_dir="/nail/blah", service_instance_config=service_config, + eks=eks_flag, ) mock_stateful_set_wrapper.assert_called_with(mock_deploy) @@ -261,6 +382,7 @@ def test_create_application_object(): cluster="fake_cluster", soa_dir="/nail/blah", service_instance_config=service_config, + eks=eks_flag, ) mock_deployment_wrapper.reset_mock() @@ -273,6 +395,7 @@ def test_create_application_object(): cluster="fake_cluster", soa_dir="/nail/blah", service_instance_config=service_config, + eks=eks_flag, ) assert ret == (False, None) @@ -280,12 +403,41 @@ def test_create_application_object(): assert not mock_stateful_set_wrapper.called -def test_setup_kube_deployment_create_update(): +@pytest.mark.parametrize( + "mock_kube_deploy_config, eks_flag", + [ + ( + KubernetesDeploymentConfig( + service="kurupt", + instance="fm", + cluster="fake_cluster", + soa_dir="/nail/blah", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + False, + ), + ( + EksDeploymentConfig( + service="kurupt", + instance="fm", + cluster="fake_cluster", + soa_dir="/nail/blah", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + True, + ), + ], +) +def test_setup_kube_deployment_create_update(mock_kube_deploy_config, eks_flag): fake_create = mock.MagicMock() fake_update = mock.MagicMock() fake_update_related_api_objects = mock.MagicMock() - def simple_create_application_object(cluster, soa_dir, service_instance_config): + def simple_create_application_object( + cluster, soa_dir, service_instance_config, eks + ): fake_app = mock.MagicMock(spec=Application) fake_app.kube_deployment = KubeDeployment( service=service_instance_config.service, @@ -300,7 +452,14 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): fake_app.update = fake_update fake_app.update_related_api_objects = fake_update_related_api_objects fake_app.item = None - fake_app.soa_config = None + fake_app.soa_config = KubernetesDeploymentConfig( + service=service_instance_config.service, + cluster=cluster, + instance=service_instance_config.instance, + config_dict=service_instance_config.config_dict, + branch_dict=None, + soa_dir=soa_dir, + ) fake_app.__str__ = lambda app: "fake_app" return True, fake_app @@ -317,10 +476,12 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): ) as mock_no_metrics, mock.patch( "paasta_tools.setup_kubernetes_job.get_kubernetes_deployment_config", autospec=True, - ) as mock_service_instance_configs_list: + ): mock_client = mock.Mock() # No instances created - mock_service_instance_configs_list = [] + mock_service_instance_configs_list: List[ + Tuple[bool, Union[KubernetesDeploymentConfig, EksDeploymentConfig]] + ] = [] setup_kube_deployments( kube_client=mock_client, 
service_instance_configs_list=mock_service_instance_configs_list, @@ -335,26 +496,60 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): mock_log_obj.info.reset_mock() # Create a new instance - mock_kube_deploy_config = KubernetesDeploymentConfig( + mock_service_instance_configs_list = [(True, mock_kube_deploy_config)] + setup_kube_deployments( + kube_client=mock_client, + service_instance_configs_list=mock_service_instance_configs_list, + cluster="fake_cluster", + soa_dir="/nail/blah", + metrics_interface=mock_no_metrics, + eks=eks_flag, + ) + assert fake_create.call_count == 1 + assert fake_update.call_count == 0 + assert fake_update_related_api_objects.call_count == 1 + assert mock_no_metrics.emit_event.call_count == 1 + mock_log_obj.info.reset_mock() + mock_no_metrics.reset_mock() + + # Skipping downthenup instance cuz of existing_apps + fake_create.reset_mock() + fake_update.reset_mock() + fake_update_related_api_objects.reset_mock() + mock_list_all_paasta_deployments.return_value = [ + KubeDeployment( + service="kurupt", + instance="fm", + git_sha="2", + namespace="paastasvc-kurupt", + image_version="extrastuff-1", + config_sha="1", + replicas=1, + ) + ] + mock_downthenup_kube_deploy_config = KubernetesDeploymentConfig( service="kurupt", instance="fm", cluster="fake_cluster", soa_dir="/nail/blah", - config_dict={}, + config_dict=KubernetesDeploymentConfigDict(bounce_method="downthenup"), branch_dict=None, ) - mock_service_instance_configs_list = [(True, mock_kube_deploy_config)] + mock_service_instance_configs_list = [ + (True, mock_downthenup_kube_deploy_config) + ] setup_kube_deployments( kube_client=mock_client, service_instance_configs_list=mock_service_instance_configs_list, cluster="fake_cluster", soa_dir="/nail/blah", metrics_interface=mock_no_metrics, + eks=eks_flag, ) - assert fake_create.call_count == 1 + assert fake_create.call_count == 0 assert fake_update.call_count == 0 - assert fake_update_related_api_objects.call_count == 1 - assert mock_no_metrics.emit_event.call_count == 1 + assert fake_update_related_api_objects.call_count == 0 + assert mock_no_metrics.emit_event.call_count == 0 mock_log_obj.info.reset_mock() mock_no_metrics.reset_mock() @@ -379,6 +574,7 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): cluster="fake_cluster", soa_dir="/nail/blah", metrics_interface=mock_no_metrics, + eks=eks_flag, ) assert fake_update.call_count == 1 @@ -417,6 +613,7 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): service_instance_configs_list=mock_service_instance_configs_list, cluster="fake_cluster", soa_dir="/nail/blah", + eks=eks_flag, ) assert fake_update.call_count == 1 assert fake_create.call_count == 0 @@ -443,6 +640,7 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): service_instance_configs_list=mock_service_instance_configs_list, cluster="fake_cluster", soa_dir="/nail/blah", + eks=eks_flag, ) assert fake_update.call_count == 1 assert fake_create.call_count == 0 @@ -469,6 +667,7 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): service_instance_configs_list=mock_service_instance_configs_list, cluster="fake_cluster", soa_dir="/nail/blah", + eks=eks_flag, ) assert fake_update.call_count == 1 assert fake_create.call_count == 0 @@ -484,7 +683,7 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): instance="garage", cluster="fake_cluster", soa_dir="/nail/blah", - config_dict={}, 
+ config_dict=KubernetesDeploymentConfigDict(), branch_dict=None, ) mock_service_instance_configs_list = [ @@ -507,6 +706,7 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): service_instance_configs_list=mock_service_instance_configs_list, cluster="fake_cluster", soa_dir="/nail/blah", + eks=eks_flag, ) assert fake_update.call_count == 1 assert fake_create.call_count == 1 @@ -534,6 +734,7 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): service_instance_configs_list=mock_service_instance_configs_list, cluster="fake_cluster", soa_dir="/nail/blah", + eks=eks_flag, ) assert fake_update.call_count == 0 assert fake_create.call_count == 0 @@ -543,7 +744,65 @@ def simple_create_application_object(cluster, soa_dir, service_instance_config): ) -def test_setup_kube_deployments_rate_limit(): +@pytest.mark.parametrize( + "mock_kube_deploy_config_fm, mock_kube_deploy_config_garage, mock_kube_deploy_config_radio, eks_flag", + [ + ( + KubernetesDeploymentConfig( + service="kurupt", + instance="fm", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + KubernetesDeploymentConfig( + service="kurupt", + instance="garage", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + KubernetesDeploymentConfig( + service="kurupt", + instance="radio", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + False, + ), + ( + EksDeploymentConfig( + service="kurupt", + instance="fm", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + EksDeploymentConfig( + service="kurupt", + instance="garage", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + EksDeploymentConfig( + service="kurupt", + instance="radio", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + True, + ), + ], +) +def test_setup_kube_deployments_rate_limit( + mock_kube_deploy_config_fm, + mock_kube_deploy_config_garage, + mock_kube_deploy_config_radio, + eks_flag, +): with mock.patch( "paasta_tools.setup_kubernetes_job.create_application_object", autospec=True, @@ -553,28 +812,9 @@ def test_setup_kube_deployments_rate_limit(): "paasta_tools.setup_kubernetes_job.log", autospec=True ) as mock_log_obj: mock_client = mock.Mock() - mock_kube_deploy_config_fm = KubernetesDeploymentConfig( - service="kurupt", - instance="fm", - cluster="fake_cluster", - config_dict={}, - branch_dict=None, - ) - mock_kube_deploy_config_garage = KubernetesDeploymentConfig( - service="kurupt", - instance="garage", - cluster="fake_cluster", - config_dict={}, - branch_dict=None, - ) - mock_kube_deploy_config_radio = KubernetesDeploymentConfig( - service="kurupt", - instance="radio", - cluster="fake_cluster", - config_dict={}, - branch_dict=None, - ) - mock_service_instance_configs_list = [ + mock_service_instance_configs_list: List[ + Tuple[bool, Union[KubernetesDeploymentConfig, EksDeploymentConfig]] + ] = [ (True, mock_kube_deploy_config_fm), (True, mock_kube_deploy_config_garage), (True, mock_kube_deploy_config_radio), @@ -589,6 +829,7 @@ def test_setup_kube_deployments_rate_limit(): cluster="fake_cluster", soa_dir="/nail/blah", rate_limit=2, + eks=eks_flag, ) assert fake_app.create.call_count == 2 mock_log_obj.info.assert_any_call( @@ -603,11 +844,53 @@ def test_setup_kube_deployments_rate_limit(): cluster="fake_cluster", 
soa_dir="/nail/blah", rate_limit=0, + eks=eks_flag, ) assert fake_app.create.call_count == 3 -def test_setup_kube_deployments_skip_malformed_apps(): +@pytest.mark.parametrize( + "mock_kube_deploy_config_fake, mock_kube_deploy_config_mock, eks_flag", + [ + ( + KubernetesDeploymentConfig( + service="fake", + instance="instance", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + KubernetesDeploymentConfig( + service="mock", + instance="instance", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + False, + ), + ( + EksDeploymentConfig( + service="fake", + instance="instance", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + EksDeploymentConfig( + service="mock", + instance="instance", + cluster="fake_cluster", + config_dict=KubernetesDeploymentConfigDict(), + branch_dict=None, + ), + True, + ), + ], +) +def test_setup_kube_deployments_skip_malformed_apps( + mock_kube_deploy_config_fake, mock_kube_deploy_config_mock, eks_flag +): with mock.patch( "paasta_tools.setup_kubernetes_job.create_application_object", autospec=True, @@ -617,21 +900,9 @@ def test_setup_kube_deployments_skip_malformed_apps(): "paasta_tools.setup_kubernetes_job.log", autospec=True ) as mock_log_obj: mock_client = mock.Mock() - mock_kube_deploy_config_fake = KubernetesDeploymentConfig( - service="fake", - instance="instance", - cluster="fake_cluster", - config_dict={}, - branch_dict=None, - ) - mock_kube_deploy_config_mock = KubernetesDeploymentConfig( - service="mock", - instance="instance", - cluster="fake_cluster", - config_dict={}, - branch_dict=None, - ) - mock_service_instance_configs_list = [ + mock_service_instance_configs_list: List[ + Tuple[bool, Union[KubernetesDeploymentConfig, EksDeploymentConfig]] + ] = [ (True, mock_kube_deploy_config_fake), (True, mock_kube_deploy_config_mock), ] @@ -648,6 +919,7 @@ def test_setup_kube_deployments_skip_malformed_apps(): cluster="fake_cluster", soa_dir="/nail/blah", rate_limit=0, + eks=eks_flag, ) assert fake_app.create.call_count == 2 assert len(mock_log_obj.exception.call_args_list) == 1 diff --git a/tests/test_setup_prometheus_adapter_config.py b/tests/test_setup_prometheus_adapter_config.py index e02381d9c1..211330309e 100644 --- a/tests/test_setup_prometheus_adapter_config.py +++ b/tests/test_setup_prometheus_adapter_config.py @@ -1,8 +1,12 @@ import mock import pytest +from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig from paasta_tools.long_running_service_tools import AutoscalingParamsDict from paasta_tools.setup_prometheus_adapter_config import _minify_promql +from paasta_tools.setup_prometheus_adapter_config import ( + create_instance_active_requests_scaling_rule, +) from paasta_tools.setup_prometheus_adapter_config import ( create_instance_arbitrary_promql_scaling_rule, ) @@ -16,6 +20,9 @@ create_instance_uwsgi_scaling_rule, ) from paasta_tools.setup_prometheus_adapter_config import get_rules_for_service_instance +from paasta_tools.setup_prometheus_adapter_config import ( + should_create_active_requests_scaling_rule, +) from paasta_tools.setup_prometheus_adapter_config import should_create_cpu_scaling_rule from paasta_tools.setup_prometheus_adapter_config import ( should_create_gunicorn_scaling_rule, @@ -31,6 +38,113 @@ ) +@pytest.mark.parametrize( + "autoscaling_config,expected", + [ + ( + { + "metrics_provider": "cpu", + "decision_policy": "bespoke", + "moving_average_window_seconds": 123, + 
"setpoint": 0.653, + }, + False, + ), + ( + { + "metrics_provider": "active-requests", + "moving_average_window_seconds": 124, + "desired_active_requests_per_replica": 0.425, + }, + True, + ), + ( + { + "metrics_provider": "active-requests", + "desired_active_requests_per_replica": 0.764, + }, + True, + ), + ], +) +def test_should_create_active_requests_scaling_rule( + autoscaling_config: AutoscalingParamsDict, expected: bool +) -> None: + should_create, reason = should_create_active_requests_scaling_rule( + autoscaling_config=autoscaling_config + ) + + assert should_create == expected + if expected: + assert reason is None + else: + assert reason is not None + + +@pytest.mark.parametrize( + "registrations,expected_instance", + [ + ( + ["test_service.abc", "test_service.xyz", "test_service.123"], + "test_instance", + ), + ( + ["test_service.xyz"], + "xyz", + ), + ], +) +def test_create_instance_active_requests_scaling_rule( + registrations: list, expected_instance: str +) -> None: + service_name = "test_service" + instance_config = mock.Mock( + instance="test_instance", + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "active-requests", + "desired_active_requests_per_replica": 12, + "moving_average_window_seconds": 20120302, + } + ), + get_registrations=mock.Mock(return_value=registrations), + ) + paasta_cluster = "test_cluster" + + with mock.patch( + "paasta_tools.setup_prometheus_adapter_config.load_system_paasta_config", + autospec=True, + return_value=MOCK_SYSTEM_PAASTA_CONFIG, + ): + rule = create_instance_active_requests_scaling_rule( + service=service_name, + instance_config=instance_config, + paasta_cluster=paasta_cluster, + ) + + # we test that the format of the dictionary is as expected with mypy + # and we don't want to test the full contents of the retval since then + # we're basically just writting a change-detector test - instead, we test + # that we're actually using our inputs + assert service_name in rule["seriesQuery"] + assert instance_config.instance in rule["seriesQuery"] + assert paasta_cluster in rule["seriesQuery"] + # these two numbers are distinctive and unlikely to be used as constants + assert ( + str( + instance_config.get_autoscaling_params()[ + "desired_active_requests_per_replica" + ] + ) + in rule["metricsQuery"] + ) + assert ( + str(instance_config.get_autoscaling_params()["moving_average_window_seconds"]) + in rule["metricsQuery"] + ) + assert f"paasta_instance='{expected_instance}'" in rule["metricsQuery"] + + @pytest.mark.parametrize( "autoscaling_config,expected", [ @@ -87,14 +201,18 @@ def test_should_create_uswgi_scaling_rule( def test_create_instance_uwsgi_scaling_rule() -> None: service_name = "test_service" - instance_name = "test_instance" + instance_config = mock.Mock( + instance="test_instance", + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "uwsgi", + "setpoint": 0.1234567890, + "moving_average_window_seconds": 20120302, + "use_prometheus": True, + } + ), + ) paasta_cluster = "test_cluster" - autoscaling_config: AutoscalingParamsDict = { - "metrics_provider": "uwsgi", - "setpoint": 0.1234567890, - "moving_average_window_seconds": 20120302, - "use_prometheus": True, - } with mock.patch( "paasta_tools.setup_prometheus_adapter_config.load_system_paasta_config", @@ -103,9 +221,8 @@ def test_create_instance_uwsgi_scaling_rule() -> None: ): rule = create_instance_uwsgi_scaling_rule( service=service_name, - instance=instance_name, + instance_config=instance_config, paasta_cluster=paasta_cluster, - 
autoscaling_config=autoscaling_config, ) # we test that the format of the dictionary is as expected with mypy @@ -113,12 +230,16 @@ def test_create_instance_uwsgi_scaling_rule() -> None: # we're basically just writting a change-detector test - instead, we test # that we're actually using our inputs assert service_name in rule["seriesQuery"] - assert instance_name in rule["seriesQuery"] + assert instance_config.instance in rule["seriesQuery"] assert paasta_cluster in rule["seriesQuery"] # these two numbers are distinctive and unlikely to be used as constants - assert str(autoscaling_config["setpoint"]) in rule["metricsQuery"] assert ( - str(autoscaling_config["moving_average_window_seconds"]) in rule["metricsQuery"] + str(instance_config.get_autoscaling_params()["setpoint"]) + in rule["metricsQuery"] + ) + assert ( + str(instance_config.get_autoscaling_params()["moving_average_window_seconds"]) + in rule["metricsQuery"] ) @@ -177,29 +298,33 @@ def test_should_create_cpu_scaling_rule( def test_create_instance_cpu_scaling_rule() -> None: service_name = "test_service" - instance_name = "test_instance" + instance_config = mock.Mock( + instance="test_instance", + get_namespace=mock.Mock(return_value="test_namespace"), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "cpu", + "setpoint": 0.1234567890, + "moving_average_window_seconds": 20120302, + "use_prometheus": True, + } + ), + ) paasta_cluster = "test_cluster" - namespace = "test_namespace" - autoscaling_config: AutoscalingParamsDict = { - "metrics_provider": "cpu", - "setpoint": 0.1234567890, - "moving_average_window_seconds": 20120302, - "use_prometheus": True, - } rule = create_instance_cpu_scaling_rule( service=service_name, - instance=instance_name, + instance_config=instance_config, paasta_cluster=paasta_cluster, - autoscaling_config=autoscaling_config, - namespace=namespace, ) # our query doesn't include the setpoint as we'll just give the HPA the current CPU usage and # let the HPA compare that to the setpoint directly assert ( - str(autoscaling_config["moving_average_window_seconds"]) in rule["metricsQuery"] + str(instance_config.get_autoscaling_params()["moving_average_window_seconds"]) + in rule["metricsQuery"] ) + assert str(instance_config.get_namespace()) in rule["metricsQuery"] @pytest.mark.parametrize( @@ -240,14 +365,18 @@ def test_should_create_gunicorn_scaling_rule( def test_create_instance_gunicorn_scaling_rule() -> None: service_name = "test_service" - instance_name = "test_instance" + instance_config = mock.Mock( + instance="test_instance", + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "gunicorn", + "setpoint": 0.1234567890, + "moving_average_window_seconds": 20120302, + "use_prometheus": True, + } + ), + ) paasta_cluster = "test_cluster" - autoscaling_config: AutoscalingParamsDict = { - "metrics_provider": "gunicorn", - "setpoint": 0.1234567890, - "moving_average_window_seconds": 20120302, - "use_prometheus": True, - } with mock.patch( "paasta_tools.setup_prometheus_adapter_config.load_system_paasta_config", @@ -256,9 +385,8 @@ def test_create_instance_gunicorn_scaling_rule() -> None: ): rule = create_instance_gunicorn_scaling_rule( service=service_name, - instance=instance_name, + instance_config=instance_config, paasta_cluster=paasta_cluster, - autoscaling_config=autoscaling_config, ) # we test that the format of the dictionary is as expected with mypy @@ -266,58 +394,86 @@ def test_create_instance_gunicorn_scaling_rule() -> None: # we're basically just writting 
a change-detector test - instead, we test # that we're actually using our inputs assert service_name in rule["seriesQuery"] - assert instance_name in rule["seriesQuery"] + assert instance_config.instance in rule["seriesQuery"] assert paasta_cluster in rule["seriesQuery"] # these two numbers are distinctive and unlikely to be used as constants - assert str(autoscaling_config["setpoint"]) in rule["metricsQuery"] assert ( - str(autoscaling_config["moving_average_window_seconds"]) in rule["metricsQuery"] + str(instance_config.get_autoscaling_params()["setpoint"]) + in rule["metricsQuery"] + ) + assert ( + str(instance_config.get_autoscaling_params()["moving_average_window_seconds"]) + in rule["metricsQuery"] ) @pytest.mark.parametrize( - "autoscaling_config,expected_rules", + "instance_config,expected_rules", [ ( - { - "metrics_provider": "uwsgi", - "setpoint": 0.1234567890, - "moving_average_window_seconds": 20120302, - "use_prometheus": True, - }, + mock.Mock( + instance="instance", + get_namespace=mock.Mock(return_value="test_namespace"), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "uwsgi", + "setpoint": 0.1234567890, + "moving_average_window_seconds": 20120302, + "use_prometheus": True, + } + ), + ), 1, ), ( - { - "metrics_provider": "uwsgi", - "setpoint": 0.1234567890, - "moving_average_window_seconds": 20120302, - "use_prometheus": False, - }, + mock.Mock( + instance="instance", + get_namespace=mock.Mock(return_value="test_namespace"), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "uwsgi", + "setpoint": 0.1234567890, + "moving_average_window_seconds": 20120302, + "use_prometheus": False, + } + ), + ), 0, ), ( - { - "metrics_provider": "cpu", - "setpoint": 0.1234567890, - "moving_average_window_seconds": 20120302, - "use_prometheus": False, - }, + mock.Mock( + instance="instance", + get_namespace=mock.Mock(return_value="test_namespace"), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "cpu", + "setpoint": 0.1234567890, + "moving_average_window_seconds": 20120302, + "use_prometheus": False, + } + ), + ), 0, ), ( - { - "metrics_provider": "cpu", - "setpoint": 0.1234567890, - "moving_average_window_seconds": 20120302, - "use_prometheus": True, - }, + mock.Mock( + instance="instance", + get_namespace=mock.Mock(return_value="test_namespace"), + get_autoscaling_params=mock.Mock( + return_value={ + "metrics_provider": "cpu", + "setpoint": 0.1234567890, + "moving_average_window_seconds": 20120302, + "use_prometheus": True, + } + ), + ), 1, ), ], ) def test_get_rules_for_service_instance( - autoscaling_config: AutoscalingParamsDict, + instance_config: KubernetesDeploymentConfig, expected_rules: int, ) -> None: with mock.patch( @@ -329,10 +485,8 @@ def test_get_rules_for_service_instance( len( get_rules_for_service_instance( service_name="service", - instance_name="instance", - autoscaling_config=autoscaling_config, + instance_config=instance_config, paasta_cluster="cluster", - namespace="test_namespace", ) ) == expected_rules @@ -359,10 +513,14 @@ def test__minify_promql(query: str, expected: str) -> None: def test_create_instance_arbitrary_promql_scaling_rule_no_seriesQuery(): rule = create_instance_arbitrary_promql_scaling_rule( service="service", - instance="instance", - autoscaling_config={"prometheus_adapter_config": {"metricsQuery": "foo"}}, + instance_config=mock.Mock( + instance="instance", + get_namespace=mock.Mock(return_value="paasta"), + get_autoscaling_params=mock.Mock( + 
return_value={"prometheus_adapter_config": {"metricsQuery": "foo"}} + ), + ), paasta_cluster="cluster", - namespace="paasta", ) assert rule == { @@ -381,12 +539,19 @@ def test_create_instance_arbitrary_promql_scaling_rule_no_seriesQuery(): def test_create_instance_arbitrary_promql_scaling_rule_with_seriesQuery(): rule = create_instance_arbitrary_promql_scaling_rule( service="service", - instance="instance", - autoscaling_config={ - "prometheus_adapter_config": {"metricsQuery": "foo", "seriesQuery": "bar"} - }, + instance_config=mock.Mock( + instance="instance", + get_namespace=mock.Mock(return_value="test_namespace"), + get_autoscaling_params=mock.Mock( + return_value={ + "prometheus_adapter_config": { + "metricsQuery": "foo", + "seriesQuery": "bar", + } + } + ), + ), paasta_cluster="cluster", - namespace="test_namespace", ) assert rule == { diff --git a/tests/test_spark_tools.py b/tests/test_spark_tools.py index acf6cd2052..62a71a8711 100644 --- a/tests/test_spark_tools.py +++ b/tests/test_spark_tools.py @@ -1,3 +1,4 @@ +import sys from unittest import mock import pytest @@ -29,3 +30,39 @@ def test_inject_spark_conf_str(cmd, expected): assert ( spark_tools.inject_spark_conf_str(cmd, "--conf spark.max.cores=100") == expected ) + + +@pytest.mark.parametrize( + "spark_conf,expected", + [ + ( + { + "spark.kubernetes.executor.volumes.hostPath.nailsrv-123.mount.path": "/nail/srv", + "spark.kubernetes.executor.volumes.hostPath.nailsrv-123.options.path": "/nail/srv", + "spark.kubernetes.executor.volumes.hostPath.nailsrv-123.mount.readOnly": "true", + "spark.kubernetes.executor.volumes.hostPath.123.mount.path": "/nail/123", + "spark.kubernetes.executor.volumes.hostPath.123.options.path": "/nail/123", + "spark.kubernetes.executor.volumes.hostPath.123.mount.readOnly": "false", + }, + ["/nail/srv:/nail/srv:ro", "/nail/123:/nail/123:rw"], + ), + ( + { + "spark.kubernetes.executor.volumes.hostPath.NAILsrv-123.mount.path": "/one/two", + "spark.kubernetes.executor.volumes.hostPath.NAILsrv-123.options.path": "/one/two", + "spark.kubernetes.executor.volumes.hostPath.NAILsrv-123.mount.readOnly": "true", + }, + [""], + ), + ], +) +@mock.patch.object(sys, "exit") +def test_get_volumes_from_spark_k8s_configs(mock_sys, spark_conf, expected): + result = spark_tools.get_volumes_from_spark_k8s_configs(spark_conf) + if ( + "spark.kubernetes.executor.volumes.hostPath.NAILsrv-123.mount.path" + in spark_conf + ): + mock_sys.assert_called_with(1) + else: + assert result == expected diff --git a/tests/test_tron_tools.py b/tests/test_tron_tools.py index aec22adef0..8cfbb2bb34 100644 --- a/tests/test_tron_tools.py +++ b/tests/test_tron_tools.py @@ -32,7 +32,6 @@ "volumes": [], "dockercfg_location": "/mock/dockercfg", "tron_default_pool_override": "big_pool", - "tron_use_k8s": True, "tron_k8s_cluster_overrides": { "paasta-dev-test": "paasta-dev", }, @@ -231,32 +230,20 @@ def mock_list_teams(self): yield f @pytest.mark.parametrize( - "action_service,action_deploy,cluster,expected_cluster,use_k8s", + "action_service,action_deploy,cluster,expected_cluster", [ # normal case - no cluster override present and k8s enabled - (None, None, "paasta-dev", "paasta-dev", True), - (None, "special_deploy", "paasta-dev", "paasta-dev", True), - ("other_service", None, "paasta-dev", "paasta-dev", True), - (None, None, "paasta-dev", "paasta-dev", True), - (None, None, "paasta-dev", "paasta-dev", True), + (None, None, "paasta-dev", "paasta-dev"), + (None, "special_deploy", "paasta-dev", "paasta-dev"), + ("other_service", None, "paasta-dev", 
"paasta-dev"), + (None, None, "paasta-dev", "paasta-dev"), + (None, None, "paasta-dev", "paasta-dev"), # cluster override present and k8s enabled - (None, None, "paasta-dev-test", "paasta-dev", True), - (None, "special_deploy", "paasta-dev-test", "paasta-dev", True), - ("other_service", None, "paasta-dev-test", "paasta-dev", True), - (None, None, "paasta-dev-test", "paasta-dev", True), - (None, None, "paasta-dev-test", "paasta-dev", True), - # no cluster override present and k8s disabled - (None, None, "paasta-dev", "paasta-dev", False), - (None, "special_deploy", "paasta-dev", "paasta-dev", False), - ("other_service", None, "paasta-dev", "paasta-dev", False), - (None, None, "paasta-dev", "paasta-dev", False), - (None, None, "paasta-dev", "paasta-dev", False), - # cluster override present and k8s disabled - (None, None, "paasta-dev-test", "paasta-dev-test", False), - (None, "special_deploy", "paasta-dev-test", "paasta-dev-test", False), - ("other_service", None, "paasta-dev-test", "paasta-dev-test", False), - (None, None, "paasta-dev-test", "paasta-dev-test", False), - (None, None, "paasta-dev-test", "paasta-dev-test", False), + (None, None, "paasta-dev-test", "paasta-dev"), + (None, "special_deploy", "paasta-dev-test", "paasta-dev"), + ("other_service", None, "paasta-dev-test", "paasta-dev"), + (None, None, "paasta-dev-test", "paasta-dev"), + (None, None, "paasta-dev-test", "paasta-dev"), ], ) @mock.patch("paasta_tools.tron_tools.load_v2_deployments_json", autospec=True) @@ -267,7 +254,6 @@ def test_get_action_config( action_deploy, cluster, expected_cluster, - use_k8s, ): """Check resulting action config with various overrides from the action.""" action_dict = {"command": "echo first"} @@ -289,7 +275,6 @@ def test_get_action_config( "max_runtime": "2h", "actions": {"normal": action_dict}, "monitoring": {"team": "noop"}, - "use_k8s": use_k8s, } soa_dir = "/other_dir" @@ -429,7 +414,6 @@ def test_format_tron_job_dict(self, mock_format_action, mock_get_action_config): ) mock_format_action.assert_called_once_with( action_config=mock_get_action_config.return_value, - use_k8s=False, ) assert result == { @@ -455,7 +439,6 @@ def test_format_tron_job_dict_k8s_enabled( actions = {action_name: action_dict} job_dict = { - "use_k8s": True, "node": "batch_server", "schedule": "daily 12:10:00", "service": "my_service", @@ -484,7 +467,6 @@ def test_format_tron_job_dict_k8s_enabled( ) mock_format_action.assert_called_once_with( action_config=mock_get_action_config.return_value, - use_k8s=True, ) assert result == { @@ -496,7 +478,6 @@ def test_format_tron_job_dict_k8s_enabled( }, "expected_runtime": "1h", "monitoring": {"team": "noop"}, - "use_k8s": True, } @mock.patch( @@ -831,7 +812,7 @@ def test_format_tron_action_dict_default_executor(self): autospec=True, ): result = tron_tools.format_tron_action_dict(action_config) - assert result["executor"] == "mesos" + assert result["executor"] == "kubernetes" def test_format_tron_action_dict_paasta(self): action_dict = { @@ -898,7 +879,7 @@ def test_format_tron_action_dict_paasta(self): "retries": 2, "retries_delay": "5m", "docker_image": mock.ANY, - "executor": "mesos", + "executor": "kubernetes", "cpus": 2, "mem": 1200, "disk": 42, @@ -915,10 +896,19 @@ def test_format_tron_action_dict_paasta(self): "extra_volumes": [ {"container_path": "/nail/tmp", "host_path": "/nail/tmp", "mode": "RW"} ], - "docker_parameters": mock.ANY, - "constraints": [ - {"attribute": "pool", "operator": "LIKE", "value": "special_pool"} - ], + "field_selector_env": {"PAASTA_POD_IP": 
{"field_path": "status.podIP"}}, + "node_selectors": {"yelp.com/pool": "special_pool"}, + "labels": { + "paasta.yelp.com/cluster": "test-cluster", + "paasta.yelp.com/instance": "my_job.do_something", + "paasta.yelp.com/pool": "special_pool", + "paasta.yelp.com/service": "my_service", + "yelp.com/owner": "compute_infra_platform_experience", + }, + "annotations": {"paasta.yelp.com/routable_ip": "false"}, + "cap_drop": CAPS_DROP, + "cap_add": [], + "secret_env": {}, "trigger_downstreams": True, "triggered_by": ["foo.bar.{shortdate}"], "trigger_timeout": "5m", @@ -928,7 +918,6 @@ def test_format_tron_action_dict_paasta(self): ) assert result["docker_image"] == expected_docker assert result["env"]["SHELL"] == "/bin/bash" - assert isinstance(result["docker_parameters"], list) @mock.patch("paasta_tools.spark_tools.spark_config.SparkConfBuilder", autospec=True) def test_format_tron_action_dict_spark(self, mock_spark_conf_builder): @@ -1031,7 +1020,7 @@ def test_format_tron_action_dict_spark(self, mock_spark_conf_builder): "spark.sql.files.minPartitionNum": "12", "spark.default.parallelism": "12", } - result = tron_tools.format_tron_action_dict(action_config, use_k8s=True) + result = tron_tools.format_tron_action_dict(action_config) assert result == { "command": "spark-submit " @@ -1172,7 +1161,7 @@ def test_format_tron_action_dict_paasta_k8s_service_account(self): "paasta_tools.tron_tools.load_system_paasta_config", autospec=True, ): - result = tron_tools.format_tron_action_dict(action_config, use_k8s=True) + result = tron_tools.format_tron_action_dict(action_config) assert result == { "command": "echo something", @@ -1295,7 +1284,7 @@ def test_format_tron_action_dict_paasta_k8s( autospec=True, return_value=False, ): - result = tron_tools.format_tron_action_dict(action_config, use_k8s=True) + result = tron_tools.format_tron_action_dict(action_config) assert result == { "command": "echo something", @@ -1411,7 +1400,7 @@ def test_format_tron_action_dict_paasta_no_branch_dict(self): "requires": ["required_action"], "retries": 2, "docker_image": "", - "executor": "mesos", + "executor": "kubernetes", "cpus": 2, "mem": 1200, "disk": 42, @@ -1428,13 +1417,21 @@ def test_format_tron_action_dict_paasta_no_branch_dict(self): "extra_volumes": [ {"container_path": "/nail/tmp", "host_path": "/nail/tmp", "mode": "RW"} ], - "docker_parameters": mock.ANY, - "constraints": [ - {"attribute": "pool", "operator": "LIKE", "value": "special_pool"} - ], + "field_selector_env": {"PAASTA_POD_IP": {"field_path": "status.podIP"}}, + "node_selectors": {"yelp.com/pool": "special_pool"}, + "labels": { + "paasta.yelp.com/cluster": "paasta-dev", + "paasta.yelp.com/instance": "my_job.do_something", + "paasta.yelp.com/pool": "special_pool", + "paasta.yelp.com/service": "my_service", + "yelp.com/owner": "compute_infra_platform_experience", + }, + "annotations": {"paasta.yelp.com/routable_ip": "false"}, + "cap_drop": CAPS_DROP, + "cap_add": [], + "secret_env": {}, } assert result["env"]["SHELL"] == "/bin/bash" - assert isinstance(result["docker_parameters"], list) @mock.patch("paasta_tools.tron_tools.read_extra_service_information", autospec=True) def test_load_tron_service_config(self, mock_read_extra_service_information): @@ -1571,7 +1568,7 @@ def test_create_complete_config_e2e(self, tmpdir): # that are not static, this will cause continuous reconfiguration, which # will add significant load to the Tron API, which happened in DAR-1461. # but if this is intended, just change the hash. 
- assert hasher.hexdigest() == "35972651618a848ac6bf7947245dbaea" + assert hasher.hexdigest() == "ba2ccfd2477b2ce2233de42619aa810a" def test_override_default_pool_override(self, tmpdir): soa_dir = tmpdir.mkdir("test_create_complete_config_soa") diff --git a/tests/test_utils.py b/tests/test_utils.py index ff78f6958a..11bb80bd76 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1169,7 +1169,7 @@ def test_load_service_instance_config( autospec=True, ) @pytest.mark.parametrize("instance_type_enabled", [(True,), (False,)]) -def test_load_service_instance_auto_configs( +def test_load_service_instance_auto_configs_no_aliases( mock_load_system_paasta_config, mock_read_extra_service_information, instance_type_enabled, @@ -1177,6 +1177,9 @@ def test_load_service_instance_auto_configs( mock_load_system_paasta_config.return_value.get_auto_config_instance_types_enabled.return_value = { "marathon": instance_type_enabled, } + mock_load_system_paasta_config.return_value.get_auto_config_instance_type_aliases.return_value = ( + {} + ) result = utils.load_service_instance_auto_configs( service="fake_service", instance_type="marathon", @@ -1195,6 +1198,41 @@ def test_load_service_instance_auto_configs( assert result == {} +@pytest.mark.parametrize( + "instance_type_aliases, instance_type, expected_instance_type", + (({}, "kubernetes", "kubernetes"), ({"eks": "kubernetes"}, "eks", "kubernetes")), +) +def test_load_service_instance_auto_configs_with_autotune_aliases( + instance_type_aliases, instance_type, expected_instance_type +): + with mock.patch( + "paasta_tools.utils.service_configuration_lib.read_extra_service_information", + autospec=True, + ) as mock_read_extra_service_information, mock.patch( + "paasta_tools.utils.load_system_paasta_config", + autospec=True, + ) as mock_load_system_paasta_config: + mock_load_system_paasta_config.return_value.get_auto_config_instance_types_enabled.return_value = { + expected_instance_type: True, + } + mock_load_system_paasta_config.return_value.get_auto_config_instance_type_aliases.return_value = ( + instance_type_aliases + ) + result = utils.load_service_instance_auto_configs( + service="fake_service", + instance_type=instance_type, + cluster="fake", + soa_dir="fake_dir", + ) + mock_read_extra_service_information.assert_called_with( + "fake_service", + f"{utils.AUTO_SOACONFIG_SUBDIR}/{expected_instance_type}-fake", + soa_dir="fake_dir", + deepcopy=False, + ) + assert result == mock_read_extra_service_information.return_value + + def test_get_services_for_cluster(): cluster = "honey_bunches_of_oats" soa_dir = "completely_wholesome" @@ -1541,9 +1579,9 @@ def test_format_docker_parameters_non_default(self): {"key": "cpu-quota", "value": "600000"}, {"key": "label", "value": "paasta_service=fake_name"}, {"key": "label", "value": "paasta_instance=fake_instance"}, + {"key": "init", "value": "true"}, {"key": "cap-add", "value": "IPC_LOCK"}, {"key": "cap-add", "value": "SYS_PTRACE"}, - {"key": "init", "value": "true"}, {"key": "cap-drop", "value": "SETPCAP"}, {"key": "cap-drop", "value": "MKNOD"}, {"key": "cap-drop", "value": "AUDIT_WRITE"}, @@ -1560,6 +1598,51 @@ def test_format_docker_parameters_non_default(self): {"key": "cap-drop", "value": "SETFCAP"}, ] + def test_format_docker_parameters_overlapping_caps(self): + fake_conf = utils.InstanceConfig( + service="fake_name", + cluster="", + instance="fake_instance", + config_dict={ + "cpu_burst_add": 2, + "cfs_period_us": 200000, + "cpus": 1, + "mem": 1024, + "disk": 1234, + "cap_add": ["IPC_LOCK", "SYS_PTRACE", 
"DAC_OVERRIDE", "NET_RAW"], + }, + branch_dict=None, + ) + with mock.patch( + "paasta_tools.utils.InstanceConfig.use_docker_disk_quota", + autospec=True, + return_value=False, + ): + assert fake_conf.format_docker_parameters() == [ + {"key": "memory-swap", "value": "1088m"}, + {"key": "cpu-period", "value": "200000"}, + {"key": "cpu-quota", "value": "600000"}, + {"key": "label", "value": "paasta_service=fake_name"}, + {"key": "label", "value": "paasta_instance=fake_instance"}, + {"key": "init", "value": "true"}, + {"key": "cap-add", "value": "IPC_LOCK"}, + {"key": "cap-add", "value": "SYS_PTRACE"}, + {"key": "cap-add", "value": "DAC_OVERRIDE"}, + {"key": "cap-add", "value": "NET_RAW"}, + {"key": "cap-drop", "value": "SETPCAP"}, + {"key": "cap-drop", "value": "MKNOD"}, + {"key": "cap-drop", "value": "AUDIT_WRITE"}, + {"key": "cap-drop", "value": "CHOWN"}, + {"key": "cap-drop", "value": "FOWNER"}, + {"key": "cap-drop", "value": "FSETID"}, + {"key": "cap-drop", "value": "KILL"}, + {"key": "cap-drop", "value": "SETGID"}, + {"key": "cap-drop", "value": "SETUID"}, + {"key": "cap-drop", "value": "NET_BIND_SERVICE"}, + {"key": "cap-drop", "value": "SYS_CHROOT"}, + {"key": "cap-drop", "value": "SETFCAP"}, + ] + def test_format_docker_parameters_with_disk_quota_non_default(self): fake_conf = utils.InstanceConfig( service="fake_name", @@ -1587,9 +1670,9 @@ def test_format_docker_parameters_with_disk_quota_non_default(self): {"key": "storage-opt", "value": "size=1293942784"}, {"key": "label", "value": "paasta_service=fake_name"}, {"key": "label", "value": "paasta_instance=fake_instance"}, + {"key": "init", "value": "true"}, {"key": "cap-add", "value": "IPC_LOCK"}, {"key": "cap-add", "value": "SYS_PTRACE"}, - {"key": "init", "value": "true"}, {"key": "cap-drop", "value": "SETPCAP"}, {"key": "cap-drop", "value": "MKNOD"}, {"key": "cap-drop", "value": "AUDIT_WRITE"}, @@ -2275,12 +2358,14 @@ def test_validate_service_instance_invalid(): mock_paasta_native_instances = [("service1", "main2"), ("service1", "main3")] mock_adhoc_instances = [("service1", "interactive")] mock_k8s_instances = [("service1", "k8s")] + mock_eks_instances = [("service1", "eks")] mock_tron_instances = [("service1", "job.action")] mock_flink_instances = [("service1", "flink")] mock_cassandracluster_instances = [("service1", "cassandracluster")] mock_kafkacluster_instances = [("service1", "kafkacluster")] mock_nrtsearch_instances = [("service1", "nrtsearch")] mock_monkrelaycluster_instances = [("service1", "monkrelays")] + mock_vitesscluster_instances = [("service1", "vitesscluster")] my_service = "service1" my_instance = "main" fake_cluster = "fake_cluster" @@ -2293,12 +2378,14 @@ def test_validate_service_instance_invalid(): mock_paasta_native_instances, mock_adhoc_instances, mock_k8s_instances, + mock_eks_instances, mock_tron_instances, mock_flink_instances, mock_cassandracluster_instances, mock_kafkacluster_instances, mock_nrtsearch_instances, mock_monkrelaycluster_instances, + mock_vitesscluster_instances, ], ): with raises( @@ -2560,7 +2647,6 @@ def test_is_deploy_step(): assert utils.is_deploy_step("thingy") assert not utils.is_deploy_step("itest") - assert not utils.is_deploy_step("performance-check") assert not utils.is_deploy_step("command-thingy") diff --git a/tox.ini b/tox.ini index 75311abec3..472ba7f09e 100644 --- a/tox.ini +++ b/tox.ini @@ -78,13 +78,15 @@ commands = /bin/rm -rf docs/source/generated/ # The last arg to apidoc is a list of excluded paths sphinx-apidoc -f -e -o docs/source/generated/ paasta_tools - 
sphinx-build -b html -d docs/build/doctrees docs/source docs/build/html + sphinx-build -j auto -b html -d docs/build/doctrees docs/source docs/build/html [testenv:k8s_itests] basepython = python3.8 whitelist_externals = bash +# one day we'll use a fully pinned venv here... deps = urllib3<2.0 + cryptography<42 docker-compose=={[tox]docker_compose_version} setenv = passenv = @@ -107,7 +109,7 @@ commands = {toxinidir}/k8s_itests/scripts/setup.sh # Run paasta-tools k8s_itests in docker docker-compose down - docker-compose --verbose build --build-arg DOCKER_REGISTRY={env:DOCKER_REGISTRY:docker-dev.yelpcorp.com/} --build-arg {env:INDEX_URL_BUILD_ARG:UNUSED}=https://pypi.org/simple + docker-compose --verbose build --parallel --build-arg DOCKER_REGISTRY={env:DOCKER_REGISTRY:docker-dev.yelpcorp.com/} --build-arg {env:INDEX_URL_BUILD_ARG:UNUSED}=https://pypi.org/simple docker-compose up \ --abort-on-container-exit @@ -136,7 +138,7 @@ basepython = python3.8 setenv = PAASTA_SYSTEM_CONFIG_DIR = {toxinidir}/general_itests/fake_etc_paasta changedir=general_itests/ -passenv = DOCKER_TLS_VERIFY DOCKER_HOST DOCKER_CERT_PATH +passenv = DOCKER_TLS_VERIFY DOCKER_HOST DOCKER_CERT_PATH DOCKER_REGISTRY deps = {[testenv]deps} behave==1.2.5 diff --git a/yelp_package/Makefile b/yelp_package/Makefile index bf172eb9f9..981d29be41 100644 --- a/yelp_package/Makefile +++ b/yelp_package/Makefile @@ -13,7 +13,7 @@ # limitations under the License. # Edit this release and run "make release" -RELEASE=0.196.0 +RELEASE=0.218.6 SHELL=/bin/bash diff --git a/yelp_package/itest/ubuntu.sh b/yelp_package/itest/ubuntu.sh index c0adab7b1c..c1c6716d3d 100755 --- a/yelp_package/itest/ubuntu.sh +++ b/yelp_package/itest/ubuntu.sh @@ -51,7 +51,6 @@ itest local-run mark-for-deployment metastatus -performance-check push-to-registry security-check status
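
A note on the reworked parametrize table for test_get_rules_for_service_instance earlier in this patch: across all four cases the expected_rules column is 1 exactly when the instance's autoscaling params set use_prometheus, regardless of whether metrics_provider is uwsgi or cpu. A tiny sketch of that gate, with the predicate name assumed rather than taken from setup_prometheus_adapter_config:

    def should_emit_prometheus_rule(instance_config) -> bool:
        # expected_rules in the test table is 1 only when use_prometheus is truthy;
        # the real rule builders may apply further provider-specific checks
        params = instance_config.get_autoscaling_params()
        return bool(params.get("use_prometheus", False))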
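
The new test_get_volumes_from_spark_k8s_configs cases pin down a mapping from spark.kubernetes.executor.volumes.hostPath.<name>.* settings to Docker-style host:container:mode mount strings, with an invalid volume name aborting via sys.exit(1). A minimal sketch of that behaviour, assuming the key grouping, the lowercase name check, and the host/container ordering (the real paasta_tools.spark_tools implementation may differ):

    import re
    import sys
    from typing import Dict, List

    PREFIX = "spark.kubernetes.executor.volumes.hostPath."
    K8S_NAME_RE = re.compile(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$")  # assumed lowercase-only name rule


    def get_volumes_from_spark_k8s_configs(spark_conf: Dict[str, str]) -> List[str]:
        # Group dotted keys by volume name: <PREFIX><name>.{mount.path,options.path,mount.readOnly}
        volumes: Dict[str, Dict[str, str]] = {}
        for key, value in spark_conf.items():
            if key.startswith(PREFIX):
                name, _, field = key[len(PREFIX):].partition(".")
                volumes.setdefault(name, {})[field] = value

        mounts: List[str] = []
        for name, fields in volumes.items():
            if not K8S_NAME_RE.match(name):
                # assumed: bad names are fatal, matching the mocked sys.exit(1) in the test
                print(f"Invalid hostPath volume name: {name}", file=sys.stderr)
                sys.exit(1)
            mode = "ro" if fields.get("mount.readOnly", "false") == "true" else "rw"
            mounts.append(f"{fields['options.path']}:{fields['mount.path']}:{mode}")
        return mounts

Running this sketch against the first fixture yields ["/nail/srv:/nail/srv:ro", "/nail/123:/nail/123:rw"], and the uppercase NAILsrv-123 fixture trips the name check.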
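
Likewise, the added test_load_service_instance_auto_configs_with_autotune_aliases encodes the expectation that get_auto_config_instance_type_aliases() can redirect one instance type (eks) to another type's autotune directory (kubernetes) before the soaconfigs read. A rough sketch of that lookup, with the helper names and the subdir constant's value assumed rather than copied from utils.py:

    AUTO_SOACONFIG_SUBDIR = "autotuned_defaults"  # assumed value, not verified against utils.py


    def resolve_autotune_instance_type(instance_type: str, system_paasta_config) -> str:
        # e.g. {"eks": "kubernetes"}: eks instances reuse the kubernetes autotune files
        aliases = system_paasta_config.get_auto_config_instance_type_aliases()
        return aliases.get(instance_type, instance_type)


    def autotune_extra_info_name(instance_type: str, cluster: str, system_paasta_config) -> str:
        # Mirrors the f"{utils.AUTO_SOACONFIG_SUBDIR}/{expected_instance_type}-fake" assertion in the test
        resolved = resolve_autotune_instance_type(instance_type, system_paasta_config)
        return f"{AUTO_SOACONFIG_SUBDIR}/{resolved}-{cluster}"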