You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm encountering an error where my service account does not have permission to read Kubernetes secrets, and `sky launch` errors out directly. Expected: it should keep trying other enabled clouds.
(base) root@4c2f83bfce91:/skycamp-tutorial/02_finetuning_llm# sky launch
Considered resources (1 node):
--------------------------------------------------------------------------------------------------------------------------------------------------
CLOUD INSTANCE vCPUs Mem(GB) ACCELERATORS REGION/ZONE COST ($) CHOSEN
--------------------------------------------------------------------------------------------------------------------------------------------------
Kubernetes 2CPU--2GB 2 2 - gke_skycamp-skypilot-fastchat_us-central1-c_skycamp-gke-test 0.00 ✔
GCP n2-standard-8 8 32 - us-central1-a 0.39
--------------------------------------------------------------------------------------------------------------------------------------------------
Launching a new cluster 'sky-374f-root'. Proceed? [Y/n]:
Traceback (most recent call last):
File "/opt/conda/bin/sky", line 8, in <module>
sys.exit(cli())
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1078, in main
rv = self.invoke(ctx)
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 367, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/cli.py", line 812, in invoke
return super().invoke(ctx)
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 388, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/cli.py", line 1125, in launch
_launch_with_confirm(task,
File "/opt/conda/lib/python3.9/site-packages/sky/cli.py", line 603, in _launch_with_confirm
sky.launch(
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 388, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 388, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/execution.py", line 454, in launch
return _execute(
File "/opt/conda/lib/python3.9/site-packages/sky/execution.py", line 280, in _execute
handle = backend.provision(task,
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 388, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 367, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/backends/backend.py", line 60, in provision
return self._provision(task, to_provision, dryrun, stream_logs,
File "/opt/conda/lib/python3.9/site-packages/sky/backends/cloud_vm_ray_backend.py", line 2810, in _provision
config_dict = retry_provisioner.provision_with_retries(
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 388, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/backends/cloud_vm_ray_backend.py", line 1988, in provision_with_retries
config_dict = self._retry_zones(
File "/opt/conda/lib/python3.9/site-packages/sky/backends/cloud_vm_ray_backend.py", line 1399, in _retry_zones
config_dict = backend_utils.write_cluster_config(
File "/opt/conda/lib/python3.9/site-packages/sky/utils/common_utils.py", line 388, in _record
return f(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/sky/backends/backend_utils.py", line 987, in write_cluster_config
_add_auth_to_cluster_config(cloud, tmp_yaml_path)
File "/opt/conda/lib/python3.9/site-packages/sky/backends/backend_utils.py", line 1046, in _add_auth_to_cluster_config
config = auth.setup_kubernetes_authentication(config)
File "/opt/conda/lib/python3.9/site-packages/sky/authentication.py", line 411, in setup_kubernetes_authentication
if kubernetes_utils.check_secret_exists(secret_name, namespace, context):
File "/opt/conda/lib/python3.9/site-packages/sky/provision/kubernetes/utils.py", line 1661, in check_secret_exists
kubernetes.core_api(context).read_namespaced_secret(
File "/opt/conda/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/opt/conda/lib/python3.9/site-packages/kubernetes/client/api/core_v1_api.py", line 25013, in read_namespaced_secret
return self.read_namespaced_secret_with_http_info(name, namespace, **kwargs) # noqa: E501
File "/opt/conda/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/opt/conda/lib/python3.9/site-packages/kubernetes/client/api/core_v1_api.py", line 25100, in read_namespaced_secret_with_http_info
return self.api_client.call_api(
File "/opt/conda/lib/python3.9/site-packages/kubernetes/client/api_client.py", line 348, in call_api
return self.__call_api(resource_path, method,
File "/opt/conda/lib/python3.9/site-packages/kubernetes/client/api_client.py", line 180, in __call_api
response_data = self.request(
File "/opt/conda/lib/python3.9/site-packages/kubernetes/client/api_client.py", line 373, in request
return self.rest_client.GET(url,
File "/opt/conda/lib/python3.9/site-packages/kubernetes/client/rest.py", line 244, in GET
return self.request("GET", url,
File "/opt/conda/lib/python3.9/site-packages/kubernetes/client/rest.py", line 238, in request
raise ApiException(http_resp=r)
kubernetes.client.exceptions.ApiException: (403)
Reason: Forbidden
HTTP response headers: HTTPHeaderDict({'Audit-Id': 'b75a4cdf-a286-4711-ac2e-6c29d5eaa489', 'Cache-Control': 'no-cache, private', 'Content-Type': 'application/json', 'X-Content-Type-Options': 'nosniff', 'X-Kubernetes-Pf-Flowschema-Uid': '18013214-4eff-4988-aa5b-b23e86d71962', 'X-Kubernetes-Pf-Prioritylevel-Uid': '20c437ec-3ec1-4351-94d7-951619d123b3', 'Date': 'Tue, 22 Oct 2024 21:16:04 GMT', 'Content-Length': '416'})
HTTP response body: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"secrets \"sky-ssh-keys\" is forbidden: User \"[email protected]\" cannot get resource \"secrets\" in API group \"\" in the namespace \"default\": requires one of [\"container.secrets.get\"] permission(s).","reason":"Forbidden","details":{"name":"sky-ssh-keys","kind":"secrets"},"code":403}
The text was updated successfully, but these errors were encountered:
cblmemo
changed the title
[Serve][K8s] Error in k8s secret fetching breaks the provision failover loop
[K8s] Error in k8s secret fetching breaks the provision failover loop
Oct 22, 2024
I'm encountering an error where my service account does not have permission to read Kubernetes secrets, and `sky launch` errors out directly. Expected: it should keep trying other enabled clouds.
Version & Commit info:
`sky -c`: skypilot, commit f5d4f64dd42e831546df0982fa2e46d280a74cbd
The text was updated successfully, but these errors were encountered: