Skip to content

Commit

Permalink
allow using a project entry keyword in scrapinghub.yml instead of nume…
Browse files Browse the repository at this point in the history
…ric id
  • Loading branch information
kalessin committed Sep 4, 2024
1 parent efdf8cb commit fa9f84f
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 24 deletions.
7 changes: 3 additions & 4 deletions shub_workflow/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,11 @@ def parse_args(self) -> Namespace:

class SCProjectClassProtocol(Protocol):

project_id: Optional[int]
project_id: int


class SCProjectClass(SCProjectClassProtocol):
def __init__(self):
self.project_id = resolve_project_id()
self.client = ScrapinghubClient(max_retries=100)
super().__init__()

Expand Down Expand Up @@ -263,8 +262,7 @@ def set_flow_id_name(self, args: Namespace):
def add_argparser_options(self):
self.argparser.add_argument(
"--project-id",
help="Overrides target project id.",
type=int,
help="Either numeric id, or entry keyword in scrapinghub.yml. Overrides target project id.",
default=self.default_project_id,
)
self.argparser.add_argument("--flow-id", help="If given, use the given flow id.")
Expand All @@ -285,6 +283,7 @@ def parse_args(self) -> Namespace:
self.project_id = resolve_project_id(self.parse_project_id(args))
if not self.project_id:
self.argparser.error("Project id not provided.")
logger.info(f"Running on project {self.project_id}")

return args

Expand Down
45 changes: 25 additions & 20 deletions shub_workflow/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def resolve_shub_jobkey() -> Optional[str]:
return os.environ.get("SHUB_JOBKEY")


def resolve_project_id(project_id=None) -> Optional[int]:
def resolve_project_id(project_id=None) -> int:
"""
Gets project id from following sources in following order of precedence:
- default parameter values
Expand All @@ -34,32 +34,37 @@ def resolve_project_id(project_id=None) -> Optional[int]:
either locally or from scrapinghub, correctly configured
"""
if project_id:
return int(project_id)

# read from environment
if os.environ.get("PROJECT_ID") is not None:
return int(os.environ["PROJECT_ID"])

# for ScrapyCloud jobs:
jobkey = resolve_shub_jobkey()
if jobkey:
return int(jobkey.split("/")[0])
try:
return int(project_id)
except ValueError:
pass
else:
# read from environment only if not explicitly provided
if os.environ.get("PROJECT_ID") is not None:
return int(os.environ["PROJECT_ID"])

# for ScrapyCloud jobs:
jobkey = resolve_shub_jobkey()
if jobkey:
return int(jobkey.split("/")[0])

# read from scrapinghub.yml
try:
from shub.config import load_shub_config # pylint: disable=import-error
from shub.config import load_shub_config

cfg = load_shub_config()
project_id = cfg.get_project_id("default")
if project_id:
return int(project_id)
try:
project_id = project_id or "default"
return int(cfg.get_project_id(project_id))
except Exception:
logger.error(f"Project entry '{project_id}' not found in scrapinghub.yml.")
except ImportError:
logger.warning("Install shub package if want to access scrapinghub.yml")

if not project_id:
logger.warning("Project id not found. Use either PROJECT_ID env. variable or scrapinghub.yml default target.")
logger.error("Install shub package if want to access scrapinghub.yml")

return None
raise ValueError(
"No default project id found. Use either PROJECT_ID env. variable or set 'default' entry in scrapinghub.yml, "
"or use --project-id with a project numeric id or an existing entry in scrapinghub.yml."
)


MINS_IN_A_DAY = 24 * 60
Expand Down

0 comments on commit fa9f84f

Please sign in to comment.