diff --git a/shub_workflow/script.py b/shub_workflow/script.py
index 44cb9bd..47f655a 100644
--- a/shub_workflow/script.py
+++ b/shub_workflow/script.py
@@ -106,12 +106,11 @@ def parse_args(self) -> Namespace:
 
 
 class SCProjectClassProtocol(Protocol):
-    project_id: Optional[int]
+    project_id: int
 
 
 class SCProjectClass(SCProjectClassProtocol):
     def __init__(self):
-        self.project_id = resolve_project_id()
         self.client = ScrapinghubClient(max_retries=100)
         super().__init__()
 
@@ -263,8 +262,7 @@ def set_flow_id_name(self, args: Namespace):
     def add_argparser_options(self):
         self.argparser.add_argument(
             "--project-id",
-            help="Overrides target project id.",
-            type=int,
+            help="Either a numeric id or an entry keyword in scrapinghub.yml. Overrides the target project id.",
             default=self.default_project_id,
         )
         self.argparser.add_argument("--flow-id", help="If given, use the given flow id.")
@@ -285,6 +283,7 @@ def parse_args(self) -> Namespace:
         self.project_id = resolve_project_id(self.parse_project_id(args))
         if not self.project_id:
             self.argparser.error("Project id not provided.")
+        logger.info(f"Running on project {self.project_id}")
         return args
 
 
diff --git a/shub_workflow/utils/__init__.py b/shub_workflow/utils/__init__.py
index 71b797e..5e97271 100644
--- a/shub_workflow/utils/__init__.py
+++ b/shub_workflow/utils/__init__.py
@@ -23,7 +23,7 @@ def resolve_shub_jobkey() -> Optional[str]:
     return os.environ.get("SHUB_JOBKEY")
 
 
-def resolve_project_id(project_id=None) -> Optional[int]:
+def resolve_project_id(project_id=None) -> int:
     """
     Gets project id from following sources in following order of precedence:
     - default parameter values
@@ -34,32 +34,37 @@ def resolve_project_id(project_id=None) -> Optional[int]:
     either locally or from scrapinghub, correctly configured
     """
     if project_id:
-        return int(project_id)
-
-    # read from environment
-    if os.environ.get("PROJECT_ID") is not None:
-        return int(os.environ["PROJECT_ID"])
-
-    # for ScrapyCloud jobs:
-    jobkey = resolve_shub_jobkey()
-    if jobkey:
-        return int(jobkey.split("/")[0])
+        try:
+            return int(project_id)
+        except ValueError:
+            pass
+    else:
+        # read from environment only if not explicitly provided
+        if os.environ.get("PROJECT_ID") is not None:
+            return int(os.environ["PROJECT_ID"])
+
+        # for ScrapyCloud jobs:
+        jobkey = resolve_shub_jobkey()
+        if jobkey:
+            return int(jobkey.split("/")[0])
 
     # read from scrapinghub.yml
     try:
-        from shub.config import load_shub_config  # pylint: disable=import-error
+        from shub.config import load_shub_config
 
         cfg = load_shub_config()
-        project_id = cfg.get_project_id("default")
-        if project_id:
-            return int(project_id)
+        try:
+            project_id = project_id or "default"
+            return int(cfg.get_project_id(project_id))
+        except Exception:
+            logger.error(f"Project entry '{project_id}' not found in scrapinghub.yml.")
     except ImportError:
-        logger.warning("Install shub package if want to access scrapinghub.yml")
-
-    if not project_id:
-        logger.warning("Project id not found. Use either PROJECT_ID env. variable or scrapinghub.yml default target.")
+        logger.error("Install the shub package if you want to access scrapinghub.yml")
 
-    return None
+    raise ValueError(
+        "No default project id found. Either set the PROJECT_ID env variable, set a 'default' entry in "
+        "scrapinghub.yml, or use --project-id with a numeric project id or an existing entry in scrapinghub.yml."
+    )
 
 
 MINS_IN_A_DAY = 24 * 60
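
A minimal usage sketch of the patched resolve_project_id; the "prod" entry and the numeric ids are hypothetical, and the precedence shown follows the new code above:

    # Assumes a scrapinghub.yml along these (hypothetical) lines:
    # projects:
    #   default: 12345
    #   prod: 67890
    from shub_workflow.utils import resolve_project_id

    resolve_project_id(12345)   # numeric input is returned as int -> 12345
    resolve_project_id("prod")  # non-numeric input is looked up as a scrapinghub.yml entry -> 67890
    resolve_project_id()        # no input: PROJECT_ID env var, then SHUB_JOBKEY, then the "default" entry
    # When every source fails, the function now raises ValueError instead of returning None.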
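
Because --project-id drops type=int, a script built on this class accepts either form (hypothetical script name):

    python myscript.py --project-id 12345   # numeric project id
    python myscript.py --project-id prod    # entry keyword resolved via scrapinghub.yml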