Skip to content

Commit

Permalink
allow using a project entry keyword in scrapinghub.yml instead of nume…
Browse files Browse the repository at this point in the history
…ric id
  • Loading branch information
kalessin committed Sep 4, 2024
1 parent efdf8cb commit fa9f84f
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 24 deletions.
7 changes: 3 additions & 4 deletions shub_workflow/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,11 @@ def parse_args(self) -> Namespace:

class SCProjectClassProtocol(Protocol):

project_id: Optional[int]
project_id: int


class SCProjectClass(SCProjectClassProtocol):
def __init__(self):
self.project_id = resolve_project_id()
self.client = ScrapinghubClient(max_retries=100)
super().__init__()

Expand Down Expand Up @@ -263,8 +262,7 @@ def set_flow_id_name(self, args: Namespace):
def add_argparser_options(self):
self.argparser.add_argument(
"--project-id",
help="Overrides target project id.",
type=int,
help="Either numeric id, or entry keyword in scrapinghub.yml. Overrides target project id.",
default=self.default_project_id,
)
self.argparser.add_argument("--flow-id", help="If given, use the given flow id.")
Expand All @@ -285,6 +283,7 @@ def parse_args(self) -> Namespace:
self.project_id = resolve_project_id(self.parse_project_id(args))
if not self.project_id:
self.argparser.error("Project id not provided.")
logger.info(f"Running on project {self.project_id}")

return args

Expand Down
45 changes: 25 additions & 20 deletions shub_workflow/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def resolve_shub_jobkey() -> Optional[str]:
return os.environ.get("SHUB_JOBKEY")


def resolve_project_id(project_id=None) -> Optional[int]:
def resolve_project_id(project_id=None) -> int:
"""
Gets project id from following sources in following order of precedence:
- default parameter values
Expand All @@ -34,32 +34,37 @@ def resolve_project_id(project_id=None) -> Optional[int]:
either locally or from scrapinghub, correctly configured
"""
if project_id:
return int(project_id)

# read from environment
if os.environ.get("PROJECT_ID") is not None:
return int(os.environ["PROJECT_ID"])

# for ScrapyCloud jobs:
jobkey = resolve_shub_jobkey()
if jobkey:
return int(jobkey.split("/")[0])
try:
return int(project_id)
except ValueError:
pass
else:
# read from environment only if not explicitly provided
if os.environ.get("PROJECT_ID") is not None:
return int(os.environ["PROJECT_ID"])

# for ScrapyCloud jobs:
jobkey = resolve_shub_jobkey()
if jobkey:
return int(jobkey.split("/")[0])

# read from scrapinghub.yml
try:
from shub.config import load_shub_config # pylint: disable=import-error
from shub.config import load_shub_config

cfg = load_shub_config()
project_id = cfg.get_project_id("default")
if project_id:
return int(project_id)
try:
project_id = project_id or "default"
return int(cfg.get_project_id(project_id))
except Exception:
logger.error(f"Project entry '{project_id}' not found in scrapinghub.yml.")
except ImportError:
logger.warning("Install shub package if want to access scrapinghub.yml")

if not project_id:
logger.warning("Project id not found. Use either PROJECT_ID env. variable or scrapinghub.yml default target.")
logger.error("Install shub package if want to access scrapinghub.yml")

return None
raise ValueError(
"No default project id found. Use either PROJECT_ID env. variable or set 'default' entry in scrapinghub.yml, "
"or use --project-id with a project numeric id or an existing entry in scrapinghub.yml."
)


MINS_IN_A_DAY = 24 * 60
Expand Down

0 comments on commit fa9f84f

Please sign in to comment.