Fix mila scraping on preprod server (#76)
* add local clusters configs (for tests)

* allow `none` value for cluster host name

* add local call to sacct if host=null, host="" or host="localhost" (+ unit test)

* black + lint

* fix "scraping"and "scraper" spellings (no more scraPPer or scraPPing)

* update waste_stats to comply with authenticated MongoDB access (SARC_MODE=scraping env var)

* update systemd script to scrape jobs on mila cluster too

* update prod config to execute sacct command locally

* lint

* fix test_localhost test

* lint

* update config files

* Add --allusers to sacct

The new Slurm version (23.02.6) requires the `--allusers` argument together with `--json`; otherwise sacct only returns jobs from the current user (see the sacct sketch after the `sarc/jobs/sacct.py` diff below).

* Fix tests

* added test for new slurm version output format

* fix for slurm 23.x.x json format

* fix cedar scraping

* fix cedar and graham config (no prometheus for these clusters)

* lint

* update

* remove `host=None` and `host=""` and keep only `host="localhost"` option

* commit forgotten file

---------

Co-authored-by: Xavier Bouthillier <[email protected]>
Co-authored-by: Xavier Bouthillier <[email protected]>
Co-authored-by: bruno.carrez <[email protected]>
4 people authored Nov 13, 2023
1 parent ee12dfa commit 5ccd799
Showing 14 changed files with 79 additions and 31 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -14,3 +14,4 @@ clockwork_ldap.key
sarc-cache/
.idea
_build
+ sarc_mongo
8 changes: 4 additions & 4 deletions config/sarc-dev.json
@@ -62,8 +62,8 @@
"duc_inodes_command": null,
"duc_storage_command": null,
"diskusage_report_command": "diskusage_report --project --all_users",
"prometheus_url": "https://mila-thanos.calculquebec.ca",
"prometheus_headers_file": "secrets/drac_prometheus/headers.json",
"prometheus_url": null,
"prometheus_headers_file": null,
"start_date": "2022-04-01",
"nodes_info_file": "secrets/nodes_graham.txt"
},
@@ -75,8 +75,8 @@
"duc_inodes_command": null,
"duc_storage_command": null,
"diskusage_report_command": "diskusage_report --project --all_users",
"prometheus_url": "https://mila-thanos.calculquebec.ca",
"prometheus_headers_file": "secrets/drac_prometheus/headers.json",
"prometheus_url": null,
"prometheus_headers_file": null,
"start_date": "2022-04-01",
"nodes_info_file": "secrets/nodes_cedar.txt"
}
8 changes: 4 additions & 4 deletions docs/deployment.md
@@ -90,7 +90,7 @@ You have 3 mongo users account;

- `mongoadmin` (password: see `secrets/mongo_admin_password.txt`) is the global mongodb administrator account.
- `readuser`(password: `readpwd`, see `config/sarc-client.json`) is the user for general use (only reads data from the database)
- - `writeuser` (password: see `secrets/mongo_writeuser_password.txt`) is used by the server during scrapping
+ - `writeuser` (password: see `secrets/mongo_writeuser_password.txt`) is used by the server during scraping

Therefore, if you want to admin the database with compass, the connection string will be (see `config/sarc-*.json`) :
```
@@ -121,9 +121,9 @@ See https://www.mongodb.com/docs/manual/administration/production-notes/#x86_64
Users creation:

```
- SARC_MODE=scrapping poetry run sarc db init --database sarc --url "mongodb://mongoadmin:<admin password>@localhost:27017" --username admin --password <admin password> --account admin
- SARC_MODE=scrapping poetry run sarc db init --database sarc --url "mongodb://mongoadmin:<admin password>@localhost:27017" --username readuser --password readpwd --account read
- SARC_MODE=scrapping poetry run sarc db init --database sarc --url "mongodb://mongoadmin:<admin password>@localhost:27017" --username writeuser --password <writeuser password> --account write
+ SARC_MODE=scraping poetry run sarc db init --database sarc --url "mongodb://mongoadmin:<admin password>@localhost:27017" --username admin --password <admin password> --account admin
+ SARC_MODE=scraping poetry run sarc db init --database sarc --url "mongodb://mongoadmin:<admin password>@localhost:27017" --username readuser --password readpwd --account read
+ SARC_MODE=scraping poetry run sarc db init --database sarc --url "mongodb://mongoadmin:<admin password>@localhost:27017" --username writeuser --password <writeuser password> --account write
```

### (optionnal) database restoration
10 changes: 9 additions & 1 deletion examples/waste_stats.py
@@ -4,7 +4,7 @@
import pandas as pd
from tqdm import tqdm

- from sarc.config import config
+ from sarc.config import ScraperConfig, _config_class, config
from sarc.jobs import get_jobs


@@ -21,6 +21,7 @@ def load_job_series(filename=None) -> pd.DataFrame:
)

df = None

# Fetch all jobs from the clusters
for job in tqdm(get_jobs(cluster=cluster, start="2023-02-10"), total=total):
if job.duration < timedelta(seconds=60):
@@ -47,6 +48,13 @@
return df


+ # to access series, you need prometheus access rights. This is doable only with `SARC_MODE=scraping` for the moment
+ # check SARC_MODE env variable
+ config_class = _config_class(os.getenv("SARC_MODE", "none"))
+ if config_class is not ScraperConfig:
+     print("SARC_MODE=scraping is required to access job series (prometheus))")
+     exit(0)

filename = "mila_job_series4.pkl"
df = load_job_series(filename)

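The guard added above is worth reading on its own. A minimal, self-contained sketch of it, assuming `sarc.config` is importable exactly as in the diff (`_config_class` and `ScraperConfig` are the real names from this commit; the message text is normalized here):

```python
# Sketch of the SARC_MODE guard added to examples/waste_stats.py:
# job series come from Prometheus, which for now is only reachable
# through the scraping configuration.
import os

from sarc.config import ScraperConfig, _config_class

if _config_class(os.getenv("SARC_MODE", "none")) is not ScraperConfig:
    print("SARC_MODE=scraping is required to access job series (prometheus)")
    raise SystemExit(0)
```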
2 changes: 1 addition & 1 deletion sarc/cli/acquire/jobs.py
@@ -20,7 +20,7 @@ def parse_dates(dates: list[str], cluster_name: str) -> list[(datetime, bool)]:
for date in dates:
if date == "auto":
# is_auto is set to True to indicate that the database collection `clusters`
- # should be updated if scrapping successful
+ # should be updated if scraping successful
dates_auto = _dates_auto(cluster_name)
parsed_dates.extend([(date, True) for date in dates_auto])
elif date.count("-") == 5:
8 changes: 4 additions & 4 deletions sarc/config.py
@@ -70,7 +70,7 @@ def replace(self, **replacements):


class ClusterConfig(BaseModel):
- host: str
+ host: str = "localhost"
timezone: Union[str, zoneinfo.ZoneInfo] # | does not work with Pydantic's eval
prometheus_url: str = None
prometheus_headers_file: str = None
@@ -185,7 +185,7 @@ class Config(BaseModel):
_abs_path = validator("cache", allow_reuse=True)(_absolute_path)


- class ScrapperConfig(BaseModel):
+ class ScraperConfig(BaseModel):
mongo: MongoConfig
cache: Path = None

@@ -246,7 +246,7 @@ def parse_config(config_path, config_cls=Config):

def _config_class(mode):
modes = {
"scrapping": ScrapperConfig,
"scraping": ScraperConfig,
"client": Config,
}
return modes.get(mode, Config)
@@ -264,7 +264,7 @@ def config():
except pydantic.error_wrappers.ValidationError as err:
if config_class is Config:
raise ConfigurationError(
"Try `SARC_MODE=scrapping sarc acquire...` if you want admin rights"
"Try `SARC_MODE=scraping sarc ...` if you want admin rights"
) from err
raise

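For reference, the renamed mode lookup dispatches as sketched below: anything other than the two known modes falls back to the read-only `Config`, which is what makes the `scrapping` → `scraping` rename a breaking change for callers. A sketch, assuming the definitions from the diff above:

```python
# Sketch of the _config_class dispatch in sarc/config.py after the rename.
import os

from sarc.config import Config, ScraperConfig, _config_class

assert _config_class("scraping") is ScraperConfig  # write/admin access
assert _config_class("client") is Config           # read-only access
assert _config_class("scrapping") is Config        # old spelling now falls back

# Typical use, mirroring config():
config_class = _config_class(os.getenv("SARC_MODE", "none"))
```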
10 changes: 8 additions & 2 deletions sarc/jobs/sacct.py
@@ -1,5 +1,6 @@
import json
import logging
+ import subprocess
import sys
import traceback
from datetime import date, datetime, time, timedelta
@@ -56,15 +57,20 @@ def __init__(self, cluster: ClusterConfig, day: datetime):
self.cachefile = None

def fetch_raw(self) -> dict:
"""Fetch the raw sacct data as a dict via SSH."""
"""Fetch the raw sacct data as a dict via SSH, or run sacct locally."""
fmt = "%Y-%m-%dT%H:%M"
start = self.start.strftime(fmt)
end = self.end.strftime(fmt)
accounts = self.cluster.accounts and ",".join(self.cluster.accounts)
accounts_option = f"-A {accounts}" if accounts else ""
cmd = f"{self.cluster.sacct_bin} {accounts_option} -X -S '{start}' -E '{end}' --allusers --json"
print(f"{self.cluster.name} $ {cmd}")
- results = self.cluster.ssh.run(cmd, hide=True)
+ if self.cluster.host == "localhost":
+     results = subprocess.run(
+         cmd, shell=True, text=True, capture_output=True, check=False
+     )
+ else:
+     results = self.cluster.ssh.run(cmd, hide=True)
return json.loads(results.stdout[results.stdout.find("{") :])

def get_raw(self) -> dict:
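The branch above is the core of the fix: when a cluster's `host` is `"localhost"`, `sacct` runs in a local subprocess instead of over SSH, and the downstream parsing is unchanged because both branches expose a `.stdout`. A standalone sketch of the local path — the sacct binary path is borrowed from the test config further down, and the date range is a placeholder:

```python
# Sketch of the local execution path added to SAcctScraper.fetch_raw:
# run sacct locally and parse the JSON payload from its stdout.
import json
import subprocess

cmd = (
    "/opt/slurm/bin/sacct -X -S '2023-02-14T00:00' -E '2023-02-15T00:00' "
    "--allusers --json"  # --allusers: required since Slurm 23.02.6 (see commit message)
)
results = subprocess.run(cmd, shell=True, text=True, capture_output=True, check=False)

# Skipping to the first "{" drops any banner or warning text printed
# before the JSON document, exactly as the scraper does.
data = json.loads(results.stdout[results.stdout.find("{"):])
print(len(data.get("jobs", [])))
```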
6 changes: 3 additions & 3 deletions scripts/launch_mongod.sh
@@ -142,9 +142,9 @@ function mongo_start {
_mongo_no_auth &
wait_mongo

- SARC_MODE=scrapping sarc db init --database $DBNAME --url "mongodb://$ADDRESS:$PORT" --username $ADMIN --password $PASSWORD --account admin
- SARC_MODE=scrapping sarc db init --database $DBNAME --url "mongodb://$ADDRESS:$PORT" --username $WRITEUSER_NAME --password $WRITEUSER_PWD --account write
- SARC_MODE=scrapping sarc db init --database $DBNAME --url "mongodb://$ADDRESS:$PORT" --username $READUSER_NAME --password $READUSER_PWD --account read
+ SARC_MODE=scraping sarc db init --database $DBNAME --url "mongodb://$ADDRESS:$PORT" --username $ADMIN --password $PASSWORD --account admin
+ SARC_MODE=scraping sarc db init --database $DBNAME --url "mongodb://$ADDRESS:$PORT" --username $WRITEUSER_NAME --password $WRITEUSER_PWD --account write
+ SARC_MODE=scraping sarc db init --database $DBNAME --url "mongodb://$ADDRESS:$PORT" --username $READUSER_NAME --password $READUSER_PWD --account read

mongo_stop
fg
2 changes: 1 addition & 1 deletion scripts/systemd/scrapers.sh
@@ -2,4 +2,4 @@
SCRIPT=$(readlink -f "$0")
SCRIPTPATH=$(dirname "$SCRIPT")
cd $SCRIPTPATH/../../
- sudo -u sarc SARC_MODE=scrapping SARC_CONFIG=$SCRIPTPATH/../../config/sarc-prod.json ../.local/bin/poetry run sarc acquire jobs -c narval cedar beluga graham -d auto
+ sudo -u sarc SARC_MODE=scraping SARC_CONFIG=$SCRIPTPATH/../../config/sarc-prod.json ../.local/bin/poetry run sarc acquire jobs -c narval cedar beluga graham mila -d auto
10 changes: 5 additions & 5 deletions tests/conftest.py
@@ -11,7 +11,7 @@
ClusterConfig,
Config,
MongoConfig,
- ScrapperConfig,
+ ScraperConfig,
config,
parse_config,
using_config,
@@ -23,8 +23,8 @@
@pytest.fixture(scope="session")
def standard_config_object():
mpatch = MonkeyPatch()
mpatch.setenv("SARC_MODE", "scrapping")
yield parse_config(Path(__file__).parent / "sarc-test.json", ScrapperConfig)
mpatch.setenv("SARC_MODE", "scraping")
yield parse_config(Path(__file__).parent / "sarc-test.json", ScraperConfig)
mpatch.undo()


@@ -46,14 +46,14 @@ def client_config(client_config_object, tmp_path):
@pytest.fixture()
def standard_config(standard_config_object, tmp_path):
cfg = standard_config_object.replace(cache=tmp_path / "sarc-tmp-test-cache")
- with using_config(cfg, ScrapperConfig) as cfg:
+ with using_config(cfg, ScraperConfig) as cfg:
yield cfg


@pytest.fixture
def disabled_cache():
cfg = config().replace(cache=None)
- with using_config(cfg, ScrapperConfig) as cfg:
+ with using_config(cfg, ScraperConfig) as cfg:
yield


10 changes: 5 additions & 5 deletions tests/functional/conftest.py
@@ -270,9 +270,9 @@ def freeport():


@pytest.fixture
- def scrapping_mode():
+ def scraping_mode():
mpatch = MonkeyPatch()
- mpatch.setenv("SARC_MODE", "scrapping")
+ mpatch.setenv("SARC_MODE", "scraping")
yield
mpatch.undo()

@@ -323,7 +323,7 @@ def make_config(newpath, uri):


@pytest.fixture
- def admin_setup(mongodb, scrapping_mode, tmp_path, freeport, monkeypatch):
+ def admin_setup(mongodb, scraping_mode, tmp_path, freeport, monkeypatch):
"""MongoDB admin user, can do anything."""

config_path = tmp_path / "config.json"
@@ -334,7 +334,7 @@ def admin_setup(mongodb, scrapping_mode, tmp_path, freeport, monkeypatch):


@pytest.fixture
- def write_setup(mongodb, scrapping_mode, tmp_path, freeport, monkeypatch):
+ def write_setup(mongodb, scraping_mode, tmp_path, freeport, monkeypatch):
"""SARC write user, can only write to sarc database.
Have access to secrets
"""
@@ -348,7 +348,7 @@ def write_setup(mongodb, scrapping_mode, tmp_path, freeport, monkeypatch):


@pytest.fixture
- def read_setup(mongodb, scrapping_mode, tmp_path, freeport, monkeypatch):
+ def read_setup(mongodb, scraping_mode, tmp_path, freeport, monkeypatch):
"""SARC read user, can onlly read to sarc database.
Does not have access to secrets
"""
@@ -1,5 +1,5 @@
"""
- tests the scrapping of disk usage report on DRAC clusters
+ tests the scraping of disk usage report on DRAC clusters
"""
from pathlib import Path

23 changes: 23 additions & 0 deletions tests/functional/jobs/test_func_sacct.py
@@ -2,8 +2,10 @@

import copy
import json
+ import subprocess
from datetime import datetime, timedelta
from pathlib import Path
+ from unittest.mock import patch

import pytest
from fabric.testing.base import Command, Session
@@ -269,6 +271,27 @@ def test_sacct_bin_and_accounts(test_config, remote):
assert len(list(scraper)) == 0


@patch("os.system")
@pytest.mark.usefixtures("write_setup")
def test_localhost(os_system, monkeypatch):
def mock_subprocess_run(*args, **kwargs):
mock_subprocess_run.called += 1
return subprocess.CompletedProcess(
args=args, returncode=0, stdout='{"jobs": []}', stderr=""
)

mock_subprocess_run.called = 0

monkeypatch.setattr(subprocess, "run", mock_subprocess_run)

scraper = SAcctScraper(
cluster=config().clusters["local"], day=datetime(2023, 2, 14)
)

assert len(list(scraper)) == 0
assert mock_subprocess_run.called >= 1


@pytest.mark.parametrize(
"test_config", [{"clusters": {"raisin": {"host": "raisin"}}}], indirect=True
)
10 changes: 10 additions & 0 deletions tests/sarc-test.json
@@ -94,6 +94,16 @@
"diskusage_report_command": "diskusage_report --project --all_users",
"prometheus_url": "https://hyrule-thanos.calcul.ca",
"prometheus_headers_file": "tests/not-so-secrets/patate_prometheus/headers.json"
},
"local": {
"host": "localhost",
"timezone": "America/Montreal",
"accounts": null,
"sacct_bin": "/opt/slurm/bin/sacct",
"duc_inodes_command": null,
"duc_storage_command": null,
"diskusage_report_command": null,
"prometheus_url": null
}
}
}
