Skip to content

Commit

Permalink
Merge pull request #125 from scrapinghub/kumo-3323-cancel-multiple-jobs
Browse files Browse the repository at this point in the history
It adds the ability to cancel multiple jobs
  • Loading branch information
vshlapakov authored Jul 23, 2019
2 parents 8319db9 + 770d39e commit 3016227
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ docs/_build

.DS_Store
pytestdebug.log
.idea
7 changes: 7 additions & 0 deletions scrapinghub/client/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ class Unauthorized(ScrapinghubAPIError):
"""Request lacks valid authentication credentials for the target resource."""


class Forbidden(ScrapinghubAPIError):
    """You don't have the permission to access the requested resource.

    It is either read-protected or not readable by the server.
    Raised for responses with status code 403.
    """


class NotFound(ScrapinghubAPIError):
    """Entity doesn't exist (e.g. spider or project).

    Raised for responses with status code 404.
    """

Expand Down Expand Up @@ -68,6 +73,8 @@ def wrapped(*args, **kwargs):
raise BadRequest(http_error=exc)
elif status_code == 401:
raise Unauthorized(http_error=exc)
elif status_code == 403:
raise Forbidden(http_error=exc)
elif status_code == 404:
raise NotFound(http_error=exc)
elif status_code == 413:
Expand Down
60 changes: 60 additions & 0 deletions scrapinghub/client/jobs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import absolute_import

import json

from ..hubstorage.job import JobMeta as _JobMeta
from ..hubstorage.job import Items as _Items
from ..hubstorage.job import Logs as _Logs
Expand Down Expand Up @@ -77,6 +79,64 @@ def count(self, spider=None, state=None, has_tag=None, lacks_tag=None,
params['spider'] = self.spider.name
return next(self._project.jobq.apiget(('count',), params=params))

def cancel_jobs(self, keys=None, count=None, **params):
    """Cancel jobs of the project, either by explicit keys or by amount.

    Exactly one of ``keys`` and ``count`` must be provided.

    :param keys: (optional) a list of strings containing the job keys in
        the format: <project>/<spider>/<job_id>.
    :param count: (optional) it requires admin access. Used for admins
        to bulk cancel an amount of ``count`` jobs.
    :return: a dict with the amount of jobs cancelled.
    :rtype: :class:`dict`
    :raises ValueError: if both or neither of ``keys`` and ``count`` are
        given, if ``keys`` is not a list, if a key cannot be parsed or
        belongs to another project, or if ``count`` is not an int.

    Usage:

    - cancel jobs 123 and 321 from project 111 and spiders 222 and 333::

        >>> project.jobs.cancel_jobs(['111/222/123', '111/333/321'])
        {'count': 2}

    - cancel 100 jobs asynchronously::

        >>> project.jobs.cancel_jobs(count=100)
        {'count': 100}
    """
    # Both values are routed through ``params`` and read back, so anything
    # update_kwargs does to them (it is defined elsewhere) is honoured.
    update_kwargs(params, count=count, keys=keys)
    keys = params.get('keys')
    count = params.get('count')

    if keys and count:
        raise ValueError("keys and count can't be defined simultaneously")
    if not keys and not count:
        raise ValueError("keys or count should be defined")

    if keys:
        if not isinstance(keys, list):
            raise ValueError("keys should be a list")

        # parse_job_key raises ValueError if a key is invalid
        parsed_keys = [parse_job_key(k) for k in keys]

        if not all(k.project_id == self.project_id for k in parsed_keys):
            raise ValueError(
                "all keys should belong to project: %s" % self.project_id
            )

        # change it to the format in which JobQ expects.
        payload = json.dumps([{"key": str(k)} for k in parsed_keys])

        # may raise BadRequest if JobQ doesn't validate
        response = self._project.jobq.apipost("cancel", data=payload)
    else:
        # bool is a subclass of int; reject it so that ``count=True`` does
        # not end up in the URL as "cancel?count=True".
        if isinstance(count, bool) or not isinstance(count, int):
            raise ValueError("count should be an int")

        # may raise Forbidden
        response = self._project.jobq.apipost("cancel?count=%s" % count)

    # The response is an iterable whose first element is the result dict.
    # Unwrap it for both branches so the documented return type holds.
    # NOTE(review): assumes the count endpoint answers in the same shape
    # as the keys endpoint -- confirm against JobQ.
    return list(response)[0]

def iter(self, count=None, start=None, spider=None, state=None,
has_tag=None, lacks_tag=None, startts=None, endts=None,
meta=None, **params):
Expand Down
1 change: 1 addition & 0 deletions tests/client/cassetes/test_job/test_cancel_jobs.gz
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eJy11gd428YVAGDJe4/EiR07TmDGVllX4BRljbKJoihqYtlSKtqCnV7TE3jiQSIBvgOgYZdt2iTdSfde6d57792me++999677w6QSpGwrVhfbX/+Doe7A/De/+5484oatMSNtS0tLdNMuJZjQ+vhVmMTXlu2xwQ1PexzYQWBlfEarAqGCgY+cz1YXYM1cWMN9lSYx50irDVW4cXI8GgB1hkrsekLC9Yb+7HFPa/ak0xms4ngXy7d05XqSiVp1UoK305MuvjsDWr+uFOcg43Gldh0/XGrmPdoydXTbVXhTDLTy2eCP21u1Soykeeu7uHb6MFlGy0Wb8IJclIaNqn35YziHRc212BL3NiAPcdcJvS+ErM92Epgm7EL+6pz+A227poC38kucX88mUmkE2nYTo2teL/PNFnV0wds0ynifbiAwIXGZrxROmVV27UimyhTj8EOqiISjIaLCFysInEweRB2UvXwfse2mQos7CJwieqbYqyq07I1zWA3NbYEozx8P32I2SWPwx4ClxorsL8zC3upytD8iMJclcFlBC439mEvrVbLlknl8slZfWZmRp9wREX3RZnJN2dF0Kh67T4fv1dYp9RQ2GdksO9q6lqmdrKy1L8nOiHm+8Y6hcKtIhUGV9Rgf6DC9ajnu3CgBm1xlVn5fLjH4btUVirMdWmJQVx91vBhuKfPMUMHg6FFGct7EWg39uJVwWftWiarXe+XtUwq3a2lsz25TE8upw0eKYAehBxzioghQSCpHuBPW6YjbEgF4TLDcHkyXGkCGWNbQ7iUwSw1Vsv+ctmZgQ4CORUsabpdGx4pXDd8dBQ6g0S6zNNNx5myGBwi0GXcKZ/iignPmWK2XmTT485svniq8+T0WH8BU1swzPGOwRPHqk5nZfC4xwbM7BQ7OdExkjOGJgfGRq8tHervyw15neZYbmyyMJVNHSlYvRqbrVoY3XwQhHQYhExqPgidMgi92hE6K0nns+mOju7OVKpXG6Eezyd7tVFaYaOWx/JDdBa6A17z0SgHvHoI9Ko8ZLvg3lRlYJqKOcgTuI8aH3hu1/qD772S+nxjDa4K8yxkQfQZbXhxOhakPdYTc6Zi7bFJB0sYL8KqTaaTuVSsBle7vl+D/jjHTeSaOK/bOQb43dgxruVNG0Xm7m8UGRiU9O4b57gbXMe3U47FfT3fQTnW72G+k3Is1CG+O3AWhuAIgaPGDf+PlMMw5VjvI/P1fgPlWN734xrl512mo77PsTILcY7leCzOsQr5QuUdV5U3Vld5BoETS6i8k4sq70YC919UeSSi8h5A4KboyntgfeVRAuMRlWdG6i0SYPN6J+r0lgjwKL1WoHeSL5lsGslOKbJlRbYiycp9fXCgADaK7f6fWNdzBG5sielKou4wSZhOJYkru8n6SkjKJzNw+MajMgXVpuMJCIizH0+u0upFHEW+8jsd+p1Bv+fvZzbwM6f8nGrwc1r5eVC88XirEXhw4/H2kIDMaEDmZgIPVUbskmXPJvF7OhMpeFjEAXcLgVuj3dxGje3Yb+hyTctk+vHwp8zDCTxCJQnrEh4ZuLhGBuZRBB59Vt2HlO7HRJ7EjyVwu9GK/d1wBzX2hLpcV5fjhFPW+yRgfVhY+EnwOAKPV6MPwhMCdU9EdeuxI1ZltvwhEYMnKVhPVrCeUg/rqecNKx3CeloI6+lxjpKewQMsz+QBjWeFNJ69LBrPCWg8V9F4XgONOxWN5zfReAGBFzbSeNEiGi8m8JJmGi+NoPEyAi+PpvGKM9F4JYFXLdB4dR2N1xB47RJovC6SxusJvIEvAcUbCbyJhxze3MThLYrDWxWHty06Gt+OHnJL9QALHkxqm6wM78CzUqb4xtOxKTYX69EazuR2LeoG7nwE3imT+C5l6N2hofeEht4bGnqfNISn1vtV5nMp+MAySH0wIPUhRerDDaQ+okh9tInUXQQ+1kjq44tIfYLAJ5tJfSqC1KcJfCaa1GfPROpzBD6/QOoLdaS+
SOBLSyD15UhSXyHwVfXh6Qx87dyyvk7gG/OyvomyNqnjzXR828O8ZmrwLYXr2wrXd+r3mu8u+xD7XrjXfF85+UHo5Iehkx+FTn68rL3mJwGMnyoYP2uA8XMF4xdNMH5J4FeNMH69CMZvCPy2GcbvImD8nsAfomH88Uww/kTgzwsw/lIH468E/rYEGH+PhPEPAv8MYKTgX+eG8W8C/wlhiBY8j9CGjEhswrItl7NiTLS2yp/Rfk2swIb0IVaqxgIRsUrOW+aJJFbLRRQUsUatD0SslX2Ki1gnm0qMWC+bCo3YIJvLcCM24gKKjtikHnqgJjarRj0gsUWOkm+2Vd5rYCS2YScR2/H/BkziAuyr9yQuVEN3yKGNqsRFanAjLHGxmrJTToniJXapaZHCxCVq7m45N3Qm9qjh89TEpWrEXjnibOC6JDhxmZrbbE5crlbR5CqBPLFPDT0HPhFT866QSQwJ7o8keCAgyPzEfwGFLKo2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eJyllfd/20QYxtM90slq6QDVlGBKZHk0JnEwxW1DR4ZT7IRrylEu8sWn2Jb0ajhOi6Hsvfcue2/+CP4sXp3ckMYuzYfa/uGWdO/4Po8vrGxCV5Ss6+rqqnPHNSwTVgyvIJtwbpged5ju4ZoLKymsijZhdXjU4eBz14M1TVgbJWtxpcY9YZVgHVmNk/F8oQjrySoc+o4BG8h+HAnPszOalkrFwl9fItMf749rzDY0xzdjsy7evVE+P22V5qGbHMKh608bpazHyq6a6LEda5brXjYZfnpc2yhxJytc1cNo1HDaw0qls/hA8FACNsl4BWe448LmJmyJko24MuFyR82VuenBVgrbyE5cs+cxB1N1dQdjMsvCn9aSsUQsAdsZ2Yr7OV3ntqcOmbpVwn24jsL1ZDNulM8Zdq9S4jNV5nG4gcmKhKfhRgo3yUoc0A7ADiYvP2KZJpeFhZ0UbpZrFc5tlVWNOoddjGwJT3kYnzrCzbInYDeFPWQlrqdTsJfJDl06UZy3OdxC4VayD1eZbVcNnQWv1xrq3NycOmM5NdV3qjyInJdAYTLsnI/5OsY5eRT2kSSuHWauoStTteV+T6ch4vtkvYTCtREVDrc1YX9Ihesxz3fh9ib0RGVng/vhjuG/ZVdq3HVZmUNUppUfhjt9gR06EJXJ6a3kvCC5uyj0km1LkpPEqIysCdarVWsOYhQ0mVpAYK+SHy+eyI8VIB6W3eWeqltWxeCQoJAkF4NbXGfGsyrcVEu8Pm01srxydIZMEs8aqpwaTfYJfXSsUBg9Xa/W8xN1L8kKp8rHxyAxNONPzs5N5wrVukjnpvon5qd8r1YfgUGFN2wDa5Et+rxXSSaUk35VScaTcSWRyvSlMsm0cmy0OKiMskYAYDaVOHhwIB2PDyrjzBNZbVApsBovGB7PjrAGpEKaEFfUJxyk0Cdr59cN3XJMSIesXCpWNWTlbgr9sqipfhhgsvJ15sxDhsKgPB/C2ascCctxT3imFOCbpXAv2YuzMP5UK/7EwGXxwyHmi+4m3NfqsxMIIkd6cHI+ErY9kolYlUhvZNZCCeOkpVotofUlI0047Pp+E45EBZrI0ahYJ0VybKgIQ2IDGfjXMFzPcpCSWL0WW6TMmG7VNHyzqy1+rRbczOF+0T0WkHSsTevHKZz4b62fZAJ1PdxB1yNMoJhHxQ4mULVjYhcT/1szed8XKJPxqEBtnIoKlIRYkMEDUgaFKNnd6pTrqoHUHauq5gLO1bxjlA0TihQmyIrAW2CSke04IGoBQTF0rk623PxBCkSWFvmG02GfjwbpTFE4s4w+P9TRjCiFh+XNA3A2BLQQAvoIhdCbTAywoWFF07E4THfwK51CqbOkeZtLzlAoL3VJEQJoiBzZgDsRm5uBK0dgVoJVkWBVA7AW/pJqSFbfcsmCBbJ0Zuq8CqboJnvw6TPnIxU+H8koi4hOxPETaVKwgtbZUYGggQhZckRIjtsixwvIQTf3Zb+T/VC/BpDmQpAaEqT5JSCdkyCdvzpIj1Joiisi9BiFxxcQurAIoScoPLkMhJ7qiNDTFJ6RgSaS8OxlDD1H4fl2hl7owNCLFF7qzNDLbQy9QuHVpQy9FjL0OjK0STqXbvmmh62NN+ENidGbEqO3FvvT29fsT++0/Oldycl7LU7eb3HyQYuTD6/JYT4KwfhYgvHJEjA+lWB8dnUwLlL4/MpgfEHhywUwvloExtcUvlkGGN92BOM7Ct+Ly13lBwo/thPxUwcifqbwS2cifm0j4jcKvy8l4o+QiD/bXOUvxIH7sX8AEwJsIA==
51 changes: 51 additions & 0 deletions tests/client/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from scrapinghub.client.logs import Logs
from scrapinghub.client.requests import Requests
from scrapinghub.client.samples import Samples
from scrapinghub.client.exceptions import BadRequest

from ..conftest import TEST_PROJECT_ID
from ..conftest import TEST_SPIDER_NAME
Expand Down Expand Up @@ -44,6 +45,56 @@ def test_job_update_tags(spider):
assert job2.metadata.get('tags') == ['tag2']


def test_cancel_jobs_validation(spider):
    """Check that cancel_jobs rejects every invalid argument combination.

    Messages are read from ``err.value`` (the raised exception) rather
    than from the pytest ``ExceptionInfo`` wrapper, whose string form is
    pytest-version dependent.
    """
    # neither keys nor count
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs()

    assert 'keys or count should be defined' in str(err.value)

    # both keys and count
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(['2222222/1/1'], count=2)

    assert "keys and count can't be defined simultaneously" in str(err.value)

    # keys of the wrong type
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(keys="testing")

    assert 'keys should be a list' in str(err.value)

    # count of the wrong type
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(count=[1, 2])

    assert 'count should be an int' in str(err.value)

    # keys spanning more than one project
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(['2222222/1/1', '2222226/1/1'])

    assert 'all keys should belong to project' in str(err.value)


def test_cancel_jobs(spider):
    """Schedule two jobs, cancel both by key and check the outcome."""
    first = spider.jobs.run(job_args={'subid': 'tags-1'}, add_tag=['tag1'])
    second = spider.jobs.run(job_args={'subid': 'tags-2'}, add_tag=['tag2'])
    scheduled = (first, second)

    # freshly scheduled jobs are expected to be pending
    for job in scheduled:
        assert job.metadata.get('state') == 'pending'

    result = spider.jobs.cancel_jobs([job.key for job in scheduled])

    # after cancellation both jobs are expected to report 'finished'
    for job in scheduled:
        assert job.metadata.get('state') == 'finished'
    assert result == {'count': 2}


def test_cancel_jobs_non_existent(spider):
    """Cancelling an unknown job key cancels nothing and leaves real jobs alone."""
    existing = spider.jobs.run(job_args={'subid': 'tags-1'}, add_tag=['tag1'])
    assert existing.metadata.get('state') == 'pending'

    # Key in the same project that does not match any job
    missing_key = '%s/1/10000' % existing.project_id
    result = spider.jobs.cancel_jobs([missing_key])

    assert result == {'count': 0}
    assert existing.metadata.get('state') == 'pending'


def test_job_start(spider):
job = spider.jobs.run()
assert job.metadata.get('state') == 'pending'
Expand Down

0 comments on commit 3016227

Please sign in to comment.