Skip to content

Commit

Permalink
Merge pull request #381 from PyAr/transmit-redirection-score
Browse files Browse the repository at this point in the history
Transmit redirection scores to destination articles
  • Loading branch information
facundobatista authored Mar 30, 2021
2 parents 85767e1 + c61ca44 commit 61f73b4
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 5 deletions.
9 changes: 5 additions & 4 deletions src/preprocessing/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright 2020 CDPedistas (see AUTHORS.txt)
# Copyright 2020-2021 CDPedistas (see AUTHORS.txt)
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as published
Expand Down Expand Up @@ -177,9 +177,10 @@ def process(self):
scores_log.write("{}|R|{:d}\n".format(
to3dirs.to_pagina(page_path), this_total_score))

# save the extra pages score (that may exist or not in the dump)
for extra_page, extra_score in other_pages_scores:
scores_log.write("{}|E|{:d}\n".format(extra_page, extra_score))
# save the extra pages score (that may exist or not in the dump) even if the page
# is discarded (e.g. for transferring score from redirect pages to their targets)
for extra_page, extra_score in other_pages_scores:
scores_log.write("{}|E|{:d}\n".format(extra_page, extra_score))

# with score or discarded, log it as processed
processed_before_log.write(page_path + "\n")
Expand Down
24 changes: 23 additions & 1 deletion tests/test_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2013-2020 CDPedistas (see AUTHORS.txt)
# Copyright 2013-2021 CDPedistas (see AUTHORS.txt)
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as published
Expand Down Expand Up @@ -199,6 +199,28 @@ def test_empty_dir(self, tmp_path, wikisite):
ws.commit()
assert os.path.getsize(config.LOG_PREPROCESADO) == 0

def test_transmit_redirection_score_to_destination(self, mocker, tmp_path, wikisite):
    """Check that extra scores from a discarded redirection still reach the scores log.

    A mocked preprocessor returns ``None`` plus one extra score for 'destination';
    after processing and committing, the accumulated scores log must hold exactly
    that extra ('E') entry and nothing for the redirection itself.
    """
    site = wikisite(str(tmp_path))

    # stub preprocessor: discards the redirection, hands its score over to the destination
    extra_scores = [('destination', 1234)]
    discarding_preprocessor = mocker.Mock(return_value=(None, extra_scores))
    mocker.patch.object(site, 'preprocessors', [discarding_preprocessor])

    # empty placeholder file standing in for the redirection article to be discarded
    redirect_dir = tmp_path.joinpath('r', 'e', 'd')
    redirect_dir.mkdir(parents=True)
    redirect_dir.joinpath('redirection').touch()

    site.process()
    site.commit()

    with open(preprocess.LOG_SCORES_ACCUM, 'rt', encoding='utf-8') as scores_file:
        logged = scores_file.read()

    # the redirection's own score is dropped; only the destination's extra score is kept
    assert logged == 'destination|E|1234\n'


class TestPagesSelector(object):
"""Tests for the PagesSelector"""
Expand Down

0 comments on commit 61f73b4

Please sign in to comment.