refactor: convert other into dataclass
yzqzss committed Apr 8, 2024
1 parent 0923364 commit 1469bf9
Showing 7 changed files with 104 additions and 91 deletions.
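
In short, the commit replaces the loose "other" dict that was threaded through the dump generator with an OtherConfig dataclass, so call sites move from key lookups such as other["resume"] to attribute access such as other.resume. A minimal sketch of the pattern, using only a subset of the real fields (field names are taken from the diff; the constructed values are illustrative assumptions):

import dataclasses
from typing import Optional

import requests

@dataclasses.dataclass
class OtherConfigSketch:          # illustrative subset of the real OtherConfig
    resume: bool
    force: bool
    session: requests.Session
    stdout_log_path: Optional[str]

# Before: a plain dict -- a misspelled or missing key only fails at the lookup site.
other = {"resume": False, "force": True, "session": requests.Session(), "stdout_log_path": None}
print(other["resume"])

# After: a dataclass -- missing or misspelled fields fail at construction time,
# and attribute access (other.resume) is visible to type checkers.
other = OtherConfigSketch(resume=False, force=True, session=requests.Session(), stdout_log_path=None)
print(other.resume)
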
2 changes: 1 addition & 1 deletion wikiteam3/dumpgenerator/api/get_json.py
@@ -8,7 +8,7 @@ def get_JSON(request: requests.Response):
# request.encoding = request.apparent_encoding
try:
return request.json()
except:
except Exception:
# Maybe an older API version which did not return correct JSON
print("Error: Could not parse JSON")
return {}
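
Aside from the dataclass work, the hunk above narrows the bare except to except Exception. The distinction matters because a bare except also traps BaseException subclasses such as KeyboardInterrupt and SystemExit, so a Ctrl-C arriving while a malformed JSON response is being handled could be silently swallowed. A small illustrative snippet (not from the repository; the URL is a placeholder):

import requests

request = requests.get("https://example.org/api.php")   # placeholder request

try:
    data = request.json()
except:              # bare: also catches KeyboardInterrupt / SystemExit
    data = {}

try:
    data = request.json()
except Exception:    # narrowed: ordinary failures only, Ctrl-C still propagates
    data = {}
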
43 changes: 22 additions & 21 deletions wikiteam3/dumpgenerator/cli/cli.py
@@ -8,7 +8,7 @@
import re
import sys
import traceback
from typing import Dict, Tuple
from typing import Tuple

import requests
from requests.adapters import DEFAULT_RETRIES as REQUESTS_DEFAULT_RETRIES
@@ -22,7 +22,7 @@
)
from wikiteam3.dumpgenerator.api.index_check import check_index
from wikiteam3.dumpgenerator.cli.delay import Delay
from wikiteam3.dumpgenerator.config import Config, new_config
from wikiteam3.dumpgenerator.config import Config, OtherConfig, new_config
from wikiteam3.dumpgenerator.version import getVersion
from wikiteam3.utils import (
get_random_UserAgent,
@@ -276,7 +276,7 @@ def checkParameters(args=argparse.Namespace()) -> bool:

return passed

def get_parameters(params=None) -> Tuple[Config, Dict]:
def get_parameters(params=None) -> Tuple[Config, OtherConfig]:
# if not params:
# params = sys.argv

@@ -535,24 +535,25 @@ def sleep(self, response=None):
"retries": int(args.retries),
})

other = {
"resume": args.resume,
"force": args.force,
"session": session,
"stdout_log_path": args.stdout_log_path,
"bypass_cdn_image_compression": args.bypass_cdn_image_compression,
"add_referer_header": args.add_referer_header,
"image_timestamp_interval": args.image_timestamp_interval,
"ia_wbm_booster": args.ia_wbm_booster,

"assert_max_pages": args.assert_max_pages,
"assert_max_edits": args.assert_max_edits,
"assert_max_images": args.assert_max_images,
"assert_max_images_bytes": args.assert_max_images_bytes,

"upload": args.upload,
"uploader_args": args.uploader_args,
}

other = OtherConfig(
resume = args.resume,
force = args.force,
session = session,
stdout_log_path = args.stdout_log_path,
bypass_cdn_image_compression = args.bypass_cdn_image_compression,
add_referer_header = args.add_referer_header,
image_timestamp_interval = args.image_timestamp_interval,
ia_wbm_booster = args.ia_wbm_booster,

assert_max_pages = args.assert_max_pages,
assert_max_edits = args.assert_max_edits,
assert_max_images = args.assert_max_images,
assert_max_images_bytes = args.assert_max_images_bytes,

upload = args.upload,
uploader_args = args.uploader_args,
)

# calculating path, if not defined by user with --path=
if not config.path:
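
Callers of get_parameters unpack the pair exactly as before; only the static type of the second element changes, so dict-style lookups elsewhere in the code base have to become attribute reads. An illustrative call site, mirroring the one in generator.py:

from wikiteam3.dumpgenerator.cli import get_parameters

config, other = get_parameters()               # now Tuple[Config, OtherConfig]
if other.resume:                               # attribute access, checkable by mypy/pyright
    print("resuming from a previous dump")
# other["resume"] would raise TypeError: 'OtherConfig' object is not subscriptable
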
28 changes: 26 additions & 2 deletions wikiteam3/dumpgenerator/config.py
@@ -1,6 +1,8 @@
import dataclasses
import json
from typing import List
from typing import List, Optional

import requests


def _dataclass_from_dict(klass_or_obj, d: dict):
@@ -90,4 +92,26 @@ def save_config(config: Config, config_filename: str):
"""Save config file"""

with open(f"{config.path}/{config_filename}", "w", encoding="utf-8") as outfile:
json.dump(dataclasses.asdict(config), outfile, indent=4, sort_keys=True)
json.dump(dataclasses.asdict(config), outfile, indent=4, sort_keys=True)


@dataclasses.dataclass
class OtherConfig:
resume: bool
force: bool
session: requests.Session
stdout_log_path: Optional[str]
bypass_cdn_image_compression: bool
add_referer_header: Optional[str]
'''None, "auto", {URL}'''
image_timestamp_interval: Optional[str]
''' 2019-01-02T01:36:06Z/2023-08-12T10:36:06Z '''
ia_wbm_booster: int

assert_max_pages: Optional[int]
assert_max_edits: Optional[int]
assert_max_images: Optional[int]
assert_max_images_bytes: Optional[int]

upload: bool
uploader_args: List[str]
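
Anywhere the old code iterated the dict wholesale (for example the assert_* scan in generator.py, which the diff below rewrites using other.__dict__.items()), dataclasses.fields() gives an equivalent but more explicit view of the instance. A minimal sketch, assuming an OtherConfig instance named other built as in cli.py:

import dataclasses

asserts_enabled = [
    (f.name, getattr(other, f.name))
    for f in dataclasses.fields(other)               # declared fields of OtherConfig
    if f.name.startswith("assert_") and getattr(other, f.name) is not None
]
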
52 changes: 26 additions & 26 deletions wikiteam3/dumpgenerator/dump/generator.py
@@ -8,7 +8,7 @@

from file_read_backwards import FileReadBackwards

from wikiteam3.dumpgenerator.config import load_config, save_config
from wikiteam3.dumpgenerator.config import OtherConfig, load_config, save_config
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.dumpgenerator.cli import get_parameters, bye, welcome
from wikiteam3.dumpgenerator.dump.image.image import FILENAME_LIMIT, Image
@@ -56,12 +56,12 @@ def __init__(params=None):
config, other = get_parameters(params=params)
avoid_WikiMedia_projects(config=config, other=other)

with (Tee(other["stdout_log_path"]) if other["stdout_log_path"] is not None else contextlib.nullcontext()):
with (Tee(other.stdout_log_path) if other.stdout_log_path else contextlib.nullcontext()):
print(welcome())
print("Analysing %s" % (config.api if config.api else config.index))

# do not enter if resume is requested from begining
while not other["resume"] and os.path.isdir(config.path):
while not other.resume and os.path.isdir(config.path):
print('\nWarning!: "%s" path exists' % (config.path))
reply = "y" if config.failfast else ""
while reply.lower()[:1] not in ["y", "n"]:
@@ -79,45 +79,45 @@ def __init__(params=None):
print("No config file found. I can't resume. Aborting.")
sys.exit(1)
print("You have selected: YES")
other["resume"] = True
other.resume = True
break
elif reply == "n":
print("You have selected: NO.\nbye.")
# other["resume"] = False
# other.resume = False
sys.exit(0)

if asserts_enabled := [(arg, v) for arg, v in other.items() if arg.startswith("assert_") and v is not None]:
site_info = get_siteinfo(config=config, session=other["session"])
if asserts_enabled := [(arg, v) for arg, v in other.__dict__.items() if arg.startswith("assert_") and v is not None]:
site_info = get_siteinfo(config=config, session=other.session)
assert_siteinfo(site_info, other)
[print(f"--{arg}: {v}, passed") for arg, v in asserts_enabled]

if other["resume"]:
if other.resume:
print("Loading config file to resume...")
config = load_config(config=config, config_filename=config_filename)
else:
if not other['force'] and any_recent_ia_item_exists(config, days=365):
if not other.force and any_recent_ia_item_exists(config, days=365):
print("A dump of this wiki was uploaded to IA in the last 365 days. Aborting.")
sys.exit(88)

os.mkdir(config.path)
save_config(config=config, config_filename=config_filename)

if other["resume"]:
if other.resume:
DumpGenerator.resumePreviousDump(config=config, other=other)
else:
DumpGenerator.createNewDump(config=config, other=other)

if config.index:
save_IndexPHP(config=config, session=other["session"])
save_SpecialVersion(config=config, session=other["session"])
save_IndexPHP(config=config, session=other.session)
save_SpecialVersion(config=config, session=other.session)
if config.api:
save_siteinfo(config=config, session=other["session"])
save_siteinfo(config=config, session=other.session)

mark_as_done(config=config, mark=ALL_DUMPED_MARK)
bye(config.path)
if other["upload"]:
if other.upload:
print('Calling uploader... (--upload)')
retcode = subprocess.call([sys.executable, '-m', 'wikiteam3.uploader', config.path] + other["uploader_args"],
retcode = subprocess.call([sys.executable, '-m', 'wikiteam3.uploader', config.path] + other.uploader_args,
shell=False)
if retcode:
print(f'--upload: Failed: {retcode}')
@@ -126,25 +126,25 @@ def __init__(params=None):
print('--upload: Done')

@staticmethod
def createNewDump(config: Config, other: Dict):
def createNewDump(config: Config, other: OtherConfig):
# we do lazy title dumping here :)
images = []
print("Trying generating a new dump into a new directory...")
if config.xml:
generate_XML_dump(config=config, session=other["session"])
check_XML_integrity(config=config, session=other["session"])
generate_XML_dump(config=config, session=other.session)
check_XML_integrity(config=config, session=other.session)
if config.images:
images += Image.get_image_names(config=config, session=other["session"])
images += Image.get_image_names(config=config, session=other.session)
Image.save_image_names(config=config, other=other, images=images)
Image.generate_image_dump(
config=config, other=other, images=images, session=other["session"]
config=config, other=other, images=images, session=other.session
)
if config.logs:
pass # TODO
# save_SpecialLog(config=config, session=other["session"])
# save_SpecialLog(config=config, session=other.session)

@staticmethod
def resumePreviousDump(config: Config, other: Dict):
def resumePreviousDump(config: Config, other: OtherConfig):
images = []
print("Resuming previous dump process...")
if config.xml:
@@ -188,13 +188,13 @@ def resumePreviousDump(config: Config, other: Dict):
print('Resuming XML dump from "%s" (revision id %s)' % (last_xml_title, last_xml_revid))
generate_XML_dump(
config=config,
session=other["session"],
session=other.session,
resume=True,
)
else:
# corrupt? only has XML header?
print("XML is corrupt? Regenerating...")
generate_XML_dump(config=config, session=other["session"])
generate_XML_dump(config=config, session=other.session)

if config.images:
# load images list
Expand All @@ -219,7 +219,7 @@ def resumePreviousDump(config: Config, other: Dict):
print("Image list is incomplete. Reloading...")
# do not resume, reload, to avoid inconsistences, deleted images or
# so
images = Image.get_image_names(config=config, session=other["session"])
images = Image.get_image_names(config=config, session=other.session)
Image.save_image_names(config=config, other=other, images=images)
# checking images directory
files = set()
@@ -282,7 +282,7 @@ def resumePreviousDump(config: Config, other: Dict):
config=config,
other=other,
images=images,
session=other["session"],
session=other.session,
)

if config.logs:
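
One design consequence worth noting: OtherConfig is declared without frozen=True, which is what lets the resume prompt above mutate the instance in place (other.resume = True). A frozen variant would reject that assignment; a minimal sketch with a hypothetical frozen class, not the class in this commit:

import dataclasses

@dataclasses.dataclass(frozen=True)
class FrozenOther:              # hypothetical frozen variant for illustration
    resume: bool

fo = FrozenOther(resume=False)
fo.resume = True                # raises dataclasses.FrozenInstanceError
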