Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use click for marc2bf cli #34

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject

venv
.venv
92 changes: 3 additions & 89 deletions exec/marc2bf
Original file line number Diff line number Diff line change
@@ -1,94 +1,8 @@
#!/usr/bin/env python
#-*- mode: python -*-
# -*- mode: python -*-

import sys
import json
import logging
import argparse

from bibframe.reader import bfconvert
from amara3.inputsource import inputsourcetype


def run(inputs=None, base=None, out=None, limit=None, rdfttl=None, rdfxml=None, xml=None,
        config=None, verbose=False, mods=None, modfiles=None, canonical=False, lax=False):
    '''
    Take parameters typical for command line invocation and adapt them for use
    in the bfconvert API, then run the conversion.

    inputs -- forwarded to bfconvert, which is told to treat them as file names
              (defaultsourcetype=inputsourcetype.filename)
    base -- forwarded to bfconvert as entbase
    out, rdfttl, rdfxml, xml -- output destinations, forwarded to bfconvert unchanged
    limit -- forwarded to bfconvert unchanged
    config -- readable file-like object holding JSON, or None for an empty config
    verbose -- if true, the 'marc2bf' logger is set to DEBUG; also forwarded
    mods -- iterable of module names to import before converting (None means none)
    modfiles -- iterable of paths to Python files to execute before converting
                (None means none)
    canonical, lax -- forwarded to bfconvert unchanged

    Returns None. Errors from json.load, the imports, open/compile/exec and
    bfconvert propagate to the caller.
    '''
    config = {} if config is None else json.load(config)

    logger = logging.getLogger('marc2bf')
    if verbose:
        logger.setLevel(logging.DEBUG)

    # mods/modfiles default to None in the signature; treat that as "nothing
    # to load" instead of failing with TypeError when iterating None.
    for mod in mods or ():
        __import__(mod, globals(), locals(), [])

    for modfile in modfiles or ():
        with open(modfile) as f:
            code = compile(f.read(), modfile, 'exec')
            # NOTE(review): this runs arbitrary Python from the given path with
            # this function's namespaces; only pass trusted files.
            exec(code, globals(), locals())

    bfconvert(inputs=inputs, entbase=base, out=out, limit=limit, rdfttl=rdfttl, rdfxml=rdfxml,
              xml=xml, config=config, verbose=verbose, canonical=canonical, logger=logger,
              lax=lax, defaultsourcetype=inputsourcetype.filename)

from bibframe.cli import marc2bf

# Script entry point.
# NOTE(review): this span is taken from a rendered diff without +/- markers.
# The argparse-based lines and the final `marc2bf()` call presumably belong to
# the removed and added sides of the change respectively - confirm against the
# real file before treating this as one runnable body.
if __name__ == '__main__':
# Example invocations:
#marc2bf -v test/resource/700t.mrx
#marc2bf -v -o /dev/null --rdfttl /tmp/foo.ttl test/resource/700t.mrx
#parser = argparse.ArgumentParser(prog="bootstrap", add_help=False)
parser = argparse.ArgumentParser()
parser.add_argument('inputs', metavar='inputs', nargs='*',
help='One or more MARC/XML files to be parsed and converted to BIBFRAME RDF.')
parser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout,
help='File where raw Versa JSON output should be written'
'(default: write to stdout)')
# NOTE(review): --postout is parsed but is not forwarded in the run() call below.
parser.add_argument('-p', '--postout', metavar="IRI",
help='HTTP endpoint for pushing or posting raw Versa JSON output'
'(default: write to stdout)')
parser.add_argument('--rdfttl', type=argparse.FileType('wb'),
help='File where RDF Turtle output should be written')
parser.add_argument('--rdfxml', type=argparse.FileType('wb'),
help='File where RDF XML output should be written')
parser.add_argument('--xml', type=argparse.FileType('w'),
help='File where MicroXML output should be written')
parser.add_argument('-c', '--config', type=argparse.FileType('r'),
help='File containing config in JSON format')
#parser.add_argument('-s', '--stats', type=argparse.FileType('w'),
# help='file where statistics output should be written in JSON format')
# No type= is given for --limit, so the value is forwarded to run() as a string.
parser.add_argument('-l', '--limit', metavar="NUMBER",
help='Limit the number of records processed to this number. If omitted, all records will be processed.')
parser.add_argument('-b', '--base', metavar="IRI", #dest="base",
help='Base IRI to be used for creating resources.')
parser.add_argument('--mod', metavar="PYMODULE", nargs="*", action='append',
help='Python module to be imported in order to register plugins (can be specified multiple times.')
parser.add_argument('--modfile', metavar="FILEPATH", nargs="*", action='append',
help='Python file to be executed as a module in order to register plugins (can be specified multiple times.')
parser.add_argument('-v', '--verbose', action='store_true',
help='Show additional messages and information')
parser.add_argument('--canonical', action='store_true',
help='Use Versa\'s canonical form for output. Warning: memory inefficient')
parser.add_argument('--lax', action='store_true',
help='Parse less strictly, e.g. accepting MARC/XML with bad namespace declarations')
#XXX: Any way to get generalized archive support using shutil? Perhaps along with tempfile?
#https://docs.python.org/3/library/shutil.html#archiving-operations
#parser.add_argument('-z', '--zipcheck', action='store_true',
# help='Check for zip files among the inputs')
#
args = parser.parse_args()
# --mod/--modfile combine nargs="*" with action='append', which yields a list
# of lists (or None when the option is absent); flatten to a single list.
args.mod = [i for items in args.mod or [] for i in items]
args.modfile = [i for items in args.modfile or [] for i in items]

run(inputs=args.inputs, base=args.base, out=args.out, limit=args.limit, rdfttl=args.rdfttl,
rdfxml=args.rdfxml, xml=args.xml, config=args.config, verbose=args.verbose,
mods=args.mod, modfiles=args.modfile, canonical=args.canonical, lax=args.lax)
# Close the output files opened by argparse.
# NOTE(review): args.xml and args.config are not closed here, and args.out is
# closed even when it is the sys.stdout default.
#for f in args.inputs: f.close()
if args.rdfttl: args.rdfttl.close()
if args.rdfxml: args.rdfxml.close()
args.out.close()
# Invoke the Click command imported from bibframe.cli.
marc2bf()
111 changes: 111 additions & 0 deletions lib/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import click
import sys

from bibframe.main import run


@click.command()
@click.argument(
    "inputs",
    type=click.Path(exists=True, dir_okay=False),
    nargs=-1,  # unlimited number of arguments
    metavar="inputs",
    # click.argument() accepts no help=; the inputs are described in the
    # command docstring instead, which Click prints in --help.
)
@click.option(
    "-o",
    "--out",
    type=click.File("w"),
    default=sys.stdout,
    # Trailing space matters: the two literals are concatenated implicitly.
    help="File where raw Versa JSON output should be written "
    "(default: write to stdout)",
)
# @click.option("-p", "--postout", metavar="IRI")
@click.option(
    "--rdfttl",
    type=click.File("wb"),
    help="File where RDF Turtle output should be written",
)
@click.option(
    "--rdfxml",
    type=click.File("wb"),
    help="File where RDF XML output should be written",
)
@click.option(
    "--xml", type=click.File("w"), help="File where MicroXML output should be written"
)
@click.option(
    "-c", "--config", type=click.File("r"), help="File containing config in JSON format"
)
# @click.option(
#     "-s", "--stats", type=click.File("w"),
#     help='file where statistics output should be written in JSON format'
# )
@click.option(
    "-l",
    "--limit",
    type=click.INT,
    metavar="NUMBER",
    help="Limit the number of records processed to this number. If omitted, all records will be processed.",
)
@click.option(
    "-b",
    "--base",
    metavar="IRI",
    help="Base IRI to be used for creating resources.",
)
@click.option(
    "--mod",
    multiple=True,
    metavar="PYMODULE",
    help="Python module to be imported in order to register plugins (can be specified multiple times).",
)
@click.option(
    "--modfile",
    multiple=True,
    metavar="FILEPATH",
    help="Python file to be executed as a module in order to register plugins (can be specified multiple times).",
)
@click.option(
    "-v", "--verbose", is_flag=True, help="Show additional messages and information"
)
@click.option(
    "--canonical",
    is_flag=True,
    help="Use Versa's canonical form for output. Warning: memory inefficient",
)
@click.option(
    "--lax",
    is_flag=True,
    help="Parse less strictly, e.g. accepting MARC/XML with bad namespace declarations",
)
def marc2bf(
    inputs,
    out,
    rdfttl,
    rdfxml,
    xml,
    config,
    limit,
    base,
    mod,
    modfile,
    verbose,
    canonical,
    lax,
):
    """Convert MARC/XML files to BIBFRAME RDF.

    inputs: one or more MARC/XML files to be parsed and converted to
    BIBFRAME RDF.
    """
    # Thin adapter: every parsed option is forwarded to run() unchanged; only
    # the names differ (--mod -> mods, --modfile -> modfiles).
    run(
        inputs=inputs,
        base=base,
        out=out,
        limit=limit,
        rdfttl=rdfttl,
        rdfxml=rdfxml,
        xml=xml,
        config=config,
        verbose=verbose,
        mods=mod,
        modfiles=modfile,
        canonical=canonical,
        lax=lax,
    )
34 changes: 34 additions & 0 deletions lib/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import json
import logging

from bibframe.reader import bfconvert
from amara3.inputsource import inputsourcetype


def run(inputs=None, base=None, out=None, limit=None, rdfttl=None, rdfxml=None, xml=None,
        config=None, verbose=False, mods=None, modfiles=None, canonical=False, lax=False):
    '''
    Take parameters typical for command line invocation and adapt them for use
    in the bfconvert API, then run the conversion.

    inputs -- forwarded to bfconvert, which is told to treat them as file names
              (defaultsourcetype=inputsourcetype.filename)
    base -- forwarded to bfconvert as entbase
    out, rdfttl, rdfxml, xml -- output destinations, forwarded to bfconvert unchanged
    limit -- forwarded to bfconvert unchanged
    config -- readable file-like object holding JSON, or None for an empty config
    verbose -- if true, the 'marc2bf' logger is set to DEBUG; also forwarded
    mods -- iterable of module names to import before converting (None means none)
    modfiles -- iterable of paths to Python files to execute before converting
                (None means none)
    canonical, lax -- forwarded to bfconvert unchanged

    Returns None. Errors from json.load, the imports, open/compile/exec and
    bfconvert propagate to the caller.
    '''
    config = {} if config is None else json.load(config)

    logger = logging.getLogger('marc2bf')
    if verbose:
        logger.setLevel(logging.DEBUG)

    # mods/modfiles default to None in the signature; treat that as "nothing
    # to load" instead of failing with TypeError when iterating None.
    for mod in mods or ():
        __import__(mod, globals(), locals(), [])

    for modfile in modfiles or ():
        with open(modfile) as f:
            code = compile(f.read(), modfile, 'exec')
            # NOTE(review): this runs arbitrary Python from the given path with
            # this function's namespaces; only pass trusted files.
            exec(code, globals(), locals())

    bfconvert(inputs=inputs, entbase=base, out=out, limit=limit, rdfttl=rdfttl, rdfxml=rdfxml,
              xml=xml, config=config, verbose=verbose, canonical=canonical, logger=logger,
              lax=lax, defaultsourcetype=inputsourcetype.filename)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ mmh3
pytest
versa==0.5.2
amara3.xml
click