Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: archive: add plugin interface #485

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions contrib/plugins/artifactoryArchiveAccess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from bob.archive_access import BaseArchiveAccess

from artifactory import ArtifactoryPath
import os
import tempfile
import datetime
import calendar
import struct

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

class Artifactory(BaseArchiveAccess):
    """Archive accessor that reads Bob artifacts from an Artifactory repository.

    All 'path' arguments are interpreted relative to the repository root URL
    configured in the constructor. "." denotes the repository root itself.

    NOTE(review): every request is made with verify=False, i.e. without TLS
    certificate verification, as in the original implementation -- confirm
    that this is acceptable for the target installation.
    """

    def __init__(self):
        self.__url = "https://artifactory/bobs_cache"
        print("Using Artifactory Archive @ " + self.__url)

    def __urlFor(self, path):
        """Return the absolute URL of the archive relative 'path'.

        The scanner passes relative names such as "ab" or "ab/cd/<id>-1.tgz".
        Plain string concatenation with the root URL would glue the first
        path component onto the repository name, so exactly one "/" is
        inserted between the root and the path.
        """
        root = self.__url.rstrip("/")
        if path in (".", ""):
            return root + "/"
        return root + "/" + path.lstrip("/")

    def get(self, path):
        """Download 'path' into a temporary file and return that file's name.

        The caller owns the returned file and is expected to hand it to
        removeTmp() once done.

        Raises whatever the download raised. In that case the failure is
        logged and the partially written temporary file is deleted, so no
        truncated archive is ever returned to the caller.
        """
        # Imported locally because the module does not import logging.
        import logging

        out = tempfile.NamedTemporaryFile("wb", delete=False)
        try:
            # delete=False: leaving the 'with' only closes the file.
            with out:
                archive = ArtifactoryPath(self.__urlFor(path), verify=False)
                with archive.open() as fd:
                    out.write(fd.read())
        except Exception:
            logging.exception("Cannot download '%s' from %s", path, self.__url)
            self.removeTmp(out.name)
            raise
        return out.name

    def removeTmp(self, tmp):
        """Delete the temporary file 'tmp' created by get(), if it exists."""
        if tmp is not None and os.path.exists(tmp):
            os.unlink(tmp)

    def listdir(self, path):
        """Return the names of the entries below 'path'.

        The names are made relative by stripping the directory URL from the
        string form of every child yielded by iterating the ArtifactoryPath.
        """
        base = self.__urlFor(path)
        if not base.endswith("/"):
            base += "/"
        return [str(p).replace(base, "") for p in ArtifactoryPath(base, verify=False)]

    def binStat(self, path):
        """Return a packed binary fingerprint of the remote file 'path'.

        The result packs creation time, modification time (both as seconds
        computed by calendar.timegm), the size and the SHA-256 string of the
        file with the struct format '=qqQ64s'.
        """
        stat = ArtifactoryPath(self.__urlFor(path), verify=False).stat()
        ctime = calendar.timegm(stat.ctime.timetuple())
        mtime = calendar.timegm(stat.mtime.timetuple())
        return struct.pack('=qqQ64s', ctime, mtime, stat.size, bytes(stat.sha256, 'utf-8'))

    def unlink(self, path):
        """Remove 'path' from the archive. A missing file is not an error."""
        archive = ArtifactoryPath(self.__urlFor(path), verify=False)
        if archive.exists():
            archive.unlink()

    def getSize(self, path):
        """Return the size of 'path' in bytes.

        Raises FileNotFoundError if the file does not exist. This mirrors the
        behaviour of the local accessor (os.stat) so that callers which sum
        up sizes never receive None.
        """
        archive = ArtifactoryPath(self.__urlFor(path), verify=False)
        if not archive.exists():
            raise FileNotFoundError(path)
        return archive.stat().size

# The one accessor instance that this plugin exports.
ArtifactoryAccess = Artifactory()

# Plugin manifest: registers the accessor under the name 'Artifactory' in the
# 'archiveAccessors' table.
manifest = dict(
    apiVersion="0.21",
    archiveAccessors=dict(Artifactory=ArtifactoryAccess),
)
22 changes: 22 additions & 0 deletions pym/bob/archive_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class BaseArchiveAccess:
    """Interface of the archive access handlers.

    The defaults implemented here do nothing: get() returns an empty string
    and every other method returns None.
    """

    def get(self, path):
        """Fetch the package 'path' from the archive.

        Return the path to a locally accessible archive file.
        """
        return ""

    def removeTmp(self, path):
        """Remove the temporary file that was returned by get()."""

    def listdir(self, path):
        """Return a list of the directory entries of 'path'."""

    def getSize(self, path):
        """Return the size of the file 'path' in bytes."""

    def unlink(self, path):
        """Unlink 'path' from the archive."""

    def binStat(self, path):
        """Return the binary stat of 'path'."""
71 changes: 56 additions & 15 deletions pym/bob/cmds/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

from ..audit import Audit
from ..errors import BobError
from ..input import RecipeSet
from ..utils import binStat, asHexStr, infixBinaryOp
from ..archive_access import BaseArchiveAccess
import argparse
import gzip
import json
Expand All @@ -20,14 +22,29 @@
# need to enable this for nested expression parsing performance
pyparsing.ParserElement.enablePackrat()

class LocalAccessor(BaseArchiveAccess):
    """Accessor for an archive that lives in the local file system.

    Every operation is forwarded directly to the corresponding file system
    call, so paths are used exactly as they are passed in.
    """

    def get(self, path):
        """Return 'path' itself; no temporary copy is created."""
        return path

    def removeTmp(self, path):
        """Do nothing, because get() does not create temporary files."""
        return None

    def listdir(self, path):
        """Return the entries of the directory 'path'."""
        entries = os.listdir(path)
        return entries

    def getSize(self, path):
        """Return the size of the file 'path' in bytes."""
        return os.path.getsize(path)

    def unlink(self, path):
        """Delete the file 'path'."""
        os.unlink(path)

    def binStat(self, path):
        """Return the binary stat of 'path' as computed by utils.binStat."""
        packed = binStat(path)
        return packed

class ArchiveScanner:
CUR_VERSION = 2

def __init__(self):
def __init__(self, accessor):
self.__dirSchema = re.compile(r'[0-9a-zA-Z]{2}')
self.__archiveSchema = re.compile(r'[0-9a-zA-Z]{36,}-1.tgz')
self.__db = None
self.__cleanup = False
self.__accessor = accessor

def __enter__(self):
try:
Expand Down Expand Up @@ -80,18 +97,20 @@ def scan(self, verbose):
try:
found = False
self.__db.execute("BEGIN")
for l1 in os.listdir("."):
for l1 in self.__accessor.listdir("."):
if not self.__dirSchema.fullmatch(l1): continue
for l2 in os.listdir(l1):
for l2 in self.__accessor.listdir(l1):
if not self.__dirSchema.fullmatch(l2): continue
l2 = os.path.join(l1, l2)
for l3 in os.listdir(l2):
for l3 in self.__accessor.listdir(l2):
m = self.__archiveSchema.fullmatch(l3)
if not m: continue
found = True
self.__scan(os.path.join(l2, l3), verbose)
except OSError as e:
raise BobError("Error scanning archive: " + str(e))
except Exception as e:
raise BobError("Error: " + str(e))
finally:
self.__db.execute("END")
if verbose and not found:
Expand All @@ -101,8 +120,9 @@ def scan(self, verbose):
return found

def __scan(self, fileName, verbose):
tmpFileName = None
try:
st = binStat(fileName)
st = self.__accessor.binStat(fileName)
bidHex, sep, suffix = fileName.partition("-")
bid = bytes.fromhex(bidHex[0:2] + bidHex[3:5] + bidHex[6:])

Expand All @@ -116,9 +136,10 @@ def __scan(self, fileName, verbose):
self.__db.execute("DELETE FROM files WHERE bid=?",
(bid,))

tmpFileName = self.__accessor.get(fileName)
# read audit trail
if verbose: print("scan", fileName)
with tarfile.open(fileName, errorlevel=1) as tar:
with tarfile.open(tmpFileName, errorlevel=1) as tar:
# validate
if tar.pax_headers.get('bob-archive-vsn') != "1":
print("Not a Bob archive:", fileName, "Ignored!")
Expand All @@ -135,7 +156,7 @@ def __scan(self, fileName, verbose):
# read audit trail
auditJsonGz = tar.extractfile(f)
auditJson = gzip.GzipFile(fileobj=auditJsonGz)
audit = Audit.fromByteStream(auditJson, fileName)
audit = Audit.fromByteStream(auditJson, tmpFileName)

# import data
artifact = audit.getArtifact()
Expand All @@ -152,6 +173,10 @@ def __scan(self, fileName, verbose):
raise BobError("Cannot read {}: {}".format(fileName, str(e)))
except OSError as e:
raise BobError(str(e))
except Exception as e:
raise BobError("Error: " + str(e))
finally:
self.__accessor.removeTmp(tmpFileName)

def remove(self, bid):
self.__cleanup = True
Expand Down Expand Up @@ -386,22 +411,22 @@ def query(scanner, expressions):
return retained


def doArchiveScan(argv):
def doArchiveScan(accessor, argv):
parser = argparse.ArgumentParser(prog="bob archive scan")
parser.add_argument("-v", "--verbose", action='store_true',
help="Verbose operation")
parser.add_argument("-f", "--fail", action='store_true',
help="Return a non-zero error code in case of errors")
args = parser.parse_args(argv)

scanner = ArchiveScanner()
scanner = ArchiveScanner(accessor)
with scanner:
if not scanner.scan(args.verbose) and args.fail:
sys.exit(1)


# meta.package == "root" && build.date > "2017-06-19" LIMIT 5 ORDER BY build.date ASC
def doArchiveClean(argv):
def doArchiveClean(accessor, argv):
parser = argparse.ArgumentParser(prog="bob archive clean")
parser.add_argument('expression', nargs='+',
help="Expression of artifacts that shall be kept")
Expand All @@ -415,7 +440,7 @@ def doArchiveClean(argv):
help="Return a non-zero error code in case of errors")
args = parser.parse_args(argv)

scanner = ArchiveScanner()
scanner = ArchiveScanner(accessor)
with scanner:
if not args.noscan:
if not scanner.scan(args.verbose) and args.fail:
Expand All @@ -435,24 +460,29 @@ def doArchiveClean(argv):
todo.update(scanner.getReferencedBuildIds(n))

# Third pass: remove everything that is *not* retained
totalRemoved = 0
for bid in scanner.getBuildIds():
if bid in retained: continue
victim = asHexStr(bid)
victim = os.path.join(victim[0:2], victim[2:4], victim[4:] + "-1.tgz")
if args.dry_run:
print(victim)
totalRemoved += accessor.getSize(victim)
else:
try:
if args.verbose:
print("rm", victim)
os.unlink(victim)
totalRemoved += accessor.getSize(victim)
accessor.unlink(victim)
except FileNotFoundError:
pass
except OSError as e:
raise BobError("Cannot remove {}: {}".format(victim, str(e)))
scanner.remove(bid)
print("{} {} Bytes from archive".format ("Would remove " if args.dry_run else "Removed",
totalRemoved))

def doArchiveFind(argv):
def doArchiveFind(accessor, argv):
parser = argparse.ArgumentParser(prog="bob archive find")
parser.add_argument('expression', nargs='+',
help="Expression that artifacts need to match")
Expand All @@ -464,7 +494,7 @@ def doArchiveFind(argv):
help="Return a non-zero error code in case of errors")
args = parser.parse_args(argv)

scanner = ArchiveScanner()
scanner = ArchiveScanner(accessor)
with scanner:
if not args.noscan:
if not scanner.scan(args.verbose) and args.fail:
Expand Down Expand Up @@ -492,14 +522,25 @@ def doArchive(argv, bobRoot):

bob archive {}
""".format(subHelp))
parser.add_argument('-a', '--accessor', nargs='?', default=None, help="Archive Accessor (plugin)")
parser.add_argument('subcommand', help="Subcommand")
parser.add_argument('args', nargs=argparse.REMAINDER,
help="Arguments for subcommand")

args = parser.parse_args(argv)

if args.accessor:
recipes = RecipeSet()
recipes.parse()
accessors = recipes.getArchiveAccessors()
if not args.accessor in accessors:
parser.error("Unknown archive accessor '{}'".format(args.accessor))
accessor = accessors[args.accessor]
else:
accessor = LocalAccessor()

if args.subcommand in availableArchiveCmds:
availableArchiveCmds[args.subcommand][0](args.args)
availableArchiveCmds[args.subcommand][0](accessor, args.args)
else:
parser.error("Unknown subcommand '{}'".format(args.subcommand))

10 changes: 10 additions & 0 deletions pym/bob/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -2897,6 +2897,7 @@ def __init__(self):
self.__scmOverrides = []
self.__hooks = {}
self.__projectGenerators = {}
self.__archiveAccessors = {}
self.__configFiles = []
self.__properties = {}
self.__states = {}
Expand Down Expand Up @@ -3157,6 +3158,12 @@ def __loadPlugin(self, mangledName, fileName, name):
}
self.__projectGenerators.update(projectGenerators)

archiveAccessors = manifest.get('archiveAccessors', {})
if not isinstance(archiveAccessors, dict):
raise ParseError("Plugin '"+fileName+"': 'archiveAccessor' has wrong type!")
if archiveAccessors:
self.__archiveAccessors.update(archiveAccessors)

properties = manifest.get('properties', {})
if not isinstance(properties, dict):
raise ParseError("Plugin '"+fileName+"': 'properties' has wrong type!")
Expand Down Expand Up @@ -3225,6 +3232,9 @@ def defineHook(self, name, value):
def setConfigFiles(self, configFiles):
self.__configFiles = configFiles

def getArchiveAccessors(self):
    """Return the dict of archive accessors collected from plugin manifests."""
    accessors = self.__archiveAccessors
    return accessors

def getCommandConfig(self):
return self.__commandConfig

Expand Down