Skip to content

Commit

Permalink
Merge branch 'master' into travisg/20210415-add-build-ordering
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrigsby-sc authored Apr 26, 2021
2 parents 87c4ba3 + cf378a0 commit 2ae47ae
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 11 deletions.
6 changes: 4 additions & 2 deletions batch-setup/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ def env_for_image(name, db_hosts, db_name, db_user, db_password, buckets,

def cmd_for_image(name, region):
if name == 'rawr-batch':
cmd = ['tilequeue', 'rawr-tile',
cmd = ['/usr/bin/time', '-f', '"{\\"max_resident_kb\\": %M, \\"cpu_percent\\": \\"%P\\", \\"wall_time_seconds\\": %e}\\"',
'tilequeue', 'rawr-tile',
'--config', '/etc/tilequeue/config.yaml',
'--tile', 'Ref::tile',
'--run_id', 'Ref::run_id']
Expand All @@ -120,7 +121,8 @@ def cmd_for_image(name, region):
'--run_id', 'Ref::run_id']

elif name == 'meta-low-zoom-batch':
cmd = ['tilequeue', 'meta-tile-low-zoom',
cmd = ['/usr/bin/time', '-f', '"{\\"max_resident_kb\\": %M, \\"cpu_percent\\": \\"%P\\", \\"wall_time_seconds\\": %e}\\"',
'tilequeue', 'meta-tile-low-zoom',
'--config', '/etc/tilequeue/config.yaml',
'--tile', 'Ref::tile',
'--run_id', 'Ref::run_id']
Expand Down
26 changes: 20 additions & 6 deletions batch-setup/make_meta_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def _big_jobs(rawr_bucket, prefix, key_format_type, rawr_zoom, group_zoom,
return big_jobs


def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, tile_specifier=TileSpecifier()):
def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, tile_specifier=TileSpecifier(), mem_multiplier=1.0, mem_max=32 * 1024):
from tilequeue.command import make_config_from_argparse
from tilequeue.command import tilequeue_batch_enqueue
from make_rawr_tiles import BatchEnqueueArgs
Expand All @@ -385,18 +385,28 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, tile_speci
coord_lines = [line.strip() for line in tile_list.readlines()]

reordered_lines = tile_specifier.reorder(coord_lines)

overprovision_multiplier = 1.2 # overprovision by 20%

# if we aren't already increasing our memory usage somewhere else, we want to
# overprovision by 20% to allow for changes in tile complexity over time
overprovision_multiplier = 1.0
if mem_multiplier <= 1.0:
overprovision_multiplier = 1.2

print("[%s] Starting to enqueue %d tile batches" % (time.ctime(), len(reordered_lines)))
for coord_line in reordered_lines:
# override memory requirements for this job with what the tile_specifier tells us
memory_mb = int(tile_specifier.get_mem_reqs_mb(coord_line, overprovision_multiplier))
cfg.yml["batch"]["memory"] = memory_mb
mem_mb = int(tile_specifier.get_mem_reqs_mb(coord_line, overprovision_multiplier))
update_memory_request(cfg, mem_mb, mem_multiplier, mem_max)

args = BatchEnqueueArgs(config_file, coord_line, None, None)
tilequeue_batch_enqueue(cfg, args)
print("[%s] Done enqueuing tile batches" % time.ctime())

def update_memory_request(cfg, mem_mb, mem_multiplier, mem_max):
adjusted_mem = mem_mb * mem_multiplier
cfg.yml["batch"]["memory"] = int(min(adjusted_mem, mem_max))


# adaptor class for MissingTiles to see just the high zoom parts, this is used
# along with the LowZoomLense to loop over missing tiles generically but
# separately.
Expand Down Expand Up @@ -436,7 +446,10 @@ def _missing(self):
self.split_zoom, self.zoom_max, self.big_jobs)

def render(self, num_retries, lense):
mem_max = 32 * 1024 # 32 GiB

for retry_number in range(0, num_retries):
mem_multiplier = 1.5 ** retry_number
with self._missing() as missing:
missing_tile_file = lense.missing_file(missing)
count = wc_line(missing_tile_file)
Expand All @@ -455,8 +468,9 @@ def render(self, num_retries, lense):
sample = head_lines(missing_tile_file, 10)
print("Enqueueing %d %s tiles (e.g. %s)" %
(count, lense.description, ', '.join(sample)))

enqueue_tiles(lense.config, missing_tile_file,
check_metatile_exists, self.tile_specifier)
check_metatile_exists, self.tile_specifier, mem_multiplier, mem_max)

else:
with self._missing() as missing:
Expand Down
4 changes: 2 additions & 2 deletions batch-setup/make_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ def _chr_range(a, b):
# raised if jobs fail with out-of-memory errors.
memory = {
'rawr-batch': 8192,
'meta-batch': 12288, # 12 GiB
'meta-low-zoom-batch': 12288, # 12 GiB
'meta-batch': 4096, # 4 GiB
'meta-low-zoom-batch': 8192, # 8 GiB
'missing-meta-tiles-write': 1024,
}
# defaults for the moment. TODO: make them configurable from the command
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func main() {
flag.StringVar(&destBucket, "dest-bucket", "", "dest s3 bucket to write tiles")
flag.StringVar(&destDatePrefix, "dest-date-prefix", "", "dest date prefix to write tiles found")
flag.StringVar(&hexPrefix, "hex-prefix", "", "hex prefix for job, must be 3 lowercase hexadecimal characters")
flag.UintVar(&concurrency, "concurrency", 16, "number of goroutines listing bucket per hash prefix")
flag.UintVar(&concurrency, "concurrency", 4, "number of goroutines listing bucket per hash prefix")
flag.StringVar(&region, "region", "us-east-1", "region")
flag.StringVar(&keyFormatTypeStr, "key-format-type", "", "Either 'prefix-hash' or 'hash-prefix' to control the order of the date prefix and hash in the src S3 key.")
flag.BoolVar(&allBuckets, "all-buckets", false, "If true, check all buckets in list, not just the last one.")
Expand Down

0 comments on commit 2ae47ae

Please sign in to comment.