generated from linz/template-javascript-hello-world
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add stac creation and download for gdal
- Loading branch information
1 parent
f3256d7
commit 252fa66
Showing
5 changed files
with
263 additions
and
188 deletions.
There are no files selected for viewing
File renamed without changes.
File renamed without changes.
262 changes: 262 additions & 0 deletions
262
src/commands/basemaps-topo-import/import-topographic-maps.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
import { tmpdir } from 'node:os'; | ||
|
||
import { GdalRunner } from '@basemaps/cogify/build/cogify/gdal.runner.js'; | ||
import { loadTiffsFromPaths } from '@basemaps/config-loader/build//json/tiff.config.js'; | ||
import { Bounds, Nztm2000QuadTms, Projection } from '@basemaps/geo'; | ||
import { CliId } from '@basemaps/shared/build/cli/info.js'; | ||
import { fsa } from '@chunkd/fs'; | ||
import { Tiff } from '@cogeotiff/core'; | ||
import { command, option, string } from 'cmd-ts'; | ||
import { mkdir, rm } from 'fs/promises'; | ||
import pLimit from 'p-limit'; | ||
import path from 'path'; | ||
import { StacCollection, StacItem } from 'stac-ts'; | ||
import { GeoJSONPolygon } from 'stac-ts/src/types/geojson.js'; | ||
|
||
import { CliInfo } from '../../cli.info.js'; | ||
import { logger } from '../../log.js'; | ||
import { isArgo } from '../../utils/argo.js'; | ||
import { findBoundingBox } from '../../utils/geotiff.js'; | ||
import { HashTransform } from '../../utils/hash.stream.js'; | ||
import { config, forceOutput, registerCli, tryParseUrl, verbose } from '../common.js'; | ||
import { gdalBuildCogCommands } from './gdal-commands.js'; | ||
|
||
const Q = pLimit(10); | ||
|
||
/** | ||
* List all the tiffs in a directory for topographic maps and create cogs for each. | ||
* | ||
* @param source: Location of the source files | ||
* @example s3://linz-topographic-upload/topographic/TopoReleaseArchive/NZTopo50_GeoTif_Gridless/ | ||
* | ||
* @param target: Location of the target path | ||
*/ | ||
export const importTopographicMaps = command({ | ||
name: 'import-topographic-maps', | ||
description: 'List input topographic files and run dgal to standardize and import into target.', | ||
version: CliInfo.version, | ||
args: { | ||
config, | ||
verbose, | ||
forceOutput, | ||
title: option({ | ||
type: string, | ||
long: 'title', | ||
description: 'Imported imagery title', | ||
}), | ||
source: option({ | ||
type: string, | ||
long: 'source', | ||
description: 'Location of the source files', | ||
}), | ||
target: option({ | ||
type: string, | ||
long: 'target', | ||
description: 'Target location for the output files', | ||
}), | ||
}, | ||
async handler(args) { | ||
const startTime = performance.now(); | ||
registerCli(this, args); | ||
logger.info('ListJobs:Start'); | ||
|
||
const items = await loadTiffsToCreateStacs(args.source, args.target, args.title, args.forceOutput); | ||
if (items.length === 0) throw new Error('No Stac items created'); | ||
|
||
const tmpPath = path.join(tmpdir(), CliId); | ||
const tmpURL = tryParseUrl(tmpPath); | ||
const tmpFolder = tmpURL.href.endsWith('/') ? new URL(tmpURL.href) : new URL(`${tmpURL.href}/`); | ||
await Promise.all( | ||
items.map((item) => | ||
Q(async () => { | ||
await createCogs(item, args.target, tmpFolder); | ||
}), | ||
), | ||
); | ||
|
||
logger.info({ duration: performance.now() - startTime }, 'ListJobs:Done'); | ||
}, | ||
}); | ||
|
||
/** | ||
* Extract the map code and version from the provided path. | ||
* Throws an error if either detail cannot be parsed. | ||
* | ||
* @param file: filepath from which to extract the map code and version | ||
* | ||
* @example | ||
* file: "s3://linz-topographic-upload/topographic/TopoReleaseArchive/NZTopo50_GeoTif_Gridless/CJ10_GRIDLESS_GeoTifv1-00.tif" | ||
* returns: { mapCode: "CJ10", version: "v1-00" } | ||
* | ||
* @returns an object containing the map code and version | ||
*/ | ||
export function extractMapSheetNameWithVersion(file: string): { mapCode: string; version: string } { | ||
const url = tryParseUrl(file); | ||
const filePath = path.parse(url.href); | ||
const fileName = filePath.name; | ||
|
||
// extract map code from head of the file name (e.g. CJ10) | ||
const mapCode = fileName.split('_')[0]; | ||
if (mapCode == null) throw new Error('Map sheet not found in the file name'); | ||
|
||
// extract version from tail of the file name (e.g. v1-0) | ||
const version = fileName.match(/v(\d)-(\d\d)/)?.[0]; | ||
if (version == null) throw new Error('Version not found in the file name'); | ||
|
||
logger.info({ mapCode, version }, 'ListJobs:Output'); | ||
return { mapCode, version }; | ||
} | ||
|
||
/** | ||
* @param source: Source directory URL from which to load tiff files | ||
* @example TODO | ||
* | ||
* @param target: Destination directory URL into which to save the STAC collection and item JSON files | ||
* @example TODO | ||
* | ||
* @param title: The title of the collection | ||
* @example "New Zealand Topo50 Map Series (Gridless)" | ||
* | ||
* @returns an array of StacItem objects | ||
*/ | ||
async function loadTiffsToCreateStacs( | ||
source: string, | ||
target: string, | ||
title: string, | ||
force: boolean, | ||
): Promise<StacItem[]> { | ||
// extract all file paths from the source directory and convert them into URL objects | ||
logger.info({ source }, 'LoadTiffs:Start'); | ||
const files = await fsa.toArray(fsa.list(source)); | ||
const fileUrls = files.map((f) => tryParseUrl(f)); | ||
|
||
const tiffs = await loadTiffsFromPaths(fileUrls, Q); | ||
const projection = Projection.get(Nztm2000QuadTms); | ||
const cliDate = new Date().toISOString(); | ||
|
||
const items: StacItem[] = []; | ||
let imageryBound: Bounds | undefined; | ||
logger.info({ tiffs: tiffs.length }, 'CreateStac:Start'); | ||
const brokenTiffs = new Map<string, Tiff>(); | ||
for (const tiff of tiffs) { | ||
const source = tiff.source.url.href; | ||
const { mapCode, version } = extractMapSheetNameWithVersion(source); | ||
const tileName = `${mapCode}_${version}`; | ||
let bounds; | ||
try { | ||
bounds = Bounds.fromBbox(await findBoundingBox(tiff)); | ||
} catch (e) { | ||
brokenTiffs.set(tileName, tiff); | ||
continue; | ||
} | ||
if (imageryBound == null) { | ||
imageryBound = bounds; | ||
} else { | ||
imageryBound = imageryBound.union(bounds); | ||
} | ||
|
||
logger.info({ tileName }, 'CreateStac:Item'); | ||
const item: StacItem = { | ||
id: tileName, | ||
type: 'Feature', | ||
collection: CliId, | ||
stac_version: '1.0.0', | ||
stac_extensions: [], | ||
geometry: projection.boundsToGeoJsonFeature(bounds).geometry as GeoJSONPolygon, | ||
bbox: projection.boundsToWgs84BoundingBox(bounds), | ||
links: [ | ||
{ href: `./${tileName}.json`, rel: 'self' }, | ||
{ href: './collection.json', rel: 'collection' }, | ||
{ href: './collection.json', rel: 'parent' }, | ||
{ href: source, rel: 'linz_basemaps:source', type: 'image/tiff; application=geotiff' }, | ||
], | ||
properties: { | ||
version, | ||
datetime: cliDate, | ||
start_datetime: undefined, | ||
end_datetime: undefined, | ||
'proj:epsg': projection.epsg.code, | ||
created_at: cliDate, | ||
updated_at: cliDate, | ||
'linz:lifecycle': 'ongoing', | ||
'linz:geospatial_category': 'topographic-maps', | ||
'linz:region': 'new-zealand', | ||
'linz:security_classification': 'unclassified', | ||
'linz:slug': 'topo50', | ||
}, | ||
assets: {}, | ||
}; | ||
items.push(item); | ||
} | ||
if (imageryBound == null) throw new Error('No imagery bounds found'); | ||
|
||
logger.info({ items: items.length }, 'CreateStac:Collection'); | ||
const collection: StacCollection = { | ||
id: CliId, | ||
type: 'Collection', | ||
stac_version: '1.0.0', | ||
stac_extensions: [], | ||
license: 'CC-BY-4.0', | ||
title, | ||
description: 'Topographic maps of New Zealand', | ||
providers: [{ name: 'Land Information New Zealand', roles: ['host', 'licensor', 'processor', 'producer'] }], | ||
extent: { | ||
spatial: { bbox: [projection.boundsToWgs84BoundingBox(imageryBound)] }, | ||
// Default the temporal time today if no times were found as it is required for STAC | ||
temporal: { interval: [[cliDate, null]] }, | ||
}, | ||
links: items.map((item) => { | ||
return { href: `./${item.id}.json`, rel: 'item', type: 'application/json' }; | ||
}), | ||
}; | ||
|
||
if (force || isArgo()) { | ||
logger.info({ target }, 'CreateStac:Output'); | ||
logger.info({ items: items.length, collectionID: collection.id }, 'Stac:Output'); | ||
for (const item of items) { | ||
await fsa.write(fsa.join(target, `${item.id}.json`), JSON.stringify(item, null, 2)); | ||
} | ||
await fsa.write(fsa.join(target, 'collection.json'), JSON.stringify(collection, null, 2)); | ||
} | ||
|
||
await fsa.write( | ||
fsa.join(`${target}/broken/`, 'broken.json'), | ||
JSON.stringify(Array.from(brokenTiffs.keys()), null, 2), | ||
); | ||
|
||
return items; | ||
} | ||
|
||
async function createCogs(item: StacItem, target: string, tmpFolder: URL): Promise<void> { | ||
try { | ||
// Extract the source URL from the item | ||
logger.info({ item: item.id }, 'CogCreation:Start'); | ||
const source = item.links.find((l) => l.rel === 'linz_basemaps:source')?.href; | ||
if (source == null) throw new Error('No source file found in the item'); | ||
await mkdir(tmpFolder, { recursive: true }); | ||
|
||
// Download the source file | ||
logger.info({ item: item.id }, 'CogCreation:Download'); | ||
const url = tryParseUrl(source); | ||
const filePath = path.parse(url.href); | ||
const fileName = filePath.base; | ||
const hashStreamSource = fsa.stream(source).pipe(new HashTransform('sha256')); | ||
const input = fsa.join(tmpFolder.href, fileName); | ||
await fsa.write(input, hashStreamSource); | ||
|
||
// run gdal_translate for each job | ||
logger.info({ item: item.id }, 'CogCreation:gdal_translate'); | ||
const output = fsa.join(tmpFolder.href, `${item.id}.tiff`); | ||
const command = gdalBuildCogCommands(tryParseUrl(input), tryParseUrl(output)); | ||
await new GdalRunner(command).run(logger); | ||
|
||
// fsa.write output to target location | ||
logger.info({ item: item.id }, 'CogCreation:Output'); | ||
const readStream = fsa.stream(output).pipe(new HashTransform('sha256')); | ||
await fsa.write(fsa.join(target, `${item.id}.tiff`), readStream); | ||
} finally { | ||
// Cleanup the temporary folder once everything is done | ||
logger.info({ path: tmpFolder }, 'CogCreation:Cleanup'); | ||
await rm(tmpFolder, { recursive: true, force: true }); | ||
} | ||
} |
Oops, something went wrong.