From 81018b2dd2b024f0177164b8f7580f3dbc581001 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Wed, 8 May 2024 13:08:35 -0400 Subject: [PATCH 01/61] CUMULUS-3692: Update granules List endpoints to query postgres for basic queries (#3637) * CUMULUS-3692:Granule list endpoint for basic postgres query --- CHANGELOG.md | 6 + packages/api/endpoints/granules.js | 12 +- .../granules/test-searchafter-10k.js | 3 +- packages/api/tests/endpoints/test-granules.js | 48 +++++- packages/db/src/index.ts | 6 + packages/db/src/search/BaseSearch.ts | 128 +++++++++++++++ packages/db/src/search/GranuleSearch.ts | 93 +++++++++++ packages/db/src/translate/granules.ts | 91 +++++++---- packages/db/src/types/search.ts | 15 ++ .../db/tests/search/test-GranuleSearch.js | 148 ++++++++++++++++++ 10 files changed, 519 insertions(+), 31 deletions(-) create mode 100644 packages/db/src/search/BaseSearch.ts create mode 100644 packages/db/src/search/GranuleSearch.ts create mode 100644 packages/db/src/types/search.ts create mode 100644 packages/db/tests/search/test-GranuleSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d2262b2e28..27a9662b7d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## Unreleased +### Replace ElasticSearch Phase 1 + +- **CUMULUS-3692** + - Update granules List endpoints to query postgres for basic queries + + ### Migration Notes #### CUMULUS-3433 Update to node.js v20 diff --git a/packages/api/endpoints/granules.js b/packages/api/endpoints/granules.js index fd634ce0555..0f4b2cc1f55 100644 --- a/packages/api/endpoints/granules.js +++ b/packages/api/endpoints/granules.js @@ -12,6 +12,7 @@ const { v4: uuidv4 } = require('uuid'); const Logger = require('@cumulus/logger'); const { deconstructCollectionId } = require('@cumulus/message/Collections'); const { RecordDoesNotExist } = require('@cumulus/errors'); +const { GranuleSearch } = require('@cumulus/db'); const { CollectionPgModel, @@ -101,6 +102,7 @@ function _createNewGranuleDateValue() { * @returns {Promise} the promise of express response object */ async function list(req, res) { + log.trace(`list query ${JSON.stringify(req.query)}`); const { getRecoveryStatus, ...queryStringParameters } = req.query; let es; @@ -113,7 +115,15 @@ async function list(req, res) { } else { es = new Search({ queryStringParameters }, 'granule', process.env.ES_INDEX); } - const result = await es.query(); + let result; + // TODO the condition should be removed after we support all the query parameters + if (Object.keys(queryStringParameters).filter((item) => !['limit', 'page', 'sort_key'].includes(item)).length === 0) { + log.debug('list perform db search'); + const dbSearch = new GranuleSearch({ queryStringParameters }); + result = await dbSearch.query(); + } else { + result = await es.query(); + } if (getRecoveryStatus === 'true') { return res.send(await addOrcaRecoveryStatus(result)); } diff --git a/packages/api/tests/endpoints/granules/test-searchafter-10k.js b/packages/api/tests/endpoints/granules/test-searchafter-10k.js index 61f7f740cd2..ccc927c01ee 100644 --- a/packages/api/tests/endpoints/granules/test-searchafter-10k.js +++ b/packages/api/tests/endpoints/granules/test-searchafter-10k.js @@ -36,7 +36,8 @@ test.after.always(async (t) => { await t.context.esClient.client.indices.delete({ index: t.context.esIndex }); }); -test.serial('CUMULUS-2930 /GET granules allows searching past 10K results windows with searchContext', 
async (t) => { +// TODO postgres query doesn't return searchContext +test.serial.skip('CUMULUS-2930 /GET granules allows searching past 10K results windows with searchContext', async (t) => { const numGranules = 12 * 1000; // create granules in batches of 1000 diff --git a/packages/api/tests/endpoints/test-granules.js b/packages/api/tests/endpoints/test-granules.js index 63fb708a6a5..5ec292f04e7 100644 --- a/packages/api/tests/endpoints/test-granules.js +++ b/packages/api/tests/endpoints/test-granules.js @@ -401,7 +401,8 @@ test.after.always(async (t) => { await cleanupTestIndex(t.context); }); -test.serial('default lists and paginates correctly with search_after', async (t) => { +// TODO postgres query doesn't return searchContext +test.serial.skip('default lists and paginates correctly with search_after', async (t) => { const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); const response = await request(app) .get('/granules') @@ -446,6 +447,48 @@ test.serial('default lists and paginates correctly with search_after', async (t) t.not(meta.searchContext === newMeta.searchContext); }); +test.serial('default lists and paginates correctly from querying database', async (t) => { + const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); + const response = await request(app) + .get('/granules') + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const { meta, results } = response.body; + t.is(results.length, 4); + t.is(meta.stack, process.env.stackName); + t.is(meta.table, 'granule'); + t.is(meta.count, 4); + results.forEach((r) => { + t.true(granuleIds.includes(r.granuleId)); + }); + // default paginates correctly + const firstResponse = await request(app) + .get('/granules?limit=1') + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const { meta: firstMeta, results: firstResults } = firstResponse.body; + t.is(firstResults.length, 1); + t.is(firstMeta.page, 1); + + const newResponse = await request(app) + .get('/granules?limit=1&page=2') + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const { meta: newMeta, results: newResults } = newResponse.body; + t.is(newResults.length, 1); + t.is(newMeta.page, 2); + + t.true(granuleIds.includes(results[0].granuleId)); + t.true(granuleIds.includes(newResults[0].granuleId)); + t.not(results[0].granuleId, newResults[0].granuleId); +}); + test.serial('CUMULUS-911 GET without pathParameters and without an Authorization header returns an Authorization Missing response', async (t) => { const response = await request(app) .get('/granules') @@ -3846,7 +3889,8 @@ test.serial('PUT returns 404 if collection is not part of URI', async (t) => { t.is(response.statusCode, 404); }); -test.serial('default paginates correctly with search_after', async (t) => { +// TODO postgres query doesn't return searchContext +test.serial.skip('default paginates correctly with search_after', async (t) => { const response = await request(app) .get('/granules?limit=1') .set('Accept', 'application/json') diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 1f4a747dcbf..c761e630c90 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -136,6 +136,12 @@ export { export { QuerySearchClient, } from './lib/QuerySearchClient'; +export { + BaseSearch, +} from './search/BaseSearch'; +export { + GranuleSearch, +} from './search/GranuleSearch'; export { 
AsyncOperationPgModel } from './models/async_operation'; export { BasePgModel } from './models/base'; diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts new file mode 100644 index 00000000000..00b703e9897 --- /dev/null +++ b/packages/db/src/search/BaseSearch.ts @@ -0,0 +1,128 @@ +import { Knex } from 'knex'; +import Logger from '@cumulus/logger'; +import { getKnexClient } from '../connection'; +import { BaseRecord } from '../types/base'; +import { DbQueryParameters, QueryEvent, QueryStringParameters } from '../types/search'; + +const log = new Logger({ sender: '@cumulus/db/BaseSearch' }); + +export type Meta = { + name: string, + stack?: string, + table?: string, + limit?: number, + page?: number, + count?: number, +}; + +/** + * Class to build and execute db search query + */ +class BaseSearch { + readonly type?: string; + readonly queryStringParameters: QueryStringParameters; + // parsed from queryStringParameters for query build + dbQueryParameters: DbQueryParameters = {}; + + constructor(event: QueryEvent, type?: string) { + this.type = type; + this.queryStringParameters = event?.queryStringParameters ?? {}; + this.dbQueryParameters.page = Number.parseInt( + (this.queryStringParameters.page) ?? '1', + 10 + ); + this.dbQueryParameters.limit = Number.parseInt( + (this.queryStringParameters.limit) ?? '10', + 10 + ); + this.dbQueryParameters.offset = (this.dbQueryParameters.page - 1) + * this.dbQueryParameters.limit; + } + + /** + * build the search query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + private _buildSearch(knex: Knex) + : { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const { countQuery, searchQuery } = this.buildBasicQuery(knex); + if (this.dbQueryParameters.limit) searchQuery.limit(this.dbQueryParameters.limit); + if (this.dbQueryParameters.offset) searchQuery.offset(this.dbQueryParameters.offset); + + return { countQuery, searchQuery }; + } + + /** + * metadata template for query result + * + * @returns metadata template + */ + private _metaTemplate(): Meta { + return { + name: 'cumulus-api', + stack: process.env.stackName, + table: this.type, + }; + } + + /** + * build basic query + * + * @param knex - DB client + * @throws - function is not implemented + */ + protected buildBasicQuery(knex: Knex): { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + log.debug(`buildBasicQuery is not implemented ${knex.constructor.name}`); + throw new Error('buildBasicQuery is not implemented'); + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres records returned from query + * @throws - function is not implemented + */ + protected translatePostgresRecordsToApiRecords(pgRecords: BaseRecord[]) { + log.error(`translatePostgresRecordsToApiRecords is not implemented ${pgRecords[0]}`); + throw new Error('translatePostgresRecordsToApiRecords is not implemented'); + } + + /** + * build and execute search query + * + * @param testKnex - knex for testing + * @returns search result + */ + async query(testKnex: Knex | undefined) { + const knex = testKnex ?? await getKnexClient(); + const { countQuery, searchQuery } = this._buildSearch(knex); + try { + const countResult = await countQuery; + const meta = this._metaTemplate(); + meta.limit = this.dbQueryParameters.limit; + meta.page = this.dbQueryParameters.page; + meta.count = Number(countResult[0]?.count ?? 
0); + + const pgRecords = await searchQuery; + const apiRecords = this.translatePostgresRecordsToApiRecords(pgRecords); + + return { + meta, + results: apiRecords, + }; + } catch (error) { + log.error(`Error caught in search query for ${JSON.stringify(this.queryStringParameters)}`, error); + return error; + } + } +} + +export { BaseSearch }; diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts new file mode 100644 index 00000000000..8ff2ec6eb74 --- /dev/null +++ b/packages/db/src/search/GranuleSearch.ts @@ -0,0 +1,93 @@ +import { Knex } from 'knex'; + +import { ApiGranuleRecord } from '@cumulus/types/api/granules'; +import Logger from '@cumulus/logger'; + +import { BaseRecord } from '../types/base'; +import { BaseSearch } from './BaseSearch'; +import { PostgresGranuleRecord } from '../types/granule'; +import { QueryEvent } from '../types/search'; + +import { TableNames } from '../tables'; +import { translatePostgresGranuleToApiGranuleWithoutDbQuery } from '../translate/granules'; + +const log = new Logger({ sender: '@cumulus/db/BaseSearch' }); + +export interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { + cumulus_id: number, + updated_at: Date, + collection_cumulus_id: number, + collectionName: string, + collectionVersion: string, + pdr_cumulus_id: number, + pdrName?: string, + provider_cumulus_id?: number, + providerName?: string, +} + +/** + * Class to build and execute db search query for granules + */ +export class GranuleSearch extends BaseSearch { + constructor(event: QueryEvent) { + super(event, 'granule'); + } + + /** + * build basic query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + protected buildBasicQuery(knex: Knex) + : { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const { + granules: granulesTable, + collections: collectionsTable, + providers: providersTable, + pdrs: pdrsTable, + } = TableNames; + const countQuery = knex(granulesTable) + .count(`${granulesTable}.cumulus_id`); + + const searchQuery = knex(granulesTable) + .select(`${granulesTable}.*`) + .select({ + providerName: `${providersTable}.name`, + collectionName: `${collectionsTable}.name`, + collectionVersion: `${collectionsTable}.version`, + pdrName: `${pdrsTable}.name`, + }) + .innerJoin(collectionsTable, `${granulesTable}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`) + .leftJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`) + .leftJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + return { countQuery, searchQuery }; + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres records returned from query + * @returns translated api records + */ + protected translatePostgresRecordsToApiRecords(pgRecords: GranuleRecord[]) : ApiGranuleRecord[] { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const apiRecords = pgRecords.map((item: GranuleRecord) => { + const granulePgRecord = item; + const collectionPgRecord = { + cumulus_id: item.collection_cumulus_id, + name: item.collectionName, + version: item.collectionVersion, + }; + const pdr = item.pdrName ? { name: item.pdrName } : undefined; + const providerPgRecord = item.providerName ? 
{ name: item.providerName } : undefined; + return translatePostgresGranuleToApiGranuleWithoutDbQuery({ + granulePgRecord, collectionPgRecord, pdr, providerPgRecord, + }); + }); + return apiRecords; + } +} diff --git a/packages/db/src/translate/granules.ts b/packages/db/src/translate/granules.ts index 45b22ca14b5..11bfdbfc778 100644 --- a/packages/db/src/translate/granules.ts +++ b/packages/db/src/translate/granules.ts @@ -14,12 +14,69 @@ import { FilePgModel } from '../models/file'; import { getExecutionInfoByGranuleCumulusId } from '../lib/execution'; import { PostgresCollectionRecord } from '../types/collection'; +import { PostgresExecutionRecord } from '../types/execution'; import { PostgresGranule, PostgresGranuleRecord } from '../types/granule'; +import { PostgresFileRecord } from '../types/file'; +import { PostgresPdrRecord } from '../types/pdr'; import { GranuleWithProviderAndCollectionInfo } from '../types/query'; import { PostgresProviderRecord } from '../types/provider'; import { translatePostgresFileToApiFile } from './file'; +/** + * Generate an API Granule object from the granule and associated Postgres objects without + * querying the database + * + * @param params - params + * @param params.granulePgRecord - Granule from Postgres + * @param params.collectionPgRecord - Collection from Postgres + * @param [params.executionUrls] - executionUrls from Postgres + * @param [params.files] - granule files from Postgres + * @param [params.pdr] - pdr from Postgres + * @param [params.providerPgRecord] - provider from Postgres + * @returns An API Granule with associated Files + */ +export const translatePostgresGranuleToApiGranuleWithoutDbQuery = ({ + granulePgRecord, + collectionPgRecord, + executionUrls = [], + files = [], + pdr, + providerPgRecord, +}: { + granulePgRecord: PostgresGranuleRecord, + collectionPgRecord: Pick, + executionUrls?: Partial[], + files?: PostgresFileRecord[], + pdr?: Pick, + providerPgRecord?: Pick, +}): ApiGranuleRecord => removeNilProperties({ + beginningDateTime: granulePgRecord.beginning_date_time?.toISOString(), + cmrLink: granulePgRecord.cmr_link, + collectionId: constructCollectionId(collectionPgRecord.name, collectionPgRecord.version), + createdAt: granulePgRecord.created_at?.getTime(), + duration: granulePgRecord.duration, + endingDateTime: granulePgRecord.ending_date_time?.toISOString(), + error: granulePgRecord.error, + execution: executionUrls[0] ? executionUrls[0].url : undefined, + files: files.length > 0 ? files.map((file) => translatePostgresFileToApiFile(file)) : [], + granuleId: granulePgRecord.granule_id, + lastUpdateDateTime: granulePgRecord.last_update_date_time?.toISOString(), + pdrName: pdr ? pdr.name : undefined, + processingEndDateTime: granulePgRecord.processing_end_date_time?.toISOString(), + processingStartDateTime: granulePgRecord.processing_start_date_time?.toISOString(), + productionDateTime: granulePgRecord.production_date_time?.toISOString(), + productVolume: granulePgRecord.product_volume, + provider: providerPgRecord ? providerPgRecord.name : undefined, + published: granulePgRecord.published, + queryFields: granulePgRecord.query_fields, + status: granulePgRecord.status as GranuleStatus, + timestamp: granulePgRecord.timestamp?.getTime(), + timeToArchive: granulePgRecord.time_to_archive, + timeToPreprocess: granulePgRecord.time_to_process, + updatedAt: granulePgRecord.updated_at?.getTime(), +}); + /** * Generate an API Granule object from a Postgres Granule with associated Files. 
* @@ -88,34 +145,14 @@ export const translatePostgresGranuleToApiGranule = async ({ ); } - const apiGranule: ApiGranuleRecord = removeNilProperties({ - beginningDateTime: granulePgRecord.beginning_date_time?.toISOString(), - cmrLink: granulePgRecord.cmr_link, - collectionId: constructCollectionId(collection.name, collection.version), - createdAt: granulePgRecord.created_at?.getTime(), - duration: granulePgRecord.duration, - endingDateTime: granulePgRecord.ending_date_time?.toISOString(), - error: granulePgRecord.error, - execution: executionUrls[0] ? executionUrls[0].url : undefined, - files: files.length > 0 ? files.map((file) => translatePostgresFileToApiFile(file)) : [], - granuleId: granulePgRecord.granule_id, - lastUpdateDateTime: granulePgRecord.last_update_date_time?.toISOString(), - pdrName: pdr ? pdr.name : undefined, - processingEndDateTime: granulePgRecord.processing_end_date_time?.toISOString(), - processingStartDateTime: granulePgRecord.processing_start_date_time?.toISOString(), - productionDateTime: granulePgRecord.production_date_time?.toISOString(), - productVolume: granulePgRecord.product_volume, - provider: provider ? provider.name : undefined, - published: granulePgRecord.published, - queryFields: granulePgRecord.query_fields, - status: granulePgRecord.status as GranuleStatus, - timestamp: granulePgRecord.timestamp?.getTime(), - timeToArchive: granulePgRecord.time_to_archive, - timeToPreprocess: granulePgRecord.time_to_process, - updatedAt: granulePgRecord.updated_at?.getTime(), + return translatePostgresGranuleToApiGranuleWithoutDbQuery({ + granulePgRecord, + collectionPgRecord: collection, + executionUrls, + files, + pdr, + providerPgRecord: provider, }); - - return apiGranule; }; /** diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts new file mode 100644 index 00000000000..50a3664ef48 --- /dev/null +++ b/packages/db/src/types/search.ts @@ -0,0 +1,15 @@ +export type QueryStringParameters = { + limit?: string, + page?: string, + [key: string]: string | string[] | undefined, +}; + +export type QueryEvent = { + queryStringParameters?: QueryStringParameters, +}; + +export type DbQueryParameters = { + limit?: number, + offset?: number, + page?: number, +}; diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js new file mode 100644 index 00000000000..a18690d70b0 --- /dev/null +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -0,0 +1,148 @@ +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); + +const { constructCollectionId } = require('@cumulus/message/Collections'); + +const { + CollectionPgModel, + fakeCollectionRecordFactory, + fakeGranuleRecordFactory, + fakePdrRecordFactory, + fakeProviderRecordFactory, + generateLocalTestDb, + GranulePgModel, + GranuleSearch, + PdrPgModel, + ProviderPgModel, + migrationDir, +} = require('../../dist'); + +const testDbName = `granule_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + // Create collection + t.context.collectionPgModel = new CollectionPgModel(); + t.context.collectionName = 'fakeCollection'; + t.context.collectionVersion = 'v1'; + + const collectionName2 = 'fakeCollection2'; + const collectionVersion2 = 'v2'; + + t.context.collectionId = constructCollectionId( + t.context.collectionName, 
+ t.context.collectionVersion + ); + + t.context.collectionId2 = constructCollectionId( + collectionName2, + collectionVersion2 + ); + + t.context.testPgCollection = fakeCollectionRecordFactory({ + name: t.context.collectionName, + version: t.context.collectionVersion, + }); + t.context.testPgCollection2 = fakeCollectionRecordFactory({ + name: collectionName2, + version: collectionVersion2, + }); + + const [pgCollection] = await t.context.collectionPgModel.create( + t.context.knex, + t.context.testPgCollection + ); + const [pgCollection2] = await t.context.collectionPgModel.create( + t.context.knex, + t.context.testPgCollection2 + ); + t.context.collectionCumulusId = pgCollection.cumulus_id; + t.context.collectionCumulusId2 = pgCollection2.cumulus_id; + + // Create provider + t.context.providerPgModel = new ProviderPgModel(); + t.context.provider = fakeProviderRecordFactory(); + + const [pgProvider] = await t.context.providerPgModel.create( + t.context.knex, + t.context.provider + ); + t.context.providerCumulusId = pgProvider.cumulus_id; + + // Create PDR + t.context.pdrPgModel = new PdrPgModel(); + t.context.pdr = fakePdrRecordFactory({ + collection_cumulus_id: pgCollection.cumulus_id, + provider_cumulus_id: t.context.providerCumulusId, + }); + const [pgPdr] = await t.context.pdrPgModel.create( + t.context.knex, + t.context.pdr + ); + t.context.pdrCumulusId = pgPdr.cumulus_id; + + // Create Granule + t.context.granulePgModel = new GranulePgModel(); + t.context.pgGranules = await t.context.granulePgModel.insert( + knex, + range(100).map((num) => fakeGranuleRecordFactory({ + collection_cumulus_id: (num % 2) + ? t.context.collectionCumulusId : t.context.collectionCumulusId2, + pdr_cumulus_id: t.context.pdrCumulusId, + provider_cumulus_id: t.context.providerCumulusId, + })) + ); +}); + +test('Granule search returns 10 granule records by default', async (t) => { + const { knex } = t.context; + const dbSearch = new GranuleSearch(); + const response = await dbSearch.query(knex); + + t.is(response.meta.count, 100); + + const apiGranules = response.results || {}; + t.is(apiGranules.length, 10); + const validatedRecords = apiGranules.filter((granule) => ( + [t.context.collectionId, t.context.collectionId2].includes(granule.collectionId) + && granule.provider === t.context.provider.name + && granule.pdrName === t.context.pdr.name)); + t.is(validatedRecords.length, apiGranules.length); +}); + +test('Granule search supports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 20, + page: 2, + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 20); + + queryStringParameters = { + limit: 11, + page: 10, + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 1); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 0); +}); From 1c2a666ef1f90df455db8876861e0c433662b933 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Mon, 13 May 2024 19:44:07 -0400 Subject: [PATCH 02/61] CUMULUS-3694: Update granules List endpoints to query postgres - filter by field value (#3656) * CUMULUS-3692:Granule list 
endpoint for basic postgres query * refactor * refactor * typing * add changelog entry * skip search_after * skip searchafter unit tests * add granule list test * rename * refactor * build query parameters * update comment * add field-mapping * update jsdoc * use type over interface,add log * update test description * build term/terms * buildDbQueryParameters * add unit test no terms search * add doc * rename * add unit test * add fields test * add more unit tests * support error.Error search * fix lint * rename functions * ignore files * add convert query unit tests * add all types * add unit test for fieldmapping types fix timestamp * update timestamp test * add multiple term field test * ignore execution in granule list record --- CHANGELOG.md | 9 +- example/spec/helpers/granuleUtils.js | 1 + example/spec/parallel/testAPI/granuleSpec.js | 3 +- packages/api/endpoints/granules.js | 23 +- packages/api/tests/endpoints/test-granules.js | 50 +++- packages/db/src/search/BaseSearch.ts | 89 ++++-- packages/db/src/search/GranuleSearch.ts | 136 +++++++++- packages/db/src/search/field-mapping.ts | 223 +++++++++++++++ packages/db/src/search/queries.ts | 100 +++++++ packages/db/src/types/search.ts | 11 + .../db/tests/search/test-GranuleSearch.js | 253 +++++++++++++++++- .../db/tests/search/test-field-mapping.js | 222 +++++++++++++++ packages/db/tests/search/test-queries.js | 38 +++ 13 files changed, 1095 insertions(+), 63 deletions(-) create mode 100644 packages/db/src/search/field-mapping.ts create mode 100644 packages/db/src/search/queries.ts create mode 100644 packages/db/tests/search/test-field-mapping.js create mode 100644 packages/db/tests/search/test-queries.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bff62e9cf4..df6d3a3afc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Replace ElasticSearch Phase 1 - **CUMULUS-3692** - - Update granules List endpoints to query postgres for basic queries - + - Added `@cumulus/db/src/search` `BaseSearch` and `GranuleSearch` classes to + support basic queries for granules + - Updated granules List endpoint to query postgres for basic queries +- **CUMULUS-3694** + - Added functionality to `@cumulus/db/src/search` to support term queries + - Updated `BaseSearch` and `GranuleSearch` classes to support term queries for granules + - Updated granules List endpoint to search postgres ### Migration Notes diff --git a/example/spec/helpers/granuleUtils.js b/example/spec/helpers/granuleUtils.js index e73eece1e96..4dbc9720204 100644 --- a/example/spec/helpers/granuleUtils.js +++ b/example/spec/helpers/granuleUtils.js @@ -234,6 +234,7 @@ const waitForGranuleRecordUpdatedInList = async (stackName, granule, additionalQ 'beginningDateTime', 'endingDateTime', 'error', + 'execution', // TODO remove after CUMULUS-3698 'files', // TODO -2714 this should be removed 'lastUpdateDateTime', 'productionDateTime', diff --git a/example/spec/parallel/testAPI/granuleSpec.js b/example/spec/parallel/testAPI/granuleSpec.js index 2a977c079b2..e9d170fa9e6 100644 --- a/example/spec/parallel/testAPI/granuleSpec.js +++ b/example/spec/parallel/testAPI/granuleSpec.js @@ -183,7 +183,8 @@ describe('The Granules API', () => { }); const searchedGranule = JSON.parse(searchResults.body).results[0]; - expect(searchedGranule).toEqual(jasmine.objectContaining(randomGranuleRecord)); + // TODO CUMULUS-3698 includes files + expect(searchedGranule).toEqual(jasmine.objectContaining(omit(randomGranuleRecord, 'files'))); }); it('can modify the granule via API.', async () => { diff --git a/packages/api/endpoints/granules.js b/packages/api/endpoints/granules.js index 0f4b2cc1f55..f25e5bb262c 100644 --- a/packages/api/endpoints/granules.js +++ b/packages/api/endpoints/granules.js @@ -32,7 +32,6 @@ const { recordNotFoundString, multipleRecordFoundString, } = require('@cumulus/es-client/search'); -const ESSearchAfter = require('@cumulus/es-client/esSearchAfter'); const { deleteGranuleAndFiles } = require('../src/lib/granule-delete'); const { zodParser } = require('../src/zod-utils'); @@ -105,25 +104,9 @@ async function list(req, res) { log.trace(`list query ${JSON.stringify(req.query)}`); const { getRecoveryStatus, ...queryStringParameters } = req.query; - let es; - if (queryStringParameters.searchContext) { - es = new ESSearchAfter( - { queryStringParameters }, - 'granule', - process.env.ES_INDEX - ); - } else { - es = new Search({ queryStringParameters }, 'granule', process.env.ES_INDEX); - } - let result; - // TODO the condition should be removed after we support all the query parameters - if (Object.keys(queryStringParameters).filter((item) => !['limit', 'page', 'sort_key'].includes(item)).length === 0) { - log.debug('list perform db search'); - const dbSearch = new GranuleSearch({ queryStringParameters }); - result = await dbSearch.query(); - } else { - result = await es.query(); - } + const dbSearch = new GranuleSearch({ queryStringParameters }); + const result = await dbSearch.query(); + if (getRecoveryStatus === 'true') { return res.send(await addOrcaRecoveryStatus(result)); } diff --git a/packages/api/tests/endpoints/test-granules.js b/packages/api/tests/endpoints/test-granules.js index 90b8cd905a1..fc6f9425889 100644 --- a/packages/api/tests/endpoints/test-granules.js +++ b/packages/api/tests/endpoints/test-granules.js @@ -288,6 +288,7 @@ test.beforeEach(async (t) => { 
const granuleId1 = t.context.createGranuleId(); const granuleId2 = t.context.createGranuleId(); const granuleId3 = t.context.createGranuleId(); + const timestamp = new Date(); // create fake Postgres granule records t.context.fakePGGranules = [ @@ -299,21 +300,24 @@ test.beforeEach(async (t) => { cmr_link: 'https://cmr.uat.earthdata.nasa.gov/search/granules.json?concept_id=A123456789-TEST_A', duration: 47.125, - timestamp: new Date(Date.now()), + timestamp, + updated_at: timestamp, }), fakeGranuleRecordFactory({ granule_id: granuleId2, status: 'failed', collection_cumulus_id: t.context.collectionCumulusId, duration: 52.235, - timestamp: new Date(Date.now()), + timestamp, + updated_at: timestamp, }), fakeGranuleRecordFactory({ granule_id: granuleId3, status: 'failed', collection_cumulus_id: t.context.collectionCumulusId, duration: 52.235, - timestamp: new Date(Date.now()), + timestamp, + updated_at: timestamp, }), // granule with same granule_id as above but different collection_cumulus_id fakeGranuleRecordFactory({ @@ -321,7 +325,8 @@ test.beforeEach(async (t) => { status: 'failed', collection_cumulus_id: t.context.collectionCumulusId2, duration: 52.235, - timestamp: new Date(Date.now()), + timestamp, + updated_at: timestamp, }), ]; @@ -456,7 +461,7 @@ test.serial('default lists and paginates correctly from querying database', asyn const { meta, results } = response.body; t.is(results.length, 4); t.is(meta.stack, process.env.stackName); - t.is(meta.table, 'granule'); + t.is(meta.table, 'granules'); t.is(meta.count, 4); results.forEach((r) => { t.true(granuleIds.includes(r.granuleId)); @@ -487,6 +492,41 @@ test.serial('default lists and paginates correctly from querying database', asyn t.not(results[0].granuleId, newResults[0].granuleId); }); +test.serial('LIST endpoint returns search result correctly', async (t) => { + const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); + const searchParams = new URLSearchParams({ + granuleId: granuleIds[3], + }); + const response = await request(app) + .get(`/granules?limit=1&page=2&${searchParams}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const { meta, results } = response.body; + t.is(meta.count, 2); + t.is(results.length, 1); + t.true([granuleIds[2], granuleIds[3]].includes(results[0].granuleId)); + + const newSearchParams = new URLSearchParams({ + collectionId: t.context.collectionId, + status: 'failed', + duration: 52.235, + timestamp: t.context.fakePGGranules[0].timestamp.getTime(), + }); + const newResponse = await request(app) + .get(`/granules?${newSearchParams}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const { meta: newMeta, results: newResults } = newResponse.body; + t.is(newMeta.count, 2); + t.is(newResults.length, 2); + const newResultIds = newResults.map((g) => g.granuleId); + t.deepEqual([granuleIds[1], granuleIds[2]].sort(), newResultIds.sort()); +}); + test.serial('CUMULUS-911 GET without pathParameters and without an Authorization header returns an Authorization Missing response', async (t) => { const response = await request(app) .get('/granules') diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 00b703e9897..dd1fc0cd063 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -1,8 +1,11 @@ import { Knex } from 'knex'; import Logger from '@cumulus/logger'; -import { getKnexClient } from '../connection'; + 
import { BaseRecord } from '../types/base'; +import { getKnexClient } from '../connection'; +import { TableNames } from '../tables'; import { DbQueryParameters, QueryEvent, QueryStringParameters } from '../types/search'; +import { convertQueryStringToDbQueryParameters } from './queries'; const log = new Logger({ sender: '@cumulus/db/BaseSearch' }); @@ -15,32 +18,35 @@ export type Meta = { count?: number, }; +const typeToTable: { [key: string]: string } = { + asyncOperation: TableNames.asyncOperations, + collection: TableNames.collections, + execution: TableNames.executions, + granule: TableNames.granules, + pdr: TableNames.pdrs, + provider: TableNames.providers, + rule: TableNames.rules, +}; + /** * Class to build and execute db search query */ class BaseSearch { - readonly type?: string; + readonly type: string; readonly queryStringParameters: QueryStringParameters; // parsed from queryStringParameters for query build dbQueryParameters: DbQueryParameters = {}; - constructor(event: QueryEvent, type?: string) { + constructor(event: QueryEvent, type: string) { this.type = type; this.queryStringParameters = event?.queryStringParameters ?? {}; - this.dbQueryParameters.page = Number.parseInt( - (this.queryStringParameters.page) ?? '1', - 10 - ); - this.dbQueryParameters.limit = Number.parseInt( - (this.queryStringParameters.limit) ?? '10', - 10 + this.dbQueryParameters = convertQueryStringToDbQueryParameters( + this.type, this.queryStringParameters ); - this.dbQueryParameters.offset = (this.dbQueryParameters.page - 1) - * this.dbQueryParameters.limit; } /** - * build the search query + * Build the search query * * @param knex - DB client * @returns queries for getting count and search result @@ -51,14 +57,19 @@ class BaseSearch { searchQuery: Knex.QueryBuilder, } { const { countQuery, searchQuery } = this.buildBasicQuery(knex); - if (this.dbQueryParameters.limit) searchQuery.limit(this.dbQueryParameters.limit); - if (this.dbQueryParameters.offset) searchQuery.offset(this.dbQueryParameters.offset); + this.buildTermQuery({ countQuery, searchQuery }); + this.buildInfixPrefixQuery({ countQuery, searchQuery }); + const { limit, offset } = this.dbQueryParameters; + if (limit) searchQuery.limit(limit); + if (offset) searchQuery.offset(offset); + + log.debug(`_buildSearch returns countQuery: ${countQuery.toSQL().sql}, searchQuery: ${searchQuery.toSQL().sql}`); return { countQuery, searchQuery }; } /** - * metadata template for query result + * Get metadata template for query result * * @returns metadata template */ @@ -66,12 +77,12 @@ class BaseSearch { return { name: 'cumulus-api', stack: process.env.stackName, - table: this.type, + table: this.type && typeToTable[this.type], }; } /** - * build basic query + * Build basic query * * @param knex - DB client * @throws - function is not implemented @@ -84,6 +95,46 @@ class BaseSearch { throw new Error('buildBasicQuery is not implemented'); } + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + log.debug(`buildInfixPrefixQuery is not implemented ${Object.keys(params)}`); + throw new Error('buildInfixPrefixQuery is not implemented'); + } + + /** + * Build queries for term fields + * + * @param params + * 
@param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildTermQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const table = typeToTable[this.type]; + const { countQuery, searchQuery, dbQueryParameters } = params; + const { term = {} } = dbQueryParameters || this.dbQueryParameters; + + Object.entries(term).forEach(([name, value]) => { + countQuery.where(`${table}.${name}`, value); + searchQuery.where(`${table}.${name}`, value); + }); + } + /** * Translate postgres records to api records * @@ -96,7 +147,7 @@ class BaseSearch { } /** - * build and execute search query + * Build and execute search query * * @param testKnex - knex for testing * @returns search result diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts index 8ff2ec6eb74..b875dae52fe 100644 --- a/packages/db/src/search/GranuleSearch.ts +++ b/packages/db/src/search/GranuleSearch.ts @@ -1,17 +1,18 @@ import { Knex } from 'knex'; +import omit from 'lodash/omit'; +import pick from 'lodash/pick'; import { ApiGranuleRecord } from '@cumulus/types/api/granules'; import Logger from '@cumulus/logger'; import { BaseRecord } from '../types/base'; import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; import { PostgresGranuleRecord } from '../types/granule'; -import { QueryEvent } from '../types/search'; - -import { TableNames } from '../tables'; import { translatePostgresGranuleToApiGranuleWithoutDbQuery } from '../translate/granules'; +import { TableNames } from '../tables'; -const log = new Logger({ sender: '@cumulus/db/BaseSearch' }); +const log = new Logger({ sender: '@cumulus/db/GranuleSearch' }); export interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { cumulus_id: number, @@ -25,6 +26,8 @@ export interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { providerName?: string, } +const foreignFields = ['collectionName', 'collectionVersion', 'providerName', 'pdrName']; + /** * Class to build and execute db search query for granules */ @@ -33,8 +36,23 @@ export class GranuleSearch extends BaseSearch { super(event, 'granule'); } + private searchCollection(): boolean { + const term = this.dbQueryParameters.term; + return !!(term && (term.collectionName || term.collectionVersion)); + } + + private searchPdr(): boolean { + const term = this.dbQueryParameters.term; + return !!(term && term.pdrName); + } + + private searchProvider(): boolean { + const term = this.dbQueryParameters.term; + return !!(term && term.providerName); + } + /** - * build basic query + * Build basic query * * @param knex - DB client * @returns queries for getting count and search result @@ -61,19 +79,114 @@ export class GranuleSearch extends BaseSearch { collectionVersion: `${collectionsTable}.version`, pdrName: `${pdrsTable}.name`, }) - .innerJoin(collectionsTable, `${granulesTable}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`) - .leftJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`) - .leftJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + .innerJoin(collectionsTable, `${granulesTable}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + + if (this.searchCollection()) { + countQuery.innerJoin(collectionsTable, 
`${granulesTable}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + } + + if (this.searchProvider()) { + countQuery.innerJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + searchQuery.innerJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + } else { + searchQuery.leftJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + } + + if (this.searchPdr()) { + countQuery.innerJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + searchQuery.innerJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + } else { + searchQuery.leftJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + } return { countQuery, searchQuery }; } + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { granules: granulesTable } = TableNames; + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters || this.dbQueryParameters; + if (infix) { + countQuery.whereLike(`${granulesTable}.granule_id`, `%${infix}%`); + searchQuery.whereLike(`${granulesTable}.granule_id`, `%${infix}%`); + } + if (prefix) { + countQuery.whereLike(`${granulesTable}.granule_id`, `${prefix}%`); + searchQuery.whereLike(`${granulesTable}.granule_id`, `${prefix}%`); + } + } + + /** + * Build queries for term fields + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildTermQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { + granules: granulesTable, + collections: collectionsTable, + providers: providersTable, + pdrs: pdrsTable, + } = TableNames; + + const { countQuery, searchQuery, dbQueryParameters } = params; + const { term = {} } = dbQueryParameters || this.dbQueryParameters; + + Object.entries(term).forEach(([name, value]) => { + if (name === 'collectionName') { + countQuery.where(`${collectionsTable}.name`, value); + searchQuery.where(`${collectionsTable}.name`, value); + } + if (name === 'collectionVersion') { + countQuery.where(`${collectionsTable}.version`, value); + searchQuery.where(`${collectionsTable}.version`, value); + } + if (name === 'providerName') { + countQuery.where(`${providersTable}.name`, value); + searchQuery.where(`${providersTable}.name`, value); + } + if (name === 'pdrName') { + countQuery.where(`${pdrsTable}.name`, value); + searchQuery.where(`${pdrsTable}.name`, value); + } + if (name === 'error.Error') { + countQuery.whereRaw(`${granulesTable}.error->>'Error' = '${value}'`); + searchQuery.whereRaw(`${granulesTable}.error->>'Error' = '${value}'`); + } + }); + + super.buildTermQuery({ + ...params, + dbQueryParameters: { term: omit(term, foreignFields, 'error.Error') }, + }); + } + /** * Translate postgres records to api records * * @param pgRecords - postgres records returned from query * @returns translated api records */ - protected 
translatePostgresRecordsToApiRecords(pgRecords: GranuleRecord[]) : ApiGranuleRecord[] { + protected translatePostgresRecordsToApiRecords(pgRecords: GranuleRecord[]) + : Partial[] { log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); const apiRecords = pgRecords.map((item: GranuleRecord) => { const granulePgRecord = item; @@ -84,9 +197,12 @@ export class GranuleSearch extends BaseSearch { }; const pdr = item.pdrName ? { name: item.pdrName } : undefined; const providerPgRecord = item.providerName ? { name: item.providerName } : undefined; - return translatePostgresGranuleToApiGranuleWithoutDbQuery({ + const apiRecord = translatePostgresGranuleToApiGranuleWithoutDbQuery({ granulePgRecord, collectionPgRecord, pdr, providerPgRecord, }); + return this.dbQueryParameters.fields + ? pick(apiRecord, this.dbQueryParameters.fields) + : apiRecord; }); return apiRecords; } diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts new file mode 100644 index 00000000000..64a243ff618 --- /dev/null +++ b/packages/db/src/search/field-mapping.ts @@ -0,0 +1,223 @@ +import { deconstructCollectionId } from '@cumulus/message/Collections'; +import Logger from '@cumulus/logger'; + +const log = new Logger({ sender: '@cumulus/db/field-mapping' }); + +// functions to map the api search string field name and value to postgres db field +const granuleMapping: { [key: string]: Function } = { + beginningDateTime: (value?: string) => ({ + beginning_date_time: value, + }), + cmrLink: (value?: string) => ({ + cmr_link: value, + }), + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + duration: (value?: string) => ({ + duration: value && Number(value), + }), + endingDateTime: (value?: string) => ({ + ending_date_time: value, + }), + granuleId: (value?: string) => ({ + granule_id: value, + }), + lastUpdateDateTime: (value?: string) => ({ + last_update_date_time: value, + }), + processingEndDateTime: (value?: string) => ({ + processing_end_date_time: value, + }), + processingStartDateTime: (value?: string) => ({ + processing_start_date_time: value, + }), + productionDateTime: (value?: string) => ({ + production_date_time: value, + }), + productVolume: (value?: string) => ({ + product_volume: value, + }), + published: (value?: string) => ({ + published: (value === 'true'), + }), + status: (value?: string) => ({ + status: value, + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + timeToArchive: (value?: string) => ({ + time_to_archive: Number(value), + }), + timeToPreprocess: (value?: string) => ({ + time_to_process: Number(value), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + // nested error field + 'error.Error': (value?: string) => ({ + 'error.Error': value, + }), + // The following fields require querying other tables + collectionId: (value?: string) => { + const { name, version } = (value && deconstructCollectionId(value)) || {}; + return { + collectionName: name, + collectionVersion: version, + }; + }, + provider: (value?: string) => ({ + providerName: value, + }), + pdrName: (value?: string) => ({ + pdrName: value, + }), +}; + +// TODO add and verify all queryable fields for the following record types +const asyncOperationMapping : { [key: string]: Function } = { + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + id: (value?: string) => ({ + id: value, + }), + operationType: 
(value?: string) => ({ + operation_type: value, + }), + status: (value?: string) => ({ + status: value, + }), + taskArn: (value?: string) => ({ + task_arn: value, + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), +}; + +const collectionMapping : { [key: string]: Function } = { + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + name: (value?: string) => ({ + name: value, + }), + version: (value?: string) => ({ + version: value, + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), +}; + +const executionMapping : { [key: string]: Function } = { + arn: (value?: string) => ({ + arn: value, + }), + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + execution: (value?: string) => ({ + url: value, + }), + status: (value?: string) => ({ + status: value, + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), +}; + +const pdrMapping : { [key: string]: Function } = { + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + pdrName: (value?: string) => ({ + name: value, + }), + status: (value?: string) => ({ + status: value, + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), +}; + +const providerMapping : { [key: string]: Function } = { + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + id: (value?: string) => ({ + name: value, + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), +}; + +const ruleMapping : { [key: string]: Function } = { + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + name: (value?: string) => ({ + name: value, + }), + state: (value?: string) => ({ + enabled: (value === 'ENABLED'), + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), +}; + +// type and its mapping +const supportedMappings: { [key: string]: any } = { + granule: granuleMapping, + asyncOperation: asyncOperationMapping, + collection: collectionMapping, + execution: executionMapping, + pdr: pdrMapping, + provider: providerMapping, + rule: ruleMapping, +}; + +/** + * Map query string field to db field + * + * @param type - query record type + * @param queryField - query field + * @param queryField.name - query field value + * @param [queryField.value] - query field value + * @returns db field + */ +export const mapQueryStringFieldToDbField = ( + type: string, + queryField: { name: string, value?: string } +): { [key: string]: any } | undefined => { + if (!(supportedMappings[type] && supportedMappings[type][queryField.name])) { + log.warn(`No db mapping field found for type: ${type}, field ${JSON.stringify(queryField)}`); + return undefined; + } + return supportedMappings[type] && supportedMappings[type][queryField.name](queryField.value); +}; 
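
A minimal usage sketch of the field mapping introduced above (not part of the patch itself; the field values are illustrative and the relative import path assumes a caller alongside packages/db/src/search/field-mapping.ts):

// Map API query-string fields to postgres column objects via the granule mapping.
import { mapQueryStringFieldToDbField } from './field-mapping';

// granuleId maps to the granule_id column:
// returns { granule_id: 'fakeGranuleId' }
const granuleIdTerm = mapQueryStringFieldToDbField('granule', {
  name: 'granuleId',
  value: 'fakeGranuleId',
});

// timestamp maps to updated_at as a Date built from epoch milliseconds:
// returns { updated_at: new Date(1579352700000) }
const timestampTerm = mapQueryStringFieldToDbField('granule', {
  name: 'timestamp',
  value: '1579352700000',
});

// Fields with no mapping for the record type log a warning and return undefined.
const unmapped = mapQueryStringFieldToDbField('granule', { name: 'noSuchField' });
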
diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts new file mode 100644 index 00000000000..32bf6ac0482 --- /dev/null +++ b/packages/db/src/search/queries.ts @@ -0,0 +1,100 @@ +import omit from 'lodash/omit'; +import Logger from '@cumulus/logger'; +import { DbQueryParameters, QueryStringParameters } from '../types/search'; +import { mapQueryStringFieldToDbField } from './field-mapping'; + +const log = new Logger({ sender: '@cumulus/db/queries' }); + +// reserved words which are not record fields +const reservedWords = [ + 'limit', + 'page', + 'skip', + 'sort_by', + 'sort_key', + 'order', + 'prefix', + 'infix', + 'fields', + 'searchContext', +]; + +/** + * regexp for matching api query string parameter to query type + */ +const regexes: { [key: string]: RegExp } = { + terms: /^(.*)__in$/, + term: /^((?!__).)*$/, + not: /^(.*)__not$/, + exists: /^(.*)__exists$/, + range: /^(.*)__(from|to)$/, +}; + +/** + * Conert term query fields to db query parameters from api query string fields + * + * @param type - query record type + * @param queryStringFields - api query fields + * @returns term query parameter + */ +const convertTerm = ( + type: string, + queryStringFields: { name: string, value: string }[] +): { term: { [key: string]: any } } => { + const term = queryStringFields.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField(type, queryField); + return { ...acc, ...queryParam }; + }, {}); + + return { term }; +}; + +/** + * functions for converting from api query string parameters to db query parameters + * for each type of query + */ +const convert: { [key: string]: Function } = { + term: convertTerm, +}; + +/** + * Convert api query string parameters to db query parameters + * + * @param type - query record type + * @param queryStringParameters - query string parameters + * @returns db query parameters + */ +export const convertQueryStringToDbQueryParameters = ( + type: string, + queryStringParameters: QueryStringParameters +): DbQueryParameters => { + const { limit, page, prefix, infix, fields } = queryStringParameters; + + const dbQueryParameters: DbQueryParameters = {}; + dbQueryParameters.page = Number.parseInt(page ?? '1', 10); + dbQueryParameters.limit = Number.parseInt(limit ?? 
'10', 10); + dbQueryParameters.offset = (dbQueryParameters.page - 1) * dbQueryParameters.limit; + + if (typeof infix === 'string') dbQueryParameters.infix = infix; + if (typeof prefix === 'string') dbQueryParameters.prefix = prefix; + if (typeof fields === 'string') dbQueryParameters.fields = fields.split(','); + + // remove reserved words (that are not fields) + const fieldParams = omit(queryStringParameters, reservedWords); + // determine which search strategy should be applied + // options are term, terms, range, exists and not in + const fieldsList = Object.entries(fieldParams).map(([name, value]) => ({ name, value })); + + // for each search strategy, get all parameters and convert them to db parameters + Object.keys(regexes).forEach((k: string) => { + const matchedFields = fieldsList.filter((f) => f.name.match(regexes[k])); + + if (matchedFields && matchedFields.length > 0 && convert[k]) { + const queryParams = convert[k](type, matchedFields, regexes[k]); + Object.assign(dbQueryParameters, queryParams); + } + }); + + log.debug(`convertQueryStringToDbQueryParameters returns ${JSON.stringify(dbQueryParameters)}`); + return dbQueryParameters; +}; diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index 50a3664ef48..1a40a093833 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -1,6 +1,12 @@ export type QueryStringParameters = { + fields?: string, + infix?: string, limit?: string, page?: string, + order?: string, + prefix?: string, + sort_by?: string, + sort_key?: string, [key: string]: string | string[] | undefined, }; @@ -9,7 +15,12 @@ export type QueryEvent = { }; export type DbQueryParameters = { + infix?: string, limit?: number, offset?: number, page?: number, + prefix?: string, + fields?: string[], + term?: { [key: string]: any }, + terms?: { [key: string]: any }, }; diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index a18690d70b0..ffad472c444 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -20,6 +20,14 @@ const { const testDbName = `granule_${cryptoRandomString({ length: 10 })}`; +// generate granuleId for infix and prefix search +const generateGranuleId = (num) => { + let granuleId = cryptoRandomString({ length: 10 }); + if (num % 30 === 0) granuleId = `${cryptoRandomString({ length: 5 })}infix${cryptoRandomString({ length: 5 })}`; + if (num % 50 === 0) granuleId = `prefix${cryptoRandomString({ length: 10 })}`; + return granuleId; +}; + test.before(async (t) => { const { knexAdmin, knex } = await generateLocalTestDb( testDbName, @@ -89,19 +97,57 @@ test.before(async (t) => { t.context.pdrCumulusId = pgPdr.cumulus_id; // Create Granule + t.context.granuleSearchFields = { + beginningDateTime: '2020-03-16T19:50:24.757Z', + cmrLink: 'https://fakeLink', + duration: '6.8', + endingDateTime: '2020-03-17T10:00:00.000Z', + lastUpdateDateTime: '2020-03-18T10:00:00.000Z', + processingEndDateTime: '2020-03-16T10:00:00.000Z', + productVolume: '600', + timeToArchive: '700.29', + timeToPreprocess: '800.18', + status: 'failed', + timestamp: 1579352700000, + updatedAt: 1579352700000, + }; + + const error = { + Cause: 'cause string', + Error: 'CumulusMessageAdapterExecutionError', + }; + t.context.granulePgModel = new GranulePgModel(); t.context.pgGranules = await t.context.granulePgModel.insert( knex, range(100).map((num) => fakeGranuleRecordFactory({ + granule_id: generateGranuleId(num), 
collection_cumulus_id: (num % 2) ? t.context.collectionCumulusId : t.context.collectionCumulusId2, - pdr_cumulus_id: t.context.pdrCumulusId, - provider_cumulus_id: t.context.providerCumulusId, + pdr_cumulus_id: !(num % 2) ? t.context.pdrCumulusId : undefined, + provider_cumulus_id: !(num % 2) ? t.context.providerCumulusId : undefined, + beginning_date_time: !(num % 2) + ? new Date(t.context.granuleSearchFields.beginningDateTime) : undefined, + cmr_link: !(num % 100) ? t.context.granuleSearchFields.cmrLink : undefined, + duration: !(num % 2) ? Number(t.context.granuleSearchFields.duration) : undefined, + ending_date_time: !(num % 2) + ? new Date(t.context.granuleSearchFields.endingDateTime) : new Date(), + error: !(num % 2) ? JSON.stringify(error) : undefined, + last_update_date_time: !(num % 2) + ? t.context.granuleSearchFields.lastUpdateDateTime : undefined, + published: !!(num % 2), + product_volume: !(num % 5) ? Number(t.context.granuleSearchFields.productVolume) : undefined, + time_to_archive: !(num % 10) + ? Number(t.context.granuleSearchFields.timeToArchive) : undefined, + time_to_process: !(num % 20) + ? Number(t.context.granuleSearchFields.timeToPreprocess) : undefined, + status: !(num % 2) ? t.context.granuleSearchFields.status : 'completed', + updated_at: !(num % 2) ? new Date(t.context.granuleSearchFields.timestamp) : undefined, })) ); }); -test('Granule search returns 10 granule records by default', async (t) => { +test('GranuleSearch returns 10 granule records by default', async (t) => { const { knex } = t.context; const dbSearch = new GranuleSearch(); const response = await dbSearch.query(knex); @@ -112,12 +158,12 @@ test('Granule search returns 10 granule records by default', async (t) => { t.is(apiGranules.length, 10); const validatedRecords = apiGranules.filter((granule) => ( [t.context.collectionId, t.context.collectionId2].includes(granule.collectionId) - && granule.provider === t.context.provider.name - && granule.pdrName === t.context.pdr.name)); + && (!granule.provider || granule.provider === t.context.provider.name) + && (!granule.pdrName || granule.pdrName === t.context.pdr.name))); t.is(validatedRecords.length, apiGranules.length); }); -test('Granule search supports page and limit params', async (t) => { +test('GranuleSearch supports page and limit params', async (t) => { const { knex } = t.context; let queryStringParameters = { limit: 20, @@ -146,3 +192,198 @@ test('Granule search supports page and limit params', async (t) => { t.is(response.meta.count, 100); t.is(response.results?.length, 0); }); + +test('GranuleSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + infix: 'infix', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 3); + t.is(response.results?.length, 3); +}); + +test('GranuleSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + prefix: 'prefix', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 2); + t.is(response.results?.length, 2); +}); + +test('GranuleSearch supports collectionId term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + collectionId: t.context.collectionId2, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = 
await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports provider term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + provider: t.context.provider.name, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports pdrName term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + pdrName: t.context.pdr.name, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports term search for boolean field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + published: 'true', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports term search for date field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + beginningDateTime: t.context.granuleSearchFields.beginningDateTime, + endingDateTime: t.context.granuleSearchFields.endingDateTime, + lastUpdateDateTime: t.context.granuleSearchFields.lastUpdateDateTime, + updatedAt: t.context.granuleSearchFields.updatedAt, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports term search for number field', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 5, + duration: t.context.granuleSearchFields.duration, + productVolume: t.context.granuleSearchFields.productVolume, + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 10); + t.is(response.results?.length, 5); + + queryStringParameters = { + limit: 200, + timeToArchive: t.context.granuleSearchFields.timeToArchive, + timeToPreprocess: t.context.granuleSearchFields.timeToPreprocess, + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 5); + t.is(response.results?.length, 5); +}); + +test('GranuleSearch supports term search for string field', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + status: t.context.granuleSearchFields.status, + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + cmrLink: t.context.granuleSearchFields.cmrLink, + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('GranuleSearch supports term search for timestamp', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + timestamp: t.context.granuleSearchFields.timestamp, + }; + const dbSearch = new GranuleSearch({ 
queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports term search for nested error.Error', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + 'error.Error': 'CumulusMessageAdapterExecutionError', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports term search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + collectionId: t.context.collectionId2, + provider: t.context.provider.name, + 'error.Error': 'CumulusMessageAdapterExecutionError', + status: 'failed', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch non-existing fields are ignored', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); +}); + +test('GranuleSearch returns fields specified', async (t) => { + const { knex } = t.context; + const fields = 'granuleId,endingDateTime,collectionId,published,status'; + const queryStringParameters = { + fields, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 10); + response.results.forEach((granule) => t.deepEqual(Object.keys(granule), fields.split(','))); +}); diff --git a/packages/db/tests/search/test-field-mapping.js b/packages/db/tests/search/test-field-mapping.js new file mode 100644 index 00000000000..4a93a2d21a3 --- /dev/null +++ b/packages/db/tests/search/test-field-mapping.js @@ -0,0 +1,222 @@ +const test = require('ava'); +const { + mapQueryStringFieldToDbField, +} = require('../../dist/search/field-mapping'); + +test('mapQueryStringFieldToDbField converts an api field to db field', (t) => { + const querStringField = { name: 'beginningDateTime', value: '2017-10-24T00:00:00.000Z' }; + const dbQueryParam = mapQueryStringFieldToDbField('granule', querStringField); + const expectedResult = { beginning_date_time: '2017-10-24T00:00:00.000Z' }; + t.deepEqual(dbQueryParam, expectedResult); +}); + +test('mapQueryStringFieldToDbField returns undefined if the api field is not supported', (t) => { + const querStringField = { name: 'apiNoMatchingDbField', value: '2017-10-24T00:00:00.000Z' }; + const dbQueryParam = mapQueryStringFieldToDbField('granule', querStringField); + t.falsy(dbQueryParam); +}); + +test('mapQueryStringFieldToDbField correctly converts all granule api fields to db fields', (t) => { + const queryStringParameters = { + beginningDateTime: '2017-10-24T00:00:00.000Z', + cmrLink: 'example.com', + createdAt: '1591312763823', + duration: '26.939', + endingDateTime: '2017-11-08T23:59:59.000Z', + granuleId: 'MOD09GQ.A1657416.CbyoRi.006.9697917818587', + lastUpdateDateTime: '2018-04-25T21:45:45.524Z', + processingEndDateTime: '2018-09-24T23:28:45.731Z', + processingStartDateTime: 
'2018-09-24T22:52:34.578Z', + productionDateTime: '2018-07-19T12:01:01Z', + productVolume: '17956339', + published: 'true', + status: 'completed', + timestamp: '1576106371369', + timeToArchive: '5.6', + timeToPreprocess: '10.892', + 'error.Error': 'CumulusMessageAdapterExecutionError', + collectionId: 'MOD09GQ___006', + provider: 's3_provider', + pdrName: 'MOD09GQ_1granule_v3.PDR', + }; + + const expectedDbParameters = { + beginning_date_time: '2017-10-24T00:00:00.000Z', + cmr_link: 'example.com', + created_at: new Date(1591312763823), + duration: 26.939, + ending_date_time: '2017-11-08T23:59:59.000Z', + granule_id: 'MOD09GQ.A1657416.CbyoRi.006.9697917818587', + last_update_date_time: '2018-04-25T21:45:45.524Z', + processing_end_date_time: '2018-09-24T23:28:45.731Z', + processing_start_date_time: '2018-09-24T22:52:34.578Z', + production_date_time: '2018-07-19T12:01:01Z', + product_volume: '17956339', + published: true, + status: 'completed', + time_to_archive: 5.6, + time_to_process: 10.892, + updated_at: new Date(1576106371369), + 'error.Error': 'CumulusMessageAdapterExecutionError', + collectionName: 'MOD09GQ', + collectionVersion: '006', + providerName: 's3_provider', + pdrName: 'MOD09GQ_1granule_v3.PDR', + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('granule', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); + +test('mapQueryStringFieldToDbField correctly converts all asyncOperation api fields to db fields', (t) => { + const queryStringParameters = { + createdAt: '1591312763823', + id: '0eb8e809-8790-5409-1239-bcd9e8d28b8e', + operationType: 'Bulk Granule Delete', + taskArn: 'arn:aws:ecs:us-east-1:111111111111:task/d481e76e-f5fc-9c1c-2411-fa13779b111a', + status: 'SUCCEEDED', + timestamp: '1591384094512', + }; + + const expectedDbParameters = { + created_at: new Date(1591312763823), + id: '0eb8e809-8790-5409-1239-bcd9e8d28b8e', + operation_type: 'Bulk Granule Delete', + task_arn: 'arn:aws:ecs:us-east-1:111111111111:task/d481e76e-f5fc-9c1c-2411-fa13779b111a', + status: 'SUCCEEDED', + updated_at: new Date(1591384094512), + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('asyncOperation', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); + +test('mapQueryStringFieldToDbField correctly converts all collection api fields to db fields', (t) => { + const queryStringParameters = { + createdAt: '1591312763823', + name: 'MOD11A1', + version: '006', + updatedAt: 1591384094512, + }; + + const expectedDbParameters = { + created_at: new Date(1591312763823), + name: 'MOD11A1', + version: '006', + updated_at: new Date(1591384094512), + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('collection', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); + +test('mapQueryStringFieldToDbField correctly converts all execution api fields to db fields', (t) => { + const 
queryStringParameters = { + arn: 'https://example.com/arn', + createdAt: '1591312763823', + execution: 'https://example.com', + status: 'completed', + updatedAt: 1591384094512, + }; + + const expectedDbParameters = { + arn: 'https://example.com/arn', + created_at: new Date(1591312763823), + url: 'https://example.com', + status: 'completed', + updated_at: new Date(1591384094512), + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('execution', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); + +test('mapQueryStringFieldToDbField correctly converts all pdr api fields to db fields', (t) => { + const queryStringParameters = { + createdAt: '1591312763823', + pdrName: 'fakePdrName', + status: 'completed', + updatedAt: 1591384094512, + }; + + const expectedDbParameters = { + created_at: new Date(1591312763823), + name: 'fakePdrName', + status: 'completed', + updated_at: new Date(1591384094512), + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('pdr', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); + +test('mapQueryStringFieldToDbField correctly converts all provider api fields to db fields', (t) => { + const queryStringParameters = { + createdAt: '1591312763823', + id: 'fakeProviderId', + updatedAt: 1591384094512, + }; + + const expectedDbParameters = { + created_at: new Date(1591312763823), + name: 'fakeProviderId', + updated_at: new Date(1591384094512), + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('provider', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); + +test('mapQueryStringFieldToDbField correctly converts all rule api fields to db fields', (t) => { + const queryStringParameters = { + createdAt: '1591312763823', + name: 'fakePdrName', + state: 'DISABLED', + updatedAt: 1591384094512, + }; + + const expectedDbParameters = { + created_at: new Date(1591312763823), + name: 'fakePdrName', + enabled: false, + updated_at: new Date(1591384094512), + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('rule', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); diff --git a/packages/db/tests/search/test-queries.js b/packages/db/tests/search/test-queries.js new file mode 100644 index 00000000000..4de313d81d0 --- /dev/null +++ b/packages/db/tests/search/test-queries.js @@ -0,0 +1,38 @@ +const test = require('ava'); +const { + convertQueryStringToDbQueryParameters, +} = require('../../dist/search/queries'); + +test('convertQueryStringToDbQueryParameters correctly converts api query string parameters to db query parameters', (t) => { + const queryStringParameters = { + fields: 'granuleId,collectionId,status,updatedAt', + 
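+    // fields is split into an array, and limit/page/prefix/infix are reserved words handled
+    // outside the term conversion, so only the remaining entries become term parameters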
infix: 'A1657416', + limit: 20, + page: 3, + prefix: 'MO', + published: 'true', + status: 'completed', + 'error.Error': 'CumulusMessageAdapterExecutionError', + collectionId: 'MOD09GQ___006', + nonExistingField: 'nonExistingFieldValue', + }; + + const expectedDbQueryParameters = { + fields: ['granuleId', 'collectionId', 'status', 'updatedAt'], + infix: 'A1657416', + limit: 20, + offset: 40, + page: 3, + prefix: 'MO', + term: { + collectionName: 'MOD09GQ', + collectionVersion: '006', + published: true, + status: 'completed', + 'error.Error': 'CumulusMessageAdapterExecutionError', + }, + }; + + const dbQueryParams = convertQueryStringToDbQueryParameters('granule', queryStringParameters); + t.deepEqual(dbQueryParams, expectedDbQueryParameters); +}); From 6744454fe25f8172992e8815e425fec6e0bf95cd Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Fri, 17 May 2024 11:33:11 -0400 Subject: [PATCH 03/61] CUMULUS-3689: Update Stats/Summary and Stats/Aggregate endpoints to use psql (#3659) * first commit on new branch * CHANGELOG change * small fix * PR feedback * adding jsdoc + fixing spelling/grammar --- CHANGELOG.md | 5 + packages/api/endpoints/stats.js | 31 +- packages/api/tests/endpoints/stats.js | 203 +++++---- packages/db/.nycrc.json | 4 +- packages/db/src/index.ts | 3 + packages/db/src/search/BaseSearch.ts | 22 +- packages/db/src/search/StatsSearch.ts | 293 +++++++++++++ packages/db/src/types/search.ts | 1 + packages/db/tests/search/test-StatsSearch.js | 436 +++++++++++++++++++ 9 files changed, 879 insertions(+), 119 deletions(-) create mode 100644 packages/db/src/search/StatsSearch.ts create mode 100644 packages/db/tests/search/test-StatsSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index df6d3a3afc3..da197138b77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
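For orientation, a minimal sketch of how the search classes introduced in these patches are driven. It is illustrative only: it assumes a configured knex client, and the parameter values are borrowed from the tests above.

```js
const { GranuleSearch, StatsSearch } = require('@cumulus/db');

// List granules with basic term filters; the query string parameters have the same
// shape the granules List endpoint receives.
const listFailedGranules = async (knex) => {
  const search = new GranuleSearch({
    queryStringParameters: { limit: '20', page: '1', status: 'failed', collectionId: 'MOD09GQ___006' },
  });
  return search.query(knex); // resolves to { meta, results }
};

// Count granules grouped by status, as the stats/aggregate endpoint does.
const aggregateGranuleStatuses = async (knex) => {
  const stats = new StatsSearch({ queryStringParameters: { field: 'status' } }, 'granule');
  return stats.aggregate(knex); // resolves to { meta, count }
};
```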
### Replace ElasticSearch Phase 1 +- **CUMULUS-3688** + - Updated `stats` api endpoint to query postgres instead of elasticsearch +- **CUMULUS-3689** + - Updated `stats/aggregate` api endpoint to query postgres instead of elasticsearch + - Created a new StatsSearch class for querying postgres with the stats endpoint - **CUMULUS-3692** - Added `@cumulus/db/src/search` `BaseSearch` and `GranuleSearch` classes to support basic queries for granules diff --git a/packages/api/endpoints/stats.js b/packages/api/endpoints/stats.js index ed73e8b0d08..a94a8bdd085 100644 --- a/packages/api/endpoints/stats.js +++ b/packages/api/endpoints/stats.js @@ -2,7 +2,8 @@ const router = require('express-promise-router')(); const get = require('lodash/get'); -const Stats = require('@cumulus/es-client/stats'); +const { StatsSearch } = require('@cumulus/db'); +const omit = require('lodash/omit'); /** * Map requested stats types to supported types @@ -34,17 +35,10 @@ function getType(req) { * @returns {Promise} the promise of express response object */ async function summary(req, res) { - const params = req.query; - - params.timestamp__from = Number.parseInt(get( - params, - 'timestamp__from', - 0 - ), 10); - params.timestamp__to = Number.parseInt(get(params, 'timestamp__to', Date.now()), 10); - - const stats = new Stats({ queryStringParameters: params }, undefined, process.env.ES_INDEX); - const r = await stats.query(); + const stats = new StatsSearch({ + queryStringParameters: omit(req.query, 'type'), + }, 'granule'); + const r = await stats.summary(); return res.send(r); } @@ -56,13 +50,12 @@ async function summary(req, res) { * @returns {Promise} the promise of express response object */ async function aggregate(req, res) { - const type = getType(req); - - const stats = new Stats({ - queryStringParameters: req.query, - }, type, process.env.ES_INDEX); - const r = await stats.count(); - return res.send(r); + if (getType(req)) { + const stats = new StatsSearch({ queryStringParameters: omit(req.query, 'type') }, getType(req)); + const r = await stats.aggregate(); + return res.send(r); + } + return res.boom.badRequest('Type must be included in Stats Aggregate query string parameters'); } router.get('/aggregate/:type?', aggregate); diff --git a/packages/api/tests/endpoints/stats.js b/packages/api/tests/endpoints/stats.js index 5c1642dcd95..e0612a8392f 100644 --- a/packages/api/tests/endpoints/stats.js +++ b/packages/api/tests/endpoints/stats.js @@ -3,51 +3,59 @@ const test = require('ava'); const request = require('supertest'); const rewire = require('rewire'); -const sinon = require('sinon'); +const range = require('lodash/range'); const awsServices = require('@cumulus/aws-client/services'); const s3 = require('@cumulus/aws-client/S3'); const { randomId } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const indexer = rewire('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); const models = require('../../models'); const { - fakeGranuleFactoryV2, - fakeCollectionFactory, createFakeJwtAuthToken, setAuthorizedOAuthUsers, } = require('../../lib/testUtils'); +const { + destroyLocalTestDb, + generateLocalTestDb, + GranulePgModel, + CollectionPgModel, + fakeCollectionRecordFactory, + fakeGranuleRecordFactory, + migrationDir, + localStackConnectionEnv, +} = require('../../../db/dist'); + +const testDbName = randomId('collection'); + const assertions = require('../../lib/assertions'); const stats = 
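+// rewire the stats endpoint module so the tests can reach the module-private getType helper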
rewire('../../endpoints/stats'); const getType = stats.__get__('getType'); -let esClient; +// import the express app after setting the env variables +const { app } = require('../../app'); -process.env.AccessTokensTable = randomId('accessTokenTable'); +let accessTokenModel; +let jwtAuthToken; +process.env.PG_HOST = randomId('hostname'); +process.env.PG_USER = randomId('user'); +process.env.PG_PASSWORD = randomId('password'); +process.env.stackName = randomId('userstack'); +process.env.AccessTokensTable = randomId('tokentable'); process.env.system_bucket = randomId('bucket'); process.env.stackName = randomId('stackName'); - -const esIndex = randomId('esindex'); -const esAlias = randomId('esAlias'); - -process.env.ES_INDEX = esAlias; process.env.TOKEN_SECRET = randomId('tokensecret'); -// import the express app after setting the env variables -const { app } = require('../../app'); - -let accessTokenModel; -let jwtAuthToken; +process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: testDbName, +}; -test.before(async () => { - // create buckets +test.before(async (t) => { await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); - esClient = await getEsClient(); const username = randomId(); await setAuthorizedOAuthUsers([username]); @@ -56,48 +64,61 @@ test.before(async () => { jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - // create the elasticsearch index and add mapping - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); - // Index test data - 2 collections, 3 granules - await Promise.all([ - indexer.indexCollection(esClient, fakeCollectionFactory(), esAlias), - indexer.indexCollection(esClient, fakeCollectionFactory(), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ collectionId: 'coll1' }), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ collectionId: 'coll1' }), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ status: 'failed', duration: 3 }), esAlias), - ]); - - // Indexing using Date.now() to generate the timestamp - const stub = sinon.stub(Date, 'now').returns((new Date(2020, 0, 29)).getTime()); - - await Promise.all([ - indexer.indexCollection(esClient, fakeCollectionFactory({ - updatedAt: new Date(2020, 0, 29), - }), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ - status: 'failed', - updatedAt: new Date(2020, 0, 29), - duration: 4, - }), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ - updatedAt: new Date(2020, 0, 29), - duration: 4, - }), esAlias), - ]); - - stub.restore(); + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.collectionPgModel = new CollectionPgModel(); + t.context.granulePgModel = new GranulePgModel(); + + const statuses = ['queued', 'failed', 'completed', 'running']; + const errors = [{ Error: 'UnknownError' }, { Error: 'CumulusMessageAdapterError' }, { Error: 'IngestFailure' }, { Error: 'CmrFailure' }, {}]; + const granules = []; + const collections = []; + + range(20).map((num) => ( + collections.push(fakeCollectionRecordFactory({ + name: `testCollection${num}`, + cumulus_id: num, + })) + )); + + range(100).map((num) => ( + granules.push(fakeGranuleRecordFactory({ + collection_cumulus_id: num % 20, + status: statuses[num % 4], + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), + updated_at: (new Date(2018 + (num % 
6), (num % 12), ((num + 1) % 29))).toISOString(), + error: errors[num % 5], + duration: num + (num / 10), + })) + )); + + await t.context.collectionPgModel.insert( + t.context.knex, + collections + ); + + await t.context.granulePgModel.insert( + t.context.knex, + granules + ); }); -test.after.always(async () => { +test.after.always(async (t) => { await Promise.all([ - esClient.client.indices.delete({ index: esIndex }), await accessTokenModel.deleteTable(), s3.recursivelyDeleteS3Bucket(process.env.system_bucket), ]); + + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); }); test('GET without pathParameters and without an Authorization header returns an Authorization Missing response', async (t) => { @@ -128,18 +149,6 @@ test('GET without pathParameters and with an invalid access token returns an una assertions.isInvalidAccessTokenResponse(t, response); }); -test.todo('GET without pathParameters and with an unauthorized user returns an unauthorized response'); - -test('GET /stats/aggregate with an invalid access token returns an unauthorized response', async (t) => { - const response = await request(app) - .get('/stats/aggregate') - .set('Accept', 'application/json') - .set('Authorization', 'Bearer ThisIsAnInvalidAuthorizationToken') - .expect(401); - - assertions.isInvalidAccessTokenResponse(t, response); -}); - test('getType gets correct type for granules', (t) => { const type = getType({ params: { type: 'granules' } }); @@ -188,6 +197,18 @@ test('getType returns correct type from query params', (t) => { t.is(type, 'provider'); }); +test.todo('GET without pathParameters and with an unauthorized user returns an unauthorized response'); + +test('GET /stats/aggregate with an invalid access token returns an unauthorized response', async (t) => { + const response = await request(app) + .get('/stats/aggregate') + .set('Accept', 'application/json') + .set('Authorization', 'Bearer ThisIsAnInvalidAuthorizationToken') + .expect(401); + + assertions.isInvalidAccessTokenResponse(t, response); +}); + test('GET /stats returns correct response, defaulted to all', async (t) => { const response = await request(app) .get('/stats') @@ -195,23 +216,23 @@ test('GET /stats returns correct response, defaulted to all', async (t) => { .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); - t.is(response.body.errors.value, 2); - t.is(response.body.collections.value, 2); - t.is(response.body.processingTime.value, 2.2); - t.is(response.body.granules.value, 5); + t.is(response.body.errors.value, 80); + t.is(response.body.processingTime.value, 54.44999999642372); + t.is(response.body.granules.value, 100); + t.is(response.body.collections.value, 20); }); test('GET /stats returns correct response with date params filters values correctly', async (t) => { const response = await request(app) - .get(`/stats?timestamp__from=${(new Date(2020, 0, 28)).getTime()}×tamp__to=${(new Date(2020, 0, 30)).getTime()}`) + .get(`/stats?timestamp__from=${(new Date(2018, 1, 28)).getTime()}×tamp__to=${(new Date(2019, 1, 30)).getTime()}`) .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); - t.is(response.body.errors.value, 1); - t.is(response.body.collections.value, 1); - t.is(response.body.processingTime.value, 4); - t.is(response.body.granules.value, 2); + t.is(response.body.errors.value, 15); + t.is(response.body.collections.value, 10); + t.is(response.body.processingTime.value, 53.38235317258274); + t.is(response.body.granules.value, 17); }); test('GET /stats/aggregate 
returns correct response', async (t) => { @@ -221,21 +242,29 @@ test('GET /stats/aggregate returns correct response', async (t) => { .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); - t.is(response.body.meta.count, 5); - t.deepEqual(response.body.count, [ - { key: 'completed', count: 3 }, { key: 'failed', count: 2 }, - ]); + const expectedCount = [ + { key: 'completed', count: 25 }, + { key: 'failed', count: 25 }, + { key: 'queued', count: 25 }, + { key: 'running', count: 25 }, + ]; + t.is(response.body.meta.count, 100); + t.deepEqual(response.body.count, expectedCount); }); test('GET /stats/aggregate filters correctly by date', async (t) => { const response = await request(app) - .get(`/stats/aggregate?type=granules×tamp__from=${(new Date(2020, 0, 28)).getTime()}×tamp__to=${(new Date(2020, 0, 30)).getTime()}`) + .get(`/stats/aggregate?type=granules×tamp__from=${(new Date(2020, 11, 28)).getTime()}×tamp__to=${(new Date(2023, 8, 30)).getTime()}`) .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); - t.is(response.body.meta.count, 2); - t.deepEqual(response.body.count, [ - { key: 'completed', count: 1 }, { key: 'failed', count: 1 }, - ]); + const expectedCount = [ + { key: 'failed', count: 16 }, + { key: 'running', count: 9 }, + { key: 'completed', count: 8 }, + { key: 'queued', count: 8 }, + ]; + t.is(response.body.meta.count, 41); + t.deepEqual(response.body.count, expectedCount); }); diff --git a/packages/db/.nycrc.json b/packages/db/.nycrc.json index 0349dfb5383..c251aa952b3 100644 --- a/packages/db/.nycrc.json +++ b/packages/db/.nycrc.json @@ -1,7 +1,7 @@ { "extends": "../../nyc.config.js", "statements": 89.0, - "functions": 75.0, - "branches": 71.0, + "functions": 77.0, + "branches": 75.0, "lines": 90.0 } \ No newline at end of file diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index c761e630c90..234f5f80785 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -142,6 +142,9 @@ export { export { GranuleSearch, } from './search/GranuleSearch'; +export { + StatsSearch, +} from './search/StatsSearch'; export { AsyncOperationPgModel } from './models/async_operation'; export { BasePgModel } from './models/base'; diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index dd1fc0cd063..739756d6790 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -18,7 +18,7 @@ export type Meta = { count?: number, }; -const typeToTable: { [key: string]: string } = { +export const typeToTable: { [key: string]: string } = { asyncOperation: TableNames.asyncOperations, collection: TableNames.collections, execution: TableNames.executions, @@ -51,9 +51,9 @@ class BaseSearch { * @param knex - DB client * @returns queries for getting count and search result */ - private _buildSearch(knex: Knex) + protected buildSearch(knex: Knex) : { - countQuery: Knex.QueryBuilder, + countQuery?: Knex.QueryBuilder, searchQuery: Knex.QueryBuilder, } { const { countQuery, searchQuery } = this.buildBasicQuery(knex); @@ -64,7 +64,7 @@ class BaseSearch { if (limit) searchQuery.limit(limit); if (offset) searchQuery.offset(offset); - log.debug(`_buildSearch returns countQuery: ${countQuery.toSQL().sql}, searchQuery: ${searchQuery.toSQL().sql}`); + log.debug(`buildSearch returns countQuery: ${countQuery?.toSQL().sql}, searchQuery: ${searchQuery.toSQL().sql}`); return { countQuery, searchQuery }; } @@ -88,7 +88,7 @@ class BaseSearch { * @throws - function is not 
implemented */ protected buildBasicQuery(knex: Knex): { - countQuery: Knex.QueryBuilder, + countQuery?: Knex.QueryBuilder, searchQuery: Knex.QueryBuilder, } { log.debug(`buildBasicQuery is not implemented ${knex.constructor.name}`); @@ -99,12 +99,12 @@ class BaseSearch { * Build queries for infix and prefix * * @param params - * @param params.countQuery - query builder for getting count + * @param [params.countQuery] - query builder for getting count * @param params.searchQuery - query builder for search * @param [params.dbQueryParameters] - db query parameters */ protected buildInfixPrefixQuery(params: { - countQuery: Knex.QueryBuilder, + countQuery?: Knex.QueryBuilder, searchQuery: Knex.QueryBuilder, dbQueryParameters?: DbQueryParameters, }) { @@ -116,12 +116,12 @@ class BaseSearch { * Build queries for term fields * * @param params - * @param params.countQuery - query builder for getting count + * @param [params.countQuery] - query builder for getting count * @param params.searchQuery - query builder for search * @param [params.dbQueryParameters] - db query parameters */ protected buildTermQuery(params: { - countQuery: Knex.QueryBuilder, + countQuery?: Knex.QueryBuilder, searchQuery: Knex.QueryBuilder, dbQueryParameters?: DbQueryParameters, }) { @@ -130,7 +130,7 @@ class BaseSearch { const { term = {} } = dbQueryParameters || this.dbQueryParameters; Object.entries(term).forEach(([name, value]) => { - countQuery.where(`${table}.${name}`, value); + countQuery?.where(`${table}.${name}`, value); searchQuery.where(`${table}.${name}`, value); }); } @@ -154,7 +154,7 @@ class BaseSearch { */ async query(testKnex: Knex | undefined) { const knex = testKnex ?? await getKnexClient(); - const { countQuery, searchQuery } = this._buildSearch(knex); + const { countQuery, searchQuery } = this.buildSearch(knex); try { const countResult = await countQuery; const meta = this._metaTemplate(); diff --git a/packages/db/src/search/StatsSearch.ts b/packages/db/src/search/StatsSearch.ts new file mode 100644 index 00000000000..8dfaf79bcbc --- /dev/null +++ b/packages/db/src/search/StatsSearch.ts @@ -0,0 +1,293 @@ +import omit from 'lodash/omit'; +import { Knex } from 'knex'; +import { getKnexClient } from '../connection'; +import { TableNames } from '../tables'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { BaseSearch, typeToTable } from './BaseSearch'; + +type TotalSummary = { + count_errors: number, + count_collections: number, + count_granules: number, + avg_processing_time: number, +}; + +type Aggregate = { + count: string, + aggregatedfield: string, +}; + +type Summary = { + dateFrom: string, + dateTo: string, + value: number, + aggregation: string, + unit: string, +}; + +type SummaryResult = { + errors: Summary, + granules: Summary, + collections: Summary, + processingTime: Summary, +}; + +type Meta = { + name: string, + count: number, + field: string, +}; + +type AggregateRes = { + key: string, + count: number, +}; + +type ApiAggregateResult = { + meta: Meta, + count: AggregateRes[] +}; + +const infixMapping: { [key: string]: string } = { + granules: 'granule_id', + collections: 'name', + providers: 'name', + executions: 'arn', + pdrs: 'name', +}; + +/** + * A class to query postgres for the STATS and STATS/AGGREGATE endpoints + */ +class StatsSearch extends BaseSearch { + readonly tableName: string; + + constructor(event: QueryEvent, type: string) { + super(event, type); + this.tableName = typeToTable[this.type]; + this.queryStringParameters.field = 
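+    // aggregate over status unless the request names another field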
this.queryStringParameters.field ?? 'status'; + this.dbQueryParameters = omit(this.dbQueryParameters, ['limit', 'offset']); + } + + /** + * Formats the postgres records into an API stats/aggregate response + * + * @param {Record} result - the postgres query results + * @returns {ApiAggregateResult} the api object with the aggregate statistics + */ + private formatAggregateResult(result: Record): ApiAggregateResult { + let totalCount = 0; + const responses = []; + for (const row of Object.keys(result)) { + responses.push( + { + key: result[row].aggregatedfield, + count: Number.parseInt(result[row].count, 10), + } + ); + totalCount += Number(result[row].count); + } + return { + meta: { + name: 'cumulus-api', + count: totalCount, + field: `${this.queryStringParameters.field}`, + }, + count: responses, + }; + } + + /** + * Formats the postgres results into an API stats/summary response + * + * @param {TotalSummary} result - the knex summary query results + * @returns {SummaryResult} the api object with the summary statistics + */ + private formatSummaryResult(result: TotalSummary): SummaryResult { + const timestampTo = Number.parseInt(this.queryStringParameters.timestamp__to as string, 10); + const timestampFrom = Number.parseInt(this.queryStringParameters.timestamp__from as string, 10); + const dateto = this.queryStringParameters.timestamp__to + ? new Date(timestampTo).toISOString() : new Date().toISOString(); + const datefrom = this.queryStringParameters.timestamp__from + ? new Date(timestampFrom).toISOString() : '1970-01-01T12:00:00+00:00'; + return { + errors: { + dateFrom: datefrom, + dateTo: dateto, + value: Number(result.count_errors), + aggregation: 'count', + unit: 'error', + }, + collections: { + dateFrom: datefrom, + dateTo: dateto, + value: Number(result.count_collections), + aggregation: 'count', + unit: 'collection', + }, + processingTime: { + dateFrom: datefrom, + dateTo: dateto, + value: Number(result.avg_processing_time), + aggregation: 'average', + unit: 'second', + }, + granules: { + dateFrom: datefrom, + dateTo: dateto, + value: Number(result.count_granules), + aggregation: 'count', + unit: 'granule', + }, + }; + } + + /** + * Queries postgres for a summary of statistics around the granules in the system + * + * @param {Knex} sendKnex - the knex client to be used + * @returns {Promise} the postgres aggregations based on query + */ + public async summary(sendKnex: Knex): Promise { + const knex = sendKnex ?? 
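+    // use the injected test client when provided, otherwise fall back to the configured knex client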
await getKnexClient(); + const aggregateQuery:Knex.QueryBuilder = knex(this.tableName); + if (this.queryStringParameters.timestamp__from) { + aggregateQuery.where(`${this.tableName}.updated_at`, '>=', new Date(Number.parseInt(this.queryStringParameters.timestamp__from as string, 10))); + } + if (this.queryStringParameters.timestamp__to) { + aggregateQuery.where(`${this.tableName}.updated_at`, '<=', new Date(Number.parseInt(this.queryStringParameters.timestamp__to as string, 10))); + } + aggregateQuery.select( + knex.raw(`COUNT(CASE WHEN ${this.tableName}.error ->> 'Error' is not null THEN 1 END) AS count_errors`), + knex.raw(`COUNT(${this.tableName}.cumulus_id) AS count_granules`), + knex.raw(`AVG(${this.tableName}.duration) AS avg_processing_time`), + knex.raw(`COUNT(DISTINCT ${this.tableName}.collection_cumulus_id) AS count_collections`) + ); + const aggregateQueryRes: TotalSummary[] = await aggregateQuery; + return this.formatSummaryResult(aggregateQueryRes[0]); + } + + /** + * Performs joins on the provider and/or collection table if neccessary + * + * @param {Knex.QueryBuilder} query - the knex query to be joined or not + */ + private joinTables(query: Knex.QueryBuilder) { + if (this.queryStringParameters.collectionId) { + query.join(`${TableNames.collections}`, `${this.tableName}.collection_cumulus_id`, 'collections.cumulus_id'); + } + + if (this.queryStringParameters.provider) { + query.join(`${TableNames.providers}`, `${this.tableName}.provider_cumulus_id`, 'providers.cumulus_id'); + } + } + + /** + * Aggregates the search query based on queryStringParameters + * + * @param {Knex.QueryBuilder} query - the knex query to be aggregated + * @param {Knex} knex - the knex client to be used + */ + private aggregateQueryField(query: Knex.QueryBuilder, knex: Knex) { + if (this.queryStringParameters.field?.includes('error.Error')) { + query.select(knex.raw("error ->> 'Error' as aggregatedfield")); + } else { + query.select(`${this.tableName}.${this.queryStringParameters.field} as aggregatedfield`); + } + query.modify((queryBuilder) => this.joinTables(queryBuilder)) + .count(`${this.tableName}.cumulus_id as count`) + .groupBy('aggregatedfield') + .orderBy([{ column: 'count', order: 'desc' }, { column: 'aggregatedfield' }]); + } + + /** + * Builds basic query + * + * @param {Knex} knex - the knex client + * @returns the search query + */ + protected buildBasicQuery(knex: Knex) + : { + searchQuery: Knex.QueryBuilder, + } { + const searchQuery:Knex.QueryBuilder = knex(`${this.tableName}`); + this.aggregateQueryField(searchQuery, knex); + return { searchQuery }; + } + + /** + * Builds queries for infix and prefix + * + * @param params + * @param {Knex.QueryBuilder} params.searchQuery - the search query + * @param [params.dbQueryParameters] - the db query parameters + */ + protected buildInfixPrefixQuery(params: { + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters || this.dbQueryParameters; + const fieldName = infixMapping[this.tableName]; + if (infix) { + searchQuery.whereLike(`${this.tableName}.${fieldName}`, `%${infix}%`); + } + if (prefix) { + searchQuery.whereLike(`${this.tableName}.${fieldName}`, `%${prefix}%`); + } + } + + /** + * Builds queries for term fields + * + * @param params + * @param {Knex.QueryBuilder} params.searchQuery - the search query + * @param [params.dbQueryParameters] - the db query parameters + * @returns {Knex.QueryBuilder} - the updated 
search query based on queryStringParams + */ + protected buildTermQuery(params: { + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { searchQuery } = params; + if (this.queryStringParameters.collectionId) { + searchQuery.where(`${TableNames.collections}.name`, '=', this.queryStringParameters.collectionId); + } + if (this.queryStringParameters.provider) { + searchQuery.where(`${TableNames.providers}.name`, '=', this.queryStringParameters.provider); + } + if (this.queryStringParameters.timestamp__from) { + searchQuery.where(`${this.tableName}.updated_at`, '>=', new Date(Number.parseInt(this.queryStringParameters.timestamp__from as string, 10))); + } + if (this.queryStringParameters.timestamp__to) { + searchQuery.where(`${this.tableName}.updated_at`, '<=', new Date(Number.parseInt(this.queryStringParameters.timestamp__to as string, 10))); + } + if (this.queryStringParameters.field?.includes('error.Error')) { + searchQuery.whereRaw(`${this.tableName}.error ->> 'Error' is not null`); + } + const { term = {} } = this.dbQueryParameters; + return super.buildTermQuery({ + ...params, + dbQueryParameters: { term: omit(term, ['collectionName', 'collectionVersion', 'pdrName', 'error.Error', 'providerName']) }, + }); + } + + /** + * Executes the aggregate search query + * + * @param {Knex | undefined} testKnex - the knex client to be used + * @returns {Promise} - the aggregate query results in api format + */ + async aggregate(testKnex: Knex | undefined): Promise { + const knex = testKnex ?? await getKnexClient(); + const { searchQuery } = this.buildSearch(knex); + try { + const pgRecords = await searchQuery; + return this.formatAggregateResult(pgRecords); + } catch (error) { + return error; + } + } +} + +export { StatsSearch }; diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index 1a40a093833..2157c947de5 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -1,4 +1,5 @@ export type QueryStringParameters = { + field?: string, fields?: string, infix?: string, limit?: string, diff --git a/packages/db/tests/search/test-StatsSearch.js b/packages/db/tests/search/test-StatsSearch.js new file mode 100644 index 00000000000..a2d7b2c6b8f --- /dev/null +++ b/packages/db/tests/search/test-StatsSearch.js @@ -0,0 +1,436 @@ +'use strict'; + +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); +const { StatsSearch } = require('../../dist/search/StatsSearch'); + +const { + destroyLocalTestDb, + generateLocalTestDb, + GranulePgModel, + CollectionPgModel, + fakeCollectionRecordFactory, + fakeGranuleRecordFactory, + fakeProviderRecordFactory, + migrationDir, + fakePdrRecordFactory, + fakeExecutionRecordFactory, + PdrPgModel, + ExecutionPgModel, + ProviderPgModel, +} = require('../../dist'); + +const testDbName = `collection_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.collectionPgModel = new CollectionPgModel(); + t.context.granulePgModel = new GranulePgModel(); + t.context.providerPgModel = new ProviderPgModel(); + t.context.PdrPgModel = new PdrPgModel(); + t.context.ExecutionPgModel = new ExecutionPgModel(); + + const statuses = ['queued', 'failed', 'completed', 'running']; + const errors = [{ Error: 'UnknownError' }, { Error: 
'CumulusMessageAdapterError' }, { Error: 'IngestFailure' }, { Error: 'CmrFailure' }, {}]; + const granules = []; + const collections = []; + const executions = []; + const pdrs = []; + const providers = []; + + range(20).map((num) => ( + collections.push(fakeCollectionRecordFactory({ + name: `testCollection___${num}`, + cumulus_id: num, + })) + )); + + range(10).map((num) => ( + providers.push(fakeProviderRecordFactory({ + cumulus_id: num, + name: `testProvider${num}`, + })) + )); + + range(100).map((num) => ( + granules.push(fakeGranuleRecordFactory({ + collection_cumulus_id: num % 20, + granule_id: num % 2 === 0 ? `testGranule${num}` : `query__Granule${num}`, + status: statuses[num % 4], + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))).toISOString(), + error: errors[num % 5], + duration: num + (num / 10), + provider_cumulus_id: num % 10, + })) + )); + + range(20).map((num) => ( + pdrs.push(fakePdrRecordFactory({ + collection_cumulus_id: num, + status: statuses[(num % 3) + 1], + provider_cumulus_id: num % 10, + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))).toISOString(), + // eslint-disable-next-line no-sequences + })), + executions.push(fakeExecutionRecordFactory({ + collection_cumulus_id: num, + status: statuses[(num % 3) + 1], + error: errors[num % 5], + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))).toISOString(), + })) + )); + + await t.context.collectionPgModel.insert( + t.context.knex, + collections + ); + + await t.context.providerPgModel.insert( + t.context.knex, + providers + ); + + await t.context.granulePgModel.insert( + t.context.knex, + granules + ); + + await t.context.ExecutionPgModel.insert( + t.context.knex, + executions + ); + + await t.context.PdrPgModel.insert( + t.context.knex, + pdrs + ); +}); + +test.after.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('StatsSearch returns correct response for basic granules query', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + type: 'granules', + }; + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse = [ + { key: 'completed', count: 25 }, + { key: 'failed', count: 25 }, + { key: 'queued', count: 25 }, + { key: 'running', count: 25 }, + ]; + t.is(results.meta.count, 100); + t.deepEqual(results.count, expectedResponse); +}); + +test('StatsSearch filters correctly by date', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + type: 'granules', + timestamp__from: `${(new Date(2020, 1, 28)).getTime()}`, + timestamp__to: `${(new Date(2022, 2, 30)).getTime()}`, + }; + + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse = [ + { key: 'completed', count: 9 }, + { key: 'running', count: 9 }, + { key: 'failed', count: 8 }, + { key: 'queued', count: 8 }, + ]; + t.is(results.meta.count, 34); + t.deepEqual(results.count, expectedResponse); +}); + +test('StatsSearch filters executions correctly', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + type: 'executions', + field: 
'status', + }; + + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'execution'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse1 = [ + { key: 'completed', count: 7 }, + { key: 'failed', count: 7 }, + { key: 'running', count: 6 }, + ]; + t.is(results.meta.count, 20); + t.deepEqual(results.count, expectedResponse1); + + queryStringParameters = { + type: 'executions', + field: 'status', + timestamp__to: `${(new Date(2023, 11, 30)).getTime()}`, + timestamp__from: `${(new Date(2021, 1, 28)).getTime()}`, + }; + + const AggregateSearch2 = new StatsSearch({ queryStringParameters }, 'execution'); + const results2 = await AggregateSearch2.aggregate(knex); + const expectedResponse2 = [ + { key: 'completed', count: 3 }, + { key: 'failed', count: 3 }, + { key: 'running', count: 3 }, + ]; + t.is(results2.meta.count, 9); + t.deepEqual(results2.count, expectedResponse2); + + queryStringParameters = { + type: 'executions', + field: 'status', + timestamp__to: `${(new Date(2023, 11, 30)).getTime()}`, + timestamp__from: `${(new Date(2021, 1, 28)).getTime()}`, + collectionId: 'testCollection___5', + status: 'running', + }; + + const AggregateSearch3 = new StatsSearch({ queryStringParameters }, 'execution'); + const results3 = await AggregateSearch3.aggregate(knex); + const expectedResponse3 = [{ key: 'running', count: 1 }]; + t.deepEqual(results3.count, expectedResponse3); + t.is(results3.meta.count, 1); +}); + +test('StatsSearch filters PDRs correctly', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + type: 'pdrs', + field: 'status', + }; + + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'pdr'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse = [ + { key: 'completed', count: 7 }, + { key: 'failed', count: 7 }, + { key: 'running', count: 6 }, + ]; + t.is(results.meta.count, 20); + t.deepEqual(results.count, expectedResponse); + + queryStringParameters = { + type: 'pdrs', + field: 'status', + timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, + timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, + }; + + const AggregateSearch2 = new StatsSearch({ queryStringParameters }, 'pdr'); + const results2 = await AggregateSearch2.aggregate(knex); + const expectedResponse2 = [{ key: 'completed', count: 4 }, { key: 'failed', count: 2 }]; + t.is(results2.meta.count, 6); + t.deepEqual(results2.count, expectedResponse2); + + queryStringParameters = { + type: 'pdrs', + field: 'status', + timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, + timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, + status: 'failed', + }; + + const AggregateSearch3 = new StatsSearch({ queryStringParameters }, 'pdr'); + const results3 = await AggregateSearch3.aggregate(knex); + const expectedResponse3 = [{ key: 'failed', count: 2 }]; + t.is(results3.meta.count, 2); + t.deepEqual(results3.count, expectedResponse3); +}); + +test('StatsSearch returns correct response when queried by provider', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + type: 'granules', + field: 'status', + provider: 'testProvider2', + }; + + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse = [{ key: 'completed', count: 5 }, { key: 'queued', count: 5 }]; + t.is(results.meta.count, 10); + t.deepEqual(results.count, expectedResponse); +}); + +test('StatsSearch returns correct response 
when queried by collection', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + type: 'granules', + field: 'status', + collectionId: 'testCollection___8', + }; + + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse = [{ key: 'queued', count: 5 }]; + t.is(results.meta.count, 5); + t.deepEqual(results.count, expectedResponse); +}); + +test('StatsSearch returns correct response when queried by collection and provider', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + type: 'granules', + field: 'status', + collectionId: 'testCollection___1', + providerId: 'testProvider1', + }; + + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse = [{ key: 'failed', count: 5 }]; + t.is(results.meta.count, 5); + t.deepEqual(results.count, expectedResponse); + + queryStringParameters = { + type: 'granules', + field: 'status', + collectionId: 'testCollection___1', + providerId: 'testProvider1', + timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, + timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, + }; + + const AggregateSearch2 = new StatsSearch({ queryStringParameters }, 'granule'); + const results2 = await AggregateSearch2.aggregate(knex); + const expectedResponse2 = [{ key: 'failed', count: 2 }]; + t.is(results2.meta.count, 2); + t.deepEqual(results2.count, expectedResponse2); + queryStringParameters = { + type: 'granules', + field: 'status', + collectionId: 'testCollection___1', + providerId: 'testProvider1', + timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, + timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, + status: 'failed', + }; + + const AggregateSearch3 = new StatsSearch({ queryStringParameters }, 'granule'); + const results3 = await AggregateSearch3.aggregate(knex); + const expectedResponse3 = [{ key: 'failed', count: 2 }]; + t.is(results3.meta.count, 2); + t.deepEqual(results3.count, expectedResponse3); +}); + +test('StatsSearch returns correct response when queried by error', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + type: 'granules', + field: 'error.Error.keyword', + }; + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse1 = [ + { key: 'CmrFailure', count: 20 }, + { key: 'CumulusMessageAdapterError', count: 20 }, + { key: 'IngestFailure', count: 20 }, + { key: 'UnknownError', count: 20 }, + ]; + t.is(results.meta.count, 80); + t.deepEqual(results.count, expectedResponse1); + + queryStringParameters = { + type: 'granules', + field: 'error.Error.keyword', + timestamp__to: `${(new Date(2021, 12, 9)).getTime()}`, + timestamp__from: `${(new Date(2020, 1, 28)).getTime()}`, + }; + const AggregateSearch2 = new StatsSearch({ queryStringParameters }, 'granule'); + const results2 = await AggregateSearch2.aggregate(knex); + const expectedResponse2 = [ + { key: 'CmrFailure', count: 8 }, + { key: 'IngestFailure', count: 7 }, + { key: 'CumulusMessageAdapterError', count: 6 }, + { key: 'UnknownError', count: 6 }, + ]; + t.is(results2.meta.count, 27); + t.deepEqual(results2.count, expectedResponse2); + + queryStringParameters = { + type: 'granules', + collectionId: 'testCollection___1', + providerId: 'testProvider1', + field: 'error.Error.keyword', + timestamp__to: 
`${(new Date(2019, 12, 9)).getTime()}`, + timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, + }; + const AggregateSearch3 = new StatsSearch({ queryStringParameters }, 'granule'); + const results3 = await AggregateSearch3.aggregate(knex); + const expectedResponse3 = [{ key: 'CumulusMessageAdapterError', count: 2 }]; + t.is(results3.meta.count, 2); + t.deepEqual(results3.count, expectedResponse3); +}); + +test('StatsSearch can query by infix and prefix when type is defined', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + type: 'granules', + infix: 'testGra', + }; + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse1 = [{ key: 'completed', count: 25 }, { key: 'queued', count: 25 }]; + t.is(results.meta.count, 50); + t.deepEqual(results.count, expectedResponse1); + + queryStringParameters = { + type: 'granules', + prefix: 'query', + }; + const AggregateSearch2 = new StatsSearch({ queryStringParameters }, 'granule'); + const results2 = await AggregateSearch2.aggregate(knex); + const expectedResponse2 = [{ key: 'failed', count: 25 }, { key: 'running', count: 25 }]; + t.is(results2.meta.count, 50); + t.deepEqual(results2.count, expectedResponse2); + + queryStringParameters = { + type: 'collections', + infix: 'testCollection___8', + field: 'name', + }; + const AggregateSearch3 = new StatsSearch({ queryStringParameters }, 'collection'); + const results3 = await AggregateSearch3.aggregate(knex); + const expectedResponse3 = [{ key: 'testCollection___8', count: 1 }]; + t.is(results3.meta.count, 1); + t.deepEqual(results3.count, expectedResponse3); +}); + +test('StatsSummary works', async (t) => { + const { knex } = t.context; + const StatsSummary = new StatsSearch({}, 'granule'); + const results = await StatsSummary.summary(knex); + t.is(results.collections.value, 20); + t.is(results.granules.value, 100); + t.is(results.errors.value, 80); + t.is(results.processingTime.value, 54.44999999642372); + const queryStringParameters = { + timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, + timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, + }; + const StatsSummary2 = new StatsSearch({ queryStringParameters }, 'granule'); + const results2 = await StatsSummary2.summary(knex); + t.is(results2.collections.value, 15); + t.is(results2.granules.value, 25); + t.is(results2.errors.value, 21); + t.is(results2.processingTime.value, 53.54799992084503); +}); From ad288419380ba8062bed0cb11f8ade8b7ccf37be Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Fri, 24 May 2024 20:59:38 -0400 Subject: [PATCH 04/61] CUMULUS-3693: Update granules List endpoints to query postgres - range (#3660) * add range query support --- CHANGELOG.md | 2 + example/config.yml | 5 ++ example/deployments/cumulus/cumulus-es.tfvars | 4 + .../data-persistence/cumulus-es.tfvars | 1 + .../db-migration/cumulus-es.tfvars | 1 + packages/api/endpoints/stats.js | 4 +- packages/db/src/search/BaseSearch.ts | 80 +++++++++++++++-- packages/db/src/search/GranuleSearch.ts | 86 +++++-------------- packages/db/src/search/StatsSearch.ts | 76 ++++++++-------- packages/db/src/search/field-mapping.ts | 30 +++++++ packages/db/src/search/queries.ts | 45 +++++++++- packages/db/src/types/search.ts | 12 ++- .../db/tests/search/test-GranuleSearch.js | 53 ++++++++++-- packages/db/tests/search/test-StatsSearch.js | 39 +++------ .../db/tests/search/test-field-mapping.js | 13 +++ 
packages/db/tests/search/test-queries.js | 13 +++ 16 files changed, 308 insertions(+), 156 deletions(-) create mode 100644 example/deployments/cumulus/cumulus-es.tfvars create mode 100644 example/deployments/data-persistence/cumulus-es.tfvars create mode 100644 example/deployments/db-migration/cumulus-es.tfvars diff --git a/CHANGELOG.md b/CHANGELOG.md index 09c0830e48f..e31689c25e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `@cumulus/db/src/search` `BaseSearch` and `GranuleSearch` classes to support basic queries for granules - Updated granules List endpoint to query postgres for basic queries +- **CUMULUS-3693** + - Added functionality to `@cumulus/db/src/search` to support range queries - **CUMULUS-3694** - Added functionality to `@cumulus/db/src/search` to support term queries - Updated `BaseSearch` and `GranuleSearch` classes to support term queries for granules diff --git a/example/config.yml b/example/config.yml index 2d1f34e345b..74e5e79812d 100644 --- a/example/config.yml +++ b/example/config.yml @@ -8,6 +8,11 @@ cumulus-sit: apiUsername: jasmine pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider +cumulus-es: + bucket: cumulus-sit-internal + apiUsername: jasmine + pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider + mvd-tf: bucket: mvd-internal diff --git a/example/deployments/cumulus/cumulus-es.tfvars b/example/deployments/cumulus/cumulus-es.tfvars new file mode 100644 index 00000000000..6a8a3d8df37 --- /dev/null +++ b/example/deployments/cumulus/cumulus-es.tfvars @@ -0,0 +1,4 @@ +prefix = "cumulus-es" +archive_api_port = 8000 +key_name = "lp" +cmr_oauth_provider = "launchpad" diff --git a/example/deployments/data-persistence/cumulus-es.tfvars b/example/deployments/data-persistence/cumulus-es.tfvars new file mode 100644 index 00000000000..3501103d61c --- /dev/null +++ b/example/deployments/data-persistence/cumulus-es.tfvars @@ -0,0 +1 @@ +prefix = "cumulus-es" diff --git a/example/deployments/db-migration/cumulus-es.tfvars b/example/deployments/db-migration/cumulus-es.tfvars new file mode 100644 index 00000000000..3501103d61c --- /dev/null +++ b/example/deployments/db-migration/cumulus-es.tfvars @@ -0,0 +1 @@ +prefix = "cumulus-es" diff --git a/packages/api/endpoints/stats.js b/packages/api/endpoints/stats.js index a94a8bdd085..1caf461416e 100644 --- a/packages/api/endpoints/stats.js +++ b/packages/api/endpoints/stats.js @@ -35,9 +35,7 @@ function getType(req) { * @returns {Promise} the promise of express response object */ async function summary(req, res) { - const stats = new StatsSearch({ - queryStringParameters: omit(req.query, 'type'), - }, 'granule'); + const stats = new StatsSearch({ queryStringParameters: req.query }, 'granule'); const r = await stats.summary(); return res.send(r); } diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 739756d6790..10d6bb38d79 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -9,7 +9,7 @@ import { convertQueryStringToDbQueryParameters } from './queries'; const log = new Logger({ sender: '@cumulus/db/BaseSearch' }); -export type Meta = { +type Meta = { name: string, stack?: string, table?: string, @@ -33,18 +33,33 @@ export const typeToTable: { [key: string]: string } = { */ class BaseSearch { readonly type: string; + readonly tableName: string; readonly queryStringParameters: QueryStringParameters; // parsed from 
queryStringParameters for query build dbQueryParameters: DbQueryParameters = {}; constructor(event: QueryEvent, type: string) { this.type = type; + this.tableName = typeToTable[this.type]; this.queryStringParameters = event?.queryStringParameters ?? {}; this.dbQueryParameters = convertQueryStringToDbQueryParameters( this.type, this.queryStringParameters ); } + protected searchCollection(): boolean { + const term = this.dbQueryParameters.term; + return !!(term?.collectionName || term?.collectionVersion); + } + + protected searchPdr(): boolean { + return !!this.dbQueryParameters.term?.pdrName; + } + + protected searchProvider(): boolean { + return !!this.dbQueryParameters.term?.providerName; + } + /** * Build the search query * @@ -58,6 +73,7 @@ class BaseSearch { } { const { countQuery, searchQuery } = this.buildBasicQuery(knex); this.buildTermQuery({ countQuery, searchQuery }); + this.buildRangeQuery({ countQuery, searchQuery }); this.buildInfixPrefixQuery({ countQuery, searchQuery }); const { limit, offset } = this.dbQueryParameters; @@ -77,7 +93,7 @@ class BaseSearch { return { name: 'cumulus-api', stack: process.env.stackName, - table: this.type && typeToTable[this.type], + table: this.tableName, }; } @@ -112,6 +128,33 @@ class BaseSearch { throw new Error('buildInfixPrefixQuery is not implemented'); } + /** + * Build queries for range fields + * + * @param params + * @param [params.countQuery] - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildRangeQuery(params: { + countQuery?: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { range = {} } = dbQueryParameters ?? this.dbQueryParameters; + + Object.entries(range).forEach(([name, rangeValues]) => { + if (rangeValues.gte) { + countQuery?.where(`${this.tableName}.${name}`, '>=', rangeValues.gte); + searchQuery.where(`${this.tableName}.${name}`, '>=', rangeValues.gte); + } + if (rangeValues.lte) { + countQuery?.where(`${this.tableName}.${name}`, '<=', rangeValues.lte); + searchQuery.where(`${this.tableName}.${name}`, '<=', rangeValues.lte); + } + }); + } /** * Build queries for term fields * @@ -125,13 +168,38 @@ class BaseSearch { searchQuery: Knex.QueryBuilder, dbQueryParameters?: DbQueryParameters, }) { - const table = typeToTable[this.type]; + const { + collections: collectionsTable, + providers: providersTable, + pdrs: pdrsTable, + } = TableNames; + const { countQuery, searchQuery, dbQueryParameters } = params; - const { term = {} } = dbQueryParameters || this.dbQueryParameters; + const { term = {} } = dbQueryParameters ?? 
this.dbQueryParameters; Object.entries(term).forEach(([name, value]) => { - countQuery?.where(`${table}.${name}`, value); - searchQuery.where(`${table}.${name}`, value); + switch (name) { + case 'collectionName': + countQuery?.where(`${collectionsTable}.name`, value); + searchQuery.where(`${collectionsTable}.name`, value); + break; + case 'collectionVersion': + countQuery?.where(`${collectionsTable}.version`, value); + searchQuery.where(`${collectionsTable}.version`, value); + break; + case 'providerName': + countQuery?.where(`${providersTable}.name`, value); + searchQuery.where(`${providersTable}.name`, value); + break; + case 'pdrName': + countQuery?.where(`${pdrsTable}.name`, value); + searchQuery.where(`${pdrsTable}.name`, value); + break; + default: + countQuery?.where(`${this.tableName}.${name}`, value); + searchQuery.where(`${this.tableName}.${name}`, value); + break; + } }); } diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts index b875dae52fe..37a35e27ba9 100644 --- a/packages/db/src/search/GranuleSearch.ts +++ b/packages/db/src/search/GranuleSearch.ts @@ -14,7 +14,7 @@ import { TableNames } from '../tables'; const log = new Logger({ sender: '@cumulus/db/GranuleSearch' }); -export interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { +interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { cumulus_id: number, updated_at: Date, collection_cumulus_id: number, @@ -26,8 +26,6 @@ export interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { providerName?: string, } -const foreignFields = ['collectionName', 'collectionVersion', 'providerName', 'pdrName']; - /** * Class to build and execute db search query for granules */ @@ -36,21 +34,6 @@ export class GranuleSearch extends BaseSearch { super(event, 'granule'); } - private searchCollection(): boolean { - const term = this.dbQueryParameters.term; - return !!(term && (term.collectionName || term.collectionVersion)); - } - - private searchPdr(): boolean { - const term = this.dbQueryParameters.term; - return !!(term && term.pdrName); - } - - private searchProvider(): boolean { - const term = this.dbQueryParameters.term; - return !!(term && term.providerName); - } - /** * Build basic query * @@ -63,40 +46,39 @@ export class GranuleSearch extends BaseSearch { searchQuery: Knex.QueryBuilder, } { const { - granules: granulesTable, collections: collectionsTable, providers: providersTable, pdrs: pdrsTable, } = TableNames; - const countQuery = knex(granulesTable) - .count(`${granulesTable}.cumulus_id`); + const countQuery = knex(this.tableName) + .count(`${this.tableName}.cumulus_id`); - const searchQuery = knex(granulesTable) - .select(`${granulesTable}.*`) + const searchQuery = knex(this.tableName) + .select(`${this.tableName}.*`) .select({ providerName: `${providersTable}.name`, collectionName: `${collectionsTable}.name`, collectionVersion: `${collectionsTable}.version`, pdrName: `${pdrsTable}.name`, }) - .innerJoin(collectionsTable, `${granulesTable}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + .innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); if (this.searchCollection()) { - countQuery.innerJoin(collectionsTable, `${granulesTable}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + countQuery.innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); } if (this.searchProvider()) { - countQuery.innerJoin(providersTable, 
`${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`); - searchQuery.innerJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + countQuery.innerJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + searchQuery.innerJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); } else { - searchQuery.leftJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + searchQuery.leftJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); } if (this.searchPdr()) { - countQuery.innerJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); - searchQuery.innerJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + countQuery.innerJoin(pdrsTable, `${this.tableName}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + searchQuery.innerJoin(pdrsTable, `${this.tableName}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); } else { - searchQuery.leftJoin(pdrsTable, `${granulesTable}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); + searchQuery.leftJoin(pdrsTable, `${this.tableName}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); } return { countQuery, searchQuery }; } @@ -114,16 +96,15 @@ export class GranuleSearch extends BaseSearch { searchQuery: Knex.QueryBuilder, dbQueryParameters?: DbQueryParameters, }) { - const { granules: granulesTable } = TableNames; const { countQuery, searchQuery, dbQueryParameters } = params; - const { infix, prefix } = dbQueryParameters || this.dbQueryParameters; + const { infix, prefix } = dbQueryParameters ?? this.dbQueryParameters; if (infix) { - countQuery.whereLike(`${granulesTable}.granule_id`, `%${infix}%`); - searchQuery.whereLike(`${granulesTable}.granule_id`, `%${infix}%`); + countQuery.whereLike(`${this.tableName}.granule_id`, `%${infix}%`); + searchQuery.whereLike(`${this.tableName}.granule_id`, `%${infix}%`); } if (prefix) { - countQuery.whereLike(`${granulesTable}.granule_id`, `${prefix}%`); - searchQuery.whereLike(`${granulesTable}.granule_id`, `${prefix}%`); + countQuery.whereLike(`${this.tableName}.granule_id`, `${prefix}%`); + searchQuery.whereLike(`${this.tableName}.granule_id`, `${prefix}%`); } } @@ -140,42 +121,19 @@ export class GranuleSearch extends BaseSearch { searchQuery: Knex.QueryBuilder, dbQueryParameters?: DbQueryParameters, }) { - const { - granules: granulesTable, - collections: collectionsTable, - providers: providersTable, - pdrs: pdrsTable, - } = TableNames; - const { countQuery, searchQuery, dbQueryParameters } = params; - const { term = {} } = dbQueryParameters || this.dbQueryParameters; + const { term = {} } = dbQueryParameters ?? 
this.dbQueryParameters; Object.entries(term).forEach(([name, value]) => { - if (name === 'collectionName') { - countQuery.where(`${collectionsTable}.name`, value); - searchQuery.where(`${collectionsTable}.name`, value); - } - if (name === 'collectionVersion') { - countQuery.where(`${collectionsTable}.version`, value); - searchQuery.where(`${collectionsTable}.version`, value); - } - if (name === 'providerName') { - countQuery.where(`${providersTable}.name`, value); - searchQuery.where(`${providersTable}.name`, value); - } - if (name === 'pdrName') { - countQuery.where(`${pdrsTable}.name`, value); - searchQuery.where(`${pdrsTable}.name`, value); - } if (name === 'error.Error') { - countQuery.whereRaw(`${granulesTable}.error->>'Error' = '${value}'`); - searchQuery.whereRaw(`${granulesTable}.error->>'Error' = '${value}'`); + countQuery.whereRaw(`${this.tableName}.error->>'Error' = '${value}'`); + searchQuery.whereRaw(`${this.tableName}.error->>'Error' = '${value}'`); } }); super.buildTermQuery({ ...params, - dbQueryParameters: { term: omit(term, foreignFields, 'error.Error') }, + dbQueryParameters: { term: omit(term, 'error.Error') }, }); } diff --git a/packages/db/src/search/StatsSearch.ts b/packages/db/src/search/StatsSearch.ts index 8dfaf79bcbc..5a2ddbfebf7 100644 --- a/packages/db/src/search/StatsSearch.ts +++ b/packages/db/src/search/StatsSearch.ts @@ -3,7 +3,7 @@ import { Knex } from 'knex'; import { getKnexClient } from '../connection'; import { TableNames } from '../tables'; import { DbQueryParameters, QueryEvent } from '../types/search'; -import { BaseSearch, typeToTable } from './BaseSearch'; +import { BaseSearch } from './BaseSearch'; type TotalSummary = { count_errors: number, @@ -60,12 +60,12 @@ const infixMapping: { [key: string]: string } = { * A class to query postgres for the STATS and STATS/AGGREGATE endpoints */ class StatsSearch extends BaseSearch { - readonly tableName: string; + readonly field: string; constructor(event: QueryEvent, type: string) { - super(event, type); - this.tableName = typeToTable[this.type]; - this.queryStringParameters.field = this.queryStringParameters.field ?? 'status'; + const { field, ...queryStringParameters } = event.queryStringParameters || {}; + super({ queryStringParameters }, type); + this.field = field ?? 'status'; this.dbQueryParameters = omit(this.dbQueryParameters, ['limit', 'offset']); } @@ -91,7 +91,7 @@ class StatsSearch extends BaseSearch { meta: { name: 'cumulus-api', count: totalCount, - field: `${this.queryStringParameters.field}`, + field: this.field, }, count: responses, }; @@ -104,12 +104,10 @@ class StatsSearch extends BaseSearch { * @returns {SummaryResult} the api object with the summary statistics */ private formatSummaryResult(result: TotalSummary): SummaryResult { - const timestampTo = Number.parseInt(this.queryStringParameters.timestamp__to as string, 10); - const timestampFrom = Number.parseInt(this.queryStringParameters.timestamp__from as string, 10); - const dateto = this.queryStringParameters.timestamp__to - ? new Date(timestampTo).toISOString() : new Date().toISOString(); - const datefrom = this.queryStringParameters.timestamp__from - ? new Date(timestampFrom).toISOString() : '1970-01-01T12:00:00+00:00'; + const timestampTo = this.dbQueryParameters.range?.updated_at?.lte ?? new Date(); + const timestampFrom = this.dbQueryParameters.range?.updated_at?.gte ?? 
new Date(0); + const dateto = (timestampTo as Date).toISOString(); + const datefrom = (timestampFrom as Date).toISOString(); return { errors: { dateFrom: datefrom, @@ -150,13 +148,8 @@ class StatsSearch extends BaseSearch { */ public async summary(sendKnex: Knex): Promise<SummaryResult> { const knex = sendKnex ?? await getKnexClient(); - const aggregateQuery:Knex.QueryBuilder = knex(this.tableName); - if (this.queryStringParameters.timestamp__from) { - aggregateQuery.where(`${this.tableName}.updated_at`, '>=', new Date(Number.parseInt(this.queryStringParameters.timestamp__from as string, 10))); - } - if (this.queryStringParameters.timestamp__to) { - aggregateQuery.where(`${this.tableName}.updated_at`, '<=', new Date(Number.parseInt(this.queryStringParameters.timestamp__to as string, 10))); - } + const aggregateQuery: Knex.QueryBuilder = knex(this.tableName); + this.buildRangeQuery({ searchQuery: aggregateQuery }); aggregateQuery.select( knex.raw(`COUNT(CASE WHEN ${this.tableName}.error ->> 'Error' is not null THEN 1 END) AS count_errors`), knex.raw(`COUNT(${this.tableName}.cumulus_id) AS count_granules`), @@ -173,12 +166,21 @@ class StatsSearch extends BaseSearch { * @param {Knex.QueryBuilder} query - the knex query to be joined or not */ private joinTables(query: Knex.QueryBuilder) { - if (this.queryStringParameters.collectionId) { - query.join(`${TableNames.collections}`, `${this.tableName}.collection_cumulus_id`, 'collections.cumulus_id'); + const { + collections: collectionsTable, + providers: providersTable, + pdrs: pdrsTable, + } = TableNames; + if (this.searchCollection()) { + query.join(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); } - if (this.queryStringParameters.provider) { - query.join(`${TableNames.providers}`, `${this.tableName}.provider_cumulus_id`, 'providers.cumulus_id'); + if (this.searchProvider()) { + query.join(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + } + + if (this.searchPdr()) { + query.join(pdrsTable, `${this.tableName}.pdr_cumulus_id`, `${pdrsTable}.cumulus_id`); } } @@ -189,10 +191,10 @@ class StatsSearch extends BaseSearch { * @param {Knex} knex - the knex client to be used */ private aggregateQueryField(query: Knex.QueryBuilder, knex: Knex) { - if (this.queryStringParameters.field?.includes('error.Error')) { + if (this.field?.includes('error.Error')) { query.select(knex.raw("error ->> 'Error' as aggregatedfield")); } else { - query.select(`${this.tableName}.${this.queryStringParameters.field} as aggregatedfield`); + query.select(`${this.tableName}.${this.field} as aggregatedfield`); } query.modify((queryBuilder) => this.joinTables(queryBuilder)) .count(`${this.tableName}.cumulus_id as count`) @@ -210,7 +212,7 @@ class StatsSearch extends BaseSearch { : { searchQuery: Knex.QueryBuilder, } { - const searchQuery:Knex.QueryBuilder = knex(`${this.tableName}`); + const searchQuery:Knex.QueryBuilder = knex(this.tableName); this.aggregateQueryField(searchQuery, knex); return { searchQuery }; } @@ -249,26 +251,16 @@ class StatsSearch extends BaseSearch { searchQuery: Knex.QueryBuilder, dbQueryParameters?: DbQueryParameters, }) { - const { searchQuery } = params; - if (this.queryStringParameters.collectionId) { - searchQuery.where(`${TableNames.collections}.name`, '=', this.queryStringParameters.collectionId); - } - if (this.queryStringParameters.provider) { - searchQuery.where(`${TableNames.providers}.name`, '=', this.queryStringParameters.provider); - } - if 
(this.queryStringParameters.timestamp__from) { - searchQuery.where(`${this.tableName}.updated_at`, '>=', new Date(Number.parseInt(this.queryStringParameters.timestamp__from as string, 10))); - } - if (this.queryStringParameters.timestamp__to) { - searchQuery.where(`${this.tableName}.updated_at`, '<=', new Date(Number.parseInt(this.queryStringParameters.timestamp__to as string, 10))); - } - if (this.queryStringParameters.field?.includes('error.Error')) { + const { dbQueryParameters, searchQuery } = params; + const { term = {} } = dbQueryParameters ?? this.dbQueryParameters; + + if (this.field?.includes('error.Error')) { searchQuery.whereRaw(`${this.tableName}.error ->> 'Error' is not null`); } - const { term = {} } = this.dbQueryParameters; + return super.buildTermQuery({ ...params, - dbQueryParameters: { term: omit(term, ['collectionName', 'collectionVersion', 'pdrName', 'error.Error', 'providerName']) }, + dbQueryParameters: { term: omit(term, 'error.Error') }, }); } diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 64a243ff618..d72689d3be4 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -138,6 +138,14 @@ const executionMapping : { [key: string]: Function } = { updatedAt: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), + // The following fields require querying other tables + collectionId: (value?: string) => { + const { name, version } = (value && deconstructCollectionId(value)) || {}; + return { + collectionName: name, + collectionVersion: version, + }; + }, }; const pdrMapping : { [key: string]: Function } = { @@ -156,6 +164,17 @@ const pdrMapping : { [key: string]: Function } = { updatedAt: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), + // The following fields require querying other tables + collectionId: (value?: string) => { + const { name, version } = (value && deconstructCollectionId(value)) || {}; + return { + collectionName: name, + collectionVersion: version, + }; + }, + provider: (value?: string) => ({ + providerName: value, + }), }; const providerMapping : { [key: string]: Function } = { @@ -189,6 +208,17 @@ const ruleMapping : { [key: string]: Function } = { updatedAt: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), + // The following fields require querying other tables + collectionId: (value?: string) => { + const { name, version } = (value && deconstructCollectionId(value)) || {}; + return { + collectionName: name, + collectionVersion: version, + }; + }, + provider: (value?: string) => ({ + providerName: value, + }), }; // type and its mapping diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts index 32bf6ac0482..e75ea2c56c8 100644 --- a/packages/db/src/search/queries.ts +++ b/packages/db/src/search/queries.ts @@ -1,6 +1,6 @@ import omit from 'lodash/omit'; import Logger from '@cumulus/logger'; -import { DbQueryParameters, QueryStringParameters } from '../types/search'; +import { DbQueryParameters, QueryStringParameters, RangeType } from '../types/search'; import { mapQueryStringFieldToDbField } from './field-mapping'; const log = new Logger({ sender: '@cumulus/db/queries' }); @@ -31,7 +31,47 @@ const regexes: { [key: string]: RegExp } = { }; /** - * Conert term query fields to db query parameters from api query string fields + * Convert range query fields to db query parameters from api query string fields + * + * @param type - query record type + * @param 
queryStringFields - api query fields + * @returns range query parameter + */ +const convertRange = ( + type: string, + queryStringFields: { name: string, value: string }[] +): { range: { [key: string]: RangeType } } => { + const range = queryStringFields.reduce((acc: { [key: string]: RangeType }, queryField) => { + const match = queryField.name.match(regexes.range); + if (!match) return acc; + + // get corresponding db field name, e.g. timestamp => updated_at + const dbField = mapQueryStringFieldToDbField(type, { ...queryField, name: match[1] }); + if (!dbField) return acc; + const dbFieldName = Object.keys(dbField)[0]; + + // build a range field, e.g. + // { timestamp__from: '1712708508310', timestamp__to: '1712712108310' } => + // { updated_at: { + // gte: new Date(1712708508310), + // lte: new Date(1712712108310), + // }, + // } + const rangeField: { [key: string]: RangeType } = { [dbFieldName]: acc[dbFieldName] || {} }; + if (match[2] === 'from') { + rangeField[dbFieldName].gte = dbField[dbFieldName]; + } + if (match[2] === 'to') { + rangeField[dbFieldName].lte = dbField[dbFieldName]; + } + return { ...acc, ...rangeField }; + }, {}); + + return { range }; +}; + +/** + * Convert term query fields to db query parameters from api query string fields * * @param type - query record type * @param queryStringFields - api query fields @@ -54,6 +94,7 @@ const convertTerm = ( * for each type of query */ const convert: { [key: string]: Function } = { + range: convertRange, term: convertTerm, }; diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index 2157c947de5..d61da918422 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -15,13 +15,21 @@ export type QueryEvent = { queryStringParameters?: QueryStringParameters, }; +type QueriableType = boolean | Date | number | string; + +export type RangeType = { + gte?: Omit<QueriableType, 'boolean'>, + lte?: Omit<QueriableType, 'boolean'>, +}; + export type DbQueryParameters = { + fields?: string[], infix?: string, limit?: number, offset?: number, page?: number, prefix?: string, - fields?: string[], - term?: { [key: string]: any }, + range?: { [key: string]: RangeType }, + term?: { [key: string]: QueriableType | undefined }, terms?: { [key: string]: any }, }; diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index ffad472c444..9f9ff180849 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -100,7 +100,7 @@ test.before(async (t) => { t.context.granuleSearchFields = { beginningDateTime: '2020-03-16T19:50:24.757Z', cmrLink: 'https://fakeLink', - duration: '6.8', + duration: 6.8, endingDateTime: '2020-03-17T10:00:00.000Z', lastUpdateDateTime: '2020-03-18T10:00:00.000Z', processingEndDateTime: '2020-03-16T10:00:00.000Z', @@ -126,10 +126,9 @@ test.before(async (t) => { ? t.context.collectionCumulusId : t.context.collectionCumulusId2, pdr_cumulus_id: !(num % 2) ? t.context.pdrCumulusId : undefined, provider_cumulus_id: !(num % 2) ? t.context.providerCumulusId : undefined, - beginning_date_time: !(num % 2) - ? new Date(t.context.granuleSearchFields.beginningDateTime) : undefined, + beginning_date_time: new Date(t.context.granuleSearchFields.beginningDateTime), cmr_link: !(num % 100) ? t.context.granuleSearchFields.cmrLink : undefined, - duration: !(num % 2) ? Number(t.context.granuleSearchFields.duration) : undefined, + duration: t.context.granuleSearchFields.duration + (num % 2), ending_date_time: !(num % 2) ? 
new Date(t.context.granuleSearchFields.endingDateTime) : new Date(), error: !(num % 2) ? JSON.stringify(error) : undefined, @@ -142,7 +141,7 @@ test.before(async (t) => { time_to_process: !(num % 20) ? Number(t.context.granuleSearchFields.timeToPreprocess) : undefined, status: !(num % 2) ? t.context.granuleSearchFields.status : 'completed', - updated_at: !(num % 2) ? new Date(t.context.granuleSearchFields.timestamp) : undefined, + updated_at: new Date(t.context.granuleSearchFields.timestamp + (num % 2) * 1000), })) ); }); @@ -272,7 +271,7 @@ test('GranuleSearch supports term search for date field', async (t) => { beginningDateTime: t.context.granuleSearchFields.beginningDateTime, endingDateTime: t.context.granuleSearchFields.endingDateTime, lastUpdateDateTime: t.context.granuleSearchFields.lastUpdateDateTime, - updatedAt: t.context.granuleSearchFields.updatedAt, + updatedAt: `${t.context.granuleSearchFields.updatedAt}`, }; const dbSearch = new GranuleSearch({ queryStringParameters }); const response = await dbSearch.query(knex); @@ -328,7 +327,7 @@ test('GranuleSearch supports term search for timestamp', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, - timestamp: t.context.granuleSearchFields.timestamp, + timestamp: `${t.context.granuleSearchFields.timestamp}`, }; const dbSearch = new GranuleSearch({ queryStringParameters }); const response = await dbSearch.query(knex); @@ -348,7 +347,42 @@ test('GranuleSearch supports term search for nested error.Error', async (t) => { t.is(response.results?.length, 50); }); -test('GranuleSearch supports term search for multiple fields', async (t) => { +test('GranuleSearch supports range search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + beginningDateTime__from: '2020-03-16', + duration__from: `${t.context.granuleSearchFields.duration - 1}`, + duration__to: `${t.context.granuleSearchFields.duration + 1}`, + timestamp__from: `${t.context.granuleSearchFields.timestamp}`, + timestamp__to: `${t.context.granuleSearchFields.timestamp + 1600}`, + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); + + queryStringParameters = { + limit: 200, + timestamp__from: t.context.granuleSearchFields.timestamp, + timestamp__to: t.context.granuleSearchFields.timestamp + 500, + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + duration__from: `${t.context.granuleSearchFields.duration + 2}`, + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); + +test('GranuleSearch supports search for multiple fields', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, @@ -356,6 +390,8 @@ test('GranuleSearch supports term search for multiple fields', async (t) => { provider: t.context.provider.name, 'error.Error': 'CumulusMessageAdapterExecutionError', status: 'failed', + timestamp__from: t.context.granuleSearchFields.timestamp, + timestamp__to: t.context.granuleSearchFields.timestamp + 500, }; const dbSearch = new GranuleSearch({ queryStringParameters }); const response = await dbSearch.query(knex); @@ -368,6 +404,7 @@ test('GranuleSearch 
non-existing fields are ignored', async (t) => { const queryStringParameters = { limit: 200, non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, }; const dbSearch = new GranuleSearch({ queryStringParameters }); const response = await dbSearch.query(knex); diff --git a/packages/db/tests/search/test-StatsSearch.js b/packages/db/tests/search/test-StatsSearch.js index a2d7b2c6b8f..6b94d7fa14b 100644 --- a/packages/db/tests/search/test-StatsSearch.js +++ b/packages/db/tests/search/test-StatsSearch.js @@ -48,7 +48,8 @@ test.before(async (t) => { range(20).map((num) => ( collections.push(fakeCollectionRecordFactory({ - name: `testCollection___${num}`, + name: 'testCollection', + version: `${num}`, cumulus_id: num, })) )); @@ -126,10 +127,7 @@ test.after.always(async (t) => { test('StatsSearch returns correct response for basic granules query', async (t) => { const { knex } = t.context; - const queryStringParameters = { - type: 'granules', - }; - const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); + const AggregateSearch = new StatsSearch({}, 'granule'); const results = await AggregateSearch.aggregate(knex); const expectedResponse = [ { key: 'completed', count: 25 }, @@ -144,7 +142,6 @@ test('StatsSearch returns correct response for basic granules query', async (t) test('StatsSearch filters correctly by date', async (t) => { const { knex } = t.context; const queryStringParameters = { - type: 'granules', timestamp__from: `${(new Date(2020, 1, 28)).getTime()}`, timestamp__to: `${(new Date(2022, 2, 30)).getTime()}`, }; @@ -164,7 +161,6 @@ test('StatsSearch filters correctly by date', async (t) => { test('StatsSearch filters executions correctly', async (t) => { const { knex } = t.context; let queryStringParameters = { - type: 'executions', field: 'status', }; @@ -179,7 +175,6 @@ test('StatsSearch filters executions correctly', async (t) => { t.deepEqual(results.count, expectedResponse1); queryStringParameters = { - type: 'executions', field: 'status', timestamp__to: `${(new Date(2023, 11, 30)).getTime()}`, timestamp__from: `${(new Date(2021, 1, 28)).getTime()}`, @@ -196,7 +191,6 @@ test('StatsSearch filters executions correctly', async (t) => { t.deepEqual(results2.count, expectedResponse2); queryStringParameters = { - type: 'executions', field: 'status', timestamp__to: `${(new Date(2023, 11, 30)).getTime()}`, timestamp__from: `${(new Date(2021, 1, 28)).getTime()}`, @@ -214,7 +208,6 @@ test('StatsSearch filters executions correctly', async (t) => { test('StatsSearch filters PDRs correctly', async (t) => { const { knex } = t.context; let queryStringParameters = { - type: 'pdrs', field: 'status', }; @@ -229,7 +222,6 @@ test('StatsSearch filters PDRs correctly', async (t) => { t.deepEqual(results.count, expectedResponse); queryStringParameters = { - type: 'pdrs', field: 'status', timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, @@ -242,7 +234,6 @@ test('StatsSearch filters PDRs correctly', async (t) => { t.deepEqual(results2.count, expectedResponse2); queryStringParameters = { - type: 'pdrs', field: 'status', timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, @@ -259,7 +250,6 @@ test('StatsSearch filters PDRs correctly', async (t) => { test('StatsSearch returns correct response when queried by provider', async (t) => { const { knex } = t.context; const 
queryStringParameters = { - type: 'granules', field: 'status', provider: 'testProvider2', }; @@ -274,7 +264,6 @@ test('StatsSearch returns correct response when queried by provider', async (t) test('StatsSearch returns correct response when queried by collection', async (t) => { const { knex } = t.context; const queryStringParameters = { - type: 'granules', field: 'status', collectionId: 'testCollection___8', }; @@ -289,10 +278,9 @@ test('StatsSearch returns correct response when queried by collection', async (t test('StatsSearch returns correct response when queried by collection and provider', async (t) => { const { knex } = t.context; let queryStringParameters = { - type: 'granules', field: 'status', collectionId: 'testCollection___1', - providerId: 'testProvider1', + provider: 'testProvider1', }; const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); @@ -302,10 +290,9 @@ test('StatsSearch returns correct response when queried by collection and provid t.deepEqual(results.count, expectedResponse); queryStringParameters = { - type: 'granules', field: 'status', collectionId: 'testCollection___1', - providerId: 'testProvider1', + provider: 'testProvider1', timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, }; @@ -316,10 +303,9 @@ test('StatsSearch returns correct response when queried by collection and provid t.is(results2.meta.count, 2); t.deepEqual(results2.count, expectedResponse2); queryStringParameters = { - type: 'granules', field: 'status', collectionId: 'testCollection___1', - providerId: 'testProvider1', + provider: 'testProvider1', timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, status: 'failed', @@ -335,7 +321,6 @@ test('StatsSearch returns correct response when queried by collection and provid test('StatsSearch returns correct response when queried by error', async (t) => { const { knex } = t.context; let queryStringParameters = { - type: 'granules', field: 'error.Error.keyword', }; const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); @@ -350,7 +335,6 @@ test('StatsSearch returns correct response when queried by error', async (t) => t.deepEqual(results.count, expectedResponse1); queryStringParameters = { - type: 'granules', field: 'error.Error.keyword', timestamp__to: `${(new Date(2021, 12, 9)).getTime()}`, timestamp__from: `${(new Date(2020, 1, 28)).getTime()}`, @@ -367,9 +351,8 @@ test('StatsSearch returns correct response when queried by error', async (t) => t.deepEqual(results2.count, expectedResponse2); queryStringParameters = { - type: 'granules', collectionId: 'testCollection___1', - providerId: 'testProvider1', + provider: 'testProvider1', field: 'error.Error.keyword', timestamp__to: `${(new Date(2019, 12, 9)).getTime()}`, timestamp__from: `${(new Date(2018, 1, 28)).getTime()}`, @@ -384,7 +367,6 @@ test('StatsSearch returns correct response when queried by error', async (t) => test('StatsSearch can query by infix and prefix when type is defined', async (t) => { const { knex } = t.context; let queryStringParameters = { - type: 'granules', infix: 'testGra', }; const AggregateSearch = new StatsSearch({ queryStringParameters }, 'granule'); @@ -394,7 +376,6 @@ test('StatsSearch can query by infix and prefix when type is defined', async (t) t.deepEqual(results.count, expectedResponse1); queryStringParameters = { - type: 'granules', prefix: 'query', }; const AggregateSearch2 = new StatsSearch({ 
queryStringParameters }, 'granule'); @@ -404,13 +385,13 @@ test('StatsSearch can query by infix and prefix when type is defined', async (t) t.deepEqual(results2.count, expectedResponse2); queryStringParameters = { - type: 'collections', - infix: 'testCollection___8', + infix: 'testCollection', + version: '8', field: 'name', }; const AggregateSearch3 = new StatsSearch({ queryStringParameters }, 'collection'); const results3 = await AggregateSearch3.aggregate(knex); - const expectedResponse3 = [{ key: 'testCollection___8', count: 1 }]; + const expectedResponse3 = [{ key: 'testCollection', count: 1 }]; t.is(results3.meta.count, 1); t.deepEqual(results3.count, expectedResponse3); }); diff --git a/packages/db/tests/search/test-field-mapping.js b/packages/db/tests/search/test-field-mapping.js index 4a93a2d21a3..b1d18befd30 100644 --- a/packages/db/tests/search/test-field-mapping.js +++ b/packages/db/tests/search/test-field-mapping.js @@ -132,6 +132,7 @@ test('mapQueryStringFieldToDbField correctly converts all execution api fields t execution: 'https://example.com', status: 'completed', updatedAt: 1591384094512, + collectionId: 'MOD09GQ___006', }; const expectedDbParameters = { @@ -140,6 +141,8 @@ test('mapQueryStringFieldToDbField correctly converts all execution api fields t url: 'https://example.com', status: 'completed', updated_at: new Date(1591384094512), + collectionName: 'MOD09GQ', + collectionVersion: '006', }; const apiFieldsList = Object.entries(queryStringParameters) @@ -157,6 +160,8 @@ test('mapQueryStringFieldToDbField correctly converts all pdr api fields to db f pdrName: 'fakePdrName', status: 'completed', updatedAt: 1591384094512, + collectionId: 'MOD09GQ___006', + provider: 's3_provider', }; const expectedDbParameters = { @@ -164,6 +169,9 @@ test('mapQueryStringFieldToDbField correctly converts all pdr api fields to db f name: 'fakePdrName', status: 'completed', updated_at: new Date(1591384094512), + collectionName: 'MOD09GQ', + collectionVersion: '006', + providerName: 's3_provider', }; const apiFieldsList = Object.entries(queryStringParameters) @@ -203,6 +211,8 @@ test('mapQueryStringFieldToDbField correctly converts all rule api fields to db name: 'fakePdrName', state: 'DISABLED', updatedAt: 1591384094512, + collectionId: 'MOD09GQ___006', + provider: 's3_provider', }; const expectedDbParameters = { @@ -210,6 +220,9 @@ test('mapQueryStringFieldToDbField correctly converts all rule api fields to db name: 'fakePdrName', enabled: false, updated_at: new Date(1591384094512), + collectionName: 'MOD09GQ', + collectionVersion: '006', + providerName: 's3_provider', }; const apiFieldsList = Object.entries(queryStringParameters) diff --git a/packages/db/tests/search/test-queries.js b/packages/db/tests/search/test-queries.js index 4de313d81d0..34d766a75c7 100644 --- a/packages/db/tests/search/test-queries.js +++ b/packages/db/tests/search/test-queries.js @@ -5,6 +5,7 @@ const { test('convertQueryStringToDbQueryParameters correctly converts api query string parameters to db query parameters', (t) => { const queryStringParameters = { + duration__from: 25, fields: 'granuleId,collectionId,status,updatedAt', infix: 'A1657416', limit: 20, @@ -12,9 +13,12 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string prefix: 'MO', published: 'true', status: 'completed', + timestamp__from: '1712708508310', + timestamp__to: '1712712108310', 'error.Error': 'CumulusMessageAdapterExecutionError', collectionId: 'MOD09GQ___006', nonExistingField: 'nonExistingFieldValue', + 
nonExistingField__from: 'nonExistingFieldValue', }; const expectedDbQueryParameters = { @@ -24,6 +28,15 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string offset: 40, page: 3, prefix: 'MO', + range: { + duration: { + gte: queryStringParameters.duration__from, + }, + updated_at: { + gte: new Date(Number(queryStringParameters.timestamp__from)), + lte: new Date(Number(queryStringParameters.timestamp__to)), + }, + }, term: { collectionName: 'MOD09GQ', collectionVersion: '006', From fff3505f1314e7227cedf2a839effd719d8b9dcf Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Thu, 30 May 2024 12:31:03 -0400 Subject: [PATCH 05/61] CUMULUS-3695 - Update Granules endpoint to handle SortFields (#3663) * first committ * CHANGELOG change * fixing sortQueryMethod * simplifying code * PR feedback * merge conflicts + improving code * small jsdoc fix * PR feedback * PR feedback * PR feedback * fixing test * PR feedback * PR feedback --- CHANGELOG.md | 2 + packages/db/src/search/BaseSearch.ts | 23 +++ packages/db/src/search/field-mapping.ts | 3 + packages/db/src/search/queries.ts | 31 +++- packages/db/src/types/search.ts | 8 +- .../db/tests/search/test-GranuleSearch.js | 133 +++++++++++++++++- packages/db/tests/search/test-queries.js | 31 ++++ 7 files changed, 224 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e31689c25e9..43576aec1cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Replace ElasticSearch Phase 1 +- **CUMULUS-3695** + - Updated `granule` list api endpoint and BaseSearch class to handle sort fields - **CUMULUS-3688** - Updated `stats` api endpoint to query postgres instead of elasticsearch - **CUMULUS-3689** diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 10d6bb38d79..249e0452fd2 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -75,6 +75,7 @@ class BaseSearch { this.buildTermQuery({ countQuery, searchQuery }); this.buildRangeQuery({ countQuery, searchQuery }); this.buildInfixPrefixQuery({ countQuery, searchQuery }); + this.buildSortQuery({ searchQuery }); const { limit, offset } = this.dbQueryParameters; if (limit) searchQuery.limit(limit); @@ -203,6 +204,28 @@ class BaseSearch { }); } + /** + * Build queries for sort keys and fields + * + * @param params + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildSortQuery(params: { + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { searchQuery, dbQueryParameters } = params; + const { sort } = dbQueryParameters || this.dbQueryParameters; + sort?.forEach((key) => { + if (key.column.startsWith('error')) { + searchQuery.orderByRaw(`${this.tableName}.error ->> 'Error' ${key.order}`); + } else { + searchQuery.orderBy([key]); + } + }); + } + /** * Translate postgres records to api records * diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index d72689d3be4..e2f97ddc94f 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -60,6 +60,9 @@ const granuleMapping: { [key: string]: Function } = { 'error.Error': (value?: string) => ({ 'error.Error': value, }), + 'error.Error.keyword': (value?: string) => ({ + 'error.Error': value, + }), // The following fields 
require querying other tables collectionId: (value?: string) => { const { name, version } = (value && deconstructCollectionId(value)) || {}; diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts index e75ea2c56c8..f6f9dc118e3 100644 --- a/packages/db/src/search/queries.ts +++ b/packages/db/src/search/queries.ts @@ -1,6 +1,6 @@ import omit from 'lodash/omit'; import Logger from '@cumulus/logger'; -import { DbQueryParameters, QueryStringParameters, RangeType } from '../types/search'; +import { DbQueryParameters, QueryStringParameters, RangeType, SortType } from '../types/search'; import { mapQueryStringFieldToDbField } from './field-mapping'; const log = new Logger({ sender: '@cumulus/db/queries' }); @@ -89,6 +89,34 @@ const convertTerm = ( return { term }; }; +/** + * Convert sort query fields to db query parameters from api query string fields + * + * @param type - query record type + * @param queryStringParameters - query string parameters + * @returns sort query parameter + */ +const convertSort = ( + type: string, + queryStringParameters: QueryStringParameters +): SortType[] => { + const sortArray: SortType[] = []; + const { sort_by: sortBy, sort_key: sortKey } = queryStringParameters; + let { order } = queryStringParameters; + if (sortBy) { + order = order ?? 'asc'; + const queryParam = mapQueryStringFieldToDbField(type, { name: sortBy }); + Object.keys(queryParam ?? {}).map((key) => sortArray.push({ column: key, order })); + } else if (sortKey) { + sortKey.map((item) => { + order = item.startsWith('-') ? 'desc' : 'asc'; + const queryParam = mapQueryStringFieldToDbField(type, { name: item.replace(/^[+-]/, '') }); + return Object.keys(queryParam ?? {}).map((key) => sortArray.push({ column: key, order })); + }); + } + return sortArray; +}; + /** * functions for converting from api query string parameters to db query parameters * for each type of query @@ -119,6 +147,7 @@ export const convertQueryStringToDbQueryParameters = ( if (typeof infix === 'string') dbQueryParameters.infix = infix; if (typeof prefix === 'string') dbQueryParameters.prefix = prefix; if (typeof fields === 'string') dbQueryParameters.fields = fields.split(','); + dbQueryParameters.sort = convertSort(type, queryStringParameters); // remove reserved words (that are not fields) const fieldParams = omit(queryStringParameters, reservedWords); diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index d61da918422..f694e7aae4d 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -7,7 +7,7 @@ export type QueryStringParameters = { order?: string, prefix?: string, sort_by?: string, - sort_key?: string, + sort_key?: string[], [key: string]: string | string[] | undefined, }; @@ -22,6 +22,11 @@ export type RangeType = { lte?: Omit, }; +export type SortType = { + column: string, + order?: string, +}; + export type DbQueryParameters = { fields?: string[], infix?: string, @@ -30,6 +35,7 @@ export type DbQueryParameters = { page?: number, prefix?: string, range?: { [key: string]: RangeType }, + sort?: SortType[], term?: { [key: string]: QueriableType | undefined }, terms?: { [key: string]: any }, }; diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index 9f9ff180849..2e4c87f813b 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -41,7 +41,7 @@ test.before(async (t) => { t.context.collectionName = 'fakeCollection'; 
t.context.collectionVersion = 'v1'; - const collectionName2 = 'fakeCollection2'; + const collectionName2 = 'testCollection2'; const collectionVersion2 = 'v2'; t.context.collectionId = constructCollectionId( @@ -104,7 +104,7 @@ test.before(async (t) => { endingDateTime: '2020-03-17T10:00:00.000Z', lastUpdateDateTime: '2020-03-18T10:00:00.000Z', processingEndDateTime: '2020-03-16T10:00:00.000Z', - productVolume: '600', + productVolume: '6000', timeToArchive: '700.29', timeToPreprocess: '800.18', status: 'failed', @@ -135,7 +135,8 @@ test.before(async (t) => { last_update_date_time: !(num % 2) ? t.context.granuleSearchFields.lastUpdateDateTime : undefined, published: !!(num % 2), - product_volume: !(num % 5) ? Number(t.context.granuleSearchFields.productVolume) : undefined, + product_volume: Math.round(Number(t.context.granuleSearchFields.productVolume) + * (1 / (num + 1))).toString(), time_to_archive: !(num % 10) ? Number(t.context.granuleSearchFields.timeToArchive) : undefined, time_to_process: !(num % 20) @@ -288,8 +289,8 @@ test('GranuleSearch supports term search for number field', async (t) => { }; let dbSearch = new GranuleSearch({ queryStringParameters }); let response = await dbSearch.query(knex); - t.is(response.meta.count, 10); - t.is(response.results?.length, 5); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); queryStringParameters = { limit: 200, @@ -424,3 +425,125 @@ test('GranuleSearch returns fields specified', async (t) => { t.is(response.results?.length, 10); response.results.forEach((granule) => t.deepEqual(Object.keys(granule), fields.split(','))); }); + +test('GranuleSearch supports sorting', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + sort_by: 'timestamp', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); + t.true(response.results[0].updatedAt < response.results[99].updatedAt); + t.true(response.results[1].updatedAt < response.results[50].updatedAt); + + queryStringParameters = { + limit: 200, + sort_by: 'timestamp', + order: 'asc', + }; + const dbSearch2 = new GranuleSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 100); + t.is(response2.results?.length, 100); + t.true(response2.results[0].updatedAt < response2.results[99].updatedAt); + t.true(response2.results[1].updatedAt < response2.results[50].updatedAt); + + queryStringParameters = { + limit: 200, + sort_key: ['-timestamp'], + }; + const dbSearch3 = new GranuleSearch({ queryStringParameters }); + const response3 = await dbSearch3.query(knex); + t.is(response3.meta.count, 100); + t.is(response3.results?.length, 100); + t.true(response3.results[0].updatedAt > response3.results[99].updatedAt); + t.true(response3.results[1].updatedAt > response3.results[50].updatedAt); + + queryStringParameters = { + limit: 200, + sort_key: ['+productVolume'], + }; + const dbSearch4 = new GranuleSearch({ queryStringParameters }); + const response4 = await dbSearch4.query(knex); + t.is(response4.meta.count, 100); + t.is(response4.results?.length, 100); + t.true(Number(response4.results[0].productVolume) < Number(response4.results[1].productVolume)); + t.true(Number(response4.results[98].productVolume) < Number(response4.results[99].productVolume)); + + queryStringParameters = { + limit: 200, + sort_key: ['-timestamp', '+productVolume'], + }; + const dbSearch5 = new 
GranuleSearch({ queryStringParameters }); + const response5 = await dbSearch5.query(knex); + t.is(response5.meta.count, 100); + t.is(response5.results?.length, 100); + t.true(response5.results[0].updatedAt > response5.results[99].updatedAt); + t.true(response5.results[1].updatedAt > response5.results[50].updatedAt); + t.true(Number(response5.results[1].productVolume) < Number(response5.results[99].productVolume)); + t.true(Number(response5.results[0].productVolume) < Number(response5.results[10].productVolume)); + + queryStringParameters = { + limit: 200, + sort_key: ['-timestamp'], + sort_by: 'timestamp', + order: 'asc', + }; + const dbSearch6 = new GranuleSearch({ queryStringParameters }); + const response6 = await dbSearch6.query(knex); + t.is(response6.meta.count, 100); + t.is(response6.results?.length, 100); + t.true(response6.results[0].updatedAt < response6.results[99].updatedAt); + t.true(response6.results[1].updatedAt < response6.results[50].updatedAt); +}); + +test('GranuleSearch supports sorting by CollectionId', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + sort_by: 'collectionId', + order: 'asc', + }; + const dbSearch8 = new GranuleSearch({ queryStringParameters }); + const response8 = await dbSearch8.query(knex); + t.is(response8.meta.count, 100); + t.is(response8.results?.length, 100); + t.true(response8.results[0].collectionId < response8.results[99].collectionId); + t.true(response8.results[0].collectionId < response8.results[50].collectionId); + + queryStringParameters = { + limit: 200, + sort_key: ['-collectionId'], + }; + const dbSearch9 = new GranuleSearch({ queryStringParameters }); + const response9 = await dbSearch9.query(knex); + t.is(response9.meta.count, 100); + t.is(response9.results?.length, 100); + t.true(response9.results[0].collectionId > response9.results[99].collectionId); + t.true(response9.results[0].collectionId > response9.results[50].collectionId); +}); + +test('GranuleSearch supports sorting by Error', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + sort_by: 'error.Error', + }; + const dbSearch7 = new GranuleSearch({ queryStringParameters }); + const response7 = await dbSearch7.query(knex); + t.is(response7.results[0].error.Error, 'CumulusMessageAdapterExecutionError'); + t.is(response7.results[99].error, undefined); + + queryStringParameters = { + limit: 200, + sort_by: 'error.Error.keyword', + order: 'asc', + }; + const dbSearch10 = new GranuleSearch({ queryStringParameters }); + const response10 = await dbSearch10.query(knex); + t.is(response10.results[0].error.Error, 'CumulusMessageAdapterExecutionError'); + t.is(response10.results[99].error, undefined); +}); diff --git a/packages/db/tests/search/test-queries.js b/packages/db/tests/search/test-queries.js index 34d766a75c7..2b4b3324d78 100644 --- a/packages/db/tests/search/test-queries.js +++ b/packages/db/tests/search/test-queries.js @@ -11,6 +11,7 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string limit: 20, page: 3, prefix: 'MO', + sort_key: ['-productVolume', '+timestamp'], published: 'true', status: 'completed', timestamp__from: '1712708508310', @@ -28,6 +29,14 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string offset: 40, page: 3, prefix: 'MO', + sort: [{ + column: 'product_volume', + order: 'desc', + }, + { + column: 'updated_at', + order: 'asc', + }], range: { duration: { gte: queryStringParameters.duration__from, @@ -49,3 +58,25 @@ 
test('convertQueryStringToDbQueryParameters correctly converts api query string const dbQueryParams = convertQueryStringToDbQueryParameters('granule', queryStringParameters); t.deepEqual(dbQueryParams, expectedDbQueryParameters); }); + +test('convertQueryStringToDbQueryParameters correctly converts sortby error parameter to db query parameters', (t) => { + const queryStringParameters = { + sort_by: 'error.Error.keyword', + order: 'asc', + }; + + const expectedDbQueryParameters = { + limit: 10, + offset: 0, + page: 1, + sort: [ + { + column: 'error.Error', + order: 'asc', + }, + ], + }; + + const dbQueryParams = convertQueryStringToDbQueryParameters('granule', queryStringParameters); + t.deepEqual(dbQueryParams, expectedDbQueryParameters); +}); From f08edd59aa4df9a3592d6c3b0a0c84b657762abf Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:22:22 -0400 Subject: [PATCH 06/61] CUMULUS-3696: Update granules List endpoints to query postgres - match (#3674) * add methods to convert terms,not,exists --- CHANGELOG.md | 2 + packages/db/src/search/BaseSearch.ts | 199 +++++++++++- packages/db/src/search/GranuleSearch.ts | 36 +-- packages/db/src/search/field-mapping.ts | 3 + packages/db/src/search/queries.ts | 96 +++++- packages/db/src/types/search.ts | 6 +- .../db/tests/search/test-GranuleSearch.js | 298 +++++++++++++++++- packages/db/tests/search/test-queries.js | 15 + 8 files changed, 597 insertions(+), 58 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3b682c8b69..75d4b5d12ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added functionality to `@cumulus/db/src/search` to support term queries - Updated `BaseSearch` and `GranuleSearch` classes to support term queries for granules - Updated granules List endpoint to search postgres +- **CUMULUS-3696** + - Added functionality to `@cumulus/db/src/search` to support terms, `not` and `exists` queries ### Migration Notes diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 249e0452fd2..d616a12d0c2 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -1,10 +1,11 @@ import { Knex } from 'knex'; +import omit from 'lodash/omit'; import Logger from '@cumulus/logger'; import { BaseRecord } from '../types/base'; import { getKnexClient } from '../connection'; import { TableNames } from '../tables'; -import { DbQueryParameters, QueryEvent, QueryStringParameters } from '../types/search'; +import { DbQueryParameters, QueriableType, QueryEvent, QueryStringParameters } from '../types/search'; import { convertQueryStringToDbQueryParameters } from './queries'; const log = new Logger({ sender: '@cumulus/db/BaseSearch' }); @@ -47,17 +48,39 @@ class BaseSearch { ); } + /** + * check if joined collections table search is needed + * + * @returns whether collection search is needed + */ protected searchCollection(): boolean { - const term = this.dbQueryParameters.term; - return !!(term?.collectionName || term?.collectionVersion); + const { not, term, terms } = this.dbQueryParameters; + return !!(not?.collectionName + || not?.collectionVersion + || term?.collectionName + || term?.collectionVersion + || terms?.collectionName + || terms?.collectionVersion); } + /** + * check if joined pdrs table search is needed + * + * @returns whether pdr search is needed + */ protected searchPdr(): boolean { - return 
!!this.dbQueryParameters.term?.pdrName; + const { not, term, terms } = this.dbQueryParameters; + return !!(not?.pdrName || term?.pdrName || terms?.pdrName); } + /** + * check if joined providers table search is needed + * + * @returns whether provider search is needed + */ protected searchProvider(): boolean { - return !!this.dbQueryParameters.term?.providerName; + const { not, term, terms } = this.dbQueryParameters; + return !!(not?.providerName || term?.providerName || terms?.providerName); } /** @@ -73,7 +96,10 @@ class BaseSearch { } { const { countQuery, searchQuery } = this.buildBasicQuery(knex); this.buildTermQuery({ countQuery, searchQuery }); + this.buildTermsQuery({ countQuery, searchQuery }); + this.buildNotMatchQuery({ countQuery, searchQuery }); this.buildRangeQuery({ countQuery, searchQuery }); + this.buildExistsQuery({ countQuery, searchQuery }); this.buildInfixPrefixQuery({ countQuery, searchQuery }); this.buildSortQuery({ searchQuery }); @@ -129,6 +155,47 @@ class BaseSearch { throw new Error('buildInfixPrefixQuery is not implemented'); } + /** + * Build queries for checking if field 'exists' + * + * @param params + * @param [params.countQuery] - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildExistsQuery(params: { + countQuery?: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { exists = {} } = dbQueryParameters ?? this.dbQueryParameters; + + Object.entries(exists).forEach(([name, value]) => { + const queryMethod = value ? 'whereNotNull' : 'whereNull'; + const checkNull = value ? 'not null' : 'null'; + switch (name) { + case 'collectionName': + case 'collectionVersion': + [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.collection_cumulus_id`)); + break; + case 'providerName': + [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.provider_cumulus_id`)); + break; + case 'pdrName': + [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.pdr_cumulus_id`)); + break; + case 'error': + case 'error.Error': + [countQuery, searchQuery].forEach((query) => query?.whereRaw(`${this.tableName}.error ->> 'Error' is ${checkNull}`)); + break; + default: + [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.${name}`)); + break; + } + }); + } + /** * Build queries for range fields * @@ -156,6 +223,7 @@ class BaseSearch { } }); } + /** * Build queries for term fields * @@ -181,24 +249,125 @@ class BaseSearch { Object.entries(term).forEach(([name, value]) => { switch (name) { case 'collectionName': - countQuery?.where(`${collectionsTable}.name`, value); - searchQuery.where(`${collectionsTable}.name`, value); + [countQuery, searchQuery].forEach((query) => query?.where(`${collectionsTable}.name`, value)); break; case 'collectionVersion': - countQuery?.where(`${collectionsTable}.version`, value); - searchQuery.where(`${collectionsTable}.version`, value); + [countQuery, searchQuery].forEach((query) => query?.where(`${collectionsTable}.version`, value)); break; case 'providerName': - countQuery?.where(`${providersTable}.name`, value); - searchQuery.where(`${providersTable}.name`, value); + [countQuery, searchQuery].forEach((query) => query?.where(`${providersTable}.name`, value)); break; case 'pdrName': - 
countQuery?.where(`${pdrsTable}.name`, value); - searchQuery.where(`${pdrsTable}.name`, value); + [countQuery, searchQuery].forEach((query) => query?.where(`${pdrsTable}.name`, value)); + break; + case 'error.Error': + [countQuery, searchQuery] + .forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' = '${value}'`)); + break; + default: + [countQuery, searchQuery].forEach((query) => query?.where(`${this.tableName}.${name}`, value)); + break; + } + }); + } + + /** + * Build queries for terms fields + * + * @param params + * @param [params.countQuery] - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildTermsQuery(params: { + countQuery?: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { + collections: collectionsTable, + providers: providersTable, + pdrs: pdrsTable, + } = TableNames; + + const { countQuery, searchQuery, dbQueryParameters } = params; + const { terms = {} } = dbQueryParameters ?? this.dbQueryParameters; + + // collection name and version are searched in pair + if (terms.collectionName && terms.collectionVersion + && terms.collectionName.length > 0 + && terms.collectionVersion.length > 0) { + const collectionPair: QueriableType[][] = []; + for (let i = 0; i < terms.collectionName.length; i += 1) { + const name = terms.collectionName[i]; + const version = terms.collectionVersion[i]; + if (name && version) collectionPair.push([name, version]); + } + [countQuery, searchQuery] + .forEach((query) => query?.whereIn([`${collectionsTable}.name`, `${collectionsTable}.version`], collectionPair)); + } + + Object.entries(omit(terms, ['collectionName', 'collectionVersion'])).forEach(([name, value]) => { + switch (name) { + case 'providerName': + [countQuery, searchQuery].forEach((query) => query?.whereIn(`${providersTable}.name`, value)); + break; + case 'pdrName': + [countQuery, searchQuery].forEach((query) => query?.whereIn(`${pdrsTable}.name`, value)); + break; + case 'error.Error': + [countQuery, searchQuery] + .forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' in ('${value.join('\',\'')}')`)); + break; + default: + [countQuery, searchQuery].forEach((query) => query?.whereIn(`${this.tableName}.${name}`, value)); + break; + } + }); + } + + /** + * Build queries for checking if field doesn't match the given value + * + * @param params + * @param [params.countQuery] - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildNotMatchQuery(params: { + countQuery?: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { + collections: collectionsTable, + providers: providersTable, + pdrs: pdrsTable, + } = TableNames; + + const { countQuery, searchQuery, dbQueryParameters } = params; + const { not: term = {} } = dbQueryParameters ?? 
this.dbQueryParameters; + + // collection name and version are searched in pair + if (term.collectionName && term.collectionVersion) { + [countQuery, searchQuery].forEach((query) => query?.whereNot({ + [`${collectionsTable}.name`]: term.collectionName, + [`${collectionsTable}.version`]: term.collectionVersion, + })); + } + Object.entries(omit(term, ['collectionName', 'collectionVersion'])).forEach(([name, value]) => { + switch (name) { + case 'providerName': + [countQuery, searchQuery].forEach((query) => query?.whereNot(`${providersTable}.name`, value)); + break; + case 'pdrName': + [countQuery, searchQuery].forEach((query) => query?.whereNot(`${pdrsTable}.name`, value)); + break; + case 'error.Error': + [countQuery, searchQuery].forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' != '${value}'`)); break; default: - countQuery?.where(`${this.tableName}.${name}`, value); - searchQuery.where(`${this.tableName}.${name}`, value); + [countQuery, searchQuery].forEach((query) => query?.whereNot(`${this.tableName}.${name}`, value)); break; } }); diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts index 37a35e27ba9..56e57a9bfa7 100644 --- a/packages/db/src/search/GranuleSearch.ts +++ b/packages/db/src/search/GranuleSearch.ts @@ -1,5 +1,4 @@ import { Knex } from 'knex'; -import omit from 'lodash/omit'; import pick from 'lodash/pick'; import { ApiGranuleRecord } from '@cumulus/types/api/granules'; @@ -99,44 +98,13 @@ export class GranuleSearch extends BaseSearch { const { countQuery, searchQuery, dbQueryParameters } = params; const { infix, prefix } = dbQueryParameters ?? this.dbQueryParameters; if (infix) { - countQuery.whereLike(`${this.tableName}.granule_id`, `%${infix}%`); - searchQuery.whereLike(`${this.tableName}.granule_id`, `%${infix}%`); + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.granule_id`, `%${infix}%`)); } if (prefix) { - countQuery.whereLike(`${this.tableName}.granule_id`, `${prefix}%`); - searchQuery.whereLike(`${this.tableName}.granule_id`, `${prefix}%`); + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.granule_id`, `${prefix}%`)); } } - /** - * Build queries for term fields - * - * @param params - * @param params.countQuery - query builder for getting count - * @param params.searchQuery - query builder for search - * @param [params.dbQueryParameters] - db query parameters - */ - protected buildTermQuery(params: { - countQuery: Knex.QueryBuilder, - searchQuery: Knex.QueryBuilder, - dbQueryParameters?: DbQueryParameters, - }) { - const { countQuery, searchQuery, dbQueryParameters } = params; - const { term = {} } = dbQueryParameters ?? 
this.dbQueryParameters; - - Object.entries(term).forEach(([name, value]) => { - if (name === 'error.Error') { - countQuery.whereRaw(`${this.tableName}.error->>'Error' = '${value}'`); - searchQuery.whereRaw(`${this.tableName}.error->>'Error' = '${value}'`); - } - }); - - super.buildTermQuery({ - ...params, - dbQueryParameters: { term: omit(term, 'error.Error') }, - }); - } - /** * Translate postgres records to api records * diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index e2f97ddc94f..75cc91a00b7 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -56,6 +56,9 @@ const granuleMapping: { [key: string]: Function } = { updatedAt: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), + error: (value?: string) => ({ + error: value, + }), // nested error field 'error.Error': (value?: string) => ({ 'error.Error': value, diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts index f6f9dc118e3..192fa01265c 100644 --- a/packages/db/src/search/queries.ts +++ b/packages/db/src/search/queries.ts @@ -1,6 +1,6 @@ import omit from 'lodash/omit'; import Logger from '@cumulus/logger'; -import { DbQueryParameters, QueryStringParameters, RangeType, SortType } from '../types/search'; +import { DbQueryParameters, QueriableType, QueryStringParameters, RangeType, SortType } from '../types/search'; import { mapQueryStringFieldToDbField } from './field-mapping'; const log = new Logger({ sender: '@cumulus/db/queries' }); @@ -30,6 +30,54 @@ const regexes: { [key: string]: RegExp } = { range: /^(.*)__(from|to)$/, }; +/** + * Convert 'exists' query fields to db query parameters from api query string fields + * + * @param type - query record type + * @param queryStringFields - api query fields + * @returns 'exists' query parameter + */ +const convertExists = ( + type: string, + queryStringFields: { name: string, value: string }[] +): { exists: { [key: string]: boolean } } => { + const exists = queryStringFields.reduce((acc, queryField) => { + const match = queryField.name.match(regexes.exists); + if (!match) return acc; + + // get corresponding db field name, e.g. granuleId => granule_id + const dbField = mapQueryStringFieldToDbField(type, { name: match[1] }); + if (!dbField) return acc; + Object.keys(dbField).forEach((key) => { dbField[key] = (queryField.value === 'true'); }); + return { ...acc, ...dbField }; + }, {}); + + return { exists }; +}; + +/** + * Convert 'not' query fields to db query parameters from api query string fields + * + * @param type - query record type + * @param queryStringFields - api query fields + * @returns 'not' query parameter + */ +const convertNotMatch = ( + type: string, + queryStringFields: { name: string, value: string }[] +): { not: { [key: string]: QueriableType } } => { + const not = queryStringFields.reduce((acc, queryField) => { + const match = queryField.name.match(regexes.not); + if (!match) return acc; + + // get corresponding db field name, e.g. 
granuleId => granule_id + const queryParam = mapQueryStringFieldToDbField(type, { ...queryField, name: match[1] }); + return { ...acc, ...queryParam }; + }, {}); + + return { not }; +}; + /** * Convert range query fields to db query parameters from api query string fields * @@ -80,7 +128,7 @@ const convertRange = ( const convertTerm = ( type: string, queryStringFields: { name: string, value: string }[] -): { term: { [key: string]: any } } => { +): { term: { [key: string]: QueriableType } } => { const term = queryStringFields.reduce((acc, queryField) => { const queryParam = mapQueryStringFieldToDbField(type, queryField); return { ...acc, ...queryParam }; @@ -89,6 +137,47 @@ const convertTerm = ( return { term }; }; +/** + * Convert terms query fields to db query parameters from api query string fields + * + * @param type - query record type + * @param queryStringFields - api query fields + * @returns terms query parameter + */ +const convertTerms = ( + type: string, + queryStringFields: { name: string, value: string }[] +): { terms: { [key: string]: QueriableType[] } } => { + const terms = queryStringFields.reduce((acc: { [key: string]: QueriableType[] }, queryField) => { + const match = queryField.name.match(regexes.terms); + if (!match) return acc; + + // build a terms field, e.g. + // { granuleId__in: 'granuleId1,granuleId2' } => + // [[granule_id, granuleId1], [granule_id, granuleId2]] => + // { granule_id: [granuleId1, granuleId2] } + // this converts collectionId into name and version fields + const name = match[1]; + const values = queryField.value.split(','); + const dbFieldValues = values + .map((value: string) => { + const dbField = mapQueryStringFieldToDbField(type, { name, value }); + return Object.entries(dbField ?? {}); + }) + .filter(Boolean) + .flat(); + + if (dbFieldValues.length === 0) return acc; + dbFieldValues.forEach(([field, value]) => { + acc[field] = acc[field] ?? 
[]; + acc[field].push(value); + }); + return acc; + }, {}); + + return { terms }; +}; + /** * Convert sort query fields to db query parameters from api query string fields * @@ -122,8 +211,11 @@ const convertSort = ( * for each type of query */ const convert: { [key: string]: Function } = { + exists: convertExists, + not: convertNotMatch, range: convertRange, term: convertTerm, + terms: convertTerms, }; /** diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index f694e7aae4d..68cb7b2d0dd 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -15,7 +15,7 @@ export type QueryEvent = { queryStringParameters?: QueryStringParameters, }; -type QueriableType = boolean | Date | number | string; +export type QueriableType = boolean | Date | number | string; export type RangeType = { gte?: Omit, @@ -31,11 +31,13 @@ export type DbQueryParameters = { fields?: string[], infix?: string, limit?: number, + exists?: { [key: string]: boolean }, + not?: { [key: string]: QueriableType | undefined }, offset?: number, page?: number, prefix?: string, range?: { [key: string]: RangeType }, sort?: SortType[], term?: { [key: string]: QueriableType | undefined }, - terms?: { [key: string]: any }, + terms?: { [key: string]: QueriableType[] }, }; diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index 2e4c87f813b..370330d2128 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -102,6 +102,7 @@ test.before(async (t) => { cmrLink: 'https://fakeLink', duration: 6.8, endingDateTime: '2020-03-17T10:00:00.000Z', + 'error.Error': 'CumulusMessageAdapterExecutionError', lastUpdateDateTime: '2020-03-18T10:00:00.000Z', processingEndDateTime: '2020-03-16T10:00:00.000Z', productVolume: '6000', @@ -112,6 +113,8 @@ test.before(async (t) => { updatedAt: 1579352700000, }; + t.context.granuleIds = range(100).map(generateGranuleId); + const error = { Cause: 'cause string', Error: 'CumulusMessageAdapterExecutionError', @@ -121,7 +124,7 @@ test.before(async (t) => { t.context.pgGranules = await t.context.granulePgModel.insert( knex, range(100).map((num) => fakeGranuleRecordFactory({ - granule_id: generateGranuleId(num), + granule_id: t.context.granuleIds[num], collection_cumulus_id: (num % 2) ? t.context.collectionCumulusId : t.context.collectionCumulusId2, pdr_cumulus_id: !(num % 2) ? 
t.context.pdrCumulusId : undefined, @@ -387,17 +390,20 @@ test('GranuleSearch supports search for multiple fields', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, - collectionId: t.context.collectionId2, + collectionId__in: [t.context.collectionId2, t.context.collectionId].join(','), + cmrLink__exists: 'false', + 'error.Error': t.context.granuleSearchFields['error.Error'], provider: t.context.provider.name, - 'error.Error': 'CumulusMessageAdapterExecutionError', + published__not: 'true', status: 'failed', timestamp__from: t.context.granuleSearchFields.timestamp, timestamp__to: t.context.granuleSearchFields.timestamp + 500, + sort_key: ['collectionId', '-timestamp'], }; const dbSearch = new GranuleSearch({ queryStringParameters }); const response = await dbSearch.query(knex); - t.is(response.meta.count, 50); - t.is(response.results?.length, 50); + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); }); test('GranuleSearch non-existing fields are ignored', async (t) => { @@ -547,3 +553,285 @@ test('GranuleSearch supports sorting by Error', async (t) => { t.is(response10.results[0].error.Error, 'CumulusMessageAdapterExecutionError'); t.is(response10.results[99].error, undefined); }); + +test('GranuleSearch supports terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + granuleId__in: [t.context.granuleIds[0], t.context.granuleIds[5]].join(','), + published__in: 'true,false', + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 2); + t.is(response.results?.length, 2); + + queryStringParameters = { + limit: 200, + granuleId__in: [t.context.granuleIds[0], t.context.granuleIds[5]].join(','), + published__in: 'true', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('GranuleSearch supports collectionId terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + collectionId__in: [t.context.collectionId2, constructCollectionId('fakecollectionterms', 'v1')].join(','), + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + collectionId__in: [t.context.collectionId, t.context.collectionId2].join(','), + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); +}); + +test('GranuleSearch supports provider terms search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + provider__in: [t.context.provider.name, 'fakeproviderterms'].join(','), + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports pdrName terms search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + pdrName__in: [t.context.pdr.name, 'fakepdrterms'].join(','), + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + 
t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports error.Error terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + 'error.Error__in': [t.context.granuleSearchFields['error.Error'], 'unknownerror'].join(','), + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + 'error.Error__in': 'unknownerror', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); + +test('GranuleSearch supports search which granule field does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + granuleId__not: t.context.granuleIds[0], + published__not: 'true', + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); + + queryStringParameters = { + limit: 200, + granuleId__not: t.context.granuleIds[0], + published__not: 'false', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports search which collectionId does not match the given value', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + collectionId__not: t.context.collectionId2, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports search which provider does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + provider__not: t.context.provider.name, + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); + + queryStringParameters = { + limit: 200, + provider__not: 'providernotexist', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports search which pdrName does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + pdrName__not: t.context.pdr.name, + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); + + queryStringParameters = { + limit: 200, + pdrName__not: 'pdrnotexist', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports search which error.Error does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + 'error.Error__not': t.context.granuleSearchFields['error.Error'], + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await 
dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); + + queryStringParameters = { + limit: 200, + 'error.Error__not': 'unknownerror', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports search which checks existence of granule field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + cmrLink__exists: 'true', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('GranuleSearch supports search which checks existence of collectionId', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + collectionId__exists: 'true', + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); + queryStringParameters = { + limit: 200, + collectionId__exists: 'false', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); + +test('GranuleSearch supports search which checks existence of provider', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + provider__exists: 'true', + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + provider__exists: 'false', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports search which checks existence of pdrName', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + pdrName__exists: 'true', + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + pdrName__exists: 'false', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('GranuleSearch supports search which checks existence of error', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + error__exists: 'true', + }; + let dbSearch = new GranuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + error__exists: 'false', + }; + dbSearch = new GranuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); diff --git a/packages/db/tests/search/test-queries.js b/packages/db/tests/search/test-queries.js index 2b4b3324d78..1fc690aaf0a 100644 --- a/packages/db/tests/search/test-queries.js +++ b/packages/db/tests/search/test-queries.js @@ -20,12 +20,22 @@ 
test('convertQueryStringToDbQueryParameters correctly converts api query string collectionId: 'MOD09GQ___006', nonExistingField: 'nonExistingFieldValue', nonExistingField__from: 'nonExistingFieldValue', + granuleId__in: 'granuleId1,granuleId2', + collectionId__in: 'MOD09GQ___006,MODIS___007', + granuleId__not: 'notMatchingGranuleId', + error__exists: 'true', }; const expectedDbQueryParameters = { + exists: { + error: true, + }, fields: ['granuleId', 'collectionId', 'status', 'updatedAt'], infix: 'A1657416', limit: 20, + not: { + granule_id: 'notMatchingGranuleId', + }, offset: 40, page: 3, prefix: 'MO', @@ -53,6 +63,11 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string status: 'completed', 'error.Error': 'CumulusMessageAdapterExecutionError', }, + terms: { + granule_id: ['granuleId1', 'granuleId2'], + collectionName: ['MOD09GQ', 'MODIS'], + collectionVersion: ['006', '007'], + }, }; const dbQueryParams = convertQueryStringToDbQueryParameters('granule', queryStringParameters); From e73059da23d7659008c5065ba2a27b38b441fc79 Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Thu, 13 Jun 2024 11:10:47 -0400 Subject: [PATCH 07/61] CUMULUS-3641 - Update Collections LIST endpoint to query Postgres basic (#3681) * reopening PR * PR feedback * small test fix * small PR feedbacks * adding new tests from match queries * PR feedback/formatting * temporary reversion to list endpoint for reconreport tests * reverting changes * adding logging * more logging * more logging * removing logging + commenting reconrep test temp * commenting out failing createReconReport spec * removing comment * reverting changes to reconReport test * reverting previous change * adding ts-check * PR feedback * PR feedback * adding in test * PR feedback fix * PR feedback --- CHANGELOG.md | 2 + .../CreateReconciliationReportSpec.js | 2 +- packages/api/endpoints/collections.js | 25 +- packages/api/tests/app/test-launchpadAuth.js | 8 +- .../endpoints/collections/list-collections.js | 68 +++- packages/db/src/index.ts | 3 + packages/db/src/search/BaseSearch.ts | 2 +- packages/db/src/search/CollectionSearch.ts | 86 +++++ packages/db/src/search/field-mapping.ts | 12 + .../db/tests/search/test-CollectionSearch.js | 308 ++++++++++++++++++ .../db/tests/search/test-GranuleSearch.js | 2 +- .../db/tests/search/test-field-mapping.js | 6 + 12 files changed, 499 insertions(+), 25 deletions(-) create mode 100644 packages/db/src/search/CollectionSearch.ts create mode 100644 packages/db/tests/search/test-CollectionSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 75d4b5d12ed..874607dcf5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Replace ElasticSearch Phase 1 +- **CUMULUS-3641** + - Updated `collections` api endpoint to query postgres instead of elasticsearch except if `includeStats` is in the query parameters - **CUMULUS-3695** - Updated `granule` list api endpoint and BaseSearch class to handle sort fields - **CUMULUS-3688** diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index 1fb7fe65625..5462f04c5f9 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -273,7 +273,7 @@ const waitForCollectionRecordsInList = async (stackName, collectionIds, addition async () => { // Verify the collection is returned when listing collections const collsResp = await getCollections({ prefix: stackName, - query: { _id__in: collectionIds.join(','), ...additionalQueryParams, limit: 30 } }); + query: { _id__in: collectionIds.join(','), ...additionalQueryParams, includeStats: true, limit: 30 } }); const results = get(JSON.parse(collsResp.body), 'results', []); const ids = results.map((c) => constructCollectionId(c.name, c.version)); return isEqual(ids.sort(), collectionIds.sort()); diff --git a/packages/api/endpoints/collections.js b/packages/api/endpoints/collections.js index 15ea6090303..1e2f6b3518a 100644 --- a/packages/api/endpoints/collections.js +++ b/packages/api/endpoints/collections.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const router = require('express-promise-router')(); @@ -16,6 +18,7 @@ const { isCollisionError, translateApiCollectionToPostgresCollection, translatePostgresCollectionToApiCollection, + CollectionSearch, } = require('@cumulus/db'); const CollectionConfigStore = require('@cumulus/collection-config-store'); const { getEsClient, Search } = require('@cumulus/es-client/search'); @@ -43,14 +46,22 @@ const log = new Logger({ sender: '@cumulus/api/collections' }); * @returns {Promise} the promise of express response object */ async function list(req, res) { + log.trace(`list query ${JSON.stringify(req.query)}`); const { getMMT, includeStats, ...queryStringParameters } = req.query; - const collection = new Collection( - { queryStringParameters }, - undefined, - process.env.ES_INDEX, - includeStats === 'true' - ); - let result = await collection.query(); + let dbSearch; + if (includeStats === 'true') { + dbSearch = new Collection( + { queryStringParameters }, + undefined, + process.env.ES_INDEX, + includeStats === 'true' + ); + } else { + dbSearch = new CollectionSearch( + { queryStringParameters } + ); + } + let result = await dbSearch.query(); if (getMMT === 'true') { result = await insertMMTLinks(result); } diff --git a/packages/api/tests/app/test-launchpadAuth.js b/packages/api/tests/app/test-launchpadAuth.js index 717658a9bb6..db6d3346531 100644 --- a/packages/api/tests/app/test-launchpadAuth.js +++ b/packages/api/tests/app/test-launchpadAuth.js @@ -10,7 +10,7 @@ const { createBucket, putJsonS3Object } = require('@cumulus/aws-client/S3'); const launchpad = require('@cumulus/launchpad-auth'); const { randomId } = require('@cumulus/common/test-utils'); -const EsCollection = require('@cumulus/es-client/collections'); +const { CollectionSearch } = require('@cumulus/db'); const models = require('../../models'); const { createJwtToken } = require('../../lib/token'); const { fakeAccessTokenFactory } = require('../../lib/testUtils'); @@ -72,7 +72,7 
@@ test.after.always(async () => { test.serial('API request with a valid Launchpad token stores the access token', async (t) => { const stub = sinon.stub(launchpad, 'validateLaunchpadToken').returns(validateTokenResponse); - const collectionStub = sinon.stub(EsCollection.prototype, 'query').returns([]); + const collectionStub = sinon.stub(CollectionSearch.prototype, 'query').returns([]); try { await request(app) @@ -113,7 +113,7 @@ test.serial('API request with an invalid Launchpad token returns a 403 unauthori test.serial('API request with a stored non-expired Launchpad token record returns a successful response', async (t) => { let stub = sinon.stub(launchpad, 'validateLaunchpadToken').resolves(validateTokenResponse); - const collectionStub = sinon.stub(EsCollection.prototype, 'query').returns([]); + const collectionStub = sinon.stub(CollectionSearch.prototype, 'query').returns([]); try { await request(app) @@ -143,7 +143,7 @@ test.serial('API request with a stored non-expired Launchpad token record return }); test.serial('API request with an expired Launchpad token returns a 401 response', async (t) => { - const collectionStub = sinon.stub(EsCollection.prototype, 'query').returns([]); + const collectionStub = sinon.stub(CollectionSearch.prototype, 'query').returns([]); try { await accessTokenModel.create({ diff --git a/packages/api/tests/endpoints/collections/list-collections.js b/packages/api/tests/endpoints/collections/list-collections.js index 277fbac4577..f64b0e85b78 100644 --- a/packages/api/tests/endpoints/collections/list-collections.js +++ b/packages/api/tests/endpoints/collections/list-collections.js @@ -3,6 +3,7 @@ const test = require('ava'); const request = require('supertest'); const sinon = require('sinon'); +const range = require('lodash/range'); const awsServices = require('@cumulus/aws-client/services'); const { recursivelyDeleteS3Bucket, @@ -11,6 +12,7 @@ const { randomString } = require('@cumulus/common/test-utils'); const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); const EsCollection = require('@cumulus/es-client/collections'); const { getEsClient } = require('@cumulus/es-client/search'); +const { randomId } = require('@cumulus/common/test-utils'); const models = require('../../../models'); const { @@ -20,10 +22,25 @@ const { } = require('../../../lib/testUtils'); const assertions = require('../../../lib/assertions'); +const testDbName = randomId('collection'); + +const { + destroyLocalTestDb, + generateLocalTestDb, + CollectionPgModel, + fakeCollectionRecordFactory, + migrationDir, + localStackConnectionEnv, +} = require('../../../../db/dist'); + +process.env.PG_HOST = randomId('hostname'); +process.env.PG_USER = randomId('user'); +process.env.PG_PASSWORD = randomId('password'); +process.env.TOKEN_SECRET = randomString(); + process.env.AccessTokensTable = randomString(); process.env.stackName = randomString(); process.env.system_bucket = randomString(); -process.env.TOKEN_SECRET = randomString(); // import the express app after setting the env variables const { app } = require('../../../app'); @@ -34,7 +51,13 @@ let esClient; let jwtAuthToken; let accessTokenModel; -test.before(async () => { +process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: testDbName, +}; + +test.before(async (t) => { const esAlias = randomString(); process.env.ES_INDEX = esAlias; await bootstrapElasticSearch({ @@ -52,16 +75,45 @@ test.before(async () => { jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); esClient = 
await getEsClient('fakehost'); + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.collectionPgModel = new CollectionPgModel(); + const collections = []; + + range(40).map((num) => ( + collections.push(fakeCollectionRecordFactory({ + name: num % 2 === 0 ? `testCollection__${num}` : `fakeCollection__${num}`, + version: `${num}`, + cumulus_id: num, + updated_at: new Date(1579352700000 + (num % 2) * 1000), + })) + )); + + t.context.collections = collections; + await t.context.collectionPgModel.insert( + t.context.knex, + collections + ); }); test.beforeEach((t) => { t.context.testCollection = fakeCollectionFactory(); }); -test.after.always(async () => { +test.after.always(async (t) => { await accessTokenModel.deleteTable(); await recursivelyDeleteS3Bucket(process.env.system_bucket); await esClient.client.indices.delete({ index: esIndex }); + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); }); test('CUMULUS-911 GET without pathParameters and without an Authorization header returns an Authorization Missing response', async (t) => { @@ -86,9 +138,6 @@ test('CUMULUS-912 GET without pathParameters and with an invalid access token re test.todo('CUMULUS-912 GET without pathParameters and with an unauthorized user returns an unauthorized response'); test.serial('default returns list of collections from query', async (t) => { - const stub = sinon.stub(EsCollection.prototype, 'query').returns({ results: [t.context.testCollection] }); - const spy = sinon.stub(EsCollection.prototype, 'addStatsToCollectionResults'); - const response = await request(app) .get('/collections') .set('Accept', 'application/json') @@ -96,11 +145,8 @@ test.serial('default returns list of collections from query', async (t) => { .expect(200); const { results } = response.body; - t.is(results.length, 1); - t.is(results[0].name, t.context.testCollection.name); - t.true(spy.notCalled); - stub.restore(); - spy.restore(); + t.is(results.length, 10); + t.is(results[0].name, t.context.collections[0].name); }); test.serial('returns list of collections with stats when requested', async (t) => { diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 234f5f80785..ed2bd892171 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -145,6 +145,9 @@ export { export { StatsSearch, } from './search/StatsSearch'; +export { + CollectionSearch, +} from './search/CollectionSearch'; export { AsyncOperationPgModel } from './models/async_operation'; export { BasePgModel } from './models/base'; diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index d616a12d0c2..db1fc579beb 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -412,7 +412,7 @@ class BaseSearch { * @param testKnex - knex for testing * @returns search result */ - async query(testKnex: Knex | undefined) { + async query(testKnex?: Knex) { const knex = testKnex ?? 
await getKnexClient(); const { countQuery, searchQuery } = this.buildSearch(knex); try { diff --git a/packages/db/src/search/CollectionSearch.ts b/packages/db/src/search/CollectionSearch.ts new file mode 100644 index 00000000000..d8b1b805432 --- /dev/null +++ b/packages/db/src/search/CollectionSearch.ts @@ -0,0 +1,86 @@ +import { Knex } from 'knex'; +import pick from 'lodash/pick'; + +import Logger from '@cumulus/logger'; +import { CollectionRecord } from '@cumulus/types/api/collections'; +import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { translatePostgresCollectionToApiCollection } from '../translate/collections'; +import { PostgresCollectionRecord } from '../types/collection'; + +const log = new Logger({ sender: '@cumulus/db/CollectionSearch' }); + +/** + * There is no need to declare an ApiCollectionRecord type since + * CollectionRecord contains all the same fields from the api + */ + +/** + * Class to build and execute db search query for collection + */ +export class CollectionSearch extends BaseSearch { + constructor(event: QueryEvent) { + super(event, 'collection'); + } + + /** + * Build basic query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + protected buildBasicQuery(knex: Knex) + : { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const countQuery = knex(this.tableName) + .count(`${this.tableName}.cumulus_id`); + + const searchQuery = knex(this.tableName) + .select(`${this.tableName}.*`); + return { countQuery, searchQuery }; + } + + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters ?? this.dbQueryParameters; + if (infix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${infix}%`)); + } + if (prefix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${prefix}%`)); + } + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres records returned from query + * @returns translated api records + */ + protected translatePostgresRecordsToApiRecords(pgRecords: PostgresCollectionRecord[]) + : Partial[] { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const apiRecords = pgRecords.map((item) => { + const apiRecord = translatePostgresCollectionToApiCollection(item); + + return this.dbQueryParameters.fields + ? 
pick(apiRecord, this.dbQueryParameters.fields) + : apiRecord; + }); + return apiRecords; + } +} diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 75cc91a00b7..9a196243d11 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -123,6 +123,18 @@ const collectionMapping : { [key: string]: Function } = { updatedAt: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), + reportToEms: (value?: string) => ({ + report_to_ems: (value === 'true'), + }), + process: (value?: string) => ({ + process: value, + }), + sampleFileName: (value?: string) => ({ + sample_file_name: value, + }), + urlPath: (value?: string) => ({ + url_path: value, + }), }; const executionMapping : { [key: string]: Function } = { diff --git a/packages/db/tests/search/test-CollectionSearch.js b/packages/db/tests/search/test-CollectionSearch.js new file mode 100644 index 00000000000..3598cda5edd --- /dev/null +++ b/packages/db/tests/search/test-CollectionSearch.js @@ -0,0 +1,308 @@ +'use strict'; + +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); +const { CollectionSearch } = require('../../dist/search/CollectionSearch'); + +const { + destroyLocalTestDb, + generateLocalTestDb, + CollectionPgModel, + fakeCollectionRecordFactory, + migrationDir, +} = require('../../dist'); + +const testDbName = `collection_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.collectionPgModel = new CollectionPgModel(); + const collections = []; + range(100).map((num) => ( + collections.push(fakeCollectionRecordFactory({ + name: num % 2 === 0 ? `testCollection___00${num}` : `fakeCollection___00${num}`, + version: `${num}`, + cumulus_id: num, + updated_at: new Date(1579352700000 + (num % 2) * 1000), + process: num % 2 === 0 ? 'ingest' : 'publish', + report_to_ems: num % 2 === 0, + url_path: num % 2 === 0 ? 
'https://fakepath.com' : undefined, + })) + )); + + await t.context.collectionPgModel.insert( + t.context.knex, + collections + ); +}); + +test.after.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('CollectionSearch returns 10 collections by default', async (t) => { + const { knex } = t.context; + const AggregateSearch = new CollectionSearch(); + const results = await AggregateSearch.query(knex); + t.is(results.meta.count, 100); + t.is(results.results.length, 10); +}); + +test('CollectionSearch supports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 20, + page: 2, + }; + let dbSearch = new CollectionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 20); + + queryStringParameters = { + limit: 11, + page: 10, + }; + dbSearch = new CollectionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 1); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new CollectionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 0); +}); + +test('CollectionSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 20, + infix: 'test', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 20); +}); + +test('CollectionSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 20, + prefix: 'fake', + }; + const dbSearch2 = new CollectionSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 50); + t.is(response2.results?.length, 20); +}); + +test('CollectionSearch supports term search for boolean field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + reportToEms: false, + }; + const dbSearch4 = new CollectionSearch({ queryStringParameters }); + const response4 = await dbSearch4.query(knex); + t.is(response4.meta.count, 50); + t.is(response4.results?.length, 50); +}); + +test('CollectionSearch supports term search for date field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + updatedAt: 1579352701000, + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('CollectionSearch supports term search for number field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + version: 2, + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('CollectionSearch supports term search for string field', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + name: 'fakeCollection___0071', + }; + const dbSearch2 = new CollectionSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 1); + 
t.is(response2.results?.length, 1); + + queryStringParameters = { + limit: 200, + process: 'publish', + }; + const dbSearch3 = new CollectionSearch({ queryStringParameters }); + const response3 = await dbSearch3.query(knex); + t.is(response3.meta.count, 50); + t.is(response3.results?.length, 50); +}); + +// TODO in CUMULUS-3639 +test.todo('CollectionSearch supports range search'); + +test('CollectionSearch supports search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + name: 'testCollection___000', + updatedAt: 1579352700000, + process: 'ingest', + reportToEms: 'true', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('CollectionSearch non-existing fields are ignored', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); +}); + +test('CollectionSearch returns fields specified', async (t) => { + const { knex } = t.context; + const fields = 'name,version,reportToEms,process'; + const queryStringParameters = { + fields, + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 10); + response.results.forEach((collection) => t.deepEqual(Object.keys(collection), fields.split(','))); +}); + +test('CollectionSearch supports sorting', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + sort_by: 'name', + order: 'asc', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); + t.true(response.results[0].name < response.results[99].name); + t.true(response.results[0].name < response.results[50].name); + + queryStringParameters = { + limit: 200, + sort_key: ['-name'], + }; + const dbSearch2 = new CollectionSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 100); + t.is(response2.results?.length, 100); + t.true(response2.results[0].name > response2.results[99].name); + t.true(response2.results[0].name > response2.results[50].name); + + queryStringParameters = { + limit: 200, + sort_by: 'version', + }; + const dbSearch3 = new CollectionSearch({ queryStringParameters }); + const response3 = await dbSearch3.query(knex); + t.is(response3.meta.count, 100); + t.is(response3.results?.length, 100); + t.true(response3.results[0].version < response3.results[99].version); + t.true(response3.results[49].version < response3.results[50].version); +}); + +test('CollectionSearch supports terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + process__in: ['ingest', 'archive'].join(','), + }; + let dbSearch = new CollectionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 
200, + process__in: ['ingest', 'archive'].join(','), + name__in: ['testCollection___000', 'fakeCollection___001'].join(','), + }; + dbSearch = new CollectionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('CollectionSearch supports search when collection field does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + process__not: 'publish', + }; + let dbSearch = new CollectionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + process__not: 'publish', + name__not: 'testCollection___000', + }; + dbSearch = new CollectionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); +}); + +test('CollectionSearch supports search which checks existence of collection field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + urlPath__exists: 'true', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index 370330d2128..50a70c0c06e 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -643,7 +643,7 @@ test('GranuleSearch supports error.Error terms search', async (t) => { t.is(response.results?.length, 0); }); -test('GranuleSearch supports search which granule field does not match the given value', async (t) => { +test('GranuleSearch supports search when granule field does not match the given value', async (t) => { const { knex } = t.context; let queryStringParameters = { limit: 200, diff --git a/packages/db/tests/search/test-field-mapping.js b/packages/db/tests/search/test-field-mapping.js index b1d18befd30..4fca79ec82f 100644 --- a/packages/db/tests/search/test-field-mapping.js +++ b/packages/db/tests/search/test-field-mapping.js @@ -105,6 +105,9 @@ test('mapQueryStringFieldToDbField correctly converts all collection api fields const queryStringParameters = { createdAt: '1591312763823', name: 'MOD11A1', + reportToEms: 'true', + urlPath: 'http://fakepath.com', + sampleFileName: 'hello.txt', version: '006', updatedAt: 1591384094512, }; @@ -113,6 +116,9 @@ test('mapQueryStringFieldToDbField correctly converts all collection api fields created_at: new Date(1591312763823), name: 'MOD11A1', version: '006', + report_to_ems: true, + url_path: 'http://fakepath.com', + sample_file_name: 'hello.txt', updated_at: new Date(1591384094512), }; From 8c83b85cb23969db852a13cf15f2407037b768d9 Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:27:33 -0400 Subject: [PATCH 08/61] CUMULUS-3699 - Update collection List endpoints to query postgres - includeStats (#3688) * first commit * CHANGELOG * fixing small things * changes + fixes * PR feedback * splitting queries separately * PR feedback * PR feedback * PR feedback --- CHANGELOG.md | 3 +- .../CreateReconciliationReportSpec.js | 2 +- packages/api/endpoints/collections.js | 18 +--- .../endpoints/collections/list-collections.js | 52 ++++++----- 
packages/db/src/search/BaseSearch.ts | 7 +- packages/db/src/search/CollectionSearch.ts | 93 ++++++++++++++++--- packages/db/src/search/field-mapping.ts | 7 ++ .../db/tests/search/test-CollectionSearch.js | 61 +++++++++--- 8 files changed, 178 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc37ad73a28..8c20371c497 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## Unreleased ### Replace ElasticSearch Phase 1 - +- **CUMULUS-3699** + - Updated `collections` api endpoint to be able to support `includeStats` query string parameter - **CUMULUS-3641** - Updated `collections` api endpoint to query postgres instead of elasticsearch except if `includeStats` is in the query parameters - **CUMULUS-3695** diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index 5462f04c5f9..1fb7fe65625 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -273,7 +273,7 @@ const waitForCollectionRecordsInList = async (stackName, collectionIds, addition async () => { // Verify the collection is returned when listing collections const collsResp = await getCollections({ prefix: stackName, - query: { _id__in: collectionIds.join(','), ...additionalQueryParams, includeStats: true, limit: 30 } }); + query: { _id__in: collectionIds.join(','), ...additionalQueryParams, limit: 30 } }); const results = get(JSON.parse(collsResp.body), 'results', []); const ids = results.map((c) => constructCollectionId(c.name, c.version)); return isEqual(ids.sort(), collectionIds.sort()); diff --git a/packages/api/endpoints/collections.js b/packages/api/endpoints/collections.js index 1e2f6b3518a..3dc7be465f1 100644 --- a/packages/api/endpoints/collections.js +++ b/packages/api/endpoints/collections.js @@ -47,20 +47,10 @@ const log = new Logger({ sender: '@cumulus/api/collections' }); */ async function list(req, res) { log.trace(`list query ${JSON.stringify(req.query)}`); - const { getMMT, includeStats, ...queryStringParameters } = req.query; - let dbSearch; - if (includeStats === 'true') { - dbSearch = new Collection( - { queryStringParameters }, - undefined, - process.env.ES_INDEX, - includeStats === 'true' - ); - } else { - dbSearch = new CollectionSearch( - { queryStringParameters } - ); - } + const { getMMT, ...queryStringParameters } = req.query; + const dbSearch = new CollectionSearch( + { queryStringParameters } + ); let result = await dbSearch.query(); if (getMMT === 'true') { result = await insertMMTLinks(result); diff --git a/packages/api/tests/endpoints/collections/list-collections.js b/packages/api/tests/endpoints/collections/list-collections.js index f64b0e85b78..4da2b7e38a3 100644 --- a/packages/api/tests/endpoints/collections/list-collections.js +++ b/packages/api/tests/endpoints/collections/list-collections.js @@ -2,16 +2,12 @@ const test = require('ava'); const request = require('supertest'); -const sinon = require('sinon'); const range = require('lodash/range'); const awsServices = require('@cumulus/aws-client/services'); const { recursivelyDeleteS3Bucket, } = require('@cumulus/aws-client/S3'); const { randomString } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const 
EsCollection = require('@cumulus/es-client/collections'); -const { getEsClient } = require('@cumulus/es-client/search'); const { randomId } = require('@cumulus/common/test-utils'); const models = require('../../../models'); @@ -28,7 +24,9 @@ const { destroyLocalTestDb, generateLocalTestDb, CollectionPgModel, + GranulePgModel, fakeCollectionRecordFactory, + fakeGranuleRecordFactory, migrationDir, localStackConnectionEnv, } = require('../../../../db/dist'); @@ -45,9 +43,6 @@ process.env.system_bucket = randomString(); // import the express app after setting the env variables const { app } = require('../../../app'); -const esIndex = randomString(); -let esClient; - let jwtAuthToken; let accessTokenModel; @@ -58,13 +53,6 @@ process.env = { }; test.before(async (t) => { - const esAlias = randomString(); - process.env.ES_INDEX = esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomString(); @@ -74,7 +62,7 @@ test.before(async (t) => { await accessTokenModel.createTable(); jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - esClient = await getEsClient('fakehost'); + const { knexAdmin, knex } = await generateLocalTestDb( testDbName, migrationDir @@ -86,7 +74,7 @@ test.before(async (t) => { t.context.collectionPgModel = new CollectionPgModel(); const collections = []; - range(40).map((num) => ( + range(10).map((num) => ( collections.push(fakeCollectionRecordFactory({ name: num % 2 === 0 ? `testCollection__${num}` : `fakeCollection__${num}`, version: `${num}`, @@ -95,11 +83,28 @@ test.before(async (t) => { })) )); + t.context.granulePgModel = new GranulePgModel(); + const granules = []; + const statuses = ['queued', 'failed', 'completed', 'running']; + + range(100).map((num) => ( + granules.push(fakeGranuleRecordFactory({ + collection_cumulus_id: collections[num % 9].cumulus_id, + status: statuses[num % 4], + })) + )); + t.context.collections = collections; await t.context.collectionPgModel.insert( t.context.knex, collections ); + + t.context.granules = granules; + await t.context.granulePgModel.insert( + t.context.knex, + granules + ); }); test.beforeEach((t) => { @@ -109,7 +114,6 @@ test.beforeEach((t) => { test.after.always(async (t) => { await accessTokenModel.deleteTable(); await recursivelyDeleteS3Bucket(process.env.system_bucket); - await esClient.client.indices.delete({ index: esIndex }); await destroyLocalTestDb({ ...t.context, testDbName, @@ -150,16 +154,20 @@ test.serial('default returns list of collections from query', async (t) => { }); test.serial('returns list of collections with stats when requested', async (t) => { - const stub = sinon.stub(EsCollection.prototype, 'getStats').returns([t.context.testCollection]); - const response = await request(app) .get('/collections?includeStats=true') .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); + const expectedStats1 = { queued: 3, completed: 3, failed: 3, running: 3, total: 12 }; + const expectedStats2 = { queued: 2, completed: 3, failed: 3, running: 3, total: 11 }; + const expectedStats3 = { queued: 0, completed: 0, failed: 0, running: 0, total: 0 }; + const { results } = response.body; - t.is(results.length, 1); - t.is(results[0].name, t.context.testCollection.name); - stub.restore(); + t.is(results.length, 10); + t.is(results[0].name, t.context.collections[0].name); + t.deepEqual(results[0].stats, expectedStats1); + 
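+  // each stats object aggregates the collection's granules by status
+  // (queued/completed/failed/running), plus a total count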
t.deepEqual(results[1].stats, expectedStats2); + t.deepEqual(results[9].stats, expectedStats3); }); diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index db1fc579beb..5896e9d009e 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -399,10 +399,11 @@ class BaseSearch { * Translate postgres records to api records * * @param pgRecords - postgres records returned from query + * @param [knex] - knex client for additional queries if neccessary * @throws - function is not implemented */ - protected translatePostgresRecordsToApiRecords(pgRecords: BaseRecord[]) { - log.error(`translatePostgresRecordsToApiRecords is not implemented ${pgRecords[0]}`); + protected translatePostgresRecordsToApiRecords(pgRecords: BaseRecord[], knex?: Knex) { + log.error(`translatePostgresRecordsToApiRecords is not implemented ${pgRecords[0]} with client ${knex}`); throw new Error('translatePostgresRecordsToApiRecords is not implemented'); } @@ -423,7 +424,7 @@ class BaseSearch { meta.count = Number(countResult[0]?.count ?? 0); const pgRecords = await searchQuery; - const apiRecords = this.translatePostgresRecordsToApiRecords(pgRecords); + const apiRecords = await this.translatePostgresRecordsToApiRecords(pgRecords, knex); return { meta, diff --git a/packages/db/src/search/CollectionSearch.ts b/packages/db/src/search/CollectionSearch.ts index d8b1b805432..28ab2835f6d 100644 --- a/packages/db/src/search/CollectionSearch.ts +++ b/packages/db/src/search/CollectionSearch.ts @@ -7,20 +7,36 @@ import { BaseSearch } from './BaseSearch'; import { DbQueryParameters, QueryEvent } from '../types/search'; import { translatePostgresCollectionToApiCollection } from '../translate/collections'; import { PostgresCollectionRecord } from '../types/collection'; +import { TableNames } from '../tables'; const log = new Logger({ sender: '@cumulus/db/CollectionSearch' }); -/** - * There is no need to declare an ApiCollectionRecord type since - * CollectionRecord contains all the same fields from the api - */ +type Statuses = { + queued: number, + completed: number, + failed: number, + running: number, + total: number, +}; + +type StatsRecords = { + [key: number]: Statuses, +}; + +interface CollectionRecordApi extends CollectionRecord { + stats?: Statuses, +} /** - * Class to build and execute db search query for collection + * Class to build and execute db search query for collections */ export class CollectionSearch extends BaseSearch { + readonly includeStats: boolean; + constructor(event: QueryEvent) { - super(event, 'collection'); + const { includeStats, ...queryStringParameters } = event.queryStringParameters || {}; + super({ queryStringParameters }, 'collection'); + this.includeStats = (includeStats === 'true'); } /** @@ -39,6 +55,7 @@ export class CollectionSearch extends BaseSearch { const searchQuery = knex(this.tableName) .select(`${this.tableName}.*`); + return { countQuery, searchQuery }; } @@ -65,21 +82,73 @@ export class CollectionSearch extends BaseSearch { } } + /** + * Executes stats query to get granules' status aggregation + * + * @param ids - array of cumulusIds of the collections + * @param knex - knex for the stats query + * @returns the collection's granules status' aggregation + */ + private async retrieveGranuleStats(collectionCumulusIds: number[], knex: Knex) + : Promise { + const granulesTable = TableNames.granules; + const statsQuery = knex(granulesTable) + .select(`${granulesTable}.collection_cumulus_id`, `${granulesTable}.status`) 
+ .count(`${granulesTable}.status`) + .groupBy(`${granulesTable}.collection_cumulus_id`, `${granulesTable}.status`) + .whereIn(`${granulesTable}.collection_cumulus_id`, collectionCumulusIds); + const results = await statsQuery; + const reduced = results.reduce((acc, record) => { + const cumulusId = Number(record.collection_cumulus_id); + if (!acc[cumulusId]) { + acc[cumulusId] = { + queued: 0, + completed: 0, + failed: 0, + running: 0, + total: 0, + }; + } + acc[cumulusId][record.status as keyof Statuses] += Number(record.count); + acc[cumulusId]['total'] += Number(record.count); + return acc; + }, {} as StatsRecords); + return reduced; + } + /** * Translate postgres records to api records * - * @param pgRecords - postgres records returned from query + * @param pgRecords - postgres Collection records returned from query + * @param knex - knex for the stats query if incldueStats is true * @returns translated api records */ - protected translatePostgresRecordsToApiRecords(pgRecords: PostgresCollectionRecord[]) - : Partial[] { + protected async translatePostgresRecordsToApiRecords(pgRecords: PostgresCollectionRecord[], + knex: Knex): Promise[]> { log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); - const apiRecords = pgRecords.map((item) => { - const apiRecord = translatePostgresCollectionToApiCollection(item); + let statsRecords: StatsRecords; + const cumulusIds = pgRecords.map((record) => record.cumulus_id); + if (this.includeStats) { + statsRecords = await this.retrieveGranuleStats(cumulusIds, knex); + } - return this.dbQueryParameters.fields + const apiRecords = pgRecords.map((record) => { + const apiRecord: CollectionRecordApi = translatePostgresCollectionToApiCollection(record); + const apiRecordFinal = this.dbQueryParameters.fields ? pick(apiRecord, this.dbQueryParameters.fields) : apiRecord; + + if (statsRecords) { + apiRecordFinal.stats = statsRecords[record.cumulus_id] ? statsRecords[record.cumulus_id] : + { + queued: 0, + completed: 0, + failed: 0, + running: 0, + total: 0, + }; + } + return apiRecordFinal; }); return apiRecords; } diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 9a196243d11..fc6719a635a 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -117,6 +117,13 @@ const collectionMapping : { [key: string]: Function } = { version: (value?: string) => ({ version: value, }), + _id: (value?: string) => { + const { name, version } = (value && deconstructCollectionId(value)) || {}; + return { + collectionName: name, + collectionVersion: version, + }; + }, timestamp: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), diff --git a/packages/db/tests/search/test-CollectionSearch.js b/packages/db/tests/search/test-CollectionSearch.js index 3598cda5edd..4b09cc66142 100644 --- a/packages/db/tests/search/test-CollectionSearch.js +++ b/packages/db/tests/search/test-CollectionSearch.js @@ -9,7 +9,9 @@ const { destroyLocalTestDb, generateLocalTestDb, CollectionPgModel, + GranulePgModel, fakeCollectionRecordFactory, + fakeGranuleRecordFactory, migrationDir, } = require('../../dist'); @@ -28,8 +30,8 @@ test.before(async (t) => { const collections = []; range(100).map((num) => ( collections.push(fakeCollectionRecordFactory({ - name: num % 2 === 0 ? `testCollection___00${num}` : `fakeCollection___00${num}`, - version: `${num}`, + name: num % 2 === 0 ? 
'testCollection' : 'fakeCollection', + version: num, cumulus_id: num, updated_at: new Date(1579352700000 + (num % 2) * 1000), process: num % 2 === 0 ? 'ingest' : 'publish', @@ -38,10 +40,27 @@ test.before(async (t) => { })) )); + t.context.granulePgModel = new GranulePgModel(); + const granules = []; + const statuses = ['queued', 'failed', 'completed', 'running']; + + range(1000).map((num) => ( + granules.push(fakeGranuleRecordFactory({ + collection_cumulus_id: num % 99, + cumulus_id: 100 + num, + status: statuses[num % 4], + })) + )); + await t.context.collectionPgModel.insert( t.context.knex, collections ); + + await t.context.granulePgModel.insert( + t.context.knex, + granules + ); }); test.after.always(async (t) => { @@ -53,8 +72,8 @@ test.after.always(async (t) => { test('CollectionSearch returns 10 collections by default', async (t) => { const { knex } = t.context; - const AggregateSearch = new CollectionSearch(); - const results = await AggregateSearch.query(knex); + const dbSearch = new CollectionSearch({}); + const results = await dbSearch.query(knex); t.is(results.meta.count, 100); t.is(results.results.length, 10); }); @@ -117,7 +136,7 @@ test('CollectionSearch supports term search for boolean field', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, - reportToEms: false, + reportToEms: 'false', }; const dbSearch4 = new CollectionSearch({ queryStringParameters }); const response4 = await dbSearch4.query(knex); @@ -129,7 +148,7 @@ test('CollectionSearch supports term search for date field', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, - updatedAt: 1579352701000, + updatedAt: '1579352701000', }; const dbSearch = new CollectionSearch({ queryStringParameters }); const response = await dbSearch.query(knex); @@ -141,7 +160,7 @@ test('CollectionSearch supports term search for number field', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, - version: 2, + version: '2', }; const dbSearch = new CollectionSearch({ queryStringParameters }); const response = await dbSearch.query(knex); @@ -153,7 +172,7 @@ test('CollectionSearch supports term search for string field', async (t) => { const { knex } = t.context; let queryStringParameters = { limit: 200, - name: 'fakeCollection___0071', + _id: 'fakeCollection___71', }; const dbSearch2 = new CollectionSearch({ queryStringParameters }); const response2 = await dbSearch2.query(knex); @@ -177,8 +196,9 @@ test('CollectionSearch supports search for multiple fields', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, - name: 'testCollection___000', - updatedAt: 1579352700000, + name: 'testCollection', + version: '0', + updatedAt: '1579352700000', process: 'ingest', reportToEms: 'true', }; @@ -265,7 +285,7 @@ test('CollectionSearch supports terms search', async (t) => { queryStringParameters = { limit: 200, process__in: ['ingest', 'archive'].join(','), - name__in: ['testCollection___000', 'fakeCollection___001'].join(','), + _id__in: ['testCollection___0', 'fakeCollection___1'].join(','), }; dbSearch = new CollectionSearch({ queryStringParameters }); response = await dbSearch.query(knex); @@ -287,7 +307,7 @@ test('CollectionSearch supports search when collection field does not match the queryStringParameters = { limit: 200, process__not: 'publish', - name__not: 'testCollection___000', + version__not: 18, }; dbSearch = new CollectionSearch({ queryStringParameters }); response = await dbSearch.query(knex); 
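Illustrative aside (not part of this patch): the negation and terms filters exercised in the tests above can be combined in a single CollectionSearch call. A minimal sketch, assuming a knex client is available and that CollectionSearch is exported from @cumulus/db the same way the API endpoints in this PR consume it:

// Hypothetical usage sketch only -- parameter values refer to the test fixtures above.
const { CollectionSearch } = require('@cumulus/db');

async function findNonPublishCollections(knex) {
  const queryStringParameters = {
    limit: 50,
    process__not: 'publish', // drop collections whose process is 'publish'
    _id__in: 'testCollection___0,testCollection___2', // terms filter on constructed collection ids
  };
  const search = new CollectionSearch({ queryStringParameters });
  const { meta, results } = await search.query(knex); // same { meta, results } shape asserted in the tests
  return { count: meta.count, results };
}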
@@ -306,3 +326,20 @@ test('CollectionSearch supports search which checks existence of collection fiel t.is(response.meta.count, 50); t.is(response.results?.length, 50); }); + +test('CollectionSearch supports includeStats', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + includeStats: 'true', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + const expectedStats1 = { queued: 3, completed: 3, failed: 2, running: 3, total: 11 }; + const expectedStats2 = { queued: 0, completed: 0, failed: 0, running: 0, total: 0 }; + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); + t.deepEqual(response.results[0].stats, expectedStats1); + t.deepEqual(response.results[99].stats, expectedStats2); +}); From 4e8440690f67da03136c68bc14c85b7fbd51d1f3 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:33:12 -0400 Subject: [PATCH 09/61] CUMULUS-3639: Add support to db/CollectionSearch to retrieve active collections (#3693) * CUMULUS-3639:Add support to db/CollectionSearch to retrieve active collections * add test active collections * add ts-check * update /collections/active unit test * test snyk * fix field mapping * parallel search and fix urlPath * add cumulus-lp stack * add limit 1 to subquery --- CHANGELOG.md | 3 + example/config.yml | 7 +- example/deployments/cumulus/cumulus-lp.tfvars | 4 + .../data-persistence/cumulus-lp.tfvars | 1 + .../db-migration/cumulus-lp.tfvars | 1 + packages/api/endpoints/collections.js | 16 +- packages/api/endpoints/granules.js | 2 +- packages/api/endpoints/stats.js | 2 + .../collections/active-collections.js | 169 ++++++++++++------ packages/db/src/search/BaseSearch.ts | 7 +- packages/db/src/search/CollectionSearch.ts | 62 ++++++- packages/db/src/search/field-mapping.ts | 16 +- .../db/tests/search/test-CollectionSearch.js | 96 ++++++++-- .../db/tests/search/test-field-mapping.js | 2 +- 14 files changed, 302 insertions(+), 86 deletions(-) create mode 100644 example/deployments/cumulus/cumulus-lp.tfvars create mode 100644 example/deployments/data-persistence/cumulus-lp.tfvars create mode 100644 example/deployments/db-migration/cumulus-lp.tfvars diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c20371c497..ee5fdb3e9c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
## Unreleased ### Replace ElasticSearch Phase 1 + +- **CUMULUS-3639** + - Updated `/collections/active` endpoint to query postgres - **CUMULUS-3699** - Updated `collections` api endpoint to be able to support `includeStats` query string parameter - **CUMULUS-3641** diff --git a/example/config.yml b/example/config.yml index 74e5e79812d..118f9e10ea4 100644 --- a/example/config.yml +++ b/example/config.yml @@ -13,6 +13,11 @@ cumulus-es: apiUsername: jasmine pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider +cumulus-lp: + bucket: cumulus-sit-internal + apiUsername: jasmine + pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider + mvd-tf: bucket: mvd-internal @@ -44,4 +49,4 @@ nnaga-tf: bucket: nnaga-internal ecarton: - bucket: ecarton-internal \ No newline at end of file + bucket: ecarton-internal diff --git a/example/deployments/cumulus/cumulus-lp.tfvars b/example/deployments/cumulus/cumulus-lp.tfvars new file mode 100644 index 00000000000..6d72a3849ea --- /dev/null +++ b/example/deployments/cumulus/cumulus-lp.tfvars @@ -0,0 +1,4 @@ +prefix = "cumulus-lp" +archive_api_port = 8000 +key_name = "lp" +cmr_oauth_provider = "launchpad" diff --git a/example/deployments/data-persistence/cumulus-lp.tfvars b/example/deployments/data-persistence/cumulus-lp.tfvars new file mode 100644 index 00000000000..7c26d276f59 --- /dev/null +++ b/example/deployments/data-persistence/cumulus-lp.tfvars @@ -0,0 +1 @@ +prefix = "cumulus-lp" diff --git a/example/deployments/db-migration/cumulus-lp.tfvars b/example/deployments/db-migration/cumulus-lp.tfvars new file mode 100644 index 00000000000..7c26d276f59 --- /dev/null +++ b/example/deployments/db-migration/cumulus-lp.tfvars @@ -0,0 +1 @@ +prefix = "cumulus-lp" diff --git a/packages/api/endpoints/collections.js b/packages/api/endpoints/collections.js index 3dc7be465f1..a6688407c50 100644 --- a/packages/api/endpoints/collections.js +++ b/packages/api/endpoints/collections.js @@ -26,7 +26,6 @@ const { indexCollection, deleteCollection, } = require('@cumulus/es-client/indexer'); -const Collection = require('@cumulus/es-client/collections'); const { publishCollectionCreateSnsMessage, publishCollectionDeleteSnsMessage, @@ -46,7 +45,7 @@ const log = new Logger({ sender: '@cumulus/api/collections' }); * @returns {Promise} the promise of express response object */ async function list(req, res) { - log.trace(`list query ${JSON.stringify(req.query)}`); + log.debug(`list query ${JSON.stringify(req.query)}`); const { getMMT, ...queryStringParameters } = req.query; const dbSearch = new CollectionSearch( { queryStringParameters } @@ -68,15 +67,10 @@ async function list(req, res) { * @returns {Promise} the promise of express response object */ async function activeList(req, res) { - const { getMMT, includeStats, ...queryStringParameters } = req.query; - - const collection = new Collection( - { queryStringParameters }, - undefined, - process.env.ES_INDEX, - includeStats === 'true' - ); - let result = await collection.queryCollectionsWithActiveGranules(); + log.debug(`activeList query ${JSON.stringify(req.query)}`); + const { getMMT, ...queryStringParameters } = req.query; + const dbSearch = new CollectionSearch({ queryStringParameters: { active: 'true', ...queryStringParameters } }); + let result = await dbSearch.query(); if (getMMT === 'true') { result = await insertMMTLinks(result); } diff --git a/packages/api/endpoints/granules.js b/packages/api/endpoints/granules.js index f25e5bb262c..7aff430d540 100644 --- a/packages/api/endpoints/granules.js +++ 
b/packages/api/endpoints/granules.js @@ -101,7 +101,7 @@ function _createNewGranuleDateValue() { * @returns {Promise} the promise of express response object */ async function list(req, res) { - log.trace(`list query ${JSON.stringify(req.query)}`); + log.debug(`list query ${JSON.stringify(req.query)}`); const { getRecoveryStatus, ...queryStringParameters } = req.query; const dbSearch = new GranuleSearch({ queryStringParameters }); diff --git a/packages/api/endpoints/stats.js b/packages/api/endpoints/stats.js index 1caf461416e..a335a20c72e 100644 --- a/packages/api/endpoints/stats.js +++ b/packages/api/endpoints/stats.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const router = require('express-promise-router')(); diff --git a/packages/api/tests/endpoints/collections/active-collections.js b/packages/api/tests/endpoints/collections/active-collections.js index 8042e93780b..4fd0b6757bc 100644 --- a/packages/api/tests/endpoints/collections/active-collections.js +++ b/packages/api/tests/endpoints/collections/active-collections.js @@ -2,22 +2,17 @@ const test = require('ava'); const request = require('supertest'); -const sinon = require('sinon'); -const rewire = require('rewire'); +const range = require('lodash/range'); const awsServices = require('@cumulus/aws-client/services'); const { recursivelyDeleteS3Bucket, } = require('@cumulus/aws-client/S3'); const { randomId } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const indexer = rewire('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); +const { randomString } = require('@cumulus/common/test-utils'); const models = require('../../../models'); const { createFakeJwtAuthToken, - fakeCollectionFactory, - fakeGranuleFactoryV2, setAuthorizedOAuthUsers, } = require('../../../lib/testUtils'); const assertions = require('../../../lib/assertions'); @@ -27,23 +22,41 @@ process.env.stackName = randomId('stackName'); process.env.system_bucket = randomId('bucket'); process.env.TOKEN_SECRET = randomId('tokenSecret'); +const testDbName = randomId('collection'); + +const { + destroyLocalTestDb, + generateLocalTestDb, + CollectionPgModel, + GranulePgModel, + fakeCollectionRecordFactory, + fakeGranuleRecordFactory, + migrationDir, + localStackConnectionEnv, +} = require('../../../../db/dist'); + +process.env.PG_HOST = randomId('hostname'); +process.env.PG_USER = randomId('user'); +process.env.PG_PASSWORD = randomId('password'); + +process.env.AccessTokensTable = randomString(); +process.env.stackName = randomString(); +process.env.system_bucket = randomString(); +process.env.TOKEN_SECRET = randomString(); + // import the express app after setting the env variables const { app } = require('../../../app'); -const esIndex = randomId('esindex'); -let esClient; - let jwtAuthToken; let accessTokenModel; -test.before(async () => { - const esAlias = randomId('esAlias'); - process.env.ES_INDEX = esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); +process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: testDbName, +}; + +test.before(async (t) => { await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomId('username'); @@ -53,45 +66,63 @@ test.before(async () => { await accessTokenModel.createTable(); jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - esClient = await getEsClient('fakehost'); - - await 
Promise.all([ - indexer.indexCollection(esClient, fakeCollectionFactory({ - name: 'coll1', - version: '1', - }), esAlias), - indexer.indexCollection(esClient, fakeCollectionFactory({ - name: 'coll2', - version: '1', - }), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ collectionId: 'coll1___1' }), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ collectionId: 'coll1___1' }), esAlias), - ]); - - // Indexing using Date.now() to generate the timestamp - const stub = sinon.stub(Date, 'now').returns((new Date(2020, 0, 29)).getTime()); - - try { - await Promise.all([ - indexer.indexCollection(esClient, fakeCollectionFactory({ - name: 'coll3', - version: '1', - updatedAt: new Date(2020, 0, 29), - }), esAlias), - indexer.indexGranule(esClient, fakeGranuleFactoryV2({ - updatedAt: new Date(2020, 1, 29), - collectionId: 'coll3___1', - }), esAlias), - ]); - } finally { - stub.restore(); - } + + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.collectionPgModel = new CollectionPgModel(); + const collections = []; + + range(3).map((num) => ( + collections.push(fakeCollectionRecordFactory({ + name: `coll${num + 1}`, + version: 1, + cumulus_id: num, + updated_at: num === 2 ? new Date(2020, 0, 29) : new Date(), + })) + )); + + t.context.granulePgModel = new GranulePgModel(); + const granules = []; + + range(2).map(() => ( + granules.push(fakeGranuleRecordFactory({ + collection_cumulus_id: 0, + })) + )); + + range(2).map((num) => ( + granules.push(fakeGranuleRecordFactory({ + collection_cumulus_id: 2, + updated_at: new Date(2020, num, 29), + })) + )); + + t.context.collections = collections; + await t.context.collectionPgModel.insert( + t.context.knex, + collections + ); + + t.context.granules = granules; + await t.context.granulePgModel.insert( + t.context.knex, + granules + ); }); -test.after.always(async () => { +test.after.always(async (t) => { await accessTokenModel.deleteTable(); await recursivelyDeleteS3Bucket(process.env.system_bucket); - await esClient.client.indices.delete({ index: esIndex }); + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); }); test('GET without pathParameters and without an Authorization header returns an Authorization Missing response', async (t) => { @@ -153,3 +184,35 @@ test.serial('timestamps filters collections by granule date', async (t) => { t.is(results.length, 1); t.is(results[0].name, 'coll3'); }); + +test.serial('timestamps filters collections and stats by granule date', async (t) => { + const fromDate = new Date(2020, 0, 1); + const toDate = new Date(2020, 1, 1); + const toDate2 = new Date(2020, 2, 1); + + let response = await request(app) + .get(`/collections/active?timestamp__from=${fromDate.getTime()}×tamp__to=${toDate.getTime()}&includeStats=true`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + let results = response.body.results; + t.is(results.length, 1); + let { name, stats } = results[0]; + t.is(name, 'coll3'); + t.truthy(stats); + t.is(stats.total, 1); + + response = await request(app) + .get(`/collections/active?timestamp__from=${fromDate.getTime()}×tamp__to=${toDate2.getTime()}&includeStats=true`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + results = response.body.results; + t.is(results.length, 1); + ({ name, stats } = results[0]); + t.is(name, 'coll3'); + 
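+  // widening the window to toDate2 (March 1) pulls coll3's second granule into the stats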
t.truthy(stats); + t.is(stats.total, 2); +}); diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 5896e9d009e..8cfa200b93f 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -98,7 +98,7 @@ class BaseSearch { this.buildTermQuery({ countQuery, searchQuery }); this.buildTermsQuery({ countQuery, searchQuery }); this.buildNotMatchQuery({ countQuery, searchQuery }); - this.buildRangeQuery({ countQuery, searchQuery }); + this.buildRangeQuery({ knex, countQuery, searchQuery }); this.buildExistsQuery({ countQuery, searchQuery }); this.buildInfixPrefixQuery({ countQuery, searchQuery }); this.buildSortQuery({ searchQuery }); @@ -200,11 +200,13 @@ class BaseSearch { * Build queries for range fields * * @param params + * @param params.knex - db client * @param [params.countQuery] - query builder for getting count * @param params.searchQuery - query builder for search * @param [params.dbQueryParameters] - db query parameters */ protected buildRangeQuery(params: { + knex?: Knex, countQuery?: Knex.QueryBuilder, searchQuery: Knex.QueryBuilder, dbQueryParameters?: DbQueryParameters, @@ -417,13 +419,12 @@ class BaseSearch { const knex = testKnex ?? await getKnexClient(); const { countQuery, searchQuery } = this.buildSearch(knex); try { - const countResult = await countQuery; + const [countResult, pgRecords] = await Promise.all([countQuery, searchQuery]); const meta = this._metaTemplate(); meta.limit = this.dbQueryParameters.limit; meta.page = this.dbQueryParameters.page; meta.count = Number(countResult[0]?.count ?? 0); - const pgRecords = await searchQuery; const apiRecords = await this.translatePostgresRecordsToApiRecords(pgRecords, knex); return { diff --git a/packages/db/src/search/CollectionSearch.ts b/packages/db/src/search/CollectionSearch.ts index 28ab2835f6d..2b436b5dd66 100644 --- a/packages/db/src/search/CollectionSearch.ts +++ b/packages/db/src/search/CollectionSearch.ts @@ -31,11 +31,13 @@ interface CollectionRecordApi extends CollectionRecord { * Class to build and execute db search query for collections */ export class CollectionSearch extends BaseSearch { + readonly active: boolean; readonly includeStats: boolean; constructor(event: QueryEvent) { - const { includeStats, ...queryStringParameters } = event.queryStringParameters || {}; + const { active, includeStats, ...queryStringParameters } = event.queryStringParameters || {}; super({ queryStringParameters }, 'collection'); + this.active = (active === 'true'); this.includeStats = (includeStats === 'true'); } @@ -82,10 +84,50 @@ export class CollectionSearch extends BaseSearch { } } + /** + * Build queries for range fields + * + * @param params + * @param params.knex - db client + * @param [params.countQuery] - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildRangeQuery(params: { + knex: Knex, + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + if (!this.active) { + super.buildRangeQuery(params); + return; + } + + const granulesTable = TableNames.granules; + const { knex, countQuery, searchQuery, dbQueryParameters } = params; + const { range = {} } = dbQueryParameters ?? 
this.dbQueryParameters; + + const subQuery = knex.select(1).from(granulesTable) + .where(`${granulesTable}.collection_cumulus_id`, knex.raw(`${this.tableName}.cumulus_id`)); + + Object.entries(range).forEach(([name, rangeValues]) => { + if (rangeValues.gte) { + subQuery.where(`${granulesTable}.${name}`, '>=', rangeValues.gte); + } + if (rangeValues.lte) { + subQuery.where(`${granulesTable}.${name}`, '<=', rangeValues.lte); + } + }); + subQuery.limit(1); + + [countQuery, searchQuery].forEach((query) => query.whereExists(subQuery)); + } + /** * Executes stats query to get granules' status aggregation * - * @param ids - array of cumulusIds of the collections + * @param collectionCumulusIds - array of cumulusIds of the collections * @param knex - knex for the stats query * @returns the collection's granules status' aggregation */ @@ -97,6 +139,18 @@ export class CollectionSearch extends BaseSearch { .count(`${granulesTable}.status`) .groupBy(`${granulesTable}.collection_cumulus_id`, `${granulesTable}.status`) .whereIn(`${granulesTable}.collection_cumulus_id`, collectionCumulusIds); + + if (this.active) { + Object.entries(this.dbQueryParameters?.range ?? {}).forEach(([name, rangeValues]) => { + if (rangeValues.gte) { + statsQuery.where(`${granulesTable}.${name}`, '>=', rangeValues.gte); + } + if (rangeValues.lte) { + statsQuery.where(`${granulesTable}.${name}`, '<=', rangeValues.lte); + } + }); + } + log.debug(`retrieveGranuleStats statsQuery: ${statsQuery?.toSQL().sql}`); const results = await statsQuery; const reduced = results.reduce((acc, record) => { const cumulusId = Number(record.collection_cumulus_id); @@ -139,8 +193,8 @@ export class CollectionSearch extends BaseSearch { : apiRecord; if (statsRecords) { - apiRecordFinal.stats = statsRecords[record.cumulus_id] ? statsRecords[record.cumulus_id] : - { + apiRecordFinal.stats = statsRecords[record.cumulus_id] ? 
statsRecords[record.cumulus_id] + : { queued: 0, completed: 0, failed: 0, diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index fc6719a635a..1357b4cfbbe 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -23,6 +23,9 @@ const granuleMapping: { [key: string]: Function } = { granuleId: (value?: string) => ({ granule_id: value, }), + _id: (value?: string) => ({ + granule_id: value, + }), lastUpdateDateTime: (value?: string) => ({ last_update_date_time: value, }), @@ -82,7 +85,6 @@ const granuleMapping: { [key: string]: Function } = { }), }; -// TODO add and verify all queryable fields for the following record types const asyncOperationMapping : { [key: string]: Function } = { createdAt: (value?: string) => ({ created_at: value && new Date(Number(value)), @@ -124,6 +126,15 @@ const collectionMapping : { [key: string]: Function } = { collectionVersion: version, }; }, + duplicateHandling: (value?: string) => ({ + duplicate_handling: value, + }), + granuleId: (value?: string) => ({ + granule_id_validation_regex: value, + }), + granuleIdExtraction: (value?: string) => ({ + granule_id_extraction_regex: value, + }), timestamp: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), @@ -139,11 +150,12 @@ const collectionMapping : { [key: string]: Function } = { sampleFileName: (value?: string) => ({ sample_file_name: value, }), - urlPath: (value?: string) => ({ + url_path: (value?: string) => ({ url_path: value, }), }; +// TODO add and verify all queryable fields for the following record types const executionMapping : { [key: string]: Function } = { arn: (value?: string) => ({ arn: value, diff --git a/packages/db/tests/search/test-CollectionSearch.js b/packages/db/tests/search/test-CollectionSearch.js index 4b09cc66142..595ebb81cb0 100644 --- a/packages/db/tests/search/test-CollectionSearch.js +++ b/packages/db/tests/search/test-CollectionSearch.js @@ -28,12 +28,14 @@ test.before(async (t) => { t.context.collectionPgModel = new CollectionPgModel(); const collections = []; + t.context.collectionSearchTmestamp = 1579352700000; + range(100).map((num) => ( collections.push(fakeCollectionRecordFactory({ name: num % 2 === 0 ? 'testCollection' : 'fakeCollection', version: num, cumulus_id: num, - updated_at: new Date(1579352700000 + (num % 2) * 1000), + updated_at: new Date(t.context.collectionSearchTmestamp + (num % 2)), process: num % 2 === 0 ? 'ingest' : 'publish', report_to_ems: num % 2 === 0, url_path: num % 2 === 0 ? 'https://fakepath.com' : undefined, @@ -43,12 +45,20 @@ test.before(async (t) => { t.context.granulePgModel = new GranulePgModel(); const granules = []; const statuses = ['queued', 'failed', 'completed', 'running']; + t.context.granuleSearchTmestamp = 1688888800000; range(1000).map((num) => ( granules.push(fakeGranuleRecordFactory({ + // collection with cumulus_id 0-9 each has 11 granules, + // collection 10-98 has 10 granules, and collection 99 has 0 granule collection_cumulus_id: num % 99, cumulus_id: 100 + num, status: statuses[num % 4], + // granule with collection_cumulus_id n has timestamp granuleSearchTmestamp + n, + // except granule 98 (with collection 98 ) which has timestamp granuleSearchTmestamp - 1 + updated_at: num === 98 + ? 
new Date(t.context.granuleSearchTmestamp - 1) + : new Date(t.context.granuleSearchTmestamp + (num % 99)), })) )); @@ -148,7 +158,7 @@ test('CollectionSearch supports term search for date field', async (t) => { const { knex } = t.context; const queryStringParameters = { limit: 200, - updatedAt: '1579352701000', + updatedAt: `${t.context.collectionSearchTmestamp + 1}`, }; const dbSearch = new CollectionSearch({ queryStringParameters }); const response = await dbSearch.query(knex); @@ -189,8 +199,27 @@ test('CollectionSearch supports term search for string field', async (t) => { t.is(response3.results?.length, 50); }); -// TODO in CUMULUS-3639 -test.todo('CollectionSearch supports range search'); +test('CollectionSearch supports range search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + timestamp__from: `${t.context.collectionSearchTmestamp + 1}`, + timestamp__to: `${t.context.collectionSearchTmestamp + 2}`, + }; + let dbSearch = new CollectionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + ...queryStringParameters, + active: 'true', + }; + dbSearch = new CollectionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); test('CollectionSearch supports search for multiple fields', async (t) => { const { knex } = t.context; @@ -198,7 +227,7 @@ test('CollectionSearch supports search for multiple fields', async (t) => { limit: 200, name: 'testCollection', version: '0', - updatedAt: '1579352700000', + updatedAt: `${t.context.collectionSearchTmestamp}`, process: 'ingest', reportToEms: 'true', }; @@ -319,7 +348,7 @@ test('CollectionSearch supports search which checks existence of collection fiel const { knex } = t.context; const queryStringParameters = { limit: 200, - urlPath__exists: 'true', + url_path__exists: 'true', }; const dbSearch = new CollectionSearch({ queryStringParameters }); const response = await dbSearch.query(knex); @@ -336,10 +365,57 @@ test('CollectionSearch supports includeStats', async (t) => { const dbSearch = new CollectionSearch({ queryStringParameters }); const response = await dbSearch.query(knex); - const expectedStats1 = { queued: 3, completed: 3, failed: 2, running: 3, total: 11 }; - const expectedStats2 = { queued: 0, completed: 0, failed: 0, running: 0, total: 0 }; + const expectedStats0 = { queued: 3, completed: 3, failed: 2, running: 3, total: 11 }; + const expectedStats98 = { queued: 2, completed: 3, failed: 3, running: 2, total: 10 }; + const expectedStats99 = { queued: 0, completed: 0, failed: 0, running: 0, total: 0 }; + t.is(response.meta.count, 100); t.is(response.results?.length, 100); - t.deepEqual(response.results[0].stats, expectedStats1); - t.deepEqual(response.results[99].stats, expectedStats2); + t.deepEqual(response.results[0].stats, expectedStats0); + t.deepEqual(response.results[98].stats, expectedStats98); + t.deepEqual(response.results[99].stats, expectedStats99); +}); + +test('CollectionSearch supports search for active collections', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: '200', + active: 'true', + includeStats: 'true', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + const expectedStats0 = { queued: 3, completed: 3, failed: 2, running: 3, total: 11 }; + 
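+  // collection 99 has no granules (see the fixture setup above), so only 99 collections are active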
const expectedStats10 = { queued: 2, completed: 3, failed: 3, running: 2, total: 10 }; + const expectedStats98 = { queued: 2, completed: 3, failed: 3, running: 2, total: 10 }; + t.is(response.meta.count, 99); + t.is(response.results?.length, 99); + t.deepEqual(response.results[0].stats, expectedStats0); + t.deepEqual(response.results[10].stats, expectedStats10); + t.deepEqual(response.results[98].stats, expectedStats98); +}); + +test('CollectionSearch support search for active collections and stats with granules updated in the given time frame', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: '200', + active: 'true', + includeStats: 'true', + timestamp__from: `${t.context.granuleSearchTmestamp + 10}`, + timestamp__to: `${t.context.granuleSearchTmestamp + 98}`, + sort_by: 'version', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + const expectedStats10 = { queued: 2, completed: 3, failed: 3, running: 2, total: 10 }; + // collection with cumulus_id 98 has 9 granules in the time frame + const expectedStats98 = { queued: 2, completed: 2, failed: 3, running: 2, total: 9 }; + + // collections with cumulus_id 0-9 are filtered out + t.is(response.meta.count, 89); + t.is(response.results?.length, 89); + t.deepEqual(response.results[0].stats, expectedStats10); + t.deepEqual(response.results[88].stats, expectedStats98); }); diff --git a/packages/db/tests/search/test-field-mapping.js b/packages/db/tests/search/test-field-mapping.js index 4fca79ec82f..cccfccfde28 100644 --- a/packages/db/tests/search/test-field-mapping.js +++ b/packages/db/tests/search/test-field-mapping.js @@ -106,7 +106,7 @@ test('mapQueryStringFieldToDbField correctly converts all collection api fields createdAt: '1591312763823', name: 'MOD11A1', reportToEms: 'true', - urlPath: 'http://fakepath.com', + url_path: 'http://fakepath.com', sampleFileName: 'hello.txt', version: '006', updatedAt: 1591384094512, From a94df430937ce2d8a6688be1d1d63ddd5d3b94e5 Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Fri, 5 Jul 2024 12:16:56 -0400 Subject: [PATCH 10/61] CUMULUS-3239 - Update Executions LIST endpoint to query Postgres basic (#3684) * first commit * adding changes * storing changes * updating progress * linting + small fixes * small fix * changing timestamp to string in tests * fixing timestamp * commenting out tests failing in CI but not locally * saving changes * collection support * adding async_ops support * changing endpoint + tests * fixing test * uncommenting tests + adding var * commenting out tests failing in CI but not locally * adding parentArn support + changing tests * added parentArn support + fixing tests * small endpoint test fix * Pr feedback + code improvements * small CHANGELOG fix * PR feedback * PR feedback + linting * PR feedback * PR feedback * fixing test * fixing execution tests after removing asyncCumulusOPId from mapping * PR feedback * removed includeFullRecord from search classes * PR feedback * PR feedback * reverting change --- CHANGELOG.md | 3 +- packages/api/endpoints/executions.js | 8 +- packages/api/tests/endpoints/stats.js | 4 +- .../api/tests/endpoints/test-executions.js | 19 +- packages/db/src/index.ts | 3 + packages/db/src/search/BaseSearch.ts | 30 + packages/db/src/search/ExecutionSearch.ts | 155 +++++ packages/db/src/search/field-mapping.ts | 19 + packages/db/src/search/queries.ts | 3 +- packages/db/src/translate/executions.ts | 67 ++- 
packages/db/src/types/search.ts | 2 + .../db/tests/search/test-ExecutionSearch.js | 557 ++++++++++++++++++ .../db/tests/search/test-GranuleSearch.js | 6 +- packages/db/tests/search/test-StatsSearch.js | 20 +- packages/db/tests/search/test-queries.js | 3 + 15 files changed, 840 insertions(+), 59 deletions(-) create mode 100644 packages/db/src/search/ExecutionSearch.ts create mode 100644 packages/db/tests/search/test-ExecutionSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index ee5fdb3e9c0..4c8fdc9bb6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## Unreleased ### Replace ElasticSearch Phase 1 - +- **CUMULUS-3239** + - Updated `execution` list api endpoint and added `ExecutionSearch` class to query postgres - **CUMULUS-3639** - Updated `/collections/active` endpoint to query postgres - **CUMULUS-3699** diff --git a/packages/api/endpoints/executions.js b/packages/api/endpoints/executions.js index 1010a1bfd13..ef09199c20e 100644 --- a/packages/api/endpoints/executions.js +++ b/packages/api/endpoints/executions.js @@ -12,6 +12,7 @@ const { ExecutionPgModel, translatePostgresExecutionToApiExecution, createRejectableTransaction, + ExecutionSearch, } = require('@cumulus/db'); const { deleteExecution } = require('@cumulus/es-client/indexer'); const { getEsClient, Search } = require('@cumulus/es-client/search'); @@ -125,11 +126,8 @@ async function update(req, res) { * @returns {Promise} the promise of express response object */ async function list(req, res) { - const search = new Search( - { queryStringParameters: req.query }, - 'execution', - process.env.ES_INDEX - ); + log.debug(`list query ${JSON.stringify(req.query)}`); + const search = new ExecutionSearch({ queryStringParameters: req.query }); const response = await search.query(); return res.send(response); } diff --git a/packages/api/tests/endpoints/stats.js b/packages/api/tests/endpoints/stats.js index e0612a8392f..d3c3876acf8 100644 --- a/packages/api/tests/endpoints/stats.js +++ b/packages/api/tests/endpoints/stats.js @@ -91,8 +91,8 @@ test.before(async (t) => { granules.push(fakeGranuleRecordFactory({ collection_cumulus_id: num % 20, status: statuses[num % 4], - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))).toISOString(), + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), error: errors[num % 5], duration: num + (num / 10), })) diff --git a/packages/api/tests/endpoints/test-executions.js b/packages/api/tests/endpoints/test-executions.js index c5ac4a8de4a..e61b5c2e4de 100644 --- a/packages/api/tests/endpoints/test-executions.js +++ b/packages/api/tests/endpoints/test-executions.js @@ -149,9 +149,8 @@ test.before(async (t) => { ); t.context.fakePGExecutions = await Promise.all(fakeExecutions.map(async (execution) => { - const omitExecution = omit(execution, ['asyncOperationId', 'parentArn']); const executionPgRecord = await translateApiExecutionToPostgresExecution( - omitExecution, + execution, t.context.knex ); const [pgExecution] = await t.context.executionPgModel.create( @@ -336,11 +335,10 @@ test.serial('GET executions returns list of executions by default', async (t) => .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); - const { meta, results } = response.body; t.is(results.length, 3); 
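+  // the postgres-backed ExecutionSearch reports the DB table name ('executions') in meta.table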
t.is(meta.stack, process.env.stackName); - t.is(meta.table, 'execution'); + t.is(meta.table, 'executions'); t.is(meta.count, 3); const arns = fakeExecutions.map((i) => i.arn); results.forEach((r) => { @@ -359,14 +357,15 @@ test.serial('executions can be filtered by workflow', async (t) => { const { meta, results } = response.body; t.is(results.length, 1); t.is(meta.stack, process.env.stackName); - t.is(meta.table, 'execution'); + t.is(meta.table, 'executions'); t.is(meta.count, 1); t.is(fakeExecutions[1].arn, results[0].arn); }); test.serial('GET executions with asyncOperationId filter returns the correct executions', async (t) => { const response = await request(app) - .get(`/executions?asyncOperationId=${t.context.asyncOperationId}`) + .get('/executions') + .query({ asyncOperationId: t.context.asyncOperationId }) .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); @@ -411,12 +410,6 @@ test('GET returns an existing execution', async (t) => { t.context.knex, executionRecord ); - t.teardown(async () => { - await t.context.executionPgModel.delete(t.context.knex, executionRecord); - await t.context.executionPgModel.delete(t.context.knex, parentExecutionRecord); - await collectionPgModel.delete(t.context.knex, collectionRecord); - await asyncOperationsPgModel.delete(t.context.knex, asyncRecord); - }); const response = await request(app) .get(`/executions/${executionRecord.arn}`) @@ -1305,7 +1298,7 @@ test.serial('POST /executions creates an execution that is searchable', async (t const { meta, results } = response.body; t.is(results.length, 1); t.is(meta.stack, process.env.stackName); - t.is(meta.table, 'execution'); + t.is(meta.table, 'executions'); t.is(meta.count, 1); t.is(results[0].arn, newExecution.arn); }); diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 908c6c9dcec..32929497e5f 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -141,6 +141,9 @@ export { export { BaseSearch, } from './search/BaseSearch'; +export { + ExecutionSearch, +} from './search/ExecutionSearch'; export { GranuleSearch, } from './search/GranuleSearch'; diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 8cfa200b93f..c039ed222f8 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -185,10 +185,16 @@ class BaseSearch { case 'pdrName': [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.pdr_cumulus_id`)); break; + case 'asyncOperationId': + [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.async_operation_cumulus_id`)); + break; case 'error': case 'error.Error': [countQuery, searchQuery].forEach((query) => query?.whereRaw(`${this.tableName}.error ->> 'Error' is ${checkNull}`)); break; + case 'parentArn': + [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.parent_cumulus_id`)); + break; default: [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.${name}`)); break; @@ -243,6 +249,8 @@ class BaseSearch { collections: collectionsTable, providers: providersTable, pdrs: pdrsTable, + asyncOperations: asyncOperationsTable, + executions: executionsTable, } = TableNames; const { countQuery, searchQuery, dbQueryParameters } = params; @@ -266,6 +274,12 @@ class BaseSearch { [countQuery, searchQuery] .forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' = '${value}'`)); break; + case 'asyncOperationId': + 
[countQuery, searchQuery].forEach((query) => query?.where(`${asyncOperationsTable}.id`, value)); + break; + case 'parentArn': + [countQuery, searchQuery].forEach((query) => query?.where(`${executionsTable}_parent.arn`, value)); + break; default: [countQuery, searchQuery].forEach((query) => query?.where(`${this.tableName}.${name}`, value)); break; @@ -290,6 +304,8 @@ class BaseSearch { collections: collectionsTable, providers: providersTable, pdrs: pdrsTable, + asyncOperations: asyncOperationsTable, + executions: executionsTable, } = TableNames; const { countQuery, searchQuery, dbQueryParameters } = params; @@ -321,6 +337,12 @@ class BaseSearch { [countQuery, searchQuery] .forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' in ('${value.join('\',\'')}')`)); break; + case 'asyncOperationId': + [countQuery, searchQuery].forEach((query) => query?.whereIn(`${asyncOperationsTable}.id`, value)); + break; + case 'parentArn': + [countQuery, searchQuery].forEach((query) => query?.whereIn(`${executionsTable}_parent.arn`, value)); + break; default: [countQuery, searchQuery].forEach((query) => query?.whereIn(`${this.tableName}.${name}`, value)); break; @@ -345,6 +367,8 @@ class BaseSearch { collections: collectionsTable, providers: providersTable, pdrs: pdrsTable, + asyncOperations: asyncOperationsTable, + executions: executionsTable, } = TableNames; const { countQuery, searchQuery, dbQueryParameters } = params; @@ -365,6 +389,12 @@ class BaseSearch { case 'pdrName': [countQuery, searchQuery].forEach((query) => query?.whereNot(`${pdrsTable}.name`, value)); break; + case 'asyncOperationId': + [countQuery, searchQuery].forEach((query) => query?.whereNot(`${asyncOperationsTable}.id`, value)); + break; + case 'parentArn': + [countQuery, searchQuery].forEach((query) => query?.whereNot(`${executionsTable}_parent.arn`, value)); + break; case 'error.Error': [countQuery, searchQuery].forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' != '${value}'`)); break; diff --git a/packages/db/src/search/ExecutionSearch.ts b/packages/db/src/search/ExecutionSearch.ts new file mode 100644 index 00000000000..2d1618b7fc8 --- /dev/null +++ b/packages/db/src/search/ExecutionSearch.ts @@ -0,0 +1,155 @@ +import { Knex } from 'knex'; +import Logger from '@cumulus/logger'; +import pick from 'lodash/pick'; +import { constructCollectionId } from '@cumulus/message/Collections'; +import { ApiExecutionRecord } from '@cumulus/types/api/executions'; +import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { translatePostgresExecutionToApiExecutionWithoutDbQuery } from '../translate/executions'; +import { PostgresExecutionRecord } from '../types/execution'; +import { TableNames } from '../tables'; +import { BaseRecord } from '../types/base'; + +const log = new Logger({ sender: '@cumulus/db/ExecutionSearch' }); + +interface ExecutionRecord extends BaseRecord, PostgresExecutionRecord { + collectionName?: string, + collectionVersion?: string, + asyncOperationId?: string; + parentArn?: string; +} + +/** + * Class to build and execute db search query for executions + */ +export class ExecutionSearch extends BaseSearch { + constructor(event: QueryEvent) { + super(event, 'execution'); + } + + /** + * check if joined async_ops table search is needed + * + * @returns whether collection search is needed + */ + protected searchAsync(): boolean { + const { not, term, terms } = this.dbQueryParameters; + return (!!(not?.asyncOperationId || + 
term?.asyncOperationId || terms?.asyncOperationId)); + } + + /** + * check if joined async_ops table search is needed + * + * @returns whether collection search is needed + */ + protected searchParent(): boolean { + const { not, term, terms } = this.dbQueryParameters; + return (!!(not?.parentArn || + term?.parentArn || terms?.parentArn)); + } + + /** + * Build basic query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + protected buildBasicQuery(knex: Knex) + : { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const { + collections: collectionsTable, + asyncOperations: asyncOperationsTable, + executions: executionsTable, + } = TableNames; + + const searchQuery = knex(`${this.tableName} as ${this.tableName}`) + .select(`${this.tableName}.*`) + .select({ + collectionName: `${collectionsTable}.name`, + collectionVersion: `${collectionsTable}.version`, + asyncOperationId: `${asyncOperationsTable}.id`, + }); + + if (this.searchParent() || this.dbQueryParameters.includeFullRecord) { + searchQuery.select({ parentArn: `${executionsTable}_parent.arn` }); + } + + const countQuery = knex(this.tableName) + .count(`${this.tableName}.cumulus_id`); + + if (this.searchCollection()) { + countQuery.innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + searchQuery.innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + } else { + searchQuery.leftJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + } + + if (this.searchAsync()) { + countQuery.innerJoin(asyncOperationsTable, `${this.tableName}.async_operation_cumulus_id`, `${asyncOperationsTable}.cumulus_id`); + searchQuery.innerJoin(asyncOperationsTable, `${this.tableName}.async_operation_cumulus_id`, `${asyncOperationsTable}.cumulus_id`); + } else { + searchQuery.leftJoin(asyncOperationsTable, `${this.tableName}.async_operation_cumulus_id`, `${asyncOperationsTable}.cumulus_id`); + } + + if (this.searchParent()) { + countQuery.innerJoin(`${this.tableName} as ${this.tableName}_parent`, `${this.tableName}.parent_cumulus_id`, `${this.tableName}_parent.cumulus_id`); + searchQuery.innerJoin(`${this.tableName} as ${this.tableName}_parent`, `${this.tableName}.parent_cumulus_id`, `${this.tableName}_parent.cumulus_id`); + } else if (this.dbQueryParameters.includeFullRecord) { + searchQuery.leftJoin(`${this.tableName} as ${this.tableName}_parent`, `${this.tableName}.parent_cumulus_id`, `${this.tableName}_parent.cumulus_id`); + } + return { countQuery, searchQuery }; + } + + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters ?? 
this.dbQueryParameters; + if (infix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.arn`, `%${infix}%`)); + } + if (prefix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.arn`, `%${prefix}%`)); + } + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres records returned from query + * @returns translated api records + */ + protected translatePostgresRecordsToApiRecords(pgRecords: ExecutionRecord[]) + : Partial[] { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const apiRecords = pgRecords.map((executionRecord: ExecutionRecord) => { + const { collectionName, collectionVersion, asyncOperationId, parentArn } = executionRecord; + const collectionId = collectionName && collectionVersion ? + constructCollectionId(collectionName, collectionVersion) : undefined; + const apiRecord = translatePostgresExecutionToApiExecutionWithoutDbQuery({ + executionRecord, + collectionId, + asyncOperationId, + parentArn, + }); + return this.dbQueryParameters.fields + ? pick(apiRecord, this.dbQueryParameters.fields) + : apiRecord; + }); + return apiRecords; + } +} diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 1357b4cfbbe..39fd2ef61ec 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -163,9 +163,22 @@ const executionMapping : { [key: string]: Function } = { createdAt: (value?: string) => ({ created_at: value && new Date(Number(value)), }), + duration: (value?: string) => ({ + duration: value && Number(value), + }), + // nested error field + 'error.Error': (value?: string) => ({ + 'error.Error': value, + }), + 'error.Error.keyword': (value?: string) => ({ + 'error.Error': value, + }), execution: (value?: string) => ({ url: value, }), + type: (value?: string) => ({ + workflow_name: value, + }), status: (value?: string) => ({ status: value, }), @@ -176,6 +189,12 @@ const executionMapping : { [key: string]: Function } = { updated_at: value && new Date(Number(value)), }), // The following fields require querying other tables + asyncOperationId: (value?: string) => ({ + asyncOperationId: value, + }), + parentArn: (value?: string) => ({ + parentArn: value, + }), collectionId: (value?: string) => { const { name, version } = (value && deconstructCollectionId(value)) || {}; return { diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts index 192fa01265c..3e1bbe4ddd1 100644 --- a/packages/db/src/search/queries.ts +++ b/packages/db/src/search/queries.ts @@ -229,7 +229,7 @@ export const convertQueryStringToDbQueryParameters = ( type: string, queryStringParameters: QueryStringParameters ): DbQueryParameters => { - const { limit, page, prefix, infix, fields } = queryStringParameters; + const { limit, page, prefix, infix, fields, includeFullRecord } = queryStringParameters; const dbQueryParameters: DbQueryParameters = {}; dbQueryParameters.page = Number.parseInt(page ?? 
'1', 10); @@ -239,6 +239,7 @@ export const convertQueryStringToDbQueryParameters = ( if (typeof infix === 'string') dbQueryParameters.infix = infix; if (typeof prefix === 'string') dbQueryParameters.prefix = prefix; if (typeof fields === 'string') dbQueryParameters.fields = fields.split(','); + dbQueryParameters.includeFullRecord = (includeFullRecord === 'true'); dbQueryParameters.sort = convertSort(type, queryStringParameters); // remove reserved words (that are not fields) diff --git a/packages/db/src/translate/executions.ts b/packages/db/src/translate/executions.ts index c2fc51c35c8..ce4402120f0 100644 --- a/packages/db/src/translate/executions.ts +++ b/packages/db/src/translate/executions.ts @@ -14,6 +14,44 @@ import { ExecutionPgModel } from '../models/execution'; import { CollectionPgModel } from '../models/collection'; import { AsyncOperationPgModel } from '../models/async_operation'; +export const translatePostgresExecutionToApiExecutionWithoutDbQuery = ({ + executionRecord, + collectionId, + asyncOperationId, + parentArn, +}:{ + executionRecord: PostgresExecutionRecord, + collectionId: string | undefined, + asyncOperationId: string | undefined, + parentArn: string | undefined, +}): ApiExecutionRecord => { + const postfix = executionRecord.arn.split(':').pop(); + if (!postfix) { + throw new Error(`Execution ARN record ${executionRecord.arn} has an invalid postfix and API cannot generate the required 'name' field`); + } + + const translatedRecord = { + name: postfix, + status: executionRecord.status, + arn: executionRecord.arn, + duration: executionRecord.duration, + error: executionRecord.error, + tasks: executionRecord.tasks, + originalPayload: executionRecord.original_payload, + finalPayload: executionRecord.final_payload, + type: executionRecord.workflow_name, + execution: executionRecord.url, + cumulusVersion: executionRecord.cumulus_version, + asyncOperationId, + collectionId, + parentArn, + createdAt: executionRecord.created_at.getTime(), + updatedAt: executionRecord.updated_at.getTime(), + timestamp: executionRecord.timestamp?.getTime(), + }; + return removeNilProperties(translatedRecord); +}; + export const translatePostgresExecutionToApiExecution = async ( executionRecord: PostgresExecutionRecord, knex: Knex, @@ -21,9 +59,9 @@ export const translatePostgresExecutionToApiExecution = async ( asyncOperationPgModel = new AsyncOperationPgModel(), executionPgModel = new ExecutionPgModel() ): Promise => { - let parentArn: string | undefined; let collectionId: string | undefined; let asyncOperationId: string | undefined; + let parentArn: string | undefined; if (executionRecord.collection_cumulus_id) { const collection = await collectionPgModel.get(knex, { @@ -44,31 +82,12 @@ export const translatePostgresExecutionToApiExecution = async ( parentArn = parentExecution.arn; } - const postfix = executionRecord.arn.split(':').pop(); - if (!postfix) { - throw new Error(`Execution ARN record ${executionRecord.arn} has an invalid postfix and API cannot generate the required 'name' field`); - } - - const translatedRecord = { - name: postfix, - status: executionRecord.status, - arn: executionRecord.arn, - duration: executionRecord.duration, - error: executionRecord.error, - tasks: executionRecord.tasks, - originalPayload: executionRecord.original_payload, - finalPayload: executionRecord.final_payload, - type: executionRecord.workflow_name, - execution: executionRecord.url, - cumulusVersion: executionRecord.cumulus_version, - asyncOperationId, + return 
translatePostgresExecutionToApiExecutionWithoutDbQuery({ + executionRecord, collectionId, + asyncOperationId, parentArn, - createdAt: executionRecord.created_at.getTime(), - updatedAt: executionRecord.updated_at.getTime(), - timestamp: executionRecord.timestamp?.getTime(), - }; - return removeNilProperties(translatedRecord); + }); }; /** diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index 68cb7b2d0dd..2dbad8f2287 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -6,6 +6,7 @@ export type QueryStringParameters = { page?: string, order?: string, prefix?: string, + includeFullRecord?: string, sort_by?: string, sort_key?: string[], [key: string]: string | string[] | undefined, @@ -31,6 +32,7 @@ export type DbQueryParameters = { fields?: string[], infix?: string, limit?: number, + includeFullRecord?: boolean, exists?: { [key: string]: boolean }, not?: { [key: string]: QueriableType | undefined }, offset?: number, diff --git a/packages/db/tests/search/test-ExecutionSearch.js b/packages/db/tests/search/test-ExecutionSearch.js new file mode 100644 index 00000000000..deab4baad84 --- /dev/null +++ b/packages/db/tests/search/test-ExecutionSearch.js @@ -0,0 +1,557 @@ +'use strict'; + +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); +const { constructCollectionId } = require('@cumulus/message/Collections'); +const { ExecutionSearch } = require('../../dist/search/ExecutionSearch'); + +const { + generateLocalTestDb, + destroyLocalTestDb, + CollectionPgModel, + fakeAsyncOperationRecordFactory, + fakeCollectionRecordFactory, + migrationDir, + fakeExecutionRecordFactory, + ExecutionPgModel, + AsyncOperationPgModel, +} = require('../../dist'); + +const testDbName = `collection_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + const statuses = ['queued', 'failed', 'completed', 'running']; + const errors = [{ Error: 'UnknownError' }, { Error: 'CumulusMessageAdapterError' }, { Error: 'IngestFailure' }, { Error: 'CmrFailure' }, {}]; + + // Create a PG Collection + t.context.collectionPgModel = new CollectionPgModel(); + t.context.testPgCollection = fakeCollectionRecordFactory( + { cumulus_id: 0, + name: 'testCollection', + version: 8 } + ); + + await t.context.collectionPgModel.insert( + t.context.knex, + t.context.testPgCollection + ); + + t.context.collectionCumulusId = t.context.testPgCollection.cumulus_id; + + t.context.collectionId = constructCollectionId( + t.context.testPgCollection.name, + t.context.testPgCollection.version + ); + + t.context.asyncOperationsPgModel = new AsyncOperationPgModel(); + t.context.testAsyncOperation = fakeAsyncOperationRecordFactory({ cumulus_id: 140 }); + t.context.asyncCumulusId = t.context.testAsyncOperation.cumulus_id; + + await t.context.asyncOperationsPgModel.insert( + t.context.knex, + t.context.testAsyncOperation + ); + + t.context.duration = 100; + + const executions = []; + t.context.executionPgModel = new ExecutionPgModel(); + + range(50).map((num) => ( + executions.push(fakeExecutionRecordFactory({ + collection_cumulus_id: num % 2 === 0 ? 
t.context.collectionCumulusId : undefined, + status: statuses[(num % 3) + 1], + error: errors[num % 5], + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), + workflow_name: `testWorkflow__${num}`, + arn: num % 2 === 0 ? `testArn__${num}:testExecutionName` : `fakeArn__${num}:fakeExecutionName`, + url: `https://fake-execution${num}.com/`, + original_payload: { + orginal: `payload__${num}`, + }, + final_payload: num % 2 === 0 ? { + final: `payload__${num}`, + } : undefined, + duration: num > 0 ? t.context.duration * ((num % 2) + 1) : undefined, + async_operation_cumulus_id: num % 2 === 0 ? t.context.asyncCumulusId + : undefined, + parent_cumulus_id: num > 25 ? num % 25 : undefined, + cumulus_id: num, + timestamp: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), + })) + )); + await t.context.executionPgModel.insert( + t.context.knex, + executions + ); +}); + +test.after.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('ExecutionSearch returns correct response for basic query', async (t) => { + const { knex } = t.context; + const dbSearch = new ExecutionSearch({}); + const results = await dbSearch.query(knex); + t.is(results.meta.count, 50); + t.is(results.results.length, 10); + const expectedResponse1 = { + name: 'testExecutionName', + status: 'failed', + arn: 'testArn__0:testExecutionName', + error: { Error: 'UnknownError' }, + originalPayload: { orginal: 'payload__0' }, + finalPayload: { final: 'payload__0' }, + type: 'testWorkflow__0', + execution: 'https://fake-execution0.com/', + asyncOperationId: t.context.testAsyncOperation.id, + collectionId: 'testCollection___8', + createdAt: new Date(2017, 11, 31).getTime(), + updatedAt: new Date(2018, 0, 1).getTime(), + timestamp: new Date(2018, 0, 1).getTime(), + }; + + const expectedResponse10 = { + name: 'fakeExecutionName', + status: 'failed', + arn: 'fakeArn__9:fakeExecutionName', + duration: 200, + error: {}, + originalPayload: { orginal: 'payload__9' }, + type: 'testWorkflow__9', + execution: 'https://fake-execution9.com/', + createdAt: new Date(2021, 9, 9).getTime(), + updatedAt: new Date(2021, 9, 10).getTime(), + timestamp: new Date(2021, 9, 10).getTime(), + }; + t.deepEqual(results.results[0], expectedResponse1); + t.deepEqual(results.results[9], expectedResponse10); +}); + +test('ExecutionSearch supports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 25, + page: 2, + }; + let dbSearch = new ExecutionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 10, + page: 5, + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 0); +}); + +test('ExecutionSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + infix: 'fake', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + 
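  // Per ExecutionSearch.buildInfixPrefixQuery above, both infix and prefix are
  // applied to the execution arn column with whereLike (here `%fake%`), so the
  // 25 matches should be the records created with `fakeArn__...` ARNs:
  response.results.forEach((execution) => t.true(execution.arn.includes('fake')));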
t.is(response.results?.length, 25); +}); + +test('ExecutionSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + prefix: 'test', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('ExecutionSearch supports collectionId term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + collectionId: t.context.collectionId, + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('ExecutionSearch supports asyncOperationId term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + asyncOperationId: t.context.testAsyncOperation.id, + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('ExecutionSearch supports term search for number field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + duration: 100, + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 24); + t.is(response.results?.length, 24); +}); + +test('ExecutionSearch supports term search for string field', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 50, + status: 'completed', + }; + let dbSearch = new ExecutionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 17); + t.is(response.results?.length, 17); + + queryStringParameters = { + limit: 50, + type: 'testWorkflow__5', + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('ExecutionSearch supports term search for nested error.Error', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + 'error.Error': 'CumulusMessageAdapterError', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 10); + t.is(response.results?.length, 10); +}); + +test('ExecutionSearch supports range search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 50, + duration__from: 100, + duration__to: 150, + }; + let dbSearch = new ExecutionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 24); + t.is(response.results?.length, 24); + + queryStringParameters = { + limit: 200, + duration__from: 150, + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('ExecutionSearch non-existing fields are ignored', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new ExecutionSearch({ 
queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('ExecutionSearch returns fields specified', async (t) => { + const { knex } = t.context; + const fields = 'status,arn,type,error'; + const queryStringParameters = { + fields, + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + response.results.forEach((execution) => t.deepEqual(Object.keys(execution), fields.split(','))); +}); + +test('ExecutionSearch supports search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + id: 13, + workflow_name: 'testWorkflow__13', + arn: 'fakeArn__13:fakeExecutionName', + url: 'https://fake-execution13.com/', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('ExecutionSearch supports sorting', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 50, + sort_by: 'timestamp', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results[0].updatedAt < response.results[49].updatedAt); + t.true(response.results[1].updatedAt < response.results[25].updatedAt); + + queryStringParameters = { + limit: 50, + sort_by: 'timestamp', + order: 'desc', + }; + const dbSearch2 = new ExecutionSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 50); + t.is(response2.results?.length, 50); + t.true(response2.results[0].updatedAt > response2.results[49].updatedAt); + t.true(response2.results[1].updatedAt > response2.results[25].updatedAt); + + queryStringParameters = { + limit: 200, + sort_key: ['-timestamp'], + }; + const dbSearch3 = new ExecutionSearch({ queryStringParameters }); + const response3 = await dbSearch3.query(knex); + t.is(response3.meta.count, 50); + t.is(response3.results?.length, 50); + t.true(response3.results[0].updatedAt > response3.results[49].updatedAt); + t.true(response3.results[1].updatedAt > response3.results[25].updatedAt); +}); + +test('ExecutionSearch supports sorting by Error', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 50, + sort_by: 'error.Error', + }; + const dbSearch7 = new ExecutionSearch({ queryStringParameters }); + const response7 = await dbSearch7.query(knex); + t.is(response7.results[0].error.Error, 'CmrFailure'); + t.is(response7.results[49].error.Error, undefined); + + queryStringParameters = { + limit: 50, + sort_by: 'error.Error.keyword', + order: 'desc', + }; + const dbSearch10 = new ExecutionSearch({ queryStringParameters }); + const response10 = await dbSearch10.query(knex); + t.is(response10.results[0].error.Error, undefined); + t.is(response10.results[49].error.Error, 'CmrFailure'); +}); + +test('ExecutionSearch supports terms search', async (t) => { + const { knex } = t.context; + + let queryStringParameters = { + limit: 50, + type__in: ['testWorkflow__1', 'testWorkflow__2'].join(','), + }; + let dbSearch = new ExecutionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 2); + 
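  // The double-underscore suffixes select the query type: `type__in` becomes a
  // terms query (field-mapping translates `type` to workflow_name, and the terms
  // branch in BaseSearch applies knex whereIn() over the comma-separated values),
  // while `__not` goes through whereNot() and `__exists` through a null/not-null
  // check, as the parentArn and duration tests below exercise.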
t.is(response.results?.length, 2); + + queryStringParameters = { + limit: 50, + type__in: ['testWorkflow__1', 'testWorkflow__2'].join(','), + status__in: 'running', + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('ExecutionSearch supports parentArn term search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + parentArn: 'fakeArn__21:fakeExecutionName', + }; + let dbSearch = new ExecutionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + const expectedResponse = { + name: 'testExecutionName', + status: 'completed', + arn: 'testArn__46:testExecutionName', + duration: 100, + error: { Error: 'CumulusMessageAdapterError' }, + originalPayload: { orginal: 'payload__46' }, + finalPayload: { final: 'payload__46' }, + type: 'testWorkflow__46', + execution: 'https://fake-execution46.com/', + asyncOperationId: t.context.testAsyncOperation.id, + collectionId: 'testCollection___8', + parentArn: 'fakeArn__21:fakeExecutionName', + createdAt: new Date(2022, 10, 16).getTime(), + updatedAt: new Date(2022, 10, 18).getTime(), + timestamp: new Date(2022, 10, 18).getTime(), + }; + + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); + t.deepEqual(response.results[0], expectedResponse); + queryStringParameters = { + limit: 50, + parentArn__exists: 'true', + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 24); + t.is(response.results?.length, 24); + queryStringParameters = { + limit: 50, + parentArn__in: ['fakeArn__21:fakeExecutionName', 'testArn__22:testExecutionName'].join(','), + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 2); + t.is(response.results?.length, 2); + queryStringParameters = { + limit: 50, + parentArn__not: 'testArn__2:testExecutionName', + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 23); + t.is(response.results?.length, 23); +}); + +test('ExecutionSearch supports error.Error terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 50, + 'error.Error__in': ['CumulusMessageAdapterError', 'UnknownError'].join(','), + }; + let dbSearch = new ExecutionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 20); + t.is(response.results?.length, 20); + + queryStringParameters = { + limit: 50, + 'error.Error__in': 'unknownerror', + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); + +test('ExecutionSearch supports search which checks existence of execution field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + duration__exists: 'true', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); +}); + +test('ExecutionSearch supports search which execution field does not match the given value', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + status__not: 'completed', + }; + const dbSearch = 
new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 33); + t.is(response.results?.length, 33); +}); + +test('ExecutionSearch supports term search for timestamp', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 50, + timestamp: `${(new Date(2023, 11, 7)).getTime()}`, + }; + let dbSearch = new ExecutionSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); + queryStringParameters = { + limit: 200, + timestamp__from: `${(new Date(2019, 2, 21)).getTime()}`, + timestamp__to: `${(new Date(2027, 1, 23)).getTime()}`, + }; + dbSearch = new ExecutionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 36); + t.is(response.results?.length, 36); +}); + +test('ExecutionSearch supports term search for date field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + updatedAt: `${new Date(2018, 0, 20).getTime()}`, + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('ExecutionSearch includeFullRecord', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + includeFullRecord: 'true', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true('parentArn' in response.results[40]); + t.true('collectionId' in response.results[40]); + t.true('asyncOperationId' in response.results[40]); +}); diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index 50a70c0c06e..04911d29bb0 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -448,14 +448,14 @@ test('GranuleSearch supports sorting', async (t) => { queryStringParameters = { limit: 200, sort_by: 'timestamp', - order: 'asc', + order: 'desc', }; const dbSearch2 = new GranuleSearch({ queryStringParameters }); const response2 = await dbSearch2.query(knex); t.is(response2.meta.count, 100); t.is(response2.results?.length, 100); - t.true(response2.results[0].updatedAt < response2.results[99].updatedAt); - t.true(response2.results[1].updatedAt < response2.results[50].updatedAt); + t.true(response2.results[0].updatedAt > response2.results[99].updatedAt); + t.true(response2.results[1].updatedAt > response2.results[50].updatedAt); queryStringParameters = { limit: 200, diff --git a/packages/db/tests/search/test-StatsSearch.js b/packages/db/tests/search/test-StatsSearch.js index 6b94d7fa14b..a2a7faba6dc 100644 --- a/packages/db/tests/search/test-StatsSearch.js +++ b/packages/db/tests/search/test-StatsSearch.js @@ -35,8 +35,8 @@ test.before(async (t) => { t.context.collectionPgModel = new CollectionPgModel(); t.context.granulePgModel = new GranulePgModel(); t.context.providerPgModel = new ProviderPgModel(); - t.context.PdrPgModel = new PdrPgModel(); - t.context.ExecutionPgModel = new ExecutionPgModel(); + t.context.pdrPgModel = new PdrPgModel(); + t.context.executionPgModel = new ExecutionPgModel(); const statuses = ['queued', 'failed', 'completed', 'running']; const errors = [{ Error: 'UnknownError' }, { Error: 'CumulusMessageAdapterError' }, { 
Error: 'IngestFailure' }, { Error: 'CmrFailure' }, {}]; @@ -66,8 +66,8 @@ test.before(async (t) => { collection_cumulus_id: num % 20, granule_id: num % 2 === 0 ? `testGranule${num}` : `query__Granule${num}`, status: statuses[num % 4], - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))).toISOString(), + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), error: errors[num % 5], duration: num + (num / 10), provider_cumulus_id: num % 10, @@ -79,16 +79,16 @@ test.before(async (t) => { collection_cumulus_id: num, status: statuses[(num % 3) + 1], provider_cumulus_id: num % 10, - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))).toISOString(), + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), // eslint-disable-next-line no-sequences })), executions.push(fakeExecutionRecordFactory({ collection_cumulus_id: num, status: statuses[(num % 3) + 1], error: errors[num % 5], - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))).toISOString(), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))).toISOString(), + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), })) )); @@ -107,12 +107,12 @@ test.before(async (t) => { granules ); - await t.context.ExecutionPgModel.insert( + await t.context.executionPgModel.insert( t.context.knex, executions ); - await t.context.PdrPgModel.insert( + await t.context.pdrPgModel.insert( t.context.knex, pdrs ); diff --git a/packages/db/tests/search/test-queries.js b/packages/db/tests/search/test-queries.js index 1fc690aaf0a..70d8a69c404 100644 --- a/packages/db/tests/search/test-queries.js +++ b/packages/db/tests/search/test-queries.js @@ -24,6 +24,7 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string collectionId__in: 'MOD09GQ___006,MODIS___007', granuleId__not: 'notMatchingGranuleId', error__exists: 'true', + includeFullRecord: 'true', }; const expectedDbQueryParameters = { @@ -32,6 +33,7 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string }, fields: ['granuleId', 'collectionId', 'status', 'updatedAt'], infix: 'A1657416', + includeFullRecord: true, limit: 20, not: { granule_id: 'notMatchingGranuleId', @@ -84,6 +86,7 @@ test('convertQueryStringToDbQueryParameters correctly converts sortby error para limit: 10, offset: 0, page: 1, + includeFullRecord: false, sort: [ { column: 'error.Error', From 87dcd7bcb91b5e9312c4b72e61d788a4ab77c692 Mon Sep 17 00:00:00 2001 From: jennyhliu Date: Thu, 18 Jul 2024 11:19:29 -0400 Subject: [PATCH 11/61] fix changelog --- CHANGELOG.md | 59 ++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1cadc5ccd1..65322be1471 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,36 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
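The Replace ElasticSearch Phase 1 entries below cover list endpoints that now query PostgreSQL. As a rough illustration only (mirroring the supertest style used in the API tests in this PR; the app and auth setup are assumed from those tests, and query parameter names are taken from the ExecutionSearch tests), a basic executions query handled by the new ExecutionSearch class might look like:

const response = await request(app)
  .get('/executions')
  .query({ status: 'completed', type__in: 'testWorkflow__1,testWorkflow__2', duration__from: '100', limit: '10' })
  .set('Accept', 'application/json')
  .set('Authorization', `Bearer ${jwtAuthToken}`)
  .expect(200);
// response.body.meta.count is the PostgreSQL row count; response.body.results
// holds the translated API execution records.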
## [Unreleased] +### Replace ElasticSearch Phase 1 + +- **CUMULUS-3239** + - Updated `execution` list api endpoint and added `ExecutionSearch` class to query postgres +- **CUMULUS-3639** + - Updated `/collections/active` endpoint to query postgres +- **CUMULUS-3641** + - Updated `collections` api endpoint to query postgres instead of elasticsearch except if `includeStats` is in the query parameters +- **CUMULUS-3688** + - Updated `stats` api endpoint to query postgres instead of elasticsearch +- **CUMULUS-3689** + - Updated `stats/aggregate` api endpoint to query postgres instead of elasticsearch + - Created a new StatsSearch class for querying postgres with the stats endpoint +- **CUMULUS-3692** + - Added `@cumulus/db/src/search` `BaseSearch` and `GranuleSearch` classes to + support basic queries for granules + - Updated granules List endpoint to query postgres for basic queries +- **CUMULUS-3693** + - Added functionality to `@cumulus/db/src/search` to support range queries +- **CUMULUS-3694** + - Added functionality to `@cumulus/db/src/search` to support term queries + - Updated `BaseSearch` and `GranuleSearch` classes to support term queries for granules + - Updated granules List endpoint to search postgres +- **CUMULUS-3695** + - Updated `granule` list api endpoint and BaseSearch class to handle sort fields +- **CUMULUS-3696** + - Added functionality to `@cumulus/db/src/search` to support terms, `not` and `exists` queries +- **CUMULUS-3699** + - Updated `collections` api endpoint to be able to support `includeStats` query string parameter + ### Migration Notes #### CUMULUS-3320 Update executions table @@ -78,35 +108,6 @@ degraded execution table operations. ## [v18.3.1] 2024-07-08 -### Replace ElasticSearch Phase 1 -- **CUMULUS-3239** - - Updated `execution` list api endpoint and added `ExecutionSearch` class to query postgres -- **CUMULUS-3639** - - Updated `/collections/active` endpoint to query postgres -- **CUMULUS-3699** - - Updated `collections` api endpoint to be able to support `includeStats` query string parameter -- **CUMULUS-3641** - - Updated `collections` api endpoint to query postgres instead of elasticsearch except if `includeStats` is in the query parameters -- **CUMULUS-3695** - - Updated `granule` list api endpoint and BaseSearch class to handle sort fields -- **CUMULUS-3688** - - Updated `stats` api endpoint to query postgres instead of elasticsearch -- **CUMULUS-3689** - - Updated `stats/aggregate` api endpoint to query postgres instead of elasticsearch - - Created a new StatsSearch class for querying postgres with the stats endpoint -- **CUMULUS-3692** - - Added `@cumulus/db/src/search` `BaseSearch` and `GranuleSearch` classes to - support basic queries for granules - - Updated granules List endpoint to query postgres for basic queries -- **CUMULUS-3693** - - Added functionality to `@cumulus/db/src/search` to support range queries -- **CUMULUS-3694** - - Added functionality to `@cumulus/db/src/search` to support term queries - - Updated `BaseSearch` and `GranuleSearch` classes to support term queries for granules - - Updated granules List endpoint to search postgres -- **CUMULUS-3696** - - Added functionality to `@cumulus/db/src/search` to support terms, `not` and `exists` queries - ### Migration Notes #### CUMULUS-3433 Update to node.js v20 From af78001b0b5a430d17a2c1cf30433d684e4064ee Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Thu, 25 Jul 2024 10:43:00 -0400 Subject: [PATCH 12/61] Cumulus 3640/3242- Update granule 
non-LIST endpoints and other granule related es queries to query postgres (#3727) * removing granules * fixing lint * fixing test * small change * adding back in some deleted things * removing more * lint fix * removing tests * skipping execution search-by-granules tests * skipping execution tests * removing tests * more removing * adding in deleted test * removing more * adding back in needed code * removing ES_HOST, query, and index from bulk_ops * fixing bulk_ops tests * adding back in ELK stack refs * changing reconreports test to skip/adding back in getGranulesByPayload * PR feedback * adding back in skipped tests * CHANGELOG * PR feedback * PR feedback' . ; * PR feedback + syntax check * adding back sort to write-granules test --- CHANGELOG.md | 19 +- .../CreateReconciliationReportSpec.js | 12 +- packages/api/endpoints/granules.js | 89 +- packages/api/lib/granules.js | 13 - .../api/lib/writeRecords/write-granules.js | 123 +-- packages/api/src/lib/granule-delete.ts | 53 +- .../endpoints/granules/test-bulk-delete.js | 3 - .../endpoints/granules/test-bulk-granules.js | 3 - .../endpoints/granules/test-bulk-reingest.js | 3 - packages/api/tests/endpoints/test-granules.js | 369 +------ .../api/tests/helpers/create-test-data.js | 14 +- .../tests/lambdas/test-bulk-granule-delete.js | 3 - .../api/tests/lambdas/test-bulk-operation.js | 14 +- packages/api/tests/lib/test-granule-delete.js | 202 +--- .../lib/writeRecords/test-write-granules.js | 985 +++--------------- 15 files changed, 222 insertions(+), 1683 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e34acdb45bf..4a35c5234ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,18 +7,19 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [Unreleased] ### Replace ElasticSearch Phase 1 - - **CUMULUS-3239** - - Updated `execution` list api endpoint and added `ExecutionSearch` class to query postgres + - Updated `execution` list api endpoint and added `ExecutionSearch` class to query postgres - **CUMULUS-3639** - - Updated `/collections/active` endpoint to query postgres + - Updated `/collections/active` endpoint to query postgres +- **CUMULUS-3640** + - Removed elasticsearch dependency from granules endpoint - **CUMULUS-3641** - - Updated `collections` api endpoint to query postgres instead of elasticsearch except if `includeStats` is in the query parameters + - Updated `collections` api endpoint to query postgres instead of elasticsearch except if `includeStats` is in the query parameters - **CUMULUS-3688** - - Updated `stats` api endpoint to query postgres instead of elasticsearch + - Updated `stats` api endpoint to query postgres instead of elasticsearch - **CUMULUS-3689** - - Updated `stats/aggregate` api endpoint to query postgres instead of elasticsearch - - Created a new StatsSearch class for querying postgres with the stats endpoint + - Updated `stats/aggregate` api endpoint to query postgres instead of elasticsearch + - Created a new StatsSearch class for querying postgres with the stats endpoint - **CUMULUS-3692** - Added `@cumulus/db/src/search` `BaseSearch` and `GranuleSearch` classes to support basic queries for granules @@ -30,11 +31,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Updated `BaseSearch` and `GranuleSearch` classes to support term queries for granules - Updated granules List endpoint to search postgres - **CUMULUS-3695** - - Updated `granule` list api endpoint and BaseSearch class to handle sort fields + - Updated `granule` list api endpoint and BaseSearch class to handle sort fields - **CUMULUS-3696** - Added functionality to `@cumulus/db/src/search` to support terms, `not` and `exists` queries - **CUMULUS-3699** - - Updated `collections` api endpoint to be able to support `includeStats` query string parameter + - Updated `collections` api endpoint to be able to support `includeStats` query string parameter ### Migration Notes diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index 1fb7fe65625..ea3a35db7c4 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -345,7 +345,6 @@ describe('When there are granule differences and granule reconciliation is run', await s3().putObject({ Body: 'delete-me', ...extraS3Object }); extraCumulusCollection = await createActiveCollection(config.stackName, config.bucket); - const testId = createTimestampedTestId(config.stackName, 'CreateReconciliationReport'); testSuffix = createTestSuffix(testId); testDataFolder = createTestDataPath(testId); @@ -428,7 +427,8 @@ describe('When there are granule differences and granule reconciliation is run', if (beforeAllFailed) fail(beforeAllFailed); }); - describe('Create an Inventory Reconciliation Report to monitor inventory discrepancies', () => { + // TODO: fix tests in CUMULUS-3806 when CreateReconciliationReport lambda is changed to query postgres + xdescribe('Create an Inventory Reconciliation Report to monitor inventory discrepancies', () => { // report record in db and report in s3 let reportRecord; let report; @@ -541,7 +541,7 @@ describe('When there are granule differences and granule reconciliation is run', expect(report.granulesInCumulusCmr.okCount).toBe(1); }); - it('generates a filtered report showing granules that are in the Cumulus but not in CMR', () => { + it('generates a filtered report showing granules that are in Cumulus but not in CMR', () => { if (beforeAllFailed) fail(beforeAllFailed); // ingested (not published) granule should only in Cumulus const cumulusGranuleIds = report.granulesInCumulusCmr.onlyInCumulus.map((gran) => gran.granuleId); @@ -612,7 +612,8 @@ describe('When there are granule differences and granule reconciliation is run', }); }); - describe('Create an Internal Reconciliation Report to monitor internal discrepancies', () => { + // TODO: the internal report functionality will be removed after collections/granules is changed to no longer use ES + xdescribe('Create an Internal Reconciliation Report to monitor internal discrepancies', () => { // report record in db and report in s3 let reportRecord; let report; @@ -833,7 +834,8 @@ describe('When there are granule differences and granule reconciliation is run', }); }); - describe('Create an ORCA Backup Reconciliation Report to monitor ORCA backup discrepancies', () => { + // TODO: fix tests in CUMULUS-3806 when CreateReconciliationReport lambda is changed to query postgres + xdescribe('Create an ORCA Backup Reconciliation Report to monitor ORCA backup discrepancies', () => { // report record in db and report in s3 let reportRecord; let 
report; diff --git a/packages/api/endpoints/granules.js b/packages/api/endpoints/granules.js index 97ea83afc4b..55dedb7298f 100644 --- a/packages/api/endpoints/granules.js +++ b/packages/api/endpoints/granules.js @@ -26,12 +26,6 @@ const { translatePostgresGranuleToApiGranule, getGranuleAndCollection, } = require('@cumulus/db'); -const { - Search, - getEsClient, - recordNotFoundString, - multipleRecordFoundString, -} = require('@cumulus/es-client/search'); const { deleteGranuleAndFiles } = require('../src/lib/granule-delete'); const { zodParser } = require('../src/zod-utils'); @@ -157,7 +151,6 @@ const _setNewGranuleDefaults = (incomingApiGranule, isNewRecord = true) => { const create = async (req, res) => { const { knex = await getKnexClient(), - esClient = await getEsClient(), createGranuleFromApiMethod = createGranuleFromApi, } = req.testContext || {}; @@ -188,8 +181,7 @@ const create = async (req, res) => { try { await createGranuleFromApiMethod( _setNewGranuleDefaults(granule, true), - knex, - esClient + knex ); } catch (error) { log.error('Could not write granule', error); @@ -214,7 +206,6 @@ const patchGranule = async (req, res) => { granulePgModel = new GranulePgModel(), collectionPgModel = new CollectionPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), updateGranuleFromApiMethod = updateGranuleFromApi, } = req.testContext || {}; let apiGranule = req.body || {}; @@ -279,7 +270,7 @@ const patchGranule = async (req, res) => { if (isNewRecord) { apiGranule = _setNewGranuleDefaults(apiGranule, isNewRecord); } - await updateGranuleFromApiMethod(apiGranule, knex, esClient); + await updateGranuleFromApiMethod(apiGranule, knex); } catch (error) { log.error('failed to update granule', error); return res.boom.badRequest(errorify(error)); @@ -496,7 +487,7 @@ async function patchByGranuleId(req, res) { granulePgModel = new GranulePgModel(), knex = await getKnexClient(), } = req.testContext || {}; - + let pgGranule; const body = req.body; const action = body.action; @@ -509,11 +500,18 @@ async function patchByGranuleId(req, res) { ); } - const pgGranule = await getUniqueGranuleByGranuleId( - knex, - req.params.granuleId, - granulePgModel - ); + try { + pgGranule = await await getUniqueGranuleByGranuleId( + knex, + req.params.granuleId, + granulePgModel + ); + } catch (error) { + if (error instanceof RecordDoesNotExist) { + log.info('Granule does not exist'); + return res.boom.notFound('No record found'); + } + } const collectionPgModel = new CollectionPgModel(); const pgCollection = await collectionPgModel.get(knex, { @@ -542,7 +540,6 @@ async function patch(req, res) { const body = req.body; const action = body.action; - if (!action) { if (_granulePayloadMatchesQueryParams(body, req)) { return patchGranule(req, res); @@ -596,7 +593,6 @@ const associateExecution = async (req, res) => { granulePgModel = new GranulePgModel(), collectionPgModel = new CollectionPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; let pgGranule; @@ -641,6 +637,7 @@ const associateExecution = async (req, res) => { ...pgGranule, updated_at: new Date(), }; + const apiGranuleRecord = { ...(await translatePostgresGranuleToApiGranule({ knexOrTransaction: knex, @@ -652,7 +649,6 @@ const associateExecution = async (req, res) => { try { await writeGranuleRecordAndPublishSns({ apiGranuleRecord, - esClient, executionCumulusId: pgExecution.cumulus_id, granulePgModel, postgresGranuleRecord: updatedPgGranule, @@ -684,47 +680,23 @@ const associateExecution = 
async (req, res) => { async function delByGranuleId(req, res) { const { knex = await getKnexClient(), - esClient = await getEsClient(), - esGranulesClient = new Search({}, 'granule', process.env.ES_INDEX), } = req.testContext || {}; - + let pgGranule; const granuleId = req.params.granuleId; log.info(`granules.del ${granuleId}`); - let pgGranule; - let esResult; try { - // TODO - Phase 3 - we need a ticket to address granule/collection consistency - // For now use granule ID without collection search in ES pgGranule = await getUniqueGranuleByGranuleId(knex, granuleId); } catch (error) { if (error instanceof RecordDoesNotExist) { - // TODO - Phase 3 - we need to require the collectionID, not infer it - - esResult = await esGranulesClient.get(granuleId); - - if (esResult.detail === recordNotFoundString) { - log.info('Granule does not exist in Elasticsearch and PostgreSQL'); - return res.boom.notFound('No record found'); - } - if (esResult.detail === multipleRecordFoundString) { - return res.boom.notFound( - 'No Postgres record found, multiple ES entries found for deletion' - ); - } - log.info( - `Postgres Granule with ID ${granuleId} does not exist but exists in Elasticsearch. Proceeding to remove from elasticsearch.` - ); - } else { - throw error; + log.info('Granule does not exist'); + return res.boom.notFound(`Granule ${granuleId} does not exist or was already deleted`); } } const deletionDetails = await deleteGranuleAndFiles({ knex, - apiGranule: esResult, pgGranule: pgGranule, - esClient, }); return res.send({ detail: 'Record deleted', ...deletionDetails }); @@ -742,8 +714,6 @@ async function del(req, res) { knex = await getKnexClient(), collectionPgModel = new CollectionPgModel(), granulePgModel = new GranulePgModel(), - esClient = await getEsClient(), - esGranulesClient = new Search({}, 'granule', process.env.ES_INDEX), } = req.testContext || {}; const granuleId = req.params.granuleId; @@ -755,7 +725,6 @@ async function del(req, res) { let pgGranule; let pgCollection; - let esResult; try { pgCollection = await collectionPgModel.get( knex, @@ -773,21 +742,6 @@ async function del(req, res) { `No collection found for granuleId ${granuleId} with collectionId ${collectionId}` ); } - - esResult = await esGranulesClient.get(granuleId, collectionId); - - if (esResult.detail === recordNotFoundString) { - log.info('Granule does not exist in Elasticsearch and PostgreSQL'); - return res.boom.notFound('No record found'); - } - if (esResult.detail === multipleRecordFoundString) { - return res.boom.notFound( - 'No Postgres record found, multiple ES entries found for deletion' - ); - } - log.info( - `Postgres Granule with ID ${granuleId} does not exist but exists in Elasticsearch. Proceeding to remove from elasticsearch.` - ); } else { throw error; } @@ -795,9 +749,7 @@ async function del(req, res) { const deletionDetails = await deleteGranuleAndFiles({ knex, - apiGranule: esResult, pgGranule: pgGranule, - esClient, }); return res.send({ detail: 'Record deleted', ...deletionDetails }); @@ -928,7 +880,6 @@ async function bulkOperations(req, res) { payload, type: 'BULK_GRANULE', envVars: { - ES_HOST: process.env.ES_HOST, granule_sns_topic_arn: process.env.granule_sns_topic_arn, invoke: process.env.invoke, KNEX_DEBUG: payload.knexDebug ? 
'true' : 'false', @@ -994,7 +945,6 @@ async function bulkDelete(req, res) { cmr_password_secret_name: process.env.cmr_password_secret_name, cmr_provider: process.env.cmr_provider, cmr_username: process.env.cmr_username, - ES_HOST: process.env.ES_HOST, granule_sns_topic_arn: process.env.granule_sns_topic_arn, KNEX_DEBUG: payload.knexDebug ? 'true' : 'false', launchpad_api: process.env.launchpad_api, @@ -1036,7 +986,6 @@ async function bulkReingest(req, res) { payload, type: 'BULK_GRANULE_REINGEST', envVars: { - ES_HOST: process.env.ES_HOST, granule_sns_topic_arn: process.env.granule_sns_topic_arn, invoke: process.env.invoke, KNEX_DEBUG: payload.knexDebug ? 'true' : 'false', diff --git a/packages/api/lib/granules.js b/packages/api/lib/granules.js index 49b2f24a95e..1a9fbfc4e1f 100644 --- a/packages/api/lib/granules.js +++ b/packages/api/lib/granules.js @@ -23,7 +23,6 @@ const { getKnexClient, GranulePgModel, } = require('@cumulus/db'); -const indexer = require('@cumulus/es-client/indexer'); const { getEsClient } = require('@cumulus/es-client/search'); const { getBucketsConfigKey } = require('@cumulus/common/stack'); const { fetchDistributionBucketMap } = require('@cumulus/distribution-utils'); @@ -81,7 +80,6 @@ const getExecutionProcessingTimeInfo = ({ * @param {Object} params.granulePgModel - parameter override, used for unit testing * @param {Object} params.collectionPgModel - parameter override, used for unit testing * @param {Object} params.filesPgModel - parameter override, used for unit testing - * @param {Object} params.esClient - parameter override, used for unit testing * @param {Object} params.dbClient - parameter override, used for unit testing * @returns {Promise} - Object containing an 'updated' * files object with current file key values and an error object containing a set of @@ -95,7 +93,6 @@ async function moveGranuleFilesAndUpdateDatastore(params) { collectionPgModel = new CollectionPgModel(), filesPgModel = new FilePgModel(), dbClient = await getKnexClient(), - esClient = await getEsClient(), } = params; const { name, version } = deconstructCollectionId(apiGranule.collectionId); @@ -132,16 +129,6 @@ async function moveGranuleFilesAndUpdateDatastore(params) { }); const moveResults = await Promise.allSettled(moveFilePromises); - - await indexer.upsertGranule({ - esClient, - updates: { - ...apiGranule, - files: updatedFiles, - }, - index: process.env.ES_INDEX, - }); - const filteredResults = moveResults.filter((r) => r.status === 'rejected'); const moveGranuleErrors = filteredResults.map((error) => error.reason); diff --git a/packages/api/lib/writeRecords/write-granules.js b/packages/api/lib/writeRecords/write-granules.js index 4b94e4eeb70..0db973ca7c0 100644 --- a/packages/api/lib/writeRecords/write-granules.js +++ b/packages/api/lib/writeRecords/write-granules.js @@ -26,12 +26,6 @@ const { upsertGranuleWithExecutionJoinRecord, translateApiGranuleToPostgresGranuleWithoutNilsRemoved, } = require('@cumulus/db'); -const { - upsertGranule, -} = require('@cumulus/es-client/indexer'); -const { - getEsClient, -} = require('@cumulus/es-client/search'); const Logger = require('@cumulus/logger'); const { deconstructCollectionId, @@ -85,13 +79,11 @@ const { const { getExecutionCumulusId, isStatusFinalState, - isStatusActiveState, } = require('./utils'); /** * @typedef { import('knex').Knex } Knex * @typedef { import('knex').Knex.Transaction } KnexTransaction -* @typedef { typeof Search.es } Esclient and update type * @typedef { import('@cumulus/types').ApiGranule } ApiGranule * 
@typedef { import('@cumulus/types').ApiGranuleRecord } ApiGranuleRecord * @typedef { Granule } ApiGranuleModel @@ -101,7 +93,6 @@ const { * @typedef { import('@cumulus/db').GranulePgModel } GranulePgModel * @typedef { import('@cumulus/db').FilePgModel } FilePgModel */ - const { recordIsValid } = require('../schema'); const granuleSchema = require('../schemas').granule; const log = new Logger({ sender: '@cumulus/api/lib/writeRecords/write-granules' }); @@ -289,8 +280,7 @@ const _publishPostgresGranuleUpdateToSns = async ({ }; /** - * Update granule record status in PostgreSQL Elasticsearch. - * Update granule record status in PostgreSQL and Elasticsearch. + * Update granule record status in PostgreSQL * Publish SNS event for updated granule. * * @param {Object} params @@ -303,48 +293,24 @@ const _publishPostgresGranuleUpdateToSns = async ({ * @param {Object} params.granulePgModel - @cumulus/db compatible granule module instance * @param {Knex} params.knex - Knex object * @param {string} params.snsEventType - SNS Event Type, defaults to 'Update' - * @param {Object} params.esClient - Elasticsearch client * returns {Promise} */ const _updateGranule = async ({ apiGranule, postgresGranule, - apiFieldUpdates, pgFieldUpdates, - apiFieldsToDelete, granulePgModel, knex, snsEventType = 'Update', - esClient, }) => { const granuleId = apiGranule.granuleId; - const esGranule = omit(apiGranule, apiFieldsToDelete); - - let updatedPgGranule; - await createRejectableTransaction(knex, async (trx) => { - [updatedPgGranule] = await granulePgModel.update( - trx, - { cumulus_id: postgresGranule.cumulus_id }, - pgFieldUpdates, - ['*'] - ); - log.info(`Successfully wrote granule ${granuleId} to PostgreSQL`); - try { - await upsertGranule({ - esClient, - updates: { - ...esGranule, - ...apiFieldUpdates, - }, - index: process.env.ES_INDEX, - }); - log.info(`Successfully wrote granule ${granuleId} to Elasticsearch`); - } catch (writeError) { - log.error(`Write to Elasticsearch failed for ${granuleId}`, writeError); - throw writeError; - } - }); - + const [updatedPgGranule] = await granulePgModel.update( + knex, + { cumulus_id: postgresGranule.cumulus_id }, + pgFieldUpdates, + ['*'] + ); + log.info(`Successfully wrote granule ${granuleId} to PostgreSQL`); log.info( ` Successfully wrote granule %j to PostgreSQL. Record cumulus_id in PostgreSQL: ${updatedPgGranule.cumulus_id}. @@ -376,7 +342,6 @@ const updateGranuleStatusToFailed = async (params) => { error = {}, collectionPgModel = new CollectionPgModel(), granulePgModel = new GranulePgModel(), - esClient = await getEsClient(), } = params; const status = 'failed'; const { granuleId, collectionId } = granule; @@ -403,7 +368,6 @@ const updateGranuleStatusToFailed = async (params) => { granulePgModel, knex, snsEventType: 'Update', - esClient, }); log.debug(`Updated granule status to failed, granuleId: ${granule.granuleId}, PostgreSQL cumulus_id: ${pgGranule.cumulus_id}`); } catch (thrownError) { @@ -502,16 +466,13 @@ const _writeGranuleFilesAndThrowIfExpectedWriteError = async ({ }; /** - * Write granule to PostgreSQL and ElasticSearch, keeping granules to be written in sync - * as necessary. - * If any granule writes fail, keep the data stores in sync. + * Write granule to PostgreSQL, if any granule writes fail, keep the data stores in sync. 
* * @param {Object} params * @param {PostgresGranuleRecord} params.postgresGranuleRecord - PostgreSQL granule record to write * to the database * @param {ApiGranuleRecord} params.apiGranuleRecord - Api Granule object to write to the database * @param {Knex} params.knex - Knex object - * @param {Esclient} params.esClient - Elasticsearch client * @param {number} params.executionCumulusId - Execution ID the granule was written from * @param {boolean} params.writeConstraints - Boolean flag to set if createdAt/execution * write constraints should restrict write @@ -529,12 +490,10 @@ const _writeGranuleRecords = async (params) => { postgresGranuleRecord, apiGranuleRecord, knex, - esClient = await getEsClient(), executionCumulusId, granulePgModel, writeConstraints = true, } = params; - let pgGranule; /** * @type { { status: string, pgGranule: PostgresGranuleRecord } | undefined } */ @@ -552,42 +511,6 @@ const _writeGranuleRecords = async (params) => { granulePgModel, writeConstraints, }); - if (writePgGranuleResult.status === 'dropped') { - return; - } - pgGranule = writePgGranuleResult.pgGranule; - - if (writeConstraints && isStatusActiveState(pgGranule.status)) { - // pgGranule was updated, but with writeConstraints conditions and the granule status is - // 'queued' or 'running', so only some values were updated. we need to ensure the correct - // values are propagated to ES. - // The only values allowed to be updated in the PG granule write under these conditions are - // currently status, timestamp, updated_at, and created_at, and the associated execution - // as part of the write chain - const limitedUpdateApiGranuleRecord = await translatePostgresGranuleToApiGranule({ - granulePgRecord: pgGranule, - knexOrTransaction: trx, - }); - await upsertGranule({ - esClient, - updates: limitedUpdateApiGranuleRecord, - index: process.env.ES_INDEX, - }, writeConstraints); - } else { - // Future: refactor to cover the entire object? 
- // Ensure PG default createdAt value is propagated to ES - // in the case where _writeGranule is called without createdAt set - if (!apiGranuleRecord.createdAt) { - apiGranuleRecord.createdAt = pgGranule.created_at.getTime(); - } - - // TODO: refactor to not need apiGranuleRecord, only need files and a few other fields - await upsertGranule({ - esClient, - updates: apiGranuleRecord, - index: process.env.ES_INDEX, - }, writeConstraints); - } }); if (writePgGranuleResult === undefined) { // unlikely to happen but want a unique message that we can find and diagnose @@ -613,7 +536,6 @@ const _writeGranuleRecords = async (params) => { if (isStatusFinalState(apiGranuleRecord.status) && thrownError.name === 'SchemaValidationError') { const originalError = apiGranuleRecord.error; - const errors = []; if (originalError) { errors.push(originalError); @@ -653,7 +575,6 @@ const _writeGranuleRecords = async (params) => { * @param {PostgresGranuleRecord} params.postgresGranuleRecord - PostgreSQL granule record to write * to the database * @param {ApiGranuleRecord} params.apiGranuleRecord - Api Granule object to write to the database - * @param {Esclient} params.esClient - Elasticsearch client * @param {number} params.executionCumulusId - Execution ID the granule was written from * @param {GranulePgModel} params.granulePgModel - @cumulus/db compatible granule module instance * @returns {Promise} @@ -661,7 +582,6 @@ const _writeGranuleRecords = async (params) => { const _writeGranule = async ({ postgresGranuleRecord, apiGranuleRecord, - esClient, executionCumulusId, granulePgModel, knex, @@ -672,7 +592,6 @@ const _writeGranule = async ({ const { status } = apiGranuleRecord; const writePgGranuleResult = await _writeGranuleRecords({ apiGranuleRecord, - esClient, executionCumulusId, granulePgModel, knex, @@ -707,12 +626,11 @@ const _writeGranule = async ({ * @summary In cases where a full API record is not passed, but partial/tangential updates to granule * records are called for, updates to files records are not required and pre-write * calculation in methods like write/update GranulesFromApi result in unneded -* evaluation/database writes /etc. This method updates the postgres/ES datastore and +* evaluation/database writes /etc. This method updates postgres and * publishes the SNS update event without incurring unneded overhead. * @param {Object} params * @param {Object} params.apiGranuleRecord - Api Granule object to write to the database * @param {number} params.executionCumulusId - Execution ID the granule was written from -* @param {Object} params.esClient - Elasticsearch client * @param {Object} params.granulePgModel - @cumulus/db compatible granule module instance * @param {Knex} params.knex - Knex object * @param {Object} params.postgresGranuleRecord - PostgreSQL granule record to write @@ -723,7 +641,6 @@ const _writeGranule = async ({ const writeGranuleRecordAndPublishSns = async ({ postgresGranuleRecord, apiGranuleRecord, - esClient, executionCumulusId, granulePgModel, knex, @@ -731,7 +648,6 @@ const writeGranuleRecordAndPublishSns = async ({ }) => { const writePgGranuleResult = await _writeGranuleRecords({ apiGranuleRecord: omit(apiGranuleRecord, 'files'), - esClient, executionCumulusId, granulePgModel, knex, @@ -784,7 +700,6 @@ const writeGranuleRecordAndPublishSns = async ({ * @param {Object} [granule.queryFields] - query fields * @param {Object} [granule.granulePgModel] - only for testing. 
* @param {Knex} knex - knex Client - * @param {Object} esClient - Elasticsearch client * @param {string} snsEventType - SNS Event Type * @returns {Promise} */ @@ -817,7 +732,6 @@ const writeGranuleFromApi = async ( granulePgModel = new GranulePgModel(), }, knex, - esClient, snsEventType ) => { try { @@ -900,10 +814,8 @@ const writeGranuleFromApi = async ( dynamoRecord: apiGranuleRecord, knexOrTransaction: knex, }); - await _writeGranule({ apiGranuleRecord, - esClient, executionCumulusId, granulePgModel, knex, @@ -919,12 +831,12 @@ const writeGranuleFromApi = async ( } }; -const createGranuleFromApi = async (granule, knex, esClient) => { - await writeGranuleFromApi(granule, knex, esClient, 'Create'); +const createGranuleFromApi = async (granule, knex) => { + await writeGranuleFromApi(granule, knex, 'Create'); }; -const updateGranuleFromApi = async (granule, knex, esClient) => { - await writeGranuleFromApi(granule, knex, esClient, 'Update'); +const updateGranuleFromApi = async (granule, knex) => { + await writeGranuleFromApi(granule, knex, 'Update'); }; /** @@ -937,7 +849,6 @@ const updateGranuleFromApi = async (granule, knex, esClient) => { * @param {Knex} params.knex - Client to interact with PostgreSQL database * @param {Object} [params.granulePgModel] * Optional override for the granule model writing to PostgreSQL database - * @param {Object} params.esClient - Elasticsearch client * @returns {Promise} * true if there are no granules on the message, otherwise * results from Promise.allSettled for all granules @@ -948,7 +859,6 @@ const writeGranulesFromMessage = async ({ executionCumulusId, knex, granulePgModel = new GranulePgModel(), - esClient, testOverrides = {}, // Used only for test mocks }) => { if (!messageHasGranules(cumulusMessage)) { @@ -1069,7 +979,6 @@ const writeGranulesFromMessage = async ({ return _writeGranule({ apiGranuleRecord, - esClient, executionCumulusId, granulePgModel, knex, @@ -1083,7 +992,7 @@ const writeGranulesFromMessage = async ({ if (failures.length > 0) { const allFailures = failures.map((failure) => failure.reason); const aggregateError = new AggregateError(allFailures); - log.error('Failed writing some granules to Postgres/Elasticsearch', aggregateError); + log.error('Failed writing some granules to Postgres', aggregateError); throw aggregateError; } return results; @@ -1104,7 +1013,6 @@ const updateGranuleStatusToQueued = async (params) => { knex, collectionPgModel = new CollectionPgModel(), granulePgModel = new GranulePgModel(), - esClient = await getEsClient(), } = params; const status = 'queued'; const { granuleId, collectionId } = apiGranule; @@ -1132,7 +1040,6 @@ const updateGranuleStatusToQueued = async (params) => { granulePgModel, knex, snsEventType: 'Update', - esClient, }); log.debug(`Updated granule status to queued, PostgreSQL cumulus_id: ${pgGranule.cumulus_id}`); diff --git a/packages/api/src/lib/granule-delete.ts b/packages/api/src/lib/granule-delete.ts index 66f9842f91a..19fbd1b8c60 100644 --- a/packages/api/src/lib/granule-delete.ts +++ b/packages/api/src/lib/granule-delete.ts @@ -7,18 +7,14 @@ import { GranulePgModel, PostgresGranuleRecord, PostgresFileRecord, - createRejectableTransaction, translatePostgresGranuleToApiGranule, CollectionPgModel, PdrPgModel, ProviderPgModel, } from '@cumulus/db'; import { DeletePublishedGranule } from '@cumulus/errors'; -import { ApiFile, ApiGranuleRecord } from '@cumulus/types'; +import { ApiFile } from '@cumulus/types'; import Logger from '@cumulus/logger'; - -const { deleteGranule } = 
require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); const { publishGranuleDeleteSnsMessage } = require('../../lib/publishSnsMessageUtils'); const FileUtils = require('../../lib/FileUtils'); @@ -44,66 +40,36 @@ const deleteS3Files = async ( ); /** - * Delete a Granule from Postgres and/or ES, delete the Granule's + * Delete a Granule from Postgres, delete the Granule's * Files from Postgres and S3 * * @param {Object} params * @param {Knex} params.knex - DB client - * @param {Object} params.apiGranule - Granule from API * @param {PostgresGranule} params.pgGranule - Granule from Postgres * @param {number | undefined} params.collectionCumulusId - Optional Collection Cumulus ID * @param {FilePgModel} params.filePgModel - File Postgres model * @param {GranulePgModel} params.granulePgModel - Granule Postgres model * @param {CollectionPgModel} params.collectionPgModel - Collection Postgres model - * @param {Object} params.esClient - Elasticsearch client * @returns {Object} - Granule Deletion details */ const deleteGranuleAndFiles = async (params: { knex: Knex, - apiGranule?: ApiGranuleRecord, pgGranule: PostgresGranuleRecord, filePgModel: FilePgModel, granulePgModel: GranulePgModel, collectionPgModel: CollectionPgModel, - esClient: { - delete(...args: any): any | any[]; - }, collectionCumulusId?: number, }) => { const { knex, pgGranule, - apiGranule, filePgModel = new FilePgModel(), granulePgModel = new GranulePgModel(), collectionPgModel = new CollectionPgModel(), - esClient = await getEsClient(), } = params; // Most of the calls using this method aren't typescripted // We cannot rely on typings to save us here - if (!pgGranule && !apiGranule) { - throw new Error('pgGranule and apiGranule undefined, but one is required'); - } - if (!pgGranule && apiGranule) { - logger.info('deleteGranuleAndFiles called without pgGranule, removing ES record only'); - await deleteGranule({ - esClient, - granuleId: apiGranule.granuleId, - collectionId: apiGranule.collectionId, - index: process.env.ES_INDEX, - ignore: [404], - }); - logger.debug(`Successfully deleted granule ${apiGranule.granuleId} from ES datastore`); - await deleteS3Files(apiGranule.files); - logger.debug(`Successfully removed S3 files ${JSON.stringify(apiGranule.files)}`); - return { - collection: apiGranule.collectionId, - deletedGranuleId: apiGranule.granuleId, - deletionTime: Date.now(), - deletedFiles: apiGranule.files, - }; - } if (pgGranule?.published === true) { throw new DeletePublishedGranule('You cannot delete a granule that is published to CMR. 
Remove it from CMR first'); } @@ -124,20 +90,11 @@ const deleteGranuleAndFiles = async (params: { }); try { - await createRejectableTransaction(knex, async (trx) => { - await granulePgModel.delete(trx, { - cumulus_id: pgGranule.cumulus_id, - }); - await deleteGranule({ - esClient, - granuleId: granuleToPublishToSns.granuleId, - collectionId: granuleToPublishToSns.collectionId, - index: process.env.ES_INDEX, - ignore: [404], - }); + await granulePgModel.delete(knex, { + cumulus_id: pgGranule.cumulus_id, }); await publishGranuleDeleteSnsMessage(granuleToPublishToSns); - logger.debug(`Successfully deleted granule ${pgGranule.granule_id} from ES/PostGreSQL datastores`); + logger.debug(`Successfully deleted granule ${pgGranule.granule_id} from PostgreSQL`); await deleteS3Files(files); logger.debug(`Successfully removed S3 files ${JSON.stringify(files)}`); return { diff --git a/packages/api/tests/endpoints/granules/test-bulk-delete.js b/packages/api/tests/endpoints/granules/test-bulk-delete.js index 41ff6d60e6b..c81d05b38d7 100644 --- a/packages/api/tests/endpoints/granules/test-bulk-delete.js +++ b/packages/api/tests/endpoints/granules/test-bulk-delete.js @@ -50,7 +50,6 @@ test.before(async () => { process.env.launchpad_api = randomString(); process.env.launchpad_certificate = randomString(); process.env.launchpad_passphrase_secret_name = randomString(); - process.env.ES_HOST = randomString(); await s3().createBucket({ Bucket: process.env.system_bucket }); @@ -131,7 +130,6 @@ test.serial('POST /granules/bulkDelete starts an async-operation with the correc METRICS_ES_PASS: process.env.METRICS_ES_PASS, stackName: process.env.stackName, system_bucket: process.env.system_bucket, - ES_HOST: process.env.ES_HOST, }, }); Object.keys(omit(payload.envVars, ['KNEX_DEBUG'])).forEach((envVarKey) => { @@ -221,7 +219,6 @@ test.serial('POST /granules/bulkDelete starts an async-operation with the correc METRICS_ES_PASS: process.env.METRICS_ES_PASS, stackName: process.env.stackName, system_bucket: process.env.system_bucket, - ES_HOST: process.env.ES_HOST, }, }); Object.keys(omit(payload.envVars, ['KNEX_DEBUG'])).forEach((envVarKey) => { diff --git a/packages/api/tests/endpoints/granules/test-bulk-granules.js b/packages/api/tests/endpoints/granules/test-bulk-granules.js index f3b096dd524..cd72febc9be 100644 --- a/packages/api/tests/endpoints/granules/test-bulk-granules.js +++ b/packages/api/tests/endpoints/granules/test-bulk-granules.js @@ -29,7 +29,6 @@ process.env = { EcsCluster: randomString(), BulkOperationLambda: randomString(), invoke: randomString(), - ES_HOST: randomString(), METRICS_ES_HOST: randomString(), METRICS_ES_USER: randomString(), METRICS_ES_PASS: randomString(), @@ -100,7 +99,6 @@ test.serial('POST /granules/bulk starts an async-operation with the correct payl payload: body, type: 'BULK_GRANULE', envVars: { - ES_HOST: process.env.ES_HOST, granule_sns_topic_arn: process.env.granule_sns_topic_arn, system_bucket: process.env.system_bucket, stackName: process.env.stackName, @@ -184,7 +182,6 @@ test.serial('POST /granules/bulk starts an async-operation with the correct payl payload: body, type: 'BULK_GRANULE', envVars: { - ES_HOST: process.env.ES_HOST, granule_sns_topic_arn: process.env.granule_sns_topic_arn, system_bucket: process.env.system_bucket, stackName: process.env.stackName, diff --git a/packages/api/tests/endpoints/granules/test-bulk-reingest.js b/packages/api/tests/endpoints/granules/test-bulk-reingest.js index a66e6967a71..dafd81ab0c1 100644 --- 
a/packages/api/tests/endpoints/granules/test-bulk-reingest.js +++ b/packages/api/tests/endpoints/granules/test-bulk-reingest.js @@ -30,7 +30,6 @@ process.env = { EcsCluster: randomId('EcsCluster'), BulkOperationLambda: randomId('BulkOperationLambda'), invoke: randomId('invoke'), - ES_HOST: randomId('esHost'), METRICS_ES_HOST: randomId('metricsEsHost'), METRICS_ES_USER: randomId('metricsEsUser'), METRICS_ES_PASS: randomId('metricsEsPass'), @@ -98,7 +97,6 @@ test.serial('POST /granules/bulkReingest starts an async-operation with the corr payload: body, type: 'BULK_GRANULE_REINGEST', envVars: { - ES_HOST: process.env.ES_HOST, granule_sns_topic_arn: process.env.granule_sns_topic_arn, system_bucket: process.env.system_bucket, stackName: process.env.stackName, @@ -174,7 +172,6 @@ test.serial('POST /granules/bulkReingest starts an async-operation with the corr payload: body, type: 'BULK_GRANULE_REINGEST', envVars: { - ES_HOST: process.env.ES_HOST, granule_sns_topic_arn: process.env.granule_sns_topic_arn, system_bucket: process.env.system_bucket, stackName: process.env.stackName, diff --git a/packages/api/tests/endpoints/test-granules.js b/packages/api/tests/endpoints/test-granules.js index fc6f9425889..15f878ebe01 100644 --- a/packages/api/tests/endpoints/test-granules.js +++ b/packages/api/tests/endpoints/test-granules.js @@ -29,12 +29,10 @@ const { translateApiExecutionToPostgresExecution, translateApiFiletoPostgresFile, translateApiGranuleToPostgresGranule, - translatePostgresFileToApiFile, translatePostgresGranuleToApiGranule, upsertGranuleWithExecutionJoinRecord, } = require('@cumulus/db'); -const { createTestIndex, cleanupTestIndex } = require('@cumulus/es-client/testUtils'); const { buildS3Uri, createBucket, @@ -52,8 +50,6 @@ const { } = require('@aws-sdk/client-sns'); const { CMR } = require('@cumulus/cmr-client'); const { metadataObjectFromCMRFile } = require('@cumulus/cmrjs/cmr-utils'); -const indexer = require('@cumulus/es-client/indexer'); -const { Search, multipleRecordFoundString } = require('@cumulus/es-client/search'); const launchpad = require('@cumulus/launchpad-auth'); const { randomString, randomId } = require('@cumulus/common/test-utils'); const { removeNilProperties } = require('@cumulus/common/util'); @@ -62,7 +58,7 @@ const { getBucketsConfigKey } = require('@cumulus/common/stack'); const { getDistributionBucketMapKey } = require('@cumulus/distribution-utils'); const { constructCollectionId } = require('@cumulus/message/Collections'); -const { create, del, patch, patchGranule } = require('../../endpoints/granules'); +const { create, patch, patchGranule } = require('../../endpoints/granules'); const { sortFilesByKey } = require('../helpers/sort'); const assertions = require('../../lib/assertions'); const { createGranuleAndFiles } = require('../helpers/create-test-data'); @@ -203,12 +199,6 @@ test.before(async (t) => { t.context.knex = knex; t.context.knexAdmin = knexAdmin; - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - - t.context.esGranulesClient = new Search({}, 'granule', process.env.ES_INDEX); - // Create collections in Postgres // we need this because a granule has a foreign key referring to collections t.context.collectionName = 'fakeCollection'; @@ -340,18 +330,6 @@ test.beforeEach(async (t) => { })) ); t.context.insertedPgGranules = t.context.fakePGGranuleRecords.flat(); - const insertedApiGranuleTranslations = await Promise.all( - t.context.insertedPgGranules.map((granule) => - 
translatePostgresGranuleToApiGranule({ - knexOrTransaction: t.context.knex, - granulePgRecord: granule, - })) - ); - // index PG Granules into ES - await Promise.all( - insertedApiGranuleTranslations.map((granule) => - indexer.indexGranule(t.context.esClient, granule, t.context.esIndex)) - ); const topicName = randomString(); const { TopicArn } = await createSnsTopic(topicName); @@ -401,7 +379,6 @@ test.after.always(async (t) => { knexAdmin: t.context.knexAdmin, testDbName, }); - await cleanupTestIndex(t.context); }); // TODO postgres query doesn't return searchContext @@ -919,14 +896,13 @@ test.serial('PATCH applies an in-place workflow to an existing granule', async ( }); test.serial('PATCH removes a granule from CMR', async (t) => { - const { s3Buckets, newPgGranule } = await createGranuleAndFiles({ + const { s3Buckets, apiGranule } = await createGranuleAndFiles({ dbClient: t.context.knex, - esClient: t.context.esClient, collectionId: t.context.collectionId, granuleParams: { published: true }, }); - const granuleId = newPgGranule.granule_id; + const granuleId = apiGranule.granuleId; sinon.stub(CMR.prototype, 'deleteGranule').callsFake(() => Promise.resolve()); @@ -1004,7 +980,7 @@ test.serial('DELETE returns 404 if granule does not exist', async (t) => { .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(404); - t.true(response.body.message.includes('No record found')); + t.true(response.body.message.includes(`Granule ${granuleId} does not exist or was already deleted`)); }); test.serial('DELETE returns 404 if collection does not exist', async (t) => { @@ -1026,7 +1002,6 @@ test.serial('DELETE does not require a collectionId', async (t) => { const { s3Buckets, apiGranule, newPgGranule } = await createGranuleAndFiles({ dbClient: t.context.knex, granuleParams: { published: false }, - esClient: t.context.esClient, }); const response = await request(app) @@ -1060,9 +1035,9 @@ test.serial('DELETE does not require a collectionId', async (t) => { t.teardown(() => deleteS3Buckets([s3Buckets.protected.name, s3Buckets.public.name])); }); -test.serial('DELETE deletes a granule that exists in PostgreSQL but not Elasticsearch successfully', +test.serial('DELETE deletes a granule that exists in PostgreSQL successfully', async (t) => { - const { collectionPgModel, esGranulesClient, knex } = t.context; + const { collectionPgModel, knex } = t.context; const testPgCollection = fakeCollectionRecordFactory({ name: randomString(), version: '005', @@ -1089,7 +1064,6 @@ test.serial('DELETE deletes a granule that exists in PostgreSQL but not Elastics collection_cumulus_id: createdPgGranule.collection_cumulus_id, }) ); - t.false(await esGranulesClient.exists(newGranule.granuleId)); const response = await request(app) .delete(`/granules/${newCollectionId}/${newGranule.granuleId}`) @@ -1115,100 +1089,10 @@ test.serial('DELETE deletes a granule that exists in PostgreSQL but not Elastics ); }); -test.serial('DELETE deletes a granule that exists in Elasticsearch but not PostgreSQL successfully', async (t) => { - const { collectionPgModel, esClient, esIndex, esGranulesClient, knex } = t.context; - const testPgCollection = fakeCollectionRecordFactory({ - name: randomString(), - version: '005', - }); - const newCollectionId = constructCollectionId(testPgCollection.name, testPgCollection.version); - - const [pgCollection] = await collectionPgModel.create(knex, testPgCollection); - const newGranule = fakeGranuleFactoryV2({ - granuleId: randomId(), - status: 'failed', - 
collectionId: newCollectionId, - published: false, - files: [], - }); - - await indexer.indexGranule(esClient, newGranule, esIndex); - - t.false( - await granulePgModel.exists(knex, { - granule_id: newGranule.granuleId, - collection_cumulus_id: pgCollection.cumulus_id, - }) - ); - t.true(await esGranulesClient.exists(newGranule.granuleId)); - - const response = await request(app) - .delete(`/granules/${newCollectionId}/${newGranule.granuleId}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.is(response.status, 200); - const responseBody = response.body; - t.like(responseBody, { - detail: 'Record deleted', - collection: newCollectionId, - deletedGranuleId: newGranule.granuleId, - }); - t.truthy(responseBody.deletionTime); - t.is(responseBody.deletedFiles.length, newGranule.files.length); - - t.false(await esGranulesClient.exists(newGranule.granuleId)); -}); - -test.serial('DELETE fails to delete a granule that has multiple entries in Elasticsearch, but no records in PostgreSQL', async (t) => { - const { knex } = t.context; - const testPgCollection = fakeCollectionRecordFactory({ - name: randomString(), - version: '005', - }); - - const newCollectionId = constructCollectionId(testPgCollection.name, testPgCollection.version); - - const collectionPgModel = new CollectionPgModel(); - const [pgCollection] = await collectionPgModel.create(knex, testPgCollection); - const newGranule = fakeGranuleFactoryV2({ - granuleId: randomId(), - status: 'failed', - collectionId: newCollectionId, - published: false, - files: [], - }); - - t.false( - await granulePgModel.exists(knex, { - granule_id: newGranule.granuleId, - collection_cumulus_id: pgCollection.cumulus_id, - }) - ); - - const expressRequest = { - params: { - granuleId: newGranule.granuleId, - collectionId: newCollectionId, - }, - testContext: { - esGranulesClient: { - get: () => ({ detail: multipleRecordFoundString }), - }, - }, - }; - const response = buildFakeExpressResponse(); - - await del(expressRequest, response); - t.true(response.boom.notFound.called); -}); - test.serial('DELETE deleting an existing granule that is published will fail and not delete records', async (t) => { const { s3Buckets, apiGranule, newPgGranule } = await createGranuleAndFiles({ dbClient: t.context.knex, granuleParams: { published: true }, - esClient: t.context.esClient, }); const granuleId = apiGranule.granuleId; @@ -1246,7 +1130,6 @@ test.serial('DELETE deleting an existing unpublished granule succeeds', async (t const { s3Buckets, apiGranule, newPgGranule } = await createGranuleAndFiles({ dbClient: t.context.knex, granuleParams: { published: false }, - esClient: t.context.esClient, }); const granuleId = apiGranule.granuleId; @@ -1290,7 +1173,6 @@ test.serial('DELETE throws an error if the Postgres get query fails', async (t) const { s3Buckets, apiGranule, newPgGranule } = await createGranuleAndFiles({ dbClient: t.context.knex, granuleParams: { published: true }, - esClient: t.context.esClient, }); sinon.stub(GranulePgModel.prototype, 'get').throws(new Error('Error message')); @@ -1330,7 +1212,6 @@ test.serial('DELETE publishes an SNS message after a successful granule delete', const { s3Buckets, apiGranule, newPgGranule } = await createGranuleAndFiles({ dbClient: t.context.knex, granuleParams: { published: false }, - esClient: t.context.esClient, }); const timeOfResponse = Date.now(); @@ -1387,8 +1268,6 @@ test.serial('move a granule with no .cmr.xml file', async (t) => { const secondBucket = 
randomId('second'); const thirdBucket = randomId('third'); - const { esGranulesClient } = t.context; - await runTestUsingBuckets([secondBucket, thirdBucket], async () => { // Generate Granule/Files, S3 objects and database entries const granuleFileName = randomId('granuleFileName'); @@ -1473,16 +1352,6 @@ test.serial('move a granule with no .cmr.xml file', async (t) => { bucket: destination.bucket, }); } - - // check the ES index is updated - const esRecord = await esGranulesClient.get(newGranule.granuleId); - t.is(esRecord.files.length, 3); - esRecord.files.forEach((esFileRecord) => { - const pgMatchingFileRecord = pgFiles.find( - (pgFile) => pgFile.key.match(esFileRecord.key) && pgFile.bucket.match(esFileRecord.bucket) - ); - t.deepEqual(translatePostgresFileToApiFile(pgMatchingFileRecord), esFileRecord); - }); }); }); @@ -1919,7 +1788,7 @@ test.serial('PATCH with action move returns failure if more than one granule fil t.true(expressResponse.boom.conflict.calledWithMatch('Cannot move granule because the following files would be overwritten at the destination location: file1')); }); -test.serial('create (POST) creates new granule without an execution in PostgreSQL, and Elasticsearch', async (t) => { +test.serial('create (POST) creates new granule without an execution in PostgreSQL', async (t) => { const newGranule = fakeGranuleFactoryV2({ collectionId: t.context.collectionId, execution: undefined, @@ -1936,16 +1805,13 @@ test.serial('create (POST) creates new granule without an execution in PostgreSQ granule_id: newGranule.granuleId, collection_cumulus_id: t.context.collectionCumulusId, }); - const fetchedESRecord = await t.context.esGranulesClient.get(newGranule.granuleId); - t.deepEqual(JSON.parse(response.text), { message: `Successfully wrote granule with Granule Id: ${newGranule.granuleId}, Collection Id: ${t.context.collectionId}`, }); t.is(fetchedPostgresRecord.granule_id, newGranule.granuleId); - t.is(fetchedESRecord.granuleId, newGranule.granuleId); }); -test.serial('create (POST) creates new granule with associated execution in PostgreSQL and Elasticsearch', async (t) => { +test.serial('create (POST) creates new granule with associated execution in PostgreSQL', async (t) => { const newGranule = fakeGranuleFactoryV2({ collectionId: t.context.collectionId, execution: t.context.executionUrl, @@ -1962,12 +1828,10 @@ test.serial('create (POST) creates new granule with associated execution in Post granule_id: newGranule.granuleId, collection_cumulus_id: t.context.collectionCumulusId, }); - const fetchedESRecord = await t.context.esGranulesClient.get(newGranule.granuleId); t.deepEqual(JSON.parse(response.text), { message: `Successfully wrote granule with Granule Id: ${newGranule.granuleId}, Collection Id: ${newGranule.collectionId}`, }); t.is(fetchedPostgresRecord.granule_id, newGranule.granuleId); - t.is(fetchedESRecord.granuleId, newGranule.granuleId); }); test.serial('create (POST) publishes an SNS message upon successful granule creation', async (t) => { @@ -2124,9 +1988,8 @@ test.serial('create (POST) throws conflict error if a granule with same granuleI t.is(errorText.message, `A granule already exists for granuleId: ${newGranule.granuleId}`); }); -test.serial('PATCH updates an existing granule in all data stores', async (t) => { +test.serial('PATCH updates an existing granule in postgres', async (t) => { const { - esClient, executionUrl, knex, testExecutionCumulusId, @@ -2135,9 +1998,8 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) => const 
oldQueryFields = { foo: Math.random(), }; - const { newPgGranule, esRecord } = await createGranuleAndFiles({ + const { newPgGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { status: 'running', @@ -2146,6 +2008,7 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) => queryFields: oldQueryFields, }, }); + const newApiGranule = await translatePostgresGranuleToApiGranule({ granulePgRecord: newPgGranule, knexOrTransaction: knex, @@ -2153,8 +2016,6 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) => t.is(newPgGranule.status, 'running'); t.deepEqual(newPgGranule.query_fields, oldQueryFields); - t.is(esRecord.status, 'running'); - t.deepEqual(esRecord.queryFields, oldQueryFields); const newQueryFields = { foo: randomString(), @@ -2177,11 +2038,6 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) => cumulus_id: newPgGranule.cumulus_id, }); - const actualApiGranule = await translatePostgresGranuleToApiGranule({ - granulePgRecord: actualPgGranule, - knexOrTransaction: knex, - }); - t.deepEqual(actualPgGranule, { ...newPgGranule, timestamp: new Date(timestamp), @@ -2193,32 +2049,17 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) => ending_date_time: actualPgGranule.ending_date_time, production_date_time: actualPgGranule.production_date_time, }); - - const updatedEsRecord = await t.context.esGranulesClient.get(newApiGranule.granuleId); - t.like(updatedEsRecord, { - ...esRecord, - files: actualApiGranule.files, - status: 'completed', - queryFields: newQueryFields, - updatedAt: updatedEsRecord.updatedAt, - timestamp: updatedEsRecord.timestamp, - }); }); test.serial('PATCH executes successfully with no non-required-field-updates (testing "insert" update/undefined fields)', async (t) => { const { - esClient, executionPgRecord, executionUrl, knex, } = t.context; const timestamp = Date.now(); - const { - esRecord, - newPgGranule, - } = await createGranuleAndFiles({ + const { newPgGranule, apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: executionPgRecord.cumulus_id, granuleParams: { status: 'running', @@ -2228,13 +2069,13 @@ test.serial('PATCH executes successfully with no non-required-field-updates (tes }); const updatedGranule = { - granuleId: esRecord.granuleId, - collectionId: esRecord.collectionId, + granuleId: apiGranule.granuleId, + collectionId: apiGranule.collectionId, status: newPgGranule.status, }; await request(app) - .patch(`/granules/${esRecord.granuleId}`) + .patch(`/granules/${apiGranule.granuleId}`) .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .send(updatedGranule) @@ -2249,34 +2090,22 @@ test.serial('PATCH executes successfully with no non-required-field-updates (tes timestamp: actualPgGranule.timestamp, updated_at: actualPgGranule.updated_at, }); - - const updatedEsRecord = await t.context.esGranulesClient.get( - newPgGranule.granule_id - ); - t.like(updatedEsRecord, { - ...esRecord, - timestamp: updatedEsRecord.timestamp, - updatedAt: updatedEsRecord.updatedAt, - }); }); -test.serial('PATCH does not update non-current-timestamp undefined fields for existing granules in all datastores', async (t) => { +test.serial('PATCH does not update non-current-timestamp undefined fields for existing granules', async (t) => { const { - esClient, knex, executionPgRecord, - esGranulesClient, testExecutionCumulusId, } = 
t.context; const originalUpdateTimestamp = Date.now(); const { - esRecord, newPgGranule, + apiGranule, } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { beginningDateTime: '2022-01-18T14:40:00.000Z', @@ -2307,7 +2136,7 @@ test.serial('PATCH does not update non-current-timestamp undefined fields for ex execution_cumulus_id: executionPgRecord.cumulus_id, }); const updatedGranule = { - granuleId: newPgGranule.granule_id, + granuleId: apiGranule.granuleId, collectionId: constructCollectionId(t.context.collectionName, t.context.collectionVersion), status: newPgGranule.status, }; @@ -2328,20 +2157,12 @@ test.serial('PATCH does not update non-current-timestamp undefined fields for ex knexOrTransaction: knex, }); - const updatedEsRecord = await esGranulesClient.get(newPgGranule.granule_id); - - [updatedEsRecord, esRecord, translatedPostgresGranule].forEach( + [translatedPostgresGranule].forEach( (record) => { record.files.sort((f1, f2) => sortFilesByKey(f1, f2)); } ); - t.like(updatedEsRecord, { - ...esRecord, - updatedAt: actualPgGranule.updated_at.getTime(), - timestamp: actualPgGranule.timestamp.getTime(), - }); - t.like(newPgGranule, { ...actualPgGranule, updated_at: newPgGranule.updated_at, @@ -2349,14 +2170,12 @@ test.serial('PATCH does not update non-current-timestamp undefined fields for ex }); }); -test.serial('PATCH nullifies expected fields for existing granules in all datastores', async (t) => { +test.serial('PATCH nullifies expected fields for existing granules', async (t) => { const { collectionName, collectionVersion, - esClient, knex, executionPgRecord, - esGranulesClient, testExecutionCumulusId, } = t.context; @@ -2364,9 +2183,8 @@ test.serial('PATCH nullifies expected fields for existing granules in all datast const collectionId = constructCollectionId(collectionName, collectionVersion); - const { newPgGranule } = await createGranuleAndFiles({ + const { newPgGranule, apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { beginningDateTime: '2022-01-18T14:40:00.000Z', @@ -2395,7 +2213,7 @@ test.serial('PATCH nullifies expected fields for existing granules in all datast }); const updatedGranule = { - granuleId: newPgGranule.granule_id, + granuleId: apiGranule.granuleId, collectionId, status: newPgGranule.status, createdAt: null, @@ -2420,7 +2238,7 @@ test.serial('PATCH nullifies expected fields for existing granules in all datast }; await request(app) - .patch(`/granules/${newPgGranule.granule_id}`) + .patch(`/granules/${apiGranule.granuleId}`) .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .send(updatedGranule) @@ -2435,8 +2253,6 @@ test.serial('PATCH nullifies expected fields for existing granules in all datast knexOrTransaction: knex, }); - const updatedEsRecord = await esGranulesClient.get(newPgGranule.granule_id); - const expectedGranule = { collectionId, createdAt: translatedPostgresGranule.createdAt, @@ -2451,19 +2267,14 @@ test.serial('PATCH nullifies expected fields for existing granules in all datast }; t.deepEqual(translatedPostgresGranule, expectedGranule); - t.deepEqual( - { ...updatedEsRecord, files: [] }, - { ...expectedGranule, _id: updatedEsRecord._id } - ); }); test.serial('PATCH does not overwrite existing duration of an existing granule if not specified in the payload', async (t) => { - const { esClient, executionUrl, knex } = t.context; + const { executionUrl, knex } = 
t.context; const unmodifiedDuration = 100; - const { newPgGranule, esRecord } = await createGranuleAndFiles({ + const { newPgGranule, apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, execution: executionUrl, granuleParams: { duration: unmodifiedDuration, @@ -2473,20 +2284,19 @@ test.serial('PATCH does not overwrite existing duration of an existing granule i // Verify returned objects have correct status t.is(newPgGranule.status, 'completed'); - t.is(esRecord.status, 'completed'); const newQueryFields = { foo: randomString(), }; const updatedGranule = { - granuleId: esRecord.granuleId, - collectionId: esRecord.collectionId, + granuleId: apiGranule.granuleId, + collectionId: apiGranule.collectionId, status: 'completed', queryFields: newQueryFields, }; await request(app) - .patch(`/granules/${esRecord.granuleId}`) + .patch(`/granules/${apiGranule.granuleId}`) .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .send(updatedGranule) @@ -2495,21 +2305,18 @@ test.serial('PATCH does not overwrite existing duration of an existing granule i const actualPgGranule = await t.context.granulePgModel.get(t.context.knex, { cumulus_id: newPgGranule.cumulus_id, }); - const actualEsGranule = await t.context.esGranulesClient.get(esRecord.granuleId); t.is(actualPgGranule.duration, unmodifiedDuration); - t.is(actualEsGranule.duration, unmodifiedDuration); }); test.serial('PATCH does not overwrite existing createdAt of an existing granule if not specified in the payload', async (t) => { - const { esClient, executionUrl, knex } = t.context; + const { executionUrl, knex } = t.context; const timestamp = Date.now(); const createdAt = timestamp - 1000000; - const { newPgGranule, esRecord } = await createGranuleAndFiles({ + const { newPgGranule, apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, execution: executionUrl, granuleParams: { createdAt, @@ -2519,18 +2326,16 @@ test.serial('PATCH does not overwrite existing createdAt of an existing granule // Verify returned objects have correct status t.is(newPgGranule.status, 'completed'); - t.is(esRecord.status, 'completed'); t.deepEqual(newPgGranule.created_at, new Date(createdAt)); - t.is(esRecord.createdAt, createdAt); const updatedGranule = { - granuleId: esRecord.granuleId, - collectionId: esRecord.collectionId, + granuleId: apiGranule.granuleId, + collectionId: apiGranule.collectionId, status: 'completed', }; await request(app) - .patch(`/granules/${esRecord.granuleId}`) + .patch(`/granules/${apiGranule.granuleId}`) .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .send(updatedGranule) @@ -2539,10 +2344,7 @@ test.serial('PATCH does not overwrite existing createdAt of an existing granule const actualPgGranule = await t.context.granulePgModel.get(t.context.knex, { cumulus_id: newPgGranule.cumulus_id, }); - const actualEsGranule = await t.context.esGranulesClient.get(esRecord.granuleId); - t.deepEqual(actualPgGranule.created_at, new Date(createdAt)); - t.is(actualEsGranule.createdAt, createdAt); }); test.serial('PATCH creates a granule if one does not already exist in all data stores', async (t) => { @@ -2577,23 +2379,11 @@ test.serial('PATCH creates a granule if one does not already exist in all data s collection_cumulus_id: t.context.collectionCumulusId, }); - const actualApiGranule = await translatePostgresGranuleToApiGranule({ - granulePgRecord: actualPgGranule, - knexOrTransaction: knex, - }); - t.deepEqual(removeNilProperties(actualPgGranule), { 
...fakePgGranule, timestamp: actualPgGranule.timestamp, cumulus_id: actualPgGranule.cumulus_id, }); - - const esRecord = await t.context.esGranulesClient.get(fakeGranule.granuleId); - t.deepEqual(esRecord, { - ...fakeGranule, - timestamp: actualApiGranule.timestamp, - _id: esRecord._id, - }); }); test.serial('PATCH sets a default value of false for `published` if one is not set', async (t) => { @@ -2623,15 +2413,11 @@ test.serial('PATCH sets a default value of false for `published` if one is not s collection_cumulus_id: t.context.collectionCumulusId, }); - const fakeEsRecord = await t.context.esGranulesClient.get(fakeGranule.granuleId); - t.is(fakePgGranule.published, false); - t.is(fakeEsRecord.published, false); }); test.serial('PATCH replaces an existing granule in all data stores with correct timestamps', async (t) => { const { - esClient, executionUrl, knex, testExecutionCumulusId, @@ -2640,7 +2426,6 @@ test.serial('PATCH replaces an existing granule in all data stores with correct newPgGranule, } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { status: 'running', @@ -2671,25 +2456,19 @@ test.serial('PATCH replaces an existing granule in all data stores with correct const actualPgGranule = await t.context.granulePgModel.get(t.context.knex, { cumulus_id: newPgGranule.cumulus_id, }); - const updatedEsRecord = await t.context.esGranulesClient.get(newApiGranule.granuleId); // createdAt timestamp from original record should have been preserved t.is(actualPgGranule.createdAt, newPgGranule.createdAt); - // PG and ES records have the same timestamps - t.is(actualPgGranule.created_at.getTime(), updatedEsRecord.createdAt); - t.is(actualPgGranule.updated_at.getTime(), updatedEsRecord.updatedAt); }); -test.serial('PATCH replaces an existing granule in all datastores with a granule that violates message-path write constraints, ignoring message write constraints and field selection', async (t) => { +test.serial('PATCH replaces an existing granule with a granule that violates message-path write constraints, ignoring message write constraints and field selection', async (t) => { const { - esClient, executionUrl, knex, testExecutionCumulusId, } = t.context; const { newPgGranule, apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { status: 'completed', @@ -2719,25 +2498,13 @@ test.serial('PATCH replaces an existing granule in all datastores with a granule cumulus_id: newPgGranule.cumulus_id, }); - const updatedEsRecord = await t.context.esGranulesClient.get(apiGranule.granuleId); - - t.is(updatedEsRecord.updatedAt, updatedGranule.updatedAt); - t.is(updatedEsRecord.createdAt, updatedGranule.createdAt); - // PG and ES records have the same timestamps - t.is(actualPgGranule.created_at.getTime(), updatedEsRecord.createdAt); - t.is(actualPgGranule.updated_at.getTime(), updatedEsRecord.updatedAt); - t.is(actualPgGranule.cmr_link, updatedGranule.cmrLink); - t.is(updatedEsRecord.cmrLink, updatedGranule.cmrLink); - t.is(actualPgGranule.duration, updatedGranule.duration); - t.is(updatedEsRecord.duration, updatedGranule.duration); }); test.serial('PATCH publishes an SNS message after a successful granule update', async (t) => { const { collectionCumulusId, - esClient, executionUrl, knex, testExecutionCumulusId, @@ -2746,7 +2513,6 @@ test.serial('PATCH publishes an SNS message after a successful granule update', newPgGranule, } = await createGranuleAndFiles({ 
dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { status: 'running', @@ -2797,7 +2563,6 @@ test.serial('PATCH publishes an SNS message after a successful granule update', test.serial("create() sets a default createdAt value for passed granule if it's not set by the user", async (t) => { const { - esClient, executionUrl, knex, testExecutionCumulusId, @@ -2805,7 +2570,6 @@ test.serial("create() sets a default createdAt value for passed granule if it's const { apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { status: 'running', @@ -2838,7 +2602,6 @@ test.serial("create() sets a default createdAt value for passed granule if it's test.serial("patch() sets a default createdAt value for new granule if it's not set by the user", async (t) => { const { - esClient, executionUrl, knex, testExecutionCumulusId, @@ -2846,7 +2609,6 @@ test.serial("patch() sets a default createdAt value for new granule if it's not const { apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { status: 'running', @@ -2877,19 +2639,16 @@ test.serial("patch() sets a default createdAt value for new granule if it's not t.truthy(updateGranuleFromApiMethodStub.getCalls()[0].args[0].createdAt); }); -test.serial('PATCH() does not write to DynamoDB/Elasticsearch/SNS if writing to PostgreSQL fails', async (t) => { +test.serial('PATCH() does not write to DynamoDB/SNS if writing to PostgreSQL fails', async (t) => { const { - esClient, executionUrl, knex, testExecutionCumulusId, } = t.context; const { newPgGranule, - esRecord, } = await createGranuleAndFiles({ dbClient: knex, - esClient, granuleParams: { status: 'running', execution: executionUrl, @@ -2937,22 +2696,12 @@ test.serial('PATCH() does not write to DynamoDB/Elasticsearch/SNS if writing to await patch(expressRequest, response); t.true(response.boom.badRequest.calledWithMatch('something bad')); - const actualPgGranule = await t.context.granulePgModel.get(t.context.knex, { - cumulus_id: newPgGranule.cumulus_id, - }); - - const actualApiGranule = await translatePostgresGranuleToApiGranule({ - granulePgRecord: actualPgGranule, - knexOrTransaction: knex, - }); - t.deepEqual( await t.context.granulePgModel.get(t.context.knex, { cumulus_id: newPgGranule.cumulus_id, }), newPgGranule ); - t.deepEqual(await t.context.esGranulesClient.get(actualApiGranule.granuleId), esRecord); const { Messages } = await sqs() .receiveMessage({ @@ -2963,11 +2712,10 @@ test.serial('PATCH() does not write to DynamoDB/Elasticsearch/SNS if writing to t.is(Messages.length, 0); }); -test.serial('PATCH rolls back PostgreSQL records and does not write to SNS if writing to Elasticsearch fails', async (t) => { - const { esClient, executionUrl, knex, testExecutionCumulusId } = t.context; - const { newPgGranule, esRecord } = await createGranuleAndFiles({ +test.serial('PATCH rolls back PostgreSQL records and does not write to SNS if writing to Postgres fails', async (t) => { + const { executionUrl, knex, testExecutionCumulusId } = t.context; + const { newPgGranule, apiGranule } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: testExecutionCumulusId, granuleParams: { collectionId: t.context.collectionId, @@ -2976,23 +2724,9 @@ test.serial('PATCH rolls back PostgreSQL records and does not write to SNS if wr }, }); - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - 
client: { - update: () => { - throw new Error('something bad'); - }, - delete: () => Promise.resolve(), - }, - }; - const apiGranule = await translatePostgresGranuleToApiGranule({ - granulePgRecord: newPgGranule, - knexOrTransaction: knex, - }); - const updatedGranule = { ...apiGranule, - status: 'completed', + status: 'failure', // non-existent status to fail the PG write }; const expressRequest = { @@ -3003,20 +2737,17 @@ test.serial('PATCH rolls back PostgreSQL records and does not write to SNS if wr body: updatedGranule, testContext: { knex, - esClient: fakeEsClient, }, }; const response = buildFakeExpressResponse(); await patch(expressRequest, response); - t.true(response.boom.badRequest.calledWithMatch('something bad')); const actualPgGranule = await t.context.granulePgModel.get(t.context.knex, { cumulus_id: newPgGranule.cumulus_id, }); t.deepEqual(actualPgGranule, newPgGranule); - t.deepEqual(await t.context.esGranulesClient.get(apiGranule.granuleId), esRecord); const { Messages } = await sqs() .receiveMessage({ @@ -3078,9 +2809,7 @@ test.serial('PATCH sets defaults and adds new granule', async (t) => { granule_id: granuleId, }); - const esRecord = await t.context.esGranulesClient.get(granuleId); - - const setCreatedAtValue = esRecord.createdAt; + const setCreatedAtValue = new Date(postgresRecord[0].created_at).getTime(); const expectedApiGranule = { ...newGranule, createdAt: setCreatedAtValue, @@ -3088,8 +2817,6 @@ test.serial('PATCH sets defaults and adds new granule', async (t) => { published: false, updatedAt: setCreatedAtValue, }; - - t.like(esRecord, expectedApiGranule); t.like( await translatePostgresGranuleToApiGranule({ granulePgRecord: postgresRecord[0], @@ -3636,7 +3363,6 @@ test.serial('associateExecution (POST) returns Not Found if collectionId in payl test.serial('PUT replaces an existing granule in all data stores, removing existing fields if not specified', async (t) => { const { - esClient, executionPgRecord, executionUrl, knex, @@ -3646,7 +3372,6 @@ test.serial('PUT replaces an existing granule in all data stores, removing exist newPgGranule, } = await createGranuleAndFiles({ dbClient: knex, - esClient, executionCumulusId: executionPgRecord.cumulus_id, granuleParams: { beginningDateTime: new Date().toISOString(), @@ -3702,17 +3427,11 @@ test.serial('PUT replaces an existing granule in all data stores, removing exist updatedAt: translatedActualPgGranule.updatedAt, createdAt: translatedActualPgGranule.createdAt, }; - - const updatedEsRecord = await t.context.esGranulesClient.get( - apiGranule.granuleId - ); - // Files is always returned as '[]' by translator if none exist t.deepEqual( { ...translatedActualPgGranule }, { ...expectedGranule, files: [] } ); - t.deepEqual(updatedEsRecord, { ...expectedGranule, _id: updatedEsRecord._id }); }); test.serial('PUT creates a new granule in all data stores', async (t) => { @@ -3745,7 +3464,6 @@ test.serial('PUT creates a new granule in all data stores', async (t) => { knexOrTransaction: knex, granulePgRecord: actualPgGranule, }); - const updatedEsRecord = await t.context.esGranulesClient.get(granuleId); const expectedGranule = { ...newGranule, @@ -3761,7 +3479,6 @@ test.serial('PUT creates a new granule in all data stores', async (t) => { { ...translatedActualPgGranule }, { ...expectedGranule, files: [] } ); - t.deepEqual(updatedEsRecord, { ...expectedGranule, _id: updatedEsRecord._id }); }); test.serial('PUT utilizes the collectionId from the URI if one is not provided', async (t) => { @@ -3769,7 +3486,6 @@ test.serial('PUT 
utilizes the collectionId from the URI if one is not provided', collectionCumulusId, collectionId, createGranuleId, - esGranulesClient, knex, } = t.context; @@ -3794,7 +3510,6 @@ test.serial('PUT utilizes the collectionId from the URI if one is not provided', knexOrTransaction: knex, granulePgRecord: actualPgGranule, }); - const updatedEsRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...newGranule, @@ -3810,7 +3525,6 @@ test.serial('PUT utilizes the collectionId from the URI if one is not provided', { ...translatedActualPgGranule }, { ...expectedGranule, files: [] } ); - t.deepEqual(updatedEsRecord, { ...expectedGranule, _id: updatedEsRecord._id }); }); test.serial('PUT utilizes the granuleId from the URI if one is not provided', async (t) => { @@ -3819,7 +3533,6 @@ test.serial('PUT utilizes the granuleId from the URI if one is not provided', as collectionCumulusId, createGranuleId, knex, - esGranulesClient, } = t.context; const granuleId = createGranuleId(); @@ -3843,7 +3556,6 @@ test.serial('PUT utilizes the granuleId from the URI if one is not provided', as knexOrTransaction: knex, granulePgRecord: actualPgGranule, }); - const updatedEsRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...newGranule, @@ -3859,7 +3571,6 @@ test.serial('PUT utilizes the granuleId from the URI if one is not provided', as { ...translatedActualPgGranule }, { ...expectedGranule, files: [] } ); - t.deepEqual(updatedEsRecord, { ...expectedGranule, _id: updatedEsRecord._id }); }); test.serial('PUT throws if URI collection does not match provided object collectionId', async (t) => { diff --git a/packages/api/tests/helpers/create-test-data.js b/packages/api/tests/helpers/create-test-data.js index c8f63513cff..8702b1b1efc 100644 --- a/packages/api/tests/helpers/create-test-data.js +++ b/packages/api/tests/helpers/create-test-data.js @@ -19,8 +19,7 @@ const { translatePostgresExecutionToApiExecution, translatePostgresGranuleToApiGranule, } = require('@cumulus/db'); -const { indexGranule, indexExecution } = require('@cumulus/es-client/indexer'); -const { Search } = require('@cumulus/es-client/search'); +const { indexExecution } = require('@cumulus/es-client/indexer'); const { constructCollectionId } = require('@cumulus/message/Collections'); // Postgres mock data factories @@ -58,7 +57,6 @@ async function createGranuleAndFiles({ executionCumulusId, collectionId, dbClient, - esClient, granuleParams = { published: false }, }) { const s3Buckets = { @@ -201,18 +199,9 @@ async function createGranuleAndFiles({ granulePgRecord: pgGranule, }); - await indexGranule(esClient, apiGranule, process.env.ES_INDEX); - - const esGranulesClient = new Search( - {}, - 'granule', - process.env.ES_INDEX - ); - return { newPgGranule: await granulePgModel.get(dbClient, { cumulus_id: pgGranule.cumulus_id }), apiGranule, - esRecord: await esGranulesClient.get(newGranule.granuleId), files: files, s3Buckets: s3Buckets, }; @@ -288,7 +277,6 @@ async function createExecutionRecords({ executionCumulusId: pgExecutions[0][0].cumulus_id, collectionId, dbClient: knex, - esClient, }); const granulesExecutionsModel = new GranulesExecutionsPgModel(); await Promise.all(pgExecutions.map((execution) => granulesExecutionsModel.create(knex, { diff --git a/packages/api/tests/lambdas/test-bulk-granule-delete.js b/packages/api/tests/lambdas/test-bulk-granule-delete.js index 04666966f88..c1f16faaad8 100644 --- a/packages/api/tests/lambdas/test-bulk-granule-delete.js +++ 
b/packages/api/tests/lambdas/test-bulk-granule-delete.js @@ -93,7 +93,6 @@ test.after.always(async (t) => { test('bulkGranuleDelete does not fail on published granules if payload.forceRemoveFromCmr is true', async (t) => { const { knex, - esClient, } = t.context; const granulePgModel = new GranulePgModel(); @@ -111,7 +110,6 @@ test('bulkGranuleDelete does not fail on published granules if payload.forceRemo published: true, collection_cumulus_id: collectionPgRecord.cumulus_id, }, - esClient: esClient, writeDynamo: false, }), createGranuleAndFiles({ @@ -120,7 +118,6 @@ test('bulkGranuleDelete does not fail on published granules if payload.forceRemo published: true, collection_cumulus_id: collectionPgRecord.cumulus_id, }, - esClient: esClient, writeDynamo: false, }), ]); diff --git a/packages/api/tests/lambdas/test-bulk-operation.js b/packages/api/tests/lambdas/test-bulk-operation.js index e94c267ed90..3b28d46c994 100644 --- a/packages/api/tests/lambdas/test-bulk-operation.js +++ b/packages/api/tests/lambdas/test-bulk-operation.js @@ -415,12 +415,10 @@ test.serial('bulk operation BULK_GRANULE_DELETE deletes listed granules from Pos createGranuleAndFiles({ dbClient: t.context.knex, granuleParams: { published: false }, - esClient: t.context.esClient, }), createGranuleAndFiles({ dbClient: t.context.knex, granuleParams: { published: false }, - esClient: t.context.esClient, }), ]); @@ -487,12 +485,12 @@ test.serial('bulk operation BULK_GRANULE_DELETE processes all granules that do n }); const granules = await Promise.all([ - createGranuleAndFiles({ dbClient: t.context.knex, esClient: t.context.esClient }), - createGranuleAndFiles({ dbClient: t.context.knex, esClient: t.context.esClient }), - createGranuleAndFiles({ dbClient: t.context.knex, esClient: t.context.esClient }), - createGranuleAndFiles({ dbClient: t.context.knex, esClient: t.context.esClient }), - createGranuleAndFiles({ dbClient: t.context.knex, esClient: t.context.esClient }), - createGranuleAndFiles({ dbClient: t.context.knex, esClient: t.context.esClient }), + createGranuleAndFiles({ dbClient: t.context.knex }), + createGranuleAndFiles({ dbClient: t.context.knex }), + createGranuleAndFiles({ dbClient: t.context.knex }), + createGranuleAndFiles({ dbClient: t.context.knex }), + createGranuleAndFiles({ dbClient: t.context.knex }), + createGranuleAndFiles({ dbClient: t.context.knex }), ]); const apiGranules = await Promise.all( diff --git a/packages/api/tests/lib/test-granule-delete.js b/packages/api/tests/lib/test-granule-delete.js index 74b9e4a2439..b16dcd8d535 100644 --- a/packages/api/tests/lib/test-granule-delete.js +++ b/packages/api/tests/lib/test-granule-delete.js @@ -7,15 +7,10 @@ const { DeleteTopicCommand, } = require('@aws-sdk/client-sns'); const { createSnsTopic } = require('@cumulus/aws-client/SNS'); - -const { recordNotFoundString } = require('@cumulus/es-client/search'); - const { createBucket, deleteS3Buckets, s3ObjectExists, - s3PutObject, - createS3Buckets, } = require('@cumulus/aws-client/S3'); const { randomId, randomString } = require('@cumulus/common/test-utils'); const { @@ -30,17 +25,11 @@ const { translateApiCollectionToPostgresCollection, } = require('@cumulus/db'); const { DeletePublishedGranule } = require('@cumulus/errors'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { constructCollectionId } = require('@cumulus/message/Collections'); // Dynamo mock data factories const { fakeCollectionFactory, 
- fakeGranuleFactoryV2, } = require('../../lib/testUtils'); const { deleteGranuleAndFiles } = require('../../src/lib/granule-delete'); @@ -72,11 +61,6 @@ test.before(async (t) => { t.context.knex = knex; t.context.knexAdmin = knexAdmin; - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esGranulesClient = new Search({}, 'granule', t.context.esIndex); - // Create a Dynamo collection // we need this because a granule has a fk referring to collections t.context.testCollection = fakeCollectionFactory({ @@ -137,7 +121,6 @@ test.after.always(async (t) => { knexAdmin: t.context.knexAdmin, testDbName, }); - await cleanupTestIndex(t.context); }); test.serial('deleteGranuleAndFiles() throws an error if the granule is published', async (t) => { @@ -145,7 +128,6 @@ test.serial('deleteGranuleAndFiles() throws an error if the granule is published dbClient: t.context.knex, collectionId: t.context.collectionId, collectionCumulusId: t.context.collectionCumulusId, - esClient: t.context.esClient, granuleParams: { published: true }, }); @@ -153,7 +135,6 @@ test.serial('deleteGranuleAndFiles() throws an error if the granule is published deleteGranuleAndFiles({ knex: t.context.knex, pgGranule: newPgGranule, - esClient: t.context.esClient, }), { instanceOf: DeletePublishedGranule } ); @@ -173,8 +154,8 @@ test.serial('deleteGranuleAndFiles() throws an error if the granule is published ])); }); -test.serial('deleteGranuleAndFiles() removes granules from PostgreSQL/Elasticsearch and files from PostgreSQL/S3', async (t) => { - const { collectionId, collectionCumulusId, esClient, esGranulesClient, knex } = t.context; +test.serial('deleteGranuleAndFiles() removes granules from PostgreSQL and files from PostgreSQL/S3', async (t) => { + const { collectionId, collectionCumulusId, knex } = t.context; const { apiGranule, @@ -186,19 +167,12 @@ test.serial('deleteGranuleAndFiles() removes granules from PostgreSQL/Elasticsea collectionId, collectionCumulusId, granuleParams: { published: false }, - esClient, }); t.true(await granulePgModel.exists(knex, { granule_id: newPgGranule.granule_id, collection_cumulus_id: collectionCumulusId, })); - t.true( - await esGranulesClient.exists( - newPgGranule.granule_id, - collectionId - ) - ); await Promise.all( files.map(async (file) => { t.true(await s3ObjectExists({ Bucket: file.bucket, Key: file.key })); @@ -209,7 +183,6 @@ test.serial('deleteGranuleAndFiles() removes granules from PostgreSQL/Elasticsea const details = await deleteGranuleAndFiles({ knex: knex, pgGranule: newPgGranule, - esClient, }); t.truthy(details.deletionTime); @@ -268,7 +241,6 @@ test.serial('deleteGranuleAndFiles() succeeds if a file is not present in S3', a const details = await deleteGranuleAndFiles({ knex: t.context.knex, pgGranule: newPgGranule, - esClient: t.context.esClient, }); t.truthy(details.deletionTime); @@ -285,12 +257,6 @@ test.serial('deleteGranuleAndFiles() succeeds if a file is not present in S3', a collection_cumulus_id: newPgGranule.collection_cumulus_id, } )); - t.false( - await t.context.esGranulesClient.exists( - newPgGranule.granule_id, - t.context.collectionCumulusId - ) - ); }); test.serial('deleteGranuleAndFiles() will not delete S3 Files if the PostgreSQL granule delete fails', async (t) => { @@ -302,7 +268,6 @@ test.serial('deleteGranuleAndFiles() will not delete S3 Files if the PostgreSQL dbClient: t.context.knex, collectionId: t.context.collectionId, collectionCumulusId: t.context.collectionCumulusId, - 
esClient: t.context.esClient, granuleParams: { published: false }, }); @@ -318,75 +283,11 @@ test.serial('deleteGranuleAndFiles() will not delete S3 Files if the PostgreSQL knex: t.context.knex, pgGranule: newPgGranule, granulePgModel: mockGranuleModel, - esClient: t.context.esClient, }), { message: 'PG delete failed' } ); - // granule should still exist in PostgreSQL and Elasticsearch - t.true(await granulePgModel.exists( - t.context.knex, - { - granule_id: newPgGranule.granule_id, - collection_cumulus_id: newPgGranule.collection_cumulus_id, - } - )); - t.true( - await t.context.esGranulesClient.exists( - newPgGranule.granule_id, - t.context.collectionId - ) - ); - - // Files will still exist in S3 and PostgreSQL. - await Promise.all( - files.map(async (file) => { - t.true(await s3ObjectExists({ Bucket: file.bucket, Key: file.key })); - t.true(await filePgModel.exists(t.context.knex, { bucket: file.bucket, key: file.key })); - }) - ); - - t.teardown(() => deleteS3Buckets([ - s3Buckets.protected.name, - s3Buckets.public.name, - ])); -}); - -test.serial('deleteGranuleAndFiles() will not delete granule or S3 files if the Elasticsearch granule delete fails', async (t) => { - const { - newPgGranule, - files, - s3Buckets, - } = await createGranuleAndFiles({ - dbClient: t.context.knex, - collectionId: t.context.collectionId, - collectionCumulusId: t.context.collectionCumulusId, - esClient: t.context.esClient, - granuleParams: { published: false }, - }); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - delete: () => { - throw new Error('ES delete failed'); - }, - index: (record) => Promise.resolve({ - body: record, - }), - }, - }; - - await t.throwsAsync( - deleteGranuleAndFiles({ - knex: t.context.knex, - pgGranule: newPgGranule, - esClient: fakeEsClient, - }), - { message: 'ES delete failed' } - ); - - // granule should still exist in PostgreSQL and elasticsearch + // granule should still exist in PostgreSQL t.true(await granulePgModel.exists( t.context.knex, { @@ -394,12 +295,6 @@ test.serial('deleteGranuleAndFiles() will not delete granule or S3 files if the collection_cumulus_id: newPgGranule.collection_cumulus_id, } )); - t.true( - await t.context.esGranulesClient.exists( - newPgGranule.granule_id, - t.context.collectionId - ) - ); // Files will still exist in S3 and PostgreSQL. 
await Promise.all( @@ -414,94 +309,3 @@ test.serial('deleteGranuleAndFiles() will not delete granule or S3 files if the s3Buckets.public.name, ])); }); - -test.serial( - 'deleteGranuleAndFiles() does not require a PostgreSQL granule if an elasticsearch granule is present', - async (t) => { - // Create a granule in Dynamo only - const s3Buckets = { - protected: { - name: randomId('protected'), - type: 'protected', - }, - public: { - name: randomId('public'), - type: 'public', - }, - }; - const granuleId = randomId('granule'); - const files = [ - { - bucket: s3Buckets.protected.name, - fileName: `${granuleId}.hdf`, - key: `${randomString(5)}/${granuleId}.hdf`, - }, - { - bucket: s3Buckets.protected.name, - fileName: `${granuleId}.cmr.xml`, - key: `${randomString(5)}/${granuleId}.cmr.xml`, - }, - { - bucket: s3Buckets.public.name, - fileName: `${granuleId}.jpg`, - key: `${randomString(5)}/${granuleId}.jpg`, - }, - ]; - - const newGranule = fakeGranuleFactoryV2({ - granuleId: granuleId, - status: 'failed', - published: false, - files: files, - }); - - await createS3Buckets([s3Buckets.protected.name, s3Buckets.public.name]); - - // Add files to S3 - await Promise.all( - newGranule.files.map((file) => - s3PutObject({ - Bucket: file.bucket, - Key: file.key, - Body: `test data ${randomString()}`, - })) - ); - - // Add granule to elasticsearch - const esGranulesClient = new Search( - {}, - 'granule', - process.env.ES_INDEX - ); - - await t.context.esClient.client.index({ - index: t.context.esIndex, - type: 'granule', - id: newGranule.granuleId, - parent: 'fakeCollectionId', - body: newGranule, - }); - - await deleteGranuleAndFiles({ - knex: t.context.knex, - apiGranule: newGranule, - pgGranule: undefined, - esClient: t.context.esClient, - }); - - // verify the files are deleted from S3. - await Promise.all( - files.map(async (file) => { - t.false(await s3ObjectExists({ Bucket: file.bucket, Key: file.key })); - }) - ); - - // Verify record is removed from elasticsearch - - const esResult = await esGranulesClient.get(newGranule.granuleId); - t.is(esResult.detail, recordNotFoundString); - - t.teardown(() => - deleteS3Buckets([s3Buckets.protected.name, s3Buckets.public.name])); - } -); diff --git a/packages/api/tests/lib/writeRecords/test-write-granules.js b/packages/api/tests/lib/writeRecords/test-write-granules.js index 0182bbcc900..d861703013f 100644 --- a/packages/api/tests/lib/writeRecords/test-write-granules.js +++ b/packages/api/tests/lib/writeRecords/test-write-granules.js @@ -4,7 +4,6 @@ const orderBy = require('lodash/orderBy'); const test = require('ava'); const cryptoRandomString = require('crypto-random-string'); const sinon = require('sinon'); -const sortBy = require('lodash/sortBy'); const omit = require('lodash/omit'); const StepFunctions = require('@cumulus/aws-client/StepFunctions'); @@ -41,13 +40,6 @@ const { SubscribeCommand, DeleteTopicCommand, } = require('@aws-sdk/client-sns'); -const { - Search, -} = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { getExecutionUrlFromArn, } = require('@cumulus/message/Executions'); @@ -110,17 +102,14 @@ const apiFormatOmitList = [ * @param {Object} updateGranulePayload -- Request body for granule update * @param {boolean} granuleWriteVia -- Either 'api' (default) or 'message'. 
Switches * The granule write mechanism - * @returns {Object} -- Updated granule objects from each datastore and PG-translated payload + * @returns {Object} -- Updated granule objects and PG-translated payload * updatedPgGranuleFields, * pgGranule, - * esGranule, * dynamoGranule, **/ const updateGranule = async (t, updateGranulePayload, granuleWriteVia = 'api') => { const { collectionCumulusId, - esClient, - esGranulesClient, executionCumulusId, granuleId, granulePgModel, @@ -153,13 +142,12 @@ const updateGranule = async (t, updateGranulePayload, granuleWriteVia = 'api') = testOverrides: { stepFunctionUtils: t.context.stepFunctionUtils }, }); } else { - await writeGranuleFromApi({ ...updateGranulePayload }, knex, esClient, 'Update'); + await writeGranuleFromApi({ ...updateGranulePayload }, knex, 'Update'); } const pgGranule = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esGranule = await esGranulesClient.get(granuleId); const updatedPgGranuleFields = await translateApiGranuleToPostgresGranule({ dynamoRecord: { ...updateGranulePayload }, @@ -169,7 +157,6 @@ const updateGranule = async (t, updateGranulePayload, granuleWriteVia = 'api') = return { updatedPgGranuleFields, pgGranule, - esGranule, }; }; @@ -211,15 +198,6 @@ test.before(async (t) => { ); t.context.knexAdmin = knexAdmin; t.context.knex = knex; - - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esGranulesClient = new Search( - {}, - 'granule', - t.context.esIndex - ); }); test.beforeEach(async (t) => { @@ -344,7 +322,6 @@ test.after.always(async (t) => { await destroyLocalTestDb({ ...t.context, }); - await cleanupTestIndex(t.context); }); test('generateFilePgRecord() adds granule cumulus ID', (t) => { @@ -412,9 +389,7 @@ test.serial('_writeGranule will not allow a running status to replace a complete const { granule, executionCumulusId, - esClient, collectionCumulusId, - executionUrl, granuleId, granulePgModel, knex, @@ -434,7 +409,6 @@ test.serial('_writeGranule will not allow a running status to replace a complete executionCumulusId, granulePgModel, knex, - esClient, snsEventType: 'Update', }); @@ -457,13 +431,6 @@ test.serial('_writeGranule will not allow a running status to replace a complete )).length, 1 ); - t.like( - await t.context.esGranulesClient.get(granuleId), - { - execution: executionUrl, - status: 'completed', - } - ); const updatedapiGranuleRecord = { ...granule, @@ -486,7 +453,6 @@ test.serial('_writeGranule will not allow a running status to replace a complete executionCumulusId, granulePgModel, knex, - esClient, snsEventType: 'Update', }); @@ -499,13 +465,6 @@ test.serial('_writeGranule will not allow a running status to replace a complete status: 'completed', } ); - t.like( - await t.context.esGranulesClient.get(granuleId), - { - execution: executionUrl, - status: 'completed', - } - ); }); test.serial('writeGranulesFromMessage() returns undefined if message has no granules', async (t) => { @@ -544,12 +503,9 @@ test.serial('writeGranulesFromMessage() returns undefined if message has empty g t.is(actual, undefined); }); -test.serial('writeGranulesFromMessage() saves granule records to PostgreSQL/Elasticsearch/SNS', async (t) => { +test.serial('writeGranulesFromMessage() saves granule records to PostgreSQL/SNS', async (t) => { const { cumulusMessage, - esGranulesClient, - granule, - granulePgModel, knex, collectionCumulusId, executionCumulusId, @@ -573,32 +529,6 @@ 
test.serial('writeGranulesFromMessage() saves granule records to PostgreSQL/Elas knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - const postgresRecord = await granulePgModel.get( - knex, - { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } - ); - const esRecord = await esGranulesClient.get(granuleId); - const expectedGranule = { - ...granule, - createdAt: esRecord.createdAt, - duration: esRecord.duration, - error: {}, - productVolume: esRecord.productVolume, - status: cumulusMessage.meta.status, - timestamp: esRecord.timestamp, - updatedAt: esRecord.updatedAt, - }; - t.like(esRecord, expectedGranule); - - const postgresActual = await translatePostgresGranuleToApiGranule({ - knexOrTransaction: knex, - granulePgRecord: postgresRecord, - }); - - t.like( - { ...postgresActual, files: orderBy(postgresActual.files, ['bucket', 'key']) }, - { ...expectedGranule, files: orderBy(expectedGranule.files, ['bucket', 'key']) } - ); const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, @@ -607,12 +537,11 @@ test.serial('writeGranulesFromMessage() saves granule records to PostgreSQL/Elas t.is(Messages.length, 1); }); -test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/Elasticsearch/SNS with expected values nullified', async (t) => { +test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/SNS with expected values nullified', async (t) => { const { collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, executionUrl, files, @@ -691,7 +620,6 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); validNullableGranuleKeys.forEach((key) => { completeGranule[key] = null; }); @@ -714,7 +642,6 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...completeGranule, @@ -741,8 +668,7 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos timeToPreprocess: 0, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); @@ -752,15 +678,13 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/Elasticsearch/SNS without updating product volume if files is undefined', async (t) => { +test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/SNS without updating product volume if files is undefined', async (t) => { const { collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, executionUrl, files, @@ -818,18 +742,14 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); - - t.is(esRecord.productVolume, 
'15'); t.is(apiFormattedPostgresGranule.productVolume, '15'); }); -test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/Elasticsearch/SNS without modifying undefined values', async (t) => { +test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/SNS without modifying undefined values', async (t) => { const { collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, executionUrl, files, @@ -887,7 +807,6 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -918,7 +837,6 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -937,8 +855,7 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos timeToPreprocess: 0, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); @@ -948,15 +865,13 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/Elasticsearch/SNS with expected values nullified when granule is updated to running', async (t) => { +test.serial('writeGranulesFromMessage() on re-write saves granule records to PostgreSQL/SNS with expected values nullified when granule is updated to running', async (t) => { const { collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -1069,7 +984,6 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); // We expect nothing other than these fields to change because of the write rules: const expectedGranule = { @@ -1081,8 +995,7 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos execution: executionUrl, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); @@ -1092,49 +1005,6 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos apiFormattedPostgresGranule, expectedGranule ); - t.deepEqual(omit(esRecord, ['_id']), expectedGranule); -}); - -test.serial('writeGranulesFromMessage() saves the same values to PostgreSQL and Elasticsearch', async (t) => { - const { - collectionCumulusId, - cumulusMessage, - executionCumulusId, - granuleId, - knex, - providerCumulusId, - stepFunctionUtils, - } = t.context; - - // Only test fields that are stored in Postgres on the Granule record. 
- // The following fields are populated by separate queries during translation - // or elasticsearch. - const omitList = ['files', '_id']; - - await writeGranulesFromMessage({ - cumulusMessage, - executionCumulusId, - knex, - providerCumulusId, - testOverrides: { stepFunctionUtils }, - }); - - const granulePgRecord = await t.context.granulePgModel.get( - knex, - { - granule_id: granuleId, - collection_cumulus_id: collectionCumulusId, - } - ); - - // translate the PG granule to API granule to directly compare to elasticsearch - const translatedPgRecord = await translatePostgresGranuleToApiGranule({ - granulePgRecord, - knexOrTransaction: knex, - }); - - const esRecord = await t.context.esGranulesClient.get(granuleId); - t.deepEqual(omit(translatedPgRecord, omitList), omit(esRecord, omitList)); }); test.serial('writeGranulesFromMessage() sets a default value of false for `published` if one is not set', async (t) => { @@ -1148,11 +1018,6 @@ test.serial('writeGranulesFromMessage() sets a default value of false for `publi stepFunctionUtils, } = t.context; - // Only test fields that are stored in Postgres on the Granule record. - // The following fields are populated by separate queries during translation - // or elasticsearch. - const omitList = ['files', '_id']; - // Remove published key for test delete cumulusMessage.payload.granules[0].published; @@ -1172,18 +1037,11 @@ test.serial('writeGranulesFromMessage() sets a default value of false for `publi } ); - // Validate objects all match - /// translate the PG granule to API granule to directly compare to ES const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, knexOrTransaction: knex, }); - const esRecord = await t.context.esGranulesClient.get(granuleId); - t.deepEqual(omit(translatedPgRecord, omitList), omit(esRecord, omitList)); - - // Validate assertion is true in the primary datastore: - t.is(translatedPgRecord.published, false); }); @@ -1199,11 +1057,6 @@ test.serial('writeGranulesFromMessage() uses a default value for granule.created stepFunctionUtils, } = t.context; - // Only test fields that are stored in Postgres on the Granule record. - // The following fields are populated by separate queries during translation - // or elasticsearch. - const omitList = ['files', '_id']; - // Remove createdAt key for test delete cumulusMessage.payload.granules[0].createdAt; @@ -1223,18 +1076,10 @@ test.serial('writeGranulesFromMessage() uses a default value for granule.created } ); - // Validate objects all match - /// translate the PG granule to API granule to directly compare to ES const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, knexOrTransaction: knex, }); - - const esRecord = await t.context.esGranulesClient.get(granuleId); - t.deepEqual(omit(translatedPgRecord, omitList), omit(esRecord, omitList)); - - // Validate assertion is true in the primary datastore: - t.is(translatedPgRecord.createdAt, workflowStartTime); }); @@ -1249,11 +1094,6 @@ test.serial('writeGranulesFromMessage() allows overwrite of createdAt and uses g stepFunctionUtils, } = t.context; - // Only test fields that are stored in Postgres on the Granule record. - // The following fields are populated by separate queries during translation - // or elasticsearch. 
- const omitList = ['files', '_id']; - await writeGranulesFromMessage({ cumulusMessage, executionCumulusId, @@ -1270,25 +1110,16 @@ test.serial('writeGranulesFromMessage() allows overwrite of createdAt and uses g } ); - // Validate objects all match - /// translate the PG granule to API granule to directly compare to ES const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, knexOrTransaction: knex, }); - - const esRecord = await t.context.esGranulesClient.get(granuleId); - t.deepEqual(omit(translatedPgRecord, omitList), omit(esRecord, omitList)); - - // Validate assertion is true in the primary datastore: - t.is(translatedPgRecord.createdAt, cumulusMessage.payload.granules[0].createdAt); }); -test.serial('writeGranulesFromMessage() given a payload with undefined files, keeps existing files in all datastores', async (t) => { +test.serial('writeGranulesFromMessage() given a payload with undefined files, keeps existing files', async (t) => { const { collectionCumulusId, - esGranulesClient, files, granule, granulePgModel, @@ -1324,7 +1155,6 @@ test.serial('writeGranulesFromMessage() given a payload with undefined files, ke testOverrides: { stepFunctionUtils }, }); - const originalEsGranule = await esGranulesClient.get(granuleId); const originalpgGranule = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } @@ -1342,13 +1172,9 @@ test.serial('writeGranulesFromMessage() given a payload with undefined files, ke originalApiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); - originalEsGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); // Files were written correctly in initial DB writes t.true(originalPayloadFiles.length > 0); - t.deepEqual(originalEsGranule.files, originalPayloadFiles); t.deepEqual(originalApiGranule.files, originalPayloadFiles); // Update existing granule with a partial granule object @@ -1362,29 +1188,22 @@ test.serial('writeGranulesFromMessage() given a payload with undefined files, ke const { pgGranule, - esGranule, } = await updateGranule(t, updateGranulePayload, 'message'); const apiGranule = await translatePostgresGranuleToApiGranule({ granulePgRecord: pgGranule, knexOrTransaction: knex, }); - - esGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); apiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); t.deepEqual(apiGranule.files, originalPayloadFiles); - t.deepEqual(esGranule.files, originalPayloadFiles); }); test.serial('writeGranulesFromMessage() given a partial granule overwrites only provided fields', async (t) => { const { collectionCumulusId, - esGranulesClient, granule, granulePgModel, knex, @@ -1424,7 +1243,6 @@ test.serial('writeGranulesFromMessage() given a partial granule overwrites only knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const originalpgGranule = await granulePgModel.get( knex, @@ -1442,7 +1260,6 @@ test.serial('writeGranulesFromMessage() given a partial granule overwrites only const { updatedPgGranuleFields, pgGranule, - esGranule, } = await updateGranule(t, updateGranulePayload, 'message'); const apiGranule = await translatePostgresGranuleToApiGranule({ @@ -1450,9 +1267,6 @@ test.serial('writeGranulesFromMessage() given a partial granule overwrites only knexOrTransaction: knex, }); - esGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); apiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); @@ -1465,19 
+1279,12 @@ test.serial('writeGranulesFromMessage() given a partial granule overwrites only cumulusMessageOmitList ) ); - - // Postgres and ElasticSearch granules matches - t.deepEqual( - apiGranule, - omit(esGranule, ['_id']) - ); }); -test.serial('writeGranulesFromMessage() given an empty array as a files key will remove all existing files and keep Postgres/Elastic in-sync', async (t) => { +test.serial('writeGranulesFromMessage() given an empty array as a files key will remove all existing files from Postgres', async (t) => { const { collectionCumulusId, executionCumulusId, - esGranulesClient, files, granule, granuleId, @@ -1512,7 +1319,6 @@ test.serial('writeGranulesFromMessage() given an empty array as a files key will testOverrides: { stepFunctionUtils }, }); - const originalEsGranule = await esGranulesClient.get(granuleId); const originalpgGranule = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } @@ -1530,12 +1336,8 @@ test.serial('writeGranulesFromMessage() given an empty array as a files key will originalApiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); - originalEsGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); // Files were written correctly in initial DB writes - t.deepEqual(originalEsGranule.files, originalPayloadFiles); t.deepEqual(originalApiGranule.files, originalPayloadFiles); // Update existing granule with a partial granule object @@ -1549,7 +1351,6 @@ test.serial('writeGranulesFromMessage() given an empty array as a files key will const { updatedPgGranuleFields, pgGranule, - esGranule, } = await updateGranule(t, updateGranulePayload, 'message'); // Postgres granule matches expected updatedGranule @@ -1565,16 +1366,12 @@ test.serial('writeGranulesFromMessage() given an empty array as a files key will granulePgRecord: pgGranule, knexOrTransaction: knex, }); - - // Files were removed from all datastores t.deepEqual(apiGranule.files, []); - t.is(esGranule.files, undefined); }); test.serial('writeGranulesFromMessage() given a null files key will throw an error', async (t) => { const { collectionCumulusId, - esGranulesClient, granule, knex, executionCumulusId, @@ -1608,7 +1405,7 @@ test.serial('writeGranulesFromMessage() given a null files key will throw an err testOverrides: { stepFunctionUtils }, }); - // Files exist in all datastores + // Files exist in PostgreSQL const originalPGGranule = await t.context.granulePgModel.get( knex, { @@ -1620,21 +1417,16 @@ test.serial('writeGranulesFromMessage() given a null files key will throw an err granulePgRecord: originalPGGranule, knexOrTransaction: knex, }); - const originalEsGranule = await esGranulesClient.get(granuleId); const originalPayloadFiles = t.context.files; originalApiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); - originalEsGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); originalPayloadFiles.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); t.deepEqual(originalApiGranule.files, originalPayloadFiles); - t.deepEqual(originalEsGranule.files, originalPayloadFiles); // Update existing granule with a partial granule object const updateGranulePayload = { @@ -1731,81 +1523,6 @@ test.serial('writeGranulesFromMessage() removes preexisting granule file from Po ); }); -test.serial('writeGranulesFromMessage() saves granule records to PostgreSQL/Elasticsearch with same timestamps', async (t) => { - const { - cumulusMessage, - knex, - collectionCumulusId, - executionCumulusId, - providerCumulusId, - granuleId,
- stepFunctionUtils, - } = t.context; - - await writeGranulesFromMessage({ - cumulusMessage, - executionCumulusId, - providerCumulusId, - knex, - testOverrides: { stepFunctionUtils }, - }); - - const granulePgRecord = await t.context.granulePgModel.get( - knex, - { - granule_id: granuleId, - collection_cumulus_id: collectionCumulusId, - } - ); - - const esRecord = await t.context.esGranulesClient.get(granuleId); - - t.is(granulePgRecord.created_at.getTime(), esRecord.createdAt); - t.is(granulePgRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(granulePgRecord.timestamp.getTime(), esRecord.timestamp); -}); - -test.serial('writeGranulesFromMessage() saves the same files to PostgreSQL and Elasticsearch', async (t) => { - const { - collectionCumulusId, - cumulusMessage, - esGranulesClient, - executionCumulusId, - granuleId, - granulePgModel, - knex, - stepFunctionUtils, - } = t.context; - - // ensure files are written - cumulusMessage.meta.status = 'completed'; - - await writeGranulesFromMessage({ - cumulusMessage, - executionCumulusId, - knex, - testOverrides: { stepFunctionUtils }, - }); - - const granulePgRecord = await granulePgModel.get( - knex, - { - granule_id: granuleId, - collection_cumulus_id: collectionCumulusId, - } - ); - - // translate the PG granule to API granule to directly compare to Dynamo - const translatedPgRecord = await translatePostgresGranuleToApiGranule({ - granulePgRecord, - knexOrTransaction: knex, - }); - const sortByKeys = ['bucket', 'key']; - - const esRecord = await esGranulesClient.get(granuleId); - t.deepEqual(sortBy(translatedPgRecord.files, sortByKeys), sortBy(esRecord.files, sortByKeys)); -}); - test.serial('writeGranulesFromMessage() saves file records to when workflow status is "completed"', async (t) => { const { collectionCumulusId, @@ -1911,7 +1628,7 @@ test.serial('writeGranulesFromMessage() throws error if any granule writes fail' })); }); -test.serial('writeGranulesFromMessage() does not write to PostgreSQL/Elasticsearch/SNS if Postgres write fails', async (t) => { +test.serial('writeGranulesFromMessage() does not write to PostgreSQL/SNS if Postgres write fails', async (t) => { const { collectionCumulusId, cumulusMessage, @@ -1946,55 +1663,6 @@ test.serial('writeGranulesFromMessage() does not write to PostgreSQL/Elasticsear collection_cumulus_id: collectionCumulusId, }) ); - t.false(await t.context.esGranulesClient.exists(granuleId)); - - const { Messages } = await sqs().receiveMessage({ - QueueUrl: t.context.QueueUrl, - WaitTimeSeconds: 10, - }); - - t.is(Messages.length, 0); -}); - -test.serial('writeGranulesFromMessage() does not persist records to PostgreSQL/Elasticsearch/SNS if Elasticsearch write fails', async (t) => { - const { - collectionCumulusId, - cumulusMessage, - executionCumulusId, - granuleId, - knex, - providerCumulusId, - stepFunctionUtils, - } = t.context; - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - update: () => { - throw new Error('Granules ES error'); - }, - delete: () => Promise.resolve(), - }, - }; - - const [error] = await t.throwsAsync(writeGranulesFromMessage({ - collectionCumulusId, - cumulusMessage, - esClient: fakeEsClient, - executionCumulusId, - knex, - providerCumulusId, - testOverrides: { stepFunctionUtils }, - })); - - t.true(error.message.includes('Granules ES error')); - t.false( - await t.context.granulePgModel.exists( - knex, - { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } - ) - ); - t.false(await 
t.context.esGranulesClient.exists(granuleId)); const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, @@ -2038,7 +1706,7 @@ test.serial('writeGranulesFromMessage() writes a granule and marks as failed if t.true(pgGranuleError[0].Cause.includes('AggregateError')); }); -test.serial('writeGranuleFromMessage() writes a new granule with files set to "[]" results in file value set to undefined/default in all datastores', async (t) => { +test.serial('writeGranuleFromMessage() writes a new granule with files set to "[]" results in file value set to undefined/default', async (t) => { const { collectionCumulusId, cumulusMessage, @@ -2064,7 +1732,6 @@ test.serial('writeGranuleFromMessage() writes a new granule with files set to "[ knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, @@ -2072,7 +1739,6 @@ test.serial('writeGranuleFromMessage() writes a new granule with files set to "[ }); t.deepEqual(translatedPgRecord.files, []); - t.is(esRecord.files, undefined); }); test.serial('_writeGranules attempts to mark granule as failed if a SchemaValidationException occurs when a granule is in a final state', async (t) => { @@ -2116,7 +1782,6 @@ test.serial('_writeGranules attempts to mark granule as failed if a SchemaValida knex, testOverrides: { stepFunctionUtils }, })); - t.true(error.cause.message.includes('The record has validation errors:')); const pgGranule = await t.context.granulePgModel.get(knex, { @@ -2342,11 +2007,6 @@ test.serial('writeGranulesFromMessage() sets `published` to false if null value stepFunctionUtils, } = t.context; - // Only test fields that are stored in Postgres on the Granule record. - // The following fields are populated by separate queries during translation - // or elasticsearch. 
- const omitList = ['files', '_id']; - // Set published to null for test cumulusMessage.payload.granules[0].published = null; @@ -2366,22 +2026,14 @@ test.serial('writeGranulesFromMessage() sets `published` to false if null value } ); - // Validate objects all match - /// translate the PG granule to API granule to directly compare to ES const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, knexOrTransaction: knex, }); - - const esRecord = await t.context.esGranulesClient.get(granuleId); - t.deepEqual(omit(translatedPgRecord, omitList), omit(esRecord, omitList)); - - // Validate assertion is true in the primary datastore: - t.is(translatedPgRecord.published, false); }); -test.serial('writeGranulesFromMessage() does not write a granule to Postgres or ES if a granule with the same ID and with a different collection ID already exists', async (t) => { +test.serial('writeGranulesFromMessage() does not write a granule to Postgres if a granule with the same ID and with a different collection ID already exists', async (t) => { const { collectionPgModel, collectionCumulusId, @@ -2425,7 +2077,6 @@ test.serial('writeGranulesFromMessage() does not write a granule to Postgres or collection_cumulus_id: collectionCumulusId, }) ); - t.false(await t.context.esGranulesClient.exists(granuleId)); const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, @@ -2509,7 +2160,7 @@ test.serial('writeGranulesFromMessage() does not persist file records to Postgre ); }); -test.serial('writeGranulesFromMessage() on re-write with the same granule values and files with "completed" status saves granule records to PostgreSQL/Elasticsearch with updated product volume, expected values, and files', async (t) => { +test.serial('writeGranulesFromMessage() on re-write with the same granule values and files with "completed" status saves granule records to PostgreSQL with updated product volume, expected values, and files', async (t) => { // a re-write with same values and files accomplishes the same result // as an update with different values // for completed status, whether the re-write is with the same execution or a new one @@ -2518,7 +2169,6 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, executionUrl, files, @@ -2553,7 +2203,6 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -2583,7 +2232,6 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -2613,8 +2261,7 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values timeToPreprocess: 0, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); @@ -2624,10 +2271,9 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values apiFormattedPostgresGranule, 
removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on re-write with the same granule values but different files with "completed" status saves granule records to PostgreSQL/Elasticsearch with updated product volume, expected values, and replaces the files', async (t) => { +test.serial('writeGranulesFromMessage() on re-write with the same granule values but different files with "completed" status saves granule records to PostgreSQL with updated product volume, expected values, and replaces the files', async (t) => { // a re-write with same values and files accomplishes the same result // as an update with different values // for completed status, whether the re-write is with the same execution or a new one @@ -2636,7 +2282,6 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, executionUrl, files, @@ -2671,7 +2316,6 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); // Create new files for granule. These records will exist in database // during subsequent granule write from message @@ -2706,7 +2350,6 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...updatedGranule, @@ -2730,8 +2373,7 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values timeToPreprocess: 0, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); @@ -2741,10 +2383,9 @@ test.serial('writeGranulesFromMessage() on re-write with the same granule values apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on update changing granule status to "running", with different files and the same execution, does not update the granule values or files in Postgres/ES, so the pre-existing values and files will persist', async (t) => { +test.serial('writeGranulesFromMessage() on update changing granule status to "running", with different files and the same execution, does not update the granule values or files in Postgres, so the pre-existing values and files will persist', async (t) => { // a re-write with same values and files accomplishes the same result // as an update with different values // for running status, there is a difference whether the re-write is with the same execution @@ -2753,7 +2394,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -2786,7 +2426,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ 
-2833,7 +2472,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -2849,21 +2487,18 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru updatedAt: apiFormattedInitialPostgresGranule.updatedAt, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); - // Translated postgres granule matches expected updatedGranule // minus model defaults t.deepEqual( apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on update changing granule status to "queued", with different files and the same execution, does not update the granule values or files in Postgres/ES, so the pre-existing values and files will persist', async (t) => { +test.serial('writeGranulesFromMessage() on update changing granule status to "queued", with different files and the same execution, does not update the granule values or files in Postgres, so the pre-existing values and files will persist', async (t) => { // a re-write with same values and files accomplishes the same result // as an update with different values // for queued status, there is a difference whether the re-write is with the same execution @@ -2872,7 +2507,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -2905,7 +2539,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -2952,7 +2585,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -2968,21 +2600,18 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu updatedAt: apiFormattedInitialPostgresGranule.updatedAt, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); - // Translated postgres granule matches expected updatedGranule // minus model defaults t.deepEqual( apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on update changing granule status to "running", with different files and a new execution, updates only limited granule values to Postgres/ES, and does not persist updates to the files', async (t) => { +test.serial('writeGranulesFromMessage() on update changing granule status to "running", with 
different files and a new execution, updates only limited granule values to Postgres, and does not persist updates to the files', async (t) => { // a re-write with same values accomplishes the same result as an update with different values // for running status, there is a difference whether the re-write is with the same execution // or a new one @@ -2990,7 +2619,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -3023,7 +2651,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -3079,7 +2706,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -3099,8 +2725,7 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru execution: executionUrl, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); @@ -3110,10 +2735,9 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on update changing granule status to "queued", with different files and a new execution, does not update the granule values or files in Postgres/ES, so the pre-existing values and files will persist', async (t) => { +test.serial('writeGranulesFromMessage() on update changing granule status to "queued", with different files and a new execution, does not update the granule values or files in Postgres, so the pre-existing values and files will persist', async (t) => { // a re-write with same values accomplishes the same result as an update with different values // for queued status, there is a difference whether the re-write is with the same execution // or a new one, but only between an existing execution and a non-existing execution @@ -3121,7 +2745,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -3154,7 +2777,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -3210,7 +2832,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -3226,28 +2847,24 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu updatedAt: 
apiFormattedInitialPostgresGranule.updatedAt, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); - // Translated postgres granule matches expected updatedGranule // minus model defaults t.deepEqual( apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on update changing granule status to "running", with different files, a new execution, and a stale granule createdAt, does not update the granule values or files in Postgres/ES or to the files, so the pre-existing values and files will persist', async (t) => { +test.serial('writeGranulesFromMessage() on update changing granule status to "running", with different files, a new execution, and a stale granule createdAt, does not update the granule values or files in Postgres, so the pre-existing values and files will persist', async (t) => { // for running status, there is a difference whether the re-write is with the same execution // or a new one const { collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -3280,7 +2897,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -3335,7 +2951,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -3351,28 +2966,24 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "ru updatedAt: apiFormattedInitialPostgresGranule.updatedAt, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); - // Translated postgres granule matches expected updatedGranule // minus model defaults t.deepEqual( apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on update changing granule status to "queued", with different files, a new execution, and a stale granule createdAt, does not update the granule values or files in Postgres/ES or to the files, so the pre-existing values and files will persist', async (t) => { +test.serial('writeGranulesFromMessage() on update changing granule status to "queued", with different files, a new execution, and a stale granule createdAt, does not update the granule values or files in Postgres, so the pre-existing values and files will persist', async (t) => { // for queued status, there is a difference whether the re-write is with the same execution // or a new one const { collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -3405,7 +3016,6
@@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -3460,7 +3070,6 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -3476,28 +3085,24 @@ test.serial('writeGranulesFromMessage() on update changing granule status to "qu updatedAt: apiFormattedInitialPostgresGranule.updatedAt, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); - // Translated postgres granule matches expected updatedGranule // minus model defaults t.deepEqual( apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); -test.serial('writeGranulesFromMessage() on update with "completed" status and stale granule createdAt, does not persist the granule updates to Postgres/ES or to the files', async (t) => { +test.serial('writeGranulesFromMessage() on update with "completed" status and stale granule createdAt, does not persist the granule updates to Postgres or to the files', async (t) => { // for completed status, whether the update is with the same execution or a new one // does not make a difference const { collection, collectionCumulusId, cumulusMessage, - esGranulesClient, executionCumulusId, files, granulePgModel, @@ -3530,7 +3135,6 @@ test.serial('writeGranulesFromMessage() on update with "completed" status and st collection_cumulus_id: collectionCumulusId, }) ); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const initialPostgresRecord = await granulePgModel.get(knex, { granule_id: granuleId, @@ -3580,7 +3184,6 @@ test.serial('writeGranulesFromMessage() on update with "completed" status and st granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId); const expectedGranule = { ...apiFormattedInitialPostgresGranule, @@ -3596,24 +3199,20 @@ test.serial('writeGranulesFromMessage() on update with "completed" status and st updatedAt: apiFormattedInitialPostgresGranule.updatedAt, }; - // Files array order is not promised to match between datastores - [esRecord, expectedGranule, apiFormattedPostgresGranule].forEach((record) => { + [expectedGranule, apiFormattedPostgresGranule].forEach((record) => { record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); - // Translated postgres granule matches expected updatedGranule // minus model defaults t.deepEqual( apiFormattedPostgresGranule, removeNilProperties(expectedGranule) ); - t.deepEqual(omit(esRecord, ['_id']), removeNilProperties(expectedGranule)); }); test.serial('writeGranuleFromApi() removes preexisting granule file from postgres on granule update with disjoint files', async (t) => { const { collectionCumulusId, - esClient, filePgModel, granule, granuleId, @@ -3634,7 +3233,7 @@ test.serial('writeGranuleFromApi() removes preexisting granule file from postgre key: 'fake_key', }, '*'); - await writeGranuleFromApi({ 
...granule, status: 'completed' }, knex, esClient, snsEventType); + await writeGranuleFromApi({ ...granule, status: 'completed' }, knex, snsEventType); const granuleRecord = await granulePgModel.get( knex, @@ -3654,66 +3253,60 @@ test.serial('writeGranuleFromApi() throws for a granule with no granuleId provid const { knex, granule, - esClient, } = t.context; await t.throwsAsync( - writeGranuleFromApi({ ...granule, granuleId: undefined }, knex, esClient, 'Create'), + writeGranuleFromApi({ ...granule, granuleId: undefined }, knex, 'Create'), { message: 'Could not create granule record, invalid granuleId: undefined' } ); }); test.serial('writeGranuleFromApi() throws for a granule with an invalid collectionId', async (t) => { const { - esClient, granule, knex, } = t.context; await t.throwsAsync( - writeGranuleFromApi({ ...granule, collectionId: constructCollectionId('wrong____', 'collection') }, knex, esClient, 'Create'), + writeGranuleFromApi({ ...granule, collectionId: constructCollectionId('wrong____', 'collection') }, knex, 'Create'), { message: 'Record in collections with identifiers {"name":"wrong____","version":"collection"} does not exist.' } ); }); test.serial('writeGranuleFromApi() throws for a granule with no collectionId provided', async (t) => { const { - esClient, knex, granule, } = t.context; await t.throwsAsync( - writeGranuleFromApi({ ...granule, collectionId: undefined }, knex, esClient, 'Create'), + writeGranuleFromApi({ ...granule, collectionId: undefined }, knex, 'Create'), { message: 'collectionId required to generate a granule record' } ); }); test.serial('writeGranuleFromApi() throws for a granule with an invalid collectionId provided', async (t) => { const { - esClient, knex, granule, } = t.context; const badCollectionId = `collectionId${cryptoRandomString({ length: 5 })}`; await t.throwsAsync( - writeGranuleFromApi({ ...granule, collectionId: badCollectionId }, knex, esClient, 'Create'), + writeGranuleFromApi({ ...granule, collectionId: badCollectionId }, knex, 'Create'), { message: `invalid collectionId: "${badCollectionId}"` } ); }); -test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsearch.', async (t) => { +test.serial('writeGranuleFromApi() writes a granule to PostgreSQL', async (t) => { const { collectionCumulusId, - esClient, - esGranulesClient, granule, granuleId, granulePgModel, knex, } = t.context; - const result = await writeGranuleFromApi({ ...granule, error: {} }, knex, esClient, 'Create'); + const result = await writeGranuleFromApi({ ...granule, error: {} }, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); @@ -3721,19 +3314,11 @@ test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsea knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await esGranulesClient.get(granuleId); const postgresActual = await translatePostgresGranuleToApiGranule({ knexOrTransaction: knex, granulePgRecord: postgresRecord, }); - t.deepEqual({ - ...granule, - _id: esRecord._id, - timestamp: postgresActual.timestamp, - error: {}, - }, esRecord); - t.deepEqual( { ...granule, @@ -3749,11 +3334,9 @@ test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsea ); }); -test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsearch and populates a consistent createdAt default value', async (t) => { +test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and populates a consistent createdAt default value', async (t) => { const { 
collectionCumulusId, - esClient, - esGranulesClient, granule, granuleId, granulePgModel, @@ -3762,7 +3345,7 @@ test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsea delete granule.createdAt; - const result = await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); + const result = await writeGranuleFromApi({ ...granule }, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); @@ -3770,7 +3353,6 @@ test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsea knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await esGranulesClient.get(granuleId); const postgresTranslated = await translatePostgresGranuleToApiGranule({ knexOrTransaction: knex, granulePgRecord: postgresRecord, @@ -3779,13 +3361,6 @@ test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsea const defaultCreatedAt = postgresTranslated.createdAt; const defaultTimestamp = postgresTranslated.timestamp; - t.deepEqual({ - ...granule, - _id: esRecord._id, - createdAt: defaultCreatedAt, - timestamp: defaultTimestamp, - }, esRecord); - t.deepEqual( { ...granule, @@ -3801,11 +3376,9 @@ test.serial('writeGranuleFromApi() writes a granule to PostgreSQL and Elasticsea ); }); -test.serial('writeGranuleFromApi() given a payload with undefined files, keeps existing files in all datastores', async (t) => { +test.serial('writeGranuleFromApi() given a payload with undefined files, keeps existing files', async (t) => { const { collectionCumulusId, - esClient, - esGranulesClient, files, granule, granulePgModel, @@ -3813,9 +3386,7 @@ test.serial('writeGranuleFromApi() given a payload with undefined files, keeps e granuleId, } = t.context; - await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); - - const originalEsGranule = await esGranulesClient.get(granuleId); + await writeGranuleFromApi({ ...granule }, knex, 'Create'); const originalpgGranule = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } @@ -3833,13 +3404,8 @@ test.serial('writeGranuleFromApi() given a payload with undefined files, keeps e originalApiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); - originalEsGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); - // Files were written correctly in initial DB writes t.true(originalPayloadFiles.length > 0); - t.deepEqual(originalEsGranule.files, originalPayloadFiles); t.deepEqual(originalApiGranule.files, originalPayloadFiles); // Update existing granule with a partial granule object @@ -3853,43 +3419,34 @@ test.serial('writeGranuleFromApi() given a payload with undefined files, keeps e const { pgGranule, - esGranule, } = await updateGranule(t, updateGranulePayload); const apiGranule = await translatePostgresGranuleToApiGranule({ granulePgRecord: pgGranule, knexOrTransaction: knex, }); - - esGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); apiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); t.deepEqual(apiGranule.files, originalPayloadFiles); - t.deepEqual(esGranule.files, originalPayloadFiles); }); test.serial('writeGranuleFromApi() given a partial granule overwrites only provided fields', async (t) => { const { collectionCumulusId, - esClient, - esGranulesClient, granule, granuleId, granulePgModel, knex, } = t.context; - await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule }, knex, 'Create'); t.true(await granulePgModel.exists( 
knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const originalpgGranule = await granulePgModel.get( knex, @@ -3907,7 +3464,6 @@ test.serial('writeGranuleFromApi() given a partial granule overwrites only provi const { updatedPgGranuleFields, pgGranule, - esGranule, } = await updateGranule(t, updateGranulePayload); // Postgres granule matches expected updatedGranule @@ -3920,28 +3476,15 @@ test.serial('writeGranuleFromApi() given a partial granule overwrites only provi granulePgRecord: pgGranule, knexOrTransaction: knex, }); - - // Files array order not guarunteed to match between datastores - esGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); apiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); - - // Postgres and ElasticSearch granules matches - t.deepEqual( - apiGranule, - omit(esGranule, ['_id']) - ); }); -test.serial('writeGranuleFromApi() given a granule with all fields populated is written to the DB, on update removes all expected nullified fields from all datastores', async (t) => { +test.serial('writeGranuleFromApi() given a granule with all fields populated is written to the DB, on update removes all expected nullified fields', async (t) => { const { collection, collectionCumulusId, - esClient, - esGranulesClient, executionUrl, files, granulePgModel, @@ -4000,13 +3543,12 @@ test.serial('writeGranuleFromApi() given a granule with all fields populated is updatedAt: Date.now(), }); - await writeGranuleFromApi({ ...completeGranule }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...completeGranule }, knex, 'Create'); t.true(await granulePgModel.exists( knex, { granule_id: completeGranule.granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(completeGranule.granuleId)); const originalpgGranule = await granulePgModel.get( knex, @@ -4024,7 +3566,6 @@ test.serial('writeGranuleFromApi() given a granule with all fields populated is const { pgGranule, - esGranule, } = await updateGranule(t, completeGranule); const apiFormattedPostgresGranule = await translatePostgresGranuleToApiGranule({ @@ -4057,42 +3598,23 @@ test.serial('writeGranuleFromApi() given a granule with all fields populated is .sort(), undefinedApiKeys.sort() ); - - // Postgres and ElasticSearch granules matches - t.deepEqual( - omit(apiFormattedPostgresGranule, ['files']), - omit(esGranule, ['_id']) - ); - // Validate that none of the responses come back as 'null', we want them removed, not set - t.is(validNullableGranuleKeys.filter((key) => esGranule[key] === null).length, 0); - // Validate that all of the nullable keys are unset - const undefinedEsKeys = validNullableGranuleKeys.filter((i) => !apiFormatOmitList.includes(i)); - t.deepEqual( - validNullableGranuleKeys - .filter((key) => esGranule[key] === undefined) - .sort(), - undefinedEsKeys.sort() - ); }); -test.serial('writeGranuleFromApi() when called on a granuleId that exists in the datastore does not modify the `published` field if it is not set', async (t) => { +test.serial('writeGranuleFromApi() when called on a granuleId does not modify the `published` field if it is not set', async (t) => { const { collectionCumulusId, - esClient, - esGranulesClient, granule, granuleId, granulePgModel, knex, } = t.context; - await writeGranuleFromApi({ ...granule, published: true }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, published: true }, knex, 'Create'); t.true(await 
granulePgModel.exists( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const originalPgGranule = await granulePgModel.get( knex, @@ -4109,24 +3631,14 @@ test.serial('writeGranuleFromApi() when called on a granuleId that exists in the const { pgGranule, - esGranule, } = await updateGranule(t, updateGranulePayload); t.is(pgGranule.published, originalPgGranule.published); - - const apiGranule = await translatePostgresGranuleToApiGranule({ - granulePgRecord: pgGranule, - knexOrTransaction: knex, - }); - - t.is(apiGranule.published, esGranule.published); }); -test.serial('writeGranuleFromApi() given an empty array as a files key will remove all existing files and keep Postgres/Elastic in-sync', async (t) => { +test.serial('writeGranuleFromApi() given an empty array as a files key will remove all existing files', async (t) => { const { collectionCumulusId, - esClient, - esGranulesClient, files, granule, granuleId, @@ -4134,9 +3646,7 @@ test.serial('writeGranuleFromApi() given an empty array as a files key will remo knex, } = t.context; - await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); - - const originalEsGranule = await esGranulesClient.get(granuleId); + await writeGranuleFromApi({ ...granule }, knex, 'Create'); const originalpgGranule = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } @@ -4154,12 +3664,8 @@ test.serial('writeGranuleFromApi() given an empty array as a files key will remo originalApiGranule.files.sort( (f1, f2) => sortFilesByBuckets(f1, f2) ); - originalEsGranule.files.sort( - (f1, f2) => sortFilesByBuckets(f1, f2) - ); // Files were written correctly in initial DB writes - t.deepEqual(originalEsGranule.files, originalPayloadFiles); t.deepEqual(originalApiGranule.files, originalPayloadFiles); // Update existing granule with a partial granule object @@ -4173,7 +3679,6 @@ test.serial('writeGranuleFromApi() given an empty array as a files key will remo const { updatedPgGranuleFields, pgGranule, - esGranule, } = await updateGranule(t, updateGranulePayload); // Postgres granule matches expected updatedGranule @@ -4186,23 +3691,19 @@ test.serial('writeGranuleFromApi() given an empty array as a files key will remo granulePgRecord: pgGranule, knexOrTransaction: knex, }); - - // Files were removed from all datastores t.deepEqual(apiGranule.files, []); - t.is(esGranule.files, undefined); }); test.serial('writeGranuleFromApi() writes a granule without an execution', async (t) => { const { collectionCumulusId, - esClient, granule, granuleId, granulePgModel, knex, } = t.context; - await writeGranuleFromApi({ ...granule, execution: undefined }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, execution: undefined }, knex, 'Create'); t.true(await granulePgModel.exists( knex, @@ -4213,14 +3714,13 @@ test.serial('writeGranuleFromApi() writes a granule without an execution', async test.serial('writeGranuleFromApi() can write a granule with no files associated with it', async (t) => { const { knex, - esClient, granule, granuleId, granulePgModel, collectionCumulusId, } = t.context; - await writeGranuleFromApi({ ...granule, files: [] }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, files: [] }, knex, 'Create'); t.true(await granulePgModel.exists( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } @@ -4229,20 +3729,18 @@ test.serial('writeGranuleFromApi() can write a granule with 
no files associated test.serial('writeGranuleFromApi() throws with granule with an execution url that does not exist', async (t) => { const { - esClient, knex, granule, } = t.context; const execution = `execution${cryptoRandomString({ length: 5 })}`; await t.throwsAsync( - writeGranuleFromApi({ ...granule, execution }, knex, esClient, 'Create'), + writeGranuleFromApi({ ...granule, execution }, knex, 'Create'), { message: `Could not find execution in PostgreSQL database with url ${execution}` } ); }); -test.serial('writeGranuleFromApi() saves granule records to Postgres and ElasticSearch with same input time values.', async (t) => { +test.serial('writeGranuleFromApi() saves updated values for running granule record to Postgres on rewrite', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -4250,43 +3748,11 @@ test.serial('writeGranuleFromApi() saves granule records to Postgres and Elastic granulePgModel, } = t.context; - const createdAt = Date.now() - 24 * 60 * 60 * 1000; - const updatedAt = Date.now() - 100000; - const timestamp = Date.now(); - - const result = await writeGranuleFromApi({ ...granule, createdAt, updatedAt, timestamp }, knex, esClient, 'Create'); - - t.is(result, `Wrote Granule ${granuleId}`); - - const postgresRecord = await granulePgModel.get( - knex, - { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } - ); - const esRecord = await t.context.esGranulesClient.get(granuleId); - - t.truthy(esRecord.timestamp); - t.is(postgresRecord.created_at.getTime(), esRecord.createdAt); - t.is(postgresRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(postgresRecord.timestamp.getTime(), esRecord.timestamp); -}); - -test.serial('writeGranuleFromApi() saves updated values for running granule record to Postgres and ElasticSearch on rewrite', async (t) => { - const { - esClient, - esGranulesClient, - knex, - collectionCumulusId, - granule, - granuleId, - granulePgModel, - } = t.context; - - await writeGranuleFromApi({ ...granule, status: 'completed', published: true }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, status: 'completed', published: true }, knex, 'Create'); t.true(await granulePgModel.exists( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const createdAt = Date.now() - 24 * 60 * 60 * 1000; const updatedAt = Date.now() - 100000; @@ -4304,7 +3770,6 @@ test.serial('writeGranuleFromApi() saves updated values for running granule reco status: 'running', }, knex, - esClient, 'Create' ); @@ -4314,30 +3779,18 @@ test.serial('writeGranuleFromApi() saves updated values for running granule reco knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); - - t.is(postgresRecord.created_at.getTime(), esRecord.createdAt); - t.is(postgresRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(postgresRecord.timestamp.getTime(), esRecord.timestamp); t.is(postgresRecord.duration, updatedDuration); - t.is(esRecord.duration, updatedDuration); - t.is(postgresRecord.cmr_link, updatedCmrLink); - t.is(esRecord.cmrLink, updatedCmrLink); // Validate that value not in API update value is not changed t.is(postgresRecord.published, true); - t.is(esRecord.published, true); t.is(postgresRecord.status, 'running'); - t.is(esRecord.status, 'running'); }); -test.serial('writeGranuleFromApi() saves updated values for queued granule record to Postgres and ElasticSearch on 
rewrite', async (t) => { +test.serial('writeGranuleFromApi() saves updated values for queued granule record to Postgres on rewrite', async (t) => { const { - esClient, - esGranulesClient, knex, collectionCumulusId, granule, @@ -4345,12 +3798,11 @@ test.serial('writeGranuleFromApi() saves updated values for queued granule recor granulePgModel, } = t.context; - await writeGranuleFromApi({ ...granule, status: 'completed', published: true }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, status: 'completed', published: true }, knex, 'Create'); t.true(await granulePgModel.exists( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const createdAt = Date.now() - 24 * 60 * 60 * 1000; const updatedAt = Date.now() - 100000; @@ -4368,7 +3820,6 @@ test.serial('writeGranuleFromApi() saves updated values for queued granule recor status: 'queued', }, knex, - esClient, 'Create' ); @@ -4378,61 +3829,20 @@ test.serial('writeGranuleFromApi() saves updated values for queued granule recor knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); - - t.is(postgresRecord.created_at.getTime(), esRecord.createdAt); - t.is(postgresRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(postgresRecord.timestamp.getTime(), esRecord.timestamp); t.is(postgresRecord.duration, updatedDuration); - t.is(esRecord.duration, updatedDuration); t.is(postgresRecord.cmr_link, updatedCmrLink); - t.is(esRecord.cmrLink, updatedCmrLink); // Validate that value not in API update value is not changed t.is(postgresRecord.published, true); - t.is(esRecord.published, true); t.is(postgresRecord.status, 'queued'); - t.is(esRecord.status, 'queued'); -}); - -test.serial('writeGranuleFromApi() saves granule records to Postgres and ElasticSearch with same default time values.', async (t) => { - const { - esClient, - knex, - collectionCumulusId, - granule, - granuleId, - granulePgModel, - } = t.context; - - const createdAt = undefined; - const updatedAt = undefined; - const timestamp = undefined; - - const result = await writeGranuleFromApi({ ...granule, createdAt, updatedAt, timestamp }, knex, esClient, 'Create'); - - t.is(result, `Wrote Granule ${granuleId}`); - - const postgresRecord = await granulePgModel.get( - knex, - { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } - ); - const esRecord = await t.context.esGranulesClient.get(granuleId); - - t.truthy(esRecord.timestamp); - t.is(postgresRecord.created_at.getTime(), esRecord.createdAt); - t.is(postgresRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(postgresRecord.timestamp.getTime(), esRecord.timestamp); - t.is(postgresRecord.timestamp.getTime(), esRecord.updatedAt); }); test.serial('writeGranuleFromApi() saves file records to Postgres if Postgres write is enabled and workflow status is "completed"', async (t) => { const { collectionCumulusId, - esClient, filePgModel, granule, granuleId, @@ -4440,7 +3850,7 @@ test.serial('writeGranuleFromApi() saves file records to Postgres if Postgres wr knex, } = t.context; - await writeGranuleFromApi({ ...granule, status: 'completed' }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, status: 'completed' }, knex, 'Create'); const granuleRecord = await granulePgModel.get( knex, @@ -4458,7 +3868,6 @@ test.serial('writeGranuleFromApi() saves file records to Postgres if Postgres wr test.serial('writeGranuleFromApi() sets 
granule to fail, writes all valid files and throws if any non-valid file fails', async (t) => { const { collectionCumulusId, - esClient, filePgModel, granule, granulePgModel, @@ -4477,7 +3886,7 @@ test.serial('writeGranuleFromApi() sets granule to fail, writes all valid files } const validFileCount = allfiles.length - invalidFiles.length; - await t.throwsAsync(writeGranuleFromApi({ ...granule, files: allfiles }, knex, esClient, 'Create')); + await t.throwsAsync(writeGranuleFromApi({ ...granule, files: allfiles }, knex, 'Create')); t.false(await filePgModel.exists(knex, { key: invalidFiles[0].key })); t.false(await filePgModel.exists(knex, { key: invalidFiles[1].key })); @@ -4498,7 +3907,6 @@ test.serial('writeGranuleFromApi() sets granule to fail, writes all valid files test.serial('writeGranuleFromApi() sets granule to failed with expected error and throws if any file fails', async (t) => { const { collectionCumulusId, - esClient, granule, granuleId, knex, @@ -4520,7 +3928,6 @@ test.serial('writeGranuleFromApi() sets granule to failed with expected error an await t.throwsAsync(writeGranuleFromApi( { ...granule, status: 'completed', files }, knex, - esClient, 'Create' )); @@ -4533,10 +3940,8 @@ test.serial('writeGranuleFromApi() sets granule to failed with expected error an t.true(pgGranuleError[0].Cause.includes('AggregateError')); }); -test.serial('writeGranuleFromApi() allows update of complete granule record in all datastores if older granule exists with same execution in a completed state', async (t) => { +test.serial('writeGranuleFromApi() allows update of complete granule record if older granule exists with same execution in a completed state', async (t) => { const { - esClient, - esGranulesClient, knex, collectionCumulusId, granule, @@ -4544,12 +3949,11 @@ test.serial('writeGranuleFromApi() allows update of complete granule record in a granulePgModel, } = t.context; - await writeGranuleFromApi({ ...granule, status: 'completed', published: true }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, status: 'completed', published: true }, knex, 'Create'); t.true(await granulePgModel.exists( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const createdAt = Date.now() - 24 * 60 * 60 * 1000; const updatedAt = Date.now() - 100000; @@ -4567,7 +3971,6 @@ test.serial('writeGranuleFromApi() allows update of complete granule record in a status: 'running', }, knex, - esClient, 'Create' ); @@ -4577,28 +3980,18 @@ test.serial('writeGranuleFromApi() allows update of complete granule record in a knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); - - t.is(postgresRecord.created_at.getTime(), esRecord.createdAt); - t.is(postgresRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(postgresRecord.timestamp.getTime(), esRecord.timestamp); t.is(postgresRecord.duration, updatedDuration); - t.is(esRecord.duration, updatedDuration); t.is(postgresRecord.cmr_link, updatedCmrLink); - t.is(esRecord.cmrLink, updatedCmrLink); // Validate that value not in API update value is not changed t.is(postgresRecord.published, true); - t.is(esRecord.published, true); }); -test.serial('writeGranuleFromApi() allows overwrite of granule records in all datastores if granule exists with newer createdAt and has same execution in a completed state', async (t) => { +test.serial('writeGranuleFromApi() allows overwrite of granule 
records if granule exists with newer createdAt and has same execution in a completed state', async (t) => { const { - esClient, executionUrl, - esGranulesClient, knex, collectionCumulusId, granule, @@ -4606,12 +3999,11 @@ test.serial('writeGranuleFromApi() allows overwrite of granule records in all da granulePgModel, } = t.context; - await writeGranuleFromApi({ ...granule, status: 'completed', published: true, execution: executionUrl }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, status: 'completed', published: true, execution: executionUrl }, knex, 'Create'); t.true(await granulePgModel.exists( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const createdAt = 1; const updatedAt = Date.now() - 100000; @@ -4629,7 +4021,6 @@ test.serial('writeGranuleFromApi() allows overwrite of granule records in all da status: 'running', }, knex, - esClient, 'Create' ); @@ -4639,35 +4030,24 @@ test.serial('writeGranuleFromApi() allows overwrite of granule records in all da knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgGranule = await translatePostgresGranuleToApiGranule({ knexOrTransaction: knex, granulePgRecord: postgresRecord, }); - t.is(postgresRecord.created_at.getTime(), esRecord.createdAt); - t.is(postgresRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(postgresRecord.timestamp.getTime(), esRecord.timestamp); - t.is(postgresRecord.duration, updatedDuration); - t.is(esRecord.duration, updatedDuration); t.is(postgresRecord.cmr_link, updatedCmrLink); - t.is(esRecord.cmrLink, updatedCmrLink); // Validate that value not in API update value is not changed t.is(postgresRecord.published, true); - t.is(esRecord.published, true); t.is(translatedPgGranule.execution, executionUrl); - t.is(esRecord.execution, executionUrl); }); -test.serial('writeGranuleFromApi() allows overwrite of granule records in all datastores and associates with new execution if granule exists with newer createdAt and an existing execution is in a completed state', async (t) => { +test.serial('writeGranuleFromApi() allows overwrite of granule records and associates with new execution if granule exists with newer createdAt and an existing execution is in a completed state', async (t) => { const { - esClient, - esGranulesClient, executionUrl, knex, collectionCumulusId, @@ -4676,12 +4056,11 @@ test.serial('writeGranuleFromApi() allows overwrite of granule records in all da granulePgModel, } = t.context; - await writeGranuleFromApi({ ...granule, status: 'completed', published: true, execution: executionUrl }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule, status: 'completed', published: true, execution: executionUrl }, knex, 'Create'); t.true(await granulePgModel.exists( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } )); - t.true(await esGranulesClient.exists(granuleId)); const stateMachineName = cryptoRandomString({ length: 5 }); const newExecutionName = cryptoRandomString({ length: 5 }); @@ -4714,7 +4093,6 @@ test.serial('writeGranuleFromApi() allows overwrite of granule records in all da status: 'running', }, knex, - esClient, 'Create' ); @@ -4724,36 +4102,25 @@ test.serial('writeGranuleFromApi() allows overwrite of granule records in all da knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await 
t.context.esGranulesClient.get(granuleId); const translatedPgGranule = await translatePostgresGranuleToApiGranule({ knexOrTransaction: knex, granulePgRecord: postgresRecord, }); - t.is(postgresRecord.created_at.getTime(), esRecord.createdAt); - t.is(postgresRecord.updated_at.getTime(), esRecord.updatedAt); - t.is(postgresRecord.timestamp.getTime(), esRecord.timestamp); - t.is(postgresRecord.duration, updatedDuration); - t.is(esRecord.duration, updatedDuration); t.is(postgresRecord.cmr_link, updatedCmrLink); - t.is(esRecord.cmrLink, updatedCmrLink); // Validate that value not in API update value is not changed t.is(postgresRecord.published, true); - t.is(esRecord.published, true); t.is(translatedPgGranule.execution, newExecutionUrl); - t.is(esRecord.execution, newExecutionUrl); }); -test.serial('updateGranuleStatusToQueued() updates granule status in PostgreSQL/Elasticsearch and publishes SNS message', async (t) => { +test.serial('updateGranuleStatusToQueued() updates granule status in PostgreSQL and publishes SNS message', async (t) => { const { collectionCumulusId, - esGranulesClient, - esClient, granule, granuleId, granulePgModel, @@ -4761,12 +4128,11 @@ test.serial('updateGranuleStatusToQueued() updates granule status in PostgreSQL/ QueueUrl, } = t.context; - await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule }, knex, 'Create'); const postgresRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await esGranulesClient.get(granuleId, granule.collectionId); const apiGranule = await translatePostgresGranuleToApiGranule({ granulePgRecord: postgresRecord, knexOrTransaction: knex, @@ -4782,20 +4148,14 @@ test.serial('updateGranuleStatusToQueued() updates granule status in PostgreSQL/ { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); const omitList = ['_id', 'execution', 'status', 'updatedAt', 'updated_at', 'files']; - const sortByKeys = ['bucket', 'key']; - const updatedEsRecord = await esGranulesClient.get(granuleId, granule.collectionId); const translatedPgGranule = await translatePostgresGranuleToApiGranule({ granulePgRecord: updatedPostgresRecord, knexOrTransaction: knex, }); t.is(updatedPostgresRecord.status, 'queued'); - t.is(updatedEsRecord.status, 'queued'); t.is(translatedPgGranule.execution, apiGranule.execution); t.deepEqual(omit(postgresRecord, omitList), omit(updatedPostgresRecord, omitList)); - t.deepEqual(sortBy(translatedPgGranule.files, sortByKeys), sortBy(esRecord.files, sortByKeys)); - t.deepEqual(omit(esRecord, omitList), omit(updatedEsRecord, omitList)); - t.deepEqual(omit(translatedPgGranule, omitList), omit(updatedEsRecord, omitList)); const { Messages } = await sqs().receiveMessage({ QueueUrl, @@ -4812,13 +4172,12 @@ test.serial('updateGranuleStatusToQueued() updates granule status in PostgreSQL/ test.serial('updateGranuleStatusToQueued() throws error if record does not exist in pg', async (t) => { const { - esClient, knex, granule, granuleId, } = t.context; - await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule }, knex, 'Create'); const name = randomId('name'); const version = randomId('version'); @@ -4835,15 +4194,14 @@ test.serial('updateGranuleStatusToQueued() throws error if record does not exist ); }); -test.serial('updateGranuleStatusToQueued() does not update Elasticsearch granule if writing to PostgreSQL fails', async (t) => { 
+test.serial('updateGranuleStatusToQueued() does not publish a SNS message if writing to PostgreSQL fails', async (t) => { const { collectionCumulusId, - esGranulesClient, - esClient, granule, granuleId, granulePgModel, knex, + QueueUrl, } = t.context; const testGranulePgModel = { @@ -4853,7 +4211,7 @@ test.serial('updateGranuleStatusToQueued() does not update Elasticsearch granule }, }; - await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); + await writeGranuleFromApi({ ...granule }, knex, 'Create'); const postgresRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } @@ -4863,10 +4221,8 @@ test.serial('updateGranuleStatusToQueued() does not update Elasticsearch granule granulePgRecord: postgresRecord, knexOrTransaction: knex, }); - const esRecord = await esGranulesClient.get(granuleId, granule.collectionId); t.is(postgresRecord.status, 'completed'); - t.is(esRecord.status, 'completed'); t.truthy(apiGranule.execution); await t.throwsAsync( @@ -4882,98 +4238,33 @@ test.serial('updateGranuleStatusToQueued() does not update Elasticsearch granule knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const updatedEsRecord = await esGranulesClient.get(granuleId, granule.collectionId); const translatedPgGranule = await translatePostgresGranuleToApiGranule({ granulePgRecord: updatedPostgresRecord, knexOrTransaction: knex, }); const omitList = ['_id', 'execution', 'updatedAt', 'updated_at', 'files']; - const sortByKeys = ['bucket', 'key']; t.not(updatedPostgresRecord.status, 'queued'); - t.not(esRecord.status, 'queued'); t.not(translatedPgGranule.execution, undefined); // Check that granules are equal in all data stores t.deepEqual(omit(postgresRecord, omitList), omit(updatedPostgresRecord, omitList)); - t.deepEqual(sortBy(translatedPgGranule.files, sortByKeys), sortBy(esRecord.files, sortByKeys)); - t.deepEqual(omit(esRecord, omitList), omit(updatedEsRecord, omitList)); - t.deepEqual(omit(translatedPgGranule, omitList), omit(esRecord, omitList)); -}); - -test.serial('updateGranuleStatusToQueued() does not update PostgreSQL granule if writing to Elasticsearch fails', async (t) => { - const { - collectionCumulusId, - esGranulesClient, - esClient, - granule, - granuleId, - granulePgModel, - knex, - } = t.context; - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - update: () => { - throw new Error('Elasticsearch failure'); - }, - delete: () => Promise.resolve(), - }, - }; - - await writeGranuleFromApi({ ...granule }, knex, esClient, 'Create'); - const postgresRecord = await granulePgModel.get( - knex, - { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } - ); - const apiGranule = await translatePostgresGranuleToApiGranule({ - granulePgRecord: postgresRecord, - knexOrTransaction: knex, - }); - const esRecord = await esGranulesClient.get(granuleId, granule.collectionId); - - t.is(postgresRecord.status, 'completed'); - t.is(esRecord.status, 'completed'); - // Should we consider making this an explicit granule execution PG call? 
- t.truthy(apiGranule.execution); - - await t.throwsAsync( - updateGranuleStatusToQueued({ - apiGranule, - knex, - esClient: fakeEsClient, - }), - { message: 'Elasticsearch failure' } - ); - - const updatedPostgresRecord = await granulePgModel.get( - knex, - { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } - ); - const updatedEsRecord = await esGranulesClient.get(granuleId, granule.collectionId); - const translatedPgGranule = await translatePostgresGranuleToApiGranule({ - granulePgRecord: updatedPostgresRecord, - knexOrTransaction: knex, + const { Messages } = await sqs().receiveMessage({ + QueueUrl, + WaitTimeSeconds: 10, }); - const omitList = ['_id', 'execution', 'updatedAt', 'updated_at', 'files']; - const sortByKeys = ['bucket', 'key']; - - t.not(updatedPostgresRecord.status, 'queued'); - t.not(esRecord.status, 'queued'); + const snsMessageBody = JSON.parse(Messages[0].Body); + const publishedMessage = JSON.parse(snsMessageBody.Message); - // Check that granules are equal in all data stores - t.deepEqual(omit(postgresRecord, omitList), omit(updatedPostgresRecord, omitList)); - t.deepEqual(sortBy(translatedPgGranule.files, sortByKeys), sortBy(esRecord.files, sortByKeys)); - t.deepEqual(omit(esRecord, omitList), omit(updatedEsRecord, omitList)); - t.deepEqual(omit(translatedPgGranule, omitList), omit(esRecord, omitList)); + t.is(Messages.length, 1); + t.deepEqual(publishedMessage.record, apiGranule); + t.is(publishedMessage.event, 'Create'); }); test.serial('_writeGranule() successfully publishes an SNS message', async (t) => { const { granule, executionCumulusId, - esClient, knex, granulePgModel, granuleId, @@ -4995,12 +4286,9 @@ test.serial('_writeGranule() successfully publishes an SNS message', async (t) = executionCumulusId, granulePgModel, knex, - esClient, snsEventType: 'Update', }); - t.true(await t.context.esGranulesClient.exists(granuleId)); - const retrievedPgGranule = await granulePgModel.get(knex, { granule_id: granuleId, collection_cumulus_id: postgresGranuleRecord.collection_cumulus_id, @@ -5028,18 +4316,11 @@ test.serial('updateGranuleStatusToFailed() updates granule status in the databas granuleId, granulePgModel, } = t.context; - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - update: () => Promise.resolve(), - delete: () => Promise.resolve(), - }, - }; granule.status = 'running'; const snsEventType = 'Update'; try { - await writeGranuleFromApi({ ...granule }, knex, fakeEsClient, snsEventType); + await writeGranuleFromApi({ ...granule }, knex, snsEventType); } catch (error) { console.log(`initial write: ${JSON.stringify(error)}`); } @@ -5056,7 +4337,7 @@ test.serial('updateGranuleStatusToFailed() updates granule status in the databas const fakeErrorObject = { Error: 'This is a fake error', Cause: { Error: 'caused by some fake issue' } }; await updateGranuleStatusToFailed( - { granule: apiGranule, knex, error: fakeErrorObject, fakeEsClient } + { granule: apiGranule, knex, error: fakeErrorObject } ); const updatedPostgresRecord = await granulePgModel.get( knex, @@ -5069,7 +4350,6 @@ test.serial('updateGranuleStatusToFailed() throws error if record does not exist const { knex, granuleId, - esClient, } = t.context; const name = randomId('name'); @@ -5081,7 +4361,7 @@ test.serial('updateGranuleStatusToFailed() throws error if record does not exist const fakeErrorObject = { Error: 'This is a fake error', Cause: { Error: 'caused by some fake issue' } }; await t.throwsAsync( updateGranuleStatusToFailed( - { granule: 
badGranule, knex, error: fakeErrorObject, esClient } + { granule: badGranule, knex, error: fakeErrorObject } ), { name: 'RecordDoesNotExist', @@ -5090,9 +4370,8 @@ test.serial('updateGranuleStatusToFailed() throws error if record does not exist ); }); -test.serial('writeGranuleFromApi() overwrites granule record with publish set to null with publish value set to false to all datastores', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with publish set to null with publish value set to false', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5100,7 +4379,7 @@ test.serial('writeGranuleFromApi() overwrites granule record with publish set to granulePgModel, } = t.context; - const result = await writeGranuleFromApi({ ...granule, published: true }, knex, esClient, 'Create'); + const result = await writeGranuleFromApi({ ...granule, published: true }, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); const originalPostgresRecord = await granulePgModel.get( @@ -5110,22 +4389,19 @@ test.serial('writeGranuleFromApi() overwrites granule record with publish set to t.true(originalPostgresRecord.published); - const updateResult = await writeGranuleFromApi({ ...granule, published: null }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, published: null }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const postgresRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); t.false(postgresRecord.published); - t.false(esRecord.published); }); -test.serial('writeGranuleFromApi() overwrites granule record with publish set to true with publish value set to true to all datastores', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with publish set to true with publish value set to true', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5133,7 +4409,7 @@ test.serial('writeGranuleFromApi() overwrites granule record with publish set to granulePgModel, } = t.context; - const result = await writeGranuleFromApi({ ...granule, published: true }, knex, esClient, 'Create'); + const result = await writeGranuleFromApi({ ...granule, published: true }, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); const originalPostgresRecord = await granulePgModel.get( @@ -5143,22 +4419,19 @@ test.serial('writeGranuleFromApi() overwrites granule record with publish set to t.true(originalPostgresRecord.published); - const updateResult = await writeGranuleFromApi({ ...granule, published: true }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, published: true }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const postgresRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); t.true(postgresRecord.published); - t.true(esRecord.published); }); -test.serial('writeGranuleFromApi() overwrites granule record with error set to null with error value set to "{}" to all datastores', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with error set to null with error value set to "{}"', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5166,25 +4439,22 @@ test.serial('writeGranuleFromApi() overwrites 
granule record with error set to n granulePgModel, } = t.context; - const result = await writeGranuleFromApi(granule, knex, esClient, 'Create'); + const result = await writeGranuleFromApi(granule, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); - const updateResult = await writeGranuleFromApi({ ...granule, error: null }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, error: null }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); t.deepEqual(granulePgRecord.error, {}); - t.deepEqual(esRecord.error, {}); }); -test.serial('writeGranuleFromApi() overwrites granule record with error set with expected value to all datastores', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with error set with expected value', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5192,26 +4462,23 @@ test.serial('writeGranuleFromApi() overwrites granule record with error set with granulePgModel, } = t.context; - const result = await writeGranuleFromApi({ ...granule, error: null }, knex, esClient, 'Create'); + const result = await writeGranuleFromApi({ ...granule, error: null }, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); const updatedError = { fakeErrorKey: 'fakeErrorValue' }; - const updateResult = await writeGranuleFromApi({ ...granule, error: updatedError }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, error: updatedError }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); t.deepEqual(granulePgRecord.error, updatedError); - t.deepEqual(esRecord.error, updatedError); }); -test.serial('writeGranuleFromApi() overwrites granule record with status "completed" with files set to null with file value set to undefined/default in Elastic and "[]" in Postgres', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with status "completed" with files set to null with file value set to "[]" in Postgres', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5219,17 +4486,16 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "comple granulePgModel, } = t.context; - const result = await writeGranuleFromApi(granule, knex, esClient, 'Create'); + const result = await writeGranuleFromApi(granule, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); - const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'completed' }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'completed' }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, @@ -5237,12 +4503,10 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "comple }); t.deepEqual(translatedPgRecord.files, []); - 
t.is(esRecord.files, undefined); }); -test.serial('writeGranuleFromApi() writes a new granule with files set to "[]" results in file value set to undefined/default in Elastic and "[]" in Postgres', async (t) => { +test.serial('writeGranuleFromApi() writes a new granule with files set to "[]" results in file value set to "[]" in Postgres', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5250,14 +4514,13 @@ test.serial('writeGranuleFromApi() writes a new granule with files set to "[]" r granulePgModel, } = t.context; - const updateResult = await writeGranuleFromApi({ ...granule, files: [] }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, files: [] }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, @@ -5265,12 +4528,10 @@ test.serial('writeGranuleFromApi() writes a new granule with files set to "[]" r }); t.deepEqual(translatedPgRecord.files, []); - t.is(esRecord.files, undefined); }); -test.serial('writeGranuleFromApi() overwrites granule record with status "failed" with files set to null with file value set to undefined/default in Elastic and "[]" in Postgres', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with status "failed" with files set to null with file value set to "[]" in Postgres', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5278,17 +4539,16 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "failed granulePgModel, } = t.context; - const result = await writeGranuleFromApi(granule, knex, esClient, 'Create'); + const result = await writeGranuleFromApi(granule, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); - const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'failed' }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'failed' }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, @@ -5296,12 +4556,10 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "failed }); t.deepEqual(translatedPgRecord.files, []); - t.is(esRecord.files, undefined); }); -test.serial('writeGranuleFromApi() overwrites granule record with status "running" with files set to null with file value set to undefined/default in Elastic and "[]" in Postgres', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with status "running" with files set to null with file value set to"[]" in Postgres', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5309,17 +4567,16 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "runnin granulePgModel, } = t.context; - const result = await writeGranuleFromApi(granule, knex, esClient, 'Create'); + const result = await writeGranuleFromApi(granule, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); - const updateResult = await 
writeGranuleFromApi({ ...granule, files: null, status: 'running' }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'running' }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, @@ -5327,12 +4584,10 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "runnin }); t.deepEqual(translatedPgRecord.files, []); - t.is(esRecord.files, undefined); }); -test.serial('writeGranuleFromApi() overwrites granule record with status "queued" with files set to null with file value set to undefined/default in Elastic and "[]" in Postgres', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record with status "queued" with files set to null with file value set to "[]" in Postgres', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5340,17 +4595,16 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "queued granulePgModel, } = t.context; - const result = await writeGranuleFromApi(granule, knex, esClient, 'Create'); + const result = await writeGranuleFromApi(granule, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); - const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'queued' }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'queued' }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, @@ -5358,12 +4612,10 @@ test.serial('writeGranuleFromApi() overwrites granule record with status "queued }); t.deepEqual(translatedPgRecord.files, []); - t.is(esRecord.files, undefined); }); -test.serial('writeGranuleFromApi() overwrites granule record on overwrite with files set to all datastores', async (t) => { +test.serial('writeGranuleFromApi() overwrites granule record on overwrite with files', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5371,34 +4623,31 @@ test.serial('writeGranuleFromApi() overwrites granule record on overwrite with f granulePgModel, } = t.context; - const result = await writeGranuleFromApi({ ...granule, files: null }, knex, esClient, 'Create'); + const result = await writeGranuleFromApi({ ...granule, files: null }, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); - const updateResult = await writeGranuleFromApi(granule, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi(granule, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, knexOrTransaction: knex, }); - [esRecord, granule, translatedPgRecord].forEach((record) => { + [granule, translatedPgRecord].forEach((record) => { 
record.files.sort((f1, f2) => sortFilesByBuckets(f1, f2)); }); t.deepEqual(translatedPgRecord.files, granule.files); - t.deepEqual(esRecord.files, granule.files); }); test.serial('writeGranuleFromApi() overwrites granule with expected nullified values for all states', async (t) => { const { - esClient, knex, collectionCumulusId, granule, @@ -5406,17 +4655,16 @@ test.serial('writeGranuleFromApi() overwrites granule with expected nullified va granulePgModel, } = t.context; - const result = await writeGranuleFromApi(granule, knex, esClient, 'Create'); + const result = await writeGranuleFromApi(granule, knex, 'Create'); t.is(result, `Wrote Granule ${granuleId}`); - const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'completed' }, knex, esClient, 'Create'); + const updateResult = await writeGranuleFromApi({ ...granule, files: null, status: 'completed' }, knex, 'Create'); t.is(updateResult, `Wrote Granule ${granuleId}`); const granulePgRecord = await granulePgModel.get( knex, { granule_id: granuleId, collection_cumulus_id: collectionCumulusId } ); - const esRecord = await t.context.esGranulesClient.get(granuleId); const translatedPgRecord = await translatePostgresGranuleToApiGranule({ granulePgRecord, @@ -5424,5 +4672,4 @@ test.serial('writeGranuleFromApi() overwrites granule with expected nullified va }); t.deepEqual(translatedPgRecord.files, []); - t.is(esRecord.files, undefined); }); From d3c36a93d1c292ff7968defcff33fa090f26b8e0 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Thu, 25 Jul 2024 14:36:33 -0400 Subject: [PATCH 13/61] CUMULUS-3240: Remove Elasticsearch dependency from executions endpoints (#3723) * CUMULUS-3240:Remove ElasticSearch dependency from Executions endpoints * update test-executions * remove es dependencies for execution * update changelog * fix lint and warning * address PR feedback * remove esClient from createExecutionRecords --- CHANGELOG.md | 4 +- packages/api/endpoints/executions.js | 34 +-- packages/api/lib/executions.js | 18 +- packages/api/lib/testUtils.js | 8 - .../api/lib/writeRecords/write-execution.js | 83 ++---- .../api/tests/endpoints/test-executions.js | 249 +----------------- .../api/tests/helpers/create-test-data.js | 7 - packages/api/tests/lib/test-executions.js | 34 +-- .../lib/writeRecords/test-write-execution.js | 172 +++--------- packages/db/src/models/execution.ts | 4 +- 10 files changed, 81 insertions(+), 532 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a35c5234ca..d589b1ad521 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Replace ElasticSearch Phase 1 - **CUMULUS-3239** - - Updated `execution` list api endpoint and added `ExecutionSearch` class to query postgres + - Updated `executions` list api endpoint and added `ExecutionSearch` class to query postgres +- **CUMULUS-3240** + - Removed Elasticsearch dependency from `executions` endpoints - **CUMULUS-3639** - Updated `/collections/active` endpoint to query postgres - **CUMULUS-3640** diff --git a/packages/api/endpoints/executions.js b/packages/api/endpoints/executions.js index 4f38fbfe40b..055f3eb07c0 100644 --- a/packages/api/endpoints/executions.js +++ b/packages/api/endpoints/executions.js @@ -17,12 +17,9 @@ const { CollectionPgModel, ExecutionPgModel, translatePostgresExecutionToApiExecution, - createRejectableTransaction, ExecutionSearch, } = require('@cumulus/db'); const { deconstructCollectionId } = require('@cumulus/message/Collections'); -const { deleteExecution } = require('@cumulus/es-client/indexer'); -const { getEsClient, Search } = require('@cumulus/es-client/search'); const { zodParser } = require('../src/zod-utils'); const { asyncOperationEndpointErrorHandler } = require('../app/middleware'); @@ -208,39 +205,21 @@ async function del(req, res) { const { executionPgModel = new ExecutionPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const { arn } = req.params; - const esExecutionsClient = new Search( - {}, - 'execution', - process.env.ES_INDEX - ); try { await executionPgModel.get(knex, { arn }); } catch (error) { if (error instanceof RecordDoesNotExist) { - if (!(await esExecutionsClient.exists(arn))) { - log.info('Execution does not exist in Elasticsearch and PostgreSQL'); - return res.boom.notFound('No record found'); - } - log.info('Execution does not exist in PostgreSQL, it only exists in Elasticsearch. Proceeding with deletion'); - } else { - throw error; + log.info('Execution does not exist in PostgreSQL'); + return res.boom.notFound('No record found'); } + throw error; } - await createRejectableTransaction(knex, async (trx) => { - await executionPgModel.delete(trx, { arn }); - await deleteExecution({ - esClient, - arn, - index: process.env.ES_INDEX, - ignore: [404], - }); - }); + await executionPgModel.delete(knex, { arn }); return res.send({ message: 'Record deleted' }); } @@ -255,7 +234,7 @@ async function del(req, res) { async function searchByGranules(req, res) { const payload = req.body; const knex = await getKnexClient(); - const granules = await getGranulesForPayload(payload, knex); + const granules = await getGranulesForPayload(payload); const { page = 1, limit = 1, ...sortParams } = req.query; const offset = page < 1 ? 0 : (page - 1) * limit; @@ -290,7 +269,7 @@ async function searchByGranules(req, res) { async function workflowsByGranules(req, res) { const payload = req.body; const knex = await getKnexClient(); - const granules = await getGranulesForPayload(payload, knex); + const granules = await getGranulesForPayload(payload); const granuleCumulusIds = await getApiGranuleCumulusIds(knex, granules); @@ -367,7 +346,6 @@ async function bulkDeleteExecutionsByCollection(req, res) { type: 'BULK_EXECUTION_DELETE', payload: { ...payload, esBatchSize, dbBatchSize, collectionId }, envVars: { - ES_HOST: process.env.ES_HOST, KNEX_DEBUG: payload.knexDebug ? 
'true' : 'false', stackName: process.env.stackName, system_bucket: process.env.system_bucket, diff --git a/packages/api/lib/executions.js b/packages/api/lib/executions.js index a330cbeaea2..833f45570c6 100644 --- a/packages/api/lib/executions.js +++ b/packages/api/lib/executions.js @@ -10,8 +10,6 @@ const { batchDeleteExecutionFromDatabaseByCumulusCollectionId, } = require('@cumulus/db'); const { deconstructCollectionId } = require('@cumulus/message/Collections'); -const { batchDeleteExecutionsByCollection } = require('@cumulus/es-client/executions'); -const { defaultIndexAlias } = require('@cumulus/es-client/search'); const StepFunctions = require('@cumulus/aws-client/StepFunctions'); @@ -130,12 +128,11 @@ const _deleteRdsExecutions = async ({ ); /** - * Handles the deletion of execution records from both Elasticsearch and the database. + * Handles the deletion of execution records from the database. * * @param {Object} event - The event object. * @param {string} event.collectionId - The ID of the collection whose execution * records are to be deleted. - * @param {string} event.esBatchSize - the batch size to delete from Elasticsearch * @param {string} event.dbBatchSize - the batch size to delete from the database * @returns {Promise} */ @@ -143,21 +140,8 @@ const batchDeleteExecutions = async (event) => { const knex = await getKnexClient(); const collectionId = event.collectionId; - const esBatchSize = Number(event.esBatchSize) || 10000; const dbBatchSize = Number(event.dbBatchSize) || 10000; - // Delete ES execution records - log.info( - `Starting deletion of executions records from Elasticsearch for collection ${collectionId}, batch size ${event.esBatchSize}` - ); - await batchDeleteExecutionsByCollection({ - index: process.env.ES_INDEX || defaultIndexAlias, - collectionId, - batchSize: esBatchSize, - }); - - log.info('Elasticsearch deletion complete'); - // Delete RDS execution records log.info( `Starting deletion of executions records from RDS for collection ${collectionId}, batch size ${event.dbBatchSize}` diff --git a/packages/api/lib/testUtils.js b/packages/api/lib/testUtils.js index 5b86f0e1513..ba9b74bf14c 100644 --- a/packages/api/lib/testUtils.js +++ b/packages/api/lib/testUtils.js @@ -28,7 +28,6 @@ const { indexRule, indexPdr, indexAsyncOperation, - indexExecution, deleteExecution, } = require('@cumulus/es-client/indexer'); const { @@ -618,8 +617,6 @@ const createExecutionTestRecords = async (context, executionParams = {}) => { const { knex, executionPgModel, - esClient, - esExecutionsClient, } = context; const originalExecution = fakeExecutionFactoryV2(executionParams); @@ -629,13 +626,8 @@ const createExecutionTestRecords = async (context, executionParams = {}) => { const originalPgRecord = await executionPgModel.get( knex, { cumulus_id: executionCumulusId } ); - await indexExecution(esClient, originalExecution, process.env.ES_INDEX); - const originalEsRecord = await esExecutionsClient.get( - originalExecution.arn - ); return { originalPgRecord, - originalEsRecord, }; }; diff --git a/packages/api/lib/writeRecords/write-execution.js b/packages/api/lib/writeRecords/write-execution.js index bb4c38033c6..d562ddbbf37 100644 --- a/packages/api/lib/writeRecords/write-execution.js +++ b/packages/api/lib/writeRecords/write-execution.js @@ -3,22 +3,16 @@ const isUndefined = require('lodash/isUndefined'); const omitBy = require('lodash/omitBy'); const { - createRejectableTransaction, ExecutionPgModel, translateApiExecutionToPostgresExecutionWithoutNilsRemoved, 
translatePostgresExecutionToApiExecution, } = require('@cumulus/db'); -const { - upsertExecution, -} = require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); const { getMessageExecutionArn, getExecutionUrlFromArn, getMessageCumulusVersion, getMessageExecutionOriginalPayload, getMessageExecutionFinalPayload, - generateExecutionApiRecordFromMessage, } = require('@cumulus/message/Executions'); const { getMetaStatus, @@ -88,87 +82,49 @@ const buildExecutionRecord = ({ return omitBy(record, isUndefined); }; -const writeExecutionToES = async (params) => { - const { - apiRecord, - esClient = await getEsClient(), - writeConstraints = true, - } = params; - return await upsertExecution({ - esClient, - updates: apiRecord, - index: process.env.ES_INDEX, - }, writeConstraints); -}; - /** * Write execution record to databases * - * @param {Object} params - * @param {Object} params.apiRecord - Execution API record to be written - * @param {Object} params.postgresRecord - Execution PostgreSQL record to be written - * @param {Object} params.knex - Knex client - * @param {Object} [params.executionPgModel] - PostgreSQL execution model - * @param {number} [params.updatedAt] - updatedAt timestamp to use when writing records - * @param {Object} [params.esClient] - Elasticsearch client - * @returns {Promise} - PostgreSQL execution record that was written to the database + * @param {object} params + * @param {object} params.postgresRecord - Execution PostgreSQL record to be written + * @param {object} params.knex - Knex client + * @param {object} [params.executionPgModel] - PostgreSQL execution model + * @param {object} [params.writeConstraints] - Boolean flag to set if record write constraints apply + * @returns {Promise} - PostgreSQL execution record that was written to the database */ -const _writeExecutionRecord = ({ - apiRecord, +const _writeExecutionRecord = async ({ postgresRecord, knex, executionPgModel = new ExecutionPgModel(), - updatedAt = Date.now(), - esClient, writeConstraints = true, -}) => createRejectableTransaction(knex, async (trx) => { +}) => { logger.info(`About to write execution ${postgresRecord.arn} to PostgreSQL`); - const [executionPgRecord] = await executionPgModel.upsert(trx, postgresRecord, writeConstraints); + const [executionPgRecord] = await executionPgModel.upsert(knex, postgresRecord, writeConstraints); logger.info(`Successfully wrote execution ${postgresRecord.arn} to PostgreSQL with cumulus_id ${executionPgRecord.cumulus_id}`); - try { - await writeExecutionToES({ - apiRecord, - updatedAt, - esClient, - writeConstraints, - }); - logger.info(`Successfully wrote Elasticsearch record for execution ${apiRecord.arn}`); - } catch (error) { - logger.info(`Write to Elasticsearch failed, rolling back data store write for execution ${apiRecord.arn}`); - throw error; - } return executionPgRecord; -}); +}; /** * Write execution record to databases and publish SNS message * - * @param {Object} params - * @param {Object} params.apiRecord - Execution API record to be written - * @param {Object} params.postgresRecord - Execution PostgreSQL record to be written - * @param {Object} params.knex - Knex client - * @param {Object} [params.executionPgModel] - PostgreSQL execution model - * @param {number} [params.updatedAt] - updatedAt timestamp to use when writing records - * @param {Object} [params.esClient] - Elasticsearch client - * @returns {Promise} - PostgreSQL execution record that was written to the database + * @param {object} params + * @param 
{object} params.postgresRecord - Execution PostgreSQL record to be written + * @param {object} params.knex - Knex client + * @param {object} [params.executionPgModel] - PostgreSQL execution model + * @param {object} [params.writeConstraints] - Boolean flag to set if record write constraints apply + * @returns {Promise} - PostgreSQL execution record that was written to the database */ const _writeExecutionAndPublishSnsMessage = async ({ - apiRecord, postgresRecord, knex, executionPgModel, - updatedAt, - esClient, writeConstraints = true, }) => { const writeExecutionResponse = await _writeExecutionRecord( { - apiRecord, postgresRecord, knex, - esClient, executionPgModel, - updatedAt, writeConstraints, } ); @@ -187,7 +143,6 @@ const writeExecutionRecordFromMessage = async ({ asyncOperationCumulusId, parentExecutionCumulusId, updatedAt = Date.now(), - esClient, }) => { const postgresRecord = buildExecutionRecord({ cumulusMessage, @@ -196,13 +151,9 @@ const writeExecutionRecordFromMessage = async ({ parentExecutionCumulusId, updatedAt, }); - const executionApiRecord = generateExecutionApiRecordFromMessage(cumulusMessage, updatedAt); const writeExecutionResponse = await _writeExecutionAndPublishSnsMessage({ - apiRecord: executionApiRecord, postgresRecord: omitBy(postgresRecord, isUndefined), knex, - updatedAt, - esClient, }); return writeExecutionResponse.cumulus_id; }; @@ -214,7 +165,6 @@ const writeExecutionRecordFromApi = async ({ const postgresRecord = await translateApiExecutionToPostgresExecutionWithoutNilsRemoved(apiRecord, knex); return await _writeExecutionAndPublishSnsMessage({ - apiRecord, postgresRecord: omitBy(postgresRecord, isUndefined), knex, writeConstraints: false, @@ -224,7 +174,6 @@ const writeExecutionRecordFromApi = async ({ module.exports = { buildExecutionRecord, shouldWriteExecutionToPostgres, - writeExecutionToES, writeExecutionRecordFromMessage, writeExecutionRecordFromApi, }; diff --git a/packages/api/tests/endpoints/test-executions.js b/packages/api/tests/endpoints/test-executions.js index 5ceacbb9028..fcf6e04747a 100644 --- a/packages/api/tests/endpoints/test-executions.js +++ b/packages/api/tests/endpoints/test-executions.js @@ -39,7 +39,6 @@ const { translateApiGranuleToPostgresGranule, } = require('@cumulus/db'); const indexer = require('@cumulus/es-client/indexer'); -const { Search } = require('@cumulus/es-client/search'); const { createTestIndex, cleanupTestIndex, @@ -52,7 +51,6 @@ const { fakeExecutionFactoryV2, setAuthorizedOAuthUsers, createExecutionTestRecords, - cleanupExecutionTestRecords, fakeGranuleFactoryV2, fakeAsyncOperationFactory, } = require('../../lib/testUtils'); @@ -64,9 +62,8 @@ process.env.system_bucket = randomString(); process.env.TOKEN_SECRET = randomString(); // import the express app after setting the env variables -const { del, bulkDeleteExecutionsByCollection } = require('../../endpoints/executions'); +const { bulkDeleteExecutionsByCollection } = require('../../endpoints/executions'); const { app } = require('../../app'); -const { buildFakeExpressResponse } = require('./utils'); // create all the variables needed across this test const testDbName = `test_executions_${cryptoRandomString({ length: 10 })}`; @@ -115,13 +112,8 @@ test.before(async (t) => { const { esIndex, esClient } = await createTestIndex(); t.context.esIndex = esIndex; t.context.esClient = esClient; - t.context.esExecutionsClient = new Search( - {}, - 'execution', - process.env.ES_INDEX - ); - // create fake execution records + // create fake execution records in 
Postgres const asyncOperationId = uuidv4(); t.context.asyncOperationId = asyncOperationId; await t.context.asyncOperationsPgModel.create( @@ -158,7 +150,6 @@ test.before(async (t) => { t.context.knex, executionPgRecord ); - await indexer.indexExecution(esClient, execution, process.env.ES_INDEX); return pgExecution; })); @@ -244,6 +235,7 @@ test.beforeEach(async (t) => { ]; // create fake Postgres granule records + // es records are for Metrics search t.context.fakePGGranules = await Promise.all(t.context.fakeGranules.map(async (fakeGranule) => { await indexer.indexGranule(esClient, fakeGranule, esIndex); const granulePgRecord = await translateApiGranuleToPostgresGranule({ @@ -479,11 +471,6 @@ test.serial('DELETE deletes an execution', async (t) => { { arn } ) ); - t.true( - await t.context.esExecutionsClient.exists( - arn - ) - ); const response = await request(app) .delete(`/executions/${arn}`) @@ -497,103 +484,6 @@ test.serial('DELETE deletes an execution', async (t) => { const dbRecords = await t.context.executionPgModel .search(t.context.knex, { arn }); t.is(dbRecords.length, 0); - t.false( - await t.context.esExecutionsClient.exists( - arn - ) - ); -}); - -test.serial('del() does not remove from Elasticsearch if removing from PostgreSQL fails', async (t) => { - const { - originalPgRecord, - } = await createExecutionTestRecords( - t.context, - { parentArn: undefined } - ); - const { arn } = originalPgRecord; - t.teardown(async () => await cleanupExecutionTestRecords(t.context, { arn })); - - const fakeExecutionPgModel = new ExecutionPgModel(); - fakeExecutionPgModel.delete = () => { - throw new Error('something bad'); - }; - - const expressRequest = { - params: { - arn, - }, - testContext: { - knex: t.context.knex, - executionPgModel: fakeExecutionPgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.executionPgModel.exists(t.context.knex, { - arn, - }) - ); - t.true( - await t.context.esExecutionsClient.exists( - arn - ) - ); -}); - -test.serial('del() does not remove from PostgreSQL if removing from Elasticsearch fails', async (t) => { - const { - originalPgRecord, - } = await createExecutionTestRecords( - t.context, - { parentArn: undefined } - ); - const { arn } = originalPgRecord; - t.teardown(async () => await cleanupExecutionTestRecords(t.context, { arn })); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - delete: () => { - throw new Error('something bad'); - }, - }, - }; - - const expressRequest = { - params: { - arn, - }, - testContext: { - knex: t.context.knex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.executionPgModel.exists(t.context.knex, { - arn, - }) - ); - t.true( - await t.context.esExecutionsClient.exists( - arn - ) - ); }); test.serial('DELETE removes only specified execution from all data stores', async (t) => { @@ -640,7 +530,7 @@ test.serial('DELETE removes only specified execution from all data stores', asyn t.is(originalExecution2.length, 1); }); -test.serial('DELETE returns a 404 if PostgreSQL and Elasticsearch execution cannot be found', async (t) => { +test.serial('DELETE returns a 404 if PostgreSQL execution cannot be found', async (t) => { const nonExistentExecution = { arn: 'arn9', status: 'completed', @@ -656,74 
+546,6 @@ test.serial('DELETE returns a 404 if PostgreSQL and Elasticsearch execution cann t.is(response.body.message, 'No record found'); }); -test('DELETE successfully deletes if a PostgreSQL execution exists but not Elasticsearch', async (t) => { - const { knex, executionPgModel } = t.context; - - const newExecution = fakeExecutionFactoryV2({ - status: 'completed', - name: 'test_execution', - }); - - const executionPgRecord = await translateApiExecutionToPostgresExecution( - newExecution, - knex - ); - await executionPgModel.create(knex, executionPgRecord); - - t.true(await executionPgModel.exists(knex, { arn: newExecution.arn })); - t.false( - await t.context.esExecutionsClient.exists( - newExecution.arn - ) - ); - await request(app) - .delete(`/executions/${newExecution.arn}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const dbRecords = await executionPgModel.search(t.context.knex, { - arn: newExecution.arn, - }); - - t.is(dbRecords.length, 0); - t.false(await executionPgModel.exists(knex, { arn: newExecution.arn })); -}); - -test('DELETE successfully deletes if an Elasticsearch execution exists but not PostgreSQL', async (t) => { - const { knex, esClient, executionPgModel } = t.context; - - const newExecution = fakeExecutionFactoryV2({ - status: 'completed', - name: 'test_execution', - }); - - await indexer.indexExecution(esClient, newExecution, process.env.ES_INDEX); - - t.false(await executionPgModel.exists(knex, { arn: newExecution.arn })); - t.true( - await t.context.esExecutionsClient.exists( - newExecution.arn - ) - ); - await request(app) - .delete(`/executions/${newExecution.arn}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const dbRecords = await executionPgModel.search(t.context.knex, { - arn: newExecution.arn, - }); - - t.is(dbRecords.length, 0); - t.false( - await t.context.esExecutionsClient.exists( - newExecution.arn - ) - ); -}); - test.serial('POST /executions/search-by-granules returns 1 record by default', async (t) => { const { fakeGranules, fakePGExecutions } = t.context; @@ -1106,7 +928,7 @@ test.serial('POST /executions/workflows-by-granules returns correct workflows wh t.deepEqual(response.body.sort(), ['fakeWorkflow', 'workflow2']); }); -test.serial('POST /executions creates a new execution in PostgreSQL/Elasticsearch with correct timestamps', async (t) => { +test.serial('POST /executions creates a new execution in PostgreSQL with correct timestamps', async (t) => { const newExecution = fakeExecutionFactoryV2(); await request(app) @@ -1123,15 +945,10 @@ test.serial('POST /executions creates a new execution in PostgreSQL/Elasticsearc } ); - const fetchedEsRecord = await t.context.esExecutionsClient.get(newExecution.arn); - t.true(fetchedPgRecord.created_at.getTime() > newExecution.createdAt); - - t.is(fetchedPgRecord.created_at.getTime(), fetchedEsRecord.createdAt); - t.is(fetchedPgRecord.updated_at.getTime(), fetchedEsRecord.updatedAt); }); -test.serial('POST /executions creates the expected record in PostgreSQL/Elasticsearch', async (t) => { +test.serial('POST /executions creates the expected record in PostgreSQL', async (t) => { const newExecution = fakeExecutionFactoryV2({ asyncOperationId: t.context.testAsyncOperation.id, collectionId: t.context.collectionId, @@ -1157,24 +974,12 @@ test.serial('POST /executions creates the expected record in PostgreSQL/Elastics } ); - const fetchedEsRecord = await 
t.context.esExecutionsClient.get(newExecution.arn); - t.is(fetchedPgRecord.arn, newExecution.arn); t.truthy(fetchedPgRecord.cumulus_id); t.is(fetchedPgRecord.async_operation_cumulus_id, t.context.asyncOperationCumulusId); t.is(fetchedPgRecord.collection_cumulus_id, t.context.collectionCumulusId); t.is(fetchedPgRecord.parent_cumulus_id, t.context.fakePGExecutions[1].cumulus_id); - t.deepEqual( - fetchedEsRecord, - { - ...newExecution, - _id: fetchedEsRecord._id, - createdAt: fetchedEsRecord.createdAt, - updatedAt: fetchedEsRecord.updatedAt, - timestamp: fetchedEsRecord.timestamp, - } - ); t.deepEqual( fetchedPgRecord, { @@ -1339,7 +1144,7 @@ test.serial('POST /executions publishes message to SNS topic', async (t) => { t.deepEqual(executionRecord, translatedExecution); }); -test.serial('PUT /executions updates the record as expected in PostgreSQL/Elasticsearch', async (t) => { +test.serial('PUT /executions updates the record as expected in PostgreSQL', async (t) => { const execution = fakeExecutionFactoryV2({ collectionId: t.context.collectionId, parentArn: t.context.fakeApiExecutions[1].arn, @@ -1382,21 +1187,6 @@ test.serial('PUT /executions updates the record as expected in PostgreSQL/Elasti arn: execution.arn, } ); - const updatedEsRecord = await t.context.esExecutionsClient.get(execution.arn); - const expectedEsRecord = { - ...updatedExecution, - collectionId: execution.collectionId, - createdAt: updatedPgRecord.created_at.getTime(), - updatedAt: updatedPgRecord.updated_at.getTime(), - }; - - t.like( - updatedEsRecord, - { - ...expectedEsRecord, - timestamp: updatedEsRecord.timestamp, - } - ); t.is(updatedPgRecord.arn, execution.arn); t.is(updatedPgRecord.cumulus_id, pgRecord.cumulus_id); @@ -1459,27 +1249,20 @@ test.serial('PUT /executions overwrites a completed record with a running record arn: execution.arn, } ); - const updatedEsRecord = await t.context.esExecutionsClient.get(execution.arn); - const expectedEsRecord = { + + const expectedApiRecord = { ...omitBy(updatedExecution, isNull), collectionId: execution.collectionId, createdAt: updatedPgRecord.created_at.getTime(), updatedAt: updatedPgRecord.updated_at.getTime(), }; - t.like( - updatedEsRecord, - { - ...expectedEsRecord, - timestamp: updatedEsRecord.timestamp, - } - ); const translatedExecution = await translatePostgresExecutionToApiExecution( updatedPgRecord, t.context.knex ); - t.deepEqual(translatedExecution, expectedEsRecord); + t.deepEqual(translatedExecution, expectedApiRecord); }); test.serial('PUT /executions removes execution fields when nullified fields are passed in', async (t) => { @@ -1534,25 +1317,17 @@ test.serial('PUT /executions removes execution fields when nullified fields are } ); - const updatedEsRecord = await t.context.esExecutionsClient.get(execution.arn); - const expectedEsRecord = { + const expectedApiRecord = { ...omitBy(updatedExecution, isNull), createdAt: updatedPgRecord.created_at.getTime(), updatedAt: updatedPgRecord.updated_at.getTime(), }; - t.like( - updatedEsRecord, - { - ...expectedEsRecord, - timestamp: updatedEsRecord.timestamp, - } - ); const translatedExecution = await translatePostgresExecutionToApiExecution( updatedPgRecord, t.context.knex ); - t.deepEqual(translatedExecution, expectedEsRecord); + t.deepEqual(translatedExecution, expectedApiRecord); }); test.serial('PUT /executions throws error for arn mismatch between params and payload', async (t) => { diff --git a/packages/api/tests/helpers/create-test-data.js b/packages/api/tests/helpers/create-test-data.js index 
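For illustration, the verification pattern the updated tests above rely on, sketched as a small helper: read the row back from PostgreSQL and translate it to the API shape before asserting, rather than fetching an Elasticsearch copy. Assumes the `@cumulus/db` exports already used in these tests.

const { ExecutionPgModel, translatePostgresExecutionToApiExecution } = require('@cumulus/db');

// Sketch: fetch the stored execution and return its API representation for assertions.
const fetchApiExecution = async (knex, arn) => {
  const executionPgModel = new ExecutionPgModel();
  const pgRecord = await executionPgModel.get(knex, { arn });
  return await translatePostgresExecutionToApiExecution(pgRecord, knex);
};
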
8702b1b1efc..1867f7a66a7 100644 --- a/packages/api/tests/helpers/create-test-data.js +++ b/packages/api/tests/helpers/create-test-data.js @@ -19,7 +19,6 @@ const { translatePostgresExecutionToApiExecution, translatePostgresGranuleToApiGranule, } = require('@cumulus/db'); -const { indexExecution } = require('@cumulus/es-client/indexer'); const { constructCollectionId } = require('@cumulus/message/Collections'); // Postgres mock data factories @@ -210,7 +209,6 @@ async function createGranuleAndFiles({ async function createExecutionRecords({ knex, count, - esClient, addGranules = false, collectionId, addParentExecutions = false, @@ -266,11 +264,6 @@ async function createExecutionRecords({ translatePostgresExecutionToApiExecution(execution[0], knex)) ); - await Promise.all( - executionRecords.map((record) => - indexExecution(esClient, record, process.env.ES_INDEX)) - ); - if (addGranules === true) { const testGranuleObject = await createGranuleAndFiles({ collectionCumulusId: pgCollectionRecord.cumulus_id, diff --git a/packages/api/tests/lib/test-executions.js b/packages/api/tests/lib/test-executions.js index dfafe887b0e..f34713565e8 100644 --- a/packages/api/tests/lib/test-executions.js +++ b/packages/api/tests/lib/test-executions.js @@ -6,14 +6,9 @@ const { destroyLocalTestDb, generateLocalTestDb, migrationDir, + ExecutionSearch, } = require('@cumulus/db'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); - const { createExecutionRecords } = require('../helpers/create-test-data'); const { chooseTargetExecution, batchDeleteExecutions } = require('../../lib/executions'); @@ -31,12 +26,9 @@ test.beforeEach(async (t) => { try { const testDbName = `test_executions_${cryptoRandomString({ length: 10 })}`; process.env = { ...process.env, ...localStackConnectionEnv, PG_DATABASE: testDbName }; - const { esIndex, esClient } = await createTestIndex(); const { knex, knexAdmin } = await generateLocalTestDb(testDbName, migrationDir); t.context = { ...t.context, - esClient, - esIndex, knex, knexAdmin, testDbName, @@ -53,20 +45,16 @@ test.afterEach.always(async (t) => { knexAdmin: t.context.knexAdmin, testDbName: t.context.testDbName, }); - await cleanupTestIndex(t.context); }); -const searchAllExecutionsForCollection = async (collectionId, esIndex) => { - const searchClient = new Search( +const searchAllExecutionsForCollection = async (collectionId) => { + const searchClient = new ExecutionSearch( { queryStringParameters: { collectionId, }, - }, - 'execution', - esIndex + } ); - await searchClient.initializeEsClient(); const response = await searchClient.query(); return response; }; @@ -135,7 +123,6 @@ test.serial('batchDeleteExecutions() deletes expected executions from the databa const { pgCollectionRecord } = await createExecutionRecords({ knex: t.context.knex, count: executionCount, - esClient: t.context.esClient, collectionId, addParentExecutions: true, }); @@ -143,19 +130,16 @@ test.serial('batchDeleteExecutions() deletes expected executions from the databa const otherCollectionRecords = await createExecutionRecords({ knex: t.context.knex, count: otherExecutionCount, - esClient: t.context.esClient, collectionId: otherCollectionId, addParentExecutions: true, }); const setupExecutions = await searchAllExecutionsForCollection( - collectionId, - t.context.esIndex + collectionId ); const otherCollectionEsExecutions = await searchAllExecutionsForCollection( - otherCollectionId, - t.context.esIndex + 
otherCollectionId ); const setupRdsExecutions = await t.context.knex('executions').select(); @@ -169,8 +153,7 @@ test.serial('batchDeleteExecutions() deletes expected executions from the databa batchSize: 7, }); const postDeleteEsExecutions = await searchAllExecutionsForCollection( - collectionId, - t.context.esIndex + collectionId ); const postDeleteRdsExecutions = await t.context.knex('executions').where('collection_cumulus_id', pgCollectionRecord[0].cumulus_id); t.is(postDeleteRdsExecutions.length, 0); @@ -178,8 +161,7 @@ test.serial('batchDeleteExecutions() deletes expected executions from the databa // Validate original executions exist const otherEsExecutions = await searchAllExecutionsForCollection( - otherCollectionId, - t.context.esIndex + otherCollectionId ); const otherRdsExecutions = await t.context.knex('executions').where('collection_cumulus_id', otherCollectionRecords.pgCollectionRecord[0].cumulus_id); t.is(otherEsExecutions.meta.count, otherExecutionCount + 1); diff --git a/packages/api/tests/lib/writeRecords/test-write-execution.js b/packages/api/tests/lib/writeRecords/test-write-execution.js index 261b99ad69f..bb63673e83b 100644 --- a/packages/api/tests/lib/writeRecords/test-write-execution.js +++ b/packages/api/tests/lib/writeRecords/test-write-execution.js @@ -4,7 +4,6 @@ const test = require('ava'); const cryptoRandomString = require('crypto-random-string'); const sinon = require('sinon'); const uuidv4 = require('uuid/v4'); -const omit = require('lodash/omit'); const { ExecutionPgModel, @@ -13,11 +12,6 @@ const { migrationDir, translatePostgresExecutionToApiExecution, } = require('@cumulus/db'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { createSnsTopic } = require('@cumulus/aws-client/SNS'); const { sns, sqs } = require('@cumulus/aws-client/services'); const { @@ -45,14 +39,6 @@ test.before(async (t) => { t.context.executionPgModel = new ExecutionPgModel(); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esExecutionsClient = new Search( - {}, - 'execution', - t.context.esIndex - ); t.context.postRDSDeploymentVersion = '9.0.0'; }); @@ -128,7 +114,6 @@ test.after.always(async (t) => { await destroyLocalTestDb({ ...t.context, }); - await cleanupTestIndex(t.context); }); test('shouldWriteExecutionToPostgres() returns false if collection from message is not found in Postgres', async (t) => { @@ -263,7 +248,7 @@ test('buildExecutionRecord returns record with error', (t) => { t.deepEqual(record.error, exception); }); -test.serial('writeExecutionRecordFromMessage() saves execution to RDS/Elasticsearch if write to RDS is enabled', async (t) => { +test.serial('writeExecutionRecordFromMessage() saves execution to RDS', async (t) => { const { cumulusMessage, knex, @@ -274,24 +259,6 @@ test.serial('writeExecutionRecordFromMessage() saves execution to RDS/Elasticsea await writeExecutionRecordFromMessage({ cumulusMessage, knex }); t.true(await executionPgModel.exists(knex, { arn: executionArn })); - t.true(await t.context.esExecutionsClient.exists(executionArn)); -}); - -test.serial('writeExecutionRecordFromMessage() saves execution to RDS/Elasticsearch with same timestamps', async (t) => { - const { - cumulusMessage, - knex, - executionArn, - executionPgModel, - } = t.context; - - await writeExecutionRecordFromMessage({ cumulusMessage, knex }); - - const pgRecord = await 
executionPgModel.get(knex, { arn: executionArn }); - const esRecord = await t.context.esExecutionsClient.get(executionArn); - - t.is(pgRecord.created_at.getTime(), esRecord.createdAt); - t.is(pgRecord.updated_at.getTime(), esRecord.updatedAt); }); test.serial('writeExecutionRecordFromMessage() properly sets originalPayload on initial write and finalPayload on subsequent write', async (t) => { @@ -311,10 +278,8 @@ test.serial('writeExecutionRecordFromMessage() properly sets originalPayload on await writeExecutionRecordFromMessage({ cumulusMessage, knex }); const pgRecord = await executionPgModel.get(knex, { arn: executionArn }); - const esRecord = await t.context.esExecutionsClient.get(executionArn); t.deepEqual(pgRecord.original_payload, originalPayload); - t.deepEqual(esRecord.originalPayload, originalPayload); cumulusMessage.meta.status = 'completed'; const finalPayload = { @@ -324,12 +289,9 @@ test.serial('writeExecutionRecordFromMessage() properly sets originalPayload on await writeExecutionRecordFromMessage({ cumulusMessage, knex }); const updatedPgRecord = await executionPgModel.get(knex, { arn: executionArn }); - const updatedEsRecord = await t.context.esExecutionsClient.get(executionArn); t.deepEqual(updatedPgRecord.original_payload, originalPayload); - t.deepEqual(updatedEsRecord.originalPayload, originalPayload); t.deepEqual(updatedPgRecord.final_payload, finalPayload); - t.deepEqual(updatedEsRecord.finalPayload, finalPayload); }); test.serial('writeExecutionRecordFromMessage() properly handles out of order writes and correctly preserves originalPayload/finalPayload', async (t) => { @@ -349,16 +311,11 @@ test.serial('writeExecutionRecordFromMessage() properly handles out of order wri await writeExecutionRecordFromMessage({ cumulusMessage, knex }); const pgRecord = await executionPgModel.get(knex, { arn: executionArn }); - const esRecord = await t.context.esExecutionsClient.get(executionArn); t.like(pgRecord, { status: 'completed', final_payload: finalPayload, }); - t.like(esRecord, { - status: 'completed', - finalPayload, - }); cumulusMessage.meta.status = 'running'; const originalPayload = { @@ -368,74 +325,39 @@ test.serial('writeExecutionRecordFromMessage() properly handles out of order wri await writeExecutionRecordFromMessage({ cumulusMessage, knex }); const updatedPgRecord = await executionPgModel.get(knex, { arn: executionArn }); - const updatedEsRecord = await t.context.esExecutionsClient.get(executionArn); t.like(updatedPgRecord, { status: 'completed', final_payload: finalPayload, original_payload: originalPayload, }); - t.like(updatedEsRecord, { - status: 'completed', - finalPayload, - originalPayload, - }); }); -test.serial('writeExecutionRecordFromMessage() does not write record to Elasticsearch if RDS write fails', async (t) => { +test.serial('writeExecutionRecordFromMessage() does not publish an SNS messagee if RDS write fails', async (t) => { const { cumulusMessage, knex, executionArn, executionPgModel, + QueueUrl, } = t.context; - const fakeTrxCallback = (cb) => { - const fakeTrx = sinon.stub().returns({ - insert: () => { - throw new Error('execution RDS error'); - }, - }); - return cb(fakeTrx); - }; - const trxStub = sinon.stub(knex, 'transaction').callsFake(fakeTrxCallback); - t.teardown(() => trxStub.restore()); + const knexStub = sinon.stub(knex, 'insert').returns({ + insert: () => { + throw new Error('execution RDS error'); + }, + }); + + t.teardown(() => knexStub.restore()); await t.throwsAsync( - writeExecutionRecordFromMessage({ cumulusMessage, knex }), + 
writeExecutionRecordFromMessage({ cumulusMessage, knex: knexStub }), { message: 'execution RDS error' } ); t.false(await executionPgModel.exists(knex, { arn: executionArn })); - t.false(await t.context.esExecutionsClient.exists(executionArn)); -}); - -test.serial('writeExecutionRecordFromMessage() does not write record to RDS if Elasticsearch write fails', async (t) => { - const { - cumulusMessage, - knex, - executionArn, - executionPgModel, - } = t.context; - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - update: () => { - throw new Error('ES error'); - }, - }, - }; + const { Messages } = await sqs().receiveMessage({ QueueUrl, WaitTimeSeconds: 10 }); - await t.throwsAsync( - writeExecutionRecordFromMessage({ - cumulusMessage, - knex, - esClient: fakeEsClient, - }), - { message: 'ES error' } - ); - t.false(await executionPgModel.exists(knex, { arn: executionArn })); - t.false(await t.context.esExecutionsClient.exists(executionArn)); + t.is(Messages.length, 0); }); test.serial('writeExecutionRecordFromMessage() correctly sets both original_payload and final_payload in postgres when execution records are run in sequence', async (t) => { @@ -462,26 +384,6 @@ test.serial('writeExecutionRecordFromMessage() correctly sets both original_payl t.deepEqual(pgRecord.final_payload, finalPayload); }); -test.serial('writeExecutionRecordFromApi() saves execution to RDS/Elasticsearch with same timestamps', async (t) => { - const { - cumulusMessage, - knex, - executionArn, - executionPgModel, - } = t.context; - - const apiRecord = generateExecutionApiRecordFromMessage(cumulusMessage); - await writeExecutionRecordFromApi({ - record: apiRecord, - knex, - }); - - const pgRecord = await executionPgModel.get(knex, { arn: executionArn }); - const esRecord = await t.context.esExecutionsClient.get(executionArn); - t.is(pgRecord.created_at.getTime(), esRecord.createdAt); - t.is(pgRecord.updated_at.getTime(), esRecord.updatedAt); -}); - test.serial('writeExecutionRecordFromMessage() does not allow a running execution to replace a completed execution due to write constraints', async (t) => { const { cumulusMessage, @@ -508,11 +410,6 @@ test.serial('writeExecutionRecordFromMessage() does not allow a running executio t.deepEqual(pgRecord.final_payload, finalPayload); t.deepEqual(pgRecord.tasks, tasks); - let esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(esRecord.originalPayload, originalPayload); - t.deepEqual(esRecord.finalPayload, finalPayload); - t.deepEqual(esRecord.tasks, tasks); - // writeConstraints apply, status is not updated in data stores cumulusMessage.meta.status = 'running'; cumulusMessage.payload = updatedOriginalPayload; @@ -526,8 +423,11 @@ test.serial('writeExecutionRecordFromMessage() does not allow a running executio t.is(pgRecord.status, 'completed'); const translatedExecution = await translatePostgresExecutionToApiExecution(pgRecord, knex); - esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(omit(esRecord, ['_id']), translatedExecution); + t.is(translatedExecution.arn, executionArn); + t.deepEqual(translatedExecution.status, 'completed'); + t.deepEqual(translatedExecution.originalPayload, updatedOriginalPayload); + t.deepEqual(translatedExecution.finalPayload, finalPayload); + t.deepEqual(translatedExecution.tasks, tasks); }); test.serial('writeExecutionRecordFromMessage() on re-write saves execution with expected values nullified', async (t) => { @@ -555,11 +455,6 @@ 
test.serial('writeExecutionRecordFromMessage() on re-write saves execution with t.deepEqual(pgRecord.final_payload, finalPayload); t.deepEqual(pgRecord.tasks, tasks); - let esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(esRecord.originalPayload, originalPayload); - t.deepEqual(esRecord.finalPayload, finalPayload); - t.deepEqual(esRecord.tasks, tasks); - cumulusMessage.meta.status = 'failed'; cumulusMessage.payload = null; cumulusMessage.meta.workflow_tasks = null; @@ -572,8 +467,11 @@ test.serial('writeExecutionRecordFromMessage() on re-write saves execution with t.is(pgRecord.tasks, null); const translatedExecution = await translatePostgresExecutionToApiExecution(pgRecord, knex); - esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(omit(esRecord, ['_id']), translatedExecution); + t.is(translatedExecution.arn, executionArn); + t.deepEqual(translatedExecution.status, cumulusMessage.meta.status); + t.deepEqual(translatedExecution.originalPayload, originalPayload); + t.falsy(translatedExecution.finalPayload); + t.falsy(translatedExecution.tasks); }); test.serial('writeExecutionRecordFromApi() allows a running execution to replace a completed execution', async (t) => { @@ -604,11 +502,6 @@ test.serial('writeExecutionRecordFromApi() allows a running execution to replace t.deepEqual(pgRecord.final_payload, finalPayload); t.deepEqual(pgRecord.tasks, tasks); - let esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(esRecord.originalPayload, originalPayload); - t.deepEqual(esRecord.finalPayload, finalPayload); - t.deepEqual(esRecord.tasks, tasks); - // writeConstraints do not apply, status is updated in data stores, // null fields are removed cumulusMessage.meta.status = 'running'; @@ -624,8 +517,11 @@ test.serial('writeExecutionRecordFromApi() allows a running execution to replace t.is(pgRecord.status, cumulusMessage.meta.status); const translatedExecution = await translatePostgresExecutionToApiExecution(pgRecord, knex); - esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(omit(esRecord, ['_id']), translatedExecution); + t.is(translatedExecution.arn, executionArn); + t.deepEqual(translatedExecution.status, cumulusMessage.meta.status); + t.deepEqual(translatedExecution.originalPayload, updatedOriginalPayload); + t.deepEqual(translatedExecution.finalPayload, finalPayload); + t.falsy(translatedExecution.tasks); }); test.serial('writeExecutionRecordFromApi() on re-write saves execution with expected values nullified', async (t) => { @@ -655,11 +551,6 @@ test.serial('writeExecutionRecordFromApi() on re-write saves execution with expe t.deepEqual(pgRecord.final_payload, finalPayload); t.deepEqual(pgRecord.tasks, tasks); - let esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(esRecord.originalPayload, originalPayload); - t.deepEqual(esRecord.finalPayload, finalPayload); - t.deepEqual(esRecord.tasks, tasks); - cumulusMessage.meta.status = 'failed'; cumulusMessage.payload = null; cumulusMessage.meta.workflow_tasks = null; @@ -673,8 +564,11 @@ test.serial('writeExecutionRecordFromApi() on re-write saves execution with expe t.is(pgRecord.tasks, null); const translatedExecution = await translatePostgresExecutionToApiExecution(pgRecord, knex); - esRecord = await t.context.esExecutionsClient.get(executionArn); - t.deepEqual(omit(esRecord, ['_id']), translatedExecution); + t.is(translatedExecution.arn, executionArn); + t.deepEqual(translatedExecution.status, 
cumulusMessage.meta.status); + t.deepEqual(translatedExecution.originalPayload, originalPayload); + t.falsy(translatedExecution.finalPayload); + t.falsy(translatedExecution.tasks); }); test.serial('writeExecutionRecordFromMessage() successfully publishes an SNS message', async (t) => { diff --git a/packages/db/src/models/execution.ts b/packages/db/src/models/execution.ts index 48012900861..76a50c6cfeb 100644 --- a/packages/db/src/models/execution.ts +++ b/packages/db/src/models/execution.ts @@ -55,13 +55,13 @@ class ExecutionPgModel extends BasePgModel>} An array of executions + * @returns An array of executions */ async searchByCumulusIds( knexOrTrx: Knex | Knex.Transaction, executionCumulusIds: Array | number, params: { limit: number, offset: number } - ): Promise> { + ): Promise> { const { limit, offset, ...sortQueries } = params || {}; const sortFields = getSortFields(sortQueries); const executionCumulusIdsArray = [executionCumulusIds].flat(); From 867b3259c5cb7058c0b38828b21fa1d4a4c623a1 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Wed, 31 Jul 2024 14:00:14 -0400 Subject: [PATCH 14/61] CUMULUS-3642: postgres query adjustment (#3731) * estimate table row count * add more indexes * use count * * fix test * fix test * fix lint and add file index * add more index * fix lint * fix lint update test * execution asyncOperationId is optional * defautl stats last day * fix granule patchByGranuleId put logic back * vaccum tables * update changelog * remove sql script to another pr * update changelog * update active collection query * Revert "update active collection query" This reverts commit 88024c25e0c8e5f130417fe94fa7c9616cfb3fbe. --- CHANGELOG.md | 7 ++ packages/api/endpoints/granules.js | 21 ++---- packages/api/endpoints/stats.js | 16 ++++- packages/api/tests/endpoints/stats.js | 20 +++--- .../api/tests/endpoints/test-executions.js | 2 +- packages/api/tests/endpoints/test-granules.js | 2 +- packages/db/src/search/BaseSearch.ts | 48 ++++++++++++- packages/db/src/search/CollectionSearch.ts | 4 +- packages/db/src/search/ExecutionSearch.ts | 27 ++++--- packages/db/src/search/GranuleSearch.ts | 7 +- packages/db/src/search/StatsSearch.ts | 48 +++++++------ packages/db/src/search/queries.ts | 13 +++- packages/db/src/types/search.ts | 1 + .../db/tests/search/test-ExecutionSearch.js | 70 +++++++++++++++++-- .../db/tests/search/test-GranuleSearch.js | 24 +++++++ packages/db/tests/search/test-queries.js | 2 + 16 files changed, 240 insertions(+), 72 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8041eb6148e..58f0e59b9e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [Unreleased] ### Replace ElasticSearch Phase 1 + - **CUMULUS-3239** - Updated `executions` list api endpoint and added `ExecutionSearch` class to query postgres - **CUMULUS-3240** @@ -17,6 +18,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
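As a rough illustration of the "estimate table row count" item in the commit message above: instead of running `count(*)`, the row count for a basic query can be read from PostgreSQL's planner output. A minimal sketch assuming a knex client and a table name such as `granules`; the actual integration is in the `BaseSearch` diff further below.

const get = require('lodash/get');

// Sketch: ask the query planner for its row estimate rather than scanning the table.
const estimateRowCount = async (knex, tableName) => {
  const result = await knex.raw(`EXPLAIN (FORMAT JSON) select * from "${tableName}"`);
  // the JSON plan carries the estimate under Plan -> "Plan Rows"
  return Number(get(result, 'rows[0]["QUERY PLAN"][0].Plan["Plan Rows"]', 0));
};
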
- Removed elasticsearch dependency from granules endpoint - **CUMULUS-3641** - Updated `collections` api endpoint to query postgres instead of elasticsearch except if `includeStats` is in the query parameters +- **CUMULUS-3642** + - Adjusted queries to improve performance: + - Used count(*) over count(id) to count rows + - Estimated row count for large tables (granules and executions) by default for basic query + - Updated stats summary to default to the last day + - Updated ExecutionSearch to not include asyncOperationId by default - **CUMULUS-3688** - Updated `stats` api endpoint to query postgres instead of elasticsearch - **CUMULUS-3689** diff --git a/packages/api/endpoints/granules.js b/packages/api/endpoints/granules.js index 55dedb7298f..f3b3ed47544 100644 --- a/packages/api/endpoints/granules.js +++ b/packages/api/endpoints/granules.js @@ -487,7 +487,6 @@ async function patchByGranuleId(req, res) { granulePgModel = new GranulePgModel(), knex = await getKnexClient(), } = req.testContext || {}; - let pgGranule; const body = req.body; const action = body.action; @@ -500,18 +499,11 @@ async function patchByGranuleId(req, res) { ); } - try { - pgGranule = await await getUniqueGranuleByGranuleId( - knex, - req.params.granuleId, - granulePgModel - ); - } catch (error) { - if (error instanceof RecordDoesNotExist) { - log.info('Granule does not exist'); - return res.boom.notFound('No record found'); - } - } + const pgGranule = await getUniqueGranuleByGranuleId( + knex, + req.params.granuleId, + granulePgModel + ); const collectionPgModel = new CollectionPgModel(); const pgCollection = await collectionPgModel.get(knex, { @@ -681,10 +673,10 @@ async function delByGranuleId(req, res) { const { knex = await getKnexClient(), } = req.testContext || {}; - let pgGranule; const granuleId = req.params.granuleId; log.info(`granules.del ${granuleId}`); + let pgGranule; try { pgGranule = await getUniqueGranuleByGranuleId(knex, granuleId); } catch (error) { @@ -692,6 +684,7 @@ async function delByGranuleId(req, res) { log.info('Granule does not exist'); return res.boom.notFound(`Granule ${granuleId} does not exist or was already deleted`); } + throw error; } const deletionDetails = await deleteGranuleAndFiles({ diff --git a/packages/api/endpoints/stats.js b/packages/api/endpoints/stats.js index a335a20c72e..8a27b380246 100644 --- a/packages/api/endpoints/stats.js +++ b/packages/api/endpoints/stats.js @@ -37,7 +37,16 @@ function getType(req) { * @returns {Promise} the promise of express response object */ async function summary(req, res) { - const stats = new StatsSearch({ queryStringParameters: req.query }, 'granule'); + const params = req.query; + + const now = Date.now(); + params.timestamp__from = Number.parseInt(get( + params, + 'timestamp__from', + now - 24 * 3600 * 1000 + ), 10); + params.timestamp__to = Number.parseInt(get(params, 'timestamp__to', now), 10); + const stats = new StatsSearch({ queryStringParameters: params }, 'granule'); const r = await stats.summary(); return res.send(r); } @@ -50,8 +59,9 @@ async function summary(req, res) { * @returns {Promise} the promise of express response object */ async function aggregate(req, res) { - if (getType(req)) { - const stats = new StatsSearch({ queryStringParameters: omit(req.query, 'type') }, getType(req)); + const type = getType(req); + if (type) { + const stats = new StatsSearch({ queryStringParameters: omit(req.query, 'type') }, type); const r = await stats.aggregate(); return res.send(r); } diff --git a/packages/api/tests/endpoints/stats.js 
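To make the new `/stats` default above concrete: a summary request with no parameters now covers roughly the last 24 hours, which is equivalent to passing the window explicitly. A hedged sketch using the supertest pattern of these test files; `app` and `jwtAuthToken` are assumed to be the fixtures those tests already set up.

const request = require('supertest');

// Sketch: the implicit default window, written out as explicit query parameters.
const fetchDefaultWindowStats = async () => {
  const now = Date.now();
  const oneDayAgo = now - 24 * 3600 * 1000;
  const response = await request(app)
    .get(`/stats?timestamp__from=${oneDayAgo}&timestamp__to=${now}`)
    .set('Accept', 'application/json')
    .set('Authorization', `Bearer ${jwtAuthToken}`)
    .expect(200);
  return response.body;
};
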
b/packages/api/tests/endpoints/stats.js index d3c3876acf8..1853a7c53ac 100644 --- a/packages/api/tests/endpoints/stats.js +++ b/packages/api/tests/endpoints/stats.js @@ -91,8 +91,10 @@ test.before(async (t) => { granules.push(fakeGranuleRecordFactory({ collection_cumulus_id: num % 20, status: statuses[num % 4], - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), + created_at: num === 99 + ? new Date() : (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: num === 99 + ? new Date() : (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), error: errors[num % 5], duration: num + (num / 10), })) @@ -209,17 +211,17 @@ test('GET /stats/aggregate with an invalid access token returns an unauthorized assertions.isInvalidAccessTokenResponse(t, response); }); -test('GET /stats returns correct response, defaulted to all', async (t) => { +test('GET /stats returns correct response, defaulted to the last day', async (t) => { const response = await request(app) .get('/stats') .set('Accept', 'application/json') .set('Authorization', `Bearer ${jwtAuthToken}`) .expect(200); - t.is(response.body.errors.value, 80); - t.is(response.body.processingTime.value, 54.44999999642372); - t.is(response.body.granules.value, 100); - t.is(response.body.collections.value, 20); + t.is(response.body.errors.value, 0); + t.is(response.body.processingTime.value, 108.9000015258789); + t.is(response.body.granules.value, 1); + t.is(response.body.collections.value, 1); }); test('GET /stats returns correct response with date params filters values correctly', async (t) => { @@ -261,10 +263,10 @@ test('GET /stats/aggregate filters correctly by date', async (t) => { const expectedCount = [ { key: 'failed', count: 16 }, - { key: 'running', count: 9 }, { key: 'completed', count: 8 }, { key: 'queued', count: 8 }, + { key: 'running', count: 8 }, ]; - t.is(response.body.meta.count, 41); + t.is(response.body.meta.count, 40); t.deepEqual(response.body.count, expectedCount); }); diff --git a/packages/api/tests/endpoints/test-executions.js b/packages/api/tests/endpoints/test-executions.js index fcf6e04747a..a192c35d3a1 100644 --- a/packages/api/tests/endpoints/test-executions.js +++ b/packages/api/tests/endpoints/test-executions.js @@ -332,7 +332,7 @@ test.serial('GET executions returns list of executions by default', async (t) => t.is(results.length, 3); t.is(meta.stack, process.env.stackName); t.is(meta.table, 'executions'); - t.is(meta.count, 3); + t.true(meta.count > 0); const arns = fakeExecutions.map((i) => i.arn); results.forEach((r) => { t.true(arns.includes(r.arn)); diff --git a/packages/api/tests/endpoints/test-granules.js b/packages/api/tests/endpoints/test-granules.js index 15f878ebe01..196c98d3aa7 100644 --- a/packages/api/tests/endpoints/test-granules.js +++ b/packages/api/tests/endpoints/test-granules.js @@ -439,7 +439,7 @@ test.serial('default lists and paginates correctly from querying database', asyn t.is(results.length, 4); t.is(meta.stack, process.env.stackName); t.is(meta.table, 'granules'); - t.is(meta.count, 4); + t.true(meta.count > 0); results.forEach((r) => { t.true(granuleIds.includes(r.granuleId)); }); diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index c039ed222f8..663c0ebfb03 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -1,4 +1,5 @@ import { Knex } from 'knex'; +import get from 'lodash/get'; import omit from 
'lodash/omit'; import Logger from '@cumulus/logger'; @@ -83,6 +84,17 @@ class BaseSearch { return !!(not?.providerName || term?.providerName || terms?.providerName); } + /** + * Determine if an estimated row count should be returned + * + * @param countSql - sql statement for count + * @returns whether an estimated row count should be returned + */ + protected shouldEstimateRowcount(countSql: string): boolean { + const isBasicQuery = (countSql === `select count(*) from "${this.tableName}"`); + return this.dbQueryParameters.estimateTableRowCount === true && isBasicQuery; + } + /** * Build the search query * @@ -439,6 +451,28 @@ class BaseSearch { throw new Error('translatePostgresRecordsToApiRecords is not implemented'); } + /** + * Get estimated table rowcount + * + * @param params + * @param params.knex - DB client + * @param [params.tableName] - table name + * @returns rowcount + */ + protected async getEstimatedRowcount(params: { + knex: Knex, + tableName? : string, + }) : Promise { + const { knex, tableName = this.tableName } = params; + const query = knex.raw(`EXPLAIN (FORMAT JSON) select * from "${tableName}"`); + log.debug(`Estimating the row count ${query.toSQL().sql}`); + const countResult = await query; + const countPath = 'rows[0]["QUERY PLAN"][0].Plan["Plan Rows"]'; + const estimatedCount = get(countResult, countPath); + const count = Number(estimatedCount ?? 0); + return count; + } + /** * Build and execute search query * @@ -448,12 +482,22 @@ class BaseSearch { async query(testKnex?: Knex) { const knex = testKnex ?? await getKnexClient(); const { countQuery, searchQuery } = this.buildSearch(knex); + + const shouldEstimateRowcount = countQuery + ? this.shouldEstimateRowcount(countQuery?.toSQL().sql) + : false; + const getEstimate = shouldEstimateRowcount + ? this.getEstimatedRowcount({ knex }) + : undefined; + try { - const [countResult, pgRecords] = await Promise.all([countQuery, searchQuery]); + const [countResult, pgRecords] = await Promise.all([ + getEstimate || countQuery, searchQuery, + ]); const meta = this._metaTemplate(); meta.limit = this.dbQueryParameters.limit; meta.page = this.dbQueryParameters.page; - meta.count = Number(countResult[0]?.count ?? 0); + meta.count = shouldEstimateRowcount ? countResult : Number(countResult[0]?.count ?? 
0); const apiRecords = await this.translatePostgresRecordsToApiRecords(pgRecords, knex); diff --git a/packages/db/src/search/CollectionSearch.ts b/packages/db/src/search/CollectionSearch.ts index 2b436b5dd66..af30b66989b 100644 --- a/packages/db/src/search/CollectionSearch.ts +++ b/packages/db/src/search/CollectionSearch.ts @@ -53,7 +53,7 @@ export class CollectionSearch extends BaseSearch { searchQuery: Knex.QueryBuilder, } { const countQuery = knex(this.tableName) - .count(`${this.tableName}.cumulus_id`); + .count('*'); const searchQuery = knex(this.tableName) .select(`${this.tableName}.*`); @@ -136,7 +136,7 @@ export class CollectionSearch extends BaseSearch { const granulesTable = TableNames.granules; const statsQuery = knex(granulesTable) .select(`${granulesTable}.collection_cumulus_id`, `${granulesTable}.status`) - .count(`${granulesTable}.status`) + .count('*') .groupBy(`${granulesTable}.collection_cumulus_id`, `${granulesTable}.status`) .whereIn(`${granulesTable}.collection_cumulus_id`, collectionCumulusIds); diff --git a/packages/db/src/search/ExecutionSearch.ts b/packages/db/src/search/ExecutionSearch.ts index 2d1618b7fc8..9dd5621933b 100644 --- a/packages/db/src/search/ExecutionSearch.ts +++ b/packages/db/src/search/ExecutionSearch.ts @@ -1,6 +1,7 @@ import { Knex } from 'knex'; import Logger from '@cumulus/logger'; import pick from 'lodash/pick'; +import set from 'lodash/set'; import { constructCollectionId } from '@cumulus/message/Collections'; import { ApiExecutionRecord } from '@cumulus/types/api/executions'; import { BaseSearch } from './BaseSearch'; @@ -24,6 +25,10 @@ interface ExecutionRecord extends BaseRecord, PostgresExecutionRecord { */ export class ExecutionSearch extends BaseSearch { constructor(event: QueryEvent) { + // estimate the table rowcount by default + if (event?.queryStringParameters?.estimateTableRowCount !== 'false') { + set(event, 'queryStringParameters.estimateTableRowCount', 'true'); + } super(event, 'execution'); } @@ -34,8 +39,7 @@ export class ExecutionSearch extends BaseSearch { */ protected searchAsync(): boolean { const { not, term, terms } = this.dbQueryParameters; - return (!!(not?.asyncOperationId || - term?.asyncOperationId || terms?.asyncOperationId)); + return (!!(not?.asyncOperationId || term?.asyncOperationId || terms?.asyncOperationId)); } /** @@ -45,8 +49,7 @@ export class ExecutionSearch extends BaseSearch { */ protected searchParent(): boolean { const { not, term, terms } = this.dbQueryParameters; - return (!!(not?.parentArn || - term?.parentArn || terms?.parentArn)); + return (!!(not?.parentArn || term?.parentArn || terms?.parentArn)); } /** @@ -66,20 +69,24 @@ export class ExecutionSearch extends BaseSearch { executions: executionsTable, } = TableNames; - const searchQuery = knex(`${this.tableName} as ${this.tableName}`) + const searchQuery = knex(`${this.tableName}`) .select(`${this.tableName}.*`) .select({ collectionName: `${collectionsTable}.name`, collectionVersion: `${collectionsTable}.version`, - asyncOperationId: `${asyncOperationsTable}.id`, + }); + if (this.searchAsync() || this.dbQueryParameters.includeFullRecord) { + searchQuery.select({ asyncOperationId: `${asyncOperationsTable}.id` }); + } + if (this.searchParent() || this.dbQueryParameters.includeFullRecord) { searchQuery.select({ parentArn: `${executionsTable}_parent.arn` }); } const countQuery = knex(this.tableName) - .count(`${this.tableName}.cumulus_id`); + .count('*'); if (this.searchCollection()) { countQuery.innerJoin(collectionsTable, 
`${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); @@ -91,7 +98,7 @@ export class ExecutionSearch extends BaseSearch { if (this.searchAsync()) { countQuery.innerJoin(asyncOperationsTable, `${this.tableName}.async_operation_cumulus_id`, `${asyncOperationsTable}.cumulus_id`); searchQuery.innerJoin(asyncOperationsTable, `${this.tableName}.async_operation_cumulus_id`, `${asyncOperationsTable}.cumulus_id`); - } else { + } else if (this.dbQueryParameters.includeFullRecord) { searchQuery.leftJoin(asyncOperationsTable, `${this.tableName}.async_operation_cumulus_id`, `${asyncOperationsTable}.cumulus_id`); } @@ -138,8 +145,8 @@ export class ExecutionSearch extends BaseSearch { log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); const apiRecords = pgRecords.map((executionRecord: ExecutionRecord) => { const { collectionName, collectionVersion, asyncOperationId, parentArn } = executionRecord; - const collectionId = collectionName && collectionVersion ? - constructCollectionId(collectionName, collectionVersion) : undefined; + const collectionId = collectionName && collectionVersion + ? constructCollectionId(collectionName, collectionVersion) : undefined; const apiRecord = translatePostgresExecutionToApiExecutionWithoutDbQuery({ executionRecord, collectionId, diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts index 56e57a9bfa7..c1b98ced70d 100644 --- a/packages/db/src/search/GranuleSearch.ts +++ b/packages/db/src/search/GranuleSearch.ts @@ -1,5 +1,6 @@ import { Knex } from 'knex'; import pick from 'lodash/pick'; +import set from 'lodash/set'; import { ApiGranuleRecord } from '@cumulus/types/api/granules'; import Logger from '@cumulus/logger'; @@ -30,6 +31,10 @@ interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { */ export class GranuleSearch extends BaseSearch { constructor(event: QueryEvent) { + // estimate the table rowcount by default + if (event?.queryStringParameters?.estimateTableRowCount !== 'false') { + set(event, 'queryStringParameters.estimateTableRowCount', 'true'); + } super(event, 'granule'); } @@ -50,7 +55,7 @@ export class GranuleSearch extends BaseSearch { pdrs: pdrsTable, } = TableNames; const countQuery = knex(this.tableName) - .count(`${this.tableName}.cumulus_id`); + .count('*'); const searchQuery = knex(this.tableName) .select(`${this.tableName}.*`) diff --git a/packages/db/src/search/StatsSearch.ts b/packages/db/src/search/StatsSearch.ts index 5a2ddbfebf7..59e19804291 100644 --- a/packages/db/src/search/StatsSearch.ts +++ b/packages/db/src/search/StatsSearch.ts @@ -1,10 +1,15 @@ import omit from 'lodash/omit'; import { Knex } from 'knex'; + +import Logger from '@cumulus/logger'; + import { getKnexClient } from '../connection'; import { TableNames } from '../tables'; import { DbQueryParameters, QueryEvent } from '../types/search'; import { BaseSearch } from './BaseSearch'; +const log = new Logger({ sender: '@cumulus/db/StatsSearch' }); + type TotalSummary = { count_errors: number, count_collections: number, @@ -72,8 +77,8 @@ class StatsSearch extends BaseSearch { /** * Formats the postgres records into an API stats/aggregate response * - * @param {Record} result - the postgres query results - * @returns {ApiAggregateResult} the api object with the aggregate statistics + * @param result - the postgres query results + * @returns the api object with the aggregate statistics */ private formatAggregateResult(result: Record): ApiAggregateResult { let totalCount = 0; 
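Because `GranuleSearch` and `ExecutionSearch` above now default `estimateTableRowCount` to true for basic list queries, callers that need an exact total can opt out through the query string. A minimal sketch assuming the `GranuleSearch` export from `@cumulus/db`:

const { GranuleSearch } = require('@cumulus/db');

// Sketch: disable the planner estimate so meta.count comes from a real count(*).
const listGranulesWithExactCount = async () => {
  const search = new GranuleSearch({
    queryStringParameters: { limit: '10', page: '1', estimateTableRowCount: 'false' },
  });
  return await search.query();
};
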
@@ -100,8 +105,8 @@ class StatsSearch extends BaseSearch { /** * Formats the postgres results into an API stats/summary response * - * @param {TotalSummary} result - the knex summary query results - * @returns {SummaryResult} the api object with the summary statistics + * @param result - the knex summary query results + * @returns the api object with the summary statistics */ private formatSummaryResult(result: TotalSummary): SummaryResult { const timestampTo = this.dbQueryParameters.range?.updated_at?.lte ?? new Date(); @@ -143,27 +148,28 @@ class StatsSearch extends BaseSearch { /** * Queries postgres for a summary of statistics around the granules in the system * - * @param {Knex} sendKnex - the knex client to be used - * @returns {Promise} the postgres aggregations based on query + * @param testKnex - the knex client to be used + * @returns the postgres aggregations based on query */ - public async summary(sendKnex: Knex): Promise { - const knex = sendKnex ?? await getKnexClient(); + public async summary(testKnex?: Knex): Promise { + const knex = testKnex ?? await getKnexClient(); const aggregateQuery: Knex.QueryBuilder = knex(this.tableName); this.buildRangeQuery({ searchQuery: aggregateQuery }); aggregateQuery.select( knex.raw(`COUNT(CASE WHEN ${this.tableName}.error ->> 'Error' is not null THEN 1 END) AS count_errors`), - knex.raw(`COUNT(${this.tableName}.cumulus_id) AS count_granules`), + knex.raw('COUNT(*) AS count_granules'), knex.raw(`AVG(${this.tableName}.duration) AS avg_processing_time`), knex.raw(`COUNT(DISTINCT ${this.tableName}.collection_cumulus_id) AS count_collections`) ); + log.debug(`summary about to execute query: ${aggregateQuery?.toSQL().sql}`); const aggregateQueryRes: TotalSummary[] = await aggregateQuery; return this.formatSummaryResult(aggregateQueryRes[0]); } /** - * Performs joins on the provider and/or collection table if neccessary + * Performs joins on the collections/pdrs/providers table if neccessary * - * @param {Knex.QueryBuilder} query - the knex query to be joined or not + * @param query - the knex query to be joined or not */ private joinTables(query: Knex.QueryBuilder) { const { @@ -187,8 +193,8 @@ class StatsSearch extends BaseSearch { /** * Aggregates the search query based on queryStringParameters * - * @param {Knex.QueryBuilder} query - the knex query to be aggregated - * @param {Knex} knex - the knex client to be used + * @param query - the knex query to be aggregated + * @param knex - the knex client to be used */ private aggregateQueryField(query: Knex.QueryBuilder, knex: Knex) { if (this.field?.includes('error.Error')) { @@ -197,7 +203,7 @@ class StatsSearch extends BaseSearch { query.select(`${this.tableName}.${this.field} as aggregatedfield`); } query.modify((queryBuilder) => this.joinTables(queryBuilder)) - .count(`${this.tableName}.cumulus_id as count`) + .count('* as count') .groupBy('aggregatedfield') .orderBy([{ column: 'count', order: 'desc' }, { column: 'aggregatedfield' }]); } @@ -205,7 +211,7 @@ class StatsSearch extends BaseSearch { /** * Builds basic query * - * @param {Knex} knex - the knex client + * @param knex - the knex client * @returns the search query */ protected buildBasicQuery(knex: Knex) @@ -221,7 +227,7 @@ class StatsSearch extends BaseSearch { * Builds queries for infix and prefix * * @param params - * @param {Knex.QueryBuilder} params.searchQuery - the search query + * @param params.searchQuery - the search query * @param [params.dbQueryParameters] - the db query parameters */ protected 
buildInfixPrefixQuery(params: { @@ -243,9 +249,9 @@ class StatsSearch extends BaseSearch { * Builds queries for term fields * * @param params - * @param {Knex.QueryBuilder} params.searchQuery - the search query + * @param params.searchQuery - the search query * @param [params.dbQueryParameters] - the db query parameters - * @returns {Knex.QueryBuilder} - the updated search query based on queryStringParams + * @returns the updated search query based on queryStringParams */ protected buildTermQuery(params: { searchQuery: Knex.QueryBuilder, @@ -267,10 +273,10 @@ class StatsSearch extends BaseSearch { /** * Executes the aggregate search query * - * @param {Knex | undefined} testKnex - the knex client to be used - * @returns {Promise} - the aggregate query results in api format + * @param testKnex - the knex client to be used + * @returns the aggregate query results in api format */ - async aggregate(testKnex: Knex | undefined): Promise { + async aggregate(testKnex?: Knex): Promise { const knex = testKnex ?? await getKnexClient(); const { searchQuery } = this.buildSearch(knex); try { diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts index 3e1bbe4ddd1..824064a52da 100644 --- a/packages/db/src/search/queries.ts +++ b/packages/db/src/search/queries.ts @@ -15,7 +15,9 @@ const reservedWords = [ 'order', 'prefix', 'infix', + 'estimateTableRowCount', 'fields', + 'includeFullRecord', 'searchContext', ]; @@ -229,7 +231,15 @@ export const convertQueryStringToDbQueryParameters = ( type: string, queryStringParameters: QueryStringParameters ): DbQueryParameters => { - const { limit, page, prefix, infix, fields, includeFullRecord } = queryStringParameters; + const { + limit, + page, + prefix, + infix, + fields, + estimateTableRowCount, + includeFullRecord, + } = queryStringParameters; const dbQueryParameters: DbQueryParameters = {}; dbQueryParameters.page = Number.parseInt(page ?? 
'1', 10); @@ -239,6 +249,7 @@ export const convertQueryStringToDbQueryParameters = ( if (typeof infix === 'string') dbQueryParameters.infix = infix; if (typeof prefix === 'string') dbQueryParameters.prefix = prefix; if (typeof fields === 'string') dbQueryParameters.fields = fields.split(','); + dbQueryParameters.estimateTableRowCount = (estimateTableRowCount === 'true'); dbQueryParameters.includeFullRecord = (includeFullRecord === 'true'); dbQueryParameters.sort = convertSort(type, queryStringParameters); diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index 2dbad8f2287..8d129082544 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -33,6 +33,7 @@ export type DbQueryParameters = { infix?: string, limit?: number, includeFullRecord?: boolean, + estimateTableRowCount?: boolean, exists?: { [key: string]: boolean }, not?: { [key: string]: QueriableType | undefined }, offset?: number, diff --git a/packages/db/tests/search/test-ExecutionSearch.js b/packages/db/tests/search/test-ExecutionSearch.js index deab4baad84..3b825fb8ec0 100644 --- a/packages/db/tests/search/test-ExecutionSearch.js +++ b/packages/db/tests/search/test-ExecutionSearch.js @@ -105,7 +105,10 @@ test.after.always(async (t) => { test('ExecutionSearch returns correct response for basic query', async (t) => { const { knex } = t.context; - const dbSearch = new ExecutionSearch({}); + const queryStringParameters = { + estimateTableRowCount: 'false', + }; + const dbSearch = new ExecutionSearch({ queryStringParameters }); const results = await dbSearch.query(knex); t.is(results.meta.count, 50); t.is(results.results.length, 10); @@ -118,7 +121,6 @@ test('ExecutionSearch returns correct response for basic query', async (t) => { finalPayload: { final: 'payload__0' }, type: 'testWorkflow__0', execution: 'https://fake-execution0.com/', - asyncOperationId: t.context.testAsyncOperation.id, collectionId: 'testCollection___8', createdAt: new Date(2017, 11, 31).getTime(), updatedAt: new Date(2018, 0, 1).getTime(), @@ -145,6 +147,7 @@ test('ExecutionSearch returns correct response for basic query', async (t) => { test('ExecutionSearch supports page and limit params', async (t) => { const { knex } = t.context; let queryStringParameters = { + estimateTableRowCount: 'false', limit: 25, page: 2, }; @@ -154,6 +157,7 @@ test('ExecutionSearch supports page and limit params', async (t) => { t.is(response.results?.length, 25); queryStringParameters = { + estimateTableRowCount: 'false', limit: 10, page: 5, }; @@ -163,6 +167,7 @@ test('ExecutionSearch supports page and limit params', async (t) => { t.is(response.results?.length, 10); queryStringParameters = { + estimateTableRowCount: 'false', limit: 10, page: 11, }; @@ -218,6 +223,7 @@ test('ExecutionSearch supports asyncOperationId term search', async (t) => { const response = await dbSearch.query(knex); t.is(response.meta.count, 25); t.is(response.results?.length, 25); + t.is(response.results[0].asyncOperationId, t.context.testAsyncOperation.id); }); test('ExecutionSearch supports term search for number field', async (t) => { @@ -290,6 +296,7 @@ test('ExecutionSearch supports range search', async (t) => { test('ExecutionSearch non-existing fields are ignored', async (t) => { const { knex } = t.context; const queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, @@ -304,6 +311,7 @@ 
test('ExecutionSearch returns fields specified', async (t) => { const { knex } = t.context; const fields = 'status,arn,type,error'; const queryStringParameters = { + estimateTableRowCount: 'false', fields, }; const dbSearch = new ExecutionSearch({ queryStringParameters }); @@ -330,6 +338,7 @@ test('ExecutionSearch supports search for multiple fields', async (t) => { test('ExecutionSearch supports sorting', async (t) => { const { knex } = t.context; let queryStringParameters = { + estimateTableRowCount: 'false', limit: 50, sort_by: 'timestamp', }; @@ -341,6 +350,7 @@ test('ExecutionSearch supports sorting', async (t) => { t.true(response.results[1].updatedAt < response.results[25].updatedAt); queryStringParameters = { + estimateTableRowCount: 'false', limit: 50, sort_by: 'timestamp', order: 'desc', @@ -353,6 +363,7 @@ test('ExecutionSearch supports sorting', async (t) => { t.true(response2.results[1].updatedAt > response2.results[25].updatedAt); queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_key: ['-timestamp'], }; @@ -426,7 +437,6 @@ test('ExecutionSearch supports parentArn term search', async (t) => { finalPayload: { final: 'payload__46' }, type: 'testWorkflow__46', execution: 'https://fake-execution46.com/', - asyncOperationId: t.context.testAsyncOperation.id, collectionId: 'testCollection___8', parentArn: 'fakeArn__21:fakeExecutionName', createdAt: new Date(2022, 10, 16).getTime(), @@ -544,14 +554,60 @@ test('ExecutionSearch supports term search for date field', async (t) => { test('ExecutionSearch includeFullRecord', async (t) => { const { knex } = t.context; const queryStringParameters = { + estimateTableRowCount: 'false', limit: 50, includeFullRecord: 'true', }; + + const dbSearch = new ExecutionSearch({ queryStringParameters }); + const results = await dbSearch.query(knex); + t.is(results.meta.count, 50); + t.is(results.results.length, 50); + const expectedResponse1 = { + name: 'testExecutionName', + status: 'failed', + arn: 'testArn__0:testExecutionName', + error: { Error: 'UnknownError' }, + originalPayload: { orginal: 'payload__0' }, + finalPayload: { final: 'payload__0' }, + type: 'testWorkflow__0', + execution: 'https://fake-execution0.com/', + asyncOperationId: t.context.testAsyncOperation.id, + collectionId: 'testCollection___8', + createdAt: new Date(2017, 11, 31).getTime(), + updatedAt: new Date(2018, 0, 1).getTime(), + timestamp: new Date(2018, 0, 1).getTime(), + }; + + const expectedResponse40 = { + name: 'testExecutionName', + status: 'completed', + arn: 'testArn__40:testExecutionName', + duration: 100, + error: { Error: 'UnknownError' }, + originalPayload: { orginal: 'payload__40' }, + finalPayload: { final: 'payload__40' }, + type: 'testWorkflow__40', + execution: 'https://fake-execution40.com/', + asyncOperationId: t.context.testAsyncOperation.id, + collectionId: 'testCollection___8', + parentArn: 'fakeArn__15:fakeExecutionName', + createdAt: new Date(2022, 4, 10).getTime(), + updatedAt: new Date(2022, 4, 12).getTime(), + timestamp: new Date(2022, 4, 12).getTime(), + }; + + t.deepEqual(results.results[0], expectedResponse1); + t.deepEqual(results.results[40], expectedResponse40); +}); + +test('ExecutionSearch estimates the rowcount of the table by default', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + }; const dbSearch = new ExecutionSearch({ queryStringParameters }); const response = await dbSearch.query(knex); - t.is(response.meta.count, 50); + t.true(response.meta.count > 0); 
t.is(response.results?.length, 50); - t.true('parentArn' in response.results[40]); - t.true('collectionId' in response.results[40]); - t.true('asyncOperationId' in response.results[40]); }); diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index 04911d29bb0..5d055c47a53 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -169,6 +169,7 @@ test('GranuleSearch returns 10 granule records by default', async (t) => { test('GranuleSearch supports page and limit params', async (t) => { const { knex } = t.context; let queryStringParameters = { + estimateTableRowCount: 'false', limit: 20, page: 2, }; @@ -178,6 +179,7 @@ test('GranuleSearch supports page and limit params', async (t) => { t.is(response.results?.length, 20); queryStringParameters = { + estimateTableRowCount: 'false', limit: 11, page: 10, }; @@ -187,6 +189,7 @@ test('GranuleSearch supports page and limit params', async (t) => { t.is(response.results?.length, 1); queryStringParameters = { + estimateTableRowCount: 'false', limit: 10, page: 11, }; @@ -409,6 +412,7 @@ test('GranuleSearch supports search for multiple fields', async (t) => { test('GranuleSearch non-existing fields are ignored', async (t) => { const { knex } = t.context; const queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, @@ -423,6 +427,7 @@ test('GranuleSearch returns fields specified', async (t) => { const { knex } = t.context; const fields = 'granuleId,endingDateTime,collectionId,published,status'; const queryStringParameters = { + estimateTableRowCount: 'false', fields, }; const dbSearch = new GranuleSearch({ queryStringParameters }); @@ -435,6 +440,7 @@ test('GranuleSearch returns fields specified', async (t) => { test('GranuleSearch supports sorting', async (t) => { const { knex } = t.context; let queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_by: 'timestamp', }; @@ -446,6 +452,7 @@ test('GranuleSearch supports sorting', async (t) => { t.true(response.results[1].updatedAt < response.results[50].updatedAt); queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_by: 'timestamp', order: 'desc', @@ -458,6 +465,7 @@ test('GranuleSearch supports sorting', async (t) => { t.true(response2.results[1].updatedAt > response2.results[50].updatedAt); queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_key: ['-timestamp'], }; @@ -469,6 +477,7 @@ test('GranuleSearch supports sorting', async (t) => { t.true(response3.results[1].updatedAt > response3.results[50].updatedAt); queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_key: ['+productVolume'], }; @@ -480,6 +489,7 @@ test('GranuleSearch supports sorting', async (t) => { t.true(Number(response4.results[98].productVolume) < Number(response4.results[99].productVolume)); queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_key: ['-timestamp', '+productVolume'], }; @@ -493,6 +503,7 @@ test('GranuleSearch supports sorting', async (t) => { t.true(Number(response5.results[0].productVolume) < Number(response5.results[10].productVolume)); queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_key: ['-timestamp'], sort_by: 'timestamp', @@ -509,6 +520,7 @@ test('GranuleSearch supports sorting', async (t) => { 
test('GranuleSearch supports sorting by CollectionId', async (t) => { const { knex } = t.context; let queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_by: 'collectionId', order: 'asc', @@ -521,6 +533,7 @@ test('GranuleSearch supports sorting by CollectionId', async (t) => { t.true(response8.results[0].collectionId < response8.results[50].collectionId); queryStringParameters = { + estimateTableRowCount: 'false', limit: 200, sort_key: ['-collectionId'], }; @@ -835,3 +848,14 @@ test('GranuleSearch supports search which checks existence of error', async (t) t.is(response.meta.count, 50); t.is(response.results?.length, 50); }); + +test('GranuleSearch estimates the rowcount of the table by default', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.true(response.meta.count > 0); + t.is(response.results?.length, 50); +}); diff --git a/packages/db/tests/search/test-queries.js b/packages/db/tests/search/test-queries.js index 70d8a69c404..0a1ecfff67e 100644 --- a/packages/db/tests/search/test-queries.js +++ b/packages/db/tests/search/test-queries.js @@ -28,6 +28,7 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string }; const expectedDbQueryParameters = { + estimateTableRowCount: false, exists: { error: true, }, @@ -83,6 +84,7 @@ test('convertQueryStringToDbQueryParameters correctly converts sortby error para }; const expectedDbQueryParameters = { + estimateTableRowCount: false, limit: 10, offset: 0, page: 1, From 6f7032133d3e5b85a7c235bdc6d1c121f1d8214a Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Wed, 7 Aug 2024 11:36:00 -0400 Subject: [PATCH 15/61] CUMULUS-3238: Remove ElasticSearch dependency from Collection POST, PUT, and DEL endpoints (#3746) * first commit * CHANGELOG change * PR feedback * PR feedback --- CHANGELOG.md | 3 +- packages/api/endpoints/collections.js | 38 +-- packages/api/lib/testUtils.js | 8 - .../collections/create-collection.js | 78 +----- .../collections/delete-collection.js | 239 ++++-------------- .../endpoints/collections/get-collection.js | 16 -- .../collections/update-collection.js | 158 +----------- 7 files changed, 60 insertions(+), 480 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58f0e59b9e2..8f742afec4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
## [Unreleased] ### Replace ElasticSearch Phase 1 - +- **CUMULUS-3238** + - Removed elasticsearch dependency from collections endpoint - **CUMULUS-3239** - Updated `executions` list api endpoint and added `ExecutionSearch` class to query postgres - **CUMULUS-3240** diff --git a/packages/api/endpoints/collections.js b/packages/api/endpoints/collections.js index a6688407c50..898c41dab9a 100644 --- a/packages/api/endpoints/collections.js +++ b/packages/api/endpoints/collections.js @@ -9,7 +9,6 @@ const { RecordDoesNotExist, } = require('@cumulus/errors'); const Logger = require('@cumulus/logger'); -const { constructCollectionId } = require('@cumulus/message/Collections'); const { CollectionPgModel, @@ -21,11 +20,6 @@ const { CollectionSearch, } = require('@cumulus/db'); const CollectionConfigStore = require('@cumulus/collection-config-store'); -const { getEsClient, Search } = require('@cumulus/es-client/search'); -const { - indexCollection, - deleteCollection, -} = require('@cumulus/es-client/indexer'); const { publishCollectionCreateSnsMessage, publishCollectionDeleteSnsMessage, @@ -108,7 +102,6 @@ async function post(req, res) { const { collectionPgModel = new CollectionPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), collectionConfigStore = new CollectionConfigStore( process.env.system_bucket, process.env.stackName @@ -135,9 +128,6 @@ async function post(req, res) { await createRejectableTransaction(knex, async (trx) => { const [pgCollection] = await collectionPgModel.create(trx, dbRecord); translatedCollection = await translatePostgresCollectionToApiCollection(pgCollection); - // process.env.ES_INDEX is only used to isolate the index for - // each unit test suite - await indexCollection(esClient, translatedCollection, process.env.ES_INDEX); await publishCollectionCreateSnsMessage(translatedCollection); }); await collectionConfigStore.put(name, version, translatedCollection); @@ -176,7 +166,6 @@ async function put(req, res) { const { collectionPgModel = new CollectionPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), collectionConfigStore = new CollectionConfigStore( process.env.system_bucket, process.env.stackName @@ -211,11 +200,7 @@ async function put(req, res) { try { await createRejectableTransaction(knex, async (trx) => { const [pgCollection] = await collectionPgModel.upsert(trx, postgresCollection); - - // process.env.ES_INDEX is only used to isolate the index for - // each unit test suite apiPgCollection = translatePostgresCollectionToApiCollection(pgCollection); - await indexCollection(esClient, apiPgCollection, process.env.ES_INDEX); await publishCollectionUpdateSnsMessage(apiPgCollection); await collectionConfigStore.put(name, version, apiPgCollection); }); @@ -238,7 +223,6 @@ async function del(req, res) { const { collectionPgModel = new CollectionPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), collectionConfigStore = new CollectionConfigStore( process.env.system_bucket, process.env.stackName @@ -246,36 +230,20 @@ async function del(req, res) { } = req.testContext || {}; const { name, version } = req.params; - const collectionId = constructCollectionId(name, version); - const esCollectionsClient = new Search( - {}, - 'collection', - process.env.ES_INDEX - ); try { await collectionPgModel.get(knex, { name, version }); } catch (error) { if (error instanceof RecordDoesNotExist) { - if (!(await esCollectionsClient.exists(collectionId))) { - log.info('Collection does not exist in Elasticsearch and 
PostgreSQL'); - return res.boom.notFound('No record found'); - } - log.info('Collection does not exist in PostgreSQL, it only exists in Elasticsearch. Proceeding with deletion'); - } else { - throw error; + log.info(`Collection does not exist in PostgreSQL. Failed to delete collection with name ${name} and version ${version}`); + return res.boom.notFound('No record found'); } + throw error; } try { await createRejectableTransaction(knex, async (trx) => { await collectionPgModel.delete(trx, { name, version }); - await deleteCollection({ - esClient, - collectionId, - index: process.env.ES_INDEX, - ignore: [404], - }); await publishCollectionDeleteSnsMessage({ name, version }); }); } catch (error) { diff --git a/packages/api/lib/testUtils.js b/packages/api/lib/testUtils.js index ba9b74bf14c..f75f222f3bf 100644 --- a/packages/api/lib/testUtils.js +++ b/packages/api/lib/testUtils.js @@ -23,7 +23,6 @@ const { translatePostgresRuleToApiRule, } = require('@cumulus/db'); const { - indexCollection, indexProvider, indexRule, indexPdr, @@ -490,8 +489,6 @@ const createCollectionTestRecords = async (context, collectionParams) => { testKnex, collectionModel, collectionPgModel, - esClient, - esCollectionClient, } = context; const originalCollection = fakeCollectionFactory(collectionParams); if (collectionModel) { @@ -503,14 +500,9 @@ const createCollectionTestRecords = async (context, collectionParams) => { const originalPgRecord = await collectionPgModel.get( testKnex, { cumulus_id: pgCollection.cumulus_id } ); - await indexCollection(esClient, originalCollection, process.env.ES_INDEX); - const originalEsRecord = await esCollectionClient.get( - constructCollectionId(originalCollection.name, originalCollection.version) - ); return { originalCollection, originalPgRecord, - originalEsRecord, }; }; diff --git a/packages/api/tests/endpoints/collections/create-collection.js b/packages/api/tests/endpoints/collections/create-collection.js index 319260724d6..40940e24103 100644 --- a/packages/api/tests/endpoints/collections/create-collection.js +++ b/packages/api/tests/endpoints/collections/create-collection.js @@ -24,14 +24,6 @@ const { SubscribeCommand, DeleteTopicCommand, } = require('@aws-sdk/client-sns'); -const { - constructCollectionId, -} = require('@cumulus/message/Collections'); -const EsCollection = require('@cumulus/es-client/collections'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const CollectionConfigStore = require('@cumulus/collection-config-store'); const AccessToken = require('../../../models/access-tokens'); const { @@ -71,15 +63,6 @@ test.before(async (t) => { t.context.collectionPgModel = new CollectionPgModel(); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esCollectionClient = new EsCollection( - {}, - undefined, - t.context.esIndex - ); - await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomString(); @@ -124,7 +107,6 @@ test.afterEach(async (t) => { test.after.always(async (t) => { await accessTokenModel.deleteTable(); await recursivelyDeleteS3Bucket(process.env.system_bucket); - await cleanupTestIndex(t.context); await destroyLocalTestDb({ knex: t.context.testKnex, knexAdmin: t.context.testKnexAdmin, @@ -166,7 +148,7 @@ test('POST with invalid authorization scheme returns an invalid token response', assertions.isInvalidAuthorizationResponse(t, res); }); -test.serial('POST creates a new collection in all 
data stores and publishes an SNS message', async (t) => { +test.serial('POST creates a new collection and publishes an SNS message', async (t) => { const newCollection = fakeCollectionFactory(); const res = await request(app) @@ -192,11 +174,6 @@ test.serial('POST creates a new collection in all data stores and publishes an S t.is(res.body.message, 'Record saved'); t.like(res.body.record, translatedCollection); - const esRecord = await t.context.esCollectionClient.get( - constructCollectionId(newCollection.name, newCollection.version) - ); - t.like(esRecord, translatedCollection); - const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, WaitTimeSeconds: 10, @@ -209,7 +186,7 @@ test.serial('POST creates a new collection in all data stores and publishes an S t.deepEqual(message.record, translatedCollection); }); -test.serial('POST creates a new collection in all data stores with correct timestamps', async (t) => { +test.serial('POST creates a new collection with correct timestamps', async (t) => { const newCollection = fakeCollectionFactory(); await request(app) @@ -227,15 +204,8 @@ test.serial('POST creates a new collection in all data stores with correct times } ); - const esRecord = await t.context.esCollectionClient.get( - constructCollectionId(newCollection.name, newCollection.version) - ); - t.true(collectionPgRecord.created_at.getTime() > newCollection.createdAt); t.true(collectionPgRecord.updated_at.getTime() > newCollection.updatedAt); - // Records have the same timestamps - t.is(collectionPgRecord.created_at.getTime(), esRecord.createdAt); - t.is(collectionPgRecord.updated_at.getTime(), esRecord.updatedAt); }); test.serial('POST creates collection configuration store via name and version', async (t) => { @@ -528,7 +498,7 @@ test.serial('POST with file.checksumFor matching its own file returns 400 bad re t.true(res.body.message.includes('checksumFor \'^.*$\' cannot be used to validate itself')); }); -test.serial('POST does not write to Elasticsearch/SNS if writing to PostgreSQL fails', async (t) => { +test.serial('POST does not write to SNS if writing to PostgreSQL fails', async (t) => { const collection = fakeCollectionFactory(); const fakeCollectionPgModel = { @@ -548,48 +518,6 @@ test.serial('POST does not write to Elasticsearch/SNS if writing to PostgreSQL f t.true(response.boom.badImplementation.calledWithMatch('something bad')); - t.false(await t.context.esCollectionClient.exists( - constructCollectionId(collection.name, collection.version) - )); - - const { Messages } = await sqs().receiveMessage({ - QueueUrl: t.context.QueueUrl, - WaitTimeSeconds: 10, - }); - - t.is(Messages.length, 0); -}); - -test.serial('POST does not write to PostgreSQL/SNS if writing to Elasticsearch fails', async (t) => { - const collection = fakeCollectionFactory(); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - - const expressRequest = { - body: collection, - testContext: { - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await post(expressRequest, response); - - t.true(response.boom.badImplementation.calledWithMatch('something bad')); - - t.false( - await t.context.collectionPgModel.exists(t.context.testKnex, { - name: collection.name, - version: collection.version, - }) - ); - const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, WaitTimeSeconds: 10, diff --git 
a/packages/api/tests/endpoints/collections/delete-collection.js b/packages/api/tests/endpoints/collections/delete-collection.js index 11d452b4c08..cec03c1ea7a 100644 --- a/packages/api/tests/endpoints/collections/delete-collection.js +++ b/packages/api/tests/endpoints/collections/delete-collection.js @@ -25,12 +25,6 @@ const { const { constructCollectionId, } = require('@cumulus/message/Collections'); -const EsCollection = require('@cumulus/es-client/collections'); -const { indexCollection } = require('@cumulus/es-client/indexer'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const CollectionConfigStore = require('@cumulus/collection-config-store'); const { AccessToken } = require('../../../models'); const { @@ -70,15 +64,6 @@ test.before(async (t) => { t.context.collectionPgModel = new CollectionPgModel(); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esCollectionClient = new EsCollection( - {}, - undefined, - t.context.esIndex - ); - await s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomString(); @@ -134,7 +119,6 @@ test.afterEach(async (t) => { test.after.always(async (t) => { await accessTokenModel.deleteTable(); await recursivelyDeleteS3Bucket(process.env.system_bucket); - await cleanupTestIndex(t.context); await destroyLocalTestDb({ knex: t.context.testKnex, knexAdmin: t.context.testKnexAdmin, @@ -161,6 +145,54 @@ test('Attempting to delete a collection without an Authorization header returns ); }); +test.serial('del() does not publish an SNS message if removing from PostgreSQL fails', async (t) => { + const { + originalPgRecord, + } = await createCollectionTestRecords( + t.context + ); + + const fakeCollectionPgModel = { + delete: () => { + throw new Error('something bad'); + }, + get: () => Promise.resolve(originalPgRecord), + }; + + const expressRequest = { + params: { + name: originalPgRecord.name, + version: originalPgRecord.version, + }, + body: originalPgRecord, + testContext: { + knex: t.context.testKnex, + collectionPgModel: fakeCollectionPgModel, + }, + }; + + const response = buildFakeExpressResponse(); + + await t.throwsAsync( + del(expressRequest, response), + { message: 'something bad' } + ); + + t.true( + await t.context.collectionPgModel.exists(t.context.testKnex, { + name: originalPgRecord.name, + version: originalPgRecord.version, + }) + ); + + const { Messages } = await sqs().receiveMessage({ + QueueUrl: t.context.QueueUrl, + WaitTimeSeconds: 10, + }); + + t.is(Messages.length, 0); +}); + test('Attempting to delete a collection with an invalid access token returns an unauthorized response', async (t) => { const response = await request(app) .delete('/collections/asdf/asdf') @@ -183,10 +215,9 @@ test('DELETE returns a 404 if PostgreSQL collection cannot be found', async (t) t.is(response.body.message, 'No record found'); }); -test.serial('DELETE successfully deletes if collection exists in PostgreSQL but not Elasticsearch', async (t) => { +test.serial('DELETE successfully deletes if collection exists in PostgreSQL', async (t) => { const { collectionPgModel, - esCollectionClient, testKnex, } = t.context; const testCollection = fakeCollectionRecordFactory(); @@ -198,11 +229,6 @@ test.serial('DELETE successfully deletes if collection exists in PostgreSQL but version: testCollection.version, } )); - t.false( - await esCollectionClient.exists( - constructCollectionId(testCollection.name, 
testCollection.version) - ) - ); await request(app) .delete(`/collections/${testCollection.name}/${testCollection.version}`) @@ -219,69 +245,15 @@ test.serial('DELETE successfully deletes if collection exists in PostgreSQL but } ) ); - t.false( - await esCollectionClient.exists( - constructCollectionId(testCollection.name, testCollection.version) - ) - ); }); -test.serial('DELETE successfully deletes if collection exists in Elasticsearch but not PostgreSQL', async (t) => { - const { - collectionPgModel, - esClient, - esCollectionClient, - testKnex, - } = t.context; - const testCollection = fakeCollectionRecordFactory(); - await indexCollection(esClient, testCollection, process.env.ES_INDEX); - t.false(await collectionPgModel.exists( - testKnex, - { - name: testCollection.name, - version: testCollection.version, - } - )); - t.true( - await esCollectionClient.exists( - constructCollectionId(testCollection.name, testCollection.version) - ) - ); - - await request(app) - .delete(`/collections/${testCollection.name}/${testCollection.version}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.false( - await collectionPgModel.exists( - t.context.testKnex, - { - name: testCollection.name, - version: testCollection.version, - } - ) - ); - t.false( - await esCollectionClient.exists( - constructCollectionId(testCollection.name, testCollection.version) - ) - ); -}); - -test.serial('Deleting a collection removes it from all data stores and publishes an SNS message', async (t) => { +test.serial('Deleting a collection removes it and publishes an SNS message', async (t) => { const { originalPgRecord } = await createCollectionTestRecords(t.context); t.true(await t.context.collectionPgModel.exists(t.context.testKnex, { name: originalPgRecord.name, version: originalPgRecord.version, })); - t.true( - await t.context.esCollectionClient.exists( - constructCollectionId(originalPgRecord.name, originalPgRecord.version) - ) - ); await request(app) .delete(`/collections/${originalPgRecord.name}/${originalPgRecord.version}`) @@ -293,11 +265,6 @@ test.serial('Deleting a collection removes it from all data stores and publishes name: originalPgRecord.name, version: originalPgRecord.version, })); - t.false( - await t.context.esCollectionClient.exists( - constructCollectionId(originalPgRecord.name, originalPgRecord.version) - ) - ); const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, @@ -386,112 +353,6 @@ test.serial('Attempting to delete a collection with an associated rule does not )); }); -test.serial('del() does not remove from Elasticsearch or publish SNS message if removing from PostgreSQL fails', async (t) => { - const { - originalPgRecord, - } = await createCollectionTestRecords( - t.context - ); - - const fakeCollectionPgModel = { - delete: () => { - throw new Error('something bad'); - }, - get: () => Promise.resolve(originalPgRecord), - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - version: originalPgRecord.version, - }, - body: originalPgRecord, - testContext: { - knex: t.context.testKnex, - collectionPgModel: fakeCollectionPgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.collectionPgModel.exists(t.context.testKnex, { - name: originalPgRecord.name, - version: originalPgRecord.version, - }) - ); - t.true( - await t.context.esCollectionClient.exists( - 
constructCollectionId(originalPgRecord.name, originalPgRecord.version) - ) - ); - const { Messages } = await sqs().receiveMessage({ - QueueUrl: t.context.QueueUrl, - WaitTimeSeconds: 10, - }); - - t.is(Messages.length, 0); -}); - -test.serial('del() does not remove from PostgreSQL or publish SNS message if removing from Elasticsearch fails', async (t) => { - const { - originalPgRecord, - } = await createCollectionTestRecords( - t.context - ); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - delete: () => { - throw new Error('something bad'); - }, - }, - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - version: originalPgRecord.version, - }, - body: originalPgRecord, - testContext: { - knex: t.context.testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.collectionPgModel.exists(t.context.testKnex, { - name: originalPgRecord.name, - version: originalPgRecord.version, - }) - ); - t.true( - await t.context.esCollectionClient.exists( - constructCollectionId(originalPgRecord.name, originalPgRecord.version) - ) - ); - const { Messages } = await sqs().receiveMessage({ - QueueUrl: t.context.QueueUrl, - WaitTimeSeconds: 10, - }); - - t.is(Messages.length, 0); -}); - test.serial('del() deletes a collection and removes its configuration store via name and version', async (t) => { const newCollection = fakeCollectionFactory(); diff --git a/packages/api/tests/endpoints/collections/get-collection.js b/packages/api/tests/endpoints/collections/get-collection.js index 3a823c3c58a..e89dbaf2a4d 100644 --- a/packages/api/tests/endpoints/collections/get-collection.js +++ b/packages/api/tests/endpoints/collections/get-collection.js @@ -8,8 +8,6 @@ const { recursivelyDeleteS3Bucket, } = require('@cumulus/aws-client/S3'); const { randomString } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const { getEsClient } = require('@cumulus/es-client/search'); const { localStackConnectionEnv, generateLocalTestDb, @@ -34,9 +32,6 @@ process.env.TOKEN_SECRET = randomString(); // import the express app after setting the env variables const { app } = require('../../../app'); -const esIndex = randomString(); -let esClient; - let jwtAuthToken; let accessTokenModel; @@ -53,14 +48,6 @@ test.before(async (t) => { t.context.knexAdmin = knexAdmin; t.context.collectionPgModel = new CollectionPgModel(); - const esAlias = randomString(); - process.env.ES_INDEX = esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); - await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomString(); @@ -70,8 +57,6 @@ test.before(async (t) => { await accessTokenModel.createTable(); jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - - esClient = await getEsClient('fakehost'); }); test.beforeEach(async (t) => { @@ -85,7 +70,6 @@ test.beforeEach(async (t) => { test.after.always(async (t) => { await accessTokenModel.deleteTable(); await recursivelyDeleteS3Bucket(process.env.system_bucket); - await esClient.client.indices.delete({ index: esIndex }); await destroyLocalTestDb({ knex: t.context.knex, knexAdmin: t.context.knexAdmin, diff --git a/packages/api/tests/endpoints/collections/update-collection.js 
b/packages/api/tests/endpoints/collections/update-collection.js index 48a1dba000a..e6f96a48448 100644 --- a/packages/api/tests/endpoints/collections/update-collection.js +++ b/packages/api/tests/endpoints/collections/update-collection.js @@ -21,14 +21,6 @@ const { translatePostgresCollectionToApiCollection, fakeCollectionRecordFactory, } = require('@cumulus/db'); -const { - constructCollectionId, -} = require('@cumulus/message/Collections'); -const EsCollection = require('@cumulus/es-client/collections'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const CollectionConfigStore = require('@cumulus/collection-config-store'); const { InvalidRegexError, @@ -71,15 +63,6 @@ test.before(async (t) => { t.context.testKnexAdmin = knexAdmin; t.context.collectionPgModel = new CollectionPgModel(); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esCollectionClient = new EsCollection( - {}, - undefined, - t.context.esIndex - ); - await s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomString(); await setAuthorizedOAuthUsers([username]); @@ -122,7 +105,6 @@ test.afterEach(async (t) => { test.after.always(async (t) => { await accessTokenModel.deleteTable(); await recursivelyDeleteS3Bucket(process.env.system_bucket); - await cleanupTestIndex(t.context); await destroyLocalTestDb({ knex: t.context.testKnex, knexAdmin: t.context.testKnexAdmin, @@ -155,7 +137,6 @@ test.serial('PUT replaces an existing collection and sends an SNS message', asyn const { originalCollection, originalPgRecord, - originalEsRecord, } = await createCollectionTestRecords( t.context, { @@ -194,21 +175,6 @@ test.serial('PUT replaces an existing collection and sends an SNS message', asyn updated_at: actualPgCollection.updated_at, }); - const updatedEsRecord = await t.context.esCollectionClient.get( - constructCollectionId(originalCollection.name, originalCollection.version) - ); - t.like( - updatedEsRecord, - { - ...originalEsRecord, - duplicateHandling: 'error', - process: undefined, - createdAt: originalCollection.createdAt, - updatedAt: actualPgCollection.updated_at.getTime(), - timestamp: updatedEsRecord.timestamp, - } - ); - const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, WaitTimeSeconds: 10, @@ -270,49 +236,7 @@ test.serial('PUT replaces an existing collection and correctly removes fields', }); }); -test.serial('PUT replaces an existing collection in PG with correct timestamps', async (t) => { - const knex = t.context.testKnex; - const { originalCollection } = await createCollectionTestRecords( - t.context, - { - duplicateHandling: 'replace', - process: randomString(), - createdAt: Date.now(), - updatedAt: Date.now(), - } - ); - const updatedCollection = { - ...originalCollection, - updatedAt: Date.now(), - createdAt: Date.now(), - duplicateHandling: 'error', - }; - - await request(app) - .put(`/collections/${originalCollection.name}/${originalCollection.version}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(updatedCollection) - .expect(200); - - const actualPgCollection = await t.context.collectionPgModel.get(knex, { - name: originalCollection.name, - version: originalCollection.version, - }); - - const updatedEsRecord = await t.context.esCollectionClient.get( - constructCollectionId(originalCollection.name, originalCollection.version) - ); - - // Endpoint logic will set an updated 
timestamp and ignore the value from the request - // body, so value on actual records should be different (greater) than the value - // sent in the request body - // createdAt timestamp from original record should have been preserved - t.is(actualPgCollection.created_at.getTime(), updatedEsRecord.createdAt); - t.is(actualPgCollection.updated_at.getTime(), updatedEsRecord.updatedAt); -}); - -test.serial('PUT replaces an existing collection in all data stores with correct timestamps', async (t) => { +test.serial('PUT replaces an existing collection with correct timestamps', async (t) => { const { originalCollection } = await createCollectionTestRecords( t.context, { @@ -342,19 +266,12 @@ test.serial('PUT replaces an existing collection in all data stores with correct version: originalCollection.version, }); - const updatedEsRecord = await t.context.esCollectionClient.get( - constructCollectionId(originalCollection.name, originalCollection.version) - ); - // Endpoint logic will set an updated timestamp and ignore the value from the request // body, so value on actual records should be different (greater) than the value // sent in the request body t.true(actualPgCollection.updated_at.getTime() > updatedCollection.updatedAt); // createdAt timestamp from original record should have been preserved t.is(actualPgCollection.created_at.getTime(), originalCollection.createdAt); - // PG and ES records have the same timestamps - t.is(actualPgCollection.created_at.getTime(), updatedEsRecord.createdAt); - t.is(actualPgCollection.updated_at.getTime(), updatedEsRecord.updatedAt); }); test.serial('PUT updates collection configuration store via name and version', async (t) => { @@ -435,12 +352,11 @@ test.serial('PUT returns 400 for version mismatch between params and payload', a t.falsy(record); }); -test.serial('PUT does not write to Elasticsearch or publish SNS message if writing to PostgreSQL fails', async (t) => { +test.serial('PUT does not publish SNS message if writing to PostgreSQL fails', async (t) => { const { testKnex } = t.context; const { originalCollection, originalPgRecord, - originalEsRecord, } = await createCollectionTestRecords( t.context, { @@ -483,76 +399,6 @@ test.serial('PUT does not write to Elasticsearch or publish SNS message if writi }), originalPgRecord ); - t.deepEqual( - await t.context.esCollectionClient.get( - constructCollectionId(originalCollection.name, originalCollection.version) - ), - originalEsRecord - ); - const { Messages } = await sqs().receiveMessage({ - QueueUrl: t.context.QueueUrl, - WaitTimeSeconds: 10, - }); - - t.is(Messages.length, 0); -}); - -test.serial('PUT does not write to PostgreSQL or publish SNS message if writing to Elasticsearch fails', async (t) => { - const { testKnex } = t.context; - const { - originalCollection, - originalPgRecord, - originalEsRecord, - } = await createCollectionTestRecords( - t.context, - { - duplicateHandling: 'error', - } - ); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - - const updatedCollection = { - ...originalCollection, - duplicateHandling: 'replace', - }; - - const expressRequest = { - params: { - name: originalCollection.name, - version: originalCollection.version, - }, - body: updatedCollection, - testContext: { - knex: testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - put(expressRequest, response), - { message: 'something bad' } - ); - 
t.deepEqual( - await t.context.collectionPgModel.get(t.context.testKnex, { - name: updatedCollection.name, - version: updatedCollection.version, - }), - originalPgRecord - ); - t.deepEqual( - await t.context.esCollectionClient.get( - constructCollectionId(originalCollection.name, originalCollection.version) - ), - originalEsRecord - ); const { Messages } = await sqs().receiveMessage({ QueueUrl: t.context.QueueUrl, WaitTimeSeconds: 10, From 73cdc6baca1a586c84e0ba22d89d09ae353dc916 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:52:42 -0400 Subject: [PATCH 16/61] CUMULUS-3792: Add db indexes to improve search performance (#3751) * CUMULUS-3792:Add table indexes to improve search performance --- CHANGELOG.md | 10 ++ .../update-table-indexes-CUMULUS-3792.md | 150 ++++++++++++++++++ example/deployments/cumulus/cumulus-es.tfvars | 1 - example/deployments/cumulus/cumulus-lp.tfvars | 1 - example/deployments/cumulus/sit.tfvars | 1 - .../20240728101230_add_table_indexes.sql | 57 +++++++ .../20240728101230_add_table_indexes.ts | 65 ++++++++ website/sidebars.js | 1 + 8 files changed, 283 insertions(+), 3 deletions(-) create mode 100644 docs/upgrade-notes/update-table-indexes-CUMULUS-3792.md create mode 100644 packages/db/src/migrations/20240728101230_add_table_indexes.sql create mode 100644 packages/db/src/migrations/20240728101230_add_table_indexes.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index cdb7b03083f..5f7e05725c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Migration Notes + +#### CUMULUS-3792 Add database indexes. Please follow the instructions before upgrading Cumulus + +- The updates in CUMULUS-3792 require a manual update to the postgres database in the production environment. + Please follow [Update Table Indexes for CUMULUS-3792] + (https://nasa.github.io/cumulus/docs/next/upgrade-notes/update_table_indexes_CUMULUS_3792) + ### Replace ElasticSearch Phase 1 - **CUMULUS-3238** - Removed elasticsearch dependency from collections endpoint @@ -46,6 +54,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added functionality to `@cumulus/db/src/search` to support terms, `not` and `exists` queries - **CUMULUS-3699** - Updated `collections` api endpoint to be able to support `includeStats` query string parameter +- **CUMULUS-3792** + - Added database indexes to improve search performance ### Migration Notes diff --git a/docs/upgrade-notes/update-table-indexes-CUMULUS-3792.md b/docs/upgrade-notes/update-table-indexes-CUMULUS-3792.md new file mode 100644 index 00000000000..15b2cfd338d --- /dev/null +++ b/docs/upgrade-notes/update-table-indexes-CUMULUS-3792.md @@ -0,0 +1,150 @@ +--- +id: update_table_indexes_CUMULUS_3792 +title: Update Table Indexes for CUMULUS-3792 +hide_title: false +--- + +## Background + +As part of the ElasticSearch removal efforts, Cumulus API endpoints which previously queried ElasticSearch +are being updated to query RDS instead. New database indexes are required to make RDS queries more efficient. + +The updates will be automatically created as part of the bootstrap lambda function on deployment of the data-persistence module. + +*In cases where the indexes are already applied, the updates will have no effect. 
If you have an existing index with the same definition +but a different name than the one we are creating, you can rename your existing index to the new index name.* + +## Apply the Changes in Production Environment + +With a large database (e.g. number of rows in executions table is greater than 100,000), the indexes must be applied manually since +the commands can take a significant amount of time and exceed the bootstrap lambda's 15 minute timeout. + +## Tools Used + +Since the update commands can take a few hours to run based on table size and IO throughput, it is recommended that the commands are run in an EC2 instance +in the AWS environment in a tmux or screen session. This will minimize the number of network hops and potential disconnects between the database client +and the database. Additionally, this will allow operators applying the patch to check on progress periodically and not worry about credential expiration or +other issues that would result in the client being killed. + +## Upgrade Steps + +1. Login into EC2 instance with database access + + From AWS console: Go to EC2, pick a `-CumulusECSCluster` instance, click Connect, click Session Manager + and click the Connect button. + + From AWS CLI: aws ssm start-session --target `EC2 Instance ID`. + + :::note Remember to take a note on which instance you run the commands. + +2. Install tmux and postgres client + + ```sh + sudo yum install -y tmux + sudo amazon-linux-extras install postgresql13 + ``` + + Once installed, a tmux session is started with two windows, the Cumulus database is connected to each window + using the PostgreSQL client. The primary window is used for running the `CREATE INDEX` commands, while the secondary + window is used to monitor the database and `CREATE INDEX` statement. The tmux session can be detached from and + reattached to at a later time. + +3. Run SQL commands + + The database login credentials can be retrieved from the prefix_db_login secret. + When the SQL commands are running, perform step 5 to monitor the commands. + + ```sh + tmux new-session -s CumulusUpgrade -n AddIndexes + + psql -h -p -d -U -W + #e.g. 
psql -h cumulus-dev-rds-cluster.cluster-xxx.us-east-1.rds.amazonaws.com -p 5432 -d cumulus_test_db -U cumulus_test -W + + # Use -f option to run the SQL commands from a file, -o option to write output to file + psql -h -p -d -U -f 20240728101230_add_table_indexes.sql -W + ``` + + The following are SQL commands, and 20240728101230_add_table_indexes.sql is available + [here](https://raw.githubusercontent.com/nasa/cumulus/master/packages/db/src/migrations/20240728101230_add_table_indexes.sql): + + ```sql + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS async_operations_updated_at_index ON async_operations(updated_at); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS async_operations_status_operation_type_cumulus_id_index ON async_operations(status, operation_type, cumulus_id); + + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS collections_updated_at_index ON collections(updated_at); + + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS executions_updated_at_index ON executions(updated_at); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS executions_status_collection_cumulus_id_index ON executions(status, collection_cumulus_id, cumulus_id); + + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS files_updated_at_index ON files(updated_at); + + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_updated_at_index ON granules(updated_at); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_coll_status_processendtime_cumulus_id_index ON granules(collection_cumulus_id, status, processing_end_date_time, cumulus_id); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_status_provider_collection_cumulus_id_index ON granules(status, provider_cumulus_id, collection_cumulus_id, cumulus_id); + + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_updated_at_index ON pdrs(updated_at); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_status_provider_collection_cumulus_id_index ON pdrs(status, provider_cumulus_id, collection_cumulus_id, cumulus_id); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_execution_cumulus_id_index ON pdrs(execution_cumulus_id); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_coll_status_cumulus_id_index ON pdrs(collection_cumulus_id, status, cumulus_id); + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_provider_collection_cumulus_id_name_index ON pdrs(provider_cumulus_id, collection_cumulus_id, name); + + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS providers_updated_at_index ON providers(updated_at); + + SELECT CURRENT_TIMESTAMP; + CREATE INDEX CONCURRENTLY IF NOT EXISTS rules_updated_at_index ON rules(updated_at); + + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) async_operations; + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) collections; + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) executions; + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) files; + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) granules; + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) pdrs; + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) providers; + SELECT CURRENT_TIMESTAMP; + VACUUM (ANALYZE, VERBOSE) rules; + SELECT CURRENT_TIMESTAMP; + ``` + +4. 
Monitor the running command + + ```sh + # From tmux CumulusUpgrade session, open another window + Ctrl-b c + + psql -h -p -d -U -W + + select pid, query, state, wait_event_type, wait_event from pg_stat_activity where state = 'active'; + ``` + +5. Verify the updates + + We can verify that the tables are updated successfully by checking the `\d tablename` results from psql, the indexes created should be listed. + + If the concurrent index query fails for any reason, you may have an `invalid` index - if this occurs, + make sure to drop and create the index again to avoid resources being used for the invalid index. + +6. Close the session + + Close the tmux session after the task is complete by `exit` or `Ctrl-b x`. diff --git a/example/deployments/cumulus/cumulus-es.tfvars b/example/deployments/cumulus/cumulus-es.tfvars index 6a8a3d8df37..e094d7adeb2 100644 --- a/example/deployments/cumulus/cumulus-es.tfvars +++ b/example/deployments/cumulus/cumulus-es.tfvars @@ -1,4 +1,3 @@ prefix = "cumulus-es" archive_api_port = 8000 key_name = "lp" -cmr_oauth_provider = "launchpad" diff --git a/example/deployments/cumulus/cumulus-lp.tfvars b/example/deployments/cumulus/cumulus-lp.tfvars index 6d72a3849ea..2448760b0f9 100644 --- a/example/deployments/cumulus/cumulus-lp.tfvars +++ b/example/deployments/cumulus/cumulus-lp.tfvars @@ -1,4 +1,3 @@ prefix = "cumulus-lp" archive_api_port = 8000 key_name = "lp" -cmr_oauth_provider = "launchpad" diff --git a/example/deployments/cumulus/sit.tfvars b/example/deployments/cumulus/sit.tfvars index 8890176aaaf..d0a5a513f35 100644 --- a/example/deployments/cumulus/sit.tfvars +++ b/example/deployments/cumulus/sit.tfvars @@ -41,7 +41,6 @@ launchpad_api = "https://api.launchpad.nasa.gov/icam/api/sm/v1" launchpad_certificate = "launchpad.pfx" oauth_user_group = "GSFC-Cumulus-Dev" -cmr_oauth_provider = "earthdata" saml_idp_login = "https://auth.launchpad-sbx.nasa.gov/affwebservices/public/saml2sso" saml_launchpad_metadata_url = "https://auth.launchpad-sbx.nasa.gov/unauth/metadata/launchpad-sbx.idp.xml" diff --git a/packages/db/src/migrations/20240728101230_add_table_indexes.sql b/packages/db/src/migrations/20240728101230_add_table_indexes.sql new file mode 100644 index 00000000000..9e69a870a54 --- /dev/null +++ b/packages/db/src/migrations/20240728101230_add_table_indexes.sql @@ -0,0 +1,57 @@ +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS async_operations_updated_at_index ON async_operations(updated_at); +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS async_operations_status_operation_type_cumulus_id_index ON async_operations(status, operation_type, cumulus_id); + +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS collections_updated_at_index ON collections(updated_at); + +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS executions_updated_at_index ON executions(updated_at); +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS executions_status_collection_cumulus_id_index ON executions(status, collection_cumulus_id, cumulus_id); + +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS files_updated_at_index ON files(updated_at); + +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_updated_at_index ON granules(updated_at); +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_coll_status_processendtime_cumulus_id_index ON granules(collection_cumulus_id, status, processing_end_date_time, cumulus_id); +SELECT CURRENT_TIMESTAMP; 
+CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_status_provider_collection_cumulus_id_index ON granules(status, provider_cumulus_id, collection_cumulus_id, cumulus_id); + +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_updated_at_index ON pdrs(updated_at); +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_status_provider_collection_cumulus_id_index ON pdrs(status, provider_cumulus_id, collection_cumulus_id, cumulus_id); +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_execution_cumulus_id_index ON pdrs(execution_cumulus_id); +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_coll_status_cumulus_id_index ON pdrs(collection_cumulus_id, status, cumulus_id); +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_provider_collection_cumulus_id_name_index ON pdrs(provider_cumulus_id, collection_cumulus_id, name); + +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS providers_updated_at_index ON providers(updated_at); + +SELECT CURRENT_TIMESTAMP; +CREATE INDEX CONCURRENTLY IF NOT EXISTS rules_updated_at_index ON rules(updated_at); + +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) async_operations; +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) collections; +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) executions; +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) files; +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) granules; +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) pdrs; +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) providers; +SELECT CURRENT_TIMESTAMP; +VACUUM (ANALYZE, VERBOSE) rules; +SELECT CURRENT_TIMESTAMP; diff --git a/packages/db/src/migrations/20240728101230_add_table_indexes.ts b/packages/db/src/migrations/20240728101230_add_table_indexes.ts new file mode 100644 index 00000000000..b93deb8dccf --- /dev/null +++ b/packages/db/src/migrations/20240728101230_add_table_indexes.ts @@ -0,0 +1,65 @@ +import { Knex } from 'knex'; + +export const up = async (knex: Knex): Promise => { + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS async_operations_updated_at_index ON async_operations(updated_at)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS async_operations_status_operation_type_cumulus_id_index ON async_operations(status, operation_type, cumulus_id)'); + + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS collections_updated_at_index ON collections(updated_at)'); + + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS executions_updated_at_index ON executions(updated_at)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS executions_status_collection_cumulus_id_index ON executions(status, collection_cumulus_id, cumulus_id)'); + + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS files_updated_at_index ON files(updated_at)'); + + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_updated_at_index ON granules(updated_at)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_coll_status_processendtime_cumulus_id_index ON granules(collection_cumulus_id, status, processing_end_date_time, cumulus_id)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS granules_status_provider_collection_cumulus_id_index ON granules(status, provider_cumulus_id, collection_cumulus_id, cumulus_id)'); + + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_updated_at_index ON pdrs(updated_at)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT 
EXISTS pdrs_status_provider_collection_cumulus_id_index ON pdrs(status, provider_cumulus_id, collection_cumulus_id, cumulus_id)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_execution_cumulus_id_index ON pdrs(execution_cumulus_id)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_coll_status_cumulus_id_index ON pdrs(collection_cumulus_id, status, cumulus_id)'); + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS pdrs_provider_collection_cumulus_id_name_index ON pdrs(provider_cumulus_id, collection_cumulus_id, name)'); + + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS providers_updated_at_index ON providers(updated_at)'); + + await knex.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS rules_updated_at_index ON rules(updated_at)'); + + await knex.raw('VACUUM (ANALYZE, VERBOSE) async_operations'); + await knex.raw('VACUUM (ANALYZE, VERBOSE) collections'); + await knex.raw('VACUUM (ANALYZE, VERBOSE) executions'); + await knex.raw('VACUUM (ANALYZE, VERBOSE) files'); + await knex.raw('VACUUM (ANALYZE, VERBOSE) granules'); + await knex.raw('VACUUM (ANALYZE, VERBOSE) pdrs'); + await knex.raw('VACUUM (ANALYZE, VERBOSE) providers'); + await knex.raw('VACUUM (ANALYZE, VERBOSE) rules'); +}; + +export const down = async (knex: Knex): Promise => { + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS async_operations_updated_at_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS async_operations_status_operation_type_cumulus_id_index'); + + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS collections_updated_at_index'); + + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS executions_updated_at_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS executions_status_collection_cumulus_id_index'); + + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS files_updated_at_index'); + + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS granules_updated_at_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS granules_coll_status_processendtime_cumulus_id_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS granules_status_provider_collection_cumulus_id_index'); + + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS pdrs_updated_at_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS pdrs_status_provider_collection_cumulus_id_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS pdrs_execution_cumulus_id_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS pdrs_coll_status_cumulus_id_index'); + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS pdrs_provider_collection_cumulus_id_name_index'); + + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS providers_updated_at_index'); + + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS rules_updated_at_index'); +}; +exports.config = { + transaction: false, +}; diff --git a/website/sidebars.js b/website/sidebars.js index 263c344e948..d1aedbc472f 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -231,6 +231,7 @@ const sidebars = { 'upgrade-notes/upgrade-rds-cluster-tf-postgres-13', 'upgrade-notes/update-cumulus_id-type-indexes-CUMULUS-3449', 'upgrade-notes/upgrade_execution_table_CUMULUS_3320', + 'upgrade-notes/update_table_indexes_CUMULUS_3792', ], }, { From 227bfee7b101e694761327b4e73d346b3f238779 Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Fri, 16 Aug 2024 15:21:01 -0600 Subject: [PATCH 17/61] CUMULUS-3236 - Update POST/DEL async operation api endpoints to use postgres (#3771) * Update PR mentioned endpoints to not write to ES * Update POST units to 
not use ES * Fix/manage broken tests * Update package/async-operations to not use ES * Remove ES update from async-operations image * Update async image to use v53, which includes removal of ES code * Fix broken ecs package tests/logic * Minor refactor * Update CHANGELOG * Remove additional unneeded test * Fix CHANGELOG * Modify unit ratchet to match after removal of code Code was removed, not refactored - this change did not degrade/improve units so much as change the denominator of lines/methods tested * Fix integration test/make it not rely on list endpoint * Refactor/PR feedback * Update example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js Co-authored-by: jennyhliu <34660846+jennyhliu@users.noreply.github.com> * Remove unneeded transaction * Remove unneeded type assertion/error handling * Remove transaction * Address PR comments --------- Co-authored-by: jennyhliu <34660846+jennyhliu@users.noreply.github.com> --- CHANGELOG.md | 8 + example/cumulus-tf/variables.tf | 2 +- ...syncOperationRunnerSuccessfulLambdaSpec.js | 11 +- packages/api/ecs/async-operation/.nycrc.json | 6 +- packages/api/ecs/async-operation/index.js | 52 ++---- packages/api/ecs/async-operation/package.json | 1 - .../ecs/async-operation/tests/test-index.js | 133 +------------- packages/api/endpoints/async-operations.js | 46 +---- packages/api/lib/testUtils.js | 8 - .../test-create-async-operations.js | 60 +------ .../test-endpoints-async-operations.js | 163 +----------------- .../async-operations/src/async_operations.ts | 22 +-- .../tests/test-async_operations.js | 122 +------------ packages/integration-tests/index.js | 2 +- 14 files changed, 48 insertions(+), 588 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f7e05725c9..9d584b1120f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Replace ElasticSearch Phase 2 + +- **CUMULUS-3236** + - Update API AsyncOperation endpoints `POST` and `DEL` to not update + Elasticsearch + - Update `@cumlus/api/ecs/async-operation` to not update Elasticsearch index when + reporting status of async operation + ### Migration Notes #### CUMULUS-3792 Add database indexes. 
Please follow the instructions before upgrading Cumulus diff --git a/example/cumulus-tf/variables.tf b/example/cumulus-tf/variables.tf index ca8f096e7f8..e506a492bd0 100644 --- a/example/cumulus-tf/variables.tf +++ b/example/cumulus-tf/variables.tf @@ -350,7 +350,7 @@ variable "rds_admin_access_secret_arn" { variable "async_operation_image_version" { description = "docker image version to use for Cumulus async operations tasks" type = string - default = "52" + default = "53" } variable "cumulus_process_activity_version" { diff --git a/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js b/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js index 554a72e4283..198dc8b8a9a 100644 --- a/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js +++ b/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js @@ -4,7 +4,7 @@ const { waitUntilTasksStopped } = require('@aws-sdk/client-ecs'); const get = require('lodash/get'); const { v4: uuidv4 } = require('uuid'); -const { createAsyncOperation, deleteAsyncOperation, listAsyncOperations } = require('@cumulus/api-client/asyncOperations'); +const { createAsyncOperation, deleteAsyncOperation, getAsyncOperation } = require('@cumulus/api-client/asyncOperations'); const { startECSTask } = require('@cumulus/async-operations'); const { ecs, s3 } = require('@cumulus/aws-client/services'); const { randomString } = require('@cumulus/common/test-utils'); @@ -108,15 +108,10 @@ describe('The AsyncOperation task runner executing a successful lambda function' it('returns the updated record from GET /asyncOperations', async () => { if (beforeAllError) fail(beforeAllError); else { - const response = await listAsyncOperations({ + const record = await getAsyncOperation({ prefix: config.stackName, - query: { - id: asyncOperationId, - }, + asyncOperationId, }); - const { results } = JSON.parse(response.body); - expect(results.length).toEqual(1); - const [record] = results; expect(record.status).toEqual('SUCCEEDED'); const parsedOutput = JSON.parse(record.output); expect(parsedOutput).toEqual([1, 2, 3]); diff --git a/packages/api/ecs/async-operation/.nycrc.json b/packages/api/ecs/async-operation/.nycrc.json index d8341ba284b..47cc7a940cc 100644 --- a/packages/api/ecs/async-operation/.nycrc.json +++ b/packages/api/ecs/async-operation/.nycrc.json @@ -1,7 +1,7 @@ { "extends": "../../../../nyc.config.js", - "statements": 39.0, - "functions": 37.0, - "branches": 38.0, + "statements": 38.38, + "functions": 30.76, + "branches": 37.5, "lines": 38.0 } \ No newline at end of file diff --git a/packages/api/ecs/async-operation/index.js b/packages/api/ecs/async-operation/index.js index 80b1e5082fd..734e3d48678 100644 --- a/packages/api/ecs/async-operation/index.js +++ b/packages/api/ecs/async-operation/index.js @@ -20,12 +20,9 @@ const { getObject, getObjectStreamContents, } = require('@cumulus/aws-client/S3'); -const indexer = require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); const { getKnexClient, AsyncOperationPgModel, - createRejectableTransaction, translatePostgresAsyncOperationToApiAsyncOperation, } = require('@cumulus/db'); @@ -165,7 +162,7 @@ function buildErrorOutput(error) { const writeAsyncOperationToPostgres = async (params) => { const { - trx, + knex, env, dbOutput, status, @@ -175,7 +172,7 @@ const writeAsyncOperationToPostgres = async (params) => { const id = env.asyncOperationId; return await asyncOperationPgModel .update( - trx, + knex, { id }, { status, @@ -186,27 +183,6 @@ const 
writeAsyncOperationToPostgres = async (params) => { ); }; -const writeAsyncOperationToEs = async (params) => { - const { - env, - status, - dbOutput, - updatedTime, - esClient, - } = params; - - await indexer.updateAsyncOperation( - esClient, - env.asyncOperationId, - { - status, - output: dbOutput, - updatedAt: Number(updatedTime), - }, - process.env.ES_INDEX - ); -}; - /** * Update an AsyncOperation item in Postgres * @@ -222,7 +198,6 @@ const updateAsyncOperation = async (params) => { status, output, envOverride = {}, - esClient = await getEsClient(), asyncOperationPgModel = new AsyncOperationPgModel(), } = params; @@ -234,20 +209,17 @@ const updateAsyncOperation = async (params) => { logger.info(`About to update async operation to ${JSON.stringify(status)} with output: ${dbOutput}`); const knex = await getKnexClient({ env }); - return await createRejectableTransaction(knex, async (trx) => { - const pgRecords = await writeAsyncOperationToPostgres({ - dbOutput, - env, - status, - trx, - updatedTime, - asyncOperationPgModel, - }); - const result = translatePostgresAsyncOperationToApiAsyncOperation(pgRecords[0]); - await writeAsyncOperationToEs({ env, status, dbOutput, updatedTime, esClient }); - logger.info(`Successfully updated async operation to ${JSON.stringify(status)} with output: ${JSON.stringify(dbOutput)}`); - return result; + const pgRecords = await writeAsyncOperationToPostgres({ + dbOutput, + env, + status, + knex, + updatedTime, + asyncOperationPgModel, }); + const result = translatePostgresAsyncOperationToApiAsyncOperation(pgRecords[0]); + logger.info(`Successfully updated async operation to ${JSON.stringify(status)} with output: ${JSON.stringify(dbOutput)}`); + return result; }; /** diff --git a/packages/api/ecs/async-operation/package.json b/packages/api/ecs/async-operation/package.json index cf32698ee7c..5feb7881f09 100644 --- a/packages/api/ecs/async-operation/package.json +++ b/packages/api/ecs/async-operation/package.json @@ -25,7 +25,6 @@ "@aws-sdk/client-lambda": "^3.621.0", "@cumulus/aws-client": "18.3.0", "@cumulus/db": "18.3.0", - "@cumulus/es-client": "18.3.0", "@cumulus/logger": "18.3.0", "crypto-random-string": "^3.2.0", "got": "^11.8.5", diff --git a/packages/api/ecs/async-operation/tests/test-index.js b/packages/api/ecs/async-operation/tests/test-index.js index cb6f580294f..97649c89bda 100644 --- a/packages/api/ecs/async-operation/tests/test-index.js +++ b/packages/api/ecs/async-operation/tests/test-index.js @@ -12,14 +12,6 @@ const { translatePostgresAsyncOperationToApiAsyncOperation, migrationDir, } = require('@cumulus/db'); -const { - indexAsyncOperation, -} = require('@cumulus/es-client/indexer'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); // eslint-disable-next-line unicorn/import-index const { updateAsyncOperation } = require('../index'); @@ -32,15 +24,6 @@ test.before(async (t) => { t.context.testKnexAdmin = knexAdmin; t.context.asyncOperationPgModel = new AsyncOperationPgModel(); - - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esAsyncOperationsClient = new Search( - {}, - 'asyncOperation', - t.context.esIndex - ); }); test.beforeEach(async (t) => { @@ -56,11 +39,6 @@ test.beforeEach(async (t) => { t.context.testAsyncOperationPgRecord = translateApiAsyncOperationToPostgresAsyncOperation( t.context.testAsyncOperation ); - await indexAsyncOperation( - 
t.context.esClient, - t.context.testAsyncOperation, - t.context.esIndex - ); await t.context.asyncOperationPgModel.create( t.context.testKnex, t.context.testAsyncOperationPgRecord @@ -73,10 +51,9 @@ test.after.always(async (t) => { knexAdmin: t.context.testKnexAdmin, testDbName, }); - await cleanupTestIndex(t.context); }); -test('updateAsyncOperation updates databases as expected', async (t) => { +test('updateAsyncOperation updates database as expected', async (t) => { const status = 'SUCCEEDED'; const output = { foo: 'bar' }; const updateTime = (Number(Date.now())).toString(); @@ -107,21 +84,9 @@ test('updateAsyncOperation updates databases as expected', async (t) => { output, updated_at: new Date(Number(updateTime)), }); - - const asyncOpEsRecord = await t.context.esAsyncOperationsClient.get( - t.context.testAsyncOperation.id - ); - t.deepEqual(asyncOpEsRecord, { - ...t.context.testAsyncOperation, - _id: asyncOpEsRecord._id, - timestamp: asyncOpEsRecord.timestamp, - status, - output: JSON.stringify(output), - updatedAt: Number(updateTime), - }); }); -test('updateAsyncOperation updates records correctly when output is undefined', async (t) => { +test('updateAsyncOperation updates record correctly when output is undefined', async (t) => { const status = 'SUCCEEDED'; const output = undefined; const updateTime = (Number(Date.now())).toString(); @@ -154,7 +119,7 @@ test('updateAsyncOperation updates records correctly when output is undefined', }); }); -test('updateAsyncOperation updates databases with correct timestamps', async (t) => { +test('updateAsyncOperation updates database with correct timestamps', async (t) => { const status = 'SUCCEEDED'; const output = { foo: 'bar' }; const updateTime = (Number(Date.now())).toString(); @@ -179,95 +144,3 @@ test('updateAsyncOperation updates databases with correct timestamps', async (t) ); t.is(asyncOperationPgRecord.updated_at.getTime().toString(), updateTime); }); - -test('updateAsyncOperation does not update PostgreSQL if write to Elasticsearch fails', async (t) => { - const status = 'SUCCEEDED'; - const output = { foo: cryptoRandomString({ length: 5 }) }; - const updateTime = (Number(Date.now())).toString(); - - const fakeEsClient = { - client: { - update: () => { - throw new Error('ES fail'); - }, - }, - }; - - await t.throwsAsync( - updateAsyncOperation({ - status, - output, - envOverride: { - asyncOperationId: t.context.asyncOperationId, - ...localStackConnectionEnv, - PG_DATABASE: testDbName, - updateTime, - }, - esClient: fakeEsClient, - }), - { message: 'ES fail' } - ); - - const asyncOperationPgRecord = await t.context.asyncOperationPgModel - .get( - t.context.testKnex, - { - id: t.context.asyncOperationId, - } - ); - t.like(asyncOperationPgRecord, t.context.testAsyncOperationPgRecord); - - const asyncOpEsRecord = await t.context.esAsyncOperationsClient.get( - t.context.testAsyncOperation.id - ); - t.deepEqual(asyncOpEsRecord, { - ...t.context.testAsyncOperation, - _id: asyncOpEsRecord._id, - timestamp: asyncOpEsRecord.timestamp, - }); -}); - -test('updateAsyncOperation does not update Elasticsearch if write to PostgreSQL fails', async (t) => { - const status = 'SUCCEEDED'; - const output = { foo: cryptoRandomString({ length: 5 }) }; - const updateTime = (Number(Date.now())).toString(); - - const fakePgModel = { - update: () => { - throw new Error('PG fail'); - }, - }; - - await t.throwsAsync( - updateAsyncOperation({ - status, - output, - envOverride: { - asyncOperationId: t.context.asyncOperationId, - ...localStackConnectionEnv, - 
PG_DATABASE: testDbName, - updateTime, - }, - asyncOperationPgModel: fakePgModel, - }), - { message: 'PG fail' } - ); - - const asyncOperationPgRecord = await t.context.asyncOperationPgModel - .get( - t.context.testKnex, - { - id: t.context.asyncOperationId, - } - ); - t.like(asyncOperationPgRecord, t.context.testAsyncOperationPgRecord); - - const asyncOpEsRecord = await t.context.esAsyncOperationsClient.get( - t.context.testAsyncOperation.id - ); - t.deepEqual(asyncOpEsRecord, { - ...t.context.testAsyncOperation, - _id: asyncOpEsRecord._id, - timestamp: asyncOpEsRecord.timestamp, - }); -}); diff --git a/packages/api/endpoints/async-operations.js b/packages/api/endpoints/async-operations.js index 15de1821a4d..87add962d56 100644 --- a/packages/api/endpoints/async-operations.js +++ b/packages/api/endpoints/async-operations.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const router = require('express-promise-router')(); @@ -8,20 +10,15 @@ const { getKnexClient, translateApiAsyncOperationToPostgresAsyncOperation, translatePostgresAsyncOperationToApiAsyncOperation, - createRejectableTransaction, } = require('@cumulus/db'); const { RecordDoesNotExist, ValidationError, } = require('@cumulus/errors'); -const { - indexAsyncOperation, -} = require('@cumulus/es-client/indexer'); const Logger = require('@cumulus/logger'); -const { Search, getEsClient } = require('@cumulus/es-client/search'); -const { deleteAsyncOperation } = require('@cumulus/es-client/indexer'); +const { Search } = require('@cumulus/es-client/search'); const { isBadRequestError } = require('../lib/errors'); const { recordIsValid } = require('../lib/schema'); @@ -74,16 +71,9 @@ async function del(req, res) { const { asyncOperationPgModel = new AsyncOperationPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const { id } = req.params || {}; - const esAsyncOperationsClient = new Search( - {}, - 'asyncOperation', - process.env.ES_INDEX - ); - if (!id) { return res.boom.badRequest('id parameter is missing'); } @@ -92,26 +82,13 @@ async function del(req, res) { await asyncOperationPgModel.get(knex, { id }); } catch (error) { if (error instanceof RecordDoesNotExist) { - if (!(await esAsyncOperationsClient.exists(id))) { - logger.info('Async Operation does not exist in Elasticsearch and PostgreSQL'); - return res.boom.notFound('No record found'); - } - logger.info('Async Operation does not exist in PostgreSQL, it only exists in Elasticsearch. 
Proceeding with deletion'); - } else { - throw error; + logger.info('Async Operation does not exist PostgreSQL'); + return res.boom.notFound('No record found'); } + return res.boom.badImplementation(JSON.stringify(error)); } - await createRejectableTransaction(knex, async (trx) => { - await asyncOperationPgModel.delete(trx, { id }); - await deleteAsyncOperation({ - esClient, - id, - index: process.env.ES_INDEX, - ignore: [404], - }); - }); - + await asyncOperationPgModel.delete(knex, { id }); return res.send({ message: 'Record deleted' }); } @@ -126,7 +103,6 @@ async function post(req, res) { const { asyncOperationPgModel = new AsyncOperationPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const apiAsyncOperation = req.body; @@ -144,12 +120,8 @@ async function post(req, res) { } const dbRecord = translateApiAsyncOperationToPostgresAsyncOperation(apiAsyncOperation); logger.debug(`Attempting to create async operation ${dbRecord.id}`); - let apiDbRecord; - await createRejectableTransaction(knex, async (trx) => { - const pgRecord = await asyncOperationPgModel.create(trx, dbRecord, ['*']); - apiDbRecord = await translatePostgresAsyncOperationToApiAsyncOperation(pgRecord[0]); - await indexAsyncOperation(esClient, apiDbRecord, process.env.ES_INDEX); - }); + const pgRecord = await asyncOperationPgModel.create(knex, dbRecord, ['*']); + const apiDbRecord = translatePostgresAsyncOperationToApiAsyncOperation(pgRecord[0]); logger.info(`Successfully created async operation ${apiDbRecord.id}:`); return res.send({ message: 'Record saved', diff --git a/packages/api/lib/testUtils.js b/packages/api/lib/testUtils.js index f75f222f3bf..0910cba7ccd 100644 --- a/packages/api/lib/testUtils.js +++ b/packages/api/lib/testUtils.js @@ -26,7 +26,6 @@ const { indexProvider, indexRule, indexPdr, - indexAsyncOperation, deleteExecution, } = require('@cumulus/es-client/indexer'); const { @@ -627,8 +626,6 @@ const createAsyncOperationTestRecords = async (context) => { const { knex, asyncOperationPgModel, - esClient, - esAsyncOperationClient, } = context; const originalAsyncOperation = fakeAsyncOperationFactory(); @@ -643,13 +640,8 @@ const createAsyncOperationTestRecords = async (context) => { const originalPgRecord = await asyncOperationPgModel.get( knex, { cumulus_id: pgAsyncOperation.cumulus_id } ); - await indexAsyncOperation(esClient, originalAsyncOperation, process.env.ES_INDEX); - const originalEsRecord = await esAsyncOperationClient.get( - originalAsyncOperation.id - ); return { originalPgRecord, - originalEsRecord, }; }; diff --git a/packages/api/tests/endpoints/async-operations/test-create-async-operations.js b/packages/api/tests/endpoints/async-operations/test-create-async-operations.js index 8b7a73477a8..e7bf6e5c553 100644 --- a/packages/api/tests/endpoints/async-operations/test-create-async-operations.js +++ b/packages/api/tests/endpoints/async-operations/test-create-async-operations.js @@ -9,11 +9,6 @@ const sinon = require('sinon'); const { s3 } = require('@cumulus/aws-client/services'); const { recursivelyDeleteS3Bucket } = require('@cumulus/aws-client/S3'); const { randomId, randomString } = require('@cumulus/common/test-utils'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { localStackConnectionEnv, generateLocalTestDb, @@ -26,8 +21,6 @@ const { const assertions = require('../../../lib/assertions'); const { fakeAsyncOperationFactory } = 
require('../../../lib/testUtils'); -const { buildFakeExpressResponse } = require('../utils'); -const { post } = require('../../../endpoints/async-operations'); const { createFakeJwtAuthToken, setAuthorizedOAuthUsers, @@ -58,15 +51,6 @@ test.before(async (t) => { t.context.testKnexAdmin = knexAdmin; t.context.asyncOperationPgModel = new AsyncOperationPgModel(); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esAsyncOperationsClient = new Search( - {}, - 'asyncOperation', - t.context.esIndex - ); - await s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomString(); @@ -83,7 +67,6 @@ test.before(async (t) => { test.after.always(async (t) => { await t.context.accessTokenModel.deleteTable().catch(noop); - await cleanupTestIndex(t.context); await recursivelyDeleteS3Bucket(process.env.system_bucket); await destroyLocalTestDb({ knex: t.context.testKnex, @@ -117,7 +100,7 @@ test('POST with an invalid access token returns an unauthorized response', async assertions.isInvalidAccessTokenResponse(t, response); }); -test('POST creates a new async operation in all data stores', async (t) => { +test('POST creates and stores expected new async operation record', async (t) => { const { asyncOperationPgModel, jwtAuthToken } = t.context; const asyncOperation = fakeAsyncOperationFactory({ output: JSON.stringify({ age: 59 }), @@ -151,14 +134,9 @@ test('POST creates a new async operation in all data stores', async (t) => { omit(pgAsyncOperation, omitList) ); t.deepEqual(asyncOperationPgRecord.output, pgAsyncOperation.output); - - const esRecord = await t.context.esAsyncOperationsClient.get( - asyncOperation.id - ); - t.like(esRecord, record); }); -test('POST creates a new async operation in PostgreSQL/Elasticsearch with correct timestamps', async (t) => { +test('POST creates a new async operation record with correct timestamps', async (t) => { const { asyncOperationPgModel, jwtAuthToken } = t.context; const asyncOperation = fakeAsyncOperationFactory({ output: JSON.stringify({ age: 59 }), @@ -184,12 +162,8 @@ test('POST creates a new async operation in PostgreSQL/Elasticsearch with correc t.true(apiRecord.createdAt > asyncOperation.createdAt); t.true(apiRecord.updatedAt > asyncOperation.updatedAt); - const esRecord = await t.context.esAsyncOperationsClient.get(asyncOperation.id); - t.is(asyncOperationPgRecord.created_at.getTime(), record.createdAt); t.is(asyncOperationPgRecord.updated_at.getTime(), record.updatedAt); - t.is(asyncOperationPgRecord.created_at.getTime(), esRecord.createdAt); - t.is(asyncOperationPgRecord.updated_at.getTime(), esRecord.updatedAt); }); test('POST returns a 409 error if the async operation already exists in PostgreSQL', async (t) => { @@ -269,33 +243,3 @@ test('POST returns a 400 response if invalid JSON provided', async (t) => { t.is(error.error, 'Bad Request'); t.is(error.message, 'Async Operations require an ID'); }); - -test('post() does not write to PostgreSQL if writing to Elasticsearch fails', async (t) => { - const asyncOperation = fakeAsyncOperationFactory({ - output: JSON.stringify({ age: 59 }), - }); - const fakeEsClient = { - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - - const expressRequest = { - body: asyncOperation, - testContext: { - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await post(expressRequest, response); - - 
t.true(response.boom.badImplementation.calledWithMatch('something bad')); - - t.false( - await t.context.asyncOperationPgModel.exists(t.context.testKnex, { - id: asyncOperation.id, - }) - ); -}); diff --git a/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js b/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js index dcfd4703cf4..c5ff4db259a 100644 --- a/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js +++ b/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js @@ -36,7 +36,6 @@ const { setAuthorizedOAuthUsers, createAsyncOperationTestRecords, } = require('../../../lib/testUtils'); -const { buildFakeExpressResponse } = require('../utils'); process.env.stackName = randomString(); process.env.system_bucket = randomString(); @@ -225,7 +224,7 @@ test('del() returns a 401 bad request if id is not provided', async (t) => { t.true(fakeResponse.boom.badRequest.called); }); -test('DELETE returns a 404 if PostgreSQL and Elasticsearch async operation cannot be found', async (t) => { +test('DELETE returns a 404 if PostgreSQL async operation cannot be found', async (t) => { const nonExistentAsyncOperation = fakeAsyncOperationFactory(); const response = await request(app) .delete(`/asyncOperations/${nonExistentAsyncOperation.id}`) @@ -235,76 +234,7 @@ test('DELETE returns a 404 if PostgreSQL and Elasticsearch async operation canno t.is(response.body.message, 'No record found'); }); -test('DELETE deletes async operation successfully if it exists in PostgreSQL but not Elasticsearch', async (t) => { - const { - asyncOperationPgModel, - esAsyncOperationClient, - knex, - } = t.context; - - const originalAsyncOperation = fakeAsyncOperationFactory(); - const insertPgRecord = await translateApiAsyncOperationToPostgresAsyncOperation( - originalAsyncOperation, - knex - ); - const id = insertPgRecord.id; - await asyncOperationPgModel.create( - knex, - insertPgRecord - ); - t.true( - await asyncOperationPgModel.exists(knex, { id }) - ); - - const response = await request(app) - .delete(`/asyncOperations/${id}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - const { message } = response.body; - - t.is(message, 'Record deleted'); - t.false( - await asyncOperationPgModel.exists(knex, { id }) - ); - t.false(await esAsyncOperationClient.exists( - id - )); -}); - -test('DELETE deletes async operation successfully if it exists Elasticsearch but not PostgreSQL', async (t) => { - const { - asyncOperationPgModel, - esAsyncOperationClient, - esClient, - esIndex, - knex, - } = t.context; - - const originalAsyncOperation = fakeAsyncOperationFactory(); - const id = originalAsyncOperation.id; - await indexer.indexAsyncOperation(esClient, originalAsyncOperation, esIndex); - t.false( - await asyncOperationPgModel.exists(knex, { id }) - ); - t.true( - await esAsyncOperationClient.exists(id) - ); - - const response = await request(app) - .delete(`/asyncOperations/${id}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - const { message } = response.body; - - t.is(message, 'Record deleted'); - t.false( - await esAsyncOperationClient.exists(id) - ); -}); - -test('DELETE deletes the async operation from all data stores', async (t) => { +test('DELETE deletes the async operation from the database', async (t) => { const { originalPgRecord, } = await createAsyncOperationTestRecords(t.context); @@ -326,93 +256,4 
@@ test('DELETE deletes the async operation from all data stores', async (t) => { const dbRecords = await t.context.asyncOperationPgModel .search(t.context.knex, { id }); t.is(dbRecords.length, 0); - t.false(await t.context.esAsyncOperationClient.exists( - id - )); -}); - -test('del() does not remove from Elasticsearch if removing from PostgreSQL fails', async (t) => { - const { - originalPgRecord, - } = await createAsyncOperationTestRecords(t.context); - const { id } = originalPgRecord; - - const fakeAsyncOperationPgModel = { - delete: () => { - throw new Error('PG something bad'); - }, - get: () => Promise.resolve(originalPgRecord), - }; - - const expressRequest = { - params: { - id, - }, - testContext: { - knex: t.context.knex, - asyncOperationPgModel: fakeAsyncOperationPgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'PG something bad' } - ); - - t.true( - await t.context.asyncOperationPgModel.exists(t.context.knex, { - id, - }) - ); - t.true( - await t.context.esAsyncOperationClient.exists( - id - ) - ); -}); - -test('del() does not remove from PostgreSQL if removing from Elasticsearch fails', async (t) => { - const { - originalPgRecord, - } = await createAsyncOperationTestRecords(t.context); - const { id } = originalPgRecord; - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - delete: () => { - throw new Error('ES something bad'); - }, - }, - }; - - const expressRequest = { - params: { - id, - }, - testContext: { - knex: t.context.knex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'ES something bad' } - ); - - t.true( - await t.context.asyncOperationPgModel.exists(t.context.knex, { - id, - }) - ); - t.true( - await t.context.esAsyncOperationClient.exists( - id - ) - ); }); diff --git a/packages/async-operations/src/async_operations.ts b/packages/async-operations/src/async_operations.ts index f00b8e41ed6..643b526a0b1 100644 --- a/packages/async-operations/src/async_operations.ts +++ b/packages/async-operations/src/async_operations.ts @@ -1,5 +1,4 @@ import { RunTaskCommandOutput } from '@aws-sdk/client-ecs'; -import { Knex } from 'knex'; import { FunctionConfiguration, GetFunctionConfigurationCommand } from '@aws-sdk/client-lambda'; import { ecs, s3, lambda } from '@cumulus/aws-client/services'; @@ -8,7 +7,6 @@ import { translateApiAsyncOperationToPostgresAsyncOperation, translatePostgresAsyncOperationToApiAsyncOperation, AsyncOperationPgModel, - createRejectableTransaction, } from '@cumulus/db'; import Logger from '@cumulus/logger'; import { ApiAsyncOperation, AsyncOperationType } from '@cumulus/types/api/async_operations'; @@ -19,13 +17,6 @@ import type { } from './types'; const { EcsStartTaskError, MissingRequiredArgument } = require('@cumulus/errors'); -const { - indexAsyncOperation, -} = require('@cumulus/es-client/indexer'); -const { - getEsClient, EsClient, -} = require('@cumulus/es-client/search'); - const logger = new Logger({ sender: '@cumulus/async-operation' }); type StartEcsTaskReturnType = Promise; @@ -127,7 +118,6 @@ export const createAsyncOperation = async ( stackName: string, systemBucket: string, knexConfig?: NodeJS.ProcessEnv, - esClient?: typeof EsClient, asyncOperationPgModel?: AsyncOperationPgModelObject } ): Promise> => { @@ -136,7 +126,6 @@ export const createAsyncOperation = async ( stackName, systemBucket, knexConfig = 
process.env, - esClient = await getEsClient(), asyncOperationPgModel = new AsyncOperationPgModel(), } = params; @@ -144,14 +133,9 @@ export const createAsyncOperation = async ( if (!systemBucket) throw new TypeError('systemBucket is required'); const knex = await getKnexClient({ env: knexConfig }); - return await createRejectableTransaction(knex, async (trx: Knex.Transaction) => { - const pgCreateObject = translateApiAsyncOperationToPostgresAsyncOperation(createObject); - const pgRecord = await asyncOperationPgModel.create(trx, pgCreateObject, ['*']); - const apiRecord = translatePostgresAsyncOperationToApiAsyncOperation(pgRecord[0]); - await indexAsyncOperation(esClient, apiRecord, process.env.ES_INDEX); - - return apiRecord; - }); + const pgCreateObject = translateApiAsyncOperationToPostgresAsyncOperation(createObject); + const pgRecord = await asyncOperationPgModel.create(knex, pgCreateObject, ['*']); + return translatePostgresAsyncOperationToApiAsyncOperation(pgRecord[0]); }; /** diff --git a/packages/async-operations/tests/test-async_operations.js b/packages/async-operations/tests/test-async_operations.js index 1521e70469c..be56f26605e 100644 --- a/packages/async-operations/tests/test-async_operations.js +++ b/packages/async-operations/tests/test-async_operations.js @@ -23,11 +23,6 @@ const { migrationDir, } = require('@cumulus/db'); const { EcsStartTaskError, MissingRequiredArgument } = require('@cumulus/errors'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { getLambdaConfiguration, getLambdaEnvironmentVariables, @@ -55,15 +50,6 @@ test.before(async (t) => { systemBucket = randomString(); await s3().createBucket({ Bucket: systemBucket }); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esAsyncOperationsClient = new Search( - {}, - 'asyncOperation', - t.context.esIndex - ); - // Set up the mock ECS client ecsClient = ecs(); ecsClient.runTask = (params) => { @@ -103,7 +89,6 @@ test.beforeEach((t) => { test.after.always(async (t) => { sinon.restore(); await recursivelyDeleteS3Bucket(systemBucket); - await cleanupTestIndex(t.context); await destroyLocalTestDb({ knex: t.context.testKnex, knexAdmin: t.context.testKnexAdmin, @@ -272,7 +257,7 @@ test.serial('The startAsyncOperation method throws error and calls createAsyncOp ); }); -test('The startAsyncOperation writes records to all data stores', async (t) => { +test('The startAsyncOperation writes records to the database', async (t) => { const description = randomString(); const stackName = randomString(); const operationType = 'ES Index'; @@ -312,51 +297,6 @@ test('The startAsyncOperation writes records to all data stores', async (t) => { omit(asyncOperationPgRecord, omitList), translateApiAsyncOperationToPostgresAsyncOperation(omit(expected, omitList)) ); - const esRecord = await t.context.esAsyncOperationsClient.get(id); - t.deepEqual( - await t.context.esAsyncOperationsClient.get(id), - { - ...expected, - _id: esRecord._id, - timestamp: esRecord.timestamp, - updatedAt: esRecord.updatedAt, - createdAt: esRecord.createdAt, - } - ); -}); - -test.serial('The startAsyncOperation writes records with correct timestamps', async (t) => { - const description = randomString(); - const stackName = randomString(); - const operationType = 'ES Index'; - const taskArn = randomString(); - - stubbedEcsRunTaskResult = { - tasks: [{ taskArn }], - failures: [], 
- }; - - const { id } = await startAsyncOperation({ - asyncOperationTaskDefinition: randomString(), - cluster: randomString(), - callerLambdaName: randomString(), - lambdaName: randomString(), - description, - operationType, - payload: {}, - stackName, - knexConfig: knexConfig, - systemBucket, - }); - - const asyncOperationPgRecord = await t.context.asyncOperationPgModel.get( - t.context.testKnex, - { id } - ); - - const esRecord = await t.context.esAsyncOperationsClient.get(id); - t.is(asyncOperationPgRecord.created_at.getTime(), esRecord.createdAt); - t.is(asyncOperationPgRecord.updated_at.getTime(), esRecord.updatedAt); }); test.serial('The startAsyncOperation method returns the newly-generated record', async (t) => { @@ -492,63 +432,3 @@ test('createAsyncOperation throws if systemBucket is not provided', async (t) => { name: 'TypeError' } ); }); - -test.serial('createAsyncOperation() does not write to Elasticsearch if writing to PostgreSQL fails', async (t) => { - const { id, createObject } = t.context; - - const fakeAsyncOpPgModel = { - create: () => { - throw new Error('something bad'); - }, - }; - - const createParams = { - knex: t.context.testKnex, - asyncOperationPgModel: fakeAsyncOpPgModel, - createObject, - stackName: 'FakeStack', - systemBucket: 'FakeBucket', - }; - await t.throwsAsync( - createAsyncOperation(createParams), - { message: 'something bad' } - ); - - const dbRecords = await t.context.asyncOperationPgModel - .search(t.context.testKnex, { id }); - t.is(dbRecords.length, 0); - t.false(await t.context.esAsyncOperationsClient.exists( - id - )); -}); - -test.serial('createAsyncOperation() does not write to PostgreSQL if writing to Elasticsearch fails', async (t) => { - const { id, createObject } = t.context; - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - index: () => { - throw new Error('ES something bad'); - }, - }, - }; - - const createParams = { - knex: t.context.testKnex, - createObject, - esClient: fakeEsClient, - stackName: 'FakeStack', - systemBucket: 'FakeBucket', - }; - await t.throwsAsync( - createAsyncOperation(createParams), - { message: 'ES something bad' } - ); - - const dbRecords = await t.context.asyncOperationPgModel - .search(t.context.testKnex, { id }); - t.is(dbRecords.length, 0); - t.false(await t.context.esAsyncOperationsClient.exists( - id - )); -}); diff --git a/packages/integration-tests/index.js b/packages/integration-tests/index.js index 71eae2ef6a1..b53ceca1bc6 100644 --- a/packages/integration-tests/index.js +++ b/packages/integration-tests/index.js @@ -49,7 +49,7 @@ const lambdaStep = new LambdaStep(); /** * Wait for an AsyncOperation to reach a given status * - * Retries using exponental backoff until desired has been reached. If the + * Retries using exponential backoff until desired has been reached. If the * desired state is not reached an error is thrown. 
* * @param {Object} params - params From 21d05ff4e08cd50f6bb6b7e893e5f2482cef87a6 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Sat, 17 Aug 2024 20:37:08 -0400 Subject: [PATCH 18/61] CUMULUS-3235: Updated asyncOperations api endpoint to query postgres (#3768) * CUMULUS-3235: Updated asyncOperations api endpoint to query postgres * id::text * revert uuid for term search update unit tests * update unit test * refact basic search * raw-parameter-binding * deconstruct response * deconstruct fields * whereRaw parameter binding * remove extra knex parameter * fix unit test conflict --- CHANGELOG.md | 2 + packages/api/endpoints/async-operations.js | 10 +- .../test-endpoints-async-operations.js | 25 +- packages/db/src/index.ts | 10 +- .../db/src/search/AsyncOperationSearch.ts | 62 ++++ packages/db/src/search/BaseSearch.ts | 31 +- packages/db/src/search/CollectionSearch.ts | 25 +- packages/db/src/search/ExecutionSearch.ts | 5 +- packages/db/src/search/GranuleSearch.ts | 5 +- packages/db/src/search/field-mapping.ts | 3 + .../tests/search/test-AsyncOperationSearch.js | 331 ++++++++++++++++++ 11 files changed, 430 insertions(+), 79 deletions(-) create mode 100644 packages/db/src/search/AsyncOperationSearch.ts create mode 100644 packages/db/tests/search/test-AsyncOperationSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d584b1120f..fec422f4574 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Replace ElasticSearch Phase 2 +- **CUMULUS-3235** + - Updated `asyncOperations` api endpoint to query postgres - **CUMULUS-3236** - Update API AsyncOperation endpoints `POST` and `DEL` to not update Elasticsearch diff --git a/packages/api/endpoints/async-operations.js b/packages/api/endpoints/async-operations.js index 87add962d56..c54c3f83ac1 100644 --- a/packages/api/endpoints/async-operations.js +++ b/packages/api/endpoints/async-operations.js @@ -17,8 +17,8 @@ const { } = require('@cumulus/errors'); const Logger = require('@cumulus/logger'); +const { AsyncOperationSearch } = require('@cumulus/db'); -const { Search } = require('@cumulus/es-client/search'); const { isBadRequestError } = require('../lib/errors'); const { recordIsValid } = require('../lib/schema'); @@ -27,13 +27,9 @@ const asyncSchema = require('../lib/schemas').asyncOperation; const logger = new Logger({ sender: '@cumulus/api/asyncOperations' }); async function list(req, res) { - const search = new Search( - { queryStringParameters: req.query }, - 'asyncOperation', - process.env.ES_INDEX - ); + const dbSearch = new AsyncOperationSearch({ queryStringParameters: req.query }); - const response = await search.query(); + const response = await dbSearch.query(); return res.send(response); } diff --git a/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js b/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js index c5ff4db259a..87f75646a6e 100644 --- a/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js +++ b/packages/api/tests/endpoints/async-operations/test-endpoints-async-operations.js @@ -17,12 +17,6 @@ const { translateApiAsyncOperationToPostgresAsyncOperation, migrationDir, } = require('@cumulus/db'); -const { Search } = require('@cumulus/es-client/search'); -const indexer = require('@cumulus/es-client/indexer'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { 
fakeAsyncOperationFactory } = require('../../../lib/testUtils'); const { @@ -63,15 +57,6 @@ test.before(async (t) => { t.context.asyncOperationPgModel = new AsyncOperationPgModel(); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esAsyncOperationClient = new Search( - {}, - 'asyncOperation', - t.context.esIndex - ); - await s3().createBucket({ Bucket: process.env.system_bucket }); const username = randomString(); @@ -91,21 +76,17 @@ test.after.always(async (t) => { knexAdmin: t.context.testKnexAdmin, testDbName, }); - await cleanupTestIndex(t.context); }); test.serial('GET /asyncOperations returns a list of operations', async (t) => { - const { esClient, esIndex } = t.context; const asyncOperation1 = fakeAsyncOperationFactory(); const asyncOperation2 = fakeAsyncOperationFactory(); const asyncOpPgRecord1 = translateApiAsyncOperationToPostgresAsyncOperation(asyncOperation1); await t.context.asyncOperationPgModel.create(t.context.knex, asyncOpPgRecord1); - await indexer.indexAsyncOperation(esClient, asyncOperation1, esIndex); const asyncOpPgRecord2 = translateApiAsyncOperationToPostgresAsyncOperation(asyncOperation2); await t.context.asyncOperationPgModel.create(t.context.knex, asyncOpPgRecord2); - await indexer.indexAsyncOperation(esClient, asyncOperation2, esIndex); const response = await request(app) .get('/asyncOperations') @@ -133,19 +114,15 @@ test.serial('GET /asyncOperations returns a list of operations', async (t) => { }); test.serial('GET /asyncOperations with a timestamp parameter returns a list of filtered results', async (t) => { - const { esClient, esIndex } = t.context; const firstDate = Date.now(); const asyncOperation1 = fakeAsyncOperationFactory(); - const asyncOperation2 = fakeAsyncOperationFactory(); const asyncOpPgRecord1 = translateApiAsyncOperationToPostgresAsyncOperation(asyncOperation1); await t.context.asyncOperationPgModel.create(t.context.knex, asyncOpPgRecord1); - await indexer.indexAsyncOperation(esClient, asyncOperation1, esIndex); const secondDate = Date.now(); - + const asyncOperation2 = fakeAsyncOperationFactory(); const asyncOpPgRecord2 = translateApiAsyncOperationToPostgresAsyncOperation(asyncOperation2); await t.context.asyncOperationPgModel.create(t.context.knex, asyncOpPgRecord2); - await indexer.indexAsyncOperation(esClient, asyncOperation2, esIndex); const response1 = await request(app) .get(`/asyncOperations?timestamp__from=${firstDate}`) diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 3a417e174b5..2a817dcaaa2 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -140,8 +140,11 @@ export { QuerySearchClient, } from './lib/QuerySearchClient'; export { - BaseSearch, -} from './search/BaseSearch'; + AsyncOperationSearch, +} from './search/AsyncOperationSearch'; +export { + CollectionSearch, +} from './search/CollectionSearch'; export { ExecutionSearch, } from './search/ExecutionSearch'; @@ -151,9 +154,6 @@ export { export { StatsSearch, } from './search/StatsSearch'; -export { - CollectionSearch, -} from './search/CollectionSearch'; export { AsyncOperationPgModel } from './models/async_operation'; export { BasePgModel } from './models/base'; diff --git a/packages/db/src/search/AsyncOperationSearch.ts b/packages/db/src/search/AsyncOperationSearch.ts new file mode 100644 index 00000000000..17ded787f5a --- /dev/null +++ b/packages/db/src/search/AsyncOperationSearch.ts @@ -0,0 +1,62 @@ +import { Knex } from 'knex'; +import pick from 
'lodash/pick'; + +import { ApiAsyncOperation } from '@cumulus/types/api/async_operations'; +import Logger from '@cumulus/logger'; + +import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { PostgresAsyncOperationRecord } from '../types/async_operation'; +import { translatePostgresAsyncOperationToApiAsyncOperation } from '../translate/async_operations'; + +const log = new Logger({ sender: '@cumulus/db/AsyncOperationSearch' }); + +/** + * Class to build and execute db search query for asyncOperation + */ +export class AsyncOperationSearch extends BaseSearch { + constructor(event: QueryEvent) { + super(event, 'asyncOperation'); + } + + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters ?? this.dbQueryParameters; + if (infix) { + [countQuery, searchQuery].forEach((query) => query.whereRaw(`${this.tableName}.id::text like ?`, `%${infix}%`)); + } + if (prefix) { + [countQuery, searchQuery].forEach((query) => query.whereRaw(`${this.tableName}.id::text like ?`, `${prefix}%`)); + } + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres records returned from query + * @returns translated api records + */ + protected translatePostgresRecordsToApiRecords(pgRecords: PostgresAsyncOperationRecord[]) + : Partial[] { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const { fields } = this.dbQueryParameters; + const apiRecords = pgRecords.map((item: PostgresAsyncOperationRecord) => { + const pgAsyncOperation = item; + const apiRecord = translatePostgresAsyncOperationToApiAsyncOperation(pgAsyncOperation); + return fields ? pick(apiRecord, fields) : apiRecord; + }); + return apiRecords; + } +} diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 663c0ebfb03..e71a836764f 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -33,7 +33,7 @@ export const typeToTable: { [key: string]: string } = { /** * Class to build and execute db search query */ -class BaseSearch { +abstract class BaseSearch { readonly type: string; readonly tableName: string; readonly queryStringParameters: QueryStringParameters; @@ -140,14 +140,18 @@ class BaseSearch { * Build basic query * * @param knex - DB client - * @throws - function is not implemented + * @returns queries for getting count and search result */ protected buildBasicQuery(knex: Knex): { countQuery?: Knex.QueryBuilder, searchQuery: Knex.QueryBuilder, } { - log.debug(`buildBasicQuery is not implemented ${knex.constructor.name}`); - throw new Error('buildBasicQuery is not implemented'); + const countQuery = knex(this.tableName) + .count('*'); + + const searchQuery = knex(this.tableName) + .select(`${this.tableName}.*`); + return { countQuery, searchQuery }; } /** @@ -233,13 +237,12 @@ class BaseSearch { const { range = {} } = dbQueryParameters ?? 
this.dbQueryParameters; Object.entries(range).forEach(([name, rangeValues]) => { - if (rangeValues.gte) { - countQuery?.where(`${this.tableName}.${name}`, '>=', rangeValues.gte); - searchQuery.where(`${this.tableName}.${name}`, '>=', rangeValues.gte); + const { gte, lte } = rangeValues; + if (gte) { + [countQuery, searchQuery].forEach((query) => query?.where(`${this.tableName}.${name}`, '>=', gte)); } - if (rangeValues.lte) { - countQuery?.where(`${this.tableName}.${name}`, '<=', rangeValues.lte); - searchQuery.where(`${this.tableName}.${name}`, '<=', rangeValues.lte); + if (lte) { + [countQuery, searchQuery].forEach((query) => query?.where(`${this.tableName}.${name}`, '<=', lte)); } }); } @@ -284,7 +287,7 @@ class BaseSearch { break; case 'error.Error': [countQuery, searchQuery] - .forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' = '${value}'`)); + .forEach((query) => value && query?.whereRaw(`${this.tableName}.error->>'Error' = ?`, value)); break; case 'asyncOperationId': [countQuery, searchQuery].forEach((query) => query?.where(`${asyncOperationsTable}.id`, value)); @@ -347,7 +350,7 @@ class BaseSearch { break; case 'error.Error': [countQuery, searchQuery] - .forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' in ('${value.join('\',\'')}')`)); + .forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' in (${value.map(() => '?').join(',')})`, [...value])); break; case 'asyncOperationId': [countQuery, searchQuery].forEach((query) => query?.whereIn(`${asyncOperationsTable}.id`, value)); @@ -408,7 +411,7 @@ class BaseSearch { [countQuery, searchQuery].forEach((query) => query?.whereNot(`${executionsTable}_parent.arn`, value)); break; case 'error.Error': - [countQuery, searchQuery].forEach((query) => query?.whereRaw(`${this.tableName}.error->>'Error' != '${value}'`)); + [countQuery, searchQuery].forEach((query) => value && query?.whereRaw(`${this.tableName}.error->>'Error' != ?`, value)); break; default: [countQuery, searchQuery].forEach((query) => query?.whereNot(`${this.tableName}.${name}`, value)); @@ -464,7 +467,7 @@ class BaseSearch { tableName? 
: string, }) : Promise { const { knex, tableName = this.tableName } = params; - const query = knex.raw(`EXPLAIN (FORMAT JSON) select * from "${tableName}"`); + const query = knex.raw('EXPLAIN (FORMAT JSON) select * from ??', tableName); log.debug(`Estimating the row count ${query.toSQL().sql}`); const countResult = await query; const countPath = 'rows[0]["QUERY PLAN"][0].Plan["Plan Rows"]'; diff --git a/packages/db/src/search/CollectionSearch.ts b/packages/db/src/search/CollectionSearch.ts index af30b66989b..f4a1dee554a 100644 --- a/packages/db/src/search/CollectionSearch.ts +++ b/packages/db/src/search/CollectionSearch.ts @@ -41,26 +41,6 @@ export class CollectionSearch extends BaseSearch { this.includeStats = (includeStats === 'true'); } - /** - * Build basic query - * - * @param knex - DB client - * @returns queries for getting count and search result - */ - protected buildBasicQuery(knex: Knex) - : { - countQuery: Knex.QueryBuilder, - searchQuery: Knex.QueryBuilder, - } { - const countQuery = knex(this.tableName) - .count('*'); - - const searchQuery = knex(this.tableName) - .select(`${this.tableName}.*`); - - return { countQuery, searchQuery }; - } - /** * Build queries for infix and prefix * @@ -180,6 +160,7 @@ export class CollectionSearch extends BaseSearch { protected async translatePostgresRecordsToApiRecords(pgRecords: PostgresCollectionRecord[], knex: Knex): Promise[]> { log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const { fields } = this.dbQueryParameters; let statsRecords: StatsRecords; const cumulusIds = pgRecords.map((record) => record.cumulus_id); if (this.includeStats) { @@ -188,9 +169,7 @@ export class CollectionSearch extends BaseSearch { const apiRecords = pgRecords.map((record) => { const apiRecord: CollectionRecordApi = translatePostgresCollectionToApiCollection(record); - const apiRecordFinal = this.dbQueryParameters.fields - ? pick(apiRecord, this.dbQueryParameters.fields) - : apiRecord; + const apiRecordFinal = fields ? pick(apiRecord, fields) : apiRecord; if (statsRecords) { apiRecordFinal.stats = statsRecords[record.cumulus_id] ? statsRecords[record.cumulus_id] diff --git a/packages/db/src/search/ExecutionSearch.ts b/packages/db/src/search/ExecutionSearch.ts index 9dd5621933b..8fbea4b8eda 100644 --- a/packages/db/src/search/ExecutionSearch.ts +++ b/packages/db/src/search/ExecutionSearch.ts @@ -143,6 +143,7 @@ export class ExecutionSearch extends BaseSearch { protected translatePostgresRecordsToApiRecords(pgRecords: ExecutionRecord[]) : Partial[] { log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const { fields } = this.dbQueryParameters; const apiRecords = pgRecords.map((executionRecord: ExecutionRecord) => { const { collectionName, collectionVersion, asyncOperationId, parentArn } = executionRecord; const collectionId = collectionName && collectionVersion @@ -153,9 +154,7 @@ export class ExecutionSearch extends BaseSearch { asyncOperationId, parentArn, }); - return this.dbQueryParameters.fields - ? pick(apiRecord, this.dbQueryParameters.fields) - : apiRecord; + return fields ? 
pick(apiRecord, fields) : apiRecord; }); return apiRecords; } diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts index c1b98ced70d..86a4bc275cc 100644 --- a/packages/db/src/search/GranuleSearch.ts +++ b/packages/db/src/search/GranuleSearch.ts @@ -119,6 +119,7 @@ export class GranuleSearch extends BaseSearch { protected translatePostgresRecordsToApiRecords(pgRecords: GranuleRecord[]) : Partial[] { log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const { fields } = this.dbQueryParameters; const apiRecords = pgRecords.map((item: GranuleRecord) => { const granulePgRecord = item; const collectionPgRecord = { @@ -131,9 +132,7 @@ export class GranuleSearch extends BaseSearch { const apiRecord = translatePostgresGranuleToApiGranuleWithoutDbQuery({ granulePgRecord, collectionPgRecord, pdr, providerPgRecord, }); - return this.dbQueryParameters.fields - ? pick(apiRecord, this.dbQueryParameters.fields) - : apiRecord; + return fields ? pick(apiRecord, fields) : apiRecord; }); return apiRecords; } diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 39fd2ef61ec..1f2f5b5f9a8 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -92,6 +92,9 @@ const asyncOperationMapping : { [key: string]: Function } = { id: (value?: string) => ({ id: value, }), + _id: (value?: string) => ({ + id: value, + }), operationType: (value?: string) => ({ operation_type: value, }), diff --git a/packages/db/tests/search/test-AsyncOperationSearch.js b/packages/db/tests/search/test-AsyncOperationSearch.js new file mode 100644 index 00000000000..c46fda31caf --- /dev/null +++ b/packages/db/tests/search/test-AsyncOperationSearch.js @@ -0,0 +1,331 @@ +'use strict'; + +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const { v4: uuidv4 } = require('uuid'); +const omit = require('lodash/omit'); +const range = require('lodash/range'); +const { AsyncOperationSearch } = require('../../dist/search/AsyncOperationSearch'); +const { + translatePostgresAsyncOperationToApiAsyncOperation, +} = require('../../dist/translate/async_operations'); + +const { + destroyLocalTestDb, + generateLocalTestDb, + fakeAsyncOperationRecordFactory, + migrationDir, + AsyncOperationPgModel, +} = require('../../dist'); + +const testDbName = `asyncOperation_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.asyncOperationPgModel = new AsyncOperationPgModel(); + t.context.asyncOperations = []; + t.context.asyncOperationSearchTmestamp = 1579352700000; + + range(100).map((num) => ( + t.context.asyncOperations.push(fakeAsyncOperationRecordFactory({ + cumulus_id: num, + updated_at: new Date(t.context.asyncOperationSearchTmestamp + (num % 2)), + operation_type: num % 2 === 0 ? 'Bulk Granules' : 'Data Migration', + task_arn: num % 2 === 0 ? 
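// The field-mapping entries above translate API query-string names into postgres
// column names before a search runs. A minimal sketch of the same convention,
// using only the async-operation mappings shown in this diff:
const exampleAsyncOperationMapping = {
  _id: (value) => ({ id: value }),
  operationType: (value) => ({ operation_type: value }),
};
// e.g. ?operationType=Data%20Migration becomes a filter on the operation_type column
console.log(exampleAsyncOperationMapping.operationType('Data Migration'));
// => { operation_type: 'Data Migration' }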
cryptoRandomString({ length: 3 }) : undefined, + })) + )); + + await t.context.asyncOperationPgModel.insert( + t.context.knex, + t.context.asyncOperations + ); +}); + +test.after.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('AsyncOperationSearch returns 10 async operations by default', async (t) => { + const { knex } = t.context; + const dbSearch = new AsyncOperationSearch({}); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 100); + t.is(results.length, 10); +}); + +test('AsyncOperationSearch supports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 20, + page: 2, + }; + let dbSearch = new AsyncOperationSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 20); + + queryStringParameters = { + limit: 11, + page: 10, + }; + dbSearch = new AsyncOperationSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 1); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new AsyncOperationSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 0); +}); + +test('AsyncOperationSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 20, + infix: t.context.asyncOperations[5].id.slice(1), + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 1); + t.is(results?.length, 1); +}); + +test('AsyncOperationSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 20, + prefix: t.context.asyncOperations[5].id.slice(0, -1), + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 1); + t.is(results?.length, 1); +}); + +test('AsyncOperationSearch supports term search for uuid field', async (t) => { + const { knex } = t.context; + const dbRecord = t.context.asyncOperations[5]; + const queryStringParameters = { + limit: 200, + id: dbRecord.id, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 1); + t.is(results?.length, 1); +}); + +test('AsyncOperationSearch supports term search for date field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + updatedAt: `${t.context.asyncOperationSearchTmestamp + 1}`, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 50); + t.is(results?.length, 50); +}); + +test('AsyncOperationSearch supports term search for _id field', async (t) => { + const { knex } = t.context; + const dbRecord = t.context.asyncOperations[5]; + const queryStringParameters = { + limit: 200, + _id: dbRecord.id, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 1); + t.is(results?.length, 1); +}); + +test('AsyncOperationSearch supports term search for string field', async (t) => { + const { knex } = t.context; + const 
queryStringParameters = { + limit: 200, + operationType: 'Bulk Granules', + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 50); + t.is(results?.length, 50); +}); + +test('AsyncOperationSearch supports range search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + timestamp__from: `${t.context.asyncOperationSearchTmestamp + 1}`, + timestamp__to: `${t.context.asyncOperationSearchTmestamp + 2}`, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 50); + t.is(results?.length, 50); +}); + +test('AsyncOperationSearch supports search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + id: t.context.asyncOperations[2].id, + updatedAt: `${t.context.asyncOperationSearchTmestamp}`, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 1); + t.is(results?.length, 1); +}); + +test('AsyncOperationSearch non-existing fields are ignored', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 100); + t.is(results?.length, 100); +}); + +test('AsyncOperationSearch returns fields specified', async (t) => { + const { knex } = t.context; + const fields = 'id,operationType,status,taskArn'; + const queryStringParameters = { + fields, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 100); + t.is(results?.length, 10); + results.forEach((asyncOperation) => t.deepEqual(Object.keys(asyncOperation), fields.split(','))); +}); + +test('AsyncOperationSearch supports sorting', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + sort_by: 'id', + order: 'asc', + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); + t.true(response.results[0].id < response.results[99].id); + t.true(response.results[0].id < response.results[50].id); + + queryStringParameters = { + limit: 200, + sort_key: ['-id'], + }; + const dbSearch2 = new AsyncOperationSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 100); + t.is(response2.results?.length, 100); + t.true(response2.results[0].id > response2.results[99].id); + t.true(response2.results[0].id > response2.results[50].id); + + queryStringParameters = { + limit: 200, + sort_by: 'operationType', + }; + const dbSearch3 = new AsyncOperationSearch({ queryStringParameters }); + const response3 = await dbSearch3.query(knex); + t.is(response3.meta.count, 100); + t.is(response3.results?.length, 100); + t.true(response3.results[0].operationType < response3.results[99].operationType); + t.true(response3.results[49].operationType < response3.results[50].operationType); +}); + +test('AsyncOperationSearch supports terms 
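// The __from/__to, __in, __not and __exists suffixes exercised in these tests are
// plain query-string conventions. A minimal sketch of driving the search class
// directly, assuming AsyncOperationSearch is exported from @cumulus/db alongside
// the other search classes and a knex client is available from getKnexClient():
const { AsyncOperationSearch, getKnexClient } = require('@cumulus/db');

async function findBulkGranuleOperations() {
  const knex = await getKnexClient();
  const search = new AsyncOperationSearch({
    queryStringParameters: {
      operationType__in: 'Bulk Granules,Data Migration',
      taskArn__exists: 'true',
      timestamp__from: '1579352700000',
      limit: 20,
    },
  });
  const { meta, results } = await search.query(knex);
  return { count: meta.count, ids: results.map((operation) => operation.id) };
}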
search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + operationType__in: ['Bulk Granules', 'NOTEXIST'].join(','), + }; + let dbSearch = new AsyncOperationSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + operationType__in: ['Bulk Granules', 'NOTEXIST'].join(','), + _id__in: [t.context.asyncOperations[2].id, uuidv4()].join(','), + }; + dbSearch = new AsyncOperationSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('AsyncOperationSearch supports search when asyncOperation field does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + operationType__not: 'Bulk Granules', + }; + let dbSearch = new AsyncOperationSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 200, + operationType__not: 'Bulk Granules', + id__not: t.context.asyncOperations[1].id, + }; + dbSearch = new AsyncOperationSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); +}); + +test('AsyncOperationSearch supports search which checks existence of asyncOperation field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + taskArn__exists: 'false', + output_exists: 'true', + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 50); + t.is(results?.length, 50); +}); + +test('AsyncOperationSearch returns the correct record', async (t) => { + const { knex } = t.context; + const dbRecord = t.context.asyncOperations[2]; + const queryStringParameters = { + limit: 200, + id: dbRecord.id, + }; + const dbSearch = new AsyncOperationSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 1); + t.is(results?.length, 1); + + const expectedApiRecord = translatePostgresAsyncOperationToApiAsyncOperation(dbRecord); + t.deepEqual(omit(results?.[0], 'createdAt'), omit(expectedApiRecord, 'createdAt')); + t.truthy(results?.[0]?.createdAt); +}); From f4afe1926da1ebf9d39128ddbf38c2323114c5a8 Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Wed, 21 Aug 2024 16:45:37 -0400 Subject: [PATCH 19/61] Jk/cumulus 3232 (#3778) * Update PR mentioned endpoints to not write to ES * Update POST units to not use ES * Fix/manage broken tests * Update package/async-operations to not use ES * Remove ES update from async-operations image * Update async image to use v53, which includes removal of ES code * Fix broken ecs package tests/logic * Minor refactor * Update CHANGELOG * Remove additional unneeded test * Fix CHANGELOG * Modify unit ratchet to match after removal of code Code was removed, not refactored - this change did not degrade/improve units so much as change the denominator of lines/methods tested * Fix integration test/make it not rely on list endpoint * Refactor/PR feedback * Update example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js Co-authored-by: jennyhliu <34660846+jennyhliu@users.noreply.github.com> * Remove unneeded transaction * Remove unneeded type 
assertion/error handling * Remove transaction * Address PR comments * Remove ES calls from DEL/GET PDR endpoints * Update CHANGELOG * Remove unneede deps * Fix bad merge * Fix lint/etc --------- Co-authored-by: jennyhliu <34660846+jennyhliu@users.noreply.github.com> --- CHANGELOG.md | 2 + packages/api/endpoints/pdrs.js | 59 ++------- packages/api/lib/testUtils.js | 10 -- packages/api/tests/endpoints/test-pdrs.js | 148 +--------------------- 4 files changed, 16 insertions(+), 203 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fec422f4574..a38072bd9f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Replace ElasticSearch Phase 2 +- **CUMULUS-3232** + - Update API PDR endpoints `DEL` and `GET` to not update Elasticsearch - **CUMULUS-3235** - Updated `asyncOperations` api endpoint to query postgres - **CUMULUS-3236** diff --git a/packages/api/endpoints/pdrs.js b/packages/api/endpoints/pdrs.js index 6023cbec4ef..3490da47b08 100644 --- a/packages/api/endpoints/pdrs.js +++ b/packages/api/endpoints/pdrs.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const router = require('express-promise-router')(); @@ -9,8 +11,7 @@ const { createRejectableTransaction, } = require('@cumulus/db'); const { RecordDoesNotExist } = require('@cumulus/errors'); -const { indexPdr, deletePdr } = require('@cumulus/es-client/indexer'); -const { Search, getEsClient } = require('@cumulus/es-client/search'); +const { Search } = require('@cumulus/es-client/search'); const Logger = require('@cumulus/logger'); const log = new Logger({ sender: '@cumulus/api/pdrs' }); @@ -57,8 +58,6 @@ async function get(req, res) { } } -const isRecordDoesNotExistError = (e) => e.message.includes('RecordDoesNotExist'); - /** * delete a given PDR * @@ -70,63 +69,23 @@ async function del(req, res) { const { pdrPgModel = new PdrPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), s3Utils = S3UtilsLib, } = req.testContext || {}; const pdrName = req.params.pdrName; const pdrS3Key = `${process.env.stackName}/pdrs/${pdrName}`; - const esPdrsClient = new Search( - {}, - 'pdr', - process.env.ES_INDEX - ); try { - await pdrPgModel.get(knex, { name: pdrName }); - } catch (error) { - if (error instanceof RecordDoesNotExist) { - if (!(await esPdrsClient.exists(pdrName))) { - log.info('PDR does not exist in Elasticsearch'); + await createRejectableTransaction(knex, async (trx) => { + const deleteResultsCount = await pdrPgModel.delete(trx, { name: pdrName }); + if (deleteResultsCount === 0) { return res.boom.notFound('No record found'); } - log.info('PDR does not exist in PostgreSQL, it only exists in Elasticsearch'); - } else { - throw error; - } - } - - const esPdrClient = new Search( - {}, - 'pdr', - process.env.ES_INDEX - ); - const esPdrRecord = await esPdrClient.get(pdrName).catch(log.info); - - try { - let esPdrDeleted = false; - try { - await createRejectableTransaction(knex, async (trx) => { - await pdrPgModel.delete(trx, { name: pdrName }); - await deletePdr({ - esClient, - name: pdrName, - index: process.env.ES_INDEX, - ignore: [404], - }); - esPdrDeleted = true; - await s3Utils.deleteS3Object(process.env.system_bucket, pdrS3Key); - }); - } catch (innerError) { - if (esPdrDeleted && esPdrRecord) { - delete esPdrRecord._id; - await indexPdr(esClient, esPdrRecord, process.env.ES_INDEX); - } - throw innerError; - } + return await s3Utils.deleteS3Object(process.env.system_bucket, pdrS3Key); + }); } catch (error) { 
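// The rewritten del() above wraps the postgres delete and the S3 cleanup in one
// rejectable transaction, so a failed S3 delete rolls the row delete back. A
// minimal sketch of the same pattern, assuming the system bucket and PDR key are
// passed in by the caller:
const { createRejectableTransaction, getKnexClient, PdrPgModel } = require('@cumulus/db');
const { deleteS3Object } = require('@cumulus/aws-client/S3');

async function deletePdrRecordAndFile(pdrName, bucket, key) {
  const knex = await getKnexClient();
  const pdrPgModel = new PdrPgModel();
  return await createRejectableTransaction(knex, async (trx) => {
    const deletedCount = await pdrPgModel.delete(trx, { name: pdrName });
    if (deletedCount === 0) return false; // nothing to delete; the endpoint returns a 404 here
    await deleteS3Object(bucket, key);
    return true;
  });
}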
log.debug(`Failed to delete PDR with name ${pdrName}. Error ${JSON.stringify(error)}.`); - if (!isRecordDoesNotExistError(error)) throw error; + throw error; } return res.send({ detail: 'Record deleted' }); } diff --git a/packages/api/lib/testUtils.js b/packages/api/lib/testUtils.js index 0910cba7ccd..a9ab1adb789 100644 --- a/packages/api/lib/testUtils.js +++ b/packages/api/lib/testUtils.js @@ -19,13 +19,11 @@ const { translateApiExecutionToPostgresExecution, translateApiProviderToPostgresProvider, translateApiRuleToPostgresRuleRaw, - translatePostgresPdrToApiPdr, translatePostgresRuleToApiRule, } = require('@cumulus/db'); const { indexProvider, indexRule, - indexPdr, deleteExecution, } = require('@cumulus/es-client/indexer'); const { @@ -570,8 +568,6 @@ const createPdrTestRecords = async (context, pdrParams = {}) => { const { knex, pdrPgModel, - esClient, - esPdrsClient, collectionCumulusId, providerCumulusId, } = context; @@ -593,14 +589,8 @@ const createPdrTestRecords = async (context, pdrParams = {}) => { const originalPgRecord = await pdrPgModel.get( knex, { cumulus_id: pgPdr.cumulus_id } ); - const originalPdr = await translatePostgresPdrToApiPdr(originalPgRecord, knex); - await indexPdr(esClient, originalPdr, process.env.ES_INDEX); - const originalEsRecord = await esPdrsClient.get( - originalPdr.pdrName - ); return { originalPgRecord, - originalEsRecord, }; }; diff --git a/packages/api/tests/endpoints/test-pdrs.js b/packages/api/tests/endpoints/test-pdrs.js index 562cf33598e..90b4cb125c6 100644 --- a/packages/api/tests/endpoints/test-pdrs.js +++ b/packages/api/tests/endpoints/test-pdrs.js @@ -19,7 +19,6 @@ const { migrationDir, PdrPgModel, ProviderPgModel, - translatePostgresPdrToApiPdr, } = require('@cumulus/db'); const { fakeCollectionRecordFactory, @@ -40,7 +39,6 @@ const { fakePdrFactory, setAuthorizedOAuthUsers, createPdrTestRecords, - fakePdrFactoryV2, } = require('../../lib/testUtils'); const models = require('../../models'); const assertions = require('../../lib/assertions'); @@ -281,7 +279,7 @@ test('GET fails if pdr is not found', async (t) => { t.true(message.includes('No record found for')); }); -test('DELETE returns a 404 if PostgreSQL and Elasticsearch PDR cannot be found', async (t) => { +test('DELETE returns a 404 if PostgreSQL PDR cannot be found', async (t) => { const nonExistentPdr = fakePdrFactory('completed'); const response = await request(app) .delete(`/pdrs/${nonExistentPdr.pdrName}`) @@ -291,9 +289,8 @@ test('DELETE returns a 404 if PostgreSQL and Elasticsearch PDR cannot be found', t.is(response.body.message, 'No record found'); }); -test('Deleting a PDR that exists in PostgreSQL and not Elasticsearch succeeds', async (t) => { +test('Deleting a PDR that exists in PostgreSQL succeeds', async (t) => { const { - esPdrsClient, collectionCumulusId, providerCumulusId, knex, @@ -310,12 +307,6 @@ test('Deleting a PDR that exists in PostgreSQL and not Elasticsearch succeeds', knex, { cumulus_id: pgPdr.cumulus_id } ); - t.false( - await esPdrsClient.exists( - originalPgRecord.name - ) - ); - const response = await request(app) .delete(`/pdrs/${originalPgRecord.name}`) .set('Accept', 'application/json') @@ -327,43 +318,10 @@ test('Deleting a PDR that exists in PostgreSQL and not Elasticsearch succeeds', t.false(await pdrPgModel.exists(knex, { name: originalPgRecord.name })); }); -test.serial('Deleting a PDR that exists in Elastisearch and not PostgreSQL succeeds', async (t) => { - const { - esPdrsClient, - testPgCollection, - testPgProvider, - knex, - pdrPgModel, - } 
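// The endpoint resolves its collaborators from req.testContext with real defaults,
// which is what lets these tests swap in fakes such as a failing s3Utils. A
// minimal sketch of the same injection pattern, with a hypothetical handler:
const { getKnexClient, PdrPgModel } = require('@cumulus/db');

async function exampleHandler(req, res) {
  const {
    pdrPgModel = new PdrPgModel(),   // real model unless a test injects a fake
    knex = await getKnexClient(),    // real client unless a test injects one
  } = req.testContext || {};
  const exists = await pdrPgModel.exists(knex, { name: req.params.pdrName });
  return exists ? res.send({ found: true }) : res.boom.notFound('No record found');
}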
= t.context; - - const testPdr = fakePdrFactoryV2({ - collectionId: constructCollectionId(testPgCollection.name, testPgCollection.version), - provider: testPgProvider.name, - }); - await indexer.indexPdr(t.context.esClient, testPdr, t.context.esIndex); - - t.false(await pdrPgModel.exists(knex, { name: testPdr.pdrName })); - - const response = await request(app) - .delete(`/pdrs/${testPdr.pdrName}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - const { detail } = response.body; - - t.is(detail, 'Record deleted'); - t.false( - await esPdrsClient.exists( - testPdr.pdrName - ) - ); -}); - test.serial('DELETE handles the case where the PDR exists in PostgreSQL but not in S3', async (t) => { const { knex, pdrPgModel, - esClient, collectionCumulusId, providerCumulusId, } = t.context; @@ -377,9 +335,6 @@ test.serial('DELETE handles the case where the PDR exists in PostgreSQL but not const originalPgRecord = await pdrPgModel.get( knex, { cumulus_id: pdr.cumulus_id } ); - const originalPdr = await translatePostgresPdrToApiPdr(originalPgRecord, knex); - await indexer.indexPdr(esClient, originalPdr, process.env.ES_INDEX); - const response = await request(app) .delete(`/pdrs/${originalPgRecord.name}`) .set('Accept', 'application/json') @@ -387,14 +342,12 @@ test.serial('DELETE handles the case where the PDR exists in PostgreSQL but not .expect(200); t.is(response.status, 200); - const parsedBody = response.body; t.is(parsedBody.detail, 'Record deleted'); t.false(await pdrPgModel.exists(knex, { name: originalPgRecord.name })); - t.false(await t.context.esPdrsClient.exists(originalPgRecord.name)); }); -test.serial('DELETE removes a PDR from all data stores', async (t) => { +test.serial('DELETE removes a PDR from data store', async (t) => { const { originalPgRecord, } = await createPdrTestRecords(t.context); @@ -408,11 +361,6 @@ test.serial('DELETE removes a PDR from all data stores', async (t) => { t.is(detail, 'Record deleted'); t.false(await t.context.pdrPgModel.exists(t.context.knex, { name: originalPgRecord.name })); - t.false( - await t.context.esPdrsClient.exists( - originalPgRecord.name - ) - ); t.false( await s3ObjectExists({ Bucket: process.env.system_bucket, @@ -421,7 +369,7 @@ test.serial('DELETE removes a PDR from all data stores', async (t) => { ); }); -test.serial('del() does not remove from Elasticsearch/S3 if removing from PostgreSQL fails', async (t) => { +test.serial('del() does not remove from S3 if removing from PostgreSQL fails', async (t) => { const { originalPgRecord, } = await createPdrTestRecords( @@ -432,12 +380,6 @@ test.serial('del() does not remove from Elasticsearch/S3 if removing from Postgr await t.context.pdrPgModel.delete(t.context.knex, { name: originalPgRecord.name, }); - await indexer.deleteRecord({ - esClient: t.context.esClient, - id: originalPgRecord.name, - type: 'pdr', - index: t.context.esIndex, - }); await deleteS3Object(process.env.system_bucket, pdrS3Key(originalPgRecord.name)); }); @@ -470,75 +412,6 @@ test.serial('del() does not remove from Elasticsearch/S3 if removing from Postgr name: originalPgRecord.name, }) ); - t.true( - await t.context.esPdrsClient.exists( - originalPgRecord.name - ) - ); - t.true( - await s3ObjectExists({ - Bucket: process.env.system_bucket, - Key: pdrS3Key(originalPgRecord.name), - }) - ); -}); - -test.serial('del() does not remove from PostgreSQL/S3 if removing from Elasticsearch fails', async (t) => { - const { - originalPgRecord, - } = await createPdrTestRecords( - 
t.context - ); - - t.teardown(async () => { - await t.context.pdrPgModel.delete(t.context.knex, { - name: originalPgRecord.name, - }); - await indexer.deleteRecord({ - esClient: t.context.esClient, - id: originalPgRecord.name, - type: 'pdr', - index: t.context.esIndex, - }); - await deleteS3Object(process.env.system_bucket, pdrS3Key(originalPgRecord.name)); - }); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - delete: () => { - throw new Error('something bad'); - }, - }, - }; - - const expressRequest = { - params: { - pdrName: originalPgRecord.name, - }, - testContext: { - knex: t.context.knex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.pdrPgModel.exists(t.context.knex, { - name: originalPgRecord.name, - }) - ); - t.true( - await t.context.esPdrsClient.exists( - originalPgRecord.name - ) - ); t.true( await s3ObjectExists({ Bucket: process.env.system_bucket, @@ -547,7 +420,7 @@ test.serial('del() does not remove from PostgreSQL/S3 if removing from Elasticse ); }); -test.serial('del() does not remove from PostgreSQL/Elasticsearch if removing from S3 fails', async (t) => { +test.serial('del() does not remove from PostgreSQL if removing from S3 fails', async (t) => { const { originalPgRecord, } = await createPdrTestRecords( @@ -558,12 +431,6 @@ test.serial('del() does not remove from PostgreSQL/Elasticsearch if removing fro await t.context.pdrPgModel.delete(t.context.knex, { name: originalPgRecord.name, }); - await indexer.deleteRecord({ - esClient: t.context.esClient, - id: originalPgRecord.name, - type: 'pdr', - index: t.context.esIndex, - }); await deleteS3Object(process.env.system_bucket, pdrS3Key(originalPgRecord.name)); }); @@ -595,11 +462,6 @@ test.serial('del() does not remove from PostgreSQL/Elasticsearch if removing fro name: originalPgRecord.name, }) ); - t.true( - await t.context.esPdrsClient.exists( - originalPgRecord.name - ) - ); t.true( await s3ObjectExists({ Bucket: process.env.system_bucket, From 4b46cece8e3684c6272ef1934160f9c6c44d619d Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:17:21 -0400 Subject: [PATCH 20/61] CUMULUS-3233/3234: Update Providers LIST endpoint to remove ElasticSearch dependency and query Postgres instead (#3764) * first commit * fixing CHANGELOG entry * PR feedback * PR feedback --- CHANGELOG.md | 3 + packages/api/endpoints/providers.js | 40 +-- packages/api/lib/testUtils.js | 8 - .../endpoints/providers/create-provider.js | 87 +---- .../endpoints/providers/delete-provider.js | 155 +-------- .../tests/endpoints/providers/get-provider.js | 15 - .../endpoints/providers/list-providers.js | 35 +- .../endpoints/providers/update-provider.js | 151 +------- packages/db/src/index.ts | 3 + packages/db/src/search/CollectionSearch.ts | 2 +- packages/db/src/search/ExecutionSearch.ts | 2 +- packages/db/src/search/ProviderSearch.ts | 63 ++++ packages/db/src/search/StatsSearch.ts | 2 +- packages/db/src/search/field-mapping.ts | 33 ++ .../db/tests/search/test-ProviderSearch.js | 323 ++++++++++++++++++ 15 files changed, 448 insertions(+), 474 deletions(-) create mode 100644 packages/db/src/search/ProviderSearch.ts create mode 100644 packages/db/tests/search/test-ProviderSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index a38072bd9f2..219295ac5cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 
+10,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - **CUMULUS-3232** - Update API PDR endpoints `DEL` and `GET` to not update Elasticsearch +- **CUMULUS-3233** + - Updated `providers` list api endpoint and added `ProviderSearch` class to query postgres + - Removed Elasticsearch dependency from `providers` endpoints - **CUMULUS-3235** - Updated `asyncOperations` api endpoint to query postgres - **CUMULUS-3236** diff --git a/packages/api/endpoints/providers.js b/packages/api/endpoints/providers.js index 7c9dd3cdec5..8f040980191 100644 --- a/packages/api/endpoints/providers.js +++ b/packages/api/endpoints/providers.js @@ -10,14 +10,13 @@ const { translateApiProviderToPostgresProvider, translatePostgresProviderToApiProvider, validateProviderHost, + ProviderSearch, } = require('@cumulus/db'); const { RecordDoesNotExist, ValidationError, } = require('@cumulus/errors'); const Logger = require('@cumulus/logger'); -const { getEsClient, Search } = require('@cumulus/es-client/search'); -const { indexProvider, deleteProvider } = require('@cumulus/es-client/indexer'); const { removeNilProperties } = require('@cumulus/common/util'); const { isBadRequestError } = require('../lib/errors'); @@ -31,14 +30,11 @@ const log = new Logger({ sender: '@cumulus/api/providers' }); * @returns {Promise} the promise of express response object */ async function list(req, res) { - const search = new Search( - { queryStringParameters: req.query }, - 'provider', - process.env.ES_INDEX + const dbSearch = new ProviderSearch( + { queryStringParameters: req.query } ); - - const response = await search.query(); - return res.send(response); + const result = await dbSearch.query(); + return res.send(result); } /** @@ -75,7 +71,6 @@ async function post(req, res) { const { providerPgModel = new ProviderPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const apiProvider = req.body; @@ -97,7 +92,6 @@ async function post(req, res) { await createRejectableTransaction(knex, async (trx) => { const [updatedPostgresProvider] = await providerPgModel.create(trx, postgresProvider, '*'); record = translatePostgresProviderToApiProvider(updatedPostgresProvider); - await indexProvider(esClient, record, process.env.ES_INDEX); }); return res.send({ record, message: 'Record saved' }); } catch (error) { @@ -125,7 +119,6 @@ async function put(req, res) { const { providerPgModel = new ProviderPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const { params: { id }, body } = req; @@ -160,7 +153,6 @@ async function put(req, res) { await createRejectableTransaction(knex, async (trx) => { const [updatedPostgresProvider] = await providerPgModel.upsert(trx, postgresProvider); record = translatePostgresProviderToApiProvider(updatedPostgresProvider); - await indexProvider(esClient, record, process.env.ES_INDEX); }); return res.send(record); @@ -177,39 +169,23 @@ async function del(req, res) { const { providerPgModel = new ProviderPgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const { id } = req.params; - const esProvidersClient = new Search( - {}, - 'provider', - process.env.ES_INDEX - ); try { await providerPgModel.get(knex, { name: id }); } catch (error) { if (error instanceof RecordDoesNotExist) { - if (!(await esProvidersClient.exists(id))) { - log.info('Provider does not exist in Elasticsearch and PostgreSQL'); - return res.boom.notFound('No record found'); - } 
- log.info('Provider does not exist in PostgreSQL, it only exists in Elasticsearch. Proceeding with deletion'); - } else { - throw error; + log.info('Provider does not exist in PostgreSQL'); + return res.boom.notFound('No record found'); } + throw error; } try { await createRejectableTransaction(knex, async (trx) => { await providerPgModel.delete(trx, { name: id }); - await deleteProvider({ - esClient, - id, - index: process.env.ES_INDEX, - ignore: [404], - }); }); log.debug(`deleted provider ${id}`); return res.send({ message: 'Record deleted' }); diff --git a/packages/api/lib/testUtils.js b/packages/api/lib/testUtils.js index a9ab1adb789..3ee3b1b2581 100644 --- a/packages/api/lib/testUtils.js +++ b/packages/api/lib/testUtils.js @@ -22,7 +22,6 @@ const { translatePostgresRuleToApiRule, } = require('@cumulus/db'); const { - indexProvider, indexRule, deleteExecution, } = require('@cumulus/es-client/indexer'); @@ -507,8 +506,6 @@ const createProviderTestRecords = async (context, providerParams) => { const { testKnex, providerPgModel, - esClient, - esProviderClient, } = context; const originalProvider = fakeProviderFactory(providerParams); @@ -517,14 +514,9 @@ const createProviderTestRecords = async (context, providerParams) => { const originalPgRecord = await providerPgModel.get( testKnex, { cumulus_id: pgProvider.cumulus_id } ); - await indexProvider(esClient, originalProvider, process.env.ES_INDEX); - const originalEsRecord = await esProviderClient.get( - originalProvider.id - ); return { originalProvider, originalPgRecord, - originalEsRecord, }; }; diff --git a/packages/api/tests/endpoints/providers/create-provider.js b/packages/api/tests/endpoints/providers/create-provider.js index a1596e3e000..b26babaaf3e 100644 --- a/packages/api/tests/endpoints/providers/create-provider.js +++ b/packages/api/tests/endpoints/providers/create-provider.js @@ -20,11 +20,6 @@ const { } = require('@cumulus/aws-client/S3'); const { randomString } = require('@cumulus/common/test-utils'); const { RecordDoesNotExist } = require('@cumulus/errors'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const AccessToken = require('../../../models/access-tokens'); const { @@ -33,9 +28,6 @@ const { setAuthorizedOAuthUsers, } = require('../../../lib/testUtils'); const assertions = require('../../../lib/assertions'); -const { post } = require('../../../endpoints/providers'); - -const { buildFakeExpressResponse } = require('../utils'); const testDbName = randomString(12); process.env.AccessTokensTable = randomString(); @@ -69,15 +61,6 @@ test.before(async (t) => { await s3().createBucket({ Bucket: process.env.system_bucket }); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esProviderClient = new Search( - {}, - 'provider', - t.context.esIndex - ); - const username = randomString(); await setAuthorizedOAuthUsers([username]); @@ -90,7 +73,6 @@ test.before(async (t) => { test.after.always(async (t) => { await recursivelyDeleteS3Bucket(process.env.system_bucket); await accessTokenModel.deleteTable(); - await cleanupTestIndex(t.context); await destroyLocalTestDb({ knex: t.context.testKnex, knexAdmin: t.context.testKnexAdmin, @@ -140,7 +122,7 @@ test('POST with invalid authorization scheme returns an invalid authorization re await providerDoesNotExist(t, newProvider.id); }); -test('POST creates a new provider in all data stores', async (t) => 
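// With the Elasticsearch writes gone, put() persists through postgres only:
// translate the API provider, upsert it, and translate the stored row back for
// the response. A minimal sketch of that round trip, assuming a knex client from
// getKnexClient():
const {
  getKnexClient,
  ProviderPgModel,
  translateApiProviderToPostgresProvider,
  translatePostgresProviderToApiProvider,
} = require('@cumulus/db');

async function saveProvider(apiProvider) {
  const knex = await getKnexClient();
  const providerPgModel = new ProviderPgModel();
  const postgresProvider = await translateApiProviderToPostgresProvider(apiProvider);
  const [storedProvider] = await providerPgModel.upsert(knex, postgresProvider);
  return translatePostgresProviderToApiProvider(storedProvider);
}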
{ +test('POST creates a new provider in postgres', async (t) => { const { providerPgModel } = t.context; const newProviderId = randomString(); const newProvider = fakeProviderFactory({ @@ -176,11 +158,6 @@ test('POST creates a new provider in all data stores', async (t) => { postgresOmitList ) ); - - const esRecord = await t.context.esProviderClient.get( - newProvider.id - ); - t.like(esRecord, record); }); test('POST creates a new provider in PG with correct timestamps', async (t) => { @@ -208,15 +185,9 @@ test('POST creates a new provider in PG with correct timestamps', async (t) => { t.true(record.createdAt > newProvider.createdAt); t.true(record.updatedAt > newProvider.updatedAt); - const esRecord = await t.context.esProviderClient.get( - newProvider.id - ); - // PG and ES and returned API records have the same timestamps t.is(providerPgRecord.created_at.getTime(), record.createdAt); t.is(providerPgRecord.updated_at.getTime(), record.updatedAt); - t.is(providerPgRecord.created_at.getTime(), esRecord.createdAt); - t.is(providerPgRecord.updated_at.getTime(), esRecord.updatedAt); }); test('POST returns a 409 error if the provider already exists in postgres', async (t) => { @@ -309,59 +280,3 @@ test('CUMULUS-176 POST returns a 400 response if invalid JSON provided', async ( `response.text: ${response.text}` ); }); - -test('post() does not write to Elasticsearch if writing to PostgreSQL fails', async (t) => { - const provider = fakeProviderFactory(); - - const fakeProviderPgModel = { - create: () => Promise.reject(new Error('something bad')), - exists: () => false, - }; - - const expressRequest = { - body: provider, - testContext: { - providerPgModel: fakeProviderPgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await post(expressRequest, response); - - t.true(response.boom.badImplementation.calledWithMatch('something bad')); - - t.false(await t.context.esProviderClient.exists( - provider.id - )); -}); - -test('post() does not write to PostgreSQL if writing to Elasticsearch fails', async (t) => { - const provider = fakeProviderFactory(); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - - const expressRequest = { - body: provider, - testContext: { - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await post(expressRequest, response); - - t.true(response.boom.badImplementation.calledWithMatch('something bad')); - - t.false( - await t.context.providerPgModel.exists(t.context.testKnex, { - name: provider.id, - }) - ); -}); diff --git a/packages/api/tests/endpoints/providers/delete-provider.js b/packages/api/tests/endpoints/providers/delete-provider.js index 6868ce55ff2..70e9f06e34f 100644 --- a/packages/api/tests/endpoints/providers/delete-provider.js +++ b/packages/api/tests/endpoints/providers/delete-provider.js @@ -22,25 +22,14 @@ const { RulePgModel, ProviderPgModel, migrationDir, - translatePostgresProviderToApiProvider, } = require('@cumulus/db'); -const { Search } = require('@cumulus/es-client/search'); -const indexer = require('@cumulus/es-client/indexer'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { AccessToken } = require('../../../models'); const { createFakeJwtAuthToken, setAuthorizedOAuthUsers, - createProviderTestRecords, } = require('../../../lib/testUtils'); const assertions = require('../../../lib/assertions'); -const { del } = 
require('../../../endpoints/providers'); - -const { buildFakeExpressResponse } = require('../utils'); const testDbName = randomId('db'); @@ -71,15 +60,6 @@ test.before(async (t) => { await s3().createBucket({ Bucket: process.env.system_bucket }); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esProviderClient = new Search( - {}, - 'provider', - t.context.esIndex - ); - const username = randomId('user'); await setAuthorizedOAuthUsers([username]); @@ -99,19 +79,16 @@ test.before(async (t) => { test.beforeEach(async (t) => { const testPgProvider = fakeProviderRecordFactory(); t.context.testPgProvider = testPgProvider; - const testProvider = translatePostgresProviderToApiProvider(testPgProvider); const [pgProvider] = await t.context.providerPgModel .create( t.context.testKnex, testPgProvider ); t.context.providerCumulusId = pgProvider.cumulus_id; - await indexer.indexProvider(t.context.esClient, testProvider, t.context.esIndex); }); test.after.always(async (t) => { await accessTokenModel.deleteTable(); - await cleanupTestIndex(t.context); await recursivelyDeleteS3Bucket(process.env.system_bucket); await destroyLocalTestDb({ knex: t.context.testKnex, @@ -144,7 +121,7 @@ test('Attempting to delete a provider with an invalid access token returns an un test.todo('Attempting to delete a provider with an unauthorized user returns an unauthorized response'); -test('Deleting a provider removes the provider from all data stores', async (t) => { +test('Deleting a provider removes the provider from postgres', async (t) => { const { testPgProvider, providerPgModel } = t.context; const name = testPgProvider.name; await request(app) @@ -154,11 +131,6 @@ test('Deleting a provider removes the provider from all data stores', async (t) .expect(200); t.false(await providerPgModel.exists(t.context.testKnex, { name })); - t.false( - await t.context.esProviderClient.exists( - testPgProvider.name - ) - ); }); test('Deleting a provider that exists in PostgreSQL and not Elasticsearch succeeds', async (t) => { @@ -181,43 +153,9 @@ test('Deleting a provider that exists in PostgreSQL and not Elasticsearch succee { name: testPgProvider.name } ) ); - t.false( - await t.context.esProviderClient.exists( - testPgProvider.name - ) - ); -}); - -test('Deleting a provider that exists in Elasticsearch and not PostgreSQL succeeds', async (t) => { - const testPgProvider = fakeProviderRecordFactory(); - const testProvider = translatePostgresProviderToApiProvider(testPgProvider); - await indexer.indexProvider(t.context.esClient, testProvider, t.context.esIndex); - - t.true( - await t.context.esProviderClient.exists( - testPgProvider.name - ) - ); - - await request(app) - .delete(`/providers/${testPgProvider.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - t.false( - await t.context.providerPgModel.exists( - t.context.testKnex, - { name: testPgProvider.name } - ) - ); - t.false( - await t.context.esProviderClient.exists( - testPgProvider.name - ) - ); }); -test('Deleting a provider that does not exist in PostgreSQL and Elasticsearch returns a 404', async (t) => { +test('Deleting a provider that does not exist in PostgreSQL returns a 404', async (t) => { const { status } = await request(app) .delete(`/providers/${randomString}`) .set('Accept', 'application/json') @@ -247,95 +185,6 @@ test('Attempting to delete a provider with an associated postgres rule returns a 
t.true(response.body.message.includes('Cannot delete provider with associated rules')); }); -test('del() does not remove from Elasticsearch if removing from PostgreSQL fails', async (t) => { - const { - originalPgRecord, - } = await createProviderTestRecords( - t.context - ); - - const fakeproviderPgModel = { - delete: () => { - throw new Error('something bad'); - }, - get: () => Promise.resolve(originalPgRecord), - }; - - const expressRequest = { - params: { - id: originalPgRecord.id, - }, - testContext: { - knex: t.context.testKnex, - providerPgModel: fakeproviderPgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.providerPgModel.exists(t.context.testKnex, { - name: originalPgRecord.name, - }) - ); - t.true( - await t.context.esProviderClient.exists( - originalPgRecord.name - ) - ); -}); - -test('del() does not remove from PostgreSQL if removing from Elasticsearch fails', async (t) => { - const { - originalProvider, - } = await createProviderTestRecords( - t.context - ); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - delete: () => { - throw new Error('something bad'); - }, - }, - }; - - const expressRequest = { - params: { - id: originalProvider.id, - }, - body: originalProvider, - testContext: { - knex: t.context.testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.providerPgModel.exists(t.context.testKnex, { - name: originalProvider.id, - }) - ); - t.true( - await t.context.esProviderClient.exists( - originalProvider.id - ) - ); -}); - test('Attempting to delete a provider with an associated granule does not delete the provider', async (t) => { const { collectionPgModel, diff --git a/packages/api/tests/endpoints/providers/get-provider.js b/packages/api/tests/endpoints/providers/get-provider.js index 47f746be540..87ccba33fc8 100644 --- a/packages/api/tests/endpoints/providers/get-provider.js +++ b/packages/api/tests/endpoints/providers/get-provider.js @@ -9,8 +9,6 @@ const { recursivelyDeleteS3Bucket, } = require('@cumulus/aws-client/S3'); const { randomString } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const { getEsClient } = require('@cumulus/es-client/search'); const { destroyLocalTestDb, fakeProviderRecordFactory, @@ -32,9 +30,6 @@ process.env.stackName = randomString(); process.env.system_bucket = randomString(); process.env.TOKEN_SECRET = randomString(); -const esIndex = randomString(); -let esClient; - let jwtAuthToken; let accessTokenModel; @@ -42,14 +37,6 @@ test.before(async (t) => { t.context.testDbName = `test_executions_${cryptoRandomString({ length: 10 })}`; await s3().createBucket({ Bucket: process.env.system_bucket }); - const esAlias = randomString(); - process.env.ES_INDEX = esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); - const username = randomString(); await setAuthorizedOAuthUsers([username]); @@ -59,7 +46,6 @@ test.before(async (t) => { jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - esClient = await getEsClient('fakehost'); const { knex, knexAdmin } = await generateLocalTestDb(t.context.testDbName, migrationDir); t.context.knex = knex; t.context.knexAdmin = 
knexAdmin; @@ -83,7 +69,6 @@ test.beforeEach(async (t) => { test.after.always(async (t) => { await recursivelyDeleteS3Bucket(process.env.system_bucket); await accessTokenModel.deleteTable(); - await esClient.client.indices.delete({ index: esIndex }); await destroyLocalTestDb({ knex: t.context.knex, knexAdmin: t.context.knexAdmin, diff --git a/packages/api/tests/endpoints/providers/list-providers.js b/packages/api/tests/endpoints/providers/list-providers.js index 40c33d4ddd9..fd7f3b945fa 100644 --- a/packages/api/tests/endpoints/providers/list-providers.js +++ b/packages/api/tests/endpoints/providers/list-providers.js @@ -8,9 +8,6 @@ const { recursivelyDeleteS3Bucket, } = require('@cumulus/aws-client/S3'); const { randomString } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const { getEsClient } = require('@cumulus/es-client/search'); -const indexer = require('@cumulus/es-client/indexer'); const { ProviderPgModel, @@ -35,9 +32,6 @@ process.env.TOKEN_SECRET = randomString(); // import the express app after setting the env variables const { app } = require('../../../app'); -const esIndex = randomString(); -let esClient; - let jwtAuthToken; let accessTokenModel; @@ -48,18 +42,7 @@ test.before(async (t) => { const username = randomString(); await setAuthorizedOAuthUsers([username]); - - const esAlias = randomString(); - process.env.ES_INDEX = esAlias; - - await Promise.all([ - accessTokenModel.createTable(), - bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }), - ]); + await accessTokenModel.createTable(); t.context.testDbName = `test_providers_${cryptoRandomString({ length: 10 })}`; @@ -74,13 +57,17 @@ test.before(async (t) => { jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - esClient = await getEsClient('fakehost'); + t.context.testProvider = fakeProviderRecordFactory(); + t.context.providerPgModel = new ProviderPgModel(); + await t.context.providerPgModel.insert( + t.context.knex, + t.context.testProvider + ); }); test.after.always((t) => Promise.all([ recursivelyDeleteS3Bucket(process.env.system_bucket), accessTokenModel.deleteTable(), - esClient.client.indices.delete({ index: esIndex }), destroyLocalTestDb({ ...t.context, }), @@ -108,12 +95,6 @@ test('CUMULUS-912 GET without pathParameters and with an invalid access token re test.todo('CUMULUS-912 GET without pathParameters and with an unauthorized user returns an unauthorized response'); test('default returns list of providers', async (t) => { - const testProvider = fakeProviderRecordFactory(); - const providerPgModel = new ProviderPgModel(); - const [provider] = await providerPgModel.create(t.context.knex, testProvider); - const pgProvider = await providerPgModel.get(t.context.knex, { cumulus_id: provider.cumulus_id }); - await indexer.indexProvider(esClient, pgProvider, esIndex); - const response = await request(app) .get('/providers') .set('Accept', 'application/json') @@ -121,5 +102,5 @@ test('default returns list of providers', async (t) => { .expect(200); const { results } = response.body; - t.truthy(results.find((r) => r.id === testProvider.id)); + t.truthy(results.find((r) => r.id === t.context.testProvider.name)); }); diff --git a/packages/api/tests/endpoints/providers/update-provider.js b/packages/api/tests/endpoints/providers/update-provider.js index 81e0df725a2..637a927403f 100644 --- a/packages/api/tests/endpoints/providers/update-provider.js +++ 
b/packages/api/tests/endpoints/providers/update-provider.js @@ -16,26 +16,16 @@ const { translateApiProviderToPostgresProvider, ProviderPgModel, migrationDir, - fakeProviderRecordFactory, - translatePostgresProviderToApiProvider, } = require('@cumulus/db'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { AccessToken } = require('../../../models'); const { createFakeJwtAuthToken, fakeProviderFactory, setAuthorizedOAuthUsers, - createProviderTestRecords, } = require('../../../lib/testUtils'); const assertions = require('../../../lib/assertions'); -const { put } = require('../../../endpoints/providers'); -const { buildFakeExpressResponse } = require('../utils'); const testDbName = randomString(12); @@ -62,15 +52,6 @@ test.before(async (t) => { await s3().createBucket({ Bucket: process.env.system_bucket }); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esProviderClient = new Search( - {}, - 'provider', - t.context.esIndex - ); - const username = randomString(); await setAuthorizedOAuthUsers([username]); @@ -95,7 +76,6 @@ test.beforeEach(async (t) => { test.after.always(async (t) => { await recursivelyDeleteS3Bucket(process.env.system_bucket); await accessTokenModel.deleteTable(); - await cleanupTestIndex(t.context); await destroyLocalTestDb({ knex: t.context.testKnex, knexAdmin: t.context.testKnexAdmin, @@ -160,18 +140,6 @@ test('PUT updates existing provider', async (t) => { postgresOmitList ) ); - - const updatedEsRecord = await t.context.esProviderClient.get( - testProvider.id - ); - t.like( - updatedEsRecord, - { - ...expectedProvider, - updatedAt: actualPostgresProvider.updated_at.getTime(), - timestamp: updatedEsRecord.timestamp, - } - ); }); test('PUT updates existing provider and correctly removes fields', async (t) => { @@ -216,7 +184,7 @@ test('PUT updates existing provider and correctly removes fields', async (t) => t.is(actualPostgresProvider.global_connection_limit, null); }); -test('PUT updates existing provider in all data stores with correct timestamps', async (t) => { +test('PUT updates existing provider in postgres with correct timestamps', async (t) => { const { testProvider, testProvider: { id } } = t.context; const expectedProvider = omit(testProvider, ['globalConnectionLimit', 'protocol', 'cmKeyId']); @@ -238,16 +206,10 @@ test('PUT updates existing provider in all data stores with correct timestamps', t.context.testKnex, { name: id } ); - const updatedEsRecord = await t.context.esProviderClient.get( - testProvider.id - ); t.true(actualPostgresProvider.updated_at.getTime() > updatedProvider.updatedAt); // createdAt timestamp from original record should have been preserved t.is(actualPostgresProvider.created_at.getTime(), testProvider.createdAt); - // PG and ES records have the same timestamps - t.is(actualPostgresProvider.created_at.getTime(), updatedEsRecord.createdAt); - t.is(actualPostgresProvider.updated_at.getTime(), updatedEsRecord.updatedAt); }); test('PUT returns 404 for non-existent provider', async (t) => { @@ -309,114 +271,3 @@ test('PUT without an Authorization header returns an Authorization Missing respo ); t.is(provider.name, t.context.testPostgresProvider.name); }); - -test('put() does not write to Elasticsearch if writing to PostgreSQL fails', async (t) => { - const { testKnex } = t.context; - const { - originalProvider, - originalPgRecord, - originalEsRecord, 
- } = await createProviderTestRecords( - t.context, - { - host: 'first-host', - } - ); - - const fakeproviderPgModel = { - upsert: () => Promise.reject(new Error('something bad')), - get: () => fakeProviderRecordFactory({ created_at: new Date() }), - }; - - const updatedProvider = { - ...originalProvider, - host: 'second-host', - }; - - const expressRequest = { - params: { - id: updatedProvider.id, - }, - body: updatedProvider, - testContext: { - knex: testKnex, - providerPgModel: fakeproviderPgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - put(expressRequest, response), - { message: 'something bad' } - ); - - t.deepEqual( - await t.context.providerPgModel.get(t.context.testKnex, { - name: updatedProvider.id, - }), - originalPgRecord - ); - t.deepEqual( - await t.context.esProviderClient.get( - originalProvider.id - ), - originalEsRecord - ); -}); - -test('put() does not write to PostgreSQL if writing to Elasticsearch fails', async (t) => { - const { testKnex } = t.context; - const { - originalPgRecord, - originalEsRecord, - } = await createProviderTestRecords( - t.context, - { - host: 'first-host', - } - ); - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - const apiProvider = translatePostgresProviderToApiProvider(originalPgRecord); - const updatedProvider = { - ...apiProvider, - host: 'second-host', - }; - - const expressRequest = { - params: { - id: updatedProvider.id, - }, - body: updatedProvider, - testContext: { - knex: testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - put(expressRequest, response), - { message: 'something bad' } - ); - - t.deepEqual( - await t.context.providerPgModel.get(t.context.testKnex, { - name: updatedProvider.id, - }), - originalPgRecord - ); - t.deepEqual( - await t.context.esProviderClient.get( - originalPgRecord.name - ), - originalEsRecord - ); -}); diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 2a817dcaaa2..bdde86ffb44 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -151,6 +151,9 @@ export { export { GranuleSearch, } from './search/GranuleSearch'; +export { + ProviderSearch, +} from './search/ProviderSearch'; export { StatsSearch, } from './search/StatsSearch'; diff --git a/packages/db/src/search/CollectionSearch.ts b/packages/db/src/search/CollectionSearch.ts index f4a1dee554a..466437737ec 100644 --- a/packages/db/src/search/CollectionSearch.ts +++ b/packages/db/src/search/CollectionSearch.ts @@ -60,7 +60,7 @@ export class CollectionSearch extends BaseSearch { [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${infix}%`)); } if (prefix) { - [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${prefix}%`)); + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `${prefix}%`)); } } diff --git a/packages/db/src/search/ExecutionSearch.ts b/packages/db/src/search/ExecutionSearch.ts index 8fbea4b8eda..07e66e85d2f 100644 --- a/packages/db/src/search/ExecutionSearch.ts +++ b/packages/db/src/search/ExecutionSearch.ts @@ -130,7 +130,7 @@ export class ExecutionSearch extends BaseSearch { [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.arn`, `%${infix}%`)); } if (prefix) { - [countQuery, searchQuery].forEach((query) => 
query.whereLike(`${this.tableName}.arn`, `%${prefix}%`)); + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.arn`, `${prefix}%`)); } } diff --git a/packages/db/src/search/ProviderSearch.ts b/packages/db/src/search/ProviderSearch.ts new file mode 100644 index 00000000000..d6aa1c82a53 --- /dev/null +++ b/packages/db/src/search/ProviderSearch.ts @@ -0,0 +1,63 @@ +import { Knex } from 'knex'; +import pick from 'lodash/pick'; + +import Logger from '@cumulus/logger'; +import { ApiProvider } from '@cumulus/types/api/providers'; +import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { translatePostgresProviderToApiProvider } from '../translate/providers'; +import { PostgresProviderRecord } from '../types/provider'; + +const log = new Logger({ sender: '@cumulus/db/ProviderSearch' }); + +/** + * Class to build and execute db search query for collections + */ +export class ProviderSearch extends BaseSearch { + constructor(event: QueryEvent) { + const queryStringParameters = event.queryStringParameters || {}; + super({ queryStringParameters }, 'provider'); + } + + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters ?? this.dbQueryParameters; + if (infix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${infix}%`)); + } + if (prefix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `${prefix}%`)); + } + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres Provider records returned from query + * @returns translated api records + */ + protected async translatePostgresRecordsToApiRecords(pgRecords: PostgresProviderRecord[]) + : Promise[]> { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const apiRecords = pgRecords.map((record) => { + const apiRecord: ApiProvider = translatePostgresProviderToApiProvider(record); + const apiRecordFinal = this.dbQueryParameters.fields + ? 
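// The change above anchors prefix filters at the start of the value (`${prefix}%`)
// while infix filters stay unanchored (`%${infix}%`). A minimal sketch of the
// resulting LIKE patterns, assuming a configured knex client and using
// providers.name as the searched column:
const prefixMatches = (knex) => knex('providers').whereLike('name', 'test%');  // starts with "test"
const infixMatches = (knex) => knex('providers').whereLike('name', '%fake%');  // contains "fake"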
pick(apiRecord, this.dbQueryParameters.fields) + : apiRecord; + return apiRecordFinal; + }); + return apiRecords; + } +} diff --git a/packages/db/src/search/StatsSearch.ts b/packages/db/src/search/StatsSearch.ts index 59e19804291..04457d476ed 100644 --- a/packages/db/src/search/StatsSearch.ts +++ b/packages/db/src/search/StatsSearch.ts @@ -241,7 +241,7 @@ class StatsSearch extends BaseSearch { searchQuery.whereLike(`${this.tableName}.${fieldName}`, `%${infix}%`); } if (prefix) { - searchQuery.whereLike(`${this.tableName}.${fieldName}`, `%${prefix}%`); + searchQuery.whereLike(`${this.tableName}.${fieldName}`, `${prefix}%`); } } diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 1f2f5b5f9a8..0ad33fe7129 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -237,18 +237,51 @@ const pdrMapping : { [key: string]: Function } = { }; const providerMapping : { [key: string]: Function } = { + allowedRedirects: (value?: string) => ({ + allowed_redirects: value?.split(','), + }), + certificateUrl: (value?: string) => ({ + certificate_url: value, + }), + cmKeyId: (value?: string) => ({ + cm_key_id: value, + }), createdAt: (value?: string) => ({ created_at: value && new Date(Number(value)), }), id: (value?: string) => ({ name: value, }), + name: (value?: string) => ({ + name: value, + }), timestamp: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), updatedAt: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), + globalConnectionLimit: (value?: string) => ({ + global_connection_limit: value && Number(value), + }), + host: (value?: string) => ({ + host: value, + }), + password: (value?: string) => ({ + password: value, + }), + port: (value?: string) => ({ + port: value, + }), + privateKey: (value?: string) => ({ + private_key: value, + }), + protocol: (value?: string) => ({ + protocol: value, + }), + username: (value?: string) => ({ + username: value, + }), }; const ruleMapping : { [key: string]: Function } = { diff --git a/packages/db/tests/search/test-ProviderSearch.js b/packages/db/tests/search/test-ProviderSearch.js new file mode 100644 index 00000000000..3530118f0ca --- /dev/null +++ b/packages/db/tests/search/test-ProviderSearch.js @@ -0,0 +1,323 @@ +'use strict'; + +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); +const { ProviderSearch } = require('../../dist/search/ProviderSearch'); + +const { + destroyLocalTestDb, + generateLocalTestDb, + ProviderPgModel, + fakeProviderRecordFactory, + migrationDir, +} = require('../../dist'); + +const testDbName = `provider_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.providerPgModel = new ProviderPgModel(); + const providers = []; + t.context.providerSearchTmestamp = 1579352700000; + + range(100).map((num) => ( + providers.push(fakeProviderRecordFactory({ + cumulus_id: num, + updated_at: new Date(t.context.providerSearchTmestamp + (num % 2)), + created_at: new Date(t.context.providerSearchTmestamp - (num % 2)), + name: num % 2 === 0 ? `testProvider${num}` : `fakeProvider${num}`, + host: num % 2 === 0 ? 'cumulus-sit' : 'cumulus-uat', + global_connection_limit: num % 2 === 0 ? 0 : 10, + private_key: num % 2 === 0 ? 
`fakeKey${num}` : undefined, + })) + )); + + await t.context.providerPgModel.insert( + t.context.knex, + providers + ); +}); + +test.after.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('ProviderSearch returns 10 providers by default', async (t) => { + const { knex } = t.context; + const dbSearch = new ProviderSearch({}); + const results = await dbSearch.query(knex); + t.is(results.meta.count, 100); + t.is(results.results.length, 10); +}); + +test('ProviderSearch supports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 20, + page: 2, + }; + let dbSearch = new ProviderSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 20); + + queryStringParameters = { + limit: 11, + page: 10, + }; + dbSearch = new ProviderSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 1); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new ProviderSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 0); +}); + +test('ProviderSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 20, + infix: 'test', + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 20); + t.true(response.results?.every((provider) => provider.id.includes('test'))); +}); + +test('ProviderSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 20, + prefix: 'fake', + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 20); + t.true(response.results?.every((provider) => provider.id.startsWith('fake'))); +}); + +test('ProviderSearch supports term search for date field', async (t) => { + const { knex } = t.context; + const testUpdatedAt = t.context.providerSearchTmestamp + 1; + const queryStringParameters = { + limit: 200, + updatedAt: `${testUpdatedAt}`, + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results?.every((provider) => provider.updatedAt === testUpdatedAt)); +}); + +test('ProviderSearch supports term search for number field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + globalConnectionLimit: '10', + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results?.every((provider) => provider.globalConnectionLimit === 10)); +}); + +test('ProviderSearch supports term search for string field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + host: 'cumulus-sit', + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + 
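+  // A minimal sketch (not asserted here) of how the camelCase query parameters above map
+  // to provider columns through the providerMapping added in field-mapping.ts in this patch:
+  // `host` maps to `host`, `updatedAt` (epoch milliseconds) to `updated_at`,
+  // `globalConnectionLimit` to `global_connection_limit`, and `id` to the provider `name` column.
+  //   const sketchSearch = new ProviderSearch({
+  //     queryStringParameters: { limit: 200, host: 'cumulus-sit', updatedAt: '1579352700001' },
+  //   });
+  //   // await sketchSearch.query(knex) would return only the providers matching both terms.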
t.true(response.results?.every((provider) => provider.host === 'cumulus-sit')); +}); + +test('ProviderSearch supports range search', async (t) => { + const { knex } = t.context; + const timestamp1 = t.context.providerSearchTmestamp + 1; + const timestamp2 = t.context.providerSearchTmestamp + 2; + const queryStringParameters = { + limit: 200, + timestamp__from: `${timestamp1}`, + timestamp__to: `${timestamp2}`, + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results?.every((provider) => provider.updatedAt >= timestamp1 + && provider.updatedAt <= timestamp2)); +}); + +test('ProviderSearch supports search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + id: 'testProvider82', + host: 'cumulus-sit', + global_connection_limit: 0, + }; + + const expectedResponse = { + createdAt: 1579352700000, + host: 'cumulus-sit', + id: 'testProvider82', + globalConnectionLimit: 0, + privateKey: 'fakeKey82', + protocol: 's3', + updatedAt: 1579352700000, + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); + t.deepEqual(response.results[0], expectedResponse); +}); + +test('ProviderSearch non-existing fields are ignored', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); +}); + +test('ProviderSearch returns fields specified', async (t) => { + const { knex } = t.context; + let fields = 'id'; + let queryStringParameters = { + fields, + }; + let dbSearch = new ProviderSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 10); + response.results.forEach((provider) => t.deepEqual(Object.keys(provider), fields.split(','))); + + fields = 'id,host,globalConnectionLimit'; + queryStringParameters = { + fields, + }; + dbSearch = new ProviderSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 10); + response.results.forEach((provider) => t.deepEqual(Object.keys(provider), fields.split(','))); +}); + +test('ProviderSearch supports sorting', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + sort_by: 'id', + order: 'asc', + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 100); + t.is(response.results?.length, 100); + t.true(response.results[0].id < response.results[99].id); + t.true(response.results[0].id < response.results[50].id); + + queryStringParameters = { + limit: 200, + sort_key: ['-id'], + }; + const dbSearch2 = new ProviderSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 100); + t.is(response2.results?.length, 100); + t.true(response2.results[0].id > response2.results[99].id); + 
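+  // As the surrounding assertions suggest, `sort_by`/`order` and `sort_key` are alternative
+  // ways to request ordering, and a leading '-' on a `sort_key` entry requests descending
+  // order. A minimal sketch, assuming ordering combines with the filter suffixes exercised
+  // in the tests below (e.g. __in, __not, __exists):
+  //   const sketchSearch = new ProviderSearch({
+  //     queryStringParameters: { limit: 200, sort_key: ['-id'], host__not: 'cumulus-uat' },
+  //   });
+  //   // await sketchSearch.query(knex) would list non-uat providers ordered by id, descending.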
t.true(response2.results[0].id > response2.results[50].id); + + queryStringParameters = { + limit: 200, + sort_by: 'globalConnectionLimit', + }; + const dbSearch3 = new ProviderSearch({ queryStringParameters }); + const response3 = await dbSearch3.query(knex); + t.is(response3.meta.count, 100); + t.is(response3.results?.length, 100); + t.true(response3.results[0].globalConnectionLimit < response3.results[99].globalConnectionLimit); + t.true(response3.results[49].globalConnectionLimit < response3.results[50].globalConnectionLimit); +}); + +test('ProviderSearch supports terms search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + id__in: ['fakeProvider85', 'testProvider86'].join(','), + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 2); + t.is(response.results?.length, 2); + t.true(response.results?.every((provider) => ['fakeProvider85', 'testProvider86'].includes(provider.id))); +}); + +test('ProviderSearch supports search when provider field does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + host__not: 'cumulus-uat', + }; + let dbSearch = new ProviderSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results?.every((provider) => provider.host !== 'cumulus-uat')); + + queryStringParameters = { + limit: 200, + host__not: 'cumulus-uat', + id__not: 'testProvider38', + }; + dbSearch = new ProviderSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); + t.true(response.results?.every((provider) => provider.host !== 'cumulus-uat' && provider.id !== 'testProvider38')); +}); + +test('ProviderSearch supports search which checks existence of provider field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + privateKey__exists: 'true', + }; + const dbSearch = new ProviderSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results?.every((provider) => provider.privateKey)); +}); From 34c3be875d5b25505b90d010c0ee34631dd8d333 Mon Sep 17 00:00:00 2001 From: Nate Pauzenga Date: Mon, 26 Aug 2024 13:37:55 -0400 Subject: [PATCH 21/61] [CUMULUS-3229] - Remove ElasticSearch from Rules Endpoints and Update LIST Query (#3760) * initial commit * Add field mappings and query builder * Remove elasticsearch dependencies * remove indexer tests for rules * Remove rule ES dependency * Update CL and remove ES dependencies * fix lint * Update typings and translate method * fix lint * update tests and translate logic * fix lint * Make collection and provider optional on rule queries * Fix lint * tests for new class * Update tests and rule search * Fix lint and update types to be reasonable * Update prefix syntax to differentiate from infix * Update mapping with correct PG field * add whitespace back * Refactor rule translate to match other record types * update test title * add additional units * fix lint * update unit to match existing translate * update translate to not return a collection if not available * Add tests for in, exists, and not * fix lint --- CHANGELOG.md | 5 +- packages/api/endpoints/rules.js | 46 +- 
packages/api/lib/testUtils.js | 11 +- packages/api/tests/endpoints/test-rules.js | 1996 +++-------------- .../tests/lambdas/test-index-from-database.js | 25 - packages/db/src/index.ts | 3 + packages/db/src/search/RuleSearch.ts | 123 + packages/db/src/search/field-mapping.ts | 21 + packages/db/src/search/queries.ts | 1 - packages/db/src/translate/rules.ts | 44 +- packages/db/tests/search/test-RuleSearch.js | 442 ++++ packages/db/tests/translate/test-rules.js | 60 + packages/es-client/indexer.js | 43 - packages/es-client/tests/test-es-indexer.js | 48 - 14 files changed, 1047 insertions(+), 1821 deletions(-) create mode 100644 packages/db/src/search/RuleSearch.ts create mode 100644 packages/db/tests/search/test-RuleSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 219295ac5cb..bc7c38addbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [Unreleased] ### Replace ElasticSearch Phase 2 - +- **CUMULUS-3229** + - Remove ElasticSearch queries from Rule LIST endpoint +- **CUMULUS-3230** + - Remove ElasticSearch dependency from Rule Endpoints - **CUMULUS-3232** - Update API PDR endpoints `DEL` and `GET` to not update Elasticsearch - **CUMULUS-3233** diff --git a/packages/api/endpoints/rules.js b/packages/api/endpoints/rules.js index 38897c720da..0097ca6936b 100644 --- a/packages/api/endpoints/rules.js +++ b/packages/api/endpoints/rules.js @@ -14,11 +14,10 @@ const { getKnexClient, isCollisionError, RulePgModel, + RuleSearch, translateApiRuleToPostgresRuleRaw, translatePostgresRuleToApiRule, } = require('@cumulus/db'); -const { Search, getEsClient } = require('@cumulus/es-client/search'); -const { indexRule, deleteRule } = require('@cumulus/es-client/indexer'); const { requireApiVersion, @@ -47,12 +46,11 @@ const log = new Logger({ sender: '@cumulus/api/rules' }); * @returns {Promise} the promise of express response object */ async function list(req, res) { - const search = new Search( - { queryStringParameters: req.query }, - 'rule', - process.env.ES_INDEX + const dbSearch = new RuleSearch( + { queryStringParameters: req.query } ); - const response = await search.query(); + + const response = await dbSearch.query(); return res.send(response); } @@ -93,7 +91,6 @@ async function post(req, res) { const { rulePgModel = new RulePgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; let record; @@ -116,7 +113,6 @@ async function post(req, res) { await createRejectableTransaction(knex, async (trx) => { const [pgRecord] = await rulePgModel.create(trx, postgresRule); record = await translatePostgresRuleToApiRule(pgRecord, knex); - await indexRule(esClient, record, process.env.ES_INDEX); }); } catch (innerError) { if (isCollisionError(innerError)) { @@ -143,7 +139,6 @@ async function post(req, res) { * @param {object} params.apiRule - updated API rule * @param {object} params.rulePgModel - @cumulus/db compatible rule module instance * @param {object} params.knex - Knex object - * @param {object} params.esClient - Elasticsearch client * @returns {Promise} - promise of an express response object. 
*/ async function patchRule(params) { @@ -153,7 +148,6 @@ async function patchRule(params) { apiRule, rulePgModel = new RulePgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = params; log.debug(`rules.patchRule oldApiRule: ${JSON.stringify(oldApiRule)}, apiRule: ${JSON.stringify(apiRule)}`); @@ -172,7 +166,6 @@ async function patchRule(params) { const [pgRule] = await rulePgModel.upsert(trx, apiPgRule); log.debug(`rules.patchRule pgRule: ${JSON.stringify(pgRule)}`); translatedRule = await translatePostgresRuleToApiRule(pgRule, knex); - await indexRule(esClient, translatedRule, process.env.ES_INDEX); }); log.info(`rules.patchRule translatedRule: ${JSON.stringify(translatedRule)}`); @@ -198,7 +191,6 @@ async function patch(req, res) { const { rulePgModel = new RulePgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const { params: { name }, body } = req; @@ -216,7 +208,7 @@ async function patch(req, res) { apiRule.createdAt = oldApiRule.createdAt; apiRule = merge(cloneDeep(oldApiRule), apiRule); - return await patchRule({ res, oldApiRule, apiRule, knex, esClient, rulePgModel }); + return await patchRule({ res, oldApiRule, apiRule, knex, rulePgModel }); } catch (error) { log.error('Unexpected error when updating rule:', error); if (error instanceof RecordDoesNotExist) { @@ -242,7 +234,6 @@ async function put(req, res) { const { rulePgModel = new RulePgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const { params: { name }, body } = req; @@ -272,7 +263,7 @@ async function put(req, res) { apiRule.createdAt = oldApiRule.createdAt; - return await patchRule({ res, oldApiRule, apiRule, knex, esClient, rulePgModel }); + return await patchRule({ res, oldApiRule, apiRule, knex, rulePgModel }); } catch (error) { log.error('Unexpected error when updating rule:', error); if (error instanceof RecordDoesNotExist) { @@ -293,15 +284,10 @@ async function del(req, res) { const { rulePgModel = new RulePgModel(), knex = await getKnexClient(), - esClient = await getEsClient(), } = req.testContext || {}; const name = (req.params.name || '').replace(/%20/g, ' '); - const esRulesClient = new Search( - {}, - 'rule', - process.env.ES_INDEX - ); + let rule; let apiRule; @@ -309,26 +295,14 @@ async function del(req, res) { rule = await rulePgModel.get(knex, { name }); apiRule = await translatePostgresRuleToApiRule(rule, knex); } catch (error) { - // If rule doesn't exist in PG or ES, return not found if (error instanceof RecordDoesNotExist) { - if (!(await esRulesClient.exists(name))) { - log.info('Rule does not exist in Elasticsearch and PostgreSQL'); - return res.boom.notFound('No record found'); - } - log.info('Rule does not exist in PostgreSQL, it only exists in Elasticsearch. 
Proceeding with deletion'); - } else { - throw error; + return res.boom.notFound('No record found'); } + throw error; } await createRejectableTransaction(knex, async (trx) => { await rulePgModel.delete(trx, { name }); - await deleteRule({ - esClient, - name, - index: process.env.ES_INDEX, - ignore: [404], - }); if (rule) await deleteRuleResources(knex, apiRule); }); diff --git a/packages/api/lib/testUtils.js b/packages/api/lib/testUtils.js index 3ee3b1b2581..e13ba2132ea 100644 --- a/packages/api/lib/testUtils.js +++ b/packages/api/lib/testUtils.js @@ -22,7 +22,6 @@ const { translatePostgresRuleToApiRule, } = require('@cumulus/db'); const { - indexRule, deleteExecution, } = require('@cumulus/es-client/indexer'); const { @@ -527,14 +526,12 @@ const createProviderTestRecords = async (context, providerParams) => { * @param {PostgresRule} - Postgres Rule parameters * * @returns {Object} - * Returns new object consisting of `originalApiRule`, `originalPgRecord, and `originalEsRecord` + * Returns new object consisting of `originalApiRule` and `originalPgRecord` */ const createRuleTestRecords = async (context, ruleParams) => { const { testKnex, rulePgModel, - esClient, - esRulesClient, } = context; const originalRule = fakeRuleRecordFactory(ruleParams); @@ -545,14 +542,10 @@ const createRuleTestRecords = async (context, ruleParams) => { const [originalPgRecord] = await rulePgModel.create(testKnex, pgRuleWithTrigger, '*'); const originalApiRule = await translatePostgresRuleToApiRule(originalPgRecord, testKnex); - await indexRule(esClient, originalApiRule, process.env.ES_INDEX); - const originalEsRecord = await esRulesClient.get( - originalRule.name - ); + return { originalApiRule, originalPgRecord, - originalEsRecord, }; }; diff --git a/packages/api/tests/endpoints/test-rules.js b/packages/api/tests/endpoints/test-rules.js index 20a0a222534..afdec634c3c 100644 --- a/packages/api/tests/endpoints/test-rules.js +++ b/packages/api/tests/endpoints/test-rules.js @@ -16,11 +16,9 @@ const { mockClient } = require('aws-sdk-client-mock'); const { createSnsTopic } = require('@cumulus/aws-client/SNS'); const { randomString, randomId } = require('@cumulus/common/test-utils'); +const { removeNilProperties } = require('@cumulus/common/util'); const workflows = require('@cumulus/common/workflows'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); + const { CollectionPgModel, destroyLocalTestDb, @@ -39,8 +37,6 @@ const { } = require('@cumulus/db'); const awsServices = require('@cumulus/aws-client/services'); const S3 = require('@cumulus/aws-client/S3'); -const { Search } = require('@cumulus/es-client/search'); -const indexer = require('@cumulus/es-client/indexer'); const { constructCollectionId } = require('@cumulus/message/Collections'); const { @@ -58,7 +54,7 @@ const { createRuleTestRecords, createSqsQueues, } = require('../../lib/testUtils'); -const { patch, post, put, del } = require('../../endpoints/rules'); +const { patch, post, put } = require('../../endpoints/rules'); const rulesHelpers = require('../../lib/rulesHelpers'); const AccessToken = require('../../models/access-tokens'); @@ -116,16 +112,6 @@ test.before(async (t) => { t.context.testKnex = knex; t.context.testKnexAdmin = knexAdmin; - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esRulesClient = new Search( - {}, - 'rule', - t.context.esIndex - ); - process.env.ES_INDEX = esIndex; - await 
S3.createBucket(process.env.system_bucket); buildPayloadStub = setBuildPayloadStub(); @@ -174,6 +160,20 @@ test.before(async (t) => { provider: t.context.pgProvider.name, }); + t.context.testRuleWithoutForeignKeys = fakeRuleFactoryV2({ + name: 'testRuleWithoutForeignKeys', + workflow: workflow, + rule: { + type: 'onetime', + arn: 'arn', + value: 'value', + }, + state: 'ENABLED', + queueUrl: 'https://sqs.us-west-2.amazonaws.com/123456789012/queue_url', + collection: undefined, + provider: undefined, + }); + const username = randomString(); await setAuthorizedOAuthUsers([username]); @@ -199,8 +199,13 @@ test.before(async (t) => { const ruleWithTrigger = await rulesHelpers.createRuleTrigger(t.context.testRule); t.context.collectionId = constructCollectionId(collectionName, collectionVersion); t.context.testPgRule = await translateApiRuleToPostgresRuleRaw(ruleWithTrigger, knex); - await indexer.indexRule(esClient, ruleWithTrigger, t.context.esIndex); t.context.rulePgModel.create(knex, t.context.testPgRule); + + const rule2WithTrigger = await rulesHelpers.createRuleTrigger( + t.context.testRuleWithoutForeignKeys + ); + t.context.testPgRule2 = await translateApiRuleToPostgresRuleRaw(rule2WithTrigger, knex); + t.context.rulePgModel.create(knex, t.context.testPgRule2); }); test.beforeEach((t) => { @@ -215,7 +220,6 @@ test.beforeEach((t) => { test.after.always(async (t) => { await accessTokenModel.deleteTable(); await S3.recursivelyDeleteS3Bucket(process.env.system_bucket); - await cleanupTestIndex(t.context); buildPayloadStub.restore(); await destroyLocalTestDb({ @@ -383,7 +387,7 @@ test.serial('default returns list of rules', async (t) => { .expect(200); const { results } = response.body; - t.is(results.length, 1); + t.is(results.length, 2); }); test.serial('search returns correct list of rules', async (t) => { @@ -394,7 +398,7 @@ test.serial('search returns correct list of rules', async (t) => { .expect(200); const { results } = response.body; - t.is(results.length, 1); + t.is(results.length, 2); const newResponse = await request(app) .get('/rules?page=1&rule.type=sqs&state=ENABLED') @@ -406,7 +410,46 @@ test.serial('search returns correct list of rules', async (t) => { t.is(newResults.length, 0); }); -test('GET gets a rule', async (t) => { +test.serial('Rules search returns the expected fields', async (t) => { + const response = await request(app) + .get(`/rules?page=1&rule.type=onetime&provider=${t.context.pgProvider.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const { results } = response.body; + + const expectedRule = { + ...t.context.testRule, + updatedAt: results[0].updatedAt, + createdAt: results[0].createdAt, + }; + + t.is(results.length, 1); + t.deepEqual(results[0], expectedRule); +}); + +test.serial('Rules search returns results without a provider or collection', async (t) => { + const response = await request(app) + .get(`/rules?page=1&name=${t.context.testPgRule2.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const { results } = response.body; + + t.is(results.length, 1); + + const expectedRule = { + ...t.context.testRuleWithoutForeignKeys, + updatedAt: results[0].updatedAt, + createdAt: results[0].createdAt, + }; + + t.deepEqual(results[0], removeNilProperties(expectedRule)); +}); + +test.serial('GET gets a rule', async (t) => { const response = await request(app) .get(`/rules/${t.context.testRule.name}`) .set('Accept', 'application/json') @@ 
-477,12 +520,11 @@ test('403 error when calling the API endpoint to delete an existing rule without t.deepEqual(response.body, record); }); -test('POST creates a rule in all data stores', async (t) => { +test('POST creates a rule', async (t) => { const { collectionPgModel, newRule, providerPgModel, - rulePgModel, testKnex, } = t.context; @@ -525,19 +567,11 @@ test('POST creates a rule in all data stores', async (t) => { .expect(200); const { message } = response.body; - const fetchedPostgresRecord = await rulePgModel - .get(testKnex, { name: newRule.name }); t.is(message, 'Record saved'); - const translatedPgRecord = await translatePostgresRuleToApiRule(fetchedPostgresRecord, testKnex); - - const esRecord = await t.context.esRulesClient.get( - newRule.name - ); - t.like(esRecord, translatedPgRecord); }); -test.serial('post() creates SNS rule with same trigger information in PostgreSQL/Elasticsearch', async (t) => { +test.serial('post() creates SNS rule with trigger information in PostgreSQL', async (t) => { const { pgProvider, pgCollection, @@ -569,33 +603,18 @@ test.serial('post() creates SNS rule with same trigger information in PostgreSQL const pgRule = await t.context.rulePgModel .get(t.context.testKnex, { name: rule.name }); - const esRule = await t.context.esRulesClient.get( - rule.name - ); t.truthy(pgRule.arn); - t.truthy(esRule.rule.arn); - t.like( - esRule, - { - rule: { - type: 'sns', - value: topic1.TopicArn, - arn: pgRule.arn, - }, - } - ); t.like(pgRule, { name: rule.name, enabled: true, type: 'sns', - arn: esRule.rule.arn, value: topic1.TopicArn, }); }); -test.serial('post() creates the same Kinesis rule with trigger information in PostgreSQL/Elasticsearch', async (t) => { +test.serial('post() creates Kinesis rule with trigger information in PostgreSQL', async (t) => { const { pgProvider, pgCollection, @@ -627,24 +646,9 @@ test.serial('post() creates the same Kinesis rule with trigger information in Po const pgRule = await t.context.rulePgModel .get(t.context.testKnex, { name: rule.name }); - const esRule = await t.context.esRulesClient.get( - rule.name - ); - t.truthy(esRule.rule.arn); - t.truthy(esRule.rule.logEventArn); t.truthy(pgRule.arn); t.truthy(pgRule.log_event_arn); - t.like( - esRule, - { - ...rule, - rule: { - type: 'kinesis', - value: kinesisArn1, - }, - } - ); t.like(pgRule, { name: rule.name, enabled: true, @@ -653,7 +657,7 @@ test.serial('post() creates the same Kinesis rule with trigger information in Po }); }); -test.serial('post() creates the SQS rule with trigger information in PostgreSQL/Elasticsearch', async (t) => { +test.serial('post() creates the SQS rule with trigger information in PostgreSQL', async (t) => { const { pgProvider, pgCollection, @@ -691,20 +695,7 @@ test.serial('post() creates the SQS rule with trigger information in PostgreSQL/ const pgRule = await t.context.rulePgModel .get(t.context.testKnex, { name: rule.name }); - const esRule = await t.context.esRulesClient.get( - rule.name - ); - t.like( - esRule, - { - rule: { - type: 'sqs', - value: queueUrl1, - }, - meta: expectedMeta, - } - ); t.like(pgRule, { name: rule.name, enabled: true, @@ -714,7 +705,7 @@ test.serial('post() creates the SQS rule with trigger information in PostgreSQL/ }); }); -test.serial('post() creates the SQS rule with the meta provided in PostgreSQL/Elasticsearch', async (t) => { +test.serial('post() creates the SQS rule with the meta provided in PostgreSQL', async (t) => { const { pgProvider, pgCollection, @@ -756,20 +747,7 @@ test.serial('post() creates the SQS 
rule with the meta provided in PostgreSQL/El const pgRule = await t.context.rulePgModel .get(t.context.testKnex, { name: rule.name }); - const esRule = await t.context.esRulesClient.get( - rule.name - ); - t.like( - esRule, - { - rule: { - type: 'sqs', - value: queueUrl1, - }, - meta: expectedMeta, - } - ); t.like(pgRule, { name: rule.name, enabled: true, @@ -965,67 +943,8 @@ test.serial('POST returns a 500 response if record creation throws unexpected er } }); -test.serial('post() does not write to Elasticsearch if writing to PostgreSQL fails', async (t) => { - const { newRule, testKnex } = t.context; - - const fakeRulePgModel = { - create: () => { - throw new Error('something bad'); - }, - }; - - const expressRequest = { - body: newRule, - testContext: { - knex: testKnex, - rulePgModel: fakeRulePgModel, - }, - }; - const response = buildFakeExpressResponse(); - await post(expressRequest, response); - - const dbRecords = await t.context.rulePgModel - .search(t.context.testKnex, { name: newRule.name }); - - t.is(dbRecords.length, 0); - t.false(await t.context.esRulesClient.exists( - newRule.name - )); -}); - -test.serial('post() does not write to PostgreSQL if writing to Elasticsearch fails', async (t) => { - const { newRule, testKnex } = t.context; - - const fakeEsClient = { - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - - const expressRequest = { - body: newRule, - testContext: { - knex: testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await post(expressRequest, response); - - const dbRecords = await t.context.rulePgModel - .search(t.context.testKnex, { name: newRule.name }); - - t.is(dbRecords.length, 0); - t.false(await t.context.esRulesClient.exists( - newRule.name - )); -}); - -test.serial('PATCH updates an existing rule in all data stores', async (t) => { +test.serial('PATCH updates an existing rule', async (t) => { const { - esRulesClient, rulePgModel, testKnex, } = t.context; @@ -1039,7 +958,6 @@ test.serial('PATCH updates an existing rule in all data stores', async (t) => { const { originalApiRule, originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( t.context, { @@ -1052,8 +970,6 @@ test.serial('PATCH updates an existing rule in all data stores', async (t) => { t.deepEqual(originalPgRecord.meta, oldMetaFields); t.is(originalPgRecord.payload, null); - t.deepEqual(originalEsRecord.meta, oldMetaFields); - t.is(originalEsRecord.payload, undefined); const updateMetaFields = { nestedFieldOne: { @@ -1085,25 +1001,10 @@ test.serial('PATCH updates an existing rule in all data stores', async (t) => { .expect(200); const actualPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); - const updatedEsRecord = await esRulesClient.get(originalApiRule.name); const expectedMeta = merge(cloneDeep(oldMetaFields), updateMetaFields); - // PG and ES records have the same timestamps + // PG record has the original timestamp t.true(actualPostgresRule.updated_at > originalPgRecord.updated_at); - t.is(actualPostgresRule.created_at.getTime(), updatedEsRecord.createdAt); - t.is(actualPostgresRule.updated_at.getTime(), updatedEsRecord.updatedAt); - t.deepEqual( - updatedEsRecord, - { - ...originalEsRecord, - state: 'ENABLED', - meta: expectedMeta, - payload: updatePayload, - createdAt: originalPgRecord.created_at.getTime(), - updatedAt: actualPostgresRule.updated_at.getTime(), - timestamp: updatedEsRecord.timestamp, - } - ); t.deepEqual( actualPostgresRule, { @@ -1117,9 +1018,8 @@ 
test.serial('PATCH updates an existing rule in all data stores', async (t) => { ); }); -test.serial('PATCH nullifies expected fields for existing rule in all datastores', async (t) => { +test.serial('PATCH nullifies expected fields for existing rule', async (t) => { const { - esRulesClient, rulePgModel, testKnex, } = t.context; @@ -1168,7 +1068,6 @@ test.serial('PATCH nullifies expected fields for existing rule in all datastores .expect(200); const actualPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); - const updatedEsRecord = await esRulesClient.get(originalApiRule.name); const apiRule = await translatePostgresRuleToApiRule(actualPostgresRule, testKnex); const expectedApiRule = { @@ -1180,13 +1079,6 @@ test.serial('PATCH nullifies expected fields for existing rule in all datastores }; t.deepEqual(apiRule, expectedApiRule); - const expectedEsRecord = { - ...expectedApiRule, - _id: updatedEsRecord._id, - timestamp: updatedEsRecord.timestamp, - }; - t.deepEqual(updatedEsRecord, expectedEsRecord); - t.deepEqual( actualPostgresRule, { @@ -1231,13 +1123,11 @@ test.serial('PATCH sets SNS rule to "disabled" and removes source mapping ARN', }); const { - esRulesClient, rulePgModel, testKnex, } = t.context; const { originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( t.context, { @@ -1249,7 +1139,6 @@ test.serial('PATCH sets SNS rule to "disabled" and removes source mapping ARN', ); t.truthy(originalPgRecord.arn); - t.is(originalEsRecord.rule.arn, originalPgRecord.arn); const updateRule = { name: originalPgRecord.name, @@ -1264,10 +1153,8 @@ test.serial('PATCH sets SNS rule to "disabled" and removes source mapping ARN', .expect(200); const updatedPostgresRule = await rulePgModel.get(testKnex, { name: originalPgRecord.name }); - const updatedEsRecord = await esRulesClient.get(originalPgRecord.name); t.is(updatedPostgresRule.arn, null); - t.is(updatedEsRecord.rule.arn, undefined); }); test('PATCH returns 404 for non-existent rule', async (t) => { @@ -1441,120 +1328,7 @@ test('PATCH returns a 400 response if rule value is not specified for non-onetim t.truthy(message.match(regexp)); }); -test('PATCH does not write to Elasticsearch if writing to PostgreSQL fails', async (t) => { - const { testKnex } = t.context; - const { - originalApiRule, - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'queue-1', - workflow, - } - ); - - const fakerulePgModel = { - get: () => Promise.resolve(originalPgRecord), - upsert: () => Promise.reject(new Error('something bad')), - }; - - const updatedRule = { - name: originalApiRule.name, - queueUrl: 'queue-2', - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - }, - body: updatedRule, - testContext: { - knex: testKnex, - rulePgModel: fakerulePgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - patch(expressRequest, response), - { message: 'something bad' } - ); - - t.deepEqual( - await t.context.rulePgModel.get(t.context.testKnex, { - name: originalPgRecord.name, - }), - originalPgRecord - ); - t.deepEqual( - await t.context.esRulesClient.get( - originalPgRecord.name - ), - originalEsRecord - ); -}); - -test('PATCH does not write to PostgreSQL if writing to Elasticsearch fails', async (t) => { - const { testKnex } = t.context; - const { - originalApiRule, - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'queue-1', - workflow, - } - ); - - const 
fakeEsClient = { - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - - const updatedRule = { - name: originalApiRule.name, - queueUrl: 'queue-2', - }; - - const expressRequest = { - params: { - name: originalApiRule.name, - }, - body: updatedRule, - testContext: { - knex: testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - patch(expressRequest, response), - { message: 'something bad' } - ); - - t.deepEqual( - await t.context.rulePgModel.get(t.context.testKnex, { - name: originalApiRule.name, - }), - originalPgRecord - ); - t.deepEqual( - await t.context.esRulesClient.get( - originalApiRule.name - ), - originalEsRecord - ); -}); - -test.serial('PATCH creates the same updated SNS rule in PostgreSQL/Elasticsearch', async (t) => { +test.serial('PATCH keeps initial trigger information if writing to PostgreSQL fails', async (t) => { const { pgProvider, pgCollection, @@ -1565,12 +1339,12 @@ test.serial('PATCH creates the same updated SNS rule in PostgreSQL/Elasticsearch const { originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( - t.context, + { + ...t.context, + }, { workflow, - queueUrl: 'fake-queue-url', state: 'ENABLED', type: 'sns', value: topic1.TopicArn, @@ -1582,8 +1356,8 @@ test.serial('PATCH creates the same updated SNS rule in PostgreSQL/Elasticsearch } ); - t.truthy(originalEsRecord.rule.value); t.truthy(originalPgRecord.value); + const updateRule = { name: originalPgRecord.name, rule: { @@ -1597,643 +1371,86 @@ test.serial('PATCH creates the same updated SNS rule in PostgreSQL/Elasticsearch name: originalPgRecord.name, }, body: updateRule, + testContext: { + rulePgModel: { + get: () => Promise.resolve(originalPgRecord), + upsert: () => { + throw new Error('PG fail'); + }, + }, + }, }; + const response = buildFakeExpressResponse(); - await patch(expressRequest, response); - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - originalPgRecord.name + + await t.throwsAsync( + patch(expressRequest, response), + { message: 'PG fail' } ); - t.truthy(updatedEsRule.rule.value); - t.truthy(updatedPgRule.value); + const updatedPgRule = await t.context.rulePgModel + .get(t.context.testKnex, { name: updateRule.name }); - t.not(updatedEsRule.rule.value, originalEsRecord.rule.value); - t.not(updatedPgRule.value, originalPgRecord.value); + t.is(updatedPgRule.arn, originalPgRecord.arn); - t.deepEqual( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sns', - value: topic2.TopicArn, - }, - } - ); - t.deepEqual(updatedPgRule, { + t.like(updatedPgRule, { ...originalPgRecord, updated_at: updatedPgRule.updated_at, type: 'sns', - arn: updatedPgRule.arn, - value: topic2.TopicArn, + value: topic1.TopicArn, }); }); -test.serial('PATCH creates the same updated Kinesis rule in PostgreSQL/Elasticsearch', async (t) => { +test.serial('PUT replaces an existing rule', async (t) => { const { - pgProvider, - pgCollection, + rulePgModel, + testKnex, } = t.context; - - const kinesisArn1 = `arn:aws:kinesis:us-east-1:000000000000:${randomId('kinesis1_')}`; - const kinesisArn2 = `arn:aws:kinesis:us-east-1:000000000000:${randomId('kinesis2_')}`; + const oldMetaFields = { + nestedFieldOne: { + fieldOne: 'fieldone-data', + }, + }; const { + originalApiRule, originalPgRecord, - originalEsRecord, } = await 
createRuleTestRecords( t.context, { + queue_url: 'fake-queue-url', workflow, - state: 'ENABLED', - type: 'kinesis', - value: kinesisArn1, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, + meta: oldMetaFields, + tags: ['tag1', 'tag2'], } ); - t.truthy(originalEsRecord.rule.arn); - t.truthy(originalEsRecord.rule.logEventArn); - t.truthy(originalPgRecord.arn); - t.truthy(originalPgRecord.log_event_arn); + t.deepEqual(originalPgRecord.meta, oldMetaFields); + t.is(originalPgRecord.payload, null); - const updateRule = { - name: originalPgRecord.name, - rule: { - type: 'kinesis', - value: kinesisArn2, + const updateMetaFields = { + nestedFieldOne: { + nestedFieldOneKey2: randomId('nestedFieldOneKey2'), + 'key.with.period': randomId('key.with.period'), }, - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - }, - body: updateRule, - }; - - const response = buildFakeExpressResponse(); - - await patch(expressRequest, response); - - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - originalPgRecord.name - ); - - t.truthy(updatedEsRule.rule.arn); - t.truthy(updatedEsRule.rule.logEventArn); - t.truthy(updatedPgRule.arn); - t.truthy(updatedPgRule.log_event_arn); - - t.not(originalEsRecord.rule.arn, updatedEsRule.rule.arn); - t.not(originalEsRecord.rule.logEventArn, updatedEsRule.rule.logEventArn); - t.not(originalPgRecord.arn, updatedPgRule.arn); - t.not(originalPgRecord.log_event_arn, updatedPgRule.log_event_arn); - - t.deepEqual( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - arn: updatedEsRule.rule.arn, - logEventArn: updatedEsRule.rule.logEventArn, - type: 'kinesis', - value: kinesisArn2, - }, - } - ); - t.deepEqual(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'kinesis', - value: kinesisArn2, - arn: updatedPgRule.arn, - log_event_arn: updatedPgRule.log_event_arn, - }); -}); - -test.serial('PATCH creates the same SQS rule in PostgreSQL/Elasticsearch', async (t) => { - const { - pgProvider, - pgCollection, - } = t.context; - - const queue1 = randomId('queue'); - const queue2 = randomId('queue'); - - const { queueUrl: queueUrl1 } = await createSqsQueues(queue1); - const { queueUrl: queueUrl2 } = await createSqsQueues(queue2, 4, '100'); - - const { - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - { - ...t.context, - }, - { - workflow, - name: randomId('rule'), - state: 'ENABLED', - type: 'sqs', - value: queueUrl1, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, - } - ); - - const expectedMeta = { - visibilityTimeout: 300, - retries: 3, - }; - - t.deepEqual(originalPgRecord.meta, expectedMeta); - t.deepEqual(originalEsRecord.meta, expectedMeta); - - const updateRule = { - name: originalPgRecord.name, - rule: { - type: 'sqs', - value: queueUrl2, - }, - meta: { - retries: 2, - visibilityTimeout: null, - }, - }; - const expressRequest = { - params: { - name: originalPgRecord.name, - }, - body: updateRule, - }; - const response = buildFakeExpressResponse(); - await patch(expressRequest, response); - - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - updateRule.name - ); - - const 
expectedMetaUpdate = { - visibilityTimeout: 100, - retries: 2, - }; - - t.deepEqual( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sqs', - value: queueUrl2, - }, - meta: expectedMetaUpdate, - } - ); - t.deepEqual(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'sqs', - value: queueUrl2, - meta: expectedMetaUpdate, - }); -}); - -test.serial('PATCH keeps initial trigger information if writing to PostgreSQL fails', async (t) => { - const { - pgProvider, - pgCollection, - } = t.context; - - const topic1 = await createSnsTopic(randomId('topic1_')); - const topic2 = await createSnsTopic(randomId('topic2_')); - - const { - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - { - ...t.context, - }, - { - workflow, - state: 'ENABLED', - type: 'sns', - value: topic1.TopicArn, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, - } - ); - - t.truthy(originalEsRecord.rule.value); - t.truthy(originalPgRecord.value); - - const updateRule = { - name: originalPgRecord.name, - rule: { - type: 'sns', - value: topic2.TopicArn, - }, - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - }, - body: updateRule, - testContext: { - rulePgModel: { - get: () => Promise.resolve(originalPgRecord), - upsert: () => { - throw new Error('PG fail'); - }, - }, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - patch(expressRequest, response), - { message: 'PG fail' } - ); - - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - originalPgRecord.name - ); - - t.is(updatedEsRule.rule.arn, originalEsRecord.rule.arn); - t.is(updatedPgRule.arn, originalPgRecord.arn); - - t.like( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sns', - value: topic1.TopicArn, - }, - } - ); - t.like(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'sns', - value: topic1.TopicArn, - }); -}); - -test.serial('PATCH keeps initial trigger information if writing to Elasticsearch fails', async (t) => { - const { - pgProvider, - pgCollection, - } = t.context; - - const topic1 = await createSnsTopic(randomId('topic1_')); - const topic2 = await createSnsTopic(randomId('topic2_')); - - const { - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - { - ...t.context, - }, - { - workflow, - state: 'ENABLED', - type: 'sns', - value: topic1.TopicArn, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, - } - ); - - t.truthy(originalEsRecord.rule.value); - t.truthy(originalPgRecord.value); - - const updateRule = { - name: originalPgRecord.name, - rule: { - type: 'sns', - value: topic2.TopicArn, - }, - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - }, - body: updateRule, - testContext: { - esClient: { - client: { - index: () => { - throw new Error('ES fail'); - }, - }, - }, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - patch(expressRequest, response), - { message: 'ES fail' } - ); - - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = 
await t.context.esRulesClient.get( - originalPgRecord.name - ); - - t.is(updatedEsRule.rule.arn, originalEsRecord.rule.arn); - t.is(updatedPgRule.arn, originalPgRecord.arn); - - t.like( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sns', - value: topic1.TopicArn, - }, - } - ); - t.like(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'sns', - value: topic1.TopicArn, - }); -}); - -test.serial('PUT replaces an existing rule in all data stores', async (t) => { - const { - esRulesClient, - rulePgModel, - testKnex, - } = t.context; - const oldMetaFields = { - nestedFieldOne: { - fieldOne: 'fieldone-data', - }, - }; - - const { - originalApiRule, - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'fake-queue-url', - workflow, - meta: oldMetaFields, - tags: ['tag1', 'tag2'], - } - ); - - t.deepEqual(originalPgRecord.meta, oldMetaFields); - t.is(originalPgRecord.payload, null); - t.deepEqual(originalEsRecord.meta, oldMetaFields); - t.is(originalEsRecord.payload, undefined); - - const updateMetaFields = { - nestedFieldOne: { - nestedFieldOneKey2: randomId('nestedFieldOneKey2'), - 'key.with.period': randomId('key.with.period'), - }, - nestedFieldTwo: { - nestedFieldTwoKey1: randomId('nestedFieldTwoKey1'), - }, - }; - const updatePayload = { - foo: 'bar', - }; - const updateTags = ['tag2', 'tag3']; - const removedFields = ['queueUrl', 'queue_url', 'provider', 'collection']; - const updateRule = { - ...omit(originalApiRule, removedFields), - state: 'ENABLED', - meta: updateMetaFields, - payload: updatePayload, - tags: updateTags, - // these timestamps should not get used - createdAt: Date.now(), - updatedAt: Date.now(), - }; - - await request(app) - .put(`/rules/${updateRule.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(updateRule) - .expect(200); - - const actualPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); - const updatedEsRecord = await esRulesClient.get(originalApiRule.name); - - // PG and ES records have the same timestamps - t.true(actualPostgresRule.updated_at > originalPgRecord.updated_at); - t.is(actualPostgresRule.created_at.getTime(), updatedEsRecord.createdAt); - t.is(actualPostgresRule.updated_at.getTime(), updatedEsRecord.updatedAt); - t.deepEqual( - updatedEsRecord, - { - ...omit(originalEsRecord, removedFields), - state: 'ENABLED', - meta: updateMetaFields, - payload: updatePayload, - tags: updateTags, - createdAt: originalApiRule.createdAt, - updatedAt: updatedEsRecord.updatedAt, - timestamp: updatedEsRecord.timestamp, - } - ); - t.deepEqual( - actualPostgresRule, - { - ...omit(originalPgRecord, removedFields), - enabled: true, - meta: updateMetaFields, - payload: updatePayload, - tags: updateTags, - queue_url: null, - created_at: originalPgRecord.created_at, - updated_at: actualPostgresRule.updated_at, - } - ); -}); - -test.serial('PUT removes existing fields if not specified or set to null', async (t) => { - const { - esRulesClient, - rulePgModel, - testKnex, - } = t.context; - const oldMetaFields = { - nestedFieldOne: { - fieldOne: 'fieldone-data', - }, - }; - - const { - originalApiRule, - originalPgRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'fake-queue-url', - workflow, - meta: oldMetaFields, - execution_name_prefix: 'testRule', - payload: { foo: 'bar' }, - value: randomId('value'), 
- tags: ['tag1', 'tag2'], - } - ); - - const removedFields = ['provider', 'collection', 'payload', 'tags']; - const updateRule = { - ...omit(originalApiRule, removedFields), - executionNamePrefix: null, - meta: null, - queueUrl: null, - rule: { - type: originalApiRule.rule.type, - }, - createdAt: null, - updatedAt: null, - }; - - await request(app) - .put(`/rules/${updateRule.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(updateRule) - .expect(200); - - const actualPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); - const updatedEsRecord = await esRulesClient.get(originalApiRule.name); - const apiRule = await translatePostgresRuleToApiRule(actualPostgresRule, testKnex); - - const expectedApiRule = { - ...pick(originalApiRule, ['name', 'workflow', 'createdAt', 'state']), - rule: { - type: originalApiRule.rule.type, - }, - updatedAt: apiRule.updatedAt, - }; - t.deepEqual(apiRule, expectedApiRule); - - const expectedEsRecord = { - ...expectedApiRule, - _id: updatedEsRecord._id, - timestamp: updatedEsRecord.timestamp, - }; - t.deepEqual(updatedEsRecord, expectedEsRecord); - - t.deepEqual( - actualPostgresRule, - { - ...originalPgRecord, - enabled: false, - execution_name_prefix: null, - meta: null, - payload: null, - queue_url: null, - type: originalApiRule.rule.type, - value: null, - tags: null, - created_at: originalPgRecord.created_at, - updated_at: actualPostgresRule.updated_at, - } - ); -}); - -test.serial('PUT sets SNS rule to "disabled" and removes source mapping ARN', async (t) => { - const snsMock = mockClient(awsServices.sns()); - - snsMock - .onAnyCommand() - .rejects() - .on(ListSubscriptionsByTopicCommand) - .resolves({ - Subscriptions: [{ - Endpoint: process.env.messageConsumer, - SubscriptionArn: randomString(), - }], - }) - .on(UnsubscribeCommand) - .resolves({}); - const mockLambdaClient = mockClient(awsServices.lambda()).onAnyCommand().rejects(); - mockLambdaClient.on(AddPermissionCommand).resolves(); - mockLambdaClient.on(RemovePermissionCommand).resolves(); - - t.teardown(() => { - snsMock.restore(); - mockLambdaClient.restore(); - }); - const { - esRulesClient, - rulePgModel, - testKnex, - } = t.context; - const { - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - t.context, - { - value: 'sns-arn', - type: 'sns', - enabled: true, - workflow, - } - ); - - t.truthy(originalPgRecord.arn); - t.is(originalEsRecord.rule.arn, originalPgRecord.arn); - - const translatedPgRecord = await translatePostgresRuleToApiRule(originalPgRecord, testKnex); - + nestedFieldTwo: { + nestedFieldTwoKey1: randomId('nestedFieldTwoKey1'), + }, + }; + const updatePayload = { + foo: 'bar', + }; + const updateTags = ['tag2', 'tag3']; + const removedFields = ['queueUrl', 'queue_url', 'provider', 'collection']; const updateRule = { - ...translatedPgRecord, - state: 'DISABLED', + ...omit(originalApiRule, removedFields), + state: 'ENABLED', + meta: updateMetaFields, + payload: updatePayload, + tags: updateTags, + // these timestamps should not get used + createdAt: Date.now(), + updatedAt: Date.now(), }; await request(app) @@ -2243,629 +1460,308 @@ test.serial('PUT sets SNS rule to "disabled" and removes source mapping ARN', as .send(updateRule) .expect(200); - const updatedPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); - const updatedEsRecord = await esRulesClient.get(translatedPgRecord.name); - - t.is(updatedPostgresRule.arn, null); - t.is(updatedEsRecord.rule.arn, 
undefined); -}); - -test('PUT returns 404 for non-existent rule', async (t) => { - const name = 'new_make_coffee'; - const response = await request(app) - .put(`/rules/${name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send({ name }) - .expect(404); - - const { message, record } = response.body; - t.true(message.includes(name)); - t.falsy(record); -}); - -test('PUT returns 400 for name mismatch between params and payload', - async (t) => { - const response = await request(app) - .put(`/rules/${randomString()}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send({ name: randomString() }) - .expect(400); - const { message, record } = response.body; - - t.true(message.includes('Expected rule name to be')); - t.falsy(record); - }); - -test('PUT returns a 400 response if record is missing workflow property', async (t) => { - const { - originalApiRule, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'fake-queue-url', - workflow, - } - ); - - // Set required property to null to trigger create error - originalApiRule.workflow = null; - - const response = await request(app) - .put(`/rules/${originalApiRule.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(originalApiRule) - .expect(400); - const { message } = response.body; - t.true(message.includes('The record has validation errors. Rule workflow is undefined')); -}); - -test('PUT returns a 400 response if record is missing type property', async (t) => { - const { - originalApiRule, - originalPgRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'fake-queue-url', - workflow, - } - ); - originalApiRule.rule.type = null; - const response = await request(app) - .put(`/rules/${originalPgRecord.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(originalApiRule) - .expect(400); - const { message } = response.body; - t.true(message.includes('The record has validation errors. 
Rule type is undefined.')); -}); - -test('PUT returns a 400 response if rule name is invalid', async (t) => { - const { - originalApiRule, - originalPgRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'fake-queue-url', - workflow, - } - ); - originalApiRule.name = 'bad rule name'; - const response = await request(app) - .put(`/rules/${originalPgRecord.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(originalApiRule) - .expect(400); - const { message } = response.body; - t.true(message.includes(originalApiRule.name)); -}); - -test('PUT returns a 400 response if rule type is invalid', async (t) => { - const { - originalApiRule, - originalPgRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'fake-queue-url', - workflow, - } - ); - originalApiRule.rule.type = 'invalid'; - - const response = await request(app) - .put(`/rules/${originalPgRecord.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(originalApiRule) - .expect(400); - - const { message } = response.body; - const regexp = new RegExp('The record has validation errors:.*rule.type.*should be equal to one of the allowed values'); - t.truthy(message.match(regexp)); -}); - -test('PUT returns a 400 response if rule value is not specified for non-onetime rule', async (t) => { - const { - originalApiRule, - originalPgRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'fake-queue-url', - workflow, - } - ); - originalApiRule.rule.type = 'kinesis'; - - const response = await request(app) - .put(`/rules/${originalPgRecord.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send(originalApiRule) - .expect(400); - - const { message } = response.body; - const regexp = new RegExp('Rule value is undefined for kinesis rule'); - t.truthy(message.match(regexp)); -}); - -test('PUT does not write to Elasticsearch if writing to PostgreSQL fails', async (t) => { - const { testKnex } = t.context; - const { - originalApiRule, - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'queue-1', - workflow, - } - ); - - const fakerulePgModel = { - get: () => Promise.resolve(originalPgRecord), - upsert: () => Promise.reject(new Error('something bad')), - }; - - const updatedRule = { - ...originalApiRule, - queueUrl: 'queue-2', - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - }, - body: updatedRule, - testContext: { - knex: testKnex, - rulePgModel: fakerulePgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - put(expressRequest, response), - { message: 'something bad' } - ); - - t.deepEqual( - await t.context.rulePgModel.get(t.context.testKnex, { - name: originalPgRecord.name, - }), - originalPgRecord - ); - t.deepEqual( - await t.context.esRulesClient.get( - originalPgRecord.name - ), - originalEsRecord - ); -}); - -test('PUT does not write to PostgreSQL if writing to Elasticsearch fails', async (t) => { - const { testKnex } = t.context; - const { - originalApiRule, - originalPgRecord, - originalEsRecord, - } = await createRuleTestRecords( - t.context, - { - queue_url: 'queue-1', - workflow, - } - ); - - const fakeEsClient = { - client: { - index: () => Promise.reject(new Error('something bad')), - }, - }; - - const updatedRule = { - ...originalApiRule, - queueUrl: 'queue-2', - }; - - const expressRequest = { - params: { - name: 
originalApiRule.name, - }, - body: updatedRule, - testContext: { - knex: testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - put(expressRequest, response), - { message: 'something bad' } - ); + const actualPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); + t.true(actualPostgresRule.updated_at > originalPgRecord.updated_at); t.deepEqual( - await t.context.rulePgModel.get(t.context.testKnex, { - name: originalApiRule.name, - }), - originalPgRecord - ); - t.deepEqual( - await t.context.esRulesClient.get( - originalApiRule.name - ), - originalEsRecord + actualPostgresRule, + { + ...omit(originalPgRecord, removedFields), + enabled: true, + meta: updateMetaFields, + payload: updatePayload, + tags: updateTags, + queue_url: null, + created_at: originalPgRecord.created_at, + updated_at: actualPostgresRule.updated_at, + } ); }); -test.serial('PUT creates the same updated SNS rule in PostgreSQL/Elasticsearch', async (t) => { +test.serial('PUT removes existing fields if not specified or set to null', async (t) => { const { - pgProvider, - pgCollection, + rulePgModel, + testKnex, } = t.context; - - const topic1 = await createSnsTopic(randomId('topic1_')); - const topic2 = await createSnsTopic(randomId('topic2_')); + const oldMetaFields = { + nestedFieldOne: { + fieldOne: 'fieldone-data', + }, + }; const { originalApiRule, originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( t.context, { + queue_url: 'fake-queue-url', workflow, - queueUrl: 'fake-queue-url', - state: 'ENABLED', - type: 'sns', - value: topic1.TopicArn, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, + meta: oldMetaFields, + execution_name_prefix: 'testRule', + payload: { foo: 'bar' }, + value: randomId('value'), + tags: ['tag1', 'tag2'], } ); - t.truthy(originalEsRecord.rule.value); - t.truthy(originalPgRecord.value); + const removedFields = ['provider', 'collection', 'payload', 'tags']; const updateRule = { - ...originalApiRule, + ...omit(originalApiRule, removedFields), + executionNamePrefix: null, + meta: null, + queueUrl: null, rule: { - type: 'sns', - value: topic2.TopicArn, + type: originalApiRule.rule.type, }, + createdAt: null, + updatedAt: null, }; - const expressRequest = { - params: { - name: originalApiRule.name, - }, - body: updateRule, - }; - const response = buildFakeExpressResponse(); - await put(expressRequest, response); - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - originalPgRecord.name - ); + await request(app) + .put(`/rules/${updateRule.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send(updateRule) + .expect(200); - t.truthy(updatedEsRule.rule.value); - t.truthy(updatedPgRule.value); + const actualPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); + const apiRule = await translatePostgresRuleToApiRule(actualPostgresRule, testKnex); - t.not(updatedEsRule.rule.value, originalEsRecord.rule.value); - t.not(updatedPgRule.value, originalPgRecord.value); + const expectedApiRule = { + ...pick(originalApiRule, ['name', 'workflow', 'createdAt', 'state']), + rule: { + type: originalApiRule.rule.type, + }, + updatedAt: apiRule.updatedAt, + }; + t.deepEqual(apiRule, expectedApiRule); t.deepEqual( - updatedEsRule, + actualPostgresRule, { - ...originalEsRecord, - 
updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sns', - value: topic2.TopicArn, - }, + ...originalPgRecord, + enabled: false, + execution_name_prefix: null, + meta: null, + payload: null, + queue_url: null, + type: originalApiRule.rule.type, + value: null, + tags: null, + created_at: originalPgRecord.created_at, + updated_at: actualPostgresRule.updated_at, } ); - t.deepEqual(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'sns', - arn: updatedPgRule.arn, - value: topic2.TopicArn, - }); }); -test.serial('PUT creates the same updated Kinesis rule in PostgreSQL/Elasticsearch', async (t) => { - const { - pgProvider, - pgCollection, - } = t.context; +test.serial('PUT sets SNS rule to "disabled" and removes source mapping ARN', async (t) => { + const snsMock = mockClient(awsServices.sns()); - const kinesisArn1 = `arn:aws:kinesis:us-east-1:000000000000:${randomId('kinesis1_')}`; - const kinesisArn2 = `arn:aws:kinesis:us-east-1:000000000000:${randomId('kinesis2_')}`; + snsMock + .onAnyCommand() + .rejects() + .on(ListSubscriptionsByTopicCommand) + .resolves({ + Subscriptions: [{ + Endpoint: process.env.messageConsumer, + SubscriptionArn: randomString(), + }], + }) + .on(UnsubscribeCommand) + .resolves({}); + const mockLambdaClient = mockClient(awsServices.lambda()).onAnyCommand().rejects(); + mockLambdaClient.on(AddPermissionCommand).resolves(); + mockLambdaClient.on(RemovePermissionCommand).resolves(); + t.teardown(() => { + snsMock.restore(); + mockLambdaClient.restore(); + }); + const { + rulePgModel, + testKnex, + } = t.context; const { - originalApiRule, originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( t.context, { + value: 'sns-arn', + type: 'sns', + enabled: true, workflow, - state: 'ENABLED', - type: 'kinesis', - value: kinesisArn1, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, } ); - t.truthy(originalEsRecord.rule.arn); - t.truthy(originalEsRecord.rule.logEventArn); t.truthy(originalPgRecord.arn); - t.truthy(originalPgRecord.log_event_arn); + + const translatedPgRecord = await translatePostgresRuleToApiRule(originalPgRecord, testKnex); const updateRule = { - ...originalApiRule, - rule: { - type: 'kinesis', - value: kinesisArn2, - }, + ...translatedPgRecord, + state: 'DISABLED', }; - const expressRequest = { - params: { - name: originalApiRule.name, - }, - body: updateRule, - }; + await request(app) + .put(`/rules/${updateRule.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send(updateRule) + .expect(200); - const response = buildFakeExpressResponse(); + const updatedPostgresRule = await rulePgModel.get(testKnex, { name: updateRule.name }); - await put(expressRequest, response); + t.is(updatedPostgresRule.arn, null); +}); - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - originalPgRecord.name - ); +test('PUT returns 404 for non-existent rule', async (t) => { + const name = 'new_make_coffee'; + const response = await request(app) + .put(`/rules/${name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send({ name }) + .expect(404); + + const { message, record } = response.body; + t.true(message.includes(name)); + t.falsy(record); +}); - t.truthy(updatedEsRule.rule.arn); - t.truthy(updatedEsRule.rule.logEventArn); - 
t.truthy(updatedPgRule.arn); - t.truthy(updatedPgRule.log_event_arn); +test('PUT returns 400 for name mismatch between params and payload', + async (t) => { + const response = await request(app) + .put(`/rules/${randomString()}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send({ name: randomString() }) + .expect(400); + const { message, record } = response.body; - t.not(originalEsRecord.rule.arn, updatedEsRule.rule.arn); - t.not(originalEsRecord.rule.logEventArn, updatedEsRule.rule.logEventArn); - t.not(originalPgRecord.arn, updatedPgRule.arn); - t.not(originalPgRecord.log_event_arn, updatedPgRule.log_event_arn); + t.true(message.includes('Expected rule name to be')); + t.falsy(record); + }); - t.deepEqual( - updatedEsRule, +test('PUT returns a 400 response if record is missing workflow property', async (t) => { + const { + originalApiRule, + } = await createRuleTestRecords( + t.context, { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - arn: updatedEsRule.rule.arn, - logEventArn: updatedEsRule.rule.logEventArn, - type: 'kinesis', - value: kinesisArn2, - }, + queue_url: 'fake-queue-url', + workflow, } ); - t.deepEqual(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'kinesis', - value: kinesisArn2, - arn: updatedPgRule.arn, - log_event_arn: updatedPgRule.log_event_arn, - }); + + // Set required property to null to trigger create error + originalApiRule.workflow = null; + + const response = await request(app) + .put(`/rules/${originalApiRule.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send(originalApiRule) + .expect(400); + const { message } = response.body; + t.true(message.includes('The record has validation errors. Rule workflow is undefined')); }); -test.serial('PUT creates the same SQS rule in PostgreSQL/Elasticsearch', async (t) => { +test('PUT returns a 400 response if record is missing type property', async (t) => { const { - pgProvider, - pgCollection, - } = t.context; - - const queue1 = randomId('queue'); - const queue2 = randomId('queue'); + originalApiRule, + originalPgRecord, + } = await createRuleTestRecords( + t.context, + { + queue_url: 'fake-queue-url', + workflow, + } + ); + originalApiRule.rule.type = null; + const response = await request(app) + .put(`/rules/${originalPgRecord.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send(originalApiRule) + .expect(400); + const { message } = response.body; + t.true(message.includes('The record has validation errors. 
Rule type is undefined.')); +}); - const { queueUrl: queueUrl1 } = await createSqsQueues(queue1); - const { queueUrl: queueUrl2 } = await createSqsQueues(queue2, 4, '100'); +test('PUT returns a 400 response if rule name is invalid', async (t) => { + const { + originalApiRule, + originalPgRecord, + } = await createRuleTestRecords( + t.context, + { + queue_url: 'fake-queue-url', + workflow, + } + ); + originalApiRule.name = 'bad rule name'; + const response = await request(app) + .put(`/rules/${originalPgRecord.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send(originalApiRule) + .expect(400); + const { message } = response.body; + t.true(message.includes(originalApiRule.name)); +}); +test('PUT returns a 400 response if rule type is invalid', async (t) => { const { originalApiRule, originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( + t.context, { - ...t.context, - }, - { + queue_url: 'fake-queue-url', workflow, - name: randomId('rule'), - state: 'ENABLED', - type: 'sqs', - value: queueUrl1, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, } ); + originalApiRule.rule.type = 'invalid'; - const expectedMeta = { - visibilityTimeout: 300, - retries: 3, - }; - - t.deepEqual(originalPgRecord.meta, expectedMeta); - t.deepEqual(originalEsRecord.meta, expectedMeta); - - const updateRule = { - ...originalApiRule, - rule: { - type: 'sqs', - value: queueUrl2, - }, - meta: { - retries: 2, - }, - }; - const expressRequest = { - params: { - name: originalApiRule.name, - }, - body: updateRule, - }; - const response = buildFakeExpressResponse(); - await put(expressRequest, response); + const response = await request(app) + .put(`/rules/${originalPgRecord.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send(originalApiRule) + .expect(400); - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - updateRule.name - ); - const expectedMetaUpdate = { - visibilityTimeout: 100, - retries: 2, - }; - t.deepEqual( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sqs', - value: queueUrl2, - }, - meta: expectedMetaUpdate, - } - ); - t.deepEqual(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'sqs', - value: queueUrl2, - meta: expectedMetaUpdate, - }); + const { message } = response.body; + const regexp = new RegExp('The record has validation errors:.*rule.type.*should be equal to one of the allowed values'); + t.truthy(message.match(regexp)); }); -test.serial('PUT keeps initial trigger information if writing to PostgreSQL fails', async (t) => { - const { - pgProvider, - pgCollection, - } = t.context; - - const topic1 = await createSnsTopic(randomId('topic1_')); - const topic2 = await createSnsTopic(randomId('topic2_')); - +test('PUT returns a 400 response if rule value is not specified for non-onetime rule', async (t) => { const { originalApiRule, originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( + t.context, { - ...t.context, - }, - { + queue_url: 'fake-queue-url', workflow, - state: 'ENABLED', - type: 'sns', - value: topic1.TopicArn, - collection: { - name: pgCollection.name, - version: pgCollection.version, - }, - provider: pgProvider.name, } ); + originalApiRule.rule.type = 
'kinesis'; - t.truthy(originalEsRecord.rule.value); - t.truthy(originalPgRecord.value); - - const updateRule = { - ...originalApiRule, - rule: { - type: 'sns', - value: topic2.TopicArn, - }, - }; - - const expressRequest = { - params: { - name: originalApiRule.name, - }, - body: updateRule, - testContext: { - rulePgModel: { - get: () => Promise.resolve(originalPgRecord), - upsert: () => { - throw new Error('PG fail'); - }, - }, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - put(expressRequest, response), - { message: 'PG fail' } - ); - - const updatedPgRule = await t.context.rulePgModel - .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - originalPgRecord.name - ); - - t.is(updatedEsRule.rule.arn, originalEsRecord.rule.arn); - t.is(updatedPgRule.arn, originalPgRecord.arn); + const response = await request(app) + .put(`/rules/${originalPgRecord.name}`) + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .send(originalApiRule) + .expect(400); - t.like( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sns', - value: topic1.TopicArn, - }, - } - ); - t.like(updatedPgRule, { - ...originalPgRecord, - updated_at: updatedPgRule.updated_at, - type: 'sns', - value: topic1.TopicArn, - }); + const { message } = response.body; + const regexp = new RegExp('Rule value is undefined for kinesis rule'); + t.truthy(message.match(regexp)); }); -test.serial('PUT keeps initial trigger information if writing to Elasticsearch fails', async (t) => { +test.serial('PUT keeps initial trigger information if writing to PostgreSQL fails', async (t) => { const { pgProvider, pgCollection, @@ -2877,7 +1773,6 @@ test.serial('PUT keeps initial trigger information if writing to Elasticsearch f const { originalApiRule, originalPgRecord, - originalEsRecord, } = await createRuleTestRecords( { ...t.context, @@ -2895,7 +1790,6 @@ test.serial('PUT keeps initial trigger information if writing to Elasticsearch f } ); - t.truthy(originalEsRecord.rule.value); t.truthy(originalPgRecord.value); const updateRule = { @@ -2912,11 +1806,10 @@ test.serial('PUT keeps initial trigger information if writing to Elasticsearch f }, body: updateRule, testContext: { - esClient: { - client: { - index: () => { - throw new Error('ES fail'); - }, + rulePgModel: { + get: () => Promise.resolve(originalPgRecord), + upsert: () => { + throw new Error('PG fail'); }, }, }, @@ -2926,30 +1819,14 @@ test.serial('PUT keeps initial trigger information if writing to Elasticsearch f await t.throwsAsync( put(expressRequest, response), - { message: 'ES fail' } + { message: 'PG fail' } ); const updatedPgRule = await t.context.rulePgModel .get(t.context.testKnex, { name: updateRule.name }); - const updatedEsRule = await t.context.esRulesClient.get( - originalPgRecord.name - ); - t.is(updatedEsRule.rule.arn, originalEsRecord.rule.arn); t.is(updatedPgRule.arn, originalPgRecord.arn); - t.like( - updatedEsRule, - { - ...originalEsRecord, - updatedAt: updatedEsRule.updatedAt, - timestamp: updatedEsRule.timestamp, - rule: { - type: 'sns', - value: topic1.TopicArn, - }, - } - ); t.like(updatedPgRule, { ...originalPgRecord, updated_at: updatedPgRule.updated_at, @@ -3022,7 +1899,7 @@ test.serial('PATCH returns 200 for version value greater than the configured val t.is(response.status, 200); }); -test('DELETE returns a 404 if PostgreSQL and Elasticsearch 
rule cannot be found', async (t) => { +test('DELETE returns a 404 if rule cannot be found', async (t) => { const nonExistentRule = fakeRuleRecordFactory(); const response = await request(app) .delete(`/rules/${nonExistentRule.name}`) @@ -3032,74 +1909,6 @@ test('DELETE returns a 404 if PostgreSQL and Elasticsearch rule cannot be found' t.is(response.body.message, 'No record found'); }); -test('DELETE deletes rule that exists in PostgreSQL but not Elasticsearch', async (t) => { - const { - esRulesClient, - rulePgModel, - testKnex, - } = t.context; - const newRule = fakeRuleRecordFactory(); - delete newRule.collection; - delete newRule.provider; - await rulePgModel.create(testKnex, newRule); - - t.false( - await esRulesClient.exists( - newRule.name - ) - ); - t.true( - await rulePgModel.exists(testKnex, { - name: newRule.name, - }) - ); - const response = await request(app) - .delete(`/rules/${newRule.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - const { message } = response.body; - const dbRecords = await rulePgModel - .search(testKnex, { name: newRule.name }); - - t.is(dbRecords.length, 0); - t.is(message, 'Record deleted'); -}); - -test('DELETE deletes rule that exists in Elasticsearch but not PostgreSQL', async (t) => { - const { - esClient, - esIndex, - esRulesClient, - rulePgModel, - testKnex, - } = t.context; - const newRule = fakeRuleRecordFactory(); - await indexer.indexRule(esClient, newRule, esIndex); - - t.true( - await esRulesClient.exists( - newRule.name - ) - ); - t.false( - await rulePgModel.exists(testKnex, { - name: newRule.name, - }) - ); - const response = await request(app) - .delete(`/rules/${newRule.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - const { message } = response.body; - const dbRecords = await rulePgModel - .search(t.context.testKnex, { name: newRule.name }); - - t.is(dbRecords.length, 0); - t.is(message, 'Record deleted'); -}); - test('DELETE deletes a rule', async (t) => { const { originalPgRecord, @@ -3123,103 +1932,4 @@ test('DELETE deletes a rule', async (t) => { t.is(dbRecords.length, 0); t.is(message, 'Record deleted'); - t.false( - await t.context.esRulesClient.exists( - originalPgRecord.name - ) - ); -}); - -test('del() does not remove from Elasticsearch if removing from PostgreSQL fails', async (t) => { - const { - originalPgRecord, - } = await createRuleTestRecords( - t.context, - { - workflow, - } - ); - - const fakeRulesPgModel = { - delete: () => { - throw new Error('something bad'); - }, - get: () => Promise.resolve(originalPgRecord), - }; - - const expressRequest = { - params: { - name: originalPgRecord.name, - }, - testContext: { - knex: t.context.testKnex, - rulePgModel: fakeRulesPgModel, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.rulePgModel.exists(t.context.testKnex, { - name: originalPgRecord.name, - }) - ); - t.true( - await t.context.esRulesClient.exists( - originalPgRecord.name - ) - ); -}); - -test('del() does not remove from PostgreSQL if removing from Elasticsearch fails', async (t) => { - const { - originalPgRecord, - } = await createRuleTestRecords( - t.context, - { - workflow, - } - ); - - const fakeEsClient = { - client: { - delete: () => { - throw new Error('something bad'); - }, - }, - initializeEsClient: () => Promise.resolve(), - }; - - const 
expressRequest = { - params: { - name: originalPgRecord.name, - }, - testContext: { - knex: t.context.testKnex, - esClient: fakeEsClient, - }, - }; - - const response = buildFakeExpressResponse(); - - await t.throwsAsync( - del(expressRequest, response), - { message: 'something bad' } - ); - - t.true( - await t.context.rulePgModel.exists(t.context.testKnex, { - name: originalPgRecord.name, - }) - ); - t.true( - await t.context.esRulesClient.exists( - originalPgRecord.name - ) - ); }); diff --git a/packages/api/tests/lambdas/test-index-from-database.js b/packages/api/tests/lambdas/test-index-from-database.js index 3aa6a6dae06..8e647736154 100644 --- a/packages/api/tests/lambdas/test-index-from-database.js +++ b/packages/api/tests/lambdas/test-index-from-database.js @@ -23,19 +23,16 @@ const { fakeGranuleRecordFactory, fakePdrRecordFactory, fakeProviderRecordFactory, - fakeRuleRecordFactory, generateLocalTestDb, GranulePgModel, migrationDir, PdrPgModel, ProviderPgModel, - RulePgModel, translatePostgresCollectionToApiCollection, translatePostgresExecutionToApiExecution, translatePostgresGranuleToApiGranule, translatePostgresPdrToApiPdr, translatePostgresProviderToApiProvider, - translatePostgresRuleToApiRule, } = require('@cumulus/db'); const { @@ -281,19 +278,6 @@ test.serial('Lambda successfully indexes records of all types', async (t) => { reconciliationReportModel ); - const fakeRuleRecords = await addFakeData( - knex, - numItems, - fakeRuleRecordFactory, - new RulePgModel(), - { - workflow: workflowList[0].name, - collection_cumulus_id: fakeCollectionRecords[0].cumulus_id, - provider_cumulus_id: fakeProviderRecords[0].cumulus_id, - ...dateObject, - } - ); - await indexFromDatabase.handler({ indexName: esAlias, pageSize: 6, @@ -307,7 +291,6 @@ test.serial('Lambda successfully indexes records of all types', async (t) => { searchEs('pdr', esAlias, '20'), searchEs('provider', esAlias, '20'), searchEs('reconciliationReport', esAlias, '20'), - searchEs('rule', esAlias, '20'), ]); searchResults.map((res) => t.is(res.meta.count, numItems)); @@ -332,9 +315,6 @@ test.serial('Lambda successfully indexes records of all types', async (t) => { const providerResults = await Promise.all( fakeProviderRecords.map((r) => translatePostgresProviderToApiProvider(r)) ); - const ruleResults = await Promise.all( - fakeRuleRecords.map((r) => translatePostgresRuleToApiRule(r, knex)) - ); t.deepEqual( searchResults[0].results @@ -368,11 +348,6 @@ test.serial('Lambda successfully indexes records of all types', async (t) => { sortAndFilter(searchResults[5].results, ['timestamp'], 'name'), sortAndFilter(fakeReconciliationReportRecords, ['timestamp'], 'name') ); - - t.deepEqual( - sortAndFilter(searchResults[6].results, ['timestamp'], 'name'), - sortAndFilter(ruleResults, ['timestamp'], 'name') - ); }); test.serial('failure in indexing record of specific type should not prevent indexing of other records with same type', async (t) => { diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index bdde86ffb44..d007f7a58fc 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -154,6 +154,9 @@ export { export { ProviderSearch, } from './search/ProviderSearch'; +export { + RuleSearch, +} from './search/RuleSearch'; export { StatsSearch, } from './search/StatsSearch'; diff --git a/packages/db/src/search/RuleSearch.ts b/packages/db/src/search/RuleSearch.ts new file mode 100644 index 00000000000..6a5ca270917 --- /dev/null +++ b/packages/db/src/search/RuleSearch.ts @@ -0,0 +1,123 @@ +import { Knex } 
from 'knex'; +import pick from 'lodash/pick'; + +import Logger from '@cumulus/logger'; +import { RuleRecord } from '@cumulus/types/api/rules'; +import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { PostgresRuleRecord } from '../types/rule'; +import { translatePostgresRuleToApiRuleWithoutDbQuery } from '../translate/rules'; +import { TableNames } from '../tables'; + +const log = new Logger({ sender: '@cumulus/db/RuleSearch' }); + +interface RuleRecordWithExternals extends PostgresRuleRecord { + collectionName: string, + collectionVersion: string, + providerName?: string, +} + +/** + * Class to build and execute db search query for rules + */ +export class RuleSearch extends BaseSearch { + constructor(event: QueryEvent) { + super(event, 'rule'); + } + + /** + * Build basic query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + protected buildBasicQuery(knex: Knex): { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const { + collections: collectionsTable, + providers: providersTable, + } = TableNames; + + const countQuery = knex(this.tableName) + .count(`${this.tableName}.cumulus_id`); + + const searchQuery = knex(this.tableName) + .select(`${this.tableName}.*`) + .select({ + collectionName: `${collectionsTable}.name`, + collectionVersion: `${collectionsTable}.version`, + providerName: `${providersTable}.name`, + }); + + if (this.searchCollection()) { + searchQuery.innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + countQuery.innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + } else { + searchQuery.leftJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + } + + if (this.searchProvider()) { + searchQuery.innerJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + countQuery.innerJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + } else { + searchQuery.leftJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + } + + return { countQuery, searchQuery }; + } + + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters ?? 
this.dbQueryParameters; + if (infix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${infix}%`)); + } + if (prefix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `${prefix}%`)); + } + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres Rule records returned from query + * @param knex - knex for the translation method + * @returns translated api records + */ + protected async translatePostgresRecordsToApiRecords( + pgRecords: RuleRecordWithExternals[] + ): Promise[]> { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + + const apiRecords = pgRecords.map(async (record) => { + const providerPgRecord = record.providerName ? { name: record.providerName } : undefined; + const collectionPgRecord = record.collectionName ? { + name: record.collectionName, + version: record.collectionVersion, + } : undefined; + const apiRecord = await translatePostgresRuleToApiRuleWithoutDbQuery( + record, + collectionPgRecord, + providerPgRecord + ); + return this.dbQueryParameters.fields + ? pick(apiRecord, this.dbQueryParameters.fields) + : apiRecord; + }); + return await Promise.all(apiRecords); + } +} diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 0ad33fe7129..9f654700242 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -285,6 +285,9 @@ const providerMapping : { [key: string]: Function } = { }; const ruleMapping : { [key: string]: Function } = { + arn: (value?: string) => ({ + arn: value, + }), createdAt: (value?: string) => ({ created_at: value && new Date(Number(value)), }), @@ -300,6 +303,24 @@ const ruleMapping : { [key: string]: Function } = { updatedAt: (value?: string) => ({ updated_at: value && new Date(Number(value)), }), + workflow: (value?: string) => ({ + workflow: value, + }), + logEventArn: (value?: string) => ({ + log_event_arn: value, + }), + executionNamePrefix: (value?: string) => ({ + execution_name_prefix: value, + }), + queueUrl: (value?: string) => ({ + queue_url: value, + }), + 'rule.type': (value?: string) => ({ + type: value, + }), + 'rule.value': (value?: string) => ({ + value: value, + }), // The following fields require querying other tables collectionId: (value?: string) => { const { name, version } = (value && deconstructCollectionId(value)) || {}; diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts index 824064a52da..ed041e277e0 100644 --- a/packages/db/src/search/queries.ts +++ b/packages/db/src/search/queries.ts @@ -262,7 +262,6 @@ export const convertQueryStringToDbQueryParameters = ( // for each search strategy, get all parameters and convert them to db parameters Object.keys(regexes).forEach((k: string) => { const matchedFields = fieldsList.filter((f) => f.name.match(regexes[k])); - if (matchedFields && matchedFields.length > 0 && convert[k]) { const queryParams = convert[k](type, matchedFields, regexes[k]); Object.assign(dbQueryParameters, queryParams); diff --git a/packages/db/src/translate/rules.ts b/packages/db/src/translate/rules.ts index 8af101284ab..39dafbcae89 100644 --- a/packages/db/src/translate/rules.ts +++ b/packages/db/src/translate/rules.ts @@ -5,27 +5,21 @@ import { RuleRecord, Rule } from '@cumulus/types/api/rules'; import { CollectionPgModel } from '../models/collection'; import { ProviderPgModel } from '../models/provider'; import { PostgresRule, 
PostgresRuleRecord } from '../types/rule'; +import { PostgresProviderRecord } from '../types/provider'; +import { PostgresCollectionRecord } from '../types/collection'; -export const translatePostgresRuleToApiRule = async ( +export const translatePostgresRuleToApiRuleWithoutDbQuery = async ( pgRule: PostgresRuleRecord, - knex: Knex | Knex.Transaction, - collectionPgModel = new CollectionPgModel(), - providerPgModel = new ProviderPgModel() + collectionPgRecord?: Pick, + providerPgRecord?: Partial ): Promise => { - const provider = pgRule.provider_cumulus_id - ? await providerPgModel.get(knex, { cumulus_id: pgRule.provider_cumulus_id }) - : undefined; - const collection = pgRule.collection_cumulus_id - ? await collectionPgModel.get(knex, { cumulus_id: pgRule.collection_cumulus_id }) - : undefined; - const apiRule: RuleRecord = { name: pgRule.name, workflow: pgRule.workflow, - provider: provider ? provider.name : undefined, - collection: collection ? { - name: collection.name, - version: collection.version, + provider: providerPgRecord ? providerPgRecord.name : undefined, + collection: collectionPgRecord ? { + name: collectionPgRecord.name, + version: collectionPgRecord.version, } : undefined, rule: removeNilProperties({ type: pgRule.type, @@ -45,6 +39,26 @@ export const translatePostgresRuleToApiRule = async ( return removeNilProperties(apiRule); }; +export const translatePostgresRuleToApiRule = async ( + pgRule: PostgresRuleRecord, + knex: Knex | Knex.Transaction, + collectionPgModel = new CollectionPgModel(), + providerPgModel = new ProviderPgModel() +): Promise => { + const providerPgRecord = pgRule.provider_cumulus_id + ? await providerPgModel.get(knex, { cumulus_id: pgRule.provider_cumulus_id }) + : undefined; + const collectionPgRecord = pgRule.collection_cumulus_id + ? await collectionPgModel.get(knex, { cumulus_id: pgRule.collection_cumulus_id }) + : undefined; + + return translatePostgresRuleToApiRuleWithoutDbQuery( + pgRule, + collectionPgRecord, + providerPgRecord + ); +}; + /** * Generate a Postgres rule record from a DynamoDB record. 
* diff --git a/packages/db/tests/search/test-RuleSearch.js b/packages/db/tests/search/test-RuleSearch.js new file mode 100644 index 00000000000..7ecbcdc7008 --- /dev/null +++ b/packages/db/tests/search/test-RuleSearch.js @@ -0,0 +1,442 @@ +'use strict'; + +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); +const { constructCollectionId } = require('@cumulus/message/Collections'); + +const { RuleSearch } = require('../../dist/search/RuleSearch'); + +const { + AsyncOperationPgModel, + CollectionPgModel, + destroyLocalTestDb, + fakeAsyncOperationRecordFactory, + fakeCollectionRecordFactory, + fakeRuleRecordFactory, + generateLocalTestDb, + migrationDir, + RulePgModel, + ProviderPgModel, + fakeProviderRecordFactory, +} = require('../../dist'); + +const testDbName = `rule_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + // Create PG Collections + t.context.collectionPgModel = new CollectionPgModel(); + t.context.testPgCollection = fakeCollectionRecordFactory( + { cumulus_id: 0, + name: 'testCollection', + version: 8 } + ); + t.context.testPgCollection2 = fakeCollectionRecordFactory( + { cumulus_id: 1, + name: 'testCollection2', + version: 4 } + ); + + await t.context.collectionPgModel.insert( + t.context.knex, + t.context.testPgCollection + ); + + await t.context.collectionPgModel.insert( + t.context.knex, + t.context.testPgCollection2 + ); + + t.context.collectionCumulusId = t.context.testPgCollection.cumulus_id; + t.context.collectionCumulusId2 = t.context.testPgCollection2.cumulus_id; + + t.context.collectionId = constructCollectionId( + t.context.testPgCollection.name, + t.context.testPgCollection.version + ); + t.context.collectionId2 = constructCollectionId( + t.context.testPgCollection2.name, + t.context.testPgCollection2.version + ); + + // Create a Provider + t.context.providerPgModel = new ProviderPgModel(); + t.context.testProvider = fakeProviderRecordFactory({ + name: 'testProvider', + }); + t.context.testProvider2 = fakeProviderRecordFactory({ + name: 'testProvider2', + }); + + const [pgProvider] = await t.context.providerPgModel.insert( + t.context.knex, + t.context.testProvider + ); + const [pgProvider2] = await t.context.providerPgModel.insert( + t.context.knex, + t.context.testProvider2 + ); + + t.context.providerCumulusId = pgProvider.cumulus_id; + t.context.providerCumulusId2 = pgProvider2.cumulus_id; + + // Create an Async Operation + t.context.asyncOperationsPgModel = new AsyncOperationPgModel(); + t.context.testAsyncOperation = fakeAsyncOperationRecordFactory({ cumulus_id: 140 }); + t.context.asyncCumulusId = t.context.testAsyncOperation.cumulus_id; + + await t.context.asyncOperationsPgModel.insert( + t.context.knex, + t.context.testAsyncOperation + ); + + t.context.duration = 100; + + // Create a lot of Rules + t.context.ruleSearchFields = { + createdAt: new Date(2017, 11, 31), + updatedAt: new Date(2018, 0, 1), + updatedAt2: new Date(2018, 0, 2), + }; + + const rules = []; + t.context.rulePgModel = new RulePgModel(); + + range(50).map((num) => ( + rules.push(fakeRuleRecordFactory({ + name: `fakeRule-${num}`, + created_at: t.context.ruleSearchFields.createdAt, + updated_at: (num % 2) ? 
+ t.context.ruleSearchFields.updatedAt : t.context.ruleSearchFields.updatedAt2, + enabled: num % 2 === 0, + workflow: `testWorkflow-${num}`, + queue_url: (num % 2) ? 'https://sqs.us-east-1.amazonaws.com/123/456' : null, + collection_cumulus_id: (num % 2) + ? t.context.collectionCumulusId : t.context.collectionCumulusId2, + provider_cumulus_id: (num % 2) + ? t.context.providerCumulusId : t.context.providerCumulusId2, + })) + )); + + await t.context.rulePgModel.insert( + t.context.knex, + rules + ); +}); + +test.after.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('RuleSearch returns correct response for basic query', async (t) => { + const { knex } = t.context; + const dbSearch = new RuleSearch({}); + const results = await dbSearch.query(knex); + t.is(results.meta.count, 50); + t.is(results.results.length, 10); + + const expectedResponse1 = { + name: 'fakeRule-0', + createdAt: t.context.ruleSearchFields.createdAt.getTime(), + updatedAt: t.context.ruleSearchFields.updatedAt2.getTime(), + state: 'ENABLED', + rule: { + type: 'onetime', + }, + workflow: 'testWorkflow-0', + collection: { + name: 'testCollection2', + version: '4', + }, + provider: t.context.testProvider2.name, + }; + + const expectedResponse10 = { + name: 'fakeRule-9', + createdAt: t.context.ruleSearchFields.createdAt.getTime(), + updatedAt: t.context.ruleSearchFields.updatedAt.getTime(), + state: 'DISABLED', + rule: { + type: 'onetime', + }, + workflow: 'testWorkflow-9', + collection: { + name: 'testCollection', + version: '8', + }, + provider: t.context.testProvider.name, + queueUrl: 'https://sqs.us-east-1.amazonaws.com/123/456', + }; + + t.deepEqual(results.results[0], expectedResponse1); + t.deepEqual(results.results[9], expectedResponse10); +}); + +test('RuleSearchsupports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 25, + page: 2, + }; + let dbSearch = new RuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 10, + page: 5, + }; + dbSearch = new RuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new RuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 0); +}); + +test('RuleSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + infix: 'Rule-27', + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('RuleSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + prefix: 'fakeRule-1', + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 11); + t.is(response.results?.length, 11); +}); + +test('RuleSearch supports term search for string field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 10, + workflow: 'testWorkflow-11', + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = 
await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('RuleSearch non-existing fields are ignored', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('RuleSearch returns fields specified', async (t) => { + const { knex } = t.context; + const fields = 'state,name'; + const queryStringParameters = { + fields, + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + response.results.forEach((rule) => t.deepEqual(Object.keys(rule), fields.split(','))); +}); + +test('RuleSearch supports search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 10, + prefix: 'fakeRule-1', + state: 'DISABLED', + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + t.is(response.meta.count, 6); + t.is(response.results?.length, 6); +}); + +test('RuleSearch supports sorting', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + sort_by: 'workflow', + order: 'desc', + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results[0].workflow > response.results[10].workflow); + t.true(response.results[1].workflow > response.results[30].workflow); +}); + +test('RuleSearch supports collectionId term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + collectionId: t.context.collectionId, + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('RuleSearch supports provider term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + provider: t.context.testProvider.name, + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('RuleSearch supports term search for date field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + updatedAt: t.context.ruleSearchFields.updatedAt, + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('RuleSearch supports term search for boolean field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + state: 'ENABLED', // maps to the bool field "enabled" + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('RuleSearch supports term search for timestamp', async (t) => { + const { 
knex } = t.context; + const queryStringParameters = { + limit: 200, + timestamp: t.context.ruleSearchFields.updatedAt, //maps to timestamp + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('RuleSearch supports range search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + timestamp__from: t.context.ruleSearchFields.timestamp, + timestamp__to: t.context.ruleSearchFields.timestamp + 1600, + }; + const dbSearch = new RuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('RuleSearch supports search which checks existence of queue URL field', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + queueUrl__exists: 'true', + }; + let dbSearch = new RuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 200, + queueUrl__exists: 'false', + }; + dbSearch = new RuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('RuleSearch supports collectionId terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + collectionId__in: [t.context.collectionId2, constructCollectionId('fakecollectionterms', 'v1')].join(','), + }; + let dbSearch = new RuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 200, + collectionId__in: [t.context.collectionId, t.context.collectionId2].join(','), + }; + dbSearch = new RuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('RuleSearch supports search which provider does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 200, + provider__not: t.context.testProvider.name, + }; + let dbSearch = new RuleSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 200, + provider__not: 'providernotexist', + }; + dbSearch = new RuleSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); diff --git a/packages/db/tests/translate/test-rules.js b/packages/db/tests/translate/test-rules.js index b7804b97ae6..b7ddd5acec3 100644 --- a/packages/db/tests/translate/test-rules.js +++ b/packages/db/tests/translate/test-rules.js @@ -1,6 +1,7 @@ const test = require('ava'); const { translatePostgresRuleToApiRule, + translatePostgresRuleToApiRuleWithoutDbQuery, translateApiRuleToPostgresRule, translateApiRuleToPostgresRuleRaw, } = require('../../dist/translate/rules'); @@ -243,3 +244,62 @@ test('translatePostgresRuleToApiRule handles optional fields', async (t) => { expectedRule ); }); + +test('translatePostgresRuleToApiRuleWithoutDbQuery translates a postgres Rule', async (t) => { + const pgRecord = { + name: 'testRule', + 
workflow: 'testWorkflow', + type: 'onetime', + enabled: true, + collection_cumulus_id: 1, + provider_cumulus_id: 2, + execution_name_prefix: 'test', + value: 'abcd', + arn: 'arn:123', + log_event_arn: 'arn:987', + payload: { object: 'value' }, + meta: { + retries: 2, + visibility: 30, + more: 'meta', + }, + tags: ['tag1', 'tag2'], + queue_url: 'https://sqs.us-west-2.amazonaws.com/123456789012/my-queue', + created_at: new Date(), + updated_at: new Date(), + }; + + // Assume provider, collection are already retrieved, no query required + const fakeCollection = { name: 'abc', version: '123' }; + const fakeProvider = { name: 'abc' }; + + const expectedRule = { + name: pgRecord.name, + state: 'ENABLED', + workflow: pgRecord.workflow, + collection: fakeCollection, + provider: fakeProvider.name, + meta: pgRecord.meta, + payload: pgRecord.payload, + queueUrl: pgRecord.queue_url, + rule: { + type: pgRecord.type, + arn: pgRecord.arn, + logEventArn: pgRecord.log_event_arn, + value: pgRecord.value, + }, + executionNamePrefix: pgRecord.execution_name_prefix, + tags: pgRecord.tags, + createdAt: pgRecord.created_at.getTime(), + updatedAt: pgRecord.updated_at.getTime(), + }; + + t.deepEqual( + await translatePostgresRuleToApiRuleWithoutDbQuery( + pgRecord, + fakeCollection, + fakeProvider + ), + expectedRule + ); +}); diff --git a/packages/es-client/indexer.js b/packages/es-client/indexer.js index a6a770fa9b5..4fc40ec60d4 100644 --- a/packages/es-client/indexer.js +++ b/packages/es-client/indexer.js @@ -294,20 +294,6 @@ function indexReconciliationReport(esClient, payload, index = defaultIndexAlias, return genericRecordUpdate(esClient, payload.name, payload, index, type); } -/** - * Indexes the rule type on Elasticsearch - * - * @param {Object} esClient - Elasticsearch Connection object - * @param {Object} payload - the Rule record - * @param {string} index - Elasticsearch index alias (default defined in search.js) - * @param {string} type - Elasticsearch type (default: rule) - * @returns {Promise} Elasticsearch response - */ - -function indexRule(esClient, payload, index = defaultIndexAlias, type = 'rule') { - return genericRecordUpdate(esClient, payload.name, payload, index, type); -} - /** * Indexes the granule type on Elasticsearch * @@ -609,33 +595,6 @@ function deleteProvider({ }); } -/** - * Deletes the rule in Elasticsearch - * - * @param {Object} params - * @param {Object} params.esClient - Elasticsearch Connection object - * @param {string} params.name - the rule name - * @param {string[]} [params.ignore] - Array of response codes to ignore - * @param {string} params.index - Elasticsearch index alias (default defined in search.js) - * @param {string} params.type - Elasticsearch type (default: rule) - * @returns {Promise} Elasticsearch response - */ -function deleteRule({ - esClient, - name, - ignore, - index = defaultIndexAlias, - type = 'rule', -}) { - return deleteRecord({ - esClient, - id: name, - index, - type, - ignore, - }); -} - /** * Deletes the PDR in Elasticsearch * @@ -805,7 +764,6 @@ module.exports = { deleteProvider, deleteReconciliationReport, deleteRecord, - deleteRule, executionInvalidNullFields, granuleInvalidNullFields, genericRecordUpdate, @@ -816,7 +774,6 @@ module.exports = { indexPdr, indexProvider, indexReconciliationReport, - indexRule, updateAsyncOperation, upsertExecution, upsertGranule, diff --git a/packages/es-client/tests/test-es-indexer.js b/packages/es-client/tests/test-es-indexer.js index e3cf3a0f8f0..8b936b79863 100644 --- 
a/packages/es-client/tests/test-es-indexer.js +++ b/packages/es-client/tests/test-es-indexer.js @@ -168,29 +168,6 @@ test.serial('creating multiple deletedgranule records and retrieving them', asyn }); }); -test.serial('indexing a rule record', async (t) => { - const { esIndex, esClient } = t.context; - - const testRecord = { - name: randomString(), - }; - - const r = await indexer.indexRule(esClient, testRecord, esIndex); - - // make sure record is created - t.is(r.result, 'created'); - - // check the record exists - const record = await esClient.client.get({ - index: esIndex, - type: 'rule', - id: testRecord.name, - }).then((response) => response.body); - - t.is(record._id, testRecord.name); - t.is(typeof record._source.timestamp, 'number'); -}); - test.serial('indexing a provider record', async (t) => { const { esIndex, esClient } = t.context; @@ -617,31 +594,6 @@ test.serial('deleting a collection record', async (t) => { t.false(await esCollectionsClient.exists(collectionId)); }); -test.serial('deleting a rule record', async (t) => { - const { esIndex, esClient } = t.context; - const name = randomString(); - const testRecord = { - name, - }; - - await indexer.indexRule(esClient, testRecord, esIndex); - - // check the record exists - const esRulesClient = new Search( - {}, - 'rule', - esIndex - ); - t.true(await esRulesClient.exists(name)); - - await indexer.deleteRule({ - esClient, - name, - index: esIndex, - }); - t.false(await esRulesClient.exists(name)); -}); - test.serial('deleting a PDR record', async (t) => { const { esIndex, esClient } = t.context; From a2874297338a5149e67c74175c754b276622f400 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 28 Aug 2024 13:45:24 -0400 Subject: [PATCH 22/61] CUMULUS-3837: Create ReconciliationReportsTable in RDS and add indexes * add migration file to create reconciliation reports table, including indexes * create types and model files for reconciliation reports * modify files for reconciliation reports work and to be accessible * add test for new model * update reconciliation reports lambda to write to new table instead of Dynamo table --- .../lambdas/create-reconciliation-report.js | 36 +++++---- packages/db/src/index.ts | 61 ++++++++------- ...217_create_reconciliation_reports_table.ts | 34 +++++++++ .../db/src/models/reconciliation_report.ts | 33 ++++++++ packages/db/src/tables.ts | 1 + packages/db/src/test-utils.ts | 12 +++ .../db/src/types/reconciliation_report.ts | 34 +++++++++ .../test-reconciliation-report-model.js | 75 +++++++++++++++++++ tf-modules/archive/reconciliation_report.tf | 2 +- 9 files changed, 245 insertions(+), 43 deletions(-) create mode 100644 packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts create mode 100644 packages/db/src/models/reconciliation_report.ts create mode 100644 packages/db/src/types/reconciliation_report.ts create mode 100644 packages/db/tests/models/test-reconciliation-report-model.js diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index ad940777704..5a67f9eb129 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -31,7 +31,8 @@ const Logger = require('@cumulus/logger'); const { createInternalReconciliationReport } = require('./internal-reconciliation-report'); const { createGranuleInventoryReport } = require('./reports/granule-inventory-report'); const { createOrcaBackupReconciliationReport } = 
require('./reports/orca-backup-reconciliation-report'); -const { ReconciliationReport } = require('../models'); +// const { ReconciliationReport } = require('../models'); +const { ReconciliationReportPgModel } = require ('@cumulus/db') const { errorify, filenamify } = require('../lib/utils'); const { cmrGranuleSearchParams, @@ -803,7 +804,7 @@ async function createReconciliationReport(recReportParams) { * @param {string} params.stackName - the name of the CUMULUS stack * @param {string} params.reportType - the type of reconciliation report * @param {string} params.reportName - the name of the report - * @param {Knex} params.knex - Optional Instance of a Knex client for testing + * @param {Knex} params.knex - Knex client to interact with pg * @returns {Object} report record saved to the database */ async function processRequest(params) { @@ -824,16 +825,18 @@ async function processRequest(params) { if (reportType === 'Granule Inventory') reportKey = reportKey.replace('.json', '.csv'); // add request to database - const reconciliationReportModel = new ReconciliationReport(); - const reportRecord = { + // TODO: do this with pg/knex + // const reconciliationReportModel = new ReconciliationReport(); + const reconciliationReportModel = new ReconciliationReportPgModel(); + const builtReportRecord = { name: reportRecordName, type: reportType, status: 'Pending', location: buildS3Uri(systemBucket, reportKey), }; - let apiRecord = await reconciliationReportModel.create(reportRecord); - await indexReconciliationReport(esClient, apiRecord, process.env.ES_INDEX); - log.info(`Report added to database as pending: ${JSON.stringify(apiRecord)}.`); + let [reportPgRecord] = await reconciliationReportModel.create(knex, builtReportRecord); + await indexReconciliationReport(esClient, reportPgRecord, process.env.ES_INDEX); + log.info(`Report added to database as pending: ${JSON.stringify(reportPgRecord)}.`); const concurrency = env.CONCURRENCY || 3; @@ -857,27 +860,34 @@ async function processRequest(params) { // reportType is in ['Inventory', 'Granule Not Found'] await createReconciliationReport(recReportParams); } - apiRecord = await reconciliationReportModel.updateStatus({ name: reportRecord.name }, 'Generated'); - await indexReconciliationReport(esClient, { ...apiRecord, status: 'Generated' }, process.env.ES_INDEX); + // apiRecord = await reconciliationReportModel.updateStatus({ name: reportRecord.name }, 'Generated'); + const updatedRecord = { + ...reportPgRecord, + status: 'Generated', + } + reportPgRecord = await reconciliationReportModel.upsert(knex, updatedRecord); + await indexReconciliationReport(esClient, reportPgRecord, process.env.ES_INDEX); } catch (error) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); - const updates = { + const updatedErrorRecord = { + ...reportPgRecord, status: 'Failed', error: { Error: error.message, Cause: errorify(error), }, }; - apiRecord = await reconciliationReportModel.update({ name: reportRecord.name }, updates); + // apiRecord = await reconciliationReportModel.update({ name: reportRecord.name }, updates); + reportPgRecord = await reconciliationReportModel.upsert(knex, updatedErrorRecord); await indexReconciliationReport( esClient, - { ...apiRecord, ...updates }, + reportPgRecord, process.env.ES_INDEX ); throw error; } - return reconciliationReportModel.get({ name: reportRecord.name }); + return reconciliationReportModel.get(knex, { name: reportRecord.name }); } async function handler(event) { diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index bdde86ffb44..8f1667eacf2 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -31,7 +31,6 @@ export { export { BaseRecord, } from './types/base'; - export { PostgresAsyncOperation, PostgresAsyncOperationRecord, @@ -45,56 +44,50 @@ export { PostgresExecutionRecord, } from './types/execution'; export { - PostgresProvider, - PostgresProviderRecord, -} from './types/provider'; -export { - PostgresRule, - PostgresRuleRecord, -} from './types/rule'; + PostgresFile, + PostgresFileRecord, +} from './types/file'; export { PostgresGranule, PostgresGranuleRecord, } from './types/granule'; +export { + PostgresGranuleExecution, +} from './types/granule-execution'; export { PostgresPdr, PostgresPdrRecord, } from './types/pdr'; export { - PostgresFile, - PostgresFileRecord, -} from './types/file'; + PostgresProvider, + PostgresProviderRecord, +} from './types/provider'; export { - PostgresGranuleExecution, -} from './types/granule-execution'; + PostgresReconciliationReport, + PostgresReconciliationReportRecord, +} from './types/reconciliation_report'; +export { + PostgresRule, + PostgresRuleRecord, +} from './types/rule'; + export { translateApiAsyncOperationToPostgresAsyncOperation, translatePostgresAsyncOperationToApiAsyncOperation, } from './translate/async_operations'; -export { - translateApiFiletoPostgresFile, - translatePostgresFileToApiFile, -} from './translate/file'; - export { translateApiCollectionToPostgresCollection, translatePostgresCollectionToApiCollection, } from './translate/collections'; - -export { - translateApiProviderToPostgresProvider, - translatePostgresProviderToApiProvider, -} from './translate/providers'; -export { - translatePostgresRuleToApiRule, - translateApiRuleToPostgresRule, - translateApiRuleToPostgresRuleRaw, -} from './translate/rules'; export { translateApiExecutionToPostgresExecution, translateApiExecutionToPostgresExecutionWithoutNilsRemoved, translatePostgresExecutionToApiExecution, } from './translate/executions'; +export { + translateApiFiletoPostgresFile, + translatePostgresFileToApiFile, +} from './translate/file'; export { translateApiGranuleToPostgresGranule, translateApiGranuleToPostgresGranuleWithoutNilsRemoved, @@ -105,6 +98,15 @@ export { translateApiPdrToPostgresPdr, translatePostgresPdrToApiPdr, } from './translate/pdr'; +export { + translateApiProviderToPostgresProvider, + translatePostgresProviderToApiProvider, +} from './translate/providers'; +export { + translatePostgresRuleToApiRule, + translateApiRuleToPostgresRule, + translateApiRuleToPostgresRuleRaw, +} from './translate/rules'; export { getCollectionsByGranuleIds, @@ -158,8 +160,8 @@ export { StatsSearch, } from './search/StatsSearch'; -export { AsyncOperationPgModel } 
from './models/async_operation'; export { BasePgModel } from './models/base'; +export { AsyncOperationPgModel } from './models/async_operation'; export { CollectionPgModel } from './models/collection'; export { ExecutionPgModel } from './models/execution'; export { FilePgModel } from './models/file'; @@ -167,4 +169,5 @@ export { GranulePgModel } from './models/granule'; export { GranulesExecutionsPgModel } from './models/granules-executions'; export { PdrPgModel } from './models/pdr'; export { ProviderPgModel } from './models/provider'; +export { ReconciliationReportPgModel } from './models/reconciliation_report' export { RulePgModel } from './models/rule'; diff --git a/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts new file mode 100644 index 00000000000..f3dc4c44138 --- /dev/null +++ b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts @@ -0,0 +1,34 @@ +import { Knex } from 'knex'; + +export const up = async (knex: Knex): Promise => { + await knex.schema.createTable('reconciliation_reports', (table) => { + table + .increments('cumulus_id') + .primary(); + table + .text('name') + .comment('Reconciliation Report name'); + table + .enum('type', + ['Granule Inventory', 'Granule Not Found', 'Internal', 'Inventory', 'ORCA Backup']) + .comment('Type of Reconciliation Report'); + table + .enum('status', ['Generated', 'Pending', 'Failed']) + .comment('Status of Reconciliation Report'); + table + .text('location') + .comment('Location of Reconciliation Report'); + table + .jsonb('error') + .comment('Error object'); + // adds "created_at" and "updated_at" columns automatically + table + .timestamps(false, true); + table.index('status'); + table.index('updated_at'); + }); +}; + +export const down = async (knex: Knex): Promise => { + await knex.schema.dropTableIfExists('reconciliation_reports'); +}; diff --git a/packages/db/src/models/reconciliation_report.ts b/packages/db/src/models/reconciliation_report.ts new file mode 100644 index 00000000000..e3dd8ab72fc --- /dev/null +++ b/packages/db/src/models/reconciliation_report.ts @@ -0,0 +1,33 @@ +import { Knex } from 'knex'; +import { BasePgModel } from './base'; +import { TableNames } from '../tables'; + +import { PostgresReconciliationReport, PostgresReconciliationReportRecord } from '../types/reconciliation_report'; + +class ReconciliationReportPgModel extends BasePgModel { + constructor () { + super({ + tableName: TableNames.reconciliationReports, + }) + } + + create( + knexOrTransaction: Knex | Knex.Transaction, + item: PostgresReconciliationReport + ) { + return super.create(knexOrTransaction, item, '*'); + } + + upsert( + knexOrTransaction: Knex | Knex.Transaction, + reconciliationReport: PostgresReconciliationReport + ) { + return await knexOrTransaction(this.tableName) + .insert(reconciliationReport) + .onConflict('name') + .merge() + .returning('*'); + } +} + +export { ReconciliationReportPgModel }; \ No newline at end of file diff --git a/packages/db/src/tables.ts b/packages/db/src/tables.ts index 43208f48c16..75bf9058697 100644 --- a/packages/db/src/tables.ts +++ b/packages/db/src/tables.ts @@ -7,5 +7,6 @@ export enum TableNames { granulesExecutions = 'granules_executions', pdrs = 'pdrs', providers = 'providers', + reconciliationReports = 'reconciliation_reports', rules = 'rules' } diff --git a/packages/db/src/test-utils.ts b/packages/db/src/test-utils.ts index 889387bebee..2e3792e7baf 100644 --- 
a/packages/db/src/test-utils.ts +++ b/packages/db/src/test-utils.ts @@ -16,6 +16,7 @@ import { PostgresFile } from './types/file'; import { PostgresGranule } from './types/granule'; import { PostgresPdr } from './types/pdr'; import { PostgresProvider } from './types/provider'; +import { PostgresReconciliationReport } from './types/reconciliation_report'; import { PostgresRule } from './types/rule'; export const createTestDatabase = async (knex: Knex, dbName: string, dbUser: string) => { @@ -152,3 +153,14 @@ export const fakePdrRecordFactory = ( created_at: new Date(), ...params, }); + +export const fakeReconciliationReportRecordFactory = ( + params: Partial +) => ({ + name: `reconReport${cryptoRandomString({ length: 10 })}`, + type: 'Granule Inventory', + status: 'Generated', + created_at: new Date(), + updated_at: new Date(), + ...params, +}); \ No newline at end of file diff --git a/packages/db/src/types/reconciliation_report.ts b/packages/db/src/types/reconciliation_report.ts new file mode 100644 index 00000000000..0ef620d34b4 --- /dev/null +++ b/packages/db/src/types/reconciliation_report.ts @@ -0,0 +1,34 @@ +export type ReconciliationReportType = + 'Granule Inventory' | 'Granule Not Found' | 'Internal' | 'Inventory' | 'ORCA Backup'; +export type ReconciliationReportStatus = 'Generated' | 'Pending' | 'Failed'; +// rules imports some types from '@cumulus/types/api/rules', not sure if these should be moved there later + +/** + * PostgresReconciliationReport + * + * This interface describes a Reconciliation Report object in postgres compatible format that + * is ready for write to Cumulus's postgres database instance + */ + +export interface PostgresReconciliationReport { + name: string, + type: ReconciliationReportType, // string + status: ReconciliationReportStatus, // string + location?: string, + error?: object, + created_at: Date | undefined, + updated_at: Date | undefined, +} + +/** + * PostgresReconciliationReportRecord + * + * This interface describes a Reconciliation Report Record that has been retrieved from + * postgres for reading. It differs from the PostgresReconciliationReport interface in that it types + * the autogenerated/required fields in the Postgres database as required + */ +export interface PostgresReconciliationReportRecord extends PostgresReconciliationReport { + cumulus_id: number, + created_at: Date, + updated_at: Date +} \ No newline at end of file diff --git a/packages/db/tests/models/test-reconciliation-report-model.js b/packages/db/tests/models/test-reconciliation-report-model.js new file mode 100644 index 00000000000..37e10373055 --- /dev/null +++ b/packages/db/tests/models/test-reconciliation-report-model.js @@ -0,0 +1,75 @@ +const test = require ('ava'); +const cryptoRandomString = require('crypto-random-string'); + +const { + ReconciliationReportPgModel, + fakeReconciliationReportRecordFactory, + generateLocalTestDb, + destroyLocalTestDb, + migrationDir, +} = require ('../../dist') +// other models? 
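Taken together, the new model, record factory, and Postgres types support the write path that the tests below exercise and that the create-reconciliation-report lambda adopts later in this series. A minimal usage sketch follows; the import locations and the writeReportLifecycle helper are illustrative assumptions, not part of this patch:

import { Knex } from 'knex';
// Assumed import location; the test file above pulls the same names from '../../dist'.
import { ReconciliationReportPgModel, fakeReconciliationReportRecordFactory } from '@cumulus/db';

// Create a pending report row, then mark it Generated through the name-keyed upsert.
const writeReportLifecycle = async (knex: Knex) => {
  const model = new ReconciliationReportPgModel();
  const pending = fakeReconciliationReportRecordFactory({ status: 'Pending' });

  // create() returns the inserted rows because the model passes '*' as its returning clause.
  const [created] = await model.create(knex, pending);

  // upsert() inserts on first write and merges on a conflicting name, so resending the
  // same name with a new status overwrites the existing row in place.
  const [generated] = await model.upsert(knex, { ...created, status: 'Generated' });
  return generated;
};
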
+ +const testDbName = `rule_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = generateLocalTestDb( + testDbName, + migrationDir + ) + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); +}); + +test.beforeEach((t) => { + t.context.reconciliationReportRecord = fakeReconciliationReportRecordFactory(); +}); + +test.after.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('ReconciliationReportPgModel.upsert() creates new reconciliation report' async (t) => { + const { + knex, + reconciliationReportPgModel, + reconciliationReportRecord, + } = t.context; + + await reconciliationReportPgModel.upsert(knex, reconciliationReportRecord); + + t.like( + await reconciliationReportPgModel.get(knex, reconciliationReportRecord), + reconciliationReportRecord + ); +}); + +test('ReconciliationReportPgModel.upsert() overwrites a reconciliation report record' async (t) => { + const { + knex, + reconciliationReportPgModel, + reconciliationReportRecord, + } = t.context + + await reconciliationReportPgModel.create(knex, reconciliationReportRecord); + + const updatedReconciliationReport = { + ...reconciliationReportRecord, + type: 'ORCA Backup', + status: 'Failed', + } + + t.like( + await reconciliationReportPgModel.get(knex, { + name: reconciliationReportRecord.name, + }), + updatedReconciliationReport + ); +}); +// test('ReconciliationReportPgModel.upsert() ' async (t) => { +// }); \ No newline at end of file diff --git a/tf-modules/archive/reconciliation_report.tf b/tf-modules/archive/reconciliation_report.tf index 36089d77949..cd55d95db10 100644 --- a/tf-modules/archive/reconciliation_report.tf +++ b/tf-modules/archive/reconciliation_report.tf @@ -17,7 +17,7 @@ resource "aws_lambda_function" "create_reconciliation_report" { ES_HOST = var.elasticsearch_hostname ES_SCROLL = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_duration", "6m") ES_SCROLL_SIZE = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_size", 1000) - ReconciliationReportsTable = var.dynamo_tables.reconciliation_reports.name + ReconciliationReportsTable = var.dynamo_tables.reconciliation_reports.name # need to change? stackName = var.prefix system_bucket = var.system_bucket cmr_client_id = var.cmr_client_id From 48f852d7ca8130b8e9ba7565d0dc0c2ffdaa35fb Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 28 Aug 2024 13:55:28 -0400 Subject: [PATCH 23/61] CUMULUS-3837: update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 219295ac5cb..6b7ee9655d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
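The reconciliation_reports table and its indexes come from running the migration above through Knex's programmatic migration API. A minimal sketch, assuming placeholder connection settings and a compiled migration directory (neither is part of this change set):

import { knex, Knex } from 'knex';

// Placeholder connection and directory values; real deployments wire these up
// through the @cumulus/db connection helpers and the compiled migration output.
const config: Knex.Config = {
  client: 'pg',
  connection: {
    host: 'localhost',
    user: 'postgres',
    password: 'password',
    database: 'postgres',
  },
  migrations: { directory: './packages/db/dist/migrations' },
};

const applyReconciliationReportsMigration = async (): Promise<void> => {
  const db = knex(config);
  try {
    // Runs every pending `up`, including 20240814185217_create_reconciliation_reports_table.
    await db.migrate.latest();
    // db.migrate.rollback() would invoke the matching `down` and drop the table again.
  } finally {
    await db.destroy();
  }
};
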
Elasticsearch - Update `@cumlus/api/ecs/async-operation` to not update Elasticsearch index when reporting status of async operation +- **CUMULUS-3837** + - added `reconciliation_reports` table in RDS, including indexes + - created model and types for `reconciliationReports` + - updated reconciliation reports lambda to write to new RDS table instead of Dynamo ### Migration Notes From 0a6dd42e8b543ad1ab61df0522c78b475a1f7742 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 28 Aug 2024 14:23:46 -0400 Subject: [PATCH 24/61] CUMULUS-3837: fix prepare in @cumulus/db --- packages/db/src/models/reconciliation_report.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/db/src/models/reconciliation_report.ts b/packages/db/src/models/reconciliation_report.ts index e3dd8ab72fc..57734f9f17d 100644 --- a/packages/db/src/models/reconciliation_report.ts +++ b/packages/db/src/models/reconciliation_report.ts @@ -22,7 +22,7 @@ class ReconciliationReportPgModel extends BasePgModel Date: Wed, 28 Aug 2024 15:41:26 -0400 Subject: [PATCH 25/61] CUMULUS-3837: fix eslint errors --- .../lambdas/create-reconciliation-report.js | 7 ++- packages/db/src/index.ts | 2 +- ...217_create_reconciliation_reports_table.ts | 52 +++++++++---------- .../db/src/models/reconciliation_report.ts | 8 +-- .../db/src/types/reconciliation_report.ts | 11 ++-- .../test-reconciliation-report-model.js | 51 +++++++++--------- 6 files changed, 65 insertions(+), 66 deletions(-) diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 5a67f9eb129..4df3761afef 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -28,11 +28,10 @@ const { indexReconciliationReport } = require('@cumulus/es-client/indexer'); const { getEsClient } = require('@cumulus/es-client/search'); const Logger = require('@cumulus/logger'); +const { ReconciliationReportPgModel } = require('@cumulus/db'); const { createInternalReconciliationReport } = require('./internal-reconciliation-report'); const { createGranuleInventoryReport } = require('./reports/granule-inventory-report'); const { createOrcaBackupReconciliationReport } = require('./reports/orca-backup-reconciliation-report'); -// const { ReconciliationReport } = require('../models'); -const { ReconciliationReportPgModel } = require ('@cumulus/db') const { errorify, filenamify } = require('../lib/utils'); const { cmrGranuleSearchParams, @@ -864,7 +863,7 @@ async function processRequest(params) { const updatedRecord = { ...reportPgRecord, status: 'Generated', - } + }; reportPgRecord = await reconciliationReportModel.upsert(knex, updatedRecord); await indexReconciliationReport(esClient, reportPgRecord, process.env.ES_INDEX); } catch (error) { @@ -887,7 +886,7 @@ async function processRequest(params) { throw error; } - return reconciliationReportModel.get(knex, { name: reportRecord.name }); + return reconciliationReportModel.get(knex, { name: reportPgRecord.name }); } async function handler(event) { diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 8f1667eacf2..90f39ae1015 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -169,5 +169,5 @@ export { GranulePgModel } from './models/granule'; export { GranulesExecutionsPgModel } from './models/granules-executions'; export { PdrPgModel } from './models/pdr'; export { ProviderPgModel } from './models/provider'; -export { ReconciliationReportPgModel } from 
'./models/reconciliation_report' +export { ReconciliationReportPgModel } from './models/reconciliation_report'; export { RulePgModel } from './models/rule'; diff --git a/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts index f3dc4c44138..1205b4021eb 100644 --- a/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts +++ b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts @@ -1,32 +1,32 @@ import { Knex } from 'knex'; export const up = async (knex: Knex): Promise => { - await knex.schema.createTable('reconciliation_reports', (table) => { - table - .increments('cumulus_id') - .primary(); - table - .text('name') - .comment('Reconciliation Report name'); - table - .enum('type', - ['Granule Inventory', 'Granule Not Found', 'Internal', 'Inventory', 'ORCA Backup']) - .comment('Type of Reconciliation Report'); - table - .enum('status', ['Generated', 'Pending', 'Failed']) - .comment('Status of Reconciliation Report'); - table - .text('location') - .comment('Location of Reconciliation Report'); - table - .jsonb('error') - .comment('Error object'); - // adds "created_at" and "updated_at" columns automatically - table - .timestamps(false, true); - table.index('status'); - table.index('updated_at'); - }); + await knex.schema.createTable('reconciliation_reports', (table) => { + table + .increments('cumulus_id') + .primary(); + table + .text('name') + .comment('Reconciliation Report name'); + table + .enum('type', + ['Granule Inventory', 'Granule Not Found', 'Internal', 'Inventory', 'ORCA Backup']) + .comment('Type of Reconciliation Report'); + table + .enum('status', ['Generated', 'Pending', 'Failed']) + .comment('Status of Reconciliation Report'); + table + .text('location') + .comment('Location of Reconciliation Report'); + table + .jsonb('error') + .comment('Error object'); + // adds "created_at" and "updated_at" columns automatically + table + .timestamps(false, true); + table.index('status'); + table.index('updated_at'); + }); }; export const down = async (knex: Knex): Promise => { diff --git a/packages/db/src/models/reconciliation_report.ts b/packages/db/src/models/reconciliation_report.ts index 57734f9f17d..cfff6377e28 100644 --- a/packages/db/src/models/reconciliation_report.ts +++ b/packages/db/src/models/reconciliation_report.ts @@ -5,10 +5,10 @@ import { TableNames } from '../tables'; import { PostgresReconciliationReport, PostgresReconciliationReportRecord } from '../types/reconciliation_report'; class ReconciliationReportPgModel extends BasePgModel { - constructor () { + constructor() { super({ tableName: TableNames.reconciliationReports, - }) + }); } create( @@ -20,7 +20,7 @@ class ReconciliationReportPgModel extends BasePgModel { - const { knexAdmin, knex } = generateLocalTestDb( + const { knexAdmin, knex } = await generateLocalTestDb( testDbName, migrationDir - ) + ); t.context.knexAdmin = knexAdmin; t.context.knex = knex; @@ -34,7 +33,7 @@ test.after.always(async (t) => { }); }); -test('ReconciliationReportPgModel.upsert() creates new reconciliation report' async (t) => { +test('ReconciliationReportPgModel.upsert() creates new reconciliation report', async (t) => { const { knex, reconciliationReportPgModel, @@ -49,27 +48,27 @@ test('ReconciliationReportPgModel.upsert() creates new reconciliation report' as ); }); -test('ReconciliationReportPgModel.upsert() overwrites a reconciliation report record' async (t) => { - 
const { - knex, - reconciliationReportPgModel, - reconciliationReportRecord, - } = t.context +test('ReconciliationReportPgModel.upsert() overwrites a reconciliation report record', async (t) => { + const { + knex, + reconciliationReportPgModel, + reconciliationReportRecord, + } = t.context; - await reconciliationReportPgModel.create(knex, reconciliationReportRecord); + await reconciliationReportPgModel.create(knex, reconciliationReportRecord); - const updatedReconciliationReport = { - ...reconciliationReportRecord, - type: 'ORCA Backup', - status: 'Failed', - } + const updatedReconciliationReport = { + ...reconciliationReportRecord, + type: 'ORCA Backup', + status: 'Failed', + }; - t.like( - await reconciliationReportPgModel.get(knex, { - name: reconciliationReportRecord.name, - }), - updatedReconciliationReport - ); + t.like( + await reconciliationReportPgModel.get(knex, { + name: reconciliationReportRecord.name, + }), + updatedReconciliationReport + ); }); -// test('ReconciliationReportPgModel.upsert() ' async (t) => { -// }); \ No newline at end of file +// test('ReconciliationReportPgModel.upsert() ', async (t) => { +// }); From ae4a0d1d114242f0316207ddda911d5b57e1005b Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 28 Aug 2024 17:17:22 -0400 Subject: [PATCH 26/61] CUMULUS-3837: fix more eslint errors --- packages/api/lambdas/create-reconciliation-report.js | 11 +++++------ packages/db/src/models/reconciliation_report.ts | 6 +++++- packages/db/src/test-utils.ts | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 4df3761afef..605bec6566f 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -859,16 +859,16 @@ async function processRequest(params) { // reportType is in ['Inventory', 'Granule Not Found'] await createReconciliationReport(recReportParams); } - // apiRecord = await reconciliationReportModel.updateStatus({ name: reportRecord.name }, 'Generated'); - const updatedRecord = { + + const generatedRecord = { ...reportPgRecord, status: 'Generated', }; - reportPgRecord = await reconciliationReportModel.upsert(knex, updatedRecord); + reportPgRecord = await reconciliationReportModel.upsert(knex, generatedRecord); await indexReconciliationReport(esClient, reportPgRecord, process.env.ES_INDEX); } catch (error) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); - const updatedErrorRecord = { + const erroredRecord = { ...reportPgRecord, status: 'Failed', error: { @@ -876,8 +876,7 @@ async function processRequest(params) { Cause: errorify(error), }, }; - // apiRecord = await reconciliationReportModel.update({ name: reportRecord.name }, updates); - reportPgRecord = await reconciliationReportModel.upsert(knex, updatedErrorRecord); + reportPgRecord = await reconciliationReportModel.upsert(knex, erroredRecord); await indexReconciliationReport( esClient, reportPgRecord, diff --git a/packages/db/src/models/reconciliation_report.ts b/packages/db/src/models/reconciliation_report.ts index cfff6377e28..fc5c19a37a9 100644 --- a/packages/db/src/models/reconciliation_report.ts +++ b/packages/db/src/models/reconciliation_report.ts @@ -2,8 +2,12 @@ import { Knex } from 'knex'; import { BasePgModel } from './base'; import { TableNames } from '../tables'; -import { PostgresReconciliationReport, PostgresReconciliationReportRecord } from '../types/reconciliation_report'; +import { + PostgresReconciliationReport, + PostgresReconciliationReportRecord, +} from '../types/reconciliation_report'; +// eslint-disable-next-line max-len class ReconciliationReportPgModel extends BasePgModel { constructor() { super({ diff --git a/packages/db/src/test-utils.ts b/packages/db/src/test-utils.ts index 2e3792e7baf..14c8d589e54 100644 --- a/packages/db/src/test-utils.ts +++ b/packages/db/src/test-utils.ts @@ -163,4 +163,4 @@ export const fakeReconciliationReportRecordFactory = ( created_at: new Date(), updated_at: new Date(), ...params, -}); \ No newline at end of file +}); From 200066537ca33574dc9516003b1745dfe2b44183 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 29 Aug 2024 17:05:52 -0400 Subject: [PATCH 27/61] CUMULUS-3837: add missing export, add unique constraint --- packages/db/src/index.ts | 1 + ...14185217_create_reconciliation_reports_table.ts | 1 + packages/db/src/test-utils.ts | 4 ++-- packages/db/src/types/reconciliation_report.ts | 4 ++-- .../models/test-reconciliation-report-model.js | 14 ++++++++------ 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 90f39ae1015..d0dbca0f7b8 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -12,6 +12,7 @@ export { fakeGranuleRecordFactory, fakePdrRecordFactory, fakeProviderRecordFactory, + fakeReconciliationReportRecordFactory, fakeRuleRecordFactory, generateLocalTestDb, } from './test-utils'; diff --git a/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts index 1205b4021eb..a29d2818b41 100644 --- a/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts +++ b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts @@ -26,6 +26,7 @@ export const up = async (knex: Knex): Promise => { .timestamps(false, true); table.index('status'); table.index('updated_at'); + table.unique(['name']); }); }; diff --git a/packages/db/src/test-utils.ts b/packages/db/src/test-utils.ts index 14c8d589e54..6870a80d9c5 100644 --- a/packages/db/src/test-utils.ts +++ b/packages/db/src/test-utils.ts @@ -147,7 +147,7 @@ export const fakeAsyncOperationRecordFactory = ( export const fakePdrRecordFactory = ( params: Partial -) => ({ +): Partial => ({ name: `pdr${cryptoRandomString({ length: 10 })}`, status: 'running', created_at: new Date(), @@ -156,7 +156,7 @@ export const fakePdrRecordFactory 
= ( export const fakeReconciliationReportRecordFactory = ( params: Partial -) => ({ +): PostgresReconciliationReport => ({ name: `reconReport${cryptoRandomString({ length: 10 })}`, type: 'Granule Inventory', status: 'Generated', diff --git a/packages/db/src/types/reconciliation_report.ts b/packages/db/src/types/reconciliation_report.ts index 3afdf403e0c..6839f42ea00 100644 --- a/packages/db/src/types/reconciliation_report.ts +++ b/packages/db/src/types/reconciliation_report.ts @@ -13,8 +13,8 @@ export type ReconciliationReportStatus = 'Generated' | 'Pending' | 'Failed'; export interface PostgresReconciliationReport { name: string, - type: ReconciliationReportType, // string - status: ReconciliationReportStatus, // string + type: ReconciliationReportType, + status: ReconciliationReportStatus, location?: string, error?: object, created_at: Date | undefined, diff --git a/packages/db/tests/models/test-reconciliation-report-model.js b/packages/db/tests/models/test-reconciliation-report-model.js index 04b642693ef..fe3c11a19c7 100644 --- a/packages/db/tests/models/test-reconciliation-report-model.js +++ b/packages/db/tests/models/test-reconciliation-report-model.js @@ -42,8 +42,9 @@ test('ReconciliationReportPgModel.upsert() creates new reconciliation report', a await reconciliationReportPgModel.upsert(knex, reconciliationReportRecord); + const pgReport = await reconciliationReportPgModel.get(knex, reconciliationReportRecord); t.like( - await reconciliationReportPgModel.get(knex, reconciliationReportRecord), + pgReport, reconciliationReportRecord ); }); @@ -63,12 +64,13 @@ test('ReconciliationReportPgModel.upsert() overwrites a reconciliation report re status: 'Failed', }; + await reconciliationReportPgModel.upsert(knex, updatedReconciliationReport); + + const pgReport = await reconciliationReportPgModel.get(knex, { + name: reconciliationReportRecord.name, + }); t.like( - await reconciliationReportPgModel.get(knex, { - name: reconciliationReportRecord.name, - }), + pgReport, updatedReconciliationReport ); }); -// test('ReconciliationReportPgModel.upsert() ', async (t) => { -// }); From cb332ec3c5132b66810506c2e46b45ef8f86f8d3 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 4 Sep 2024 21:48:40 -0400 Subject: [PATCH 28/61] CUMULUS-3837: fix test-create-reconciliation-report --- .../lambdas/create-reconciliation-report.js | 29 ++++++----- .../test-create-reconciliation-report.js | 51 ++++++++++--------- packages/db/src/index.ts | 3 ++ .../src/translate/reconciliation_reports.ts | 26 ++++++++++ 4 files changed, 73 insertions(+), 36 deletions(-) create mode 100644 packages/db/src/translate/reconciliation_reports.ts diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 605bec6566f..3e7a4550834 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -28,7 +28,10 @@ const { indexReconciliationReport } = require('@cumulus/es-client/indexer'); const { getEsClient } = require('@cumulus/es-client/search'); const Logger = require('@cumulus/logger'); -const { ReconciliationReportPgModel } = require('@cumulus/db'); +const { + ReconciliationReportPgModel, + translatePostgresReconciliationReportToApiReconciliationReport, +} = require('@cumulus/db'); const { createInternalReconciliationReport } = require('./internal-reconciliation-report'); const { createGranuleInventoryReport } = require('./reports/granule-inventory-report'); const { 
createOrcaBackupReconciliationReport } = require('./reports/orca-backup-reconciliation-report'); @@ -824,18 +827,17 @@ async function processRequest(params) { if (reportType === 'Granule Inventory') reportKey = reportKey.replace('.json', '.csv'); // add request to database - // TODO: do this with pg/knex - // const reconciliationReportModel = new ReconciliationReport(); - const reconciliationReportModel = new ReconciliationReportPgModel(); + const reconciliationReportPgModel = new ReconciliationReportPgModel(); const builtReportRecord = { name: reportRecordName, type: reportType, status: 'Pending', location: buildS3Uri(systemBucket, reportKey), }; - let [reportPgRecord] = await reconciliationReportModel.create(knex, builtReportRecord); - await indexReconciliationReport(esClient, reportPgRecord, process.env.ES_INDEX); - log.info(`Report added to database as pending: ${JSON.stringify(reportPgRecord)}.`); + let [reportPgRecord] = await reconciliationReportPgModel.create(knex, builtReportRecord); + let reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); + log.info(`Report added to database as pending: ${JSON.stringify(reportApiRecord)}.`); const concurrency = env.CONCURRENCY || 3; @@ -864,8 +866,9 @@ async function processRequest(params) { ...reportPgRecord, status: 'Generated', }; - reportPgRecord = await reconciliationReportModel.upsert(knex, generatedRecord); - await indexReconciliationReport(esClient, reportPgRecord, process.env.ES_INDEX); + [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, generatedRecord); + reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); } catch (error) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); const erroredRecord = { @@ -876,16 +879,18 @@ async function processRequest(params) { Cause: errorify(error), }, }; - reportPgRecord = await reconciliationReportModel.upsert(knex, erroredRecord); + [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, erroredRecord); + reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); await indexReconciliationReport( esClient, - reportPgRecord, + reportApiRecord, process.env.ES_INDEX ); throw error; } - return reconciliationReportModel.get(knex, { name: reportPgRecord.name }); + reportPgRecord = await reconciliationReportPgModel.get(knex, { name: builtReportRecord.name }); + return translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); } async function handler(event) { diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report.js b/packages/api/tests/lambdas/test-create-reconciliation-report.js index 314d6db87a3..0296c6a7e3b 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report.js @@ -25,19 +25,21 @@ const { getBucketsConfigKey } = require('@cumulus/common/stack'); const { constructCollectionId } = require('@cumulus/message/Collections'); const { randomString, randomId } = require('@cumulus/common/test-utils'); const { - CollectionPgModel, - destroyLocalTestDb, generateLocalTestDb, + destroyLocalTestDb, localStackConnectionEnv, + migrationDir, + CollectionPgModel, + ExecutionPgModel, FilePgModel, GranulePgModel, + ReconciliationReportPgModel, fakeCollectionRecordFactory, + fakeExecutionRecordFactory, fakeGranuleRecordFactory, - migrationDir, - translateApiGranuleToPostgresGranule, translatePostgresCollectionToApiCollection, - ExecutionPgModel, - fakeExecutionRecordFactory, + translateApiGranuleToPostgresGranule, + translatePostgresReconciliationReportToApiReconciliationReport, upsertGranuleWithExecutionJoinRecord, } = require('@cumulus/db'); const { getDistributionBucketMapKey } = require('@cumulus/distribution-utils'); @@ -355,11 +357,10 @@ test.before(async (t) => { t.context.executionPgModel = new ExecutionPgModel(); t.context.filePgModel = new FilePgModel(); t.context.granulePgModel = new GranulePgModel(); + t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); }); test.beforeEach(async (t) => { - process.env.ReconciliationReportsTable = randomId('reconciliationTable'); - t.context.bucketsToCleanup = []; t.context.stackName = randomId('stack'); t.context.systemBucket = randomId('bucket'); @@ -368,8 +369,6 @@ test.beforeEach(async (t) => { await awsServices.s3().createBucket({ Bucket: t.context.systemBucket }) .then(() => t.context.bucketsToCleanup.push(t.context.systemBucket)); - await new models.ReconciliationReport().createTable(); - const cmrSearchStub = sinon.stub(CMR.prototype, 'searchConcept'); cmrSearchStub.withArgs('collections').resolves([]); cmrSearchStub.withArgs('granules').resolves([]); @@ -400,10 +399,7 @@ test.beforeEach(async (t) => { test.afterEach.always(async (t) => { await Promise.all( - flatten([ - t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket), - new models.ReconciliationReport().deleteTable(), - ]) + flatten(t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket)) ); await t.context.executionPgModel.delete( t.context.knex, @@ -1867,17 +1863,21 @@ test.serial('When report creation fails, reconciliation report status is set to }; await t.throwsAsync(handler(event)); - const reportRecord = await new 
models.ReconciliationReport().get({ name: reportName }); - t.is(reportRecord.status, 'Failed'); - t.is(reportRecord.type, 'Inventory'); + + const reportPgRecord = await t.context.reconciliationReportPgModel.get(t.context.knex, { name: reportName }); + // reconciliation report lambda outputs the translated API version, not the PG version, so + // it should be translated for comparison, at least for the comparison with the ES (API) version + const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + t.is(reportApiRecord.status, 'Failed'); + t.is(reportApiRecord.type, 'Inventory'); const reportKey = `${t.context.stackName}/reconciliation-reports/${reportName}.json`; const report = await getJsonS3Object(t.context.systemBucket, reportKey); t.is(report.status, 'Failed'); t.truthy(report.error); - const esRecord = await t.context.esReportClient.get(reportRecord.name); - t.like(esRecord, reportRecord); + const esRecord = await t.context.esReportClient.get(reportName); + t.like(esRecord, reportApiRecord); }); test.serial('A valid internal reconciliation report is generated when ES and DB are in sync', async (t) => { @@ -2214,15 +2214,18 @@ test.serial('When there is an error for an ORCA backup report, it throws', async { message: 'ORCA error' } ); - const reportRecord = await new models.ReconciliationReport().get({ name: reportName }); - t.is(reportRecord.status, 'Failed'); - t.is(reportRecord.type, event.reportType); + const reportPgRecord = await t.context.reconciliationReportPgModel.get(t.context.knex, { name: reportName }); + // reconciliation report lambda outputs the translated API version, not the PG version, so + // it should be translated for comparison, at least for the comparison with the ES (API) version + const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + t.is(reportApiRecord.status, 'Failed'); + t.is(reportApiRecord.type, event.reportType); const reportKey = `${t.context.stackName}/reconciliation-reports/${reportName}.json`; const report = await getJsonS3Object(t.context.systemBucket, reportKey); t.is(report.status, 'Failed'); t.is(report.reportType, event.reportType); - const esRecord = await t.context.esReportClient.get(reportRecord.name); - t.like(esRecord, reportRecord); + const esRecord = await t.context.esReportClient.get(reportName); + t.like(esRecord, reportApiRecord); }); diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index d0dbca0f7b8..b0967eaf3a2 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -103,6 +103,9 @@ export { translateApiProviderToPostgresProvider, translatePostgresProviderToApiProvider, } from './translate/providers'; +export { + translatePostgresReconciliationReportToApiReconciliationReport, +} from './translate/reconciliation_reports'; export { translatePostgresRuleToApiRule, translateApiRuleToPostgresRule, diff --git a/packages/db/src/translate/reconciliation_reports.ts b/packages/db/src/translate/reconciliation_reports.ts new file mode 100644 index 00000000000..a3512691c38 --- /dev/null +++ b/packages/db/src/translate/reconciliation_reports.ts @@ -0,0 +1,26 @@ +// import Logger from '@cumulus/logger'; +import { PostgresReconciliationReportRecord } from '../types/reconciliation_report'; + +// const log = new Logger({ sender: '@cumulus/db/translate/reconciliation-reports' }); + +/** + * Generate an API Reconciliation Report record from a PostgreSQL record. 
+ * + * @param {Object} pgReconciliationReport - a PostgreSQL reconciliation report record + * @returns {Object} an API reconciliation report record + */ +export const translatePostgresReconciliationReportToApiReconciliationReport = ( + pgReconciliationReport: PostgresReconciliationReportRecord +) => { + const apiReconciliationReport = { + // id or cumulus_id? + name: pgReconciliationReport.name, + type: pgReconciliationReport.type, + status: pgReconciliationReport.status, + location: pgReconciliationReport.location, + error: pgReconciliationReport.error, + createdAt: pgReconciliationReport.created_at?.getTime(), + updatedAt: pgReconciliationReport.updated_at?.getTime(), + }; + return apiReconciliationReport; +}; \ No newline at end of file From f47e1f231ae9a9e956e00b952d042ec679eacef9 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 4 Sep 2024 22:20:22 -0400 Subject: [PATCH 29/61] CUMULUS-3837: remove no longer needed dynamo table in test-internal-reconciliation-report --- .../lambdas/test-create-reconciliation-report.js | 4 +--- .../lambdas/test-internal-reconciliation-report.js | 12 +----------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report.js b/packages/api/tests/lambdas/test-create-reconciliation-report.js index 0296c6a7e3b..6f49a26537e 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report.js @@ -398,9 +398,7 @@ test.beforeEach(async (t) => { }); test.afterEach.always(async (t) => { - await Promise.all( - flatten(t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket)) - ); + await Promise.all(flatten(t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket))); await t.context.executionPgModel.delete( t.context.knex, { cumulus_id: t.context.executionCumulusId } diff --git a/packages/api/tests/lambdas/test-internal-reconciliation-report.js b/packages/api/tests/lambdas/test-internal-reconciliation-report.js index e52473138b7..c9496fd4596 100644 --- a/packages/api/tests/lambdas/test-internal-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-internal-reconciliation-report.js @@ -46,7 +46,6 @@ const { internalRecReportForGranules, } = require('../../lambdas/internal-reconciliation-report'); const { normalizeEvent } = require('../../lib/reconciliationReport/normalizeEvent'); -const models = require('../../models'); let esAlias; let esIndex; @@ -61,8 +60,6 @@ test.before((t) => { }); test.beforeEach(async (t) => { - process.env.ReconciliationReportsTable = randomId('reconciliationTable'); - t.context.bucketsToCleanup = []; t.context.stackName = randomId('stack'); t.context.systemBucket = randomId('bucket'); @@ -71,8 +68,6 @@ test.beforeEach(async (t) => { await awsServices.s3().createBucket({ Bucket: t.context.systemBucket }) .then(() => t.context.bucketsToCleanup.push(t.context.systemBucket)); - await new models.ReconciliationReport().createTable(); - esAlias = randomId('esalias'); esIndex = randomId('esindex'); process.env.ES_INDEX = esAlias; @@ -100,12 +95,7 @@ test.afterEach.always(async (t) => { knexAdmin: t.context.knexAdmin, testDbName: t.context.testDbName, }); - await Promise.all( - flatten([ - t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket), - new models.ReconciliationReport().deleteTable(), - ]) - ); + await Promise.all(flatten(t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket))); await esClient.client.indices.delete({ index: esIndex }); }); From 
50ab0e5db8cff3bcb48e5339e04caa287f0a5b44 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 5 Sep 2024 08:32:28 -0400 Subject: [PATCH 30/61] CUMULUS-3837: fix lint errors --- .../CreateReconciliationReportSpec.js | 1 + .../api/lambdas/create-reconciliation-report.js | 10 +++++----- .../lambdas/test-create-reconciliation-report.js | 13 ++++++++----- packages/db/src/translate/reconciliation_reports.ts | 7 ++----- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index ea3a35db7c4..7b826443d60 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -354,6 +354,7 @@ describe('When there are granule differences and granule reconciliation is run', console.log('XXX Completed for setupCollectionAndTestData'); // Write an extra file to the DynamoDB Files table + // TODO: is ^ comment above correct, or should it be modified? extraFileInDb = { bucket: protectedBucket, key: randomString(), diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 3e7a4550834..3931babfac3 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -28,8 +28,8 @@ const { indexReconciliationReport } = require('@cumulus/es-client/indexer'); const { getEsClient } = require('@cumulus/es-client/search'); const Logger = require('@cumulus/logger'); -const { - ReconciliationReportPgModel, +const { + ReconciliationReportPgModel, translatePostgresReconciliationReportToApiReconciliationReport, } = require('@cumulus/db'); const { createInternalReconciliationReport } = require('./internal-reconciliation-report'); @@ -835,7 +835,7 @@ async function processRequest(params) { location: buildS3Uri(systemBucket, reportKey), }; let [reportPgRecord] = await reconciliationReportPgModel.create(knex, builtReportRecord); - let reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + let reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); log.info(`Report added to database as pending: ${JSON.stringify(reportApiRecord)}.`); @@ -870,7 +870,7 @@ async function processRequest(params) { reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); } catch (error) { - log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. ${error}`); + log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); // eslint-disable-line max-len const erroredRecord = { ...reportPgRecord, status: 'Failed', @@ -880,7 +880,7 @@ async function processRequest(params) { }, }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, erroredRecord); - reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len await indexReconciliationReport( esClient, reportApiRecord, diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report.js b/packages/api/tests/lambdas/test-create-reconciliation-report.js index 6f49a26537e..3fe9203795f 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report.js @@ -55,7 +55,6 @@ const { const { handler: unwrappedHandler, reconciliationReportForGranules, reconciliationReportForGranuleFiles, } = require('../../lambdas/create-reconciliation-report'); -const models = require('../../models'); const { normalizeEvent } = require('../../lib/reconciliationReport/normalizeEvent'); const ORCASearchCatalogQueue = require('../../lib/ORCASearchCatalogQueue'); @@ -1862,10 +1861,12 @@ test.serial('When report creation fails, reconciliation report status is set to await t.throwsAsync(handler(event)); - const reportPgRecord = await t.context.reconciliationReportPgModel.get(t.context.knex, { name: reportName }); + const reportPgRecord = await t.context.reconciliationReportPgModel.get( + t.context.knex, { name: reportName } + ); // reconciliation report lambda outputs the translated API version, not the PG version, so // it should be translated for comparison, at least for the comparison with the ES (API) version - const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len t.is(reportApiRecord.status, 'Failed'); t.is(reportApiRecord.type, 'Inventory'); @@ -2212,10 +2213,12 @@ test.serial('When there is an error for an ORCA backup report, it throws', async { message: 'ORCA error' } ); - const reportPgRecord = await t.context.reconciliationReportPgModel.get(t.context.knex, { name: reportName }); + const reportPgRecord = await t.context.reconciliationReportPgModel.get( + t.context.knex, { name: reportName } + ); // reconciliation report lambda outputs the translated API version, not the PG version, so // it should be translated for comparison, at least for the comparison with the ES (API) version - const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len t.is(reportApiRecord.status, 'Failed'); t.is(reportApiRecord.type, event.reportType); diff --git a/packages/db/src/translate/reconciliation_reports.ts b/packages/db/src/translate/reconciliation_reports.ts index a3512691c38..819fe85c728 100644 --- a/packages/db/src/translate/reconciliation_reports.ts +++ b/packages/db/src/translate/reconciliation_reports.ts @@ -1,11 +1,8 @@ -// import Logger from '@cumulus/logger'; import { PostgresReconciliationReportRecord } from '../types/reconciliation_report'; -// const log = new Logger({ sender: '@cumulus/db/translate/reconciliation-reports' }); - /** * Generate an API 
Reconciliation Report record from a PostgreSQL record. - * + * * @param {Object} pgReconciliationReport - a PostgreSQL reconciliation report record * @returns {Object} an API reconciliation report record */ @@ -23,4 +20,4 @@ export const translatePostgresReconciliationReportToApiReconciliationReport = ( updatedAt: pgReconciliationReport.updated_at?.getTime(), }; return apiReconciliationReport; -}; \ No newline at end of file +}; From 758140e4d6b515021d546695726a1de66b70df0d Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 5 Sep 2024 10:21:07 -0400 Subject: [PATCH 31/61] CUMULUS-3837: fix another lint error --- packages/api/lambdas/create-reconciliation-report.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 3931babfac3..30be0fbf2a2 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -867,7 +867,7 @@ async function processRequest(params) { status: 'Generated', }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, generatedRecord); - reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); } catch (error) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. ${error}`); // eslint-disable-line max-len From bbc27fd9f1eb6c0a3bdbc0c4ce9eee0c4fceb0e8 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 5 Sep 2024 12:24:33 -0400 Subject: [PATCH 32/61] CUMULUS-3837: remove superfluous comment --- tf-modules/archive/reconciliation_report.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf-modules/archive/reconciliation_report.tf b/tf-modules/archive/reconciliation_report.tf index cd55d95db10..36089d77949 100644 --- a/tf-modules/archive/reconciliation_report.tf +++ b/tf-modules/archive/reconciliation_report.tf @@ -17,7 +17,7 @@ resource "aws_lambda_function" "create_reconciliation_report" { ES_HOST = var.elasticsearch_hostname ES_SCROLL = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_duration", "6m") ES_SCROLL_SIZE = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_size", 1000) - ReconciliationReportsTable = var.dynamo_tables.reconciliation_reports.name # need to change? 
+ ReconciliationReportsTable = var.dynamo_tables.reconciliation_reports.name stackName = var.prefix system_bucket = var.system_bucket cmr_client_id = var.cmr_client_id From 53aa73352217eb29b5c58203b6e6eece91e03eed Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Thu, 5 Sep 2024 19:10:24 -0400 Subject: [PATCH 33/61] CUMULUS-3231: Update PDRs LIST endpoint to query RDS (#3784) * CUMULUS-3231:Update PDRs LIST endpoint to query RDS * Unit test on PdrSearch * test description * pdr endpoint test * fix tsc:listEmittedFiles error * fix test typo --- CHANGELOG.md | 2 + packages/api/endpoints/pdrs.js | 10 +- .../api/lib/writeRecords/write-execution.js | 4 +- packages/api/tests/endpoints/test-pdrs.js | 62 +- packages/db/src/index.ts | 3 + packages/db/src/search/BaseSearch.ts | 22 + packages/db/src/search/GranuleSearch.ts | 5 - packages/db/src/search/PdrSearch.ts | 128 +++ packages/db/src/search/field-mapping.ts | 22 +- packages/db/src/translate/pdr.ts | 66 +- packages/db/tests/search/test-PdrSearch.js | 751 ++++++++++++++++++ 11 files changed, 1012 insertions(+), 63 deletions(-) create mode 100644 packages/db/src/search/PdrSearch.ts create mode 100644 packages/db/tests/search/test-PdrSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 069b620ddc7..6ee64f10e99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Remove ElasticSearch queries from Rule LIST endpoint - **CUMULUS-3230** - Remove ElasticSearch dependency from Rule Endpoints +- **CUMULUS-3231** + - Updated API `pdrs` `LIST` endpoint to query postgres - **CUMULUS-3232** - Update API PDR endpoints `DEL` and `GET` to not update Elasticsearch - **CUMULUS-3233** diff --git a/packages/api/endpoints/pdrs.js b/packages/api/endpoints/pdrs.js index 3490da47b08..5725039f3d5 100644 --- a/packages/api/endpoints/pdrs.js +++ b/packages/api/endpoints/pdrs.js @@ -11,7 +11,7 @@ const { createRejectableTransaction, } = require('@cumulus/db'); const { RecordDoesNotExist } = require('@cumulus/errors'); -const { Search } = require('@cumulus/es-client/search'); +const { PdrSearch } = require('@cumulus/db'); const Logger = require('@cumulus/logger'); const log = new Logger({ sender: '@cumulus/api/pdrs' }); @@ -24,12 +24,8 @@ const log = new Logger({ sender: '@cumulus/api/pdrs' }); * @returns {Promise} the promise of express response object */ async function list(req, res) { - const search = new Search( - { queryStringParameters: req.query }, - 'pdr', - process.env.ES_INDEX - ); - const result = await search.query(); + const dbSearch = new PdrSearch({ queryStringParameters: req.query }); + const result = await dbSearch.query(); return res.send(result); } diff --git a/packages/api/lib/writeRecords/write-execution.js b/packages/api/lib/writeRecords/write-execution.js index d562ddbbf37..0fa24e98211 100644 --- a/packages/api/lib/writeRecords/write-execution.js +++ b/packages/api/lib/writeRecords/write-execution.js @@ -1,3 +1,5 @@ +// @ts-check + const isNil = require('lodash/isNil'); const isUndefined = require('lodash/isUndefined'); const omitBy = require('lodash/omitBy'); @@ -64,7 +66,7 @@ const buildExecutionRecord = ({ const record = { arn, status: getMetaStatus(cumulusMessage), - url: getExecutionUrlFromArn(arn), + url: arn ? 
getExecutionUrlFromArn(arn) : undefined, cumulus_version: getMessageCumulusVersion(cumulusMessage), tasks: getMessageWorkflowTasks(cumulusMessage), workflow_name: getMessageWorkflowName(cumulusMessage), diff --git a/packages/api/tests/endpoints/test-pdrs.js b/packages/api/tests/endpoints/test-pdrs.js index 90b4cb125c6..65abd558693 100644 --- a/packages/api/tests/endpoints/test-pdrs.js +++ b/packages/api/tests/endpoints/test-pdrs.js @@ -3,6 +3,7 @@ const test = require('ava'); const request = require('supertest'); const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); const awsServices = require('@cumulus/aws-client/services'); const { recursivelyDeleteS3Bucket, @@ -26,12 +27,6 @@ const { fakePdrRecordFactory, fakeProviderRecordFactory, } = require('@cumulus/db/dist/test-utils'); -const indexer = require('@cumulus/es-client/indexer'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { constructCollectionId } = require('@cumulus/message/Collections'); const { @@ -57,7 +52,6 @@ const pdrS3Key = (pdrName) => `${process.env.stackName}/pdrs/${pdrName}`; // create all the variables needed across this test const testDbName = `pdrs_${cryptoRandomString({ length: 10 })}`; -let fakePdrs; let jwtAuthToken; let accessTokenModel; @@ -74,15 +68,6 @@ test.before(async (t) => { t.context.knex = knex; t.context.knexAdmin = knexAdmin; - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esPdrsClient = new Search( - {}, - 'pdr', - t.context.esIndex - ); - // create a fake bucket await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); @@ -96,14 +81,6 @@ test.before(async (t) => { jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - // create fake PDR records - fakePdrs = ['completed', 'failed'].map(fakePdrFactory); - await Promise.all( - fakePdrs.map( - (pdr) => indexer.indexPdr(t.context.esClient, pdr, t.context.esIndex) - ) - ); - // Create a PG Collection t.context.testPgCollection = fakeCollectionRecordFactory(); const collectionPgModel = new CollectionPgModel(); @@ -132,11 +109,36 @@ test.before(async (t) => { t.context.testPgExecution ); t.context.executionCumulusId = pgExecution.cumulus_id; + const timestamp = new Date(); + t.context.pdrs = range(2).map(() => fakePdrRecordFactory({ + collection_cumulus_id: t.context.collectionCumulusId, + provider_cumulus_id: t.context.providerCumulusId, + execution_cumulus_id: t.context.executionCumulusId, + progress: 0.5, + pan_sent: false, + pan_message: `pan${cryptoRandomString({ length: 10 })}`, + stats: { + processing: 0, + completed: 0, + failed: 0, + total: 0, + }, + address: `address${cryptoRandomString({ length: 10 })}`, + original_url: 'https://example.com', + duration: 6.8, + created_at: timestamp, + updated_at: timestamp, + })); + + t.context.pdrPgModel = new PdrPgModel(); + t.context.pgPdrs = await t.context.pdrPgModel.insert( + knex, + t.context.pdrs + ); }); test.after.always(async (t) => { await accessTokenModel.deleteTable(); - await cleanupTestIndex(t.context); await recursivelyDeleteS3Bucket(process.env.system_bucket); await destroyLocalTestDb({ knex: t.context.knex, @@ -206,7 +208,7 @@ test('CUMULUS-912 DELETE with pathParameters and with an invalid access token re test.todo('CUMULUS-912 DELETE with pathParameters and with an unauthorized user returns an unauthorized 
response'); -test('default returns list of pdrs', async (t) => { +test.serial('default returns list of pdrs', async (t) => { const response = await request(app) .get('/pdrs') .set('Accept', 'application/json') @@ -216,15 +218,15 @@ test('default returns list of pdrs', async (t) => { const { meta, results } = response.body; t.is(results.length, 2); t.is(meta.stack, process.env.stackName); - t.is(meta.table, 'pdr'); + t.is(meta.table, 'pdrs'); t.is(meta.count, 2); - const pdrNames = fakePdrs.map((i) => i.pdrName); + const pdrNames = t.context.pdrs.map((i) => i.name); results.forEach((r) => { t.true(pdrNames.includes(r.pdrName)); }); }); -test('GET returns an existing pdr', async (t) => { +test.serial('GET returns an existing pdr', async (t) => { const timestamp = new Date(); const newPGPdr = { @@ -289,7 +291,7 @@ test('DELETE returns a 404 if PostgreSQL PDR cannot be found', async (t) => { t.is(response.body.message, 'No record found'); }); -test('Deleting a PDR that exists in PostgreSQL succeeds', async (t) => { +test.serial('Deleting a PDR that exists in PostgreSQL succeeds', async (t) => { const { collectionCumulusId, providerCumulusId, diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index d007f7a58fc..97acb67d707 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -151,6 +151,9 @@ export { export { GranuleSearch, } from './search/GranuleSearch'; +export { + PdrSearch, +} from './search/PdrSearch'; export { ProviderSearch, } from './search/ProviderSearch'; diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index aaf44d23a9e..22a966c2e2a 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -65,6 +65,16 @@ abstract class BaseSearch { || terms?.collectionVersion); } + /** + * check if joined executions table search is needed + * + * @returns whether execution search is needed + */ + protected searchExecution(): boolean { + const { not, term, terms } = this.dbQueryParameters; + return !!(not?.executionArn || term?.executionArn || terms?.executionArn); + } + /** * check if joined pdrs table search is needed * @@ -196,6 +206,9 @@ abstract class BaseSearch { case 'collectionVersion': [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.collection_cumulus_id`)); break; + case 'executionArn': + [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.execution_cumulus_id`)); + break; case 'providerName': [countQuery, searchQuery].forEach((query) => query?.[queryMethod](`${this.tableName}.provider_cumulus_id`)); break; @@ -280,6 +293,9 @@ abstract class BaseSearch { case 'collectionVersion': [countQuery, searchQuery].forEach((query) => query?.where(`${collectionsTable}.version`, value)); break; + case 'executionArn': + [countQuery, searchQuery].forEach((query) => query?.where(`${executionsTable}.arn`, value)); + break; case 'providerName': [countQuery, searchQuery].forEach((query) => query?.where(`${providersTable}.name`, value)); break; @@ -343,6 +359,9 @@ abstract class BaseSearch { Object.entries(omit(terms, ['collectionName', 'collectionVersion'])).forEach(([name, value]) => { switch (name) { + case 'executionArn': + [countQuery, searchQuery].forEach((query) => query?.whereIn(`${executionsTable}.arn`, value)); + break; case 'providerName': [countQuery, searchQuery].forEach((query) => query?.whereIn(`${providersTable}.name`, value)); break; @@ -399,6 +418,9 @@ abstract class BaseSearch { } Object.entries(omit(term, 
['collectionName', 'collectionVersion'])).forEach(([name, value]) => { switch (name) { + case 'executionArn': + [countQuery, searchQuery].forEach((query) => query?.whereNot(`${executionsTable}.arn`, value)); + break; case 'providerName': [countQuery, searchQuery].forEach((query) => query?.whereNot(`${providersTable}.name`, value)); break; diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts index 6797fff75d9..0584efc4c3b 100644 --- a/packages/db/src/search/GranuleSearch.ts +++ b/packages/db/src/search/GranuleSearch.ts @@ -15,14 +15,9 @@ import { TableNames } from '../tables'; const log = new Logger({ sender: '@cumulus/db/GranuleSearch' }); interface GranuleRecord extends BaseRecord, PostgresGranuleRecord { - cumulus_id: number, - updated_at: Date, - collection_cumulus_id: number, collectionName: string, collectionVersion: string, - pdr_cumulus_id: number, pdrName?: string, - provider_cumulus_id?: number, providerName?: string, } diff --git a/packages/db/src/search/PdrSearch.ts b/packages/db/src/search/PdrSearch.ts new file mode 100644 index 00000000000..b0f53ae258d --- /dev/null +++ b/packages/db/src/search/PdrSearch.ts @@ -0,0 +1,128 @@ +import { Knex } from 'knex'; +import pick from 'lodash/pick'; + +import { ApiPdrRecord } from '@cumulus/types/api/pdrs'; +import Logger from '@cumulus/logger'; + +import { BaseRecord } from '../types/base'; +import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { PostgresPdrRecord } from '../types/pdr'; +import { translatePostgresPdrToApiPdrWithoutDbQuery } from '../translate/pdr'; +import { TableNames } from '../tables'; + +const log = new Logger({ sender: '@cumulus/db/PdrSearch' }); + +interface PdrRecord extends BaseRecord, PostgresPdrRecord { + collectionName: string, + collectionVersion: string, + executionArn?: string, + providerName: string, +} + +/** + * Class to build and execute db search query for PDRs + */ +export class PdrSearch extends BaseSearch { + constructor(event: QueryEvent) { + super(event, 'pdr'); + } + + /** + * Build basic query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + protected buildBasicQuery(knex: Knex) + : { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const { + collections: collectionsTable, + providers: providersTable, + executions: executionsTable, + } = TableNames; + const countQuery = knex(this.tableName) + .count('*'); + + const searchQuery = knex(this.tableName) + .select(`${this.tableName}.*`) + .select({ + providerName: `${providersTable}.name`, + collectionName: `${collectionsTable}.name`, + collectionVersion: `${collectionsTable}.version`, + executionArn: `${executionsTable}.arn`, + }) + .innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`) + .innerJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + + if (this.searchCollection()) { + countQuery.innerJoin(collectionsTable, `${this.tableName}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`); + } + + if (this.searchProvider()) { + countQuery.innerJoin(providersTable, `${this.tableName}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + } + + if (this.searchExecution()) { + countQuery.innerJoin(executionsTable, `${this.tableName}.execution_cumulus_id`, `${executionsTable}.cumulus_id`); + searchQuery.innerJoin(executionsTable, `${this.tableName}.execution_cumulus_id`, 
`${executionsTable}.cumulus_id`); + } else { + searchQuery.leftJoin(executionsTable, `${this.tableName}.execution_cumulus_id`, `${executionsTable}.cumulus_id`); + } + + return { countQuery, searchQuery }; + } + + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters ?? this.dbQueryParameters; + if (infix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${infix}%`)); + } + if (prefix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `${prefix}%`)); + } + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres records returned from query + * @returns translated api records + */ + protected translatePostgresRecordsToApiRecords(pgRecords: PdrRecord[]) + : Partial[] { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const { fields } = this.dbQueryParameters; + const apiRecords = pgRecords.map((item: PdrRecord) => { + const pdrPgRecord = item; + const collectionPgRecord = { + cumulus_id: item.collection_cumulus_id, + name: item.collectionName, + version: item.collectionVersion, + }; + const providerPgRecord = { name: item.providerName }; + const executionArn = item.executionArn; + const apiRecord = translatePostgresPdrToApiPdrWithoutDbQuery({ + pdrPgRecord, collectionPgRecord, executionArn, providerPgRecord, + }); + return fields ? 
pick(apiRecord, fields) : apiRecord; + }); + return apiRecords; + } +} diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 9f654700242..41a64bb0530 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -158,7 +158,6 @@ const collectionMapping : { [key: string]: Function } = { }), }; -// TODO add and verify all queryable fields for the following record types const executionMapping : { [key: string]: Function } = { arn: (value?: string) => ({ arn: value, @@ -208,12 +207,30 @@ const executionMapping : { [key: string]: Function } = { }; const pdrMapping : { [key: string]: Function } = { + address: (value?: string) => ({ + address: value, + }), createdAt: (value?: string) => ({ created_at: value && new Date(Number(value)), }), + duration: (value?: string) => ({ + duration: value && Number(value), + }), + originalUrl: (value?: string) => ({ + original_url: value, + }), + PANSent: (value?: string) => ({ + pan_sent: (value === 'true'), + }), + PANmessage: (value?: string) => ({ + pan_message: value, + }), pdrName: (value?: string) => ({ name: value, }), + progress: (value?: string) => ({ + progress: value && Number(value), + }), status: (value?: string) => ({ status: value, }), @@ -234,6 +251,9 @@ const pdrMapping : { [key: string]: Function } = { provider: (value?: string) => ({ providerName: value, }), + execution: (value?: string) => ({ + executionArn: value && value.split('/').pop(), + }), }; const providerMapping : { [key: string]: Function } = { diff --git a/packages/db/src/translate/pdr.ts b/packages/db/src/translate/pdr.ts index c38a7c55e69..d2c010825e0 100644 --- a/packages/db/src/translate/pdr.ts +++ b/packages/db/src/translate/pdr.ts @@ -9,6 +9,8 @@ import { CollectionPgModel } from '../models/collection'; import { ExecutionPgModel } from '../models/execution'; import { ProviderPgModel } from '../models/provider'; import { PostgresPdr, PostgresPdrRecord } from '../types/pdr'; +import { PostgresCollectionRecord } from '../types/collection'; +import { PostgresProviderRecord } from '../types/provider'; /** * Generate a Postgres PDR record from a DynamoDB record. @@ -57,6 +59,45 @@ export const translateApiPdrToPostgresPdr = async ( return removeNilProperties(pdrRecord); }; +/** + * Generate an API PDR object from the PDR and associated Postgres objects without + * querying the database + * + * @param params - params + * @param params.pdrPgRecord - PDR from Postgres + * @param params.collectionPgRecord - Collection from Postgres + * @param [params.executionArn] - executionUrl from Postgres + * @param [params.providerPgRecord] - provider from Postgres + * @returns An API PDR + */ +export const translatePostgresPdrToApiPdrWithoutDbQuery = ({ + pdrPgRecord, + collectionPgRecord, + executionArn, + providerPgRecord, +}: { + pdrPgRecord: PostgresPdrRecord, + collectionPgRecord: Pick, + executionArn?: string, + providerPgRecord: Pick, +}): ApiPdr => removeNilProperties({ + pdrName: pdrPgRecord.name, + provider: providerPgRecord?.name, + collectionId: constructCollectionId(collectionPgRecord.name, collectionPgRecord.version), + status: pdrPgRecord.status, + createdAt: pdrPgRecord.created_at.getTime(), + progress: pdrPgRecord.progress, + execution: executionArn ? 
getExecutionUrlFromArn(executionArn) : undefined, + PANSent: pdrPgRecord.pan_sent, + PANmessage: pdrPgRecord.pan_message, + stats: pdrPgRecord.stats, + address: pdrPgRecord.address, + originalUrl: pdrPgRecord.original_url, + timestamp: (pdrPgRecord.timestamp ? pdrPgRecord.timestamp.getTime() : undefined), + duration: pdrPgRecord.duration, + updatedAt: pdrPgRecord.updated_at.getTime(), +}); + /** * Generate a Postgres PDR record from a DynamoDB record. * @@ -85,23 +126,10 @@ export const translatePostgresPdrToApiPdr = async ( cumulus_id: postgresPDR.execution_cumulus_id, }) : undefined; - const apiPdr: ApiPdr = { - pdrName: postgresPDR.name, - provider: provider.name, - collectionId: constructCollectionId(collection.name, collection.version), - status: postgresPDR.status, - createdAt: postgresPDR.created_at.getTime(), - progress: postgresPDR.progress, - execution: execution ? getExecutionUrlFromArn(execution.arn) : undefined, - PANSent: postgresPDR.pan_sent, - PANmessage: postgresPDR.pan_message, - stats: postgresPDR.stats, - address: postgresPDR.address, - originalUrl: postgresPDR.original_url, - timestamp: (postgresPDR.timestamp ? postgresPDR.timestamp.getTime() : undefined), - duration: postgresPDR.duration, - updatedAt: postgresPDR.updated_at.getTime(), - }; - - return removeNilProperties(apiPdr); + return translatePostgresPdrToApiPdrWithoutDbQuery({ + pdrPgRecord: postgresPDR, + collectionPgRecord: collection, + executionArn: execution?.arn, + providerPgRecord: provider, + }); }; diff --git a/packages/db/tests/search/test-PdrSearch.js b/packages/db/tests/search/test-PdrSearch.js new file mode 100644 index 00000000000..f972f904f53 --- /dev/null +++ b/packages/db/tests/search/test-PdrSearch.js @@ -0,0 +1,751 @@ +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); + +const { constructCollectionId } = require('@cumulus/message/Collections'); + +const { + CollectionPgModel, + fakeCollectionRecordFactory, + fakeExecutionRecordFactory, + fakePdrRecordFactory, + fakeProviderRecordFactory, + generateLocalTestDb, + ExecutionPgModel, + PdrSearch, + PdrPgModel, + ProviderPgModel, + migrationDir, +} = require('../../dist'); + +const testDbName = `pdr_${cryptoRandomString({ length: 10 })}`; + +// generate PDR name for infix and prefix search +const generatePdrName = (num) => { + let name = cryptoRandomString({ length: 10 }); + if (num % 30 === 0) name = `${cryptoRandomString({ length: 5 })}infix${cryptoRandomString({ length: 5 })}`; + if (num % 25 === 0) name = `prefix${cryptoRandomString({ length: 10 })}`; + return name; +}; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + // Create collection + t.context.collectionPgModel = new CollectionPgModel(); + t.context.collectionName = 'fakeCollection'; + t.context.collectionVersion = 'v1'; + + const collectionName2 = 'testCollection2'; + const collectionVersion2 = 'v2'; + + t.context.collectionId = constructCollectionId( + t.context.collectionName, + t.context.collectionVersion + ); + + t.context.collectionId2 = constructCollectionId( + collectionName2, + collectionVersion2 + ); + + t.context.testPgCollection = fakeCollectionRecordFactory({ + name: t.context.collectionName, + version: t.context.collectionVersion, + }); + t.context.testPgCollection2 = fakeCollectionRecordFactory({ + name: collectionName2, + version: collectionVersion2, + }); + + 
const [pgCollection] = await t.context.collectionPgModel.create( + t.context.knex, + t.context.testPgCollection + ); + const [pgCollection2] = await t.context.collectionPgModel.create( + t.context.knex, + t.context.testPgCollection2 + ); + t.context.collectionCumulusId = pgCollection.cumulus_id; + t.context.collectionCumulusId2 = pgCollection2.cumulus_id; + + // Create provider + t.context.providerPgModel = new ProviderPgModel(); + t.context.provider = fakeProviderRecordFactory(); + + const [pgProvider] = await t.context.providerPgModel.create( + t.context.knex, + t.context.provider + ); + t.context.providerCumulusId = pgProvider.cumulus_id; + + // Create execution + t.context.executionPgModel = new ExecutionPgModel(); + t.context.execution = fakeExecutionRecordFactory(); + + const [pgExecution] = await t.context.executionPgModel.create( + t.context.knex, + t.context.execution + ); + t.context.executionCumulusId = pgExecution.cumulus_id; + + t.context.pdrSearchFields = { + createdAt: 1579352700000, + duration: 6.8, + progress: 0.9, + status: 'failed', + timestamp: 1579352700000, + updatedAt: 1579352700000, + }; + + t.context.pdrNames = range(100).map(generatePdrName); + t.context.pdrs = range(50).map((num) => fakePdrRecordFactory({ + name: t.context.pdrNames[num], + created_at: new Date(t.context.pdrSearchFields.createdAt), + collection_cumulus_id: (num % 2) + ? t.context.collectionCumulusId : t.context.collectionCumulusId2, + provider_cumulus_id: t.context.providerCumulusId, + execution_cumulus_id: !(num % 2) ? t.context.executionCumulusId : undefined, + status: !(num % 2) ? t.context.pdrSearchFields.status : 'completed', + progress: num / 50, + pan_sent: num % 2 === 0, + pan_message: `pan${cryptoRandomString({ length: 10 })}`, + stats: { + processing: 0, + completed: 0, + failed: 0, + total: 0, + }, + address: `address${cryptoRandomString({ length: 10 })}`, + original_url: !(num % 50) ? 
`url${cryptoRandomString({ length: 10 })}` : undefined, + duration: t.context.pdrSearchFields.duration + (num % 2), + updated_at: new Date(t.context.pdrSearchFields.timestamp + (num % 2) * 1000), + })); + + t.context.pdrPgModel = new PdrPgModel(); + t.context.pgPdrs = await t.context.pdrPgModel.insert( + knex, + t.context.pdrs + ); +}); + +test('PdrSearch returns 10 PDR records by default', async (t) => { + const { knex } = t.context; + const dbSearch = new PdrSearch(); + const response = await dbSearch.query(knex); + + t.is(response.meta.count, 50); + + const apiPdrs = response.results || {}; + t.is(apiPdrs.length, 10); + const validatedRecords = apiPdrs.filter((pdr) => ( + [t.context.collectionId, t.context.collectionId2].includes(pdr.collectionId) + && (pdr.provider === t.context.provider.name) + && (!pdr.execution || pdr.execution === t.context.execution.arn))); + t.is(validatedRecords.length, apiPdrs.length); +}); + +test('PdrSearch supports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 25, + page: 2, + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 10, + page: 5, + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 0); +}); + +test('PdrSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + infix: 'infix', + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('PdrSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + prefix: 'prefix', + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 2); + t.is(response.results?.length, 2); +}); + +test('PdrSearch supports collectionId term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + collectionId: t.context.collectionId2, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch supports provider term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + provider: t.context.provider.name, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('PdrSearch supports execution term search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + execution: `https://example.com/${t.context.execution.arn}`, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + 
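+// Note: PANSent and PANmessage query parameters map to the pan_sent and pan_message
+// columns via the pdr field mapping, so boolean values arrive as the strings
+// 'true'/'false' and are coerced before the query is built.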
+test('PdrSearch supports term search for boolean field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + PANSent: 'true', + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch supports term search for date field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + updatedAt: `${t.context.pdrSearchFields.updatedAt}`, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch supports term search for number field', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + duration: t.context.pdrSearchFields.duration, + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 100, + progress: t.context.pdrSearchFields.progress, + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('PdrSearch supports term search for string field', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + status: t.context.pdrSearchFields.status, + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + const dbRecord = t.context.pdrs[0]; + queryStringParameters = { + limit: 100, + address: dbRecord.address, + pdrName: dbRecord.name, + originalUrl: dbRecord.original_url, + PANmessage: dbRecord.pan_message, + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('PdrSearch supports term search for timestamp', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + timestamp: `${t.context.pdrSearchFields.timestamp}`, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch supports range search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + duration__from: `${t.context.pdrSearchFields.duration - 1}`, + duration__to: `${t.context.pdrSearchFields.duration + 1}`, + timestamp__from: `${t.context.pdrSearchFields.timestamp}`, + timestamp__to: `${t.context.pdrSearchFields.timestamp + 1600}`, + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 100, + timestamp__from: t.context.pdrSearchFields.timestamp, + timestamp__to: t.context.pdrSearchFields.timestamp + 500, + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 100, + duration__from: `${t.context.pdrSearchFields.duration + 2}`, 
+ }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); + +test('PdrSearch supports search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + collectionId__in: [t.context.collectionId2, t.context.collectionId].join(','), + provider: t.context.provider.name, + PANSent__not: 'false', + status: 'failed', + timestamp__from: t.context.pdrSearchFields.timestamp, + timestamp__to: t.context.pdrSearchFields.timestamp + 500, + sort_key: ['collectionId', '-timestamp'], + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch non-existing fields are ignored', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('PdrSearch returns fields specified', async (t) => { + const { knex } = t.context; + const fields = 'pdrName,collectionId,progress,PANSent,status'; + const queryStringParameters = { + fields, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + response.results.forEach((pdr) => t.deepEqual(Object.keys(pdr), fields.split(','))); +}); + +test('PdrSearch supports sorting', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + sort_by: 'timestamp', + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results[0].updatedAt < response.results[25].updatedAt); + t.true(response.results[1].updatedAt < response.results[40].updatedAt); + + queryStringParameters = { + limit: 100, + sort_by: 'timestamp', + order: 'desc', + }; + const dbSearch2 = new PdrSearch({ queryStringParameters }); + const response2 = await dbSearch2.query(knex); + t.is(response2.meta.count, 50); + t.is(response2.results?.length, 50); + t.true(response2.results[0].updatedAt > response2.results[25].updatedAt); + t.true(response2.results[1].updatedAt > response2.results[40].updatedAt); + + queryStringParameters = { + limit: 100, + sort_key: ['-timestamp'], + }; + const dbSearch3 = new PdrSearch({ queryStringParameters }); + const response3 = await dbSearch3.query(knex); + t.is(response3.meta.count, 50); + t.is(response3.results?.length, 50); + t.true(response3.results[0].updatedAt > response3.results[25].updatedAt); + t.true(response3.results[1].updatedAt > response3.results[40].updatedAt); + + queryStringParameters = { + limit: 100, + sort_key: ['+progress'], + }; + const dbSearch4 = new PdrSearch({ queryStringParameters }); + const response4 = await dbSearch4.query(knex); + t.is(response4.meta.count, 50); + t.is(response4.results?.length, 50); + t.true(Number(response4.results[0].progress) < Number(response4.results[25].progress)); + t.true(Number(response4.results[1].progress) < 
Number(response4.results[40].progress)); + + queryStringParameters = { + limit: 100, + sort_key: ['-timestamp', '+progress'], + }; + const dbSearch5 = new PdrSearch({ queryStringParameters }); + const response5 = await dbSearch5.query(knex); + t.is(response5.meta.count, 50); + t.is(response5.results?.length, 50); + t.true(response5.results[0].updatedAt > response5.results[25].updatedAt); + t.true(response5.results[1].updatedAt > response5.results[40].updatedAt); + t.true(Number(response5.results[0].progress) < Number(response5.results[10].progress)); + t.true(Number(response5.results[30].progress) < Number(response5.results[40].progress)); + + queryStringParameters = { + limit: 100, + sort_key: ['-timestamp'], + sort_by: 'timestamp', + order: 'asc', + }; + const dbSearch6 = new PdrSearch({ queryStringParameters }); + const response6 = await dbSearch6.query(knex); + t.is(response6.meta.count, 50); + t.is(response6.results?.length, 50); + t.true(response6.results[0].updatedAt < response6.results[25].updatedAt); + t.true(response6.results[1].updatedAt < response6.results[40].updatedAt); +}); + +test('PdrSearch supports sorting by CollectionId', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + sort_by: 'collectionId', + order: 'asc', + }; + const dbSearch8 = new PdrSearch({ queryStringParameters }); + const response8 = await dbSearch8.query(knex); + t.is(response8.meta.count, 50); + t.is(response8.results?.length, 50); + t.true(response8.results[0].collectionId < response8.results[25].collectionId); + t.true(response8.results[1].collectionId < response8.results[40].collectionId); + + queryStringParameters = { + limit: 100, + sort_key: ['-collectionId'], + }; + const dbSearch9 = new PdrSearch({ queryStringParameters }); + const response9 = await dbSearch9.query(knex); + t.is(response9.meta.count, 50); + t.is(response9.results?.length, 50); + t.true(response9.results[0].collectionId > response9.results[25].collectionId); + t.true(response9.results[1].collectionId > response9.results[40].collectionId); +}); + +test('PdrSearch supports terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + pdrName__in: [t.context.pdrNames[0], t.context.pdrNames[5]].join(','), + PANSent__in: 'true,false', + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 2); + t.is(response.results?.length, 2); + + queryStringParameters = { + limit: 100, + pdrName__in: [t.context.pdrNames[0], t.context.pdrNames[5]].join(','), + PANSent__in: 'true', + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('PdrSearch supports collectionId terms search', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + collectionId__in: [t.context.collectionId2, constructCollectionId('fakecollectionterms', 'v1')].join(','), + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 100, + collectionId__in: [t.context.collectionId, t.context.collectionId2].join(','), + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + 
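+// Note: provider and execution searches join the providers and executions tables
+// (see BaseSearch searchProvider/searchExecution); execution values are URLs, and
+// only the trailing ARN segment (value.split('/').pop()) is matched.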
+test('PdrSearch supports provider terms search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + provider__in: [t.context.provider.name, 'fakeproviderterms'].join(','), + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('PdrSearch supports execution terms search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + execution__in: [`https://example.con/${t.context.execution.arn}`, 'fakepdrterms'].join(','), + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch supports search when pdr field does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + pdrName__not: t.context.pdrNames[0], + PANSent__not: 'true', + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 100, + pdrName__not: t.context.pdrNames[0], + PANSent__not: 'false', + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 24); + t.is(response.results?.length, 24); +}); + +test('PdrSearch supports search which collectionId does not match the given value', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + collectionId__not: t.context.collectionId2, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch supports search which provider does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + provider__not: t.context.provider.name, + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); + + queryStringParameters = { + limit: 100, + provider__not: 'providernotexist', + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); + +test('PdrSearch supports search which execution does not match the given value', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + execution__not: `https://example.com/${t.context.execution.arn}`, + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); + + queryStringParameters = { + limit: 100, + execution__not: 'executionnotexist', + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch supports search which checks existence of PDR field', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + originalUrl__exists: 'true', + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + 
const response = await dbSearch.query(knex); + t.is(response.meta.count, 1); + t.is(response.results?.length, 1); +}); + +test('PdrSearch supports search which checks existence of collectionId', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + collectionId__exists: 'true', + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + queryStringParameters = { + limit: 100, + collectionId__exists: 'false', + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); + +test('PdrSearch supports search which checks existence of provider', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + provider__exists: 'true', + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + + queryStringParameters = { + limit: 100, + provider__exists: 'false', + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 0); + t.is(response.results?.length, 0); +}); + +test('PdrSearch supports search which checks existence of execution', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 100, + execution__exists: 'true', + }; + let dbSearch = new PdrSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 100, + execution__exists: 'false', + }; + dbSearch = new PdrSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); +}); + +test('PdrSearch returns the correct record', async (t) => { + const { knex } = t.context; + const dbRecord = t.context.pdrs[2]; + const queryStringParameters = { + limit: 100, + pdrName: dbRecord.name, + }; + const dbSearch = new PdrSearch({ queryStringParameters }); + const { results, meta } = await dbSearch.query(knex); + t.is(meta.count, 1); + t.is(results?.length, 1); + + const expectedApiRecord = { + pdrName: dbRecord.name, + provider: t.context.provider.name, + collectionId: t.context.collectionId2, + status: dbRecord.status, + createdAt: dbRecord.created_at.getTime(), + progress: dbRecord.progress, + execution: `https://console.aws.amazon.com/states/home?region=us-east-1#/executions/details/${t.context.execution.arn}`, + PANSent: dbRecord.pan_sent, + PANmessage: dbRecord.pan_message, + stats: { total: 0, failed: 0, completed: 0, processing: 0 }, + address: dbRecord.address, + duration: dbRecord.duration, + updatedAt: dbRecord.updated_at.getTime(), + }; + + t.deepEqual(results?.[0], expectedApiRecord); +}); From f9cdc5d36a557563819307cef4a1420c30469cb1 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Fri, 6 Sep 2024 14:36:38 -0400 Subject: [PATCH 34/61] CUMULUS-3837: modify lines to remove some disable lint max line length --- .../api/lambdas/create-reconciliation-report.js | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 30be0fbf2a2..8de2c0b70a4 100644 --- 
a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -751,7 +751,9 @@ async function createReconciliationReport(recReportParams) { bucketReports.forEach((bucketReport) => { report.filesInCumulus.okCount += bucketReport.okCount; - report.filesInCumulus.onlyInS3 = report.filesInCumulus.onlyInS3.concat(bucketReport.onlyInS3); // eslint-disable-line max-len + report.filesInCumulus.onlyInS3 = report.filesInCumulus.onlyInS3.concat( + bucketReport.onlyInS3 + ); report.filesInCumulus.onlyInDb = report.filesInCumulus.onlyInDb.concat( bucketReport.onlyInDb ); @@ -835,7 +837,9 @@ async function processRequest(params) { location: buildS3Uri(systemBucket, reportKey), }; let [reportPgRecord] = await reconciliationReportPgModel.create(knex, builtReportRecord); - let reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len + let reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport( + reportPgRecord + ); await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); log.info(`Report added to database as pending: ${JSON.stringify(reportApiRecord)}.`); @@ -867,7 +871,9 @@ async function processRequest(params) { status: 'Generated', }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, generatedRecord); - reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len + reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport( + reportPgRecord + ); await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); } catch (error) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); // eslint-disable-line max-len @@ -880,7 +886,9 @@ async function processRequest(params) { }, }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, erroredRecord); - reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len + reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport( + reportPgRecord + ); await indexReconciliationReport( esClient, reportApiRecord, From 9e01a3d2822c0cea5aacc1ca5b259157922d2d79 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 12 Sep 2024 13:01:14 -0400 Subject: [PATCH 35/61] CUMULUS-3837: modify recon report api/endpoint to use PG table, add and address some ts-check issues in endpoint file, shorten translate func name --- .../CreateReconciliationReportSpec.js | 12 +-- .../api/endpoints/reconciliation-reports.js | 92 +++++++++++++------ .../lambdas/create-reconciliation-report.js | 16 +--- .../test-create-reconciliation-report.js | 6 +- packages/db/src/index.ts | 2 +- .../src/translate/reconciliation_reports.ts | 2 +- 6 files changed, 77 insertions(+), 53 deletions(-) diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index 7b826443d60..9bb2aa9237d 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -332,7 +332,6 @@ describe('When there are granule differences and granule reconciliation is run', config = await loadConfig(); - process.env.ReconciliationReportsTable = `${config.stackName}-ReconciliationReportsTable`; process.env.CMR_ENVIRONMENT = 'UAT'; cmrClient = await createCmrClient(config); @@ -351,10 +350,8 @@ describe('When there are granule differences and granule reconciliation is run', console.log('XXX Waiting for setupCollectionAndTestData'); await setupCollectionAndTestData(config, testSuffix, testDataFolder); - console.log('XXX Completed for setupCollectionAndTestData'); + console.log('XXX Completed setupCollectionAndTestData'); - // Write an extra file to the DynamoDB Files table - // TODO: is ^ comment above correct, or should it be modified? 
extraFileInDb = { bucket: protectedBucket, key: randomString(), @@ -388,8 +385,8 @@ describe('When there are granule differences and granule reconciliation is run', collectionId, constructCollectionId(extraCumulusCollection.name, extraCumulusCollection.version), ]; - await waitForCollectionRecordsInList(config.stackName, collectionIds, { timestamp__from: ingestTime }); + console.log('XXXXX Completed collections in list'); // update one of the granule files in database so that that file won't match with CMR console.log('XXXXX Waiting for getGranule()'); @@ -398,7 +395,7 @@ describe('When there are granule differences and granule reconciliation is run', granuleId: publishedGranuleId, collectionId, }); - console.log('XXXXX Completed for getGranule()'); + console.log('XXXXX Completed getGranule()'); await waitForGranuleRecordUpdatedInList(config.stackName, granuleBeforeUpdate); console.log(`XXXXX Waiting for updateGranuleFile(${publishedGranuleId})`); ({ originalGranuleFile, updatedGranuleFile } = await updateGranuleFile( @@ -407,7 +404,7 @@ describe('When there are granule differences and granule reconciliation is run', /jpg$/, 'jpg2' )); - console.log(`XXXXX Completed for updateGranuleFile(${publishedGranuleId})`); + console.log(`XXXXX Completed updateGranuleFile(${publishedGranuleId})`); const [dbGranule, granuleAfterUpdate] = await Promise.all([ getGranule({ prefix: config.stackName, granuleId: dbGranuleId, collectionId }), @@ -418,6 +415,7 @@ describe('When there are granule differences and granule reconciliation is run', waitForGranuleRecordUpdatedInList(config.stackName, dbGranule), waitForGranuleRecordUpdatedInList(config.stackName, granuleAfterUpdate), ]); + console.log('XXXX Completed granules updated in list'); } catch (error) { console.log(error); beforeAllFailed = error; diff --git a/packages/api/endpoints/reconciliation-reports.js b/packages/api/endpoints/reconciliation-reports.js index 98069825aab..9a7f0a4ff5a 100644 --- a/packages/api/endpoints/reconciliation-reports.js +++ b/packages/api/endpoints/reconciliation-reports.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const router = require('express-promise-router')(); @@ -20,7 +22,12 @@ const Logger = require('@cumulus/logger'); const { Search, getEsClient } = require('@cumulus/es-client/search'); const indexer = require('@cumulus/es-client/indexer'); -const models = require('../models'); +const { + ReconciliationReportPgModel, + createRejectableTransaction, + getKnexClient, + translatePostgresReconReportToApiReconReport, +} = require('@cumulus/db'); const { normalizeEvent } = require('../lib/reconciliationReport/normalizeEvent'); const startAsyncOperation = require('../lib/startAsyncOperation'); const { asyncOperationEndpointErrorHandler } = require('../app/middleware'); @@ -56,10 +63,14 @@ async function listReports(req, res) { */ async function getReport(req, res) { const name = req.params.name; - const reconciliationReportModel = new models.ReconciliationReport(); - + try { - const result = await reconciliationReportModel.get({ name }); + const reconciliationReportPgModel = new ReconciliationReportPgModel(); + const knex = await getKnexClient(); + const result = await reconciliationReportPgModel.get(knex, { name }); + if (!result.location) { + return res.boom.badRequest('The reconciliation report record does not contain a location.'); + } const { Bucket, Key } = parseS3Uri(result.location); const reportExists = await fileExists(Bucket, Key); if (!reportExists) { @@ -77,20 +88,26 @@ async function getReport(req, res) { ); 
if (Key.endsWith('.json') || Key.endsWith('.csv')) { - const reportSize = await getObjectSize({ s3: s3(), bucket: Bucket, key: Key }); + const reportSize = await getObjectSize({ s3: s3(), bucket: Bucket, key: Key }) ?? 0; // or check for undefined and throw? // estimated payload size, add extra const estimatedPayloadSize = presignedS3Url.length + reportSize + 50; - if ( - estimatedPayloadSize - > (process.env.maxResponsePayloadSizeBytes || maxResponsePayloadSizeBytes) - ) { + let maxResponsePayloadSize; + if (process.env.maxResponsePayloadSizeBytes) { + maxResponsePayloadSize = Number(process.env.maxResponsePayloadSizeBytes) + } else { + maxResponsePayloadSize = maxResponsePayloadSizeBytes + } + if (estimatedPayloadSize > maxResponsePayloadSize) { res.json({ presignedS3Url, data: `Error: Report ${name} exceeded maximum allowed payload size`, }); } else { - const file = await getS3Object(Bucket, Key); + const file = await getS3Object(Bucket, Key); // TODO should use not deprecated method??? logger.debug(`Sending json file with contentLength ${file.ContentLength}`); + if (!file.Body) { + return res.boom.badRequest('Report file does not have a body.'); + } const fileBody = await getObjectStreamContents(file.Body); return res.json({ presignedS3Url, @@ -98,14 +115,14 @@ async function getReport(req, res) { }); } } - logger.debug('reconciliation report getReport received an unhandled report type.'); + logger.debug('Reconciliation report getReport received an unhandled report type.'); } catch (error) { if (error instanceof RecordDoesNotExist) { return res.boom.notFound(`No record found for ${name}`); } throw error; } - return res.boom.badImplementation('reconciliation report getReport failed in an indeterminate manner.'); + return res.boom.badImplementation('Reconciliation report getReport failed in an indeterminate manner.'); } /** @@ -117,27 +134,42 @@ async function getReport(req, res) { */ async function deleteReport(req, res) { const name = req.params.name; - const reconciliationReportModel = new models.ReconciliationReport(); - const record = await reconciliationReportModel.get({ name }); + try { + const reconciliationReportPgModel = new ReconciliationReportPgModel(); + const knex = await getKnexClient(); + const record = await reconciliationReportPgModel.get(knex, { name }); + if (!record.location) { + return res.boom.badRequest('The reconciliation report record does not contain a location!'); + } + const { Bucket, Key } = parseS3Uri(record.location); - const { Bucket, Key } = parseS3Uri(record.location); - if (await fileExists(Bucket, Key)) { - await deleteS3Object(Bucket, Key); - } - await reconciliationReportModel.delete({ name }); - - if (inTestMode()) { - const esClient = await getEsClient(process.env.ES_HOST); - await indexer.deleteRecord({ - esClient, - id: name, - type: 'reconciliationReport', - index: process.env.ES_INDEX, - ignore: [404], - }); + await createRejectableTransaction(knex, async (trx) => { + if (await fileExists(Bucket, Key)) { + await deleteS3Object(Bucket, Key); + } + await reconciliationReportPgModel.delete(knex, { name }); + }) + + if (inTestMode()) { + const esClient = await getEsClient(process.env.ES_HOST); + await indexer.deleteRecord({ + esClient, + id: name, + type: 'reconciliationReport', + index: process.env.ES_INDEX, + ignore: [404], + }); + } + + return res.send({ message: 'Report deleted' }); + } catch (error) { + if (error instanceof RecordDoesNotExist) { + return res.boom.notFound(`No record found for ${name}`); + } + throw error; } - return 
res.send({ message: 'Report deleted' }); + return res.boom.badImplementation('Reconciliation report deleteReport failed in an indeterminate manner.'); } /** diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 8de2c0b70a4..fb97a74e780 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -30,7 +30,7 @@ const Logger = require('@cumulus/logger'); const { ReconciliationReportPgModel, - translatePostgresReconciliationReportToApiReconciliationReport, + translatePostgresReconReportToApiReconReport, } = require('@cumulus/db'); const { createInternalReconciliationReport } = require('./internal-reconciliation-report'); const { createGranuleInventoryReport } = require('./reports/granule-inventory-report'); @@ -837,9 +837,7 @@ async function processRequest(params) { location: buildS3Uri(systemBucket, reportKey), }; let [reportPgRecord] = await reconciliationReportPgModel.create(knex, builtReportRecord); - let reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport( - reportPgRecord - ); + let reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); log.info(`Report added to database as pending: ${JSON.stringify(reportApiRecord)}.`); @@ -871,9 +869,7 @@ async function processRequest(params) { status: 'Generated', }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, generatedRecord); - reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport( - reportPgRecord - ); + reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); } catch (error) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); // eslint-disable-line max-len @@ -886,9 +882,7 @@ async function processRequest(params) { }, }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, erroredRecord); - reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport( - reportPgRecord - ); + reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); await indexReconciliationReport( esClient, reportApiRecord, @@ -898,7 +892,7 @@ async function processRequest(params) { } reportPgRecord = await reconciliationReportPgModel.get(knex, { name: builtReportRecord.name }); - return translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); + return translatePostgresReconReportToApiReconReport(reportPgRecord); } async function handler(event) { diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report.js b/packages/api/tests/lambdas/test-create-reconciliation-report.js index 3fe9203795f..81082bbd23f 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report.js @@ -39,7 +39,7 @@ const { fakeGranuleRecordFactory, translatePostgresCollectionToApiCollection, translateApiGranuleToPostgresGranule, - translatePostgresReconciliationReportToApiReconciliationReport, + translatePostgresReconReportToApiReconReport, upsertGranuleWithExecutionJoinRecord, } = require('@cumulus/db'); const { getDistributionBucketMapKey } = require('@cumulus/distribution-utils'); @@ -1866,7 +1866,7 @@ test.serial('When report creation fails, reconciliation report status is set to ); // reconciliation report lambda outputs the translated API version, not the PG version, so // it should be translated for comparison, at least for the comparison with the ES (API) version - const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len + const reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); t.is(reportApiRecord.status, 'Failed'); t.is(reportApiRecord.type, 'Inventory'); @@ -2218,7 +2218,7 @@ test.serial('When there is an error for an ORCA backup report, it throws', async ); // reconciliation report lambda outputs the translated API version, not the PG version, so // it should be translated for comparison, at least for the comparison with the ES (API) version - const reportApiRecord = translatePostgresReconciliationReportToApiReconciliationReport(reportPgRecord); // eslint-disable-line max-len + const reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); t.is(reportApiRecord.status, 'Failed'); t.is(reportApiRecord.type, event.reportType); diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index f97dcc9ecc7..c63de22464d 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -104,7 +104,7 @@ export { translatePostgresProviderToApiProvider, } from './translate/providers'; export { - translatePostgresReconciliationReportToApiReconciliationReport, + translatePostgresReconReportToApiReconReport, } from './translate/reconciliation_reports'; export { translatePostgresRuleToApiRule, diff --git a/packages/db/src/translate/reconciliation_reports.ts b/packages/db/src/translate/reconciliation_reports.ts index 819fe85c728..fe8de424388 100644 --- a/packages/db/src/translate/reconciliation_reports.ts +++ b/packages/db/src/translate/reconciliation_reports.ts @@ -6,7 +6,7 @@ import { PostgresReconciliationReportRecord } from '../types/reconciliation_repo 
* @param {Object} pgReconciliationReport - a PostgreSQL reconciliation report record * @returns {Object} an API reconciliation report record */ -export const translatePostgresReconciliationReportToApiReconciliationReport = ( +export const translatePostgresReconReportToApiReconReport = ( pgReconciliationReport: PostgresReconciliationReportRecord ) => { const apiReconciliationReport = { From 322e135698c715292f446be6603cce29d0e0b3ef Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 12 Sep 2024 13:01:14 -0400 Subject: [PATCH 36/61] CUMULUS-3837: modify recon report api/endpoint to use PG table, add and address some ts-check issues in endpoint file, shorten translate func name --- packages/api/endpoints/reconciliation-reports.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/api/endpoints/reconciliation-reports.js b/packages/api/endpoints/reconciliation-reports.js index 9a7f0a4ff5a..ff4f69ee500 100644 --- a/packages/api/endpoints/reconciliation-reports.js +++ b/packages/api/endpoints/reconciliation-reports.js @@ -88,7 +88,7 @@ async function getReport(req, res) { ); if (Key.endsWith('.json') || Key.endsWith('.csv')) { - const reportSize = await getObjectSize({ s3: s3(), bucket: Bucket, key: Key }) ?? 0; // or check for undefined and throw? + const reportSize = await getObjectSize({ s3: s3(), bucket: Bucket, key: Key }) ?? 0; // estimated payload size, add extra const estimatedPayloadSize = presignedS3Url.length + reportSize + 50; let maxResponsePayloadSize; @@ -103,7 +103,7 @@ async function getReport(req, res) { data: `Error: Report ${name} exceeded maximum allowed payload size`, }); } else { - const file = await getS3Object(Bucket, Key); // TODO should use not deprecated method??? + const file = await getS3Object(Bucket, Key); logger.debug(`Sending json file with contentLength ${file.ContentLength}`); if (!file.Body) { return res.boom.badRequest('Report file does not have a body.'); From 417057a03aa834031b1a44541d1ae3213019a28b Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Tue, 17 Sep 2024 15:11:16 -0400 Subject: [PATCH 37/61] CUMULUS-3837: fix recon report endpoint tests, update migration, types, and translate files --- CHANGELOG.md | 8 ++- .../api/endpoints/reconciliation-reports.js | 44 ++++++------- .../api/tests/endpoints/test-executions.js | 2 - .../endpoints/test-reconciliation-reports.js | 63 +++++++++++++------ packages/db/src/index.ts | 2 +- ...217_create_reconciliation_reports_table.ts | 9 ++- .../src/translate/reconciliation_reports.ts | 19 +++--- .../db/src/types/reconciliation_report.ts | 13 ++-- .../types/api/reconciliation_reports.d.ts | 18 ++++++ 9 files changed, 109 insertions(+), 69 deletions(-) create mode 100644 packages/types/api/reconciliation_reports.d.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b7f61c68114..1b779002d82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,9 +25,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Update `@cumlus/api/ecs/async-operation` to not update Elasticsearch index when reporting status of async operation - **CUMULUS-3837** - - added `reconciliation_reports` table in RDS, including indexes - - created model and types for `reconciliationReports` - - updated reconciliation reports lambda to write to new RDS table instead of Dynamo + - Added `reconciliation_reports` table in RDS, including indexes + - Created pg model, types, and translation for `reconciliationReports` in `@cumulus/db` + - Created api types for `reconciliation_reports` in `@cumulus/types/api` + - Updated reconciliation reports lambda to write to new RDS table instead of Dynamo + - Updated `@cumulus/api/endpoints/reconciliation-reports` `getReport` and `deleteReport` to work with the new RDS table instead of Dynamo ## [Unreleased] diff --git a/packages/api/endpoints/reconciliation-reports.js b/packages/api/endpoints/reconciliation-reports.js index ff4f69ee500..b05caf7e9b6 100644 --- a/packages/api/endpoints/reconciliation-reports.js +++ b/packages/api/endpoints/reconciliation-reports.js @@ -8,7 +8,7 @@ const { deleteS3Object, fileExists, getObjectSize, - getS3Object, + getObject, parseS3Uri, buildS3Uri, getObjectStreamContents, @@ -26,7 +26,6 @@ const { ReconciliationReportPgModel, createRejectableTransaction, getKnexClient, - translatePostgresReconReportToApiReconReport, } = require('@cumulus/db'); const { normalizeEvent } = require('../lib/reconciliationReport/normalizeEvent'); const startAsyncOperation = require('../lib/startAsyncOperation'); @@ -91,19 +90,15 @@ async function getReport(req, res) { const reportSize = await getObjectSize({ s3: s3(), bucket: Bucket, key: Key }) ?? 0; // estimated payload size, add extra const estimatedPayloadSize = presignedS3Url.length + reportSize + 50; - let maxResponsePayloadSize; - if (process.env.maxResponsePayloadSizeBytes) { - maxResponsePayloadSize = Number(process.env.maxResponsePayloadSizeBytes) - } else { - maxResponsePayloadSize = maxResponsePayloadSizeBytes - } - if (estimatedPayloadSize > maxResponsePayloadSize) { + if (estimatedPayloadSize > + Number(process.env.maxResponsePayloadSizeBytes || maxResponsePayloadSizeBytes) + ) { res.json({ presignedS3Url, data: `Error: Report ${name} exceeded maximum allowed payload size`, }); } else { - const file = await getS3Object(Bucket, Key); + const file = await getObject(s3(), { Bucket, Key }); logger.debug(`Sending json file with contentLength ${file.ContentLength}`); if (!file.Body) { return res.boom.badRequest('Report file does not have a body.'); @@ -122,6 +117,7 @@ async function getReport(req, res) { } throw error; } + return res.boom.badImplementation('Reconciliation report getReport failed in an indeterminate manner.'); } @@ -148,20 +144,7 @@ async function deleteReport(req, res) { await deleteS3Object(Bucket, Key); } await reconciliationReportPgModel.delete(knex, { name }); - }) - - if (inTestMode()) { - const esClient = await getEsClient(process.env.ES_HOST); - await indexer.deleteRecord({ - esClient, - id: name, - type: 'reconciliationReport', - index: process.env.ES_INDEX, - ignore: [404], - }); - } - - return res.send({ message: 'Report deleted' }); + }); } catch (error) { if (error instanceof RecordDoesNotExist) { return res.boom.notFound(`No record found for ${name}`); @@ -169,7 +152,18 @@ async function deleteReport(req, res) { throw error; } - return res.boom.badImplementation('Reconciliation report deleteReport failed in an indeterminate manner.'); + if (inTestMode()) { + const esClient = await 
getEsClient(process.env.ES_HOST); + await indexer.deleteRecord({ + esClient, + id: name, + type: 'reconciliationReport', + index: process.env.ES_INDEX, + ignore: [404], + }); + } + + return res.send({ message: 'Report deleted' }); } /** diff --git a/packages/api/tests/endpoints/test-executions.js b/packages/api/tests/endpoints/test-executions.js index a192c35d3a1..96439c58b26 100644 --- a/packages/api/tests/endpoints/test-executions.js +++ b/packages/api/tests/endpoints/test-executions.js @@ -78,8 +78,6 @@ process.env.TOKEN_SECRET = randomId('secret'); test.before(async (t) => { process.env = { ...process.env, - ...localStackConnectionEnv, - PG_DATABASE: testDbName, METRICS_ES_HOST: 'fakehost', METRICS_ES_USER: randomId('metricsUser'), METRICS_ES_PASS: randomId('metricsPass'), diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index fd9a4f39533..7fb46cfe532 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -7,8 +7,16 @@ const isEqual = require('lodash/isEqual'); const isMatch = require('lodash/isMatch'); const omit = require('lodash/omit'); const request = require('supertest'); +const cryptoRandomString = require('crypto-random-string'); -const { localStackConnectionEnv } = require('@cumulus/db'); +const { + ReconciliationReportPgModel, + generateLocalTestDb, + localStackConnectionEnv, + migrationDir, + fakeReconciliationReportRecordFactory, + translatePostgresReconReportToApiReconReport, +} = require('@cumulus/db'); const awsServices = require('@cumulus/aws-client/services'); const { buildS3Uri, @@ -24,7 +32,6 @@ const { getEsClient } = require('@cumulus/es-client/search'); const startAsyncOperation = require('../../lib/startAsyncOperation'); const { createFakeJwtAuthToken, - fakeReconciliationReportFactory, setAuthorizedOAuthUsers, } = require('../../lib/testUtils'); const assertions = require('../../lib/assertions'); @@ -35,7 +42,6 @@ process.env.invoke = 'granule-reconciliation-reports'; process.env.stackName = 'test-stack'; process.env.system_bucket = 'testsystembucket'; process.env.AccessTokensTable = randomId('accessTokensTable'); -process.env.ReconciliationReportsTable = randomId('recReportsTable'); process.env.TOKEN_SECRET = randomId('tokenSecret'); process.env.stackName = randomId('stackname'); process.env.system_bucket = randomId('bucket'); @@ -44,12 +50,8 @@ process.env.AsyncOperationTaskDefinition = randomId('asyncOpTaskDefinition'); process.env.EcsCluster = randomId('ecsCluster'); // import the express app after setting the env variables -const { - app, -} = require('../../app'); -const { - createReport, -} = require('../../endpoints/reconciliation-reports'); +const { app } = require('../../app'); +const { createReport } = require('../../endpoints/reconciliation-reports'); const { normalizeEvent } = require('../../lib/reconciliationReport/normalizeEvent'); const { buildFakeExpressResponse } = require('./utils'); @@ -57,12 +59,20 @@ const { buildFakeExpressResponse } = require('./utils'); let esClient; const esIndex = randomId('esindex'); +const testDbName = `test_recon_reports_${cryptoRandomString({ length: 10 })}`; + let jwtAuthToken; let accessTokenModel; let reconciliationReportModel; let fakeReportRecords = []; -test.before(async () => { +test.before(async (t) => { + process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: testDbName, + }; + // create esClient esClient = await 
getEsClient('fakehost'); @@ -78,8 +88,6 @@ test.before(async () => { accessTokenModel = new models.AccessToken(); await accessTokenModel.createTable(); - reconciliationReportModel = new models.ReconciliationReport(); - await reconciliationReportModel.createTable(); await awsServices.s3().createBucket({ Bucket: process.env.system_bucket, @@ -93,6 +101,17 @@ test.before(async () => { username, }); + const { knex, knexAdmin } = await generateLocalTestDb(testDbName, migrationDir); + t.context.knex = knex; + t.context.knexAdmin = knexAdmin; + process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: testDbName, + }; + + t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); + const reportNameTypes = [ { name: randomId('report1'), type: 'Inventory' }, { name: randomId('report2'), type: 'Granule Inventory' }, @@ -102,7 +121,7 @@ test.before(async () => { const reportDirectory = `${process.env.stackName}/reconciliation-reports`; const typeToExtension = (type) => ((type === 'Granule Inventory') ? '.csv' : '.json'); - fakeReportRecords = reportNameTypes.map((nameType) => fakeReconciliationReportFactory({ + fakeReportRecords = reportNameTypes.map((nameType) => fakeReconciliationReportRecordFactory({ name: nameType.name, type: nameType.type, location: buildS3Uri(process.env.system_bucket, @@ -119,15 +138,17 @@ test.before(async () => { }), }))); + let esResponse // add records to es await Promise.all(fakeReportRecords.map((reportRecord) => - reconciliationReportModel.create(reportRecord) - .then((record) => indexer.indexReconciliationReport(esClient, record, esAlias)))); + t.context.reconciliationReportPgModel.create(knex, reportRecord) + .then(([reportPgRecord]) => translatePostgresReconReportToApiReconReport(reportPgRecord)) + .then((repApiRecord) => indexer.indexReconciliationReport(esClient, repApiRecord, esAlias)) + )); }); -test.after.always(async () => { +test.after.always(async (t) => { await accessTokenModel.deleteTable(); - await reconciliationReportModel.deleteTable(); await esClient.client.indices.delete({ index: esIndex, }); @@ -231,8 +252,14 @@ test.serial('default returns list of reports', async (t) => { const recordsAreEqual = (record1, record2) => isEqual(omit(record1, ['updatedAt', 'timestamp']), omit(record2, ['updatedAt', 'timestamp'])); + // fakeReportRecords were created with the factory that creates PG version recon reports, so + // should be translated as the list endpoint returns the API version of recon reports + const fakeReportApiRecords = fakeReportRecords.map((fakeRecord) => { + return translatePostgresReconReportToApiReconReport(fakeRecord); + }); + results.results.forEach((item) => { - const recordsFound = fakeReportRecords.filter((record) => recordsAreEqual(record, item)); + const recordsFound = fakeReportApiRecords.filter((record) => recordsAreEqual(record, item)); t.is(recordsFound.length, 1); }); }); diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index c63de22464d..52e0ed1ffe0 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -167,8 +167,8 @@ export { StatsSearch, } from './search/StatsSearch'; -export { BasePgModel } from './models/base'; export { AsyncOperationPgModel } from './models/async_operation'; +export { BasePgModel } from './models/base'; export { CollectionPgModel } from './models/collection'; export { ExecutionPgModel } from './models/execution'; export { FilePgModel } from './models/file'; diff --git 
a/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts index a29d2818b41..ebf624c3f1f 100644 --- a/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts +++ b/packages/db/src/migrations/20240814185217_create_reconciliation_reports_table.ts @@ -7,14 +7,17 @@ export const up = async (knex: Knex): Promise => { .primary(); table .text('name') - .comment('Reconciliation Report name'); + .comment('Reconciliation Report name') + .notNullable(); table .enum('type', ['Granule Inventory', 'Granule Not Found', 'Internal', 'Inventory', 'ORCA Backup']) - .comment('Type of Reconciliation Report'); + .comment('Type of Reconciliation Report') + .notNullable(); table .enum('status', ['Generated', 'Pending', 'Failed']) - .comment('Status of Reconciliation Report'); + .comment('Status of Reconciliation Report') + .notNullable(); table .text('location') .comment('Location of Reconciliation Report'); diff --git a/packages/db/src/translate/reconciliation_reports.ts b/packages/db/src/translate/reconciliation_reports.ts index fe8de424388..d257f459d49 100644 --- a/packages/db/src/translate/reconciliation_reports.ts +++ b/packages/db/src/translate/reconciliation_reports.ts @@ -1,23 +1,22 @@ import { PostgresReconciliationReportRecord } from '../types/reconciliation_report'; +import { ApiReconciliationReportRecord } from '@cumulus/types/api/reconciliation_reports'; + +const { removeNilProperties } = require('@cumulus/common/util'); +const pick = require('lodash/pick'); /** * Generate an API Reconciliation Report record from a PostgreSQL record. * - * @param {Object} pgReconciliationReport - a PostgreSQL reconciliation report record + * @param pgReconciliationReport - a PostgreSQL reconciliation report record * @returns {Object} an API reconciliation report record */ export const translatePostgresReconReportToApiReconReport = ( pgReconciliationReport: PostgresReconciliationReportRecord -) => { - const apiReconciliationReport = { - // id or cumulus_id? 
- name: pgReconciliationReport.name, - type: pgReconciliationReport.type, - status: pgReconciliationReport.status, - location: pgReconciliationReport.location, - error: pgReconciliationReport.error, +): ApiReconciliationReportRecord => { + const apiReconciliationReport = removeNilProperties({ + ...pick(pgReconciliationReport, ['name', 'type', 'status', 'location', 'error']), createdAt: pgReconciliationReport.created_at?.getTime(), updatedAt: pgReconciliationReport.updated_at?.getTime(), - }; + }); return apiReconciliationReport; }; diff --git a/packages/db/src/types/reconciliation_report.ts b/packages/db/src/types/reconciliation_report.ts index 6839f42ea00..8d50b628e27 100644 --- a/packages/db/src/types/reconciliation_report.ts +++ b/packages/db/src/types/reconciliation_report.ts @@ -1,8 +1,7 @@ -export type ReconciliationReportType = - 'Granule Inventory' | 'Granule Not Found' | 'Internal' | 'Inventory' | 'ORCA Backup'; -export type ReconciliationReportStatus = 'Generated' | 'Pending' | 'Failed'; -// rules imports some types from '@cumulus/types/api/rules', -// not sure if these should be moved there later +import { + ReconciliationReportType, + ReconciliationReportStatus, +} from '@cumulus/types/api/reconciliation_reports'; /** * PostgresReconciliationReport @@ -17,8 +16,8 @@ export interface PostgresReconciliationReport { status: ReconciliationReportStatus, location?: string, error?: object, - created_at: Date | undefined, - updated_at: Date | undefined, + created_at?: Date, + updated_at?: Date, } /** diff --git a/packages/types/api/reconciliation_reports.d.ts b/packages/types/api/reconciliation_reports.d.ts new file mode 100644 index 00000000000..d2b19183264 --- /dev/null +++ b/packages/types/api/reconciliation_reports.d.ts @@ -0,0 +1,18 @@ +export type ReconciliationReportType = + 'Granule Inventory' | 'Granule Not Found' | 'Internal' | 'Inventory' | 'ORCA Backup'; +export type ReconciliationReportStatus = 'Generated' | 'Pending' | 'Failed'; + +export interface ApiReconciliationReport { + name: string, + type: ReconciliationReportType, + status: ReconciliationReportStatus, + location?: string, + error?: object, + createdAt?: number, + updatedAt?: number, +} + +export interface ApiReconciliationReportRecord extends ApiReconciliationReport { + createdAt: number, + updatedAt: number, +} \ No newline at end of file From 85e9fab01ceac87ab86576d3420e98fb17fb822b Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 18 Sep 2024 10:41:25 -0400 Subject: [PATCH 38/61] CUMULUS-3837: fix eslint errors --- packages/api/endpoints/reconciliation-reports.js | 6 +++--- .../endpoints/test-reconciliation-reports.js | 15 ++++++--------- .../db/src/translate/reconciliation_reports.ts | 4 ++-- packages/db/src/types/reconciliation_report.ts | 2 +- packages/types/api/reconciliation_reports.d.ts | 2 +- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/packages/api/endpoints/reconciliation-reports.js b/packages/api/endpoints/reconciliation-reports.js index b05caf7e9b6..7973622efa6 100644 --- a/packages/api/endpoints/reconciliation-reports.js +++ b/packages/api/endpoints/reconciliation-reports.js @@ -62,7 +62,7 @@ async function listReports(req, res) { */ async function getReport(req, res) { const name = req.params.name; - + try { const reconciliationReportPgModel = new ReconciliationReportPgModel(); const knex = await getKnexClient(); @@ -90,7 +90,7 @@ async function getReport(req, res) { const reportSize = await getObjectSize({ s3: s3(), bucket: Bucket, key: Key }) ?? 
0; // estimated payload size, add extra const estimatedPayloadSize = presignedS3Url.length + reportSize + 50; - if (estimatedPayloadSize > + if (estimatedPayloadSize > Number(process.env.maxResponsePayloadSizeBytes || maxResponsePayloadSizeBytes) ) { res.json({ @@ -139,7 +139,7 @@ async function deleteReport(req, res) { } const { Bucket, Key } = parseS3Uri(record.location); - await createRejectableTransaction(knex, async (trx) => { + await createRejectableTransaction(knex, async () => { if (await fileExists(Bucket, Key)) { await deleteS3Object(Bucket, Key); } diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index 7fb46cfe532..9227477a3b0 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -63,7 +63,6 @@ const testDbName = `test_recon_reports_${cryptoRandomString({ length: 10 })}`; let jwtAuthToken; let accessTokenModel; -let reconciliationReportModel; let fakeReportRecords = []; test.before(async (t) => { @@ -88,7 +87,6 @@ test.before(async (t) => { accessTokenModel = new models.AccessToken(); await accessTokenModel.createTable(); - await awsServices.s3().createBucket({ Bucket: process.env.system_bucket, }); @@ -138,16 +136,15 @@ test.before(async (t) => { }), }))); - let esResponse // add records to es await Promise.all(fakeReportRecords.map((reportRecord) => t.context.reconciliationReportPgModel.create(knex, reportRecord) .then(([reportPgRecord]) => translatePostgresReconReportToApiReconReport(reportPgRecord)) .then((repApiRecord) => indexer.indexReconciliationReport(esClient, repApiRecord, esAlias)) - )); + )); }); -test.after.always(async (t) => { +test.after.always(async () => { await accessTokenModel.deleteTable(); await esClient.client.indices.delete({ index: esIndex, @@ -252,11 +249,11 @@ test.serial('default returns list of reports', async (t) => { const recordsAreEqual = (record1, record2) => isEqual(omit(record1, ['updatedAt', 'timestamp']), omit(record2, ['updatedAt', 'timestamp'])); - // fakeReportRecords were created with the factory that creates PG version recon reports, so + // fakeReportRecords were created with the factory that creates PG version recon reports, so // should be translated as the list endpoint returns the API version of recon reports - const fakeReportApiRecords = fakeReportRecords.map((fakeRecord) => { - return translatePostgresReconReportToApiReconReport(fakeRecord); - }); + const fakeReportApiRecords = fakeReportRecords.map((fakeRecord) => + translatePostgresReconReportToApiReconReport(fakeRecord) + ); results.results.forEach((item) => { const recordsFound = fakeReportApiRecords.filter((record) => recordsAreEqual(record, item)); diff --git a/packages/db/src/translate/reconciliation_reports.ts b/packages/db/src/translate/reconciliation_reports.ts index d257f459d49..1f684b341f9 100644 --- a/packages/db/src/translate/reconciliation_reports.ts +++ b/packages/db/src/translate/reconciliation_reports.ts @@ -1,5 +1,5 @@ -import { PostgresReconciliationReportRecord } from '../types/reconciliation_report'; import { ApiReconciliationReportRecord } from '@cumulus/types/api/reconciliation_reports'; +import { PostgresReconciliationReportRecord } from '../types/reconciliation_report'; const { removeNilProperties } = require('@cumulus/common/util'); const pick = require('lodash/pick'); @@ -8,7 +8,7 @@ const pick = require('lodash/pick'); * Generate an API Reconciliation Report record from a 
PostgreSQL record. * * @param pgReconciliationReport - a PostgreSQL reconciliation report record - * @returns {Object} an API reconciliation report record + * @returns ApiReconciliationReportRecord - an API reconciliation report record */ export const translatePostgresReconReportToApiReconReport = ( pgReconciliationReport: PostgresReconciliationReportRecord diff --git a/packages/db/src/types/reconciliation_report.ts b/packages/db/src/types/reconciliation_report.ts index 8d50b628e27..bd6671f8016 100644 --- a/packages/db/src/types/reconciliation_report.ts +++ b/packages/db/src/types/reconciliation_report.ts @@ -1,4 +1,4 @@ -import { +import { ReconciliationReportType, ReconciliationReportStatus, } from '@cumulus/types/api/reconciliation_reports'; diff --git a/packages/types/api/reconciliation_reports.d.ts b/packages/types/api/reconciliation_reports.d.ts index d2b19183264..5473740551d 100644 --- a/packages/types/api/reconciliation_reports.d.ts +++ b/packages/types/api/reconciliation_reports.d.ts @@ -15,4 +15,4 @@ export interface ApiReconciliationReport { export interface ApiReconciliationReportRecord extends ApiReconciliationReport { createdAt: number, updatedAt: number, -} \ No newline at end of file +} From 4345b820f1ee9bde17b59f0fef3ef4c14b1430fe Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 18 Sep 2024 10:41:25 -0400 Subject: [PATCH 39/61] CUMULUS-3837: fix eslint errors --- .../api/tests/endpoints/test-reconciliation-reports.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index 9227477a3b0..c6c95553f58 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -140,8 +140,8 @@ test.before(async (t) => { await Promise.all(fakeReportRecords.map((reportRecord) => t.context.reconciliationReportPgModel.create(knex, reportRecord) .then(([reportPgRecord]) => translatePostgresReconReportToApiReconReport(reportPgRecord)) - .then((repApiRecord) => indexer.indexReconciliationReport(esClient, repApiRecord, esAlias)) - )); + .then((repApiRecord) => indexer.indexReconciliationReport(esClient, repApiRecord, esAlias))) + ); }); test.after.always(async () => { @@ -252,8 +252,7 @@ test.serial('default returns list of reports', async (t) => { // fakeReportRecords were created with the factory that creates PG version recon reports, so // should be translated as the list endpoint returns the API version of recon reports const fakeReportApiRecords = fakeReportRecords.map((fakeRecord) => - translatePostgresReconReportToApiReconReport(fakeRecord) - ); + translatePostgresReconReportToApiReconReport(fakeRecord)); results.results.forEach((item) => { const recordsFound = fakeReportApiRecords.filter((record) => recordsAreEqual(record, item)); From 1a8192beccd5c8cce3fd2bb33c3e709a747be8a6 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 18 Sep 2024 10:41:25 -0400 Subject: [PATCH 40/61] CUMULUS-3837: fix eslint errors --- .../endpoints/test-reconciliation-reports.js | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index c6c95553f58..17d148aad57 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -137,10 +137,19 @@ 
test.before(async (t) => { }))); // add records to es - await Promise.all(fakeReportRecords.map((reportRecord) => - t.context.reconciliationReportPgModel.create(knex, reportRecord) - .then(([reportPgRecord]) => translatePostgresReconReportToApiReconReport(reportPgRecord)) - .then((repApiRecord) => indexer.indexReconciliationReport(esClient, repApiRecord, esAlias))) + await Promise.all( + fakeReportRecords.map((reportRecord) => + t.context.reconciliationReportPgModel + .create(knex, reportRecord) + .then( + ([reportPgRecord]) => + translatePostgresReconReportToApiReconReport(reportPgRecord) + ) + .then( + (reportApiRecord) => + indexer.indexReconciliationReport(esClient, reportApiRecord, esAlias) + ) + ) ); }); From cdf23ff86f2aa384294e538d6b62695d78a97dbc Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 18 Sep 2024 12:28:08 -0400 Subject: [PATCH 41/61] CUMULUS-3837: add unit tests for translate method --- .../endpoints/test-reconciliation-reports.js | 3 +- .../translate/test-reconciliation-reports.js | 60 +++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 packages/db/tests/translate/test-reconciliation-reports.js diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index 17d148aad57..a235284c552 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -148,8 +148,7 @@ test.before(async (t) => { .then( (reportApiRecord) => indexer.indexReconciliationReport(esClient, reportApiRecord, esAlias) - ) - ) + )) ); }); diff --git a/packages/db/tests/translate/test-reconciliation-reports.js b/packages/db/tests/translate/test-reconciliation-reports.js new file mode 100644 index 00000000000..3e4c4183a5b --- /dev/null +++ b/packages/db/tests/translate/test-reconciliation-reports.js @@ -0,0 +1,60 @@ +const test = require('ava'); +const { randomId } = require('@cumulus/common/test-utils'); + +const { translatePostgresReconReportToApiReconReport } = require('../../dist/translate/reconciliation_reports'); + +const pick = require('lodash/pick'); + +test('translatePostgresReconReportToApiReconReport translates a Postgres Reconciliation Report to an API Reconciliation Report', async(t) => { + const createdTime = new Date(Date.now()); + const updatedTime = new Date(Date.now()); + + const pgReconReport = { + name: randomId('report'), + type: 'Granule Inventory', + status: 'Generated', + location: 's3://cumulus-test-sandbox-private/reconciliation-reports', + error: null, + created_at: createdTime, + updated_at: updatedTime, + } + + const expectedApiReconReport = { + ...pick(pgReconReport, ['name', 'type', 'status', 'location']), + // no error b/c null or undefined should be removed + createdAt: createdTime.getTime(), + updatedAt: updatedTime.getTime(), + } + + const translatedReport = translatePostgresReconReportToApiReconReport(pgReconReport); + + t.deepEqual(expectedApiReconReport, translatedReport); +}); + +test('translatePostgresReconReportToApiReconReport translates a error Postgres Reconciliation Report with an error to an API Reconciliation Report', async (t) => { + const createdTime = new Date(Date.now()); + const updatedTime = new Date(Date.now()); + + const pgReconReport = { + name: randomId('report'), + type: 'Granule Not Found', + status: 'Failed', + location: 's3://cumulus-test-sandbox-private/reconciliation-reports', + error: { + Error: 'some error message', + Cause: 'some error cause', + 
}, + created_at: createdTime, + updated_at: updatedTime, + } + + const expectedApiReconReport = { + ...pick(pgReconReport, ['name', 'type', 'status', 'location', 'error']), + createdAt: createdTime.getTime(), + updatedAt: updatedTime.getTime(), + } + + const translatedReport = translatePostgresReconReportToApiReconReport(pgReconReport); + + t.deepEqual(expectedApiReconReport, translatedReport); +}); \ No newline at end of file From 0ed9152ccfc56a2e3612cd5607b5a688f402fb79 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 18 Sep 2024 12:28:08 -0400 Subject: [PATCH 42/61] CUMULUS-3837: add unit tests for translate method --- .../translate/test-reconciliation-reports.js | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/packages/db/tests/translate/test-reconciliation-reports.js b/packages/db/tests/translate/test-reconciliation-reports.js index 3e4c4183a5b..2314c78f700 100644 --- a/packages/db/tests/translate/test-reconciliation-reports.js +++ b/packages/db/tests/translate/test-reconciliation-reports.js @@ -1,11 +1,10 @@ const test = require('ava'); const { randomId } = require('@cumulus/common/test-utils'); +const pick = require('lodash/pick'); const { translatePostgresReconReportToApiReconReport } = require('../../dist/translate/reconciliation_reports'); -const pick = require('lodash/pick'); - -test('translatePostgresReconReportToApiReconReport translates a Postgres Reconciliation Report to an API Reconciliation Report', async(t) => { +test('translatePostgresReconReportToApiReconReport translates a Postgres Reconciliation Report to an API Reconciliation Report', (t) => { const createdTime = new Date(Date.now()); const updatedTime = new Date(Date.now()); @@ -17,21 +16,21 @@ test('translatePostgresReconReportToApiReconReport translates a Postgres Reconci error: null, created_at: createdTime, updated_at: updatedTime, - } + }; const expectedApiReconReport = { ...pick(pgReconReport, ['name', 'type', 'status', 'location']), // no error b/c null or undefined should be removed createdAt: createdTime.getTime(), updatedAt: updatedTime.getTime(), - } + }; const translatedReport = translatePostgresReconReportToApiReconReport(pgReconReport); t.deepEqual(expectedApiReconReport, translatedReport); }); -test('translatePostgresReconReportToApiReconReport translates a error Postgres Reconciliation Report with an error to an API Reconciliation Report', async (t) => { +test('translatePostgresReconReportToApiReconReport translates Postgres Reconciliation Report with an error to an API Reconciliation Report', (t) => { const createdTime = new Date(Date.now()); const updatedTime = new Date(Date.now()); @@ -46,15 +45,15 @@ test('translatePostgresReconReportToApiReconReport translates a error Postgres R }, created_at: createdTime, updated_at: updatedTime, - } + }; const expectedApiReconReport = { ...pick(pgReconReport, ['name', 'type', 'status', 'location', 'error']), createdAt: createdTime.getTime(), updatedAt: updatedTime.getTime(), - } + }; const translatedReport = translatePostgresReconReportToApiReconReport(pgReconReport); t.deepEqual(expectedApiReconReport, translatedReport); -}); \ No newline at end of file +}); From df70f15853a50eb4bab88db465d87afa867f700d Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Wed, 18 Sep 2024 13:41:40 -0400 Subject: [PATCH 43/61] CUMULUS-3837: remove redundant lines --- packages/api/tests/endpoints/test-reconciliation-reports.js | 6 ------ 1 file changed, 6 deletions(-) diff --git 
a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index a235284c552..a6f8dd8d0c1 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -66,12 +66,6 @@ let accessTokenModel; let fakeReportRecords = []; test.before(async (t) => { - process.env = { - ...process.env, - ...localStackConnectionEnv, - PG_DATABASE: testDbName, - }; - // create esClient esClient = await getEsClient('fakehost'); From 568d5c0de4a5ea556ccb9823ad03253fd40108e8 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 19 Sep 2024 10:37:12 -0400 Subject: [PATCH 44/61] CUMULUS-3837: remove a testDb --- packages/api/tests/endpoints/test-reconciliation-reports.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index a6f8dd8d0c1..7d8beb123f4 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -12,6 +12,7 @@ const cryptoRandomString = require('crypto-random-string'); const { ReconciliationReportPgModel, generateLocalTestDb, + destroyLocalTestDb, localStackConnectionEnv, migrationDir, fakeReconciliationReportRecordFactory, @@ -152,6 +153,11 @@ test.after.always(async () => { index: esIndex, }); await recursivelyDeleteS3Bucket(process.env.system_bucket); + await destroyLocalTestDb({ + knex: t.context.knex, + knexAdmin: t.context.knexAdmin, + testDbName, + }); }); test.serial('CUMULUS-911 GET without pathParameters and without an Authorization header returns an Authorization Missing response', async (t) => { From f45d85aba63e2be2fe88dca35299ae50050b31fa Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 19 Sep 2024 10:37:12 -0400 Subject: [PATCH 45/61] CUMULUS-3837: remove a testDb --- packages/api/tests/endpoints/test-reconciliation-reports.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index 7d8beb123f4..b6d6455c4f6 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -147,7 +147,7 @@ test.before(async (t) => { ); }); -test.after.always(async () => { +test.after.always(async (t) => { await accessTokenModel.deleteTable(); await esClient.client.indices.delete({ index: esIndex, From f91bd4034d3581e74983dcf1c8c4eec8b4713e89 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 19 Sep 2024 12:26:10 -0400 Subject: [PATCH 46/61] CUMULUS-3837: move lines out of try/catch --- .../api/endpoints/reconciliation-reports.js | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/packages/api/endpoints/reconciliation-reports.js b/packages/api/endpoints/reconciliation-reports.js index 7973622efa6..c878e55ad14 100644 --- a/packages/api/endpoints/reconciliation-reports.js +++ b/packages/api/endpoints/reconciliation-reports.js @@ -130,21 +130,12 @@ async function getReport(req, res) { */ async function deleteReport(req, res) { const name = req.params.name; - try { - const reconciliationReportPgModel = new ReconciliationReportPgModel(); - const knex = await getKnexClient(); - const record = await reconciliationReportPgModel.get(knex, { name }); - if (!record.location) { - return res.boom.badRequest('The 
reconciliation report record does not contain a location!'); - } - const { Bucket, Key } = parseS3Uri(record.location); + let record; - await createRejectableTransaction(knex, async () => { - if (await fileExists(Bucket, Key)) { - await deleteS3Object(Bucket, Key); - } - await reconciliationReportPgModel.delete(knex, { name }); - }); + const reconciliationReportPgModel = new ReconciliationReportPgModel(); + const knex = await getKnexClient(); + try { + record = await reconciliationReportPgModel.get(knex, { name }); } catch (error) { if (error instanceof RecordDoesNotExist) { return res.boom.notFound(`No record found for ${name}`); @@ -152,6 +143,18 @@ async function deleteReport(req, res) { throw error; } + if (!record.location) { + return res.boom.badRequest('The reconciliation report record does not contain a location!'); + } + const { Bucket, Key } = parseS3Uri(record.location); + + await createRejectableTransaction(knex, async () => { + if (await fileExists(Bucket, Key)) { + await deleteS3Object(Bucket, Key); + } + await reconciliationReportPgModel.delete(knex, { name }); + }); + if (inTestMode()) { const esClient = await getEsClient(process.env.ES_HOST); await indexer.deleteRecord({ From baa10b2e2d3d15e1cf6b8b2ede7519c1149e3c57 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Thu, 19 Sep 2024 13:17:36 -0400 Subject: [PATCH 47/61] CUMULUS-3837: make db/index back to not very ordered --- packages/db/src/index.ts | 52 ++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 52e0ed1ffe0..43bbda900a5 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -45,50 +45,59 @@ export { PostgresExecutionRecord, } from './types/execution'; export { - PostgresFile, - PostgresFileRecord, -} from './types/file'; + PostgresProvider, + PostgresProviderRecord, +} from './types/provider'; +export { + PostgresRule, + PostgresRuleRecord, +} from './types/rule'; export { PostgresGranule, PostgresGranuleRecord, } from './types/granule'; -export { - PostgresGranuleExecution, -} from './types/granule-execution'; export { PostgresPdr, PostgresPdrRecord, } from './types/pdr'; export { - PostgresProvider, - PostgresProviderRecord, -} from './types/provider'; + PostgresFile, + PostgresFileRecord, +} from './types/file'; +export { + PostgresGranuleExecution, +} from './types/granule-execution'; export { PostgresReconciliationReport, PostgresReconciliationReportRecord, } from './types/reconciliation_report'; -export { - PostgresRule, - PostgresRuleRecord, -} from './types/rule'; export { translateApiAsyncOperationToPostgresAsyncOperation, translatePostgresAsyncOperationToApiAsyncOperation, } from './translate/async_operations'; +export { + translateApiFiletoPostgresFile, + translatePostgresFileToApiFile, +} from './translate/file'; export { translateApiCollectionToPostgresCollection, translatePostgresCollectionToApiCollection, } from './translate/collections'; +export { + translateApiProviderToPostgresProvider, + translatePostgresProviderToApiProvider, +} from './translate/providers'; +export { + translatePostgresRuleToApiRule, + translateApiRuleToPostgresRule, + translateApiRuleToPostgresRuleRaw, +} from './translate/rules'; export { translateApiExecutionToPostgresExecution, translateApiExecutionToPostgresExecutionWithoutNilsRemoved, translatePostgresExecutionToApiExecution, } from './translate/executions'; -export { - translateApiFiletoPostgresFile, - translatePostgresFileToApiFile, -} from 
'./translate/file'; export { translateApiGranuleToPostgresGranule, translateApiGranuleToPostgresGranuleWithoutNilsRemoved, @@ -99,18 +108,9 @@ export { translateApiPdrToPostgresPdr, translatePostgresPdrToApiPdr, } from './translate/pdr'; -export { - translateApiProviderToPostgresProvider, - translatePostgresProviderToApiProvider, -} from './translate/providers'; export { translatePostgresReconReportToApiReconReport, } from './translate/reconciliation_reports'; -export { - translatePostgresRuleToApiRule, - translateApiRuleToPostgresRule, - translateApiRuleToPostgresRuleRaw, -} from './translate/rules'; export { getCollectionsByGranuleIds, From 1a7a104cada1c11fa177b368e2766b7ead3f88ba Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Tue, 1 Oct 2024 13:41:29 -0600 Subject: [PATCH 48/61] Update default reject configuration --- example/cumulus-tf/variables.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/example/cumulus-tf/variables.tf b/example/cumulus-tf/variables.tf index e506a492bd0..4ab4927a925 100644 --- a/example/cumulus-tf/variables.tf +++ b/example/cumulus-tf/variables.tf @@ -339,6 +339,7 @@ variable "rds_connection_timing_configuration" { createTimeoutMillis: 20000, idleTimeoutMillis: 1000, reapIntervalMillis: 1000, + rejectUnauthorized: false, } } From cfd6ad3b550847af4370947f1f1b961aa062db4b Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Tue, 1 Oct 2024 13:42:25 -0600 Subject: [PATCH 49/61] Revert "Update default reject configuration" This reverts commit 1a7a104cada1c11fa177b368e2766b7ead3f88ba. --- example/cumulus-tf/variables.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/example/cumulus-tf/variables.tf b/example/cumulus-tf/variables.tf index 4ab4927a925..e506a492bd0 100644 --- a/example/cumulus-tf/variables.tf +++ b/example/cumulus-tf/variables.tf @@ -339,7 +339,6 @@ variable "rds_connection_timing_configuration" { createTimeoutMillis: 20000, idleTimeoutMillis: 1000, reapIntervalMillis: 1000, - rejectUnauthorized: false, } } From 80c8e577e03aef403333d77bd811728397e2b9ac Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Wed, 2 Oct 2024 20:17:41 -0400 Subject: [PATCH 50/61] CUMULUS-3833: Migrate ReconciliationReports data from DynamoDB to Postgres (#3797) * CUMULUS-3833: Migrate ReconciliationReports to RDS * Update ReconciliationReportsMigration lambda * update lambda readme * correct names * update reconciliation-report-migration * revert migration-helper-async-operation no need async * update lambda * fix terraform * update test coverage number * remove unused variable * add used variable back * update package description --- CHANGELOG.md | 16 ++ .../.nycrc.json | 7 + .../reconciliation-report-migration/README.md | 18 ++ .../reconciliation-report-migration/iam.tf | 70 +++++ .../reconciliation-report-migration/main.tf | 35 +++ .../outputs.tf | 3 + .../package.json | 45 ++++ .../src/index.ts | 24 ++ .../src/reconciliation-reports.ts | 88 +++++++ .../src/types.ts | 10 + .../tests/test-index.js | 104 ++++++++ .../tests/test-reconciliation-reports.js | 245 ++++++++++++++++++ .../tsconfig.json | 11 + .../variables.tf | 59 +++++ .../versions.tf | 9 + .../webpack.config.js | 53 ++++ packages/db/src/index.ts | 1 + .../src/translate/reconciliation_reports.ts | 19 +- .../reconciliation_report_migration.tf | 21 ++ 19 files changed, 837 insertions(+), 1 deletion(-) create mode 100644 lambdas/reconciliation-report-migration/.nycrc.json create mode 100644 lambdas/reconciliation-report-migration/README.md create mode 100644 
lambdas/reconciliation-report-migration/iam.tf
 create mode 100644 lambdas/reconciliation-report-migration/main.tf
 create mode 100644 lambdas/reconciliation-report-migration/outputs.tf
 create mode 100644 lambdas/reconciliation-report-migration/package.json
 create mode 100644 lambdas/reconciliation-report-migration/src/index.ts
 create mode 100644 lambdas/reconciliation-report-migration/src/reconciliation-reports.ts
 create mode 100644 lambdas/reconciliation-report-migration/src/types.ts
 create mode 100644 lambdas/reconciliation-report-migration/tests/test-index.js
 create mode 100644 lambdas/reconciliation-report-migration/tests/test-reconciliation-reports.js
 create mode 100644 lambdas/reconciliation-report-migration/tsconfig.json
 create mode 100644 lambdas/reconciliation-report-migration/variables.tf
 create mode 100644 lambdas/reconciliation-report-migration/versions.tf
 create mode 100644 lambdas/reconciliation-report-migration/webpack.config.js
 create mode 100644 tf-modules/cumulus/reconciliation_report_migration.tf

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c73b8fcf421..ee626b4e919 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,19 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ## Phase 2 Release
 
+### Migration Notes
+
+#### CUMULUS-3833 Migration of ReconciliationReports from DynamoDB to Postgres after Cumulus is upgraded.
+
+To invoke the Lambda and start the ReconciliationReport migration, you can use the AWS Console or CLI:
+
+```bash
+aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE
+```
+
+- `PREFIX` is your Cumulus deployment prefix.
+- `OUTFILE` (**optional**) is the filepath where the Lambda output will be saved.
+
 ### Replace ElasticSearch Phase 2
 
 - **CUMULUS-3229**
@@ -26,6 +39,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
     Elasticsearch
   - Update `@cumlus/api/ecs/async-operation` to not update Elasticsearch index when reporting status of async operation
+- **CUMULUS-3833**
+  - Added `ReconciliationReportMigration` lambda to migrate ReconciliationReports from DynamoDB
+    to Postgres
 - **CUMULUS-3837**
   - Added `reconciliation_reports` table in RDS, including indexes
   - Created pg model, types, and translation for `reconciliationReports` in `@cumulus/db`
diff --git a/lambdas/reconciliation-report-migration/.nycrc.json b/lambdas/reconciliation-report-migration/.nycrc.json
new file mode 100644
index 00000000000..d7000c7c12b
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/.nycrc.json
@@ -0,0 +1,7 @@
+{
+  "extends": "../../nyc.config.js",
+  "lines": 95.0,
+  "branches": 80.0,
+  "statements": 95.0,
+  "functions": 98.0
+}
\ No newline at end of file
diff --git a/lambdas/reconciliation-report-migration/README.md b/lambdas/reconciliation-report-migration/README.md
new file mode 100644
index 00000000000..b117ad7a914
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/README.md
@@ -0,0 +1,18 @@
+# ReconciliationReportMigration Lambda
+
+The lambda migrates existing ReconciliationReports data from DynamoDB to PostgreSQL.
+
+To invoke the Lambda and start the ReconciliationReport migration, you can use the AWS Console or CLI:
+
+```bash
+aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE
+```
+
+- `PREFIX` is your Cumulus deployment prefix.
+- `OUTFILE` (**optional**) is the filepath where the Lambda output will be saved.
+
+The result will be a migration summary. For example:
+
+```
+{"reconciliation_reports":{"total_dynamo_db_records":36,"migrated":36,"failed":0,"skipped":0}}
+```
diff --git a/lambdas/reconciliation-report-migration/iam.tf b/lambdas/reconciliation-report-migration/iam.tf
new file mode 100644
index 00000000000..3fae7f2bd89
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/iam.tf
@@ -0,0 +1,70 @@
+data "aws_iam_policy_document" "lambda_assume_role_policy" {
+  statement {
+    actions = ["sts:AssumeRole"]
+    principals {
+      type        = "Service"
+      identifiers = ["lambda.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role" "reconciliation_report_migration" {
+  name                 = "${var.prefix}-reconciliation-report-migration"
+  assume_role_policy   = data.aws_iam_policy_document.lambda_assume_role_policy.json
+  permissions_boundary = var.permissions_boundary_arn
+
+  tags = var.tags
+}
+
+data "aws_iam_policy_document" "reconciliation_report_migration" {
+  statement {
+    actions = [
+      "ec2:CreateNetworkInterface",
+      "ec2:DeleteNetworkInterface",
+      "ec2:DescribeNetworkInterfaces",
+      "logs:CreateLogGroup",
+      "logs:CreateLogStream",
+      "logs:DescribeLogStreams",
+      "logs:PutLogEvents"
+    ]
+    resources = ["*"]
+  }
+
+  statement {
+    actions = [
+      "dynamodb:Scan",
+    ]
+    resources = [
+      var.dynamo_tables.reconciliation_reports.arn,
+    ]
+  }
+
+  statement {
+    actions = [
+      "secretsmanager:GetSecretValue"
+    ]
+    resources = [var.rds_user_access_secret_arn]
+  }
+}
+
+resource "aws_iam_role_policy" "reconciliation_report_migration" {
+  name   = "${var.prefix}_reconciliation_report_migration"
+  role   = aws_iam_role.reconciliation_report_migration.id
+  policy = data.aws_iam_policy_document.reconciliation_report_migration.json
+}
+
+resource "aws_security_group" "reconciliation_report_migration" {
+  count = length(var.lambda_subnet_ids) == 0 ? 0 : 1
+
+  name   = "${var.prefix}-reconciliation-report-migration"
+  vpc_id = var.vpc_id
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = var.tags
+}
diff --git a/lambdas/reconciliation-report-migration/main.tf b/lambdas/reconciliation-report-migration/main.tf
new file mode 100644
index 00000000000..bb7e543fe16
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/main.tf
@@ -0,0 +1,35 @@
+locals {
+  lambda_path = "${path.module}/dist/webpack/lambda.zip"
+}
+
+resource "aws_lambda_function" "reconciliation_report_migration" {
+  function_name    = "${var.prefix}-ReconciliationReportMigration"
+  filename         = local.lambda_path
+  source_code_hash = filebase64sha256(local.lambda_path)
+  handler          = "index.handler"
+  role             = aws_iam_role.reconciliation_report_migration.arn
+  runtime          = "nodejs20.x"
+  timeout          = lookup(var.lambda_timeouts, "ReconciliationReportMigration", 900)
+  memory_size      = lookup(var.lambda_memory_sizes, "ReconciliationReportMigration", 1024)
+
+  environment {
+    variables = {
+      databaseCredentialSecretArn = var.rds_user_access_secret_arn
+      ReconciliationReportsTable  = var.dynamo_tables.reconciliation_reports.name
+      stackName                   = var.prefix
+    }
+  }
+
+  dynamic "vpc_config" {
+    for_each = length(var.lambda_subnet_ids) == 0 ? [] : [1]
+    content {
+      subnet_ids = var.lambda_subnet_ids
+      security_group_ids = compact([
+        aws_security_group.reconciliation_report_migration[0].id,
+        var.rds_security_group_id
+      ])
+    }
+  }
+
+  tags = var.tags
+}
diff --git a/lambdas/reconciliation-report-migration/outputs.tf b/lambdas/reconciliation-report-migration/outputs.tf
new file mode 100644
index 00000000000..122c24f1abc
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/outputs.tf
@@ -0,0 +1,3 @@
+output "reconciliation_report_migration_function_arn" {
+  value = aws_lambda_function.reconciliation_report_migration.arn
+}
diff --git a/lambdas/reconciliation-report-migration/package.json b/lambdas/reconciliation-report-migration/package.json
new file mode 100644
index 00000000000..be99e45c024
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/package.json
@@ -0,0 +1,45 @@
+{
+  "name": "@cumulus/reconciliation-report-migration",
+  "version": "19.0.0",
+  "description": "Lambda function for reconciliation report migration from DynamoDB to Postgres",
+  "author": "Cumulus Authors",
+  "license": "Apache-2.0",
+  "engines": {
+    "node": ">=20.12.2"
+  },
+  "private": true,
+  "main": "./dist/lambda/index.js",
+  "types": "./dist/lambda/index.d.ts",
+  "scripts": {
+    "clean": "rm -rf dist",
+    "build": "rm -rf dist && mkdir dist && npm run prepare && npm run webpack",
+    "build-lambda-zip": "cd dist/webpack && node ../../../../bin/zip.js lambda.zip index.js",
+    "package": "npm run clean && npm run prepare && npm run webpack && npm run build-lambda-zip",
+    "test": "../../node_modules/.bin/ava",
+    "test:ci": "../../scripts/run_package_ci_unit.sh",
+    "test:coverage": "../../node_modules/.bin/nyc npm test",
+    "prepare": "npm run tsc",
+    "tsc": "../../node_modules/.bin/tsc",
+    "tsc:listEmittedFiles": "../../node_modules/.bin/tsc --listEmittedFiles",
+    "webpack": "../../node_modules/.bin/webpack"
+  },
+  "ava": {
+    "files": [
+      "tests/**/*.js"
+    ],
+    "timeout": "15m",
+    "failFast": true
+  },
+  "dependencies": {
+    "@cumulus/api": "19.0.0",
+    "@cumulus/aws-client": "19.0.0",
+    "@cumulus/common": "19.0.0",
+    "@cumulus/db": "19.0.0",
+    "@cumulus/errors": "19.0.0",
+    "@cumulus/logger": "19.0.0",
+    "@cumulus/types": "19.0.0",
+    "knex": "2.4.1",
+    "lodash": "^4.17.21",
+    "pg": "~8.12"
+  }
+}
diff --git a/lambdas/reconciliation-report-migration/src/index.ts b/lambdas/reconciliation-report-migration/src/index.ts
new file mode 100644
index 00000000000..a5c394d28fa
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/src/index.ts
@@ -0,0 +1,24 @@
+import { getKnexClient } from '@cumulus/db';
+import Logger from '@cumulus/logger';
+
+import { migrateReconciliationReports } from './reconciliation-reports';
+import { MigrationSummary } from './types';
+
+const logger = new Logger({ sender: '@cumulus/reconciliation-report-migration' });
+
+export interface HandlerEvent {
+  env?: NodeJS.ProcessEnv
+}
+
+export const handler = async (event: HandlerEvent): Promise<MigrationSummary> => {
+  const env = event.env ?? process.env;
+  const knex = await getKnexClient({ env });
+
+  try {
+    const migrationSummary = await migrateReconciliationReports(env, knex);
+    logger.info(JSON.stringify(migrationSummary));
+    return { reconciliation_reports: migrationSummary };
+  } finally {
+    await knex.destroy();
+  }
+};
diff --git a/lambdas/reconciliation-report-migration/src/reconciliation-reports.ts b/lambdas/reconciliation-report-migration/src/reconciliation-reports.ts
new file mode 100644
index 00000000000..32dec7c570e
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/src/reconciliation-reports.ts
@@ -0,0 +1,88 @@
+import { Knex } from 'knex';
+
+import { DynamoDbSearchQueue } from '@cumulus/aws-client';
+import { envUtils } from '@cumulus/common';
+import {
+  ReconciliationReportPgModel,
+  translateApiReconReportToPostgresReconReport,
+} from '@cumulus/db';
+import { RecordAlreadyMigrated, RecordDoesNotExist } from '@cumulus/errors';
+import Logger from '@cumulus/logger';
+import { ApiReconciliationReportRecord } from '@cumulus/types/api/reconciliation_reports';
+
+import { MigrationResult } from './types';
+
+const logger = new Logger({ sender: '@cumulus/data-migration/reconciliation-reports' });
+
+export const migrateReconciliationReportRecord = async (
+  dynamoRecord: ApiReconciliationReportRecord,
+  knex: Knex
+): Promise<void> => {
+  const reconReportPgModel = new ReconciliationReportPgModel();
+
+  let existingRecord;
+  try {
+    existingRecord = await reconReportPgModel.get(knex, { name: dynamoRecord.name });
+  } catch (error) {
+    if (!(error instanceof RecordDoesNotExist)) {
+      throw error;
+    }
+  }
+
+  if (existingRecord
+    && dynamoRecord.updatedAt
+    && existingRecord.updated_at >= new Date(dynamoRecord.updatedAt)) {
+    throw new RecordAlreadyMigrated(`Reconciliation report ${dynamoRecord.name} was already migrated, skipping`);
+  }
+
+  const updatedRecord = translateApiReconReportToPostgresReconReport(
+    dynamoRecord
+  );
+
+  await reconReportPgModel.upsert(knex, updatedRecord);
+};
+
+export const migrateReconciliationReports = async (
+  env: NodeJS.ProcessEnv,
+  knex: Knex
+): Promise<MigrationResult> => {
+  const reconciliationReportsTable = envUtils.getRequiredEnvVar('ReconciliationReportsTable', env);
+
+  const searchQueue = new DynamoDbSearchQueue({
+    TableName: reconciliationReportsTable,
+  });
+
+  const migrationSummary = {
+    total_dynamo_db_records: 0,
+    migrated: 0,
+    failed: 0,
+    skipped: 0,
+  };
+
+  let record = await searchQueue.peek();
+  /* eslint-disable no-await-in-loop */
+  while (record) {
+    migrationSummary.total_dynamo_db_records += 1;
+
+    try {
+      await migrateReconciliationReportRecord(record as any, knex);
+      migrationSummary.migrated += 1;
+    } catch (error) {
+      if (error instanceof RecordAlreadyMigrated) {
+        migrationSummary.skipped += 1;
+      } else {
+        migrationSummary.failed += 1;
+        logger.error(
+          `Could not create reconciliationReport record in RDS for Dynamo reconciliationReport name ${record.name}:`,
+          error
+        );
+      }
+    }
+
+    await searchQueue.shift();
+    record = await searchQueue.peek();
+  }
+  /* eslint-enable no-await-in-loop */
+  logger.info(`successfully migrated ${migrationSummary.migrated} reconciliationReport records`);
+  return migrationSummary;
+};
diff --git a/lambdas/reconciliation-report-migration/src/types.ts b/lambdas/reconciliation-report-migration/src/types.ts
new file mode 100644
index 00000000000..08119110cae
--- /dev/null
+++ b/lambdas/reconciliation-report-migration/src/types.ts
@@ -0,0 +1,10 @@
+export type MigrationResult = {
+  total_dynamo_db_records: number,
+  skipped:
number, + migrated: number, + failed: number, +}; + +export type MigrationSummary = { + reconciliation_reports: MigrationResult +}; diff --git a/lambdas/reconciliation-report-migration/tests/test-index.js b/lambdas/reconciliation-report-migration/tests/test-index.js new file mode 100644 index 00000000000..b355c0b100d --- /dev/null +++ b/lambdas/reconciliation-report-migration/tests/test-index.js @@ -0,0 +1,104 @@ +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); + +const ReconciliationReport = require('@cumulus/api/models/reconciliation-reports'); + +const { + createBucket, + putJsonS3Object, + recursivelyDeleteS3Bucket, +} = require('@cumulus/aws-client/S3'); + +const { + generateLocalTestDb, + destroyLocalTestDb, + localStackConnectionEnv, + migrationDir, +} = require('@cumulus/db'); + +const { handler } = require('../dist/lambda'); +const testDbName = `reconciliation_report_migration_1_${cryptoRandomString({ length: 10 })}`; +const workflow = cryptoRandomString({ length: 10 }); + +test.before(async (t) => { + process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: testDbName, + stackName: cryptoRandomString({ length: 10 }), + system_bucket: cryptoRandomString({ length: 10 }), + ReconciliationReportsTable: cryptoRandomString({ length: 10 }), + }; + + await createBucket(process.env.system_bucket); + + const workflowfile = `${process.env.stackName}/workflows/${workflow}.json`; + const messageTemplateKey = `${process.env.stackName}/workflow_template.json`; + + t.context.reconciliationReportsModel = new ReconciliationReport({ + stackName: process.env.stackName, + systemBucket: process.env.system_bucket, + }); + + await Promise.all([ + t.context.reconciliationReportsModel.createTable(), + ]); + + await Promise.all([ + putJsonS3Object( + process.env.system_bucket, + messageTemplateKey, + { meta: 'meta' } + ), + putJsonS3Object( + process.env.system_bucket, + workflowfile, + { testworkflow: 'workflow-config' } + ), + ]); + const { knex, knexAdmin } = await generateLocalTestDb(testDbName, migrationDir); + t.context.knex = knex; + t.context.knexAdmin = knexAdmin; +}); + +test.after.always(async (t) => { + await t.context.reconciliationReportsModel.deleteTable(); + + await recursivelyDeleteS3Bucket(process.env.system_bucket); + + await destroyLocalTestDb({ + knex: t.context.knex, + knexAdmin: t.context.knexAdmin, + testDbName, + }); +}); + +test('handler migrates reconciliation reports', async (t) => { + const { reconciliationReportsModel } = t.context; + + const fakeReconciliationReport = { + name: cryptoRandomString({ length: 5 }), + type: 'Granule Inventory', + status: 'Generated', + error: {}, + createdAt: (Date.now() - 1000), + updatedAt: Date.now(), + }; + + await Promise.all([ + reconciliationReportsModel.create(fakeReconciliationReport), + ]); + + t.teardown(() => reconciliationReportsModel.delete({ name: fakeReconciliationReport.name })); + + const call = await handler({}); + const expected = { + reconciliation_reports: { + failed: 0, + migrated: 1, + skipped: 0, + total_dynamo_db_records: 1, + }, + }; + t.deepEqual(call, expected); +}); diff --git a/lambdas/reconciliation-report-migration/tests/test-reconciliation-reports.js b/lambdas/reconciliation-report-migration/tests/test-reconciliation-reports.js new file mode 100644 index 00000000000..79afe2d4a58 --- /dev/null +++ b/lambdas/reconciliation-report-migration/tests/test-reconciliation-reports.js @@ -0,0 +1,245 @@ +const cryptoRandomString = 
require('crypto-random-string'); +const omit = require('lodash/omit'); +const test = require('ava'); + +const ReconciliationReport = require('@cumulus/api/models/reconciliation-reports'); +const { dynamodbDocClient } = require('@cumulus/aws-client/services'); +const { + createBucket, + recursivelyDeleteS3Bucket, +} = require('@cumulus/aws-client/S3'); +const { + generateLocalTestDb, + destroyLocalTestDb, + ReconciliationReportPgModel, + migrationDir, +} = require('@cumulus/db'); +const { RecordAlreadyMigrated } = require('@cumulus/errors'); + +const { + migrateReconciliationReportRecord, + migrateReconciliationReports, +} = require('../dist/lambda/reconciliation-reports'); + +const testDbName = `reconciliation_reports_migration_${cryptoRandomString({ length: 10 })}`; + +const generateFakeReconciliationReport = (params) => ({ + name: cryptoRandomString({ length: 5 }), + type: 'Granule Inventory', + status: 'Generated', + error: {}, + location: `s3://${cryptoRandomString({ length: 10 })}/${cryptoRandomString({ length: 10 })}`, + createdAt: (Date.now() - 1000), + updatedAt: Date.now(), + ...params, +}); + +let reconciliationReportsModel; + +test.before(async (t) => { + process.env.stackName = cryptoRandomString({ length: 10 }); + process.env.system_bucket = cryptoRandomString({ length: 10 }); + process.env.ReconciliationReportsTable = cryptoRandomString({ length: 10 }); + + await createBucket(process.env.system_bucket); + + reconciliationReportsModel = new ReconciliationReport({ + stackName: process.env.stackName, + systemBucket: process.env.system_bucket, + }); + await reconciliationReportsModel.createTable(); + + t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); + + const { knex, knexAdmin } = await generateLocalTestDb(testDbName, migrationDir); + t.context.knex = knex; + t.context.knexAdmin = knexAdmin; +}); + +test.afterEach.always(async (t) => { + await t.context.knex('reconciliation_reports').del(); +}); + +test.after.always(async (t) => { + await reconciliationReportsModel.deleteTable(); + await recursivelyDeleteS3Bucket(process.env.system_bucket); + await destroyLocalTestDb({ + knex: t.context.knex, + knexAdmin: t.context.knexAdmin, + testDbName, + }); +}); + +test.serial('migrateReconciliationReportRecord correctly migrates reconciliationReport record', async (t) => { + const { knex, reconciliationReportPgModel } = t.context; + + const fakeReconReport = generateFakeReconciliationReport(); + await migrateReconciliationReportRecord(fakeReconReport, t.context.knex); + + const createdRecord = await reconciliationReportPgModel.get( + knex, + { name: fakeReconReport.name } + ); + + t.deepEqual( + omit(createdRecord, ['cumulus_id']), + omit({ + ...fakeReconReport, + created_at: new Date(fakeReconReport.createdAt), + updated_at: new Date(fakeReconReport.updatedAt), + }, ['createdAt', 'updatedAt']) + ); +}); + +test.serial('migrateReconciliationReportRecord correctly migrates reconciliationReport record where record.error is an object', async (t) => { + const error = { exception: 'there is an error' }; + const fakeReconReport = generateFakeReconciliationReport({ error }); + await migrateReconciliationReportRecord(fakeReconReport, t.context.knex); + + const createdRecord = await t.context.knex.queryBuilder() + .select() + .table('reconciliation_reports') + .where({ name: fakeReconReport.name }) + .first(); + + t.deepEqual( + omit(createdRecord, ['cumulus_id']), + omit({ + ...fakeReconReport, + created_at: new Date(fakeReconReport.createdAt), + updated_at: new 
Date(fakeReconReport.updatedAt), + }, ['createdAt', 'updatedAt']) + ); +}); + +test.serial('migrateReconciliationReportRecord migrates reconciliationReport record with undefined nullables', async (t) => { + const { knex, reconciliationReportPgModel } = t.context; + + const fakeReconReport = generateFakeReconciliationReport(); + delete fakeReconReport.error; + delete fakeReconReport.location; + await migrateReconciliationReportRecord(fakeReconReport, t.context.knex); + + const createdRecord = await reconciliationReportPgModel.get( + knex, + { name: fakeReconReport.name } + ); + + t.deepEqual( + omit(createdRecord, ['cumulus_id']), + omit({ + ...fakeReconReport, + error: null, + location: null, + created_at: new Date(fakeReconReport.createdAt), + updated_at: new Date(fakeReconReport.updatedAt), + }, ['createdAt', 'updatedAt']) + ); +}); + +test.serial('migrateReconciliationReportRecord throws RecordAlreadyMigrated error if already migrated record is newer', async (t) => { + const fakeReconReport = generateFakeReconciliationReport({ + updatedAt: Date.now(), + }); + + await migrateReconciliationReportRecord(fakeReconReport, t.context.knex); + + const olderFakeReconReport = { + ...fakeReconReport, + updatedAt: Date.now() - 1000, // older than fakeReconReport + }; + + await t.throwsAsync( + migrateReconciliationReportRecord(olderFakeReconReport, t.context.knex), + { instanceOf: RecordAlreadyMigrated } + ); +}); + +test.serial('migrateReconciliationReportRecord updates an already migrated record if the updated date is newer', async (t) => { + const { knex, reconciliationReportPgModel } = t.context; + + const fakeReconReport = generateFakeReconciliationReport({ + updatedAt: Date.now() - 1000, + }); + await migrateReconciliationReportRecord(fakeReconReport, t.context.knex); + + const newerFakeReconReport = generateFakeReconciliationReport({ + ...fakeReconReport, + updatedAt: Date.now(), + }); + await migrateReconciliationReportRecord(newerFakeReconReport, t.context.knex); + + const createdRecord = await reconciliationReportPgModel.get( + knex, + { name: fakeReconReport.name } + ); + + t.deepEqual(createdRecord.updated_at, new Date(newerFakeReconReport.updatedAt)); +}); + +test.serial('migrateReconciliationReports processes multiple reconciliation reports', async (t) => { + const { knex, reconciliationReportPgModel } = t.context; + + const fakeReconReport1 = generateFakeReconciliationReport(); + const fakeReconReport2 = generateFakeReconciliationReport(); + + await Promise.all([ + reconciliationReportsModel.create(fakeReconReport1), + reconciliationReportsModel.create(fakeReconReport2), + ]); + t.teardown(() => Promise.all([ + reconciliationReportsModel.delete({ name: fakeReconReport1.name }), + reconciliationReportsModel.delete({ name: fakeReconReport2.name }), + ])); + + const migrationSummary = await migrateReconciliationReports(process.env, t.context.knex); + t.deepEqual(migrationSummary, { + total_dynamo_db_records: 2, + skipped: 0, + failed: 0, + migrated: 2, + }); + + const records = await reconciliationReportPgModel.search( + knex, + {} + ); + t.is(records.length, 2); +}); + +test.serial('migrateReconciliationReports processes all non-failing records', async (t) => { + const { knex, reconciliationReportPgModel } = t.context; + + const fakeReconReport1 = generateFakeReconciliationReport(); + const fakeReconReport2 = generateFakeReconciliationReport(); + + // remove required source field so that record will fail + delete fakeReconReport1.status; + + await Promise.all([ + // Have to use Dynamo 
client directly because creating + // via model won't allow creation of an invalid record + dynamodbDocClient().put({ + TableName: process.env.ReconciliationReportsTable, + Item: fakeReconReport1, + }), + reconciliationReportsModel.create(fakeReconReport2), + ]); + t.teardown(() => Promise.all([ + reconciliationReportsModel.delete({ name: fakeReconReport1.name }), + reconciliationReportsModel.delete({ name: fakeReconReport2.name }), + ])); + + const migrationSummary = await migrateReconciliationReports(process.env, t.context.knex); + t.deepEqual(migrationSummary, { + total_dynamo_db_records: 2, + skipped: 0, + failed: 1, + migrated: 1, + }); + const records = await reconciliationReportPgModel.search( + knex, + {} + ); + t.is(records.length, 1); +}); diff --git a/lambdas/reconciliation-report-migration/tsconfig.json b/lambdas/reconciliation-report-migration/tsconfig.json new file mode 100644 index 00000000000..4b4ae9578ce --- /dev/null +++ b/lambdas/reconciliation-report-migration/tsconfig.json @@ -0,0 +1,11 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "dist/lambda", + "declaration": false, + "declarationMap": false, + "sourceMap": true, + "removeComments": true + }, + "include": ["src"], +} diff --git a/lambdas/reconciliation-report-migration/variables.tf b/lambdas/reconciliation-report-migration/variables.tf new file mode 100644 index 00000000000..ad4ff7463cf --- /dev/null +++ b/lambdas/reconciliation-report-migration/variables.tf @@ -0,0 +1,59 @@ +# Required + +variable "dynamo_tables" { + description = "A map of objects with the `arn` and `name` of every DynamoDB table for your Cumulus deployment." + type = map(object({ name = string, arn = string })) +} + +variable "permissions_boundary_arn" { + type = string +} + +variable "prefix" { + type = string +} + +variable "rds_user_access_secret_arn" { + description = "RDS User Database Login Credential Secret ID" + type = string +} + +variable "system_bucket" { + description = "The name of the S3 bucket to be used for staging deployment files" + type = string +} + +# Optional + +variable "lambda_memory_sizes" { + description = "Configurable map of memory sizes for lambdas" + type = map(number) + default = {} +} + +variable "lambda_timeouts" { + description = "Configurable map of timeouts for lambdas" + type = map(number) + default = {} +} + +variable "lambda_subnet_ids" { + type = list(string) + default = [] +} + +variable "rds_security_group_id" { + description = "RDS Security Group used for access to RDS cluster" + type = string + default = "" +} + +variable "tags" { + type = map(string) + default = {} +} + +variable "vpc_id" { + type = string + default = null +} diff --git a/lambdas/reconciliation-report-migration/versions.tf b/lambdas/reconciliation-report-migration/versions.tf new file mode 100644 index 00000000000..c62a4968cfd --- /dev/null +++ b/lambdas/reconciliation-report-migration/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + required_version = ">= 1.5" +} diff --git a/lambdas/reconciliation-report-migration/webpack.config.js b/lambdas/reconciliation-report-migration/webpack.config.js new file mode 100644 index 00000000000..b0a194e3834 --- /dev/null +++ b/lambdas/reconciliation-report-migration/webpack.config.js @@ -0,0 +1,53 @@ + +const path = require('path'); +const { IgnorePlugin } = require('webpack'); + +const ignoredPackages = [ + 'better-sqlite3', + 'mssql', + 'mssql/lib/base', + 'mssql/package.json', + 
'mysql', + 'mysql2', + 'oracledb', + 'pg-native', + 'pg-query-stream', + 'sqlite3', + 'tedious' +]; + +module.exports = { + plugins: [ + new IgnorePlugin({ + resourceRegExp: new RegExp(`^(${ignoredPackages.join('|')})$`) + }), + ], + mode: 'production', + entry: './dist/lambda/index.js', + output: { + chunkFormat: false, + libraryTarget: 'commonjs2', + filename: 'index.js', + path: path.resolve(__dirname, 'dist', 'webpack') + }, + module: { + rules: [ + { + test: /\.js$/, + exclude: /node_modules/, + use: [ + { + loader: 'babel-loader', + options: { + cacheDirectory: true + }, + }, + ], + }, + ], + }, + target: 'node', + externals: [ + /@aws-sdk\// + ] +}; diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index 97e693c3466..b26a673067c 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -109,6 +109,7 @@ export { translatePostgresPdrToApiPdr, } from './translate/pdr'; export { + translateApiReconReportToPostgresReconReport, translatePostgresReconReportToApiReconReport, } from './translate/reconciliation_reports'; diff --git a/packages/db/src/translate/reconciliation_reports.ts b/packages/db/src/translate/reconciliation_reports.ts index 1f684b341f9..64ec486460e 100644 --- a/packages/db/src/translate/reconciliation_reports.ts +++ b/packages/db/src/translate/reconciliation_reports.ts @@ -1,9 +1,26 @@ import { ApiReconciliationReportRecord } from '@cumulus/types/api/reconciliation_reports'; -import { PostgresReconciliationReportRecord } from '../types/reconciliation_report'; +import { PostgresReconciliationReport, PostgresReconciliationReportRecord } from '../types/reconciliation_report'; const { removeNilProperties } = require('@cumulus/common/util'); const pick = require('lodash/pick'); +/** + * Generate a PostgreSQL Reconciliation Report from an API record. + * + * @param record - an API reconciliation report record + * @returns a PostgreSQL reconciliation report + */ +export const translateApiReconReportToPostgresReconReport = ( + record: ApiReconciliationReportRecord +): PostgresReconciliationReport => { + const pgReconciliationReport: PostgresReconciliationReport = removeNilProperties({ + ...pick(record, ['name', 'type', 'status', 'location', 'error']), + created_at: (record.createdAt ? new Date(record.createdAt) : undefined), + updated_at: (record.updatedAt ? new Date(record.updatedAt) : undefined), + }); + return pgReconciliationReport; +}; + /** * Generate an API Reconciliation Report record from a PostgreSQL record. 
* diff --git a/tf-modules/cumulus/reconciliation_report_migration.tf b/tf-modules/cumulus/reconciliation_report_migration.tf new file mode 100644 index 00000000000..c6132d136a4 --- /dev/null +++ b/tf-modules/cumulus/reconciliation_report_migration.tf @@ -0,0 +1,21 @@ +module "reconciliation_report_migration_lambda" { + source = "../../lambdas/reconciliation-report-migration" + + prefix = var.prefix + system_bucket = var.system_bucket + + dynamo_tables = var.dynamo_tables + + lambda_subnet_ids = var.lambda_subnet_ids + lambda_timeouts = var.lambda_timeouts + lambda_memory_sizes = var.lambda_memory_sizes + + permissions_boundary_arn = var.permissions_boundary_arn + + rds_security_group_id = var.rds_security_group + rds_user_access_secret_arn = var.rds_user_access_secret_arn + + tags = var.tags + vpc_id = var.vpc_id +} + From 90d8f6d6d4a1dea39e9df2ba6b853ddbae11ee2a Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Thu, 10 Oct 2024 14:48:41 -0600 Subject: [PATCH 51/61] CUMULUS-3806 update Orca Recon Report to use postgres as source of truth (#3794) * Update ORCA report to only read from postgres database * Parameterize getGranulesByApiPropertiesQuery * Fix unit bugs * Fix unexplained lint error :( * Fix granule translation for report output * Fix/improve spec tests * Update typings for orca-backup-reconciliation-report * Pre-review tag minor fixes * Remove configuration TODO, in favor of PR comment * Update typings * Remove premature mods * Update CHANGELOG * Fix recon report spec * Minor typedef refactor * Fix units * Re-order CHANGELOG * Fix merge lint issue --- CHANGELOG.md | 9 +- .../CreateReconciliationReportSpec.js | 148 ++-------- .../lambdas/create-reconciliation-report.js | 15 +- .../lambdas/internal-reconciliation-report.js | 8 +- .../reports/granule-inventory-report.js | 8 +- .../orca-backup-reconciliation-report.js | 273 ++++++++++++++---- .../test-create-reconciliation-report.js | 148 +++------- .../test-orca-backup-reconciliation-report.js | 207 +++++++++---- packages/db/src/lib/granule.ts | 66 +++-- packages/db/tests/lib/test-granule.js | 96 +++--- 10 files changed, 543 insertions(+), 435 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee626b4e919..586ae420383 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,13 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE Elasticsearch - Update `@cumlus/api/ecs/async-operation` to not update Elasticsearch index when reporting status of async operation +- **CUMULUS-3806** + - Update `@cumulus/db/lib/granule.getGranulesByApiPropertiesQuery` to + be parameterized and include a modifier on `temporalBoundByCreatedAt` + - Remove endpoint call to and all tests for Internal Reconciliation Reports + and updated API to throw an error if report is requested + - Update Orca reconciliation reports to pull granules for comparison from + postgres via `getGranulesByApiPropertiesQuery` - **CUMULUS-3833** - Added `ReconciliationReportMigration` lambda to migrate ReconciliationReports from DynamoDB to Postgres @@ -46,7 +53,7 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE - Added `reconciliation_reports` table in RDS, including indexes - Created pg model, types, and translation for `reconciliationReports` in `@cumulus/db` - Created api types for `reconciliation_reports` in `@cumulus/types/api` - - Updated reconciliation reports lambda to write to new RDS table instead of Dynamo + - Updated reconciliation reports lambda to write to new RDS table instead of Dynamo - 
Updated `@cumulus/api/endpoints/reconciliation-reports` `getReport` and `deleteReport` to work with the new RDS table instead of Dynamo ## [Unreleased] diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index 9bb2aa9237d..25842c9ae39 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -111,13 +111,10 @@ const createActiveCollection = async (prefix, sourceBucket) => { const sourcePath = `${prefix}/tmp/${randomId('test-')}`; // Create the collection - const newCollection = await createCollection( - prefix, - { - duplicateHandling: 'error', - process: 'modis', - } - ); + const newCollection = await createCollection(prefix, { + duplicateHandling: 'error', + process: 'modis', + }); // Create the S3 provider const provider = await createProvider(prefix, { host: sourceBucket }); @@ -150,7 +147,12 @@ const createActiveCollection = async (prefix, sourceBucket) => { }; const workflowExecution = await buildAndExecuteWorkflow( - prefix, sourceBucket, 'IngestGranule', newCollection, provider, inputPayload + prefix, + sourceBucket, + 'IngestGranule', + newCollection, + provider, + inputPayload ); ingestGranuleExecutionArn = workflowExecution.executionArn; @@ -160,7 +162,10 @@ const createActiveCollection = async (prefix, sourceBucket) => { { prefix, granuleId: inputPayload.granules[0].granuleId, - collectionId: constructCollectionId(newCollection.name, newCollection.version), + collectionId: constructCollectionId( + newCollection.name, + newCollection.version + ), }, 'completed' ); @@ -172,10 +177,15 @@ const createActiveCollection = async (prefix, sourceBucket) => { status: 'completed', }); - await getGranuleWithStatus({ prefix, + await getGranuleWithStatus({ + prefix, granuleId, - collectionId: constructCollectionId(newCollection.name, newCollection.version), - status: 'completed' }); + collectionId: constructCollectionId( + newCollection.name, + newCollection.version + ), + status: 'completed', + }); return newCollection; }; @@ -611,114 +621,6 @@ describe('When there are granule differences and granule reconciliation is run', }); }); - // TODO: the internal report functionality will be removed after collections/granules is changed to no longer use ES - xdescribe('Create an Internal Reconciliation Report to monitor internal discrepancies', () => { - // report record in db and report in s3 - let reportRecord; - let report; - let internalReportAsyncOperationId; - - afterAll(async () => { - if (internalReportAsyncOperationId) { - await deleteAsyncOperation({ prefix: config.stackName, asyncOperationId: internalReportAsyncOperationId }); - } - }); - - it('generates an async operation through the Cumulus API', async () => { - if (beforeAllFailed) fail(beforeAllFailed); - const request = { - reportType: 'Internal', - reportName: randomId('InternalReport'), - startTimestamp, - endTimestamp: moment.utc().format(), - collectionId, - granuleId: [publishedGranuleId, dbGranuleId, randomId('granuleId')], - provider: [randomId('provider'), `s3_provider${testSuffix}`], - }; - const response = await reconciliationReportsApi.createReconciliationReport({ - prefix: config.stackName, - request, - }); - - const responseBody = JSON.parse(response.body); - internalReportAsyncOperationId = responseBody.id; - console.log('internalReportAsyncOperationId', 
internalReportAsyncOperationId); - expect(response.statusCode).toBe(202); - }); - - it('generates reconciliation report through the Cumulus API', async () => { - if (beforeAllFailed) fail(beforeAllFailed); - let asyncOperation; - try { - asyncOperation = await waitForAsyncOperationStatus({ - id: internalReportAsyncOperationId, - status: 'SUCCEEDED', - stackName: config.stackName, - retryOptions: { - retries: 60, - factor: 1.08, - }, - }); - } catch (error) { - fail(error); - } - expect(asyncOperation.operationType).toBe('Reconciliation Report'); - reportRecord = JSON.parse(asyncOperation.output); - }); - - it('fetches a reconciliation report through the Cumulus API', async () => { - if (beforeAllFailed) fail(beforeAllFailed); - const reportContent = await fetchReconciliationReport(config.stackName, reportRecord.name); - report = JSON.parse(reportContent); - expect(report.reportType).toBe('Internal'); - expect(report.status).toBe('SUCCESS'); - }); - - it('generates a report showing number of collections that are in both ES and DB', () => { - if (beforeAllFailed) fail(beforeAllFailed); - expect(report.collections.okCount).toBe(1); - expect(report.collections.withConflicts.length).toBe(0); - expect(report.collections.onlyInEs.length).toBe(0); - expect(report.collections.onlyInDb.length).toBe(0); - }); - - it('generates a report showing number of granules that are in both ES and DB', () => { - if (beforeAllFailed) fail(beforeAllFailed); - expect(report.granules.okCount).toBe(2); - expect(report.granules.withConflicts.length).toBe(0); - if (report.granules.withConflicts.length !== 0) { - console.log(`XXXX ${JSON.stringify(report.granules.withConflicts)}`); - } - expect(report.granules.onlyInEs.length).toBe(0); - expect(report.granules.onlyInDb.length).toBe(0); - }); - - it('deletes a reconciliation report through the Cumulus API', async () => { - if (beforeAllFailed) fail(beforeAllFailed); - await reconciliationReportsApi.deleteReconciliationReport({ - prefix: config.stackName, - name: reportRecord.name, - }); - - const parsed = parseS3Uri(reportRecord.location); - const exists = await fileExists(parsed.Bucket, parsed.Key); - expect(exists).toBeFalse(); - - let responseError; - try { - await reconciliationReportsApi.getReconciliationReport({ - prefix: config.stackName, - name: reportRecord.name, - }); - } catch (error) { - responseError = error; - } - - expect(responseError.statusCode).toBe(404); - expect(JSON.parse(responseError.apiMessage).message).toBe(`No record found for ${reportRecord.name}`); - }); - }); - describe('Creates \'Granule Inventory\' reports.', () => { let reportRecord; let reportArray; @@ -834,7 +736,7 @@ describe('When there are granule differences and granule reconciliation is run', }); // TODO: fix tests in CUMULUS-3806 when CreateReconciliationReport lambda is changed to query postgres - xdescribe('Create an ORCA Backup Reconciliation Report to monitor ORCA backup discrepancies', () => { + describe('Create an ORCA Backup Reconciliation Report to monitor ORCA backup discrepancies', () => { // report record in db and report in s3 let reportRecord; let report; @@ -915,6 +817,8 @@ describe('When there are granule differences and granule reconciliation is run', expect(granules.conflictFilesCount).toBe(6); expect(granules.onlyInCumulus.length).toBe(1); expect(granules.onlyInCumulus[0].granuleId).toBe(dbGranuleId); + expect(granules.onlyInCumulus[0].collectionId).toBe(collectionId); + expect(granules.onlyInCumulus[0].provider).toBe(`s3_provider${testSuffix}`); 
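+      // (collectionId and provider on the report entries are now derived from the Postgres
+      // granule record: constructCollectionId(collectionName, collectionVersion) and providerName)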
expect(granules.onlyInCumulus[0].okFilesCount).toBe(1); expect(granules.onlyInCumulus[0].cumulusFilesCount).toBe(5); expect(granules.onlyInCumulus[0].orcaFilesCount).toBe(0); @@ -926,6 +830,8 @@ describe('When there are granule differences and granule reconciliation is run', } expect(granules.withConflicts.length).toBe(1); expect(granules.withConflicts[0].granuleId).toBe(publishedGranuleId); + expect(granules.withConflicts[0].collectionId).toBe(collectionId); + expect(granules.withConflicts[0].provider).toBe(`s3_provider${testSuffix}`); expect(granules.withConflicts[0].okFilesCount).toBe(4); expect(granules.withConflicts[0].cumulusFilesCount).toBe(5); expect(granules.withConflicts[0].orcaFilesCount).toBe(4); diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index fb97a74e780..450861433dd 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -24,15 +24,14 @@ const { const { ESCollectionGranuleQueue } = require('@cumulus/es-client/esCollectionGranuleQueue'); const Collection = require('@cumulus/es-client/collections'); const { ESSearchQueue } = require('@cumulus/es-client/esSearchQueue'); -const { indexReconciliationReport } = require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); const Logger = require('@cumulus/logger'); +const { getEsClient } = require('@cumulus/es-client/search'); +const { indexReconciliationReport } = require('@cumulus/es-client/indexer'); const { ReconciliationReportPgModel, translatePostgresReconReportToApiReconReport, } = require('@cumulus/db'); -const { createInternalReconciliationReport } = require('./internal-reconciliation-report'); const { createGranuleInventoryReport } = require('./reports/granule-inventory-report'); const { createOrcaBackupReconciliationReport } = require('./reports/orca-backup-reconciliation-report'); const { errorify, filenamify } = require('../lib/utils'); @@ -819,8 +818,8 @@ async function processRequest(params) { reportName, systemBucket, stackName, - esClient = await getEsClient(), knex = await getKnexClient(env), + esClient = await getEsClient(), } = params; const createStartTime = moment.utc(); const reportRecordName = reportName @@ -854,14 +853,17 @@ async function processRequest(params) { }; log.info(`Beginning ${reportType} report with params: ${JSON.stringify(recReportParams)}`); if (reportType === 'Internal') { - await createInternalReconciliationReport(recReportParams); + log.error( + 'Internal Reconciliation Reports are no longer valid, as Cumulus is no longer utilizing Elasticsearch' + ); + throw new Error('Internal Reconciliation Reports are no longer valid'); } else if (reportType === 'Granule Inventory') { await createGranuleInventoryReport(recReportParams); } else if (reportType === 'ORCA Backup') { await createOrcaBackupReconciliationReport(recReportParams); } else { // reportType is in ['Inventory', 'Granule Not Found'] - await createReconciliationReport(recReportParams); + await createReconciliationReport(recReportParams); // TODO Update to not use elasticsearch } const generatedRecord = { @@ -900,6 +902,7 @@ async function handler(event) { process.env.CMR_LIMIT = process.env.CMR_LIMIT || 5000; process.env.CMR_PAGE_SIZE = process.env.CMR_PAGE_SIZE || 200; + //TODO: Remove irrelevant env vars from terraform after ES reports are removed const varsToLog = ['CMR_LIMIT', 'CMR_PAGE_SIZE', 'ES_SCROLL', 'ES_SCROLL_SIZE']; const envsToLog = 
pickBy(process.env, (value, key) => varsToLog.includes(key)); log.info(`CMR and ES Environment variables: ${JSON.stringify(envsToLog)}`); diff --git a/packages/api/lambdas/internal-reconciliation-report.js b/packages/api/lambdas/internal-reconciliation-report.js index 657da6500d6..0eb926628fa 100644 --- a/packages/api/lambdas/internal-reconciliation-report.js +++ b/packages/api/lambdas/internal-reconciliation-report.js @@ -263,11 +263,11 @@ async function reportForGranulesByCollectionId(collectionId, recReportParams) { ...recReportParams, collectionIds: collectionId, }); - const granulesSearchQuery = getGranulesByApiPropertiesQuery( - recReportParams.knex, + const granulesSearchQuery = getGranulesByApiPropertiesQuery({ + knex: recReportParams.knex, searchParams, - ['collectionName', 'collectionVersion', 'granule_id'] - ); + sortByFields: ['collectionName', 'collectionVersion', 'granule_id'], + }); const pgGranulesSearchClient = new QuerySearchClient( granulesSearchQuery, 100 // arbitrary limit on how items are fetched at once diff --git a/packages/api/lambdas/reports/granule-inventory-report.js b/packages/api/lambdas/reports/granule-inventory-report.js index e4f9d92e4bb..7a6abcf157e 100644 --- a/packages/api/lambdas/reports/granule-inventory-report.js +++ b/packages/api/lambdas/reports/granule-inventory-report.js @@ -42,11 +42,11 @@ async function createGranuleInventoryReport(recReportParams) { const { reportKey, systemBucket } = recReportParams; const searchParams = convertToDBGranuleSearchParams(recReportParams); - const granulesSearchQuery = getGranulesByApiPropertiesQuery( - recReportParams.knex, + const granulesSearchQuery = getGranulesByApiPropertiesQuery({ + knex: recReportParams.knex, searchParams, - ['collectionName', 'collectionVersion', 'granule_id'] - ); + sortByFields: ['collectionName', 'collectionVersion', 'granule_id'], + }); const pgGranulesSearchClient = new QuerySearchClient( granulesSearchQuery, 100 // arbitrary limit on how items are fetched at once diff --git a/packages/api/lambdas/reports/orca-backup-reconciliation-report.js b/packages/api/lambdas/reports/orca-backup-reconciliation-report.js index bac82da2dbb..c22fecaabed 100644 --- a/packages/api/lambdas/reports/orca-backup-reconciliation-report.js +++ b/packages/api/lambdas/reports/orca-backup-reconciliation-report.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const cloneDeep = require('lodash/cloneDeep'); @@ -8,19 +10,83 @@ const set = require('lodash/set'); const moment = require('moment'); const path = require('path'); +const { + getGranulesByApiPropertiesQuery, + QuerySearchClient, + getKnexClient, + FilePgModel, +} = require('@cumulus/db'); const { s3 } = require('@cumulus/aws-client/services'); -const { ESSearchQueue } = require('@cumulus/es-client/esSearchQueue'); const Logger = require('@cumulus/logger'); -const { constructCollectionId } = require('@cumulus/message/Collections'); +const { deconstructCollectionId, constructCollectionId } = require('@cumulus/message/Collections'); +const filePgModel = new FilePgModel(); const { - convertToESCollectionSearchParams, - convertToESGranuleSearchParamsWithCreatedAtRange, + convertToDBGranuleSearchParams, convertToOrcaGranuleSearchParams, initialReportHeader, } = require('../../lib/reconciliationReport'); const ORCASearchCatalogQueue = require('../../lib/ORCASearchCatalogQueue'); +// Typedefs +/** + * @typedef {Object} ConflictFile + * @property {string} fileName + * @property {string} bucket + * @property {string} key + * @property {string} [orcaBucket] + * 
@property {string} reason + */ + +/** @typedef { import('@cumulus/db').PostgresGranuleRecord } PostgresGranuleRecord */ +/** + * @typedef {Object} GranuleReport + * @property {boolean} ok + * @property {number} okFilesCount + * @property {number} cumulusFilesCount + * @property {number} orcaFilesCount + * @property {string} granuleId + * @property {string} collectionId + * @property {string} provider + * @property {number} createdAt + * @property {number} updatedAt + * @property {ConflictFile[]} conflictFiles + */ +/** + * @typedef {Object} CollectionConfig + */ + +/** @typedef {import('@cumulus/db').PostgresFileRecord} PostgresFileRecord */ + +/** + * @typedef {Object} OrcaReportGranuleObject + * @property {string} collectionId - The ID of the collection + * @property {string} collectionName - The name of the collection associated with the granule + * @property {string} collectionVersion - The version of + * the collection associated with the granule + * @property {string} providerName - The name of the provider associated with the granule + * @property {PostgresFileRecord[]} files - The files associated with the granule + */ +/** +* @typedef {import('knex').Knex} Knex +*/ +/** + * @typedef {Object} GranulesReport + * @property {number} okCount - The count of granules that are OK. + * @property {number} cumulusCount - The count of granules in Cumulus. + * @property {number} orcaCount - The count of granules in ORCA. + * @property {number} okFilesCount - The count of files that are OK. + * @property {number} cumulusFilesCount - The count of files in Cumulus. + * @property {number} orcaFilesCount - The count of files in ORCA. + * @property {number} conflictFilesCount - The count of files with conflicts. + * @property {Array} withConflicts - The list of granules with conflicts. + * @property {Array} onlyInCumulus - The list of granules only in Cumulus. + * @property {Array} onlyInOrca - The list of granules only in ORCA. 
+ */ + +/** @typedef {OrcaReportGranuleObject & PostgresGranuleRecord } CumulusGranule */ + const log = new Logger({ sender: '@api/lambdas/orca-backup-reconciliation-report' }); const fileConflictTypes = { @@ -29,29 +95,44 @@ const fileConflictTypes = { onlyInOrca: 'onlyInOrca', }; -const granuleFields = ['granuleId', 'collectionId', 'provider', 'createdAt', 'updatedAt']; - /** * Fetch orca configuration for all or specified collections * * @param {Object} recReportParams - input report params - * @param {Object} recReportParams.collectionIds - array of collectionIds - * @returns {Promise} - list of { collectionId, orca configuration } + * @param {String[]} recReportParams.collectionIds - array of collectionIds + * @returns {Promise} - list of { collectionId, orca configuration } */ async function fetchCollectionsConfig(recReportParams) { + const knex = await getKnexClient(); + /** @type {CollectionConfig} */ const collectionsConfig = {}; - const searchParams = convertToESCollectionSearchParams(pick(recReportParams, ['collectionIds'])); - const esCollectionsIterator = new ESSearchQueue( - { ...searchParams, sort_key: ['name', 'version'] }, 'collection', process.env.ES_INDEX - ); - let nextEsItem = await esCollectionsIterator.shift(); - while (nextEsItem) { - const collectionId = constructCollectionId(nextEsItem.name, nextEsItem.version); - const excludedFileExtensions = get(nextEsItem, 'meta.orca.excludedFileExtensions'); - if (excludedFileExtensions) set(collectionsConfig, `${collectionId}.orca.excludedFileExtensions`, excludedFileExtensions); - nextEsItem = await esCollectionsIterator.shift(); // eslint-disable-line no-await-in-loop + const query = knex('collections') + .select('name', 'version', 'meta'); + if (recReportParams.collectionIds) { //TODO typing + const collectionObjects = recReportParams.collectionIds.map((collectionId) => + deconstructCollectionId(collectionId)); + query.where((builder) => { + collectionObjects.forEach(({ name, version }) => { + builder.orWhere((qb) => { + qb.where('name', name).andWhere('version', version); + }); + }); + }); } + const pgCollectionSearchClient = new QuerySearchClient(query, 100); + + /** @type {{ name: string, version: string, meta: Object }} */ + // @ts-ignore TODO: Ticket CUMULUS-3887 filed to resolve + let nextPgItem = await pgCollectionSearchClient.shift(); + while (nextPgItem) { + const collectionId = constructCollectionId(nextPgItem.name, nextPgItem.version); + const excludedFileExtensions = get(nextPgItem, 'meta.orca.excludedFileExtensions'); + if (excludedFileExtensions) set(collectionsConfig, `${collectionId}.orca.excludedFileExtensions`, excludedFileExtensions); + /** @type {{ name: string, version: string, meta: Object }} */ + // @ts-ignore TODO: Ticket CUMULUS-3887 filed to resolve + nextPgItem = await pgCollectionSearchClient.shift(); // eslint-disable-line no-await-in-loop + } return collectionsConfig; } @@ -72,18 +153,28 @@ function shouldFileBeExcludedFromOrca(collectionsConfig, collectionId, fileName) * compare cumulus granule with its orcaGranule if any, and generate report * * @param {Object} params - * @param {Object} params.collectionsConfig - collections configuration - * @param {Object} params.cumulusGranule - cumulus granule + * @param {CollectionConfig} params.collectionsConfig - collections configuration + * @param {CumulusGranule} params.cumulusGranule - cumulus granule * @param {Object} params.orcaGranule - orca granule - * @returns {Object} - discrepency report of the granule + * @returns {GranuleReport} - 
discrepancy report of the granule */ function getReportForOneGranule({ collectionsConfig, cumulusGranule, orcaGranule }) { + /** @type {GranuleReport} */ const granuleReport = { ok: false, okFilesCount: 0, cumulusFilesCount: 0, orcaFilesCount: 0, - ...pick(cumulusGranule, granuleFields), + ...{ + granuleId: cumulusGranule.granule_id, + collectionId: constructCollectionId( + cumulusGranule.collectionName, + cumulusGranule.collectionVersion + ), + provider: cumulusGranule.providerName, + createdAt: cumulusGranule.created_at.getTime(), + updatedAt: cumulusGranule.updated_at.getTime(), + }, conflictFiles: [], }; @@ -100,6 +191,11 @@ function getReportForOneGranule({ collectionsConfig, cumulusGranule, orcaGranule // if no granule file conflicts, set granuleReport.ok to true // reducer, key: fileName, value: file object with selected fields + /** + * @param {Object} accumulator + * @param {PostgresFileRecord} currentValue + * @returns {Object} + */ const cumulusFileReducer = (accumulator, currentValue) => { const fileName = path.basename(currentValue.key); return ({ @@ -115,7 +211,9 @@ function getReportForOneGranule({ collectionsConfig, cumulusGranule, orcaGranule }); }; - const cumulusFiles = get(cumulusGranule, 'files', []).reduce(cumulusFileReducer, {}); + const cumulusFilesArray = /** @type {PostgresFileRecord[]} */ (get(cumulusGranule, 'files', [])); + const cumulusFiles = cumulusFilesArray.reduce(cumulusFileReducer, {}); + const orcaFiles = get(orcaGranule, 'files', []).reduce(orcaFileReducer, {}); const allFileNames = Object.keys({ ...cumulusFiles, ...orcaFiles }); allFileNames.forEach((fileName) => { @@ -123,9 +221,19 @@ function getReportForOneGranule({ collectionsConfig, cumulusGranule, orcaGranule granuleReport.cumulusFilesCount += 1; granuleReport.orcaFilesCount += 1; - if (!shouldFileBeExcludedFromOrca(collectionsConfig, cumulusGranule.collectionId, fileName)) { + if ( + !shouldFileBeExcludedFromOrca( + collectionsConfig, + constructCollectionId( + cumulusGranule.collectionName, + cumulusGranule.collectionVersion + ), + fileName + ) + ) { granuleReport.okFilesCount += 1; } else { + /** @type {ConflictFile} */ const conflictFile = { fileName, ...cumulusFiles[fileName], @@ -137,7 +245,16 @@ function getReportForOneGranule({ collectionsConfig, cumulusGranule, orcaGranule } else if (cumulusFiles[fileName] && orcaFiles[fileName] === undefined) { granuleReport.cumulusFilesCount += 1; - if (shouldFileBeExcludedFromOrca(collectionsConfig, cumulusGranule.collectionId, fileName)) { + if ( + shouldFileBeExcludedFromOrca( + collectionsConfig, + constructCollectionId( + cumulusGranule.collectionName, + cumulusGranule.collectionVersion + ), + fileName + ) + ) { granuleReport.okFilesCount += 1; } else { const conflictFile = { @@ -184,10 +301,39 @@ function constructOrcaOnlyGranuleForReport(orcaGranule) { return granule; } -function addGranuleToReport({ granulesReport, collectionsConfig, cumulusGranule, orcaGranule }) { +/** + * Adds a granule to the reconciliation report object + * + * @param {Object} params - The parameters for the function. + * @param {GranulesReport} params.granulesReport - The report object to update. + * @param {CollectionConfig} params.collectionsConfig - The collections configuration. + * @param {CumulusGranule} params.cumulusGranule - The Cumulus granule to add to the report. + * @param {Object} [params.orcaGranule] - The ORCA granule to compare against (optional). + * @param {Knex} params.knex - The Knex database connection. 
+ * @returns {Promise} The updated granules report. + * @throws {Error} If cumulusGranule is not defined. + */ +async function addGranuleToReport({ + granulesReport, + collectionsConfig, + cumulusGranule, + orcaGranule, + knex, +}) { + if (!cumulusGranule) { + throw new Error('cumulusGranule must be defined to add to the orca report'); + } + const modifiedCumulusGranule = { ...cumulusGranule }; + + modifiedCumulusGranule.files = await filePgModel.search(knex, { + granule_cumulus_id: cumulusGranule.cumulus_id, + }); + /* eslint-disable no-param-reassign */ const granReport = getReportForOneGranule({ - collectionsConfig, cumulusGranule, orcaGranule, + collectionsConfig, + cumulusGranule: modifiedCumulusGranule, + orcaGranule, }); if (granReport.ok) { @@ -208,7 +354,8 @@ function addGranuleToReport({ granulesReport, collectionsConfig, cumulusGranule, /** * Compare the granule holdings in Cumulus with ORCA * - * @param {Object} recReportParams - lambda's input filtering parameters + * @param {Object} recReportParams - input report params + * @param {String[]} recReportParams.collectionIds - array of collectionIds * @returns {Promise} an object with the okCount, onlyInCumulus, onlyInOrca * and withConfilcts */ @@ -221,6 +368,7 @@ async function orcaReconciliationReportForGranules(recReportParams) { // Report granules only in cumulus // Report granules only in orca log.info(`orcaReconciliationReportForGranules ${JSON.stringify(recReportParams)}`); + /** @type {GranulesReport} */ const granulesReport = { okCount: 0, cumulusCount: 0, @@ -235,17 +383,23 @@ async function orcaReconciliationReportForGranules(recReportParams) { }; const collectionsConfig = await fetchCollectionsConfig(recReportParams); - log.debug(`fetchESCollections returned ${JSON.stringify(collectionsConfig)}`); - - const esSearchParams = convertToESGranuleSearchParamsWithCreatedAtRange(recReportParams); - log.debug(`Create ES granule iterator with ${JSON.stringify(esSearchParams)}`); - const esGranulesIterator = new ESSearchQueue( - { - ...esSearchParams, - sort_key: ['granuleId', 'collectionId'], - }, - 'granule', - process.env.ES_INDEX + log.debug(`fetchCollections returned ${JSON.stringify(collectionsConfig)}`); + + const knex = await getKnexClient(); + const searchParams = convertToDBGranuleSearchParams(recReportParams); + + const granulesSearchQuery = getGranulesByApiPropertiesQuery({ + knex, + searchParams, + sortByFields: ['granule_id', 'collectionName', 'collectionVersion'], + temporalBoundByCreatedAt: true, + }); + + log.debug(`Create PG granule iterator with ${granulesSearchQuery}`); + + const pgGranulesIterator = new QuerySearchClient( + granulesSearchQuery, + 100 // arbitrary limit on how items are fetched at once ); const orcaSearchParams = convertToOrcaGranuleSearchParams(recReportParams); @@ -253,22 +407,30 @@ async function orcaReconciliationReportForGranules(recReportParams) { const orcaGranulesIterator = new ORCASearchCatalogQueue(orcaSearchParams); try { + /** @type {[CumulusGranule, any]} */ + // @ts-ignore TODO: Ticket CUMULUS-3887 filed to resolve let [nextCumulusItem, nextOrcaItem] = await Promise.all( - [esGranulesIterator.peek(), orcaGranulesIterator.peek()] + [ + /** @type CumulusGranule */ + pgGranulesIterator.peek(), + orcaGranulesIterator.peek(), + ] ); while (nextCumulusItem && nextOrcaItem) { - const nextCumulusId = `${nextCumulusItem.granuleId}:${nextCumulusItem.collectionId}`; + const nextCumulusId = `${nextCumulusItem.granule_id}:${constructCollectionId(nextCumulusItem.collectionName, 
nextCumulusItem.collectionVersion)}`; const nextOrcaId = `${nextOrcaItem.id}:${nextOrcaItem.collectionId}`; if (nextCumulusId < nextOrcaId) { // Found an item that is only in Cumulus and not in ORCA. - addGranuleToReport({ + // eslint-disable-next-line no-await-in-loop + await addGranuleToReport({ granulesReport, collectionsConfig, cumulusGranule: nextCumulusItem, + knex, }); granulesReport.cumulusCount += 1; - await esGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + await pgGranulesIterator.shift(); // eslint-disable-line no-await-in-loop } else if (nextCumulusId > nextOrcaId) { // Found an item that is only in ORCA and not in Cumulus granulesReport.onlyInOrca.push(constructOrcaOnlyGranuleForReport(nextOrcaItem)); @@ -277,29 +439,36 @@ async function orcaReconciliationReportForGranules(recReportParams) { } else { // Found an item that is in both ORCA and Cumulus database // Check if the granule (files) should be in orca, and act accordingly - addGranuleToReport({ + // eslint-disable-next-line no-await-in-loop + await addGranuleToReport({ granulesReport, collectionsConfig, cumulusGranule: nextCumulusItem, orcaGranule: nextOrcaItem, + knex, }); granulesReport.cumulusCount += 1; granulesReport.orcaCount += 1; - await esGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + await pgGranulesIterator.shift(); // eslint-disable-line no-await-in-loop await orcaGranulesIterator.shift(); // eslint-disable-line no-await-in-loop } - - [nextCumulusItem, nextOrcaItem] = await Promise.all([esGranulesIterator.peek(), orcaGranulesIterator.peek()]); // eslint-disable-line max-len, no-await-in-loop + /** @type {[CumulusGranule, any]} */ + // @ts-ignore TODO: Ticket CUMULUS-3887 filed to resolve + [nextCumulusItem, nextOrcaItem] = await Promise.all([pgGranulesIterator.peek(), orcaGranulesIterator.peek()]); // eslint-disable-line max-len, no-await-in-loop } // Add any remaining cumulus items to the report - while (await esGranulesIterator.peek()) { // eslint-disable-line no-await-in-loop - const cumulusItem = await esGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + while (await pgGranulesIterator.peek()) { // eslint-disable-line no-await-in-loop + /** @type {CumulusGranule} */ + // @ts-ignore TODO: Ticket CUMULUS-3887 filed to resolve + const cumulusItem = await pgGranulesIterator.shift(); // eslint-disable-line no-await-in-loop // Found an item that is only in Cumulus database and not in ORCA. 
- addGranuleToReport({ + // eslint-disable-next-line no-await-in-loop + await addGranuleToReport({ granulesReport, collectionsConfig, cumulusGranule: cumulusItem, + knex, }); granulesReport.cumulusCount += 1; } @@ -338,7 +507,7 @@ async function orcaReconciliationReportForGranules(recReportParams) { * @param {string} recReportParams.stackName - the name of the CUMULUS stack * @param {moment} recReportParams.startTimestamp - beginning report datetime ISO timestamp * @param {string} recReportParams.systemBucket - the name of the CUMULUS system bucket - * @returns {Promise} a Promise that resolves when the report has been + * @returns {Promise} a Promise that resolves when the report has been * uploaded to S3 */ async function createOrcaBackupReconciliationReport(recReportParams) { @@ -399,7 +568,7 @@ async function createOrcaBackupReconciliationReport(recReportParams) { report.status = 'SUCCESS'; // Write the full report to S3 - return s3().putObject({ + await s3().putObject({ Bucket: systemBucket, Key: reportKey, Body: JSON.stringify(report, undefined, 2), diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report.js b/packages/api/tests/lambdas/test-create-reconciliation-report.js index 81082bbd23f..0461c6005c4 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report.js @@ -25,6 +25,9 @@ const { getBucketsConfigKey } = require('@cumulus/common/stack'); const { constructCollectionId } = require('@cumulus/message/Collections'); const { randomString, randomId } = require('@cumulus/common/test-utils'); const { + ProviderPgModel, + fakeProviderRecordFactory, + translateApiFiletoPostgresFile, generateLocalTestDb, destroyLocalTestDb, localStackConnectionEnv, @@ -40,7 +43,6 @@ const { translatePostgresCollectionToApiCollection, translateApiGranuleToPostgresGranule, translatePostgresReconReportToApiReconReport, - upsertGranuleWithExecutionJoinRecord, } = require('@cumulus/db'); const { getDistributionBucketMapKey } = require('@cumulus/distribution-utils'); const indexer = require('@cumulus/es-client/indexer'); @@ -48,7 +50,6 @@ const { Search, getEsClient } = require('@cumulus/es-client/search'); const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); const { - fakeCollectionFactory, fakeGranuleFactoryV2, fakeOrcaGranuleFactory, } = require('../../lib/testUtils'); @@ -352,6 +353,7 @@ test.before(async (t) => { t.context.knex = knex; t.context.knexAdmin = knexAdmin; + t.context.providerPgModel = new ProviderPgModel(); t.context.collectionPgModel = new CollectionPgModel(); t.context.executionPgModel = new ExecutionPgModel(); t.context.filePgModel = new FilePgModel(); @@ -1879,79 +1881,6 @@ test.serial('When report creation fails, reconciliation report status is set to t.like(esRecord, reportApiRecord); }); -test.serial('A valid internal reconciliation report is generated when ES and DB are in sync', async (t) => { - const { - knex, - execution, - executionCumulusId, - } = t.context; - - const collection = fakeCollectionRecordFactory(); - const collectionId = constructCollectionId( - collection.name, - collection.version - ); - const [pgCollection] = await t.context.collectionPgModel.create( - t.context.knex, - collection - ); - await indexer.indexCollection( - esClient, - translatePostgresCollectionToApiCollection(pgCollection), - esAlias - ); - - const matchingGrans = range(10).map(() => fakeGranuleFactoryV2({ - collectionId, - execution: execution.url, - })); - await 
Promise.all( - matchingGrans.map(async (gran) => { - await indexer.indexGranule(esClient, gran, esAlias); - const pgGranule = await translateApiGranuleToPostgresGranule({ - dynamoRecord: gran, - knexOrTransaction: knex, - }); - await upsertGranuleWithExecutionJoinRecord({ - executionCumulusId, - granule: pgGranule, - knexTransaction: knex, - }); - }) - ); - - const event = { - systemBucket: t.context.systemBucket, - stackName: t.context.stackName, - reportType: 'Internal', - reportName: randomId('reportName'), - collectionId, - startTimestamp: moment.utc().subtract(1, 'hour').format(), - endTimestamp: moment.utc().add(1, 'hour').format(), - }; - - const reportRecord = await handler(event); - t.is(reportRecord.status, 'Generated'); - t.is(reportRecord.name, event.reportName); - t.is(reportRecord.type, event.reportType); - - const report = await fetchCompletedReport(reportRecord); - t.is(report.status, 'SUCCESS'); - t.is(report.error, undefined); - t.is(report.reportType, 'Internal'); - t.is(report.collections.okCount, 1); - t.is(report.collections.onlyInEs.length, 0); - t.is(report.collections.onlyInDb.length, 0); - t.is(report.collections.withConflicts.length, 0); - t.is(report.granules.okCount, 10); - t.is(report.granules.onlyInEs.length, 0); - t.is(report.granules.onlyInDb.length, 0); - t.is(report.granules.withConflicts.length, 0); - - const esRecord = await t.context.esReportClient.get(reportRecord.name); - t.like(esRecord, reportRecord); -}); - test.serial('Creates a valid Granule Inventory report', async (t) => { const { granulePgModel, @@ -2009,12 +1938,13 @@ test.serial('Creates a valid Granule Inventory report', async (t) => { }); test.serial('A valid ORCA Backup reconciliation report is generated', async (t) => { - const collection = fakeCollectionFactory({ + const { knex, collectionPgModel, granulePgModel, providerPgModel, filePgModel } = t.context; + const collection = fakeCollectionRecordFactory({ name: 'fakeCollection', version: 'v2', }); - await indexer.indexCollection(esClient, collection, esAlias); + await collectionPgModel.create(knex, collection); const collectionId = constructCollectionId(collection.name, collection.version); const matchingCumulusGran = { @@ -2046,7 +1976,23 @@ test.serial('A valid ORCA Backup reconciliation report is generated', async (t) ], }; - await indexer.indexGranule(esClient, matchingCumulusGran, esAlias); + await providerPgModel.create( + knex, + fakeProviderRecordFactory({ name: matchingCumulusGran.provider }) + ); + const pgGranule = await translateApiGranuleToPostgresGranule({ + dynamoRecord: matchingCumulusGran, + knexOrTransaction: knex, + }); + const pgGranuleRecord = await granulePgModel.create(knex, pgGranule); + await Promise.all( + matchingCumulusGran.files.map((file) => + filePgModel.create(knex, { + ...translateApiFiletoPostgresFile(file), + granule_cumulus_id: pgGranuleRecord[0].cumulus_id, + })) + ); + const searchOrcaStub = sinon.stub(ORCASearchCatalogQueue.prototype, 'searchOrca'); searchOrcaStub.resolves({ anotherPage: false, granules: [matchingOrcaGran] }); t.teardown(() => searchOrcaStub.restore()); @@ -2087,39 +2033,6 @@ test.serial('A valid ORCA Backup reconciliation report is generated', async (t) t.like(esRecord, reportRecord); }); -test.serial('Internal Reconciliation report JSON is formatted', async (t) => { - const matchingColls = range(5).map(() => fakeCollectionFactory()); - const collectionId = constructCollectionId(matchingColls[0].name, matchingColls[0].version); - const matchingGrans = range(10).map(() => 
fakeGranuleFactoryV2({ collectionId })); - await Promise.all( - matchingColls.map((collection) => indexer.indexCollection(esClient, collection, esAlias)) - ); - await Promise.all( - matchingGrans.map((gran) => indexer.indexGranule(esClient, gran, esAlias)) - ); - - const event = { - systemBucket: t.context.systemBucket, - stackName: t.context.stackName, - reportType: 'Internal', - reportName: randomId('reportName'), - collectionId, - startTimestamp: moment.utc().subtract(1, 'hour').format(), - endTimestamp: moment.utc().add(1, 'hour').format(), - }; - - const reportRecord = await handler(event); - - const formattedReport = await fetchCompletedReportString(reportRecord); - - // Force report to unformatted (single line) - const unformattedReportString = JSON.stringify(JSON.parse(formattedReport), undefined, 0); - const unformattedReportObj = JSON.parse(unformattedReportString); - - t.true(!unformattedReportString.includes('\n')); // validate unformatted report is on a single line - t.is(formattedReport, JSON.stringify(unformattedReportObj, undefined, 2)); -}); - test.serial('Inventory reconciliation report JSON is formatted', async (t) => { const dataBuckets = range(2).map(() => randomId('bucket')); await Promise.all(dataBuckets.map((bucket) => @@ -2230,3 +2143,16 @@ test.serial('When there is an error for an ORCA backup report, it throws', async const esRecord = await t.context.esReportClient.get(reportName); t.like(esRecord, reportApiRecord); }); + +test.serial('Internal reconciliation report type throws an error', async (t) => { + const event = { + systemBucket: t.context.systemBucket, + stackName: t.context.stackName, + reportType: 'Internal', + }; + + await t.throwsAsync( + handler(event), + { message: 'Internal Reconciliation Reports are no longer valid' } + ); +}); diff --git a/packages/api/tests/lambdas/test-orca-backup-reconciliation-report.js b/packages/api/tests/lambdas/test-orca-backup-reconciliation-report.js index c2cad6d78ae..3a00af398a4 100644 --- a/packages/api/tests/lambdas/test-orca-backup-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-orca-backup-reconciliation-report.js @@ -4,11 +4,27 @@ const test = require('ava'); const rewire = require('rewire'); const sinon = require('sinon'); const sortBy = require('lodash/sortBy'); +const omit = require('lodash/omit'); +const cryptoRandomString = require('crypto-random-string'); +// TODO abstract this setup const { randomId } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const indexer = require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); +const { deconstructCollectionId } = require('@cumulus/message/Collections'); +const { + fakeProviderRecordFactory, + CollectionPgModel, + GranulePgModel, + FilePgModel, + GranulesExecutionsPgModel, + ProviderPgModel, + migrationDir, + destroyLocalTestDb, + generateLocalTestDb, + translateApiGranuleToPostgresGranule, + translateApiCollectionToPostgresCollection, + localStackConnectionEnv, + translateApiFiletoPostgresFile, +} = require('@cumulus/db'); const { fakeCollectionFactory, @@ -24,9 +40,19 @@ const ORCASearchCatalogQueue = require('../../lib/ORCASearchCatalogQueue'); const shouldFileBeExcludedFromOrca = OBRP.__get__('shouldFileBeExcludedFromOrca'); const getReportForOneGranule = OBRP.__get__('getReportForOneGranule'); -let esAlias; -let esIndex; -let esClient; +function translateTestGranuleObject(apiGranule) { + const { name: collectionName, version: 
collectionVersion } = + deconstructCollectionId(apiGranule.collectionId); + const ProviderName = apiGranule.provider; + return { + ...(omit(apiGranule, ['collectionId', 'provider', 'createdAt', 'updatedAt'])), + collectionName, + collectionVersion, + ProviderName, + created_at: new Date(apiGranule.createdAt), + updated_at: new Date(apiGranule.updatedAt), + }; +} function fakeCollectionsAndGranules() { const fakeCollectionV1 = fakeCollectionFactory({ @@ -73,7 +99,7 @@ function fakeCollectionsAndGranules() { ], }; - // granule is in cumulus only, should not be in orca, and conform to configuratio + // granule is in cumulus only, should not be in orca, and conform to configuration const matchingCumulusOnlyGran = { ...fakeGranuleFactoryV2(), granuleId: randomId('matchingCumulusOnlyGranId'), @@ -82,12 +108,12 @@ function fakeCollectionsAndGranules() { { bucket: 'cumulus-protected-bucket', fileName: 'fakeFileName.xml', - key: 'fakePath/fakeFileName.xml', + key: 'fakePath/fakeFileName4.xml', }, { bucket: 'cumulus-protected-bucket', fileName: 'fakeFileName.hdf.met', - key: 'fakePath/fakeFileName.hdf.met', + key: 'fakePath/fakeFileName4.hdf.met', }, ], }; @@ -102,22 +128,22 @@ function fakeCollectionsAndGranules() { { bucket: 'cumulus-protected-bucket', fileName: 'fakeFileName.hdf', - key: 'fakePath/fakeFileName.hdf', + key: 'fakePath/fakeFileName3.hdf', }, { bucket: 'cumulus-private-bucket', fileName: 'fakeFileName.hdf.met', - key: 'fakePath/fakeFileName.hdf.met', + key: 'fakePath/fakeFileName3.hdf.met', }, { bucket: 'cumulus-fake-bucket', fileName: 'fakeFileName_onlyInCumulus.jpg', - key: 'fakePath/fakeFileName_onlyInCumulus.jpg', + key: 'fakePath/fakeFileName3_onlyInCumulus.jpg', }, { bucket: 'cumulus-fake-bucket-2', fileName: 'fakeFileName.cmr.xml', - key: 'fakePath/fakeFileName.cmr.xml', + key: 'fakePath/fakeFileName3.cmr.xml', }, ], }; @@ -131,19 +157,19 @@ function fakeCollectionsAndGranules() { name: 'fakeFileName.hdf', cumulusArchiveLocation: 'cumulus-protected-bucket', orcaArchiveLocation: 'orca-bucket', - keyPath: 'fakePath/fakeFileName.hdf', + keyPath: 'fakePath/fakeFileName3.hdf', }, { name: 'fakeFileName_onlyInOrca.jpg', cumulusArchiveLocation: 'cumulus-fake-bucket', orcaArchiveLocation: 'orca-bucket', - keyPath: 'fakePath/fakeFileName_onlyInOrca.jpg', + keyPath: 'fakePath/fakeFileName3_onlyInOrca.jpg', }, { name: 'fakeFileName.cmr.xml', cumulusArchiveLocation: 'cumulus-fake-bucket-2', orcaArchiveLocation: 'orca-bucket', - keyPath: 'fakePath/fakeFileName.cmr.xml', + keyPath: 'fakePath/fakeFileName3.cmr.xml', }, ], }; @@ -191,19 +217,34 @@ test.beforeEach(async (t) => { t.context.systemBucket = randomId('bucket'); process.env.system_bucket = t.context.systemBucket; - esAlias = randomId('esalias'); - esIndex = randomId('esindex'); - process.env.ES_INDEX = esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); - esClient = await getEsClient(); + // Setup Postgres DB + + t.context.testDbName = `orca_backup_recon_${cryptoRandomString({ length: 10 })}`; + const { knexAdmin, knex } = await generateLocalTestDb( + t.context.testDbName, + migrationDir, + { dbMaxPool: 10 } + ); + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + + t.context.granulePgModel = new GranulePgModel(); + t.context.collectionPgModel = new CollectionPgModel(); + t.context.granulesExecutionsPgModel = new GranulesExecutionsPgModel(); + t.context.filePgModel = new FilePgModel(); + + process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: 
t.context.testDbName, + dbMaxPool: 10, + }; }); -test.afterEach.always(async () => { - await esClient.client.indices.delete({ index: esIndex }); +test.afterEach.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + }); }); test.serial('shouldFileBeExcludedFromOrca returns true for configured file types', (t) => { @@ -224,13 +265,19 @@ test.serial('shouldFileBeExcludedFromOrca returns true for configured file types t.false(shouldFileBeExcludedFromOrca(collectionsConfig, `${randomId('coll')}`, randomId('file'))); }); -test.serial('getReportForOneGranule reports ok for one granule in both cumulus and orca with no file discrepancy', (t) => { +test.serial('getReportForOneGranule reports ok for one granule in both cumulus and orca with no file discrepancy', async (t) => { + const { knex } = t.context; const collectionsConfig = {}; const { matchingCumulusGran: cumulusGranule, matchingOrcaGran: orcaGranule, } = fakeCollectionsAndGranules(); - const report = getReportForOneGranule({ collectionsConfig, cumulusGranule, orcaGranule }); + const report = await getReportForOneGranule({ + collectionsConfig, + cumulusGranule: translateTestGranuleObject(cumulusGranule), + orcaGranule, + knex, + }); t.true(report.ok); t.is(report.okFilesCount, 1); t.is(report.cumulusFilesCount, 1); @@ -238,7 +285,9 @@ test.serial('getReportForOneGranule reports ok for one granule in both cumulus a t.is(report.conflictFiles.length, 0); }); -test.serial('getReportForOneGranule reports no ok for one granule in both cumulus and orca with file discrepancy', (t) => { +test.serial('getReportForOneGranule reports no ok for one granule in both cumulus and orca with file discrepancy', async (t) => { + const { knex } = t.context; + const collectionsConfig = { fakeCollection___v1: { orca: { @@ -246,11 +295,17 @@ test.serial('getReportForOneGranule reports no ok for one granule in both cumulu }, }, }; - const { - conflictCumulusGran: cumulusGranule, - conflictOrcaGran: orcaGranule, - } = fakeCollectionsAndGranules(); - const report = getReportForOneGranule({ collectionsConfig, cumulusGranule, orcaGranule }); + + const granules = fakeCollectionsAndGranules(); + const cumulusGranule = translateTestGranuleObject(granules.conflictCumulusGran); + const orcaGranule = granules.conflictOrcaGran; + + const report = await getReportForOneGranule({ + collectionsConfig, + cumulusGranule, + orcaGranule, + knex, + }); t.false(report.ok); t.is(report.okFilesCount, 2); t.is(report.cumulusFilesCount, 4); @@ -281,10 +336,14 @@ test.serial('getReportForOneGranule reports ok for one granule in cumulus only w }, }, }; - const { - matchingCumulusOnlyGran: cumulusGranule, - } = fakeCollectionsAndGranules(); - const report = getReportForOneGranule({ collectionsConfig, cumulusGranule }); + + const granules = fakeCollectionsAndGranules(); + const cumulusGranule = translateTestGranuleObject(granules.matchingCumulusOnlyGran); + + const report = getReportForOneGranule({ + collectionsConfig, + cumulusGranule, + }); t.true(report.ok); t.is(report.okFilesCount, 2); t.is(report.cumulusFilesCount, 2); @@ -300,10 +359,14 @@ test.serial('getReportForOneGranule reports not ok for one granule in cumulus on }, }, }; - const { - conflictCumulusOnlyGran: cumulusGranule, - } = fakeCollectionsAndGranules(); - const report = getReportForOneGranule({ collectionsConfig, cumulusGranule }); + + const granules = fakeCollectionsAndGranules(); + const cumulusGranule = translateTestGranuleObject(granules.conflictCumulusOnlyGran); + + const report = getReportForOneGranule({ + 
collectionsConfig, + cumulusGranule, + }); t.false(report.ok); t.is(report.okFilesCount, 1); t.is(report.cumulusFilesCount, 2); @@ -313,10 +376,14 @@ test.serial('getReportForOneGranule reports not ok for one granule in cumulus on test.serial('getReportForOneGranule reports ok for one granule in cumulus only with no files', (t) => { const collectionsConfig = {}; - const { - cumulusOnlyGranNoFile: cumulusGranule, - } = fakeCollectionsAndGranules(); - const report = getReportForOneGranule({ collectionsConfig, cumulusGranule }); + + const granules = fakeCollectionsAndGranules(); + const cumulusGranule = translateTestGranuleObject(granules.cumulusOnlyGranNoFile); + + const report = getReportForOneGranule({ + collectionsConfig, + cumulusGranule, + }); t.true(report.ok); t.is(report.okFilesCount, 0); t.is(report.cumulusFilesCount, 0); @@ -325,6 +392,7 @@ test.serial('getReportForOneGranule reports ok for one granule in cumulus only w }); test.serial('orcaReconciliationReportForGranules reports discrepancy of granule holdings in cumulus and orca', async (t) => { + const { collectionPgModel, granulePgModel, filePgModel, knex } = t.context; const { fakeCollectionV1, fakeCollectionV2, @@ -338,24 +406,49 @@ test.serial('orcaReconciliationReportForGranules reports discrepancy of granule conflictCumulusOnlyGran, } = fakeCollectionsAndGranules(); - const esGranules = [ + // Create provider + const fakeProvider = fakeProviderRecordFactory({ name: 'fakeProvider' }); + const fakeProvider2 = fakeProviderRecordFactory({ name: 'fakeProvider2' }); + const providerPgModel = new ProviderPgModel(); + await Promise.all( + [fakeProvider, fakeProvider2].map((p) => + providerPgModel.create(knex, p)) + ); + + // Create collections + const pgCollections = await Promise.all( + [fakeCollectionV1, fakeCollectionV2].map((c) => translateApiCollectionToPostgresCollection(c)) + ); + await Promise.all( + pgCollections.map((collection) => collectionPgModel.create(knex, collection)) + ); + + const apiGranules = [ cumulusOnlyGranNoFile, conflictCumulusGran, matchingCumulusGran, matchingCumulusOnlyGran, conflictCumulusOnlyGran, ]; - const esCollections = [fakeCollectionV1, fakeCollectionV2]; - // add granules and related collections to es and db - await Promise.all( - esCollections.map(async (collection) => { - await indexer.indexCollection(esClient, collection, esAlias); - }) - ); + // Create granules await Promise.all( - esGranules.map(async (granule) => { - await indexer.indexGranule(esClient, granule, esAlias); + apiGranules.map(async (granule) => { + const pgGranule = await translateApiGranuleToPostgresGranule({ + dynamoRecord: granule, + knexOrTransaction: knex, + }); + const pgRecord = await granulePgModel.create(knex, pgGranule); + if (!granule.files) { + return; + } + const pgFiles = granule.files.map((f) => (translateApiFiletoPostgresFile(f))); + await Promise.all( + pgFiles.map(async (f) => await filePgModel.create(knex, { + ...f, + granule_cumulus_id: pgRecord[0].cumulus_id, + })) + ); }) ); diff --git a/packages/db/src/lib/granule.ts b/packages/db/src/lib/granule.ts index b6e2ca273fb..05da2df9ec3 100644 --- a/packages/db/src/lib/granule.ts +++ b/packages/db/src/lib/granule.ts @@ -205,38 +205,42 @@ export const getApiGranuleExecutionCumulusIds = async ( /** * Helper to build a query to search granules by various API granule record properties. 
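For reference, a minimal sketch of the PostgreSQL seeding pattern used by the updated ORCA report tests above, assuming the `@cumulus/db` helpers and pg models imported in this patch; `apiGranule`, `knex`, and the model instances stand in for the test context values:

```js
// Sketch only: translate an API granule and attach its files in PostgreSQL,
// mirroring the test setup above. granulePgModel.create returns an array of records.
const pgGranule = await translateApiGranuleToPostgresGranule({
  dynamoRecord: apiGranule,
  knexOrTransaction: knex,
});
const [pgGranuleRecord] = await granulePgModel.create(knex, pgGranule);
await Promise.all(
  (apiGranule.files || []).map((file) => filePgModel.create(knex, {
    ...translateApiFiletoPostgresFile(file),
    granule_cumulus_id: pgGranuleRecord.cumulus_id,
  }))
);
```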
* - * @param {Knex} knex - DB client - * @param {Object} searchParams - * @param {string | Array} [searchParams.collectionIds] - Collection ID - * @param {string | Array} [searchParams.granuleIds] - array of granule IDs - * @param {string} [searchParams.providerNames] - Provider names - * @param {UpdatedAtRange} [searchParams.updatedAtRange] - Date range for updated_at column - * @param {string} [searchParams.status] - Granule status to search by - * @param {string | Array} [sortByFields] - Field(s) to sort by + * @param params + * @param params.knex - DB client + * @param params.searchParams + * @param [params.searchParams.collectionIds] - Collection ID + * @param [params.searchParams.granuleIds] - array of granule IDs + * @param [params.searchParams.providerNames] - Provider names + * @param [params.searchParams.updatedAtRange] - Date range for updated_at column + * @param [params.searchParams.status] - Granule status to search by + * @param [params.sortByFields] - Field(s) to sort by + * @param params.temporalBoundByCreatedAt -- If true, temporal bounds + * are applied to created_at column instead of updated_at column * @returns {Knex.QueryBuilder} */ -export const getGranulesByApiPropertiesQuery = ( +export const getGranulesByApiPropertiesQuery = ({ + knex, + searchParams, + sortByFields = [], + temporalBoundByCreatedAt = false, +} : { knex: Knex, - { - collectionIds, - granuleIds, - providerNames, - updatedAtRange = {}, - status, - }: { + searchParams: { collectionIds?: string | string[], granuleIds?: string | string[], providerNames?: string[], updatedAtRange?: UpdatedAtRange, - status?: string, + status?: string }, - sortByFields?: string | string[] -): Knex.QueryBuilder => { + sortByFields?: string | string[], + temporalBoundByCreatedAt: boolean, +}) : Knex.QueryBuilder => { const { granules: granulesTable, collections: collectionsTable, providers: providersTable, } = TableNames; + const temporalColumn = temporalBoundByCreatedAt ? 
'created_at' : 'updated_at'; return knex(granulesTable) .select(`${granulesTable}.*`) .select({ @@ -247,8 +251,8 @@ export const getGranulesByApiPropertiesQuery = ( .innerJoin(collectionsTable, `${granulesTable}.collection_cumulus_id`, `${collectionsTable}.cumulus_id`) .leftJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`) .modify((queryBuilder) => { - if (collectionIds) { - const collectionIdFilters = [collectionIds].flat(); + if (searchParams.collectionIds) { + const collectionIdFilters = [searchParams.collectionIds].flat(); const collectionIdConcatField = `(${collectionsTable}.name || '${collectionIdSeparator}' || ${collectionsTable}.version)`; const collectionIdInClause = collectionIdFilters.map(() => '?').join(','); queryBuilder.whereRaw( @@ -256,25 +260,25 @@ export const getGranulesByApiPropertiesQuery = ( collectionIdFilters ); } - if (granuleIds) { - const granuleIdFilters = [granuleIds].flat(); + if (searchParams.granuleIds) { + const granuleIdFilters = [searchParams.granuleIds].flat(); queryBuilder.where((nestedQueryBuilder) => { granuleIdFilters.forEach((granuleId) => { nestedQueryBuilder.orWhere(`${granulesTable}.granule_id`, 'LIKE', `%${granuleId}%`); }); }); } - if (providerNames) { - queryBuilder.whereIn(`${providersTable}.name`, providerNames); + if (searchParams.providerNames) { + queryBuilder.whereIn(`${providersTable}.name`, searchParams.providerNames); } - if (updatedAtRange.updatedAtFrom) { - queryBuilder.where(`${granulesTable}.updated_at`, '>=', updatedAtRange.updatedAtFrom); + if (searchParams?.updatedAtRange?.updatedAtFrom) { + queryBuilder.where(`${granulesTable}.${temporalColumn}`, '>=', searchParams.updatedAtRange.updatedAtFrom); } - if (updatedAtRange.updatedAtTo) { - queryBuilder.where(`${granulesTable}.updated_at`, '<=', updatedAtRange.updatedAtTo); + if (searchParams?.updatedAtRange?.updatedAtTo) { + queryBuilder.where(`${granulesTable}.${temporalColumn}`, '<=', searchParams.updatedAtRange.updatedAtTo); } - if (status) { - queryBuilder.where(`${granulesTable}.status`, status); + if (searchParams.status) { + queryBuilder.where(`${granulesTable}.status`, searchParams.status); } if (sortByFields) { queryBuilder.orderBy([sortByFields].flat()); diff --git a/packages/db/tests/lib/test-granule.js b/packages/db/tests/lib/test-granule.js index d1e05577267..abc68a8d67d 100644 --- a/packages/db/tests/lib/test-granule.js +++ b/packages/db/tests/lib/test-granule.js @@ -523,12 +523,12 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by single ); t.teardown(() => granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id })); - const record = await getGranulesByApiPropertiesQuery( + const record = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { collectionIds: collectionId, - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -581,13 +581,13 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by multipl granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id }) ))); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { collectionIds: [collectionId, collectionId2], }, - ['granule_id'] - ); + sortByFields: ['granule_id'], + }); t.deepEqual( [{ ...granules.find((granule) => granule.granule_id === granule1.granule_id), @@ -622,12 +622,12 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by single t.teardown(() => granulePgModel.delete(knex, { cumulus_id: 
granule.cumulus_id })); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { granuleIds: [granule.granule_id], - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -668,12 +668,12 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by multipl granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id }) ))); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { granuleIds: [granules[0].granule_id, granules[1].granule_id], - } - ); + }, + }); t.deepEqual( [{ ...granules[0], @@ -711,12 +711,12 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by provide '*' ); t.teardown(() => granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id })); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { providerNames: [fakeProvider.name], - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -760,12 +760,12 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by status' (granule) => granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id }) ))); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { status: 'completed', - } - ); + }, + }); t.is(records.length, 1); t.deepEqual( [{ @@ -799,14 +799,14 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by updated ); t.teardown(() => granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id })); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { updatedAtRange: { updatedAtFrom: updatedAt, }, - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -817,14 +817,14 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by updated records ); - const records2 = await getGranulesByApiPropertiesQuery( + const records2 = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { updatedAtRange: { updatedAtFrom: new Date(now - 1), }, - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -857,14 +857,14 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by updated ); t.teardown(() => granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id })); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { updatedAtRange: { updatedAtTo: updatedAt, }, - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -875,14 +875,14 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by updated records ); - const records2 = await getGranulesByApiPropertiesQuery( + const records2 = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { updatedAtRange: { updatedAtTo: new Date(now + 1), }, - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -915,15 +915,15 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by updated ); t.teardown(() => granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id })); - const records = await getGranulesByApiPropertiesQuery( + const records = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { updatedAtRange: { updatedAtFrom: updatedAt, updatedAtTo: updatedAt, }, - } - ); + }, + }); t.deepEqual( [{ ...granule, @@ -934,15 +934,15 @@ 
test.serial('getGranulesByApiPropertiesQuery returns correct granules by updated records ); - const records2 = await getGranulesByApiPropertiesQuery( + const records2 = await getGranulesByApiPropertiesQuery({ knex, - { + searchParams: { updatedAtRange: { updatedAtFrom: new Date(now - 1), updatedAtTo: new Date(now + 1), }, - } - ); + }, + }); t.deepEqual( [{ ...granule, From 348b210f08a3176aeaf4a715bfe3c247474a8985 Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Wed, 23 Oct 2024 16:11:03 -0600 Subject: [PATCH 52/61] Jk/cumulus 3806 2 (#3811) * Update ORCA report to only read from postgres database * Parameterize getGranulesByApiPropertiesQuery * Fix unit bugs * Fix unexplained lint error :( * Fix granule translation for report output * Fix/improve spec tests * Update typings for orca-backup-reconciliation-report * Pre-review tag minor fixes * Remove configuration TODO, in favor of PR comment * Update typings * Remove premature mods * Update CHANGELOG * Fix recon report spec * Minor typedef refactor * Fix units * WIP - fixup * re-enable disabled test, remove unused code * Dedupe knex objects/queues * Fix linting :bell: * Update db package typing * Update cmr-client typings * Update test with bad test parameters/setup * Add typing fixes/etc to cmr-utils js * Add basic recon report typings * Fix annotations for normalizeEvent * Minor lint fix * Additional typing fixes * Fix knex typings * Remove remaining ES refs, tidy up * Remove shouldAggregateGranulesForCollections test * Fix/update unit tests broken due to normalizeEvent mods * Updated test-collections to create database per test, use common fixtures * Add tests, refactor collection lib queries * Re-order CHANGELOG * Fix merge mangle * Updating default configuration to modify RDS cert configuration * Revert "Updating default configuration to modify RDS cert configuration" This reverts commit 7e77b2d413a20d9a080ce5a1fcb94749eefd2709. 
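As a usage note for the parameterized `getGranulesByApiPropertiesQuery` shown above, a hedged sketch of the new object-style calling convention; the collection ID, dates, and status are illustrative values only:

```js
// Sketch of the parameterized signature introduced in this patch.
// temporalBoundByCreatedAt applies the date range to created_at instead of updated_at.
const records = await getGranulesByApiPropertiesQuery({
  knex,
  searchParams: {
    collectionIds: ['MOD09GQ___006'],
    updatedAtRange: { updatedAtFrom: new Date('2024-01-01') },
    status: 'completed',
  },
  sortByFields: ['granule_id'],
  temporalBoundByCreatedAt: true,
});
```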
* re-activate integration tests, add collation fix to granule sorting * Add FileReport typings * Misc typing fixes/updates * Update ORCA typings * Minor comment update * Remove TODO * Types refactor * Add types file * Update DB package to allow for limit:null/unbounded queries * Minor typing refactor * Fix param arguments * Add collation options * Minor comment update * Refactor collection query to use @cumulus/db/search * Remove debugging log output * Remove type-bound error conditional * Minor typing update * Add missing unit tests * Add CHANGELOG updates * Split defs out into seperate file * Fix unneeded newline/lint * Remove excess typedef * Fix bad param passing, update typing in createReconcilationReportForBucket * Fix docstring typing * Seperate typedefs into seperate file * Update error types * Fix typings * Fix unit test error type mistake --- CHANGELOG.md | 17 +- .../CreateReconciliationReportSpec.js | 3 +- .../api/endpoints/reconciliation-reports.js | 7 + .../create-reconciliation-report-types.js | 60 ++ .../lambdas/create-reconciliation-report.js | 327 +++++---- .../reports/granule-inventory-report.js | 11 +- .../orca-backup-reconciliation-report.js | 25 +- .../api/lib/reconciliationReport-types.js | 19 + packages/api/lib/reconciliationReport.js | 38 +- .../reconciliationReport/normalizeEvent.js | 74 ++- packages/api/lib/types.js | 46 ++ ...-create-reconciliation-report-internals.js | 34 - .../test-create-reconciliation-report.js | 624 ++++++++---------- .../lambdas/test-granule-inventory-report.js | 3 +- .../test-internal-reconciliation-report.js | 16 +- .../test-normalizeEvent.js | 32 +- packages/cmr-client/src/CMR.ts | 23 +- .../cmr-client/src/CMRSearchConceptQueue.ts | 35 +- packages/cmr-client/tests/test-CMR.js | 18 +- packages/cmrjs/src/cmr-utils.js | 32 +- packages/db/src/index.ts | 1 + packages/db/src/lib/QuerySearchClient.ts | 2 - packages/db/src/lib/collection.ts | 55 ++ packages/db/src/lib/granule.ts | 26 +- .../db/src/models/reconciliation_report.ts | 4 +- packages/db/src/search/BaseSearch.ts | 8 +- packages/db/src/search/queries.ts | 7 +- packages/db/src/types/search.ts | 5 +- packages/db/tests/lib/test-collection.js | 218 ++++-- packages/db/tests/lib/test-granule.js | 47 ++ packages/db/tests/search/test-queries.js | 10 + 31 files changed, 1091 insertions(+), 736 deletions(-) create mode 100644 packages/api/lambdas/create-reconciliation-report-types.js create mode 100644 packages/api/lib/reconciliationReport-types.js create mode 100644 packages/api/lib/types.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 422cf803d9b..5d08e27a049 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,18 +40,29 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE - Update `@cumlus/api/ecs/async-operation` to not update Elasticsearch index when reporting status of async operation - **CUMULUS-3806** + - Update `@cumulus/db/search` to allow for ordered collation as a + dbQueryParameter + - Update `@cumulus/db/search` to allow `dbQueryParameters.limit` to be set to + `null` to allow for optional unlimited page sizes in search results + - Update/add type annotations/logic fixes to `@cumulus/api` reconciliation report code + - Annotation/typing fixes to `@cumulus/cmr-client` + - Typing fixes to `@cumulus/db` + - Re-enable Reconciliation Report integration tests + - Update `@cumulus/client/CMR.getToken` to throw if a non-launchpad token is requested without a username + - Update `Inventory` and `Granule Not Found` reports to query postgreSQL + database instead 
of elasticsearch + - Update `@cumulus/db/lib/granule.getGranulesByApiPropertiesQuery` to + allow order by collation to be optionally specified - Update `@cumulus/db/lib/granule.getGranulesByApiPropertiesQuery` to be parameterized and include a modifier on `temporalBoundByCreatedAt` - Remove endpoint call to and all tests for Internal Reconciliation Reports and updated API to throw an error if report is requested - Update Orca reconciliation reports to pull granules for comparison from postgres via `getGranulesByApiPropertiesQuery` -- **CUMULUS-3833** - - Added `ReconciliationReportMigration` lambda to migrate ReconciliationReports from DynamoDB - to Postgres - **CUMULUS-3837** - Added `reconciliation_reports` table in RDS, including indexes - Created pg model, types, and translation for `reconciliationReports` in `@cumulus/db` +- **CUMULUS-3833** - Created api types for `reconciliation_reports` in `@cumulus/types/api` - Updated reconciliation reports lambda to write to new RDS table instead of Dynamo - Updated `@cumulus/api/endpoints/reconciliation-reports` `getReport` and `deleteReport` to work with the new RDS table instead of Dynamo diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index 25842c9ae39..e52dc8a6966 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -436,8 +436,7 @@ describe('When there are granule differences and granule reconciliation is run', if (beforeAllFailed) fail(beforeAllFailed); }); - // TODO: fix tests in CUMULUS-3806 when CreateReconciliationReport lambda is changed to query postgres - xdescribe('Create an Inventory Reconciliation Report to monitor inventory discrepancies', () => { + describe('Create an Inventory Reconciliation Report to monitor inventory discrepancies', () => { // report record in db and report in s3 let reportRecord; let report; diff --git a/packages/api/endpoints/reconciliation-reports.js b/packages/api/endpoints/reconciliation-reports.js index c878e55ad14..7c97183cfbb 100644 --- a/packages/api/endpoints/reconciliation-reports.js +++ b/packages/api/endpoints/reconciliation-reports.js @@ -35,6 +35,11 @@ const { getFunctionNameFromRequestContext } = require('../lib/request'); const logger = new Logger({ sender: '@cumulus/api' }); const maxResponsePayloadSizeBytes = 6 * 1000 * 1000; +/** +* @typedef {import('../lib/types').NormalizedRecReportParams} NormalizedRecReportParams +* @typedef {import('../lib/types').RecReportParams} RecReportParams +*/ + /** * List all reconciliation reports * @@ -173,10 +178,12 @@ async function deleteReport(req, res) { * Creates a new report * * @param {Object} req - express request object + * @param {RecReportParams} req.body * @param {Object} res - express response object * @returns {Promise} the promise of express response object */ async function createReport(req, res) { + /** @type NormalizedRecReportParams */ let validatedInput; try { validatedInput = normalizeEvent(req.body); diff --git a/packages/api/lambdas/create-reconciliation-report-types.js b/packages/api/lambdas/create-reconciliation-report-types.js new file mode 100644 index 00000000000..2ba35b18596 --- /dev/null +++ b/packages/api/lambdas/create-reconciliation-report-types.js @@ -0,0 +1,60 @@ +/** + * @typedef {import('@cumulus/types/api/files').ApiFile} ApiFile + */ + +/** + * @typedef 
{Object} Env + * @property {string} [CONCURRENCY] - The concurrency level for processing. + * @property {string} [ES_INDEX] - The Elasticsearch index. + * @property {string} [AWS_REGION] - The AWS region. + * @property {string} [AWS_ACCESS_KEY_ID] - The AWS access key ID. + * @property {string} [AWS_SECRET_ACCESS_KEY] - The AWS secret access key. + * @property {string} [AWS_SESSION_TOKEN] - The AWS session token. + * @property {string} [NODE_ENV] - The Node.js environment (e.g., 'development', 'production'). + * @property {string} [DATABASE_URL] - The database connection URL. + * @property {string} [key] string - Any other environment variable as a string. + */ + +/** + * @typedef {Object} CMRCollectionItem + * @property {Object} umm - The UMM (Unified Metadata Model) object for the granule. + * @property {string} umm.ShortName - The short name of the collection. + * @property {string} umm.Version - The version of the collection. + * @property {Array} umm.RelatedUrls - The related URLs for the granule. + */ + +/** + * @typedef {Object} CMRItem + * @property {Object} umm - The UMM (Unified Metadata Model) object for the granule. + * @property {string} umm.GranuleUR - The unique identifier for the granule in CMR. + * @property {Object} umm.CollectionReference - The collection reference object. + * @property {string} umm.CollectionReference.ShortName - The short name of the collection. + * @property {string} umm.CollectionReference.Version - The version of the collection. + * @property {Array} umm.RelatedUrls - The related URLs for the granule. + */ + +/** + * @typedef {Object} FilesReport + * @property {number} okCount + * @property {ApiFile[]} onlyInCumulus + * @property {ApiFile[]} onlyInCmr + * + */ + +/** + * @typedef {Object} GranulesReport + * @property {number} okCount - The count of OK granules. + * @property {Array<{GranuleUR: string, ShortName: string, Version: string}>} onlyInCmr + * - The list of granules only in Cumulus. 
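To make the CMR typedefs above concrete, a hypothetical UMM granule item shaped per `CMRItem`; every value is an example, not data from this patch:

```js
/** @type {CMRItem} */
const cmrItem = {
  umm: {
    GranuleUR: 'MOD09GQ.A2017025.h21v00.006.2017034065104', // example granule UR
    CollectionReference: { ShortName: 'MOD09GQ', Version: '006' },
    RelatedUrls: [],
  },
};
```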
+ * @property {Array<{granuleId: string, collectionId: string}>} onlyInCumulus + */ + +/** + * @typedef {Object} FilesInCumulus + * @property {number} okCount + * @property {Object} okCountByGranule + * @property {string[]} onlyInS3 + * @property {Object[]} onlyInDb + */ + +module.exports = {}; diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 450861433dd..e404ea0a32b 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const cloneDeep = require('lodash/cloneDeep'); @@ -11,19 +13,21 @@ const S3ListObjectsV2Queue = require('@cumulus/aws-client/S3ListObjectsV2Queue') const { s3 } = require('@cumulus/aws-client/services'); const BucketsConfig = require('@cumulus/common/BucketsConfig'); const { getBucketsConfigKey } = require('@cumulus/common/stack'); +const { removeNilProperties } = require('@cumulus/common/util'); const { fetchDistributionBucketMap } = require('@cumulus/distribution-utils'); const { constructCollectionId, deconstructCollectionId } = require('@cumulus/message/Collections'); const { CMRSearchConceptQueue } = require('@cumulus/cmr-client'); const { constructOnlineAccessUrl, getCmrSettings } = require('@cumulus/cmrjs/cmr-utils'); const { + CollectionSearch, getFilesAndGranuleInfoQuery, + getGranulesByApiPropertiesQuery, getKnexClient, + getUniqueCollectionsByGranuleFilter, QuerySearchClient, + translatePostgresFileToApiFile, } = require('@cumulus/db'); -const { ESCollectionGranuleQueue } = require('@cumulus/es-client/esCollectionGranuleQueue'); -const Collection = require('@cumulus/es-client/collections'); -const { ESSearchQueue } = require('@cumulus/es-client/esSearchQueue'); const Logger = require('@cumulus/logger'); const { getEsClient } = require('@cumulus/es-client/search'); const { indexReconciliationReport } = require('@cumulus/es-client/indexer'); @@ -37,8 +41,7 @@ const { createOrcaBackupReconciliationReport } = require('./reports/orca-backup- const { errorify, filenamify } = require('../lib/utils'); const { cmrGranuleSearchParams, - convertToESCollectionSearchParams, - convertToESGranuleSearchParams, + convertToDBGranuleSearchParams, initialReportHeader, } = require('../lib/reconciliationReport'); @@ -46,6 +49,32 @@ const log = new Logger({ sender: '@api/lambdas/create-reconciliation-report' }); const isDataBucket = (bucketConfig) => ['private', 'public', 'protected'].includes(bucketConfig.type); +// Typescript annotations + +/** + * @typedef {typeof process.env } ProcessEnv + * @typedef {import('knex').Knex} Knex + * @typedef {import('@cumulus/es-client/search').EsClient} EsClient + * @typedef {import('../lib/types').NormalizedRecReportParams } NormalizedRecReportParams + * @typedef {import('../lib/types').EnhancedNormalizedRecReportParams} + * EnhancedNormalizedRecReportParams + * @typedef {import('@cumulus/cmr-client/CMR').CMRConstructorParams} CMRSettings + * @typedef {import('@cumulus/db').PostgresReconciliationReportRecord} + * PostgresReconciliationReportRecord + * @typedef {import('@cumulus/types/api/reconciliation_reports').ReconciliationReportStatus} + * ReconciliationReportStatus + * @typedef {import('@cumulus/types/api/reconciliation_reports').ReconciliationReportType} + * ReconciliationReportType + * @typedef {import('@cumulus/types/api/files').ApiFile} ApiFile + * @typedef {import('@cumulus/db').PostgresGranuleRecord} PostgresGranuleRecord + * @typedef 
{import('./create-reconciliation-report-types').Env } Env + * @typedef {import('./create-reconciliation-report-types').CMRCollectionItem } CMRCollectionItem + * @typedef {import('./create-reconciliation-report-types').CMRItem } CMRItem + * @typedef {import('./create-reconciliation-report-types').FilesReport } FilesReport + * @typedef {import('./create-reconciliation-report-types').GranulesReport } GranulesReport + * @typedef {import('./create-reconciliation-report-types').FilesInCumulus } FilesInCumulus + */ + /** * * @param {string} reportType - reconciliation report type @@ -101,41 +130,32 @@ function isOneWayGranuleReport(reportParams) { } /** - * Checks to see if the searchParams have any value that would require a - * filtered search in ES - * @param {Object} searchParams - * @returns {boolean} returns true if searchParams contain a key that causes filtering to occur. - */ -function shouldAggregateGranulesForCollections(searchParams) { - return [ - 'updatedAt__from', - 'updatedAt__to', - 'granuleId__in', - 'provider__in', - ].some((e) => !!searchParams[e]); -} - -/** - * fetch CMR collections and filter the returned UMM CMR collections by the desired collectionIds + * Fetches collections from the CMR (Common Metadata Repository) and returns their IDs. + * + * @param {EnhancedNormalizedRecReportParams} recReportParams - The parameters for the function. + * @returns {Promise} A promise that resolves to an array of collection IDs from the CMR. * - * @param {Object} recReportParams - input report params - * @param {Array} recReportParams.collectionIds - array of collectionIds to keep - * @returns {Array} filtered list of collectionIds returned from CMR + * @example + * await fetchCMRCollections({ collectionIds: ['COLLECTION_1', 'COLLECTION_2'] }); */ async function fetchCMRCollections({ collectionIds }) { const cmrSettings = await getCmrSettings(); - const cmrCollectionsIterator = new CMRSearchConceptQueue({ - cmrSettings, - type: 'collections', - format: 'umm_json', - }); + const cmrCollectionsIterator = /** @type {CMRSearchConceptQueue} */( + new CMRSearchConceptQueue({ + cmrSettings, + type: 'collections', + format: 'umm_json', + })); const allCmrCollectionIds = []; let nextCmrItem = await cmrCollectionsIterator.shift(); while (nextCmrItem) { - allCmrCollectionIds - .push(constructCollectionId(nextCmrItem.umm.ShortName, nextCmrItem.umm.Version)); - nextCmrItem = await cmrCollectionsIterator.shift(); // eslint-disable-line no-await-in-loop + allCmrCollectionIds.push( + constructCollectionId(nextCmrItem.umm.ShortName, nextCmrItem.umm.Version) + ); + nextCmrItem + // eslint-disable-next-line no-await-in-loop + = /** @type {CMRCollectionItem | null} */ (await cmrCollectionsIterator.shift()); } const cmrCollectionIds = allCmrCollectionIds.sort(); @@ -145,31 +165,42 @@ async function fetchCMRCollections({ collectionIds }) { } /** - * Fetch collections in Elasticsearch. - * @param {Object} recReportParams - input report params. - * @returns {Promise} - list of collectionIds that match input paramaters + * Fetches collections from the database based on the provided parameters. + * + * @param {EnhancedNormalizedRecReportParams} recReportParams - The reconciliation + * report parameters. + * @returns {Promise} A promise that resolves to an array of collection IDs. 
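A condensed sketch of the peek/shift iteration pattern that `fetchCMRCollections` above applies to `CMRSearchConceptQueue`; names follow this patch and `cmrSettings` is assumed to come from `getCmrSettings()`:

```js
// Sketch: drain the CMR concept queue and build sorted collection IDs.
const iterator = new CMRSearchConceptQueue({ cmrSettings, type: 'collections', format: 'umm_json' });
const allIds = [];
let item = await iterator.shift();
while (item) {
  allIds.push(constructCollectionId(item.umm.ShortName, item.umm.Version));
  item = await iterator.shift(); // eslint-disable-line no-await-in-loop
}
const cmrCollectionIds = allIds.sort();
```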
*/ -async function fetchESCollections(recReportParams) { - const esCollectionSearchParams = convertToESCollectionSearchParams(recReportParams); - const esGranuleSearchParams = convertToESGranuleSearchParams(recReportParams); - let esCollectionIds; - // [MHS, 09/02/2020] We are doing these two because we can't use - // aggregations on scrolls yet until we update elasticsearch version. - if (shouldAggregateGranulesForCollections(esGranuleSearchParams)) { - // Build an ESCollection and call the aggregateGranuleCollections to - // get list of collection ids that have granules that have been updated - const esCollection = new Collection({ queryStringParameters: esGranuleSearchParams }, 'collection', process.env.ES_INDEX); - const esCollectionItems = await esCollection.aggregateGranuleCollections(); - esCollectionIds = esCollectionItems.sort(); - } else { - // return all ES collections - const esCollection = new ESSearchQueue(esCollectionSearchParams, 'collection', process.env.ES_INDEX); - const esCollectionItems = await esCollection.empty(); - esCollectionIds = esCollectionItems.map( - (item) => constructCollectionId(item.name, item.version) - ).sort(); +async function fetchDbCollections(recReportParams) { + const { + collectionIds, + endTimestamp, + granuleIds, + knex, + providers, + startTimestamp, + } = recReportParams; + if (providers || granuleIds || startTimestamp || endTimestamp) { + const filteredDbCollections = await getUniqueCollectionsByGranuleFilter({ + ...recReportParams, + }); + return filteredDbCollections.map((collection) => + constructCollectionId(collection.name, collection.version)); } - return esCollectionIds; + + const queryStringParameters = removeNilProperties({ + _id__in: collectionIds ? collectionIds.join(',') : undefined, + timestamp__from: startTimestamp, + timestamp__to: endTimestamp, + sort_key: ['name', 'version'], + collate: 'C', + }); + const searchResponse = await new CollectionSearch({ + queryStringParameters: { ...queryStringParameters, limit: null }, + }).query(knex); + const dbCollections = searchResponse.results; + return dbCollections.map((collection) => + constructCollectionId(collection.name, collection.version)); } /** @@ -177,7 +208,7 @@ async function fetchESCollections(recReportParams) { * PostgreSQL, and that there are no extras in either S3 or PostgreSQL * * @param {string} Bucket - the bucket containing files to be reconciled - * @param {Object} recReportParams - input report params. + * @param {EnhancedNormalizedRecReportParams} recReportParams - input report params. * @returns {Promise} a report */ async function createReconciliationReportForBucket(Bucket, recReportParams) { @@ -281,8 +312,8 @@ async function createReconciliationReportForBucket(Bucket, recReportParams) { /** * Compare the collection holdings in CMR with Cumulus * - * @param {Object} recReportParams - lambda's input filtering parameters to - * narrow limit of report. + * @param {EnhancedNormalizedRecReportParams} recReportParams - lambda's input filtering + * parameters to narrow limit of report. 
* @returns {Promise} an object with the okCollections, onlyInCumulus and * onlyInCmr */ @@ -304,17 +335,20 @@ async function reconciliationReportForCollections(recReportParams) { // get all collections from CMR and sort them, since CMR query doesn't support // 'Version' as sort_key log.debug('Fetching collections from CMR.'); - const cmrCollectionIds = await fetchCMRCollections(recReportParams); - const esCollectionIds = await fetchESCollections(recReportParams); - log.info(`Comparing ${cmrCollectionIds.length} CMR collections to ${esCollectionIds.length} Elasticsearch collections`); + const cmrCollectionIds = (await fetchCMRCollections(recReportParams)).sort(); + const dbCollectionIds = (await fetchDbCollections(recReportParams)).sort(); - let nextDbCollectionId = esCollectionIds[0]; + log.info(`Comparing ${cmrCollectionIds.length} CMR collections to ${dbCollectionIds.length} PostgreSQL collections`); + + /** @type {string | undefined } */ + let nextDbCollectionId = dbCollectionIds[0]; + /** @type {string | undefined } */ let nextCmrCollectionId = cmrCollectionIds[0]; while (nextDbCollectionId && nextCmrCollectionId) { if (nextDbCollectionId < nextCmrCollectionId) { // Found an item that is only in Cumulus database and not in cmr - esCollectionIds.shift(); + dbCollectionIds.shift(); collectionsOnlyInCumulus.push(nextDbCollectionId); } else if (nextDbCollectionId > nextCmrCollectionId) { // Found an item that is only in cmr and not in Cumulus database @@ -323,16 +357,16 @@ async function reconciliationReportForCollections(recReportParams) { } else { // Found an item that is in both cmr and database okCollections.push(nextDbCollectionId); - esCollectionIds.shift(); + dbCollectionIds.shift(); cmrCollectionIds.shift(); } - nextDbCollectionId = (esCollectionIds.length !== 0) ? esCollectionIds[0] : undefined; + nextDbCollectionId = (dbCollectionIds.length !== 0) ? dbCollectionIds[0] : undefined; nextCmrCollectionId = (cmrCollectionIds.length !== 0) ? 
cmrCollectionIds[0] : undefined; } // Add any remaining database items to the report - collectionsOnlyInCumulus = collectionsOnlyInCumulus.concat(esCollectionIds); + collectionsOnlyInCumulus = collectionsOnlyInCumulus.concat(dbCollectionIds); // Add any remaining CMR items to the report if (!oneWayReport) collectionsOnlyInCmr = collectionsOnlyInCmr.concat(cmrCollectionIds); @@ -360,6 +394,10 @@ async function reconciliationReportForCollections(recReportParams) { * @returns {Promise} - an object with the okCount, onlyInCumulus, onlyInCmr */ async function reconciliationReportForGranuleFiles(params) { + if (!process.env.DISTRIBUTION_ENDPOINT) { + throw new Error('DISTRIBUTION_ENDPOINT is not defined in function environment variables, but is required'); + } + const distEndpoint = process.env.DISTRIBUTION_ENDPOINT; const { granuleInDb, granuleInCmr, bucketsConfig, distributionBucketMap } = params; let okCount = 0; const onlyInCumulus = []; @@ -389,7 +427,7 @@ async function reconciliationReportForGranuleFiles(params) { // not all files should be in CMR const distributionAccessUrl = await constructOnlineAccessUrl({ file: granuleFiles[urlFileName], - distEndpoint: process.env.DISTRIBUTION_ENDPOINT, + distEndpoint, bucketTypes, urlType: 'distribution', distributionBucketMap, @@ -397,7 +435,7 @@ async function reconciliationReportForGranuleFiles(params) { const s3AccessUrl = await constructOnlineAccessUrl({ file: granuleFiles[urlFileName], - distEndpoint: process.env.DISTRIBUTION_ENDPOINT, + distEndpoint, bucketTypes, urlType: 's3', distributionBucketMap, @@ -464,14 +502,17 @@ exports.reconciliationReportForGranuleFiles = reconciliationReportForGranuleFile /** * Compare the granule holdings in CMR with Cumulus for a given collection * - * @param {Object} params - parameters - * @param {string} params.collectionId - the collection which has the granules to be - * reconciled - * @param {Object} params.bucketsConfig - bucket configuration object - * @param {Object} params.distributionBucketMap - mapping of bucket->distirubtion path values - * (e.g. { bucket: distribution path }) - * @param {Object} params.recReportParams - Lambda report paramaters for narrowing focus - * @returns {Promise} - an object with the granulesReport and filesReport + * @param {Object} params - parameters + * @param {string} params.collectionId - the collection which has the granules to be + * reconciled + * @param {Object} params.bucketsConfig - bucket configuration object + * @param {Object} params.distributionBucketMap - mapping of bucket->distirubtion path values + * (e.g. 
{ bucket: distribution path }) + * @param {EnhancedNormalizedRecReportParams} params.recReportParams - Lambda report paramaters for + * narrowing focus database + * @returns {Promise<{ granulesReport: GranulesReport, filesReport: FilesReport }>} + * - an object with the granulesReport and + * filesReport */ async function reconciliationReportForGranules(params) { // compare granule holdings: @@ -481,41 +522,56 @@ async function reconciliationReportForGranules(params) { // Report granules only in CUMULUS log.info(`reconciliationReportForGranules(${params.collectionId})`); const { collectionId, bucketsConfig, distributionBucketMap, recReportParams } = params; + const { knex } = recReportParams; const { name, version } = deconstructCollectionId(collectionId); + + /** @type {GranulesReport} */ const granulesReport = { okCount: 0, onlyInCumulus: [], onlyInCmr: [] }; + /** @type {FilesReport} */ const filesReport = { okCount: 0, onlyInCumulus: [], onlyInCmr: [] }; try { - const cmrSettings = await getCmrSettings(); - const searchParams = new URLSearchParams({ short_name: name, version: version, sort_key: ['granule_ur'] }); + const cmrSettings = /** @type CMRSettings */(await getCmrSettings()); + const searchParams = new URLSearchParams({ short_name: name, version: version, sort_key: 'granule_ur' }); cmrGranuleSearchParams(recReportParams).forEach(([paramName, paramValue]) => { searchParams.append(paramName, paramValue); }); log.debug(`fetch CMRSearchConceptQueue(${collectionId}) with searchParams: ${JSON.stringify(searchParams)}`); - const cmrGranulesIterator = new CMRSearchConceptQueue({ + const cmrGranulesIterator + = /** @type {CMRSearchConceptQueue} */(new CMRSearchConceptQueue({ cmrSettings, type: 'granules', searchParams, format: 'umm_json', + })); + + const dbSearchParams = convertToDBGranuleSearchParams({ + ...recReportParams, + collectionIds: [collectionId], + }); + const granulesSearchQuery = getGranulesByApiPropertiesQuery({ + knex, + searchParams: { ...dbSearchParams, collate: 'C' }, + sortByFields: 'granules.granule_id', }); - const esGranuleSearchParamsByCollectionId = convertToESGranuleSearchParams( - { ...recReportParams, collectionIds: [collectionId] } - ); + const pgGranulesIterator = + /** @type {QuerySearchClient} */ ( + new QuerySearchClient( + granulesSearchQuery, + 100 // arbitrary limit on how items are fetched at once + ) + ); - log.debug(`Create ES granule iterator with ${JSON.stringify(esGranuleSearchParamsByCollectionId)}`); - const esGranulesIterator = new ESCollectionGranuleQueue( - esGranuleSearchParamsByCollectionId, process.env.ES_INDEX - ); const oneWay = isOneWayGranuleReport(recReportParams); log.debug(`is oneWay granule report: ${collectionId}, ${oneWay}`); let [nextDbItem, nextCmrItem] = await Promise.all( - [esGranulesIterator.peek(), cmrGranulesIterator.peek()] + [(pgGranulesIterator.peek()), cmrGranulesIterator.peek()] ); while (nextDbItem && nextCmrItem) { - const nextDbGranuleId = nextDbItem.granuleId; + const nextDbGranuleId = nextDbItem.granule_id; const nextCmrGranuleId = nextCmrItem.umm.GranuleUR; if (nextDbGranuleId < nextCmrGranuleId) { @@ -524,7 +580,7 @@ async function reconciliationReportForGranules(params) { granuleId: nextDbGranuleId, collectionId: collectionId, }); - await esGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + await pgGranulesIterator.shift(); // eslint-disable-line no-await-in-loop } else if (nextDbGranuleId > nextCmrGranuleId) { // Found an item that is only in CMR and not in Cumulus database if (!oneWay) { 
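The collection and granule comparisons above both rely on a two-pointer walk over sorted ID lists; a stripped-down sketch of that merge, with hypothetical `dbIds`/`cmrIds` arrays assumed to be sorted under the same `'C'` collation used in this patch:

```js
// Sketch of the sorted-merge comparison: items present only on one side are
// reported, matching items count as ok. Both inputs must share a sort order.
// (The real report skips the trailing CMR remainder for one-way reports.)
const onlyInCumulus = [];
const onlyInCmr = [];
const okIds = [];
while (dbIds.length > 0 && cmrIds.length > 0) {
  if (dbIds[0] < cmrIds[0]) onlyInCumulus.push(dbIds.shift());
  else if (dbIds[0] > cmrIds[0]) onlyInCmr.push(cmrIds.shift());
  else {
    okIds.push(dbIds.shift());
    cmrIds.shift();
  }
}
onlyInCumulus.push(...dbIds);
onlyInCmr.push(...cmrIds);
```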
@@ -538,10 +594,16 @@ async function reconciliationReportForGranules(params) { } else { // Found an item that is in both CMR and Cumulus database granulesReport.okCount += 1; + // eslint-disable-next-line no-await-in-loop + const postgresGranuleFiles = await getFilesAndGranuleInfoQuery({ + knex, + searchParams: { granule_cumulus_id: nextDbItem.cumulus_id }, + sortColumns: ['key'], + }); const granuleInDb = { granuleId: nextDbGranuleId, collectionId: collectionId, - files: nextDbItem.files, + files: postgresGranuleFiles.map((f) => translatePostgresFileToApiFile(f)), }; const granuleInCmr = { GranuleUR: nextCmrGranuleId, @@ -549,7 +611,7 @@ async function reconciliationReportForGranules(params) { Version: nextCmrItem.umm.CollectionReference.Version, RelatedUrls: nextCmrItem.umm.RelatedUrls, }; - await esGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + await pgGranulesIterator.shift(); // eslint-disable-line no-await-in-loop await cmrGranulesIterator.shift(); // eslint-disable-line no-await-in-loop // compare the files now to avoid keeping the granules' information in memory @@ -562,14 +624,17 @@ async function reconciliationReportForGranules(params) { filesReport.onlyInCmr = filesReport.onlyInCmr.concat(fileReport.onlyInCmr); } - [nextDbItem, nextCmrItem] = await Promise.all([esGranulesIterator.peek(), cmrGranulesIterator.peek()]); // eslint-disable-line max-len, no-await-in-loop + [nextDbItem, nextCmrItem] = await Promise.all([pgGranulesIterator.peek(), cmrGranulesIterator.peek()]); // eslint-disable-line max-len, no-await-in-loop } - // Add any remaining ES/PostgreSQL items to the report - while (await esGranulesIterator.peek()) { // eslint-disable-line no-await-in-loop - const dbItem = await esGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + // Add any remaining PostgreSQL items to the report + while (await pgGranulesIterator.peek()) { // eslint-disable-line no-await-in-loop + const dbItem = await pgGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + if (!dbItem) { + throw new Error('database returned item is null in reconciliationReportForGranules'); + } granulesReport.onlyInCumulus.push({ - granuleId: dbItem.granuleId, + granuleId: dbItem.granule_id, collectionId: collectionId, }); } @@ -578,6 +643,9 @@ async function reconciliationReportForGranules(params) { if (!oneWay) { while (await cmrGranulesIterator.peek()) { // eslint-disable-line no-await-in-loop const cmrItem = await cmrGranulesIterator.shift(); // eslint-disable-line no-await-in-loop + if (!cmrItem) { + throw new Error('CMR returned item is null in reconciliationReportForGranules'); + } granulesReport.onlyInCmr.push({ GranuleUR: cmrItem.umm.GranuleUR, ShortName: nextCmrItem.umm.CollectionReference.ShortName, @@ -607,20 +675,21 @@ exports.reconciliationReportForGranules = reconciliationReportForGranules; /** * Compare the holdings in CMR with Cumulus' internal data store, report any discrepancies * - * @param {Object} params . - parameters - * @param {Object} params.bucketsConfig - bucket configuration object - * @param {Object} params.distributionBucketMap - mapping of bucket->distirubtion path values + * @param {Object} params . - parameters + * @param {Object} params.bucketsConfig - bucket configuration object + * @param {Object} params.distributionBucketMap - mapping of bucket->distirubtion path values * (e.g. 
{ bucket: distribution path }) - * @param {Object} [params.recReportParams] - optional Lambda endpoint's input params to - * narrow report focus - * @param {number} [params.recReportParams.StartTimestamp] - * @param {number} [params.recReportParams.EndTimestamp] - * @param {string} [params.recReportparams.collectionIds] - * @returns {Promise} - a reconciliation report + * @param {EnhancedNormalizedRecReportParams} params.recReportParams - Lambda endpoint's input + * params to narrow focus of report + * @returns {Promise} - a reconciliation report */ async function reconciliationReportForCumulusCMR(params) { log.info(`reconciliationReportForCumulusCMR with params ${JSON.stringify(params)}`); - const { bucketsConfig, distributionBucketMap, recReportParams } = params; + const { + bucketsConfig, + distributionBucketMap, + recReportParams, + } = params; const collectionReport = await reconciliationReportForCollections(recReportParams); const collectionsInCumulusCmr = { okCount: collectionReport.okCollections.length, @@ -668,7 +737,7 @@ async function reconciliationReportForCumulusCMR(params) { * @param {Object} report - report to upload * @param {string} systemBucket - system bucket * @param {string} reportKey - report key - * @returns {Promise} + * @returns - A promise that resolves with the status of the return object */ function _uploadReportToS3(report, systemBucket, reportKey) { return s3().putObject({ @@ -681,17 +750,8 @@ function _uploadReportToS3(report, systemBucket, reportKey) { /** * Create a Reconciliation report and save it to S3 * - * @param {Object} recReportParams - params - * @param {Object} recReportParams.reportType - the report type - * @param {moment} recReportParams.createStartTime - when the report creation was begun - * @param {moment} recReportParams.endTimestamp - ending report datetime ISO Timestamp - * @param {string} recReportParams.location - location to inventory for report - * @param {string} recReportParams.reportKey - the s3 report key - * @param {string} recReportParams.stackName - the name of the CUMULUS stack - * @param {moment} recReportParams.startTimestamp - beginning report datetime ISO timestamp - * @param {string} recReportParams.systemBucket - the name of the CUMULUS system bucket - * @param {Knex} recReportParams.knex - Database client for interacting with PostgreSQL database - * @returns {Promise} a Promise that resolves when the report has been + * @param {EnhancedNormalizedRecReportParams} recReportParams - params + * @returns - a Promise that resolves when the report has been * uploaded to S3 */ async function createReconciliationReport(recReportParams) { @@ -700,7 +760,6 @@ async function createReconciliationReport(recReportParams) { stackName, systemBucket, location, - knex, } = recReportParams; log.info(`createReconciliationReport (${JSON.stringify(recReportParams)})`); // Fetch the bucket names to reconcile @@ -713,6 +772,7 @@ async function createReconciliationReport(recReportParams) { const bucketsConfig = new BucketsConfig(bucketsConfigJson); // Write an initial report to S3 + /** @type {FilesInCumulus} */ const filesInCumulus = { okCount: 0, okCountByGranule: {}, @@ -725,6 +785,7 @@ async function createReconciliationReport(recReportParams) { onlyInCumulus: [], onlyInCmr: [], }; + let report = { ...initialReportHeader(recReportParams), filesInCumulus, @@ -742,7 +803,7 @@ async function createReconciliationReport(recReportParams) { // Create a report for each bucket const promisedBucketReports = dataBuckets.map( - (bucket) => 
createReconciliationReportForBucket(bucket, recReportParams, knex) + (bucket) => createReconciliationReportForBucket(bucket, recReportParams) ); const bucketReports = await Promise.all(promisedBucketReports); @@ -766,7 +827,7 @@ async function createReconciliationReport(recReportParams) { + bucketGranuleCount; }); } else { - delete report.filesInCumulus.okCountByGranule; + report.filesInCumulus.okCountByGranule = {}; } }); } @@ -805,10 +866,12 @@ async function createReconciliationReport(recReportParams) { * @param {Object} params - params * @param {string} params.systemBucket - the name of the CUMULUS system bucket * @param {string} params.stackName - the name of the CUMULUS stack - * @param {string} params.reportType - the type of reconciliation report + * @param {ReconciliationReportType} params.reportType - the type of reconciliation report * @param {string} params.reportName - the name of the report - * @param {Knex} params.knex - Knex client to interact with pg - * @returns {Object} report record saved to the database + * @param {Env} params.env - the environment variables + * @param {Knex} params.knex - Optional Instance of a Knex client for testing + * @param {EsClient} params.esClient - Optional Instance of an Elasticsearch client for testing + * @returns {Promise} report record saved to the database */ async function processRequest(params) { log.info(`processing reconciliation report request with params: ${JSON.stringify(params)}`); @@ -818,7 +881,7 @@ async function processRequest(params) { reportName, systemBucket, stackName, - knex = await getKnexClient(env), + knex = await getKnexClient({ env }), esClient = await getEsClient(), } = params; const createStartTime = moment.utc(); @@ -832,6 +895,7 @@ async function processRequest(params) { const builtReportRecord = { name: reportRecordName, type: reportType, + /** @type ReconciliationReportStatus */ status: 'Pending', location: buildS3Uri(systemBucket, reportKey), }; @@ -840,7 +904,7 @@ async function processRequest(params) { await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); log.info(`Report added to database as pending: ${JSON.stringify(reportApiRecord)}.`); - const concurrency = env.CONCURRENCY || 3; + const concurrency = env.CONCURRENCY || '3'; try { const recReportParams = { @@ -856,18 +920,20 @@ async function processRequest(params) { log.error( 'Internal Reconciliation Reports are no longer valid, as Cumulus is no longer utilizing Elasticsearch' ); + //TODO remove internal rec report code throw new Error('Internal Reconciliation Reports are no longer valid'); } else if (reportType === 'Granule Inventory') { await createGranuleInventoryReport(recReportParams); } else if (reportType === 'ORCA Backup') { await createOrcaBackupReconciliationReport(recReportParams); - } else { + } else if (['Inventory', 'Granule Not Found'].includes(reportType)) { // reportType is in ['Inventory', 'Granule Not Found'] - await createReconciliationReport(recReportParams); // TODO Update to not use elasticsearch + await createReconciliationReport(recReportParams); } const generatedRecord = { ...reportPgRecord, + /** @type ReconciliationReportStatus */ status: 'Generated', }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, generatedRecord); @@ -877,6 +943,7 @@ async function processRequest(params) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); // eslint-disable-line max-len const erroredRecord = { ...reportPgRecord, + /** @type ReconciliationReportStatus */ status: 'Failed', error: { Error: error.message, @@ -899,8 +966,8 @@ async function processRequest(params) { async function handler(event) { // increase the limit of search result from CMR.searchCollections/searchGranules - process.env.CMR_LIMIT = process.env.CMR_LIMIT || 5000; - process.env.CMR_PAGE_SIZE = process.env.CMR_PAGE_SIZE || 200; + process.env.CMR_LIMIT = process.env.CMR_LIMIT || '5000'; + process.env.CMR_PAGE_SIZE = process.env.CMR_PAGE_SIZE || '200'; //TODO: Remove irrelevant env vars from terraform after ES reports are removed const varsToLog = ['CMR_LIMIT', 'CMR_PAGE_SIZE', 'ES_SCROLL', 'ES_SCROLL_SIZE']; diff --git a/packages/api/lambdas/reports/granule-inventory-report.js b/packages/api/lambdas/reports/granule-inventory-report.js index 7a6abcf157e..98dd7bfd7fc 100644 --- a/packages/api/lambdas/reports/granule-inventory-report.js +++ b/packages/api/lambdas/reports/granule-inventory-report.js @@ -1,3 +1,5 @@ +//@ts-check + 'use strict'; const noop = require('lodash/noop'); @@ -15,11 +17,14 @@ const log = new Logger({ sender: '@api/lambdas/granule-inventory-report' }); const { convertToDBGranuleSearchParams } = require('../../lib/reconciliationReport'); +/** + * @typedef {import('../../lib/types').EnhancedNormalizedRecReportParams} + * EnhancedNormalizedRecReportParams + */ + /** * Builds a CSV file of all granules in the Cumulus DB - * @param {Object} recReportParams - * @param {string} recReportParams.reportKey - s3 key to store report - * @param {string} recReportParams.systemBucket - bucket to store report. + * @param {EnhancedNormalizedRecReportParams} recReportParams * @returns {Promise} - promise of a report written to s3. 
*/ async function createGranuleInventoryReport(recReportParams) { diff --git a/packages/api/lambdas/reports/orca-backup-reconciliation-report.js b/packages/api/lambdas/reports/orca-backup-reconciliation-report.js index c22fecaabed..be8e50fb43f 100644 --- a/packages/api/lambdas/reports/orca-backup-reconciliation-report.js +++ b/packages/api/lambdas/reports/orca-backup-reconciliation-report.js @@ -38,7 +38,12 @@ const ORCASearchCatalogQueue = require('../../lib/ORCASearchCatalogQueue'); * @property {string} reason */ -/** @typedef { import('@cumulus/db').PostgresGranuleRecord } PostgresGranuleRecord */ +/** + * @typedef { import('@cumulus/db').PostgresGranuleRecord } PostgresGranuleRecord + * @typedef {import('../../lib/types').EnhancedNormalizedRecReportParams } + * EnhancedNormalizedRecReportParams + */ + /** * @typedef {Object} GranuleReport * @property {boolean} ok @@ -98,8 +103,7 @@ const fileConflictTypes = { /** * Fetch orca configuration for all or specified collections * - * @param {Object} recReportParams - input report params - * @param {String[]} recReportParams.collectionIds - array of collectionIds + * @param {EnhancedNormalizedRecReportParams} recReportParams - input report params * @returns {Promise} - list of { collectionId, orca configuration } */ async function fetchCollectionsConfig(recReportParams) { @@ -354,8 +358,7 @@ async function addGranuleToReport({ /** * Compare the granule holdings in Cumulus with ORCA * - * @param {Object} recReportParams - input report params - * @param {String[]} recReportParams.collectionIds - array of collectionIds + * @param {EnhancedNormalizedRecReportParams} recReportParams - input report params * @returns {Promise} an object with the okCount, onlyInCumulus, onlyInOrca * and withConfilcts */ @@ -496,17 +499,7 @@ async function orcaReconciliationReportForGranules(recReportParams) { /** * Create an ORCA Backup Reconciliation report and save it to S3 * - * @param {Object} recReportParams - params - * @param {Object} recReportParams.collectionIds - array of collectionIds - * @param {Object} recReportParams.providers - array of providers - * @param {Object} recReportParams.granuleIds - array of granuleIds - * @param {Object} recReportParams.reportType - the report type - * @param {moment} recReportParams.createStartTime - when the report creation was begun - * @param {moment} recReportParams.endTimestamp - ending report datetime ISO Timestamp - * @param {string} recReportParams.reportKey - the s3 report key - * @param {string} recReportParams.stackName - the name of the CUMULUS stack - * @param {moment} recReportParams.startTimestamp - beginning report datetime ISO timestamp - * @param {string} recReportParams.systemBucket - the name of the CUMULUS system bucket + * @param {EnhancedNormalizedRecReportParams} recReportParams - params * @returns {Promise} a Promise that resolves when the report has been * uploaded to S3 */ diff --git a/packages/api/lib/reconciliationReport-types.js b/packages/api/lib/reconciliationReport-types.js new file mode 100644 index 00000000000..16e3b5096ef --- /dev/null +++ b/packages/api/lib/reconciliationReport-types.js @@ -0,0 +1,19 @@ +/** + * @typedef {Object} ReportHeader + * @property {string | undefined} collectionId - The collection ID. + * @property {string | string[] | undefined} collectionIds - The collection IDs. + * @property {string | undefined} createEndTime - The end time of the report creation. + * @property {string} createStartTime - The start time of the report creation. 
+ * @property {string | undefined} error - Any error that occurred.
+ * @property {string | undefined} granuleId - The granule ID.
+ * @property {string | string[] | undefined} granuleIds - The granule IDs.
+ * @property {string | string[] | undefined} provider - The provider.
+ * @property {string | string[] | undefined} providers - The providers.
+ * @property {string | undefined} location - The location.
+ * @property {string | undefined} reportEndTime - The end time of the report.
+ * @property {string | undefined} reportStartTime - The start time of the report.
+ * @property {string} reportType - The type of the report.
+ * @property {string} status - The status of the report.
+ */
+
+module.exports = {};
diff --git a/packages/api/lib/reconciliationReport.js b/packages/api/lib/reconciliationReport.js
index 328ce25cfd9..7cebd39e0f6 100644
--- a/packages/api/lib/reconciliationReport.js
+++ b/packages/api/lib/reconciliationReport.js
@@ -1,3 +1,5 @@
+//@ts-check
+
 'use strict';
 
 const isEqual = require('lodash/isEqual');
@@ -9,6 +11,14 @@ const Logger = require('@cumulus/logger');
 
 const log = new Logger({ sender: '@api/lambdas/create-reconciliation-report' });
 
+/**
+ * @typedef {import('../lib/types').RecReportParams } RecReportParams
+ * @typedef {import('../lib/types').EnhancedNormalizedRecReportParams }
+ * EnhancedNormalizedRecReportParams
+ * @typedef {import('../lib/types').NormalizedRecReportParams } NormalizedRecReportParams
+ * @typedef {import('./reconciliationReport-types').ReportHeader } ReportHeader
+ */
+
 /**
  * Extra search params to add to the cmrGranules searchConceptQueue
  *
@@ -23,6 +33,7 @@ function cmrGranuleSearchParams(recReportParams) {
   return [];
 }
 
+// TODO: remove
 /**
  * Prepare a list of collectionIds into an _id__in object
  *
@@ -36,7 +47,7 @@ function searchParamsForCollectionIdArray(collectionIds) {
 
 /**
  * @param {string} dateable - any input valid for a JS Date constructor.
- * @returns {number} - primitive value of input date string or undefined, if
+ * @returns {number | undefined} - primitive value of input date string or undefined, if
  * input string not convertible.
  */
 function dateToValue(dateable) {
@@ -49,6 +60,7 @@ function dateStringToDateOrNull(dateable) {
   return !Number.isNaN(date.valueOf()) ?
date : undefined; } +// TODO - Remove this in 3806 PR #3 /** * * @param {Object} params - request params to convert to Elasticsearch params @@ -71,12 +83,12 @@ function convertToESCollectionSearchParams(params) { * convertToDBCollectionSearchObject - Creates Postgres search object from * InternalRecReport Parameters * @param {Object} params - request params to convert to database params - * @param {[Object]} params.collectionIds - List containing single Collection object + * @param {string[]} [params.collectionIds] - List containing single Collection object * multiple or no collections will result in a * search object without a collection object - * @param {moment} params.endTimestamp - ending report datetime ISO Timestamp - * @param {moment} params.startTimestamp - beginning report datetime ISO timestamp - * @returns {[Object]} - array of objects of desired + * @param {string} [params.endTimestamp] - ending report datetime ISO Timestamp + * @param {string} [params.startTimestamp] - beginning report datetime ISO timestamp + * @returns {Object[]} - array of objects of desired * parameters formatted for database collection * search */ @@ -121,8 +133,8 @@ function convertToESGranuleSearchParams(params) { /** * Convert reconciliation report parameters to PostgreSQL database search params. * - * @param {Object} params - request params to convert to database params - * @returns {Object} object of desired parameters formated for database granule search + * @param {EnhancedNormalizedRecReportParams} params - request params to convert to database params + * @returns object of desired parameters formatted for database granule search */ function convertToDBGranuleSearchParams(params) { const { @@ -152,7 +164,7 @@ function convertToDBGranuleSearchParams(params) { * convert to es search parameters using createdAt for report time range * * @param {Object} params - request params to convert to Elasticsearch params - * @returns {Object} object of desired parameters formated for Elasticsearch. + * @returns {Object} object of desired parameters formatted for Elasticsearch. 
  */
 function convertToESGranuleSearchParamsWithCreatedAtRange(params) {
   const searchParamsWithUpdatedAt = convertToESGranuleSearchParams(params);
@@ -167,7 +179,7 @@ function convertToESGranuleSearchParamsWithCreatedAtRange(params) {
 /**
  *
  * @param {Object} params - request params to convert to orca params
- * @returns {Object} object of desired parameters formated for orca
+ * @returns {Object} object of desired parameters formatted for orca
  */
 function convertToOrcaGranuleSearchParams(params) {
   const { collectionIds, granuleIds, providers, startTimestamp, endTimestamp } = params;
@@ -183,12 +195,8 @@ function convertToOrcaGranuleSearchParams(params) {
 /**
  * create initial report header
  *
- * @param {Object} recReportParams - params
- * @param {Object} recReportParams.reportType - the report type
- * @param {moment} recReportParams.createStartTime - when the report creation was begun
- * @param {moment} recReportParams.endTimestamp - ending report datetime ISO Timestamp
- * @param {moment} recReportParams.startTimestamp - beginning report datetime ISO timestamp
- * @returns {Object} report header
+ * @param {EnhancedNormalizedRecReportParams} recReportParams - params
+ * @returns {ReportHeader} report header
  */
 function initialReportHeader(recReportParams) {
   const {
diff --git a/packages/api/lib/reconciliationReport/normalizeEvent.js b/packages/api/lib/reconciliationReport/normalizeEvent.js
index 88cd2283df1..b623f7e4a51 100644
--- a/packages/api/lib/reconciliationReport/normalizeEvent.js
+++ b/packages/api/lib/reconciliationReport/normalizeEvent.js
@@ -1,15 +1,22 @@
+//@ts-check
+
 'use strict';
 
 /*eslint prefer-const: ["error", {"destructuring": "all"}]*/
 const isString = require('lodash/isString');
 
 const { removeNilProperties } = require('@cumulus/common/util');
-const { InvalidArgument } = require('@cumulus/errors');
+const { InvalidArgument, MissingRequiredArgument } = require('@cumulus/errors');
+
+/**
+ * @typedef {import('../types').RecReportParams } RecReportParams
+ * @typedef {import('../types').NormalizedRecReportParams } NormalizedRecReportParams
+ */
 
 /**
  * ensures input reportType can be handled by the lambda code.
  *
  * @param {string} reportType
- * @returns {undefined} - if reportType is valid
+ * @returns {void} - if reportType is valid
  * @throws {InvalidArgument} - otherwise
  */
 function validateReportType(reportType) {
@@ -31,7 +38,7 @@ function validateReportType(reportType) {
 /**
  * Convert input to an ISO timestamp.
  * @param {any} dateable - any type convertable to JS Date
- * @returns {string} - date formated as ISO timestamp;
+ * @returns {string | undefined} - date formatted as ISO timestamp;
  */
 function isoTimestamp(dateable) {
   if (dateable) {
@@ -45,26 +52,19 @@ function isoTimestamp(dateable) {
 }
 
 /**
- * Transforms input granuleId into correct parameters for use in the
- * Reconciliation Report lambda.
- * @param {Array|string} granuleId - list of granule Ids
- * @param {Object} modifiedEvent - input event
- * @returns {Object} updated input even with correct granuleId and granuleIds values.
+ * Normalizes the input into an array of granule IDs.
+ *
+ * @param {string|string[]|undefined} granuleId - The granule ID or an array of granule IDs.
+ * @returns {string[]|undefined} An array of granule IDs, or undefined if no granule ID is provided.
  */
-function updateGranuleIds(granuleId, modifiedEvent) {
-  let returnEvent = { ...modifiedEvent };
-  if (granuleId) {
-    // transform input granuleId into an array on granuleIds
-    const granuleIds = isString(granuleId) ?
[granuleId] : granuleId; - returnEvent = { ...modifiedEvent, granuleIds }; - } - return returnEvent; +function generateGranuleIds(granuleId) { + return granuleId ? (isString(granuleId) ? [granuleId] : granuleId) : undefined; } /** * Transforms input collectionId into correct parameters for use in the * Reconciliation Report lambda. - * @param {Array|string} collectionId - list of collection Ids + * @param {string[]|string | undefined} collectionId - list of collection Ids * @param {Object} modifiedEvent - input event * @returns {Object} updated input even with correct collectionId and collectionIds values. */ @@ -78,26 +78,32 @@ function updateCollectionIds(collectionId, modifiedEvent) { return returnEvent; } -function updateProviders(provider, modifiedEvent) { - let returnEvent = { ...modifiedEvent }; - if (provider) { - // transform input provider into an array on providers - const providers = isString(provider) ? [provider] : provider; - returnEvent = { ...modifiedEvent, providers }; - } - return returnEvent; +/** + * Normalizes the input provider into an array of providers. + * + * @param {string|string[]|undefined} provider - The provider or list of providers. + * @returns {string[]|undefined} An array of providers, or undefined if no provider is provided. + */ +function generateProviders(provider) { + return provider ? (isString(provider) ? [provider] : provider) : undefined; } /** * Converts input parameters to normalized versions to pass on to the report * functions. Ensures any input dates are formatted as ISO strings. * - * @param {Object} event - input payload - * @returns {Object} - Object with normalized parameters + * @param {RecReportParams} event - input payload + * @returns {NormalizedRecReportParams} - Object with normalized parameters */ function normalizeEvent(event) { const systemBucket = event.systemBucket || process.env.system_bucket; + if (!systemBucket) { + throw new MissingRequiredArgument('systemBucket is required.'); + } const stackName = event.stackName || process.env.stackName; + if (!stackName) { + throw new MissingRequiredArgument('stackName is required.'); + } const startTimestamp = isoTimestamp(event.startTimestamp); const endTimestamp = isoTimestamp(event.endTimestamp); @@ -105,7 +111,11 @@ function normalizeEvent(event) { validateReportType(reportType); let { - collectionIds: anyCollectionIds, collectionId, granuleId, provider, ...modifiedEvent + collectionIds: anyCollectionIds, + collectionId = undefined, + granuleId = undefined, + provider = undefined, + ...modifiedEvent } = { ...event }; if (anyCollectionIds) { throw new InvalidArgument('`collectionIds` is not a valid input key for a reconciliation report, use `collectionId` instead.'); @@ -120,16 +130,16 @@ function normalizeEvent(event) { throw new InvalidArgument(`${reportType} reports cannot be launched with more than one input (granuleId, collectionId, or provider).`); } modifiedEvent = updateCollectionIds(collectionId, modifiedEvent); - modifiedEvent = updateGranuleIds(granuleId, modifiedEvent); - modifiedEvent = updateProviders(provider, modifiedEvent); - return removeNilProperties({ + return (removeNilProperties({ ...modifiedEvent, systemBucket, stackName, startTimestamp, endTimestamp, reportType, - }); + granuleIds: generateGranuleIds(granuleId), + providers: generateProviders(provider), + })); } exports.normalizeEvent = normalizeEvent; diff --git a/packages/api/lib/types.js b/packages/api/lib/types.js new file mode 100644 index 00000000000..2d63fc7decc --- /dev/null +++ 
b/packages/api/lib/types.js @@ -0,0 +1,46 @@ +/** + * @typedef {Object} NormalizedRecReportParams + * @property {string[]} [collectionIds] - An optional array of collection IDs. + * @property {string[]} [granuleIds] - An optional array of granule IDs. + * @property {string[]} [providers] - An optional array of provider names. + * @property {string} [startTimestamp] - An optional start timestamp for the report. + * @property {string} [endTimestamp] - An optional end timestamp for the report. + * @property {string} [reportType] - An optional type of the report. + * @property {string} [location] + * @property {string} stackName + * @property {string} systemBucket + * @property {string} [status] - Optional granule status filter for report + */ + +/** + * @typedef {Object} EnhancedParams + * @property {Moment.moment} createStartTime - Report creation start time. + * @property {string} reportKey - Key to store report object in S3 + * @property {string} reportType - Type of the report + * @property {Knex} knex - Knex instance + * @property {string} concurrency - Concurrency used in report generation + * @property {string} [location] - Location of the report +*/ + +/** + * @typedef { NormalizedRecReportParams & EnhancedParams} EnhancedNormalizedRecReportParams + */ + +/** + * @typedef {Object} RecReportParams + * @property {string[]} [collectionIds] - An optional array of collection IDs. + * @property {string[]} [granuleIds] - An optional array of granule IDs. + * @property {string[]} [providers] - An optional array of provider names. + * @property {string|Date} [startTimestamp] - An optional start timestamp for the report. + * @property {string|Date} [endTimestamp] - An optional end timestamp for the report. + * @property {string} [reportType] - An optional type of the report. + * @property {boolean} [includeDeleted] - An optional flag to include deleted records. + * @property {boolean} [ignoreFilesConfig] - An optional flag to ignore files configuration. + * @property {string} [bucket] - An optional bucket name for the report. + * @property {string} [stackName] - An optional stack name for the report. + * @property {string} [systemBucket] - An optional system bucket name for the report. 
+ * @property {string} [location] + * @property {string} [status] - Optional granule status filter for report + */ + +module.exports = {}; diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report-internals.js b/packages/api/tests/lambdas/test-create-reconciliation-report-internals.js index 819a1acc326..ab4e9b248e9 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report-internals.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report-internals.js @@ -9,7 +9,6 @@ const CRP = rewire('../../lambdas/create-reconciliation-report'); const linkingFilesToGranules = CRP.__get__('linkingFilesToGranules'); const isOneWayCollectionReport = CRP.__get__('isOneWayCollectionReport'); const isOneWayGranuleReport = CRP.__get__('isOneWayGranuleReport'); -const shouldAggregateGranulesForCollections = CRP.__get__('shouldAggregateGranulesForCollections'); test( 'isOneWayCollectionReport returns true only when one or more specific parameters ' @@ -86,39 +85,6 @@ test( } ); -test( - 'shouldAggregateGranulesForCollections returns true only when one or more specific parameters ' - + ' are present on the reconciliation report object.', - (t) => { - const paramsThatShouldReturnTrue = ['updatedAt__to', 'updatedAt__from']; - const paramsThatShouldReturnFalse = [ - 'stackName', - 'systemBucket', - 'startTimestamp', - 'anythingAtAll', - ]; - - paramsThatShouldReturnTrue.map((p) => - t.true(shouldAggregateGranulesForCollections({ [p]: randomId('value') }))); - - paramsThatShouldReturnFalse.map((p) => - t.false(shouldAggregateGranulesForCollections({ [p]: randomId('value') }))); - - const allTrueKeys = paramsThatShouldReturnTrue.reduce( - (accum, current) => ({ ...accum, [current]: randomId('value') }), - {} - ); - t.true(shouldAggregateGranulesForCollections(allTrueKeys)); - - const allFalseKeys = paramsThatShouldReturnFalse.reduce( - (accum, current) => ({ ...accum, [current]: randomId('value') }), - {} - ); - t.false(shouldAggregateGranulesForCollections(allFalseKeys)); - t.true(shouldAggregateGranulesForCollections({ ...allTrueKeys, ...allFalseKeys })); - } -); - test('linkingFilesToGranules return values', (t) => { const reportTypesToReturnFalse = ['Granule Inventory', 'Internal', 'Inventory']; const reportTypesToReturnTrue = ['Granule Not Found']; diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report.js b/packages/api/tests/lambdas/test-create-reconciliation-report.js index 0461c6005c4..fead1d4b13b 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report.js @@ -8,6 +8,7 @@ const pMap = require('p-map'); const omit = require('lodash/omit'); const range = require('lodash/range'); const sample = require('lodash/sample'); +const compact = require('lodash/compact'); const sinon = require('sinon'); const sortBy = require('lodash/sortBy'); const test = require('ava'); @@ -25,28 +26,27 @@ const { getBucketsConfigKey } = require('@cumulus/common/stack'); const { constructCollectionId } = require('@cumulus/message/Collections'); const { randomString, randomId } = require('@cumulus/common/test-utils'); const { - ProviderPgModel, - fakeProviderRecordFactory, - translateApiFiletoPostgresFile, - generateLocalTestDb, - destroyLocalTestDb, - localStackConnectionEnv, - migrationDir, CollectionPgModel, + destroyLocalTestDb, ExecutionPgModel, - FilePgModel, - GranulePgModel, - ReconciliationReportPgModel, fakeCollectionRecordFactory, fakeExecutionRecordFactory, 
fakeGranuleRecordFactory, - translatePostgresCollectionToApiCollection, + fakeProviderRecordFactory, + FilePgModel, + generateLocalTestDb, + GranulePgModel, + localStackConnectionEnv, + migrationDir, + ProviderPgModel, + ReconciliationReportPgModel, + translateApiCollectionToPostgresCollection, + translateApiFiletoPostgresFile, translateApiGranuleToPostgresGranule, translatePostgresReconReportToApiReconReport, } = require('@cumulus/db'); const { getDistributionBucketMapKey } = require('@cumulus/distribution-utils'); -const indexer = require('@cumulus/es-client/indexer'); -const { Search, getEsClient } = require('@cumulus/es-client/search'); +const { Search } = require('@cumulus/es-client/search'); const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); const { @@ -64,10 +64,14 @@ const handler = (event) => unwrappedHandler(normalizeEvent(event)); let esAlias; let esIndex; -let esClient; const createBucket = (Bucket) => awsServices.s3().createBucket({ Bucket }); -const testDbName = `create_rec_reports_${cryptoRandomString({ length: 10 })}`; +const requiredStaticCollectionFields = { + granuleIdExtraction: randomString(), + granuleId: randomString(), + sampleFileName: randomString(), + files: [], +}; function createDistributionBucketMapFromBuckets(buckets) { let bucketMap = {}; @@ -126,59 +130,112 @@ async function storeFilesToS3(files) { ); } -/** - * Index a single collection to elasticsearch. If the collection object has an - * updatedAt value, use a sinon stub to set the time of the granule to that - * input time. - * @param {Object} collection - a collection object -* @returns {Promise} - promise of indexed collection with active granule -*/ -async function storeCollection(collection) { - let stub; - if (collection.updatedAt) { - stub = sinon.stub(Date, 'now').returns(collection.updatedAt); - } - try { - await indexer.indexCollection(esClient, collection, esAlias); - return indexer.indexGranule( - esClient, - fakeGranuleFactoryV2({ - collectionId: constructCollectionId(collection.name, collection.version), - updatedAt: collection.updatedAt, - provider: randomString(), - }), - esAlias - ); - } finally { - if (collection.updatedAt) stub.restore(); - } +async function storeCollectionAndGranuleToPostgres(collection, context) { + const postgresCollection = translateApiCollectionToPostgresCollection({ + ...collection, + ...requiredStaticCollectionFields, + }); + const [pgCollectionRecord] = await context.collectionPgModel.create( + context.knex, + postgresCollection + ); + const [pgProviderRecord] = await context.providerPgModel.create( + context.knex, + fakeProviderRecordFactory(), + ['name', 'cumulus_id'] + ); + const collectionGranule = fakeGranuleRecordFactory({ + updated_at: pgCollectionRecord.updated_at, + created_at: pgCollectionRecord.created_at, + collection_cumulus_id: pgCollectionRecord.cumulus_id, + provider_cumulus_id: pgProviderRecord.cumulus_id, + }); + await context.granulePgModel.create(context.knex, collectionGranule); + return { + granule: { + ...collectionGranule, + collectionId: `${collection.name}___${collection.version}`, + }, + collection: { + ...pgCollectionRecord, + providerName: pgProviderRecord.name, + }, + }; } -/** - * Index Dated collections to ES for testing timeranges. These need to happen - * in sequence because of the way we are stubbing Date.now() during indexing. 
- * - * @param {Array} collections - list of collection objects - * @returns {Promise} - Promise of collections indexed - */ -function storeCollectionsToElasticsearch(collections) { - let result = Promise.resolve(); - collections.forEach((collection) => { - result = result.then(() => storeCollection(collection)); - }); - return result; +async function storeCollectionsWithGranuleToPostgres(collections, context) { + const records = await Promise.all( + collections.map((collection) => storeCollectionAndGranuleToPostgres(collection, context)) + ); + return { + collections: records.map((record) => record.collection), + granules: records.map((record) => record.granule), + }; } -/** - * Index granules to ES for testing - * - * @param {Array} granules - list of granules objects - * @returns {Promise} - Promise of indexed granules - */ -async function storeGranulesToElasticsearch(granules) { - await Promise.all( - granules.map((granule) => indexer.indexGranule(esClient, granule, esAlias)) +async function generateRandomGranules(t, { + bucketRange = 2, + collectionRange = 10, + granuleRange = 10, + fileRange = 10, + stubCmr = true, +} = {}) { + const { filePgModel, granulePgModel, knex } = t.context; + + const dataBuckets = range(bucketRange).map(() => randomId('bucket')); + await Promise.all(dataBuckets.map((bucket) => + createBucket(bucket) + .then(() => t.context.bucketsToCleanup.push(bucket)))); + + // Write the buckets config to S3 + await storeBucketsConfigToS3( + dataBuckets, + t.context.systemBucket, + t.context.stackName + ); + + // Create collections that are in sync + const matchingColls = range(collectionRange).map(() => ({ + name: randomId('name'), + version: randomId('vers'), + })); + const { collections: postgresCollections } = + await storeCollectionsWithGranuleToPostgres(matchingColls, t.context); + const collectionCumulusId = postgresCollections[0].cumulus_id; + + // Create random files + const pgGranules = await granulePgModel.insert( + knex, + range(granuleRange).map(() => fakeGranuleRecordFactory({ + collection_cumulus_id: collectionCumulusId, + })), + ['cumulus_id', 'granule_id'] ); + const files = range(fileRange).map((i) => ({ + bucket: dataBuckets[i % dataBuckets.length], + key: randomId('key', 10), + granule_cumulus_id: pgGranules[i].cumulus_id, + })); + + // Store the files to S3 and postgres + await Promise.all([ + storeFilesToS3(files), + filePgModel.insert(knex, files), + ]); + + if (stubCmr) { + const cmrCollections = sortBy(matchingColls, ['name', 'version']) + .map((cmrCollection) => ({ + umm: { ShortName: cmrCollection.name, Version: cmrCollection.version }, + })); + CMR.prototype.searchConcept.restore(); + const cmrSearchStub = sinon.stub(CMR.prototype, 'searchConcept'); + cmrSearchStub.withArgs('collections').onCall(0).resolves(cmrCollections); + cmrSearchStub.withArgs('collections').onCall(1).resolves([]); + cmrSearchStub.withArgs('granules').resolves([]); + } + + return { files, granules: pgGranules, matchingColls, dataBuckets }; } async function fetchCompletedReport(reportRecord) { @@ -192,35 +249,6 @@ async function fetchCompletedReportString(reportRecord) { .then((response) => getObjectStreamContents(response.Body)); } -/** - * Looks up and returns the granulesIds given a list of collectionIds. 
- * @param {Array} collectionIds - list of collectionIds
- * @returns {Array} list of matching granuleIds
- */
-async function granuleIdsFromCollectionIds(collectionIds) {
-  const esValues = await (new Search(
-    { queryStringParameters: { collectionId__in: collectionIds.join(',') } },
-    'granule',
-    esAlias
-  )).query();
-  return esValues.results.map((value) => value.granuleId);
-}
-
-/**
- * Looks up and returns the providers given a list of collectionIds.
- * @param {Array} collectionIds - list of collectionIds
- * @returns {Array} list of matching providers
- */
-async function providersFromCollectionIds(collectionIds) {
-  const esValues = await (new Search(
-    { queryStringParameters: { collectionId__in: collectionIds.join(',') } },
-    'granule',
-    esAlias
-  )).query();
-
-  return esValues.results.map((value) => value.provider);
-}
-
 const randomBetween = (a, b) => Math.floor(Math.random() * (b - a + 1) + a);
 
 const randomTimeBetween = (t1, t2) => randomBetween(t1, t2);
 
@@ -229,8 +257,11 @@ const randomTimeBetween = (t1, t2) => randomBetween(t1, t2);
 * random collections where some fall within the start and end timestamps.
 * Also creates a number that are only in ES, as well as some that are only
 * "returned by CMR" (as a stubbed function)
- * @param {Object} t - AVA test context.
- * @returns {Object} setupVars - Object with information about the current
+ *
+ * @param {object} params - destructured arguments
+ * @param {object} params.t - AVA test context.
+ * @param {object} [params.params] - optional setup parameters
+ * @returns {object} setupVars - Object with information about the current
 * state of elasticsearch and CMR mock.
 * The object returned has:
 * + startTimestamp - beginning of matching timerange
@@ -245,7 +276,7 @@ const randomTimeBetween = (t1, t2) => randomBetween(t1, t2);
 *   excluded from CMR mock. (only in ES out of range)
 * + extraCmrCollections - collections not in ES but returned by the CMR mock.
 */
-const setupElasticAndCMRForTests = async ({ t, params = {} }) => {
+const setupDatabaseAndCMRForTests = async ({ t, params = {} }) => {
   const dataBuckets = range(2).map(() => randomId('bucket'));
   await Promise.all(
     dataBuckets.map((bucket) =>
@@ -275,30 +306,35 @@ const setupElasticAndCMRForTests = async ({ t, params = {} }) => {
 
   // Create collections that are in sync ES/CMR during the time period
   const matchingCollections = range(numMatchingCollections).map((r) => ({
+    ...requiredStaticCollectionFields,
     name: randomId(`name${r}-`),
     version: randomId('vers'),
     updatedAt: randomTimeBetween(startTimestamp, endTimestamp),
   }));
   // Create collections in sync ES/CMR outside of the timestamps range
   const matchingCollectionsOutsideRange = range(numMatchingCollectionsOutOfRange).map((r) => ({
+    ...requiredStaticCollectionFields,
     name: randomId(`name${r}-`),
     version: randomId('vers'),
     updatedAt: randomTimeBetween(monthEarlier, startTimestamp - 1),
   }));
   // Create collections in ES only within the timestamp range
   const extraESCollections = range(numExtraESCollections).map((r) => ({
+    ...requiredStaticCollectionFields,
     name: randomId(`extraES${r}-`),
     version: randomId('vers'),
     updatedAt: randomTimeBetween(startTimestamp, endTimestamp),
   }));
   // Create collections in ES only outside of the timestamp range
   const extraESCollectionsOutOfRange = range(numExtraESCollectionsOutOfRange).map((r) => ({
+    ...requiredStaticCollectionFields,
     name: randomId(`extraES${r}-`),
     version: randomId('vers'),
     updatedAt: randomTimeBetween(endTimestamp + 1, monthLater),
   }));
   // create extra cmr collections that fall inside of the range.
const extraCmrCollections = range(numExtraCmrCollections).map((r) => ({ + ...requiredStaticCollectionFields, name: randomId(`extraCmr${r}-`), version: randomId('vers'), updatedAt: randomTimeBetween(startTimestamp, endTimestamp), @@ -320,13 +356,21 @@ const setupElasticAndCMRForTests = async ({ t, params = {} }) => { cmrSearchStub.withArgs('collections').onCall(1).resolves([]); cmrSearchStub.withArgs('granules').resolves([]); - await storeCollectionsToElasticsearch( - matchingCollections - .concat(matchingCollectionsOutsideRange) - .concat(extraESCollections) - .concat(extraESCollectionsOutOfRange) - ); + const { collections: createdCollections, granules: collectionGranules } = + await storeCollectionsWithGranuleToPostgres( + matchingCollections + .concat(matchingCollectionsOutsideRange) + .concat(extraESCollections) + .concat(extraESCollectionsOutOfRange), + t.context + ); + const mappedProviders = {}; + createdCollections.forEach((collection) => { + mappedProviders[ + constructCollectionId(collection.name, collection.version) + ] = collection.providerName; + }); return { startTimestamp, endTimestamp, @@ -335,33 +379,36 @@ const setupElasticAndCMRForTests = async ({ t, params = {} }) => { extraESCollections, extraESCollectionsOutOfRange, extraCmrCollections, + collectionGranules, + mappedProviders, }; }; -test.before(async (t) => { - process.env = { - ...process.env, - ...localStackConnectionEnv, - PG_DATABASE: testDbName, - }; +test.before(async () => { process.env.cmr_password_secret_name = randomId('cmr-secret-name'); + process.env.DISTRIBUTION_ENDPOINT = 'TEST_ENDPOINT'; await awsServices.secretsManager().createSecret({ Name: process.env.cmr_password_secret_name, SecretString: randomId('cmr-password'), }); - const { knex, knexAdmin } = await generateLocalTestDb(testDbName, migrationDir); +}); + +test.beforeEach(async (t) => { + t.context.testDbName = `create_rec_reports_${cryptoRandomString({ length: 10 })}`; + process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: t.context.testDbName, + }; + const { knex, knexAdmin } = await generateLocalTestDb(t.context.testDbName, migrationDir); t.context.knex = knex; t.context.knexAdmin = knexAdmin; - t.context.providerPgModel = new ProviderPgModel(); t.context.collectionPgModel = new CollectionPgModel(); t.context.executionPgModel = new ExecutionPgModel(); t.context.filePgModel = new FilePgModel(); t.context.granulePgModel = new GranulePgModel(); t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); -}); - -test.beforeEach(async (t) => { t.context.bucketsToCleanup = []; t.context.stackName = randomId('stack'); t.context.systemBucket = randomId('bucket'); @@ -382,13 +429,22 @@ test.beforeEach(async (t) => { index: esIndex, alias: esAlias, }); - esClient = await getEsClient(); t.context.esReportClient = new Search( {}, 'reconciliationReport', process.env.ES_INDEX ); + // write 4 providers to the database + t.context.providers = await Promise.all(new Array(4).fill().map(async () => { + const [pgProvider] = await t.context.providerPgModel.create( + t.context.knex, + fakeProviderRecordFactory(), + ['cumulus_id', 'name'] + ); + return pgProvider; + })); + t.context.execution = fakeExecutionRecordFactory(); const [pgExecution] = await t.context.executionPgModel.create( t.context.knex, @@ -399,26 +455,27 @@ test.beforeEach(async (t) => { }); test.afterEach.always(async (t) => { - await Promise.all(flatten(t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket))); + await Promise.all( + 
flatten(t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket)) + ); await t.context.executionPgModel.delete( t.context.knex, { cumulus_id: t.context.executionCumulusId } ); CMR.prototype.searchConcept.restore(); - await esClient.client.indices.delete({ index: esIndex }); + await destroyLocalTestDb({ + knex: t.context.knex, + knexAdmin: t.context.knexAdmin, + testDbName: t.context.testDbName, + }); }); -test.after.always(async (t) => { +test.after.always(async () => { await awsServices.secretsManager().deleteSecret({ SecretId: process.env.cmr_password_secret_name, ForceDeleteWithoutRecovery: true, }); delete process.env.cmr_password_secret_name; - await destroyLocalTestDb({ - knex: t.context.knex, - knexAdmin: t.context.knexAdmin, - testDbName, - }); }); test.serial('Generates valid reconciliation report for no buckets', async (t) => { @@ -462,68 +519,9 @@ test.serial('Generates valid reconciliation report for no buckets', async (t) => t.like(esRecord, reportRecord); }); +// TODO - use this to make generic the data to PG test.serial('Generates valid GNF reconciliation report when everything is in sync', async (t) => { - const { filePgModel, granulePgModel, knex } = t.context; - - const dataBuckets = range(2).map(() => randomId('bucket')); - await Promise.all(dataBuckets.map((bucket) => - createBucket(bucket) - .then(() => t.context.bucketsToCleanup.push(bucket)))); - - // Write the buckets config to S3 - await storeBucketsConfigToS3( - dataBuckets, - t.context.systemBucket, - t.context.stackName - ); - - // Create collections that are in sync - const matchingColls = range(10).map(() => ({ - name: randomId('name'), - version: randomId('vers'), - })); - await storeCollectionsToElasticsearch(matchingColls); - - const collection = fakeCollectionRecordFactory({ - name: matchingColls[0].name, - version: matchingColls[0].version, - }); - const [pgCollection] = await t.context.collectionPgModel.create( - t.context.knex, - collection - ); - const collectionCumulusId = pgCollection.cumulus_id; - - // Create random files - const pgGranules = await granulePgModel.insert( - knex, - range(10).map(() => fakeGranuleRecordFactory({ - collection_cumulus_id: collectionCumulusId, - })) - ); - const files = range(10).map((i) => ({ - bucket: dataBuckets[i % dataBuckets.length], - key: randomId('key'), - granule_cumulus_id: pgGranules[i].cumulus_id, - })); - - // Store the files to S3 and DynamoDB - await Promise.all([ - storeFilesToS3(files), - filePgModel.insert(knex, files), - ]); - - const cmrCollections = sortBy(matchingColls, ['name', 'version']) - .map((cmrCollection) => ({ - umm: { ShortName: cmrCollection.name, Version: cmrCollection.version }, - })); - - CMR.prototype.searchConcept.restore(); - const cmrSearchStub = sinon.stub(CMR.prototype, 'searchConcept'); - cmrSearchStub.withArgs('collections').onCall(0).resolves(cmrCollections); - cmrSearchStub.withArgs('collections').onCall(1).resolves([]); - cmrSearchStub.withArgs('granules').resolves([]); - + const { files, matchingColls } = await generateRandomGranules(t); const event = { systemBucket: t.context.systemBucket, stackName: t.context.stackName, @@ -562,68 +560,7 @@ test.serial('Generates valid GNF reconciliation report when everything is in syn }); test.serial('Generates a valid Inventory reconciliation report when everything is in sync', async (t) => { - const { filePgModel, granulePgModel, knex } = t.context; - - const dataBuckets = range(2).map(() => randomId('bucket')); - await Promise.all(dataBuckets.map((bucket) => - createBucket(bucket) 
- .then(() => t.context.bucketsToCleanup.push(bucket)))); - - // Write the buckets config to S3 - await storeBucketsConfigToS3( - dataBuckets, - t.context.systemBucket, - t.context.stackName - ); - - // Create collections that are in sync - const matchingColls = range(10).map(() => ({ - name: randomId('name'), - version: randomId('vers'), - })); - await storeCollectionsToElasticsearch(matchingColls); - - const collection = fakeCollectionRecordFactory({ - name: matchingColls[0].name, - version: matchingColls[0].version, - }); - const [pgCollection] = await t.context.collectionPgModel.create( - t.context.knex, - collection - ); - const collectionCumulusId = pgCollection.cumulus_id; - - // Create random files - const pgGranules = await granulePgModel.insert( - knex, - range(10).map(() => fakeGranuleRecordFactory({ - collection_cumulus_id: collectionCumulusId, - })) - ); - const files = range(10).map((i) => ({ - bucket: dataBuckets[i % dataBuckets.length], - key: randomId('key'), - granule_cumulus_id: pgGranules[i].cumulus_id, - })); - - // Store the files to S3 and DynamoDB - await Promise.all([ - storeFilesToS3(files), - filePgModel.insert(knex, files), - ]); - - const cmrCollections = sortBy(matchingColls, ['name', 'version']) - .map((cmrCollection) => ({ - umm: { ShortName: cmrCollection.name, Version: cmrCollection.version }, - })); - - CMR.prototype.searchConcept.restore(); - const cmrSearchStub = sinon.stub(CMR.prototype, 'searchConcept'); - cmrSearchStub.withArgs('collections').onCall(0).resolves(cmrCollections); - cmrSearchStub.withArgs('collections').onCall(1).resolves([]); - cmrSearchStub.withArgs('granules').resolves([]); - - await storeCollectionsToElasticsearch(matchingColls); + const { files, matchingColls } = await generateRandomGranules(t); const event = { systemBucket: t.context.systemBucket, @@ -639,7 +576,7 @@ test.serial('Generates a valid Inventory reconciliation report when everything i const collectionsInCumulusCmr = report.collectionsInCumulusCmr; t.is(report.status, 'SUCCESS'); - t.is(filesInCumulus.okCountByGranule, undefined); + t.deepEqual(filesInCumulus.okCountByGranule, {}); t.is(report.error, undefined); t.is(filesInCumulus.okCount, files.length); @@ -655,46 +592,15 @@ test.serial('Generates a valid Inventory reconciliation report when everything i }); test.serial('Generates valid reconciliation report when there are extra internal S3 objects', async (t) => { - const { filePgModel, granulePgModel, knex } = t.context; - - const collection = fakeCollectionRecordFactory(); - const [pgCollection] = await t.context.collectionPgModel.create( - t.context.knex, - collection - ); - const collectionCumulusId = pgCollection.cumulus_id; - - const dataBuckets = range(2).map(() => randomId('bucket')); - await Promise.all(dataBuckets.map((bucket) => - createBucket(bucket) - .then(() => t.context.bucketsToCleanup.push(bucket)))); - - // Write the buckets config to S3 - await storeBucketsConfigToS3( - dataBuckets, - t.context.systemBucket, - t.context.stackName - ); - - // Create files that are in sync - const pgGranules = await granulePgModel.insert( - knex, - range(10).map(() => fakeGranuleRecordFactory({ - collection_cumulus_id: collectionCumulusId, - })) - ); - const matchingFiles = range(10).map((i) => ({ - bucket: sample(dataBuckets), - key: randomId('key'), - granule_cumulus_id: pgGranules[i].cumulus_id, - })); + const { dataBuckets, files } = await generateRandomGranules(t, { + collectionRange: 1, + stubCmr: false, + }); const extraS3File1 = { bucket: 
sample(dataBuckets), key: randomId('key') }; const extraS3File2 = { bucket: sample(dataBuckets), key: randomId('key') }; - // Store the files to S3 and Elasticsearch - await storeFilesToS3(matchingFiles.concat([extraS3File1, extraS3File2])); - await filePgModel.insert(knex, matchingFiles); + await storeFilesToS3(files.concat([extraS3File1, extraS3File2])); const event = { systemBucket: t.context.systemBucket, @@ -709,7 +615,7 @@ test.serial('Generates valid reconciliation report when there are extra internal const filesInCumulus = report.filesInCumulus; t.is(report.status, 'SUCCESS'); t.is(report.error, undefined); - t.is(filesInCumulus.okCount, matchingFiles.length); + t.is(filesInCumulus.okCount, files.length); const granuleIds = Object.keys(filesInCumulus.okCountByGranule); granuleIds.forEach((granuleId) => { @@ -728,61 +634,25 @@ test.serial('Generates valid reconciliation report when there are extra internal t.true(createStartTime <= createEndTime); }); -test.serial('Generates valid reconciliation report when there are extra internal DynamoDB objects', async (t) => { - const { filePgModel, granulePgModel, knex } = t.context; - - const dataBuckets = range(2).map(() => randomString()); - await Promise.all(dataBuckets.map((bucket) => - createBucket(bucket) - .then(() => t.context.bucketsToCleanup.push(bucket)))); - - // Write the buckets config to S3 - await storeBucketsConfigToS3( - dataBuckets, - t.context.systemBucket, - t.context.stackName - ); - - const collection = fakeCollectionRecordFactory(); - const [pgCollection] = await t.context.collectionPgModel.create( - t.context.knex, - collection - ); - const collectionCumulusId = pgCollection.cumulus_id; - - // Create files that are in sync - const granules = range(12).map(() => fakeGranuleRecordFactory({ - collection_cumulus_id: collectionCumulusId, - })); - const pgGranules = await granulePgModel.insert( - knex, - granules - ); - const matchingFiles = range(10).map((i) => ({ - bucket: sample(dataBuckets), - key: randomId('key'), - granule_cumulus_id: pgGranules[i].cumulus_id, - })); +test.serial('Generates valid reconciliation report when there are extra internal Postgres objects', async (t) => { + const { granules, files, dataBuckets } = await generateRandomGranules(t, { + collectionRange: 1, + granuleRange: 12, + }); + const [extraFileGranule1, extraFileGranule2] = granules.slice(10, 12); const extraDbFile1 = { bucket: sample(dataBuckets), key: randomString(), - granule_cumulus_id: pgGranules[10].cumulus_id, - granule_id: granules[10].granule_id, + granule_cumulus_id: extraFileGranule1.cumulus_id, }; const extraDbFile2 = { bucket: sample(dataBuckets), key: randomString(), - granule_cumulus_id: pgGranules[11].cumulus_id, - granule_id: granules[11].granule_id, + granule_cumulus_id: extraFileGranule2.cumulus_id, }; - // Store the files to S3 and DynamoDB - await storeFilesToS3(matchingFiles); - await filePgModel.insert(knex, matchingFiles.concat([ - omit(extraDbFile1, 'granule_id'), - omit(extraDbFile2, 'granule_id'), - ])); + await t.context.filePgModel.insert(t.context.knex, [extraDbFile1, extraDbFile2]); const event = { systemBucket: t.context.systemBucket, @@ -797,7 +667,7 @@ test.serial('Generates valid reconciliation report when there are extra internal const filesInCumulus = report.filesInCumulus; t.is(report.status, 'SUCCESS'); t.is(report.error, undefined); - t.is(filesInCumulus.okCount, matchingFiles.length); + t.is(filesInCumulus.okCount, files.length); t.is(filesInCumulus.onlyInS3.length, 0); const totalOkCount = 
Object.values(filesInCumulus.okCountByGranule).reduce( @@ -808,17 +678,17 @@ test.serial('Generates valid reconciliation report when there are extra internal t.is(filesInCumulus.onlyInDb.length, 2); t.truthy(filesInCumulus.onlyInDb.find((f) => f.uri === buildS3Uri(extraDbFile1.bucket, extraDbFile1.key) - && f.granuleId === extraDbFile1.granule_id)); + && f.granuleId === extraFileGranule1.granule_id)); t.truthy(filesInCumulus.onlyInDb.find((f) => f.uri === buildS3Uri(extraDbFile2.bucket, extraDbFile2.key) - && f.granuleId === extraDbFile2.granule_id)); + && f.granuleId === extraFileGranule2.granule_id)); const createStartTime = moment(report.createStartTime); const createEndTime = moment(report.createEndTime); t.true(createStartTime <= createEndTime); }); -test.serial('Generates valid reconciliation report when internally, there are both extra DynamoDB and extra S3 files', async (t) => { +test.serial('Generates valid reconciliation report when internally, there are both extra postgres and extra S3 files', async (t) => { const { filePgModel, granulePgModel, knex } = t.context; const collection = fakeCollectionRecordFactory(); @@ -870,7 +740,7 @@ test.serial('Generates valid reconciliation report when internally, there are bo granule_id: granules[11].granule_id, }; - // Store the files to S3 and DynamoDB + // Store the files to S3 and postgres await storeFilesToS3(matchingFiles.concat([extraS3File1, extraS3File2])); await filePgModel.insert(knex, matchingFiles.concat([ omit(extraDbFile1, 'granule_id'), @@ -914,13 +784,13 @@ test.serial('Generates valid reconciliation report when internally, there are bo t.true(createStartTime <= createEndTime); }); -test.serial('Generates valid reconciliation report when there are both extra ES and CMR collections', async (t) => { +test.serial('Generates valid reconciliation report when there are both extra postGres and CMR collections', async (t) => { const params = { numMatchingCollectionsOutOfRange: 0, numExtraESCollectionsOutOfRange: 0, }; - const setupVars = await setupElasticAndCMRForTests({ t, params }); + const setupVars = await setupDatabaseAndCMRForTests({ t, params }); const event = { systemBucket: t.context.systemBucket, @@ -953,9 +823,9 @@ test.serial('Generates valid reconciliation report when there are both extra ES }); test.serial( - 'With input time params, generates a valid filtered reconciliation report, when there are extra cumulus/ES and CMR collections', + 'With input time params, generates a valid filtered reconciliation report, when there are extra cumulus database and CMR collections', async (t) => { - const { startTimestamp, endTimestamp, ...setupVars } = await setupElasticAndCMRForTests({ t }); + const { startTimestamp, endTimestamp, ...setupVars } = await setupDatabaseAndCMRForTests({ t }); const event = { systemBucket: t.context.systemBucket, @@ -1001,7 +871,7 @@ test.serial( ); test.serial( - 'With location param as S3, generates a valid reconciliation report for only S3 and DynamoDB', + 'With location param as S3, generates a valid reconciliation report for only S3 and postgres', async (t) => { const { filePgModel, granulePgModel, knex } = t.context; @@ -1082,7 +952,7 @@ test.serial( numExtraESCollectionsOutOfRange: 0, }; - const setupVars = await setupElasticAndCMRForTests({ t, params }); + const setupVars = await setupDatabaseAndCMRForTests({ t, params }); const event = { systemBucket: t.context.systemBucket, @@ -1113,9 +983,9 @@ test.serial( ); test.serial( - 'Generates valid reconciliation report without time params and 
there are extra cumulus/ES and CMR collections', + 'Generates valid reconciliation report without time params and there are extra cumulus DB and CMR collections', async (t) => { - const setupVars = await setupElasticAndCMRForTests({ t }); + const setupVars = await setupDatabaseAndCMRForTests({ t }); const eventNoTimeStamps = { systemBucket: t.context.systemBucket, @@ -1136,7 +1006,7 @@ test.serial( setupVars.matchingCollections.length + setupVars.matchingCollectionsOutsideRange.length ); - // all extra ES collections are found + // all extra DB collections are found t.is( collectionsInCumulusCmr.onlyInCumulus.length, setupVars.extraESCollections.length + setupVars.extraESCollectionsOutOfRange.length @@ -1160,9 +1030,9 @@ test.serial( ); test.serial( - 'Generates valid ONE WAY reconciliation report with time params and filters by collectionIds when there are extra cumulus/ES and CMR collections', + 'Generates valid ONE WAY reconciliation report with time params and filters by collectionIds when there are extra cumulus DB and CMR collections', async (t) => { - const { startTimestamp, endTimestamp, ...setupVars } = await setupElasticAndCMRForTests({ t }); + const { startTimestamp, endTimestamp, ...setupVars } = await setupDatabaseAndCMRForTests({ t }); const testCollection = [ setupVars.matchingCollections[3], @@ -1215,7 +1085,7 @@ test.serial( test.serial( 'When a collectionId is in both CMR and Cumulus a valid bi-directional reconciliation report is created.', async (t) => { - const setupVars = await setupElasticAndCMRForTests({ t }); + const setupVars = await setupDatabaseAndCMRForTests({ t }); const testCollection = setupVars.matchingCollections[3]; console.log(`testCollection: ${JSON.stringify(testCollection)}`); @@ -1245,7 +1115,7 @@ test.serial( test.serial( 'When an array of collectionId exists only in CMR, creates a valid bi-directional reconciliation report.', async (t) => { - const setupVars = await setupElasticAndCMRForTests({ t }); + const setupVars = await setupDatabaseAndCMRForTests({ t }); const testCollection = [ setupVars.extraCmrCollections[3], @@ -1283,7 +1153,7 @@ test.serial( test.serial( 'When a filtered collectionId exists only in Cumulus, generates a valid bi-directional reconciliation report.', async (t) => { - const setupVars = await setupElasticAndCMRForTests({ t }); + const setupVars = await setupDatabaseAndCMRForTests({ t }); const testCollection = setupVars.extraESCollections[3]; console.log(`testCollection: ${JSON.stringify(testCollection)}`); @@ -1319,7 +1189,7 @@ test.serial( test.serial( 'Generates valid ONE WAY reconciliation report with time params and filters by granuleIds when there are extra cumulus/ES and CMR collections', async (t) => { - const { startTimestamp, endTimestamp, ...setupVars } = await setupElasticAndCMRForTests({ t }); + const { startTimestamp, endTimestamp, ...setupVars } = await setupDatabaseAndCMRForTests({ t }); const testCollection = [ setupVars.matchingCollections[3], @@ -1329,7 +1199,11 @@ test.serial( ]; const testCollectionIds = testCollection.map((c) => constructCollectionId(c.name, c.version)); - const testGranuleIds = await granuleIdsFromCollectionIds(testCollectionIds); + + //set testGranuleIds to be all setupVars.collectionGranules that are in testCollectionIds + const testGranuleIds = setupVars.collectionGranules + .filter((g) => testCollectionIds.includes(g.collectionId)) + .map((g) => g.granule_id); console.log(`granuleIds: ${JSON.stringify(testGranuleIds)}`); @@ -1348,14 +1222,12 @@ test.serial( const 
collectionsInCumulusCmr = report.collectionsInCumulusCmr; t.is(report.status, 'SUCCESS'); t.is(report.error, undefined); - t.is(collectionsInCumulusCmr.okCount, 1); // cumulus filters collections by granuleId and only returned test one t.is(collectionsInCumulusCmr.onlyInCumulus.length, 1); t.true(collectionsInCumulusCmr.onlyInCumulus.includes(testCollectionIds[2])); - // ONE WAY only comparison because of input timestampes t.is(collectionsInCumulusCmr.onlyInCmr.length, 0); const reportStartTime = report.reportStartTime; @@ -1374,7 +1246,7 @@ test.serial( test.serial( 'When an array of granuleId exists, creates a valid one-way reconciliation report.', async (t) => { - const setupVars = await setupElasticAndCMRForTests({ t }); + const setupVars = await setupDatabaseAndCMRForTests({ t }); const testCollection = [ setupVars.extraCmrCollections[3], @@ -1383,7 +1255,9 @@ test.serial( ]; const testCollectionIds = testCollection.map((c) => constructCollectionId(c.name, c.version)); - const testGranuleIds = await granuleIdsFromCollectionIds(testCollectionIds); + const testGranuleIds = setupVars.collectionGranules + .filter((g) => testCollectionIds.includes(g.collectionId)) + .map((g) => g.granule_id); console.log(`testGranuleIds: ${JSON.stringify(testGranuleIds)}`); @@ -1397,11 +1271,11 @@ test.serial( t.is(reportRecord.status, 'Generated'); const report = await fetchCompletedReport(reportRecord); - const collectionsInCumulusCmr = report.collectionsInCumulusCmr; t.is(report.status, 'SUCCESS'); t.is(report.error, undefined); // Filtered by input granuleIds + const collectionsInCumulusCmr = report.collectionsInCumulusCmr; t.is(collectionsInCumulusCmr.okCount, 1); t.is(collectionsInCumulusCmr.onlyInCumulus.length, 1); t.true(collectionsInCumulusCmr.onlyInCumulus.includes(testCollectionIds[2])); @@ -1416,7 +1290,8 @@ test.serial( test.serial( 'When an array of providers exists, creates a valid one-way reconciliation report.', async (t) => { - const setupVars = await setupElasticAndCMRForTests({ t }); + const setupVars = await setupDatabaseAndCMRForTests({ t }); + // TODO: collections work! Failures should be granules now. const testCollection = [ setupVars.extraCmrCollections[3], @@ -1425,7 +1300,9 @@ test.serial( ]; const testCollectionIds = testCollection.map((c) => constructCollectionId(c.name, c.version)); - const testProviders = await providersFromCollectionIds(testCollectionIds); + const testProviders = compact(testCollection.map( + (c) => setupVars.mappedProviders[constructCollectionId(c.name, c.version)] + )); const event = { systemBucket: t.context.systemBucket, @@ -1447,7 +1324,6 @@ test.serial( t.is(collectionsInCumulusCmr.okCount, 1); t.is(collectionsInCumulusCmr.onlyInCumulus.length, 1); t.true(collectionsInCumulusCmr.onlyInCumulus.includes(testCollectionIds[2])); - t.is(granulesInCumulusCmr.okCount, 0); t.is(granulesInCumulusCmr.onlyInCumulus.length, 1); @@ -1460,11 +1336,23 @@ test.serial( } ); -test.serial('reconciliationReportForGranules reports discrepancy of granule holdings in CUMULUS and CMR', async (t) => { +// TODO - this test feels *wholly* not great are we relying on spec tests? +// TODO - add test for *multiple* collections, etc. // SPEC TESTS? +test.serial('reconciliationReportForGranules reports discrepancy of granule holdings in CUMULUS and CMR for a single collection', async (t) => { + // TODO - common methods? 
const shortName = randomString(); const version = randomString(); const collectionId = constructCollectionId(shortName, version); + const postgresCollectionRecord = fakeCollectionRecordFactory({ + name: shortName, + version, + }); + await t.context.collectionPgModel.create( + t.context.knex, + postgresCollectionRecord + ); + // create granules that are in sync const matchingGrans = range(10).map(() => fakeGranuleFactoryV2({ collectionId: collectionId, status: 'completed', files: [] })); @@ -1490,13 +1378,23 @@ test.serial('reconciliationReportForGranules reports discrepancy of granule hold cmrSearchStub.withArgs('granules').onCall(0).resolves(cmrGranules); cmrSearchStub.withArgs('granules').onCall(1).resolves([]); - await storeGranulesToElasticsearch(matchingGrans.concat(extraDbGrans)); + await Promise.all( + matchingGrans + .concat(extraDbGrans) + .map(async (granule) => { + const pgGranule = await translateApiGranuleToPostgresGranule({ + dynamoRecord: granule, + knexOrTransaction: t.context.knex, + }); + return await t.context.granulePgModel.create(t.context.knex, pgGranule); + }) + ); const { granulesReport, filesReport } = await reconciliationReportForGranules({ collectionId, bucketsConfig: new BucketsConfig({}), distributionBucketMap: {}, - recReportParams: {}, + recReportParams: { knex: t.context.knex }, }); t.is(granulesReport.okCount, 10); @@ -1897,12 +1795,6 @@ test.serial('Creates a valid Granule Inventory report', async (t) => { collection ); const collectionCumulusId = pgCollection.cumulus_id; - await indexer.indexCollection( - esClient, - translatePostgresCollectionToApiCollection(pgCollection), - esAlias - ); - const matchingGrans = range(10).map(() => fakeGranuleRecordFactory({ collection_cumulus_id: collectionCumulusId, })); @@ -2075,7 +1967,7 @@ test.serial('Inventory reconciliation report JSON is formatted', async (t) => { cmrSearchStub.withArgs('collections').onCall(1).resolves([]); cmrSearchStub.withArgs('granules').resolves([]); - await storeCollectionsToElasticsearch(matchingColls); + await storeCollectionsWithGranuleToPostgres(matchingColls, t.context); const eventFormatted = { systemBucket: t.context.systemBucket, diff --git a/packages/api/tests/lambdas/test-granule-inventory-report.js b/packages/api/tests/lambdas/test-granule-inventory-report.js index 074bf81f33c..6830310c23b 100644 --- a/packages/api/tests/lambdas/test-granule-inventory-report.js +++ b/packages/api/tests/lambdas/test-granule-inventory-report.js @@ -87,7 +87,7 @@ test.serial('Writes a file containing all granules to S3.', async (t) => { const reportKey = `${t.context.stackName}/reconciliation-reports/${reportRecordName}.csv`; const systemBucket = t.context.systemBucket; const reportParams = { - ...normalizeEvent({ reportType: 'Granule Inventory' }), + ...normalizeEvent({ reportType: 'Granule Inventory', stackName: 'TestStack' }), reportKey, systemBucket, knex: t.context.knex, @@ -165,6 +165,7 @@ test.serial('Writes a file containing a filtered set of granules to S3.', async collectionId, status, granuleId: 'test', + stackName: 'testStack', }), reportKey, systemBucket, diff --git a/packages/api/tests/lambdas/test-internal-reconciliation-report.js b/packages/api/tests/lambdas/test-internal-reconciliation-report.js index c9496fd4596..00d90ba8d66 100644 --- a/packages/api/tests/lambdas/test-internal-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-internal-reconciliation-report.js @@ -142,7 +142,7 @@ test.serial('internalRecReportForCollections reports discrepancy of collection h 
startTimestamp: moment.utc().subtract(1, 'hour').format(), endTimestamp: moment.utc().add(1, 'hour').format(), }; - report = await internalRecReportForCollections(normalizeEvent(searchParams)); + report = await internalRecReportForCollections(normalizeEvent({ ...searchParams, stackName: 'testStack' })); t.is(report.okCount, 10); t.is(report.onlyInEs.length, 2); t.is(report.onlyInDb.length, 2); @@ -154,7 +154,7 @@ test.serial('internalRecReportForCollections reports discrepancy of collection h endTimestamp: moment.utc().add(2, 'hour').format(), }; - report = await internalRecReportForCollections(normalizeEvent(paramsTimeOutOfRange)); + report = await internalRecReportForCollections(normalizeEvent({ ...paramsTimeOutOfRange, stackName: 'testStack' })); t.is(report.okCount, 0); t.is(report.onlyInEs.length, 0); t.is(report.onlyInDb.length, 0); @@ -164,7 +164,7 @@ test.serial('internalRecReportForCollections reports discrepancy of collection h const collectionId = constructCollectionId(conflictCollInDb.name, conflictCollInDb.version); const paramsCollectionId = { ...searchParams, collectionId: [collectionId, randomId('c')] }; - report = await internalRecReportForCollections(normalizeEvent(paramsCollectionId)); + report = await internalRecReportForCollections(normalizeEvent({ ...paramsCollectionId, stackName: 'testStack' })); t.is(report.okCount, 0); t.is(report.onlyInEs.length, 0); t.is(report.onlyInDb.length, 0); @@ -269,7 +269,7 @@ test.serial('internalRecReportForGranules reports discrepancy of granule holding endTimestamp: moment.utc().add(1, 'hour').format(), }; report = await internalRecReportForGranules({ - ...normalizeEvent(searchParams), + ...normalizeEvent({ ...searchParams, stackName: 'testStack' }), knex, }); t.is(report.okCount, 20); @@ -284,7 +284,7 @@ test.serial('internalRecReportForGranules reports discrepancy of granule holding }; report = await internalRecReportForGranules({ - ...normalizeEvent(outOfRangeParams), + ...normalizeEvent({ ...outOfRangeParams, stackName: 'testStack' }), knex, }); t.is(report.okCount, 0); @@ -295,7 +295,7 @@ test.serial('internalRecReportForGranules reports discrepancy of granule holding // collectionId, provider parameters const collectionProviderParams = { ...searchParams, collectionId, provider: provider.name }; report = await internalRecReportForGranules({ - ...normalizeEvent(collectionProviderParams), + ...normalizeEvent({ ...collectionProviderParams, stackName: 'testStack' }), knex, }); t.is(report.okCount, 10); @@ -310,7 +310,7 @@ test.serial('internalRecReportForGranules reports discrepancy of granule holding // provider parameter const providerParams = { ...searchParams, provider: [randomId('p'), provider.name] }; report = await internalRecReportForGranules({ - ...normalizeEvent(providerParams), + ...normalizeEvent({ ...providerParams, stackName: 'testStack' }), knex, }); t.is(report.okCount, 20); @@ -330,7 +330,7 @@ test.serial('internalRecReportForGranules reports discrepancy of granule holding collectionId: [collectionId, extraEsGrans[0].collectionId, extraEsGrans[1].collectionId], }; report = await internalRecReportForGranules({ - ...normalizeEvent(granuleIdParams), + ...normalizeEvent({ ...granuleIdParams, stackName: 'testStack' }), knex, }); t.is(report.okCount, 0); diff --git a/packages/api/tests/lib/reconciliationReport/test-normalizeEvent.js b/packages/api/tests/lib/reconciliationReport/test-normalizeEvent.js index c71989788f9..b197e6e016a 100644 --- a/packages/api/tests/lib/reconciliationReport/test-normalizeEvent.js +++ 
b/packages/api/tests/lib/reconciliationReport/test-normalizeEvent.js @@ -1,6 +1,6 @@ const test = require('ava'); const omit = require('lodash/omit'); -const { InvalidArgument } = require('@cumulus/errors'); +const { InvalidArgument, MissingRequiredArgument } = require('@cumulus/errors'); const { constructCollectionId } = require('@cumulus/message/Collections'); const { randomId } = require('@cumulus/common/test-utils'); const { normalizeEvent } = require('../../../lib/reconciliationReport/normalizeEvent'); @@ -209,7 +209,7 @@ test('normalizeEvent throws error if provider and granuleId are passed to non-In test('Invalid report type throws InvalidArgument error', (t) => { const reportType = randomId('badType'); - const inputEvent = { reportType }; + const inputEvent = { reportType, systemBucket: 'systemBucket', stackName: 'stackName' }; t.throws(() => normalizeEvent(inputEvent), { instanceOf: InvalidArgument, @@ -220,6 +220,32 @@ test('Invalid report type throws InvalidArgument error', (t) => { test('valid Reports types from reconciliation schema do not throw an error.', (t) => { const validReportTypes = reconciliationReport.properties.type.enum; validReportTypes.forEach((reportType) => { - t.notThrows(() => normalizeEvent({ reportType })); + t.notThrows(() => normalizeEvent({ reportType, systemBucket: 'systemBucket', stackName: 'stackName' })); + }); +}); + +test('normalizeEvent throws error if no systemBucket is provided', (t) => { + const inputEvent = { + endTimestamp: new Date().toISOString(), + reportType: 'Inventory', + stackName: 'stackName', + startTimestamp: new Date().toISOString(), + }; + t.throws(() => normalizeEvent(inputEvent), { + instanceOf: MissingRequiredArgument, + message: 'systemBucket is required.', + }); +}); + +test('normalizeEvent throws error if no stackName is provided', (t) => { + const inputEvent = { + endTimestamp: new Date().toISOString(), + reportType: 'Inventory', + startTimestamp: new Date().toISOString(), + systemBucket: 'systemBucket', + }; + t.throws(() => normalizeEvent(inputEvent), { + instanceOf: MissingRequiredArgument, + message: 'stackName is required.', }); }); diff --git a/packages/cmr-client/src/CMR.ts b/packages/cmr-client/src/CMR.ts index b9a04b1bb4a..b8096f6efdd 100644 --- a/packages/cmr-client/src/CMR.ts +++ b/packages/cmr-client/src/CMR.ts @@ -41,7 +41,7 @@ export interface CMRConstructorParams { passwordSecretName?: string provider: string, token?: string, - username: string, + username?: string, oauthProvider: string, } @@ -67,11 +67,13 @@ export interface CMRConstructorParams { * clientId: 'my-clientId', * token: 'cmr_or_launchpad_token' * }); + * TODO: this should be subclassed or refactored to a functional style + * due to branch logic/complexity in token vs password/username handling */ export class CMR { clientId: string; provider: string; - username: string; + username?: string; oauthProvider: string; password?: string; passwordSecretName?: string; @@ -79,17 +81,6 @@ export class CMR { /** * The constructor for the CMR class - * - * @param {Object} params - * @param {string} params.provider - the CMR provider id - * @param {string} params.clientId - the CMR clientId - * @param {string} params.username - CMR username, not used if token is provided - * @param {string} params.passwordSecretName - CMR password secret, not used if token is provided - * @param {string} params.password - CMR password, not used if token or - * passwordSecretName is provided - * @param {string} params.token - CMR or Launchpad token, - * if not provided, CMR 
username and password are used to get a cmr token - * @param {string} params.oauthProvider - Oauth provider: earthdata or launchpad */ constructor(params: CMRConstructorParams) { this.clientId = params.clientId; @@ -131,6 +122,12 @@ export class CMR { * @returns {Promise.} the token */ async getToken(): Promise { + if (this.oauthProvider === 'launchpad') { + return this.token; + } + if (!this.username) { + throw new Error('Username not specified for non-launchpad CMR client'); + } return this.token ? this.token : updateToken(this.username, await this.getCmrPassword()); diff --git a/packages/cmr-client/src/CMRSearchConceptQueue.ts b/packages/cmr-client/src/CMRSearchConceptQueue.ts index ea0b142bd19..543b967ad6a 100644 --- a/packages/cmr-client/src/CMRSearchConceptQueue.ts +++ b/packages/cmr-client/src/CMRSearchConceptQueue.ts @@ -2,18 +2,12 @@ import { CMR, CMRConstructorParams } from './CMR'; /** * Shim to correctly add a default provider_short_name to the input searchParams - * - * @param {Object} params - * @param {URLSearchParams} params.searchParams - input search - * parameters for searchConceptQueue. This parameter can be either a - * URLSearchParam object or a plain Object. - * @returns {URLSearchParams} - input object appeneded with a default provider_short_name */ export const providerParams = ({ searchParams = new URLSearchParams(), cmrSettings, }: { - searchParams: URLSearchParams, + searchParams?: URLSearchParams, cmrSettings: { provider: string } @@ -28,7 +22,7 @@ export const providerParams = ({ export interface CMRSearchConceptQueueConstructorParams { cmrSettings: CMRConstructorParams, type: string, - searchParams: URLSearchParams, + searchParams?: URLSearchParams, format?: string } @@ -49,18 +43,18 @@ export interface CMRSearchConceptQueueConstructorParams { * format: 'json' * }); */ -export class CMRSearchConceptQueue { +export class CMRSearchConceptQueue { type: string; params: URLSearchParams; format?: string; - items: unknown[]; + items: (T | null)[]; CMR: CMR; /** * The constructor for the CMRSearchConceptQueue class * * @param {Object} params - * @param {string} params.cmrSettings - the CMR settings for the requests - the provider, + * @param {Object} params.cmrSettings - the CMR settings for the requests - the provider, * clientId, and either launchpad token or EDL username and password * @param {string} params.type - the type of search 'granule' or 'collection' * @param {URLSearchParams} [params.searchParams={}] - the search parameters @@ -84,10 +78,12 @@ export class CMRSearchConceptQueue { * This does not remove the object from the queue. When there are no more * items in the queue, returns 'null'. * - * @returns {Promise} an item from the CMR search */ - async peek(): Promise { + async peek(): Promise { if (this.items.length === 0) await this.fetchItems(); + if (this.items[0] === null) { + return null; + } return this.items[0]; } @@ -95,12 +91,15 @@ export class CMRSearchConceptQueue { * Remove the next item from the queue * * When there are no more items in the queue, returns `null`. 
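A minimal usage sketch for the now-generic queue, assuming the constructor arguments shown in the class's own docstring example; the 'granules' type string and format follow that example, and the collection_concept_id filter value is illustrative only. peek() and shift() now resolve to null once CMR has no more pages:

const { CMRSearchConceptQueue } = require('@cumulus/cmr-client');

async function drainCmrGranules(cmrSettings) {
  const queue = new CMRSearchConceptQueue({
    cmrSettings,
    type: 'granules',
    // Illustrative filter; any CMR granule search parameters could be used here.
    searchParams: new URLSearchParams({ collection_concept_id: 'C1234-PROV' }),
    format: 'json',
  });
  const granules = [];
  // shift() removes and returns the next item, fetching the next CMR page as
  // needed, and resolves to null when the search is exhausted.
  let next = await queue.shift();
  while (next !== null) {
    granules.push(next);
    next = await queue.shift();
  }
  return granules;
}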
- * - * @returns {Promise} an item from the CMR search */ - async shift(): Promise { + async shift(): Promise { if (this.items.length === 0) await this.fetchItems(); - return this.items.shift(); + const item = this.items.shift(); + // eslint-disable-next-line lodash/prefer-is-nil + if (item === null || item === undefined) { + return null; + } + return item; } /** @@ -116,7 +115,7 @@ export class CMRSearchConceptQueue { this.format, false ); - this.items = results; + this.items = results as T[]; const paramsPageNum = this.params.get('page_num') ?? '0'; this.params.set('page_num', String(Number(paramsPageNum) + 1)); diff --git a/packages/cmr-client/tests/test-CMR.js b/packages/cmr-client/tests/test-CMR.js index ef1641c657b..1f787c1e3af 100644 --- a/packages/cmr-client/tests/test-CMR.js +++ b/packages/cmr-client/tests/test-CMR.js @@ -166,7 +166,7 @@ test('getReadHeaders returns clientId and token for launchpad', (t) => { }); test.serial('ingestUMMGranule() returns CMRInternalError when CMR is down', async (t) => { - const cmrSearch = new CMR({ provider: 'my-provider', token: 'abc', clientId: 'client' }); + const cmrSearch = new CMR({ oauthProvider: 'launchpad', token: 'abc', clientId: 'client' }); const ummgMetadata = { GranuleUR: 'asdf' }; @@ -192,7 +192,7 @@ test.serial('ingestUMMGranule() returns CMRInternalError when CMR is down', asyn }); test.serial('ingestUMMGranule() throws an exception if the input fails validation', async (t) => { - const cmrSearch = new CMR({ provider: 'my-provider', token: 'abc', clientId: 'client' }); + const cmrSearch = new CMR({ oauthProvider: 'launchpad', token: 'abc', clientId: 'client' }); const ummgMetadata = { GranuleUR: 'asdf' }; @@ -257,3 +257,17 @@ test('getToken returns a token when the user\'s token is provided', async (t) => t.is(await cmrObj.getToken(), 'abcde'); }); + +test('getToken throws if no username is provided when using Earthdata Login', async (t) => { + const cmrObj = new CMR({ + provider: 'CUMULUS', + clientId: 'clientId', + password: 'password', + oauthProvider: 'earthdata', + }); + + await t.throwsAsync( + () => cmrObj.getToken(), + { message: 'Username not specified for non-launchpad CMR client' } + ); +}); diff --git a/packages/cmrjs/src/cmr-utils.js b/packages/cmrjs/src/cmr-utils.js index 59fa5ff81b6..a786c51f899 100644 --- a/packages/cmrjs/src/cmr-utils.js +++ b/packages/cmrjs/src/cmr-utils.js @@ -448,6 +448,14 @@ function generateFileUrl({ return undefined; } +/** + * @typedef {Object} OnlineAccessUrl + * @property {string} URL - The generated file URL. + * @property {string} URLDescription - The description of the URL (used by ECHO10). + * @property {string} Description - The description of the URL (used by UMMG). + * @property {string} Type - The type of the URL (used by ECHO10/UMMG). + */ + /** * Construct online access url for a given file and a url type. 
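A small sketch of the two client configurations the updated getToken() distinguishes, mirroring the constructors used in the tests above; the credential values are placeholders:

const { CMR } = require('@cumulus/cmr-client');

// Launchpad: getToken() simply returns the token supplied to the constructor.
const launchpadClient = new CMR({
  oauthProvider: 'launchpad',
  provider: 'CUMULUS',
  clientId: 'clientId',
  token: 'launchpad-token',
});

// Earthdata Login: a username (plus password or passwordSecretName) is required;
// getToken() throws 'Username not specified for non-launchpad CMR client' otherwise.
const earthdataClient = new CMR({
  oauthProvider: 'earthdata',
  provider: 'CUMULUS',
  clientId: 'clientId',
  username: 'edl-user',
  password: 'edl-password',
});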
* @@ -458,8 +466,8 @@ function generateFileUrl({ * @param {Object} params.urlType - url type, distribution or s3 * @param {distributionBucketMap} params.distributionBucketMap - Object with bucket:tea-path mapping * for all distribution bucketss - * @param {boolean} params.useDirectS3Type - indicate if direct s3 access type is used - * @returns {(Object | undefined)} online access url object, undefined if no URL exists + * @param {boolean} [params.useDirectS3Type] - indicate if direct s3 access type is used + * @returns {(OnlineAccessUrl | undefined)} online access url object, undefined if no URL exists */ function constructOnlineAccessUrl({ file, @@ -741,15 +749,17 @@ async function updateUMMGMetadata({ * Helper to build an CMR settings object, used to initialize CMR. * * @param {Object} cmrConfig - CMR configuration object - * @param {string} cmrConfig.oauthProvider - Oauth provider: launchpad or earthdata - * @param {string} cmrConfig.provider - the CMR provider - * @param {string} cmrConfig.clientId - Client id for CMR requests - * @param {string} cmrConfig.passphraseSecretName - Launchpad passphrase secret name - * @param {string} cmrConfig.api - Launchpad api - * @param {string} cmrConfig.certificate - Launchpad certificate - * @param {string} cmrConfig.username - EDL username - * @param {string} cmrConfig.passwordSecretName - CMR password secret name - * @returns {Promise} object to create CMR instance - contains the + * @param {string} [cmrConfig.oauthProvider] - Oauth provider: launchpad or earthdata + * @param {string} [cmrConfig.provider] - the CMR provider + * @param {string} [cmrConfig.clientId] - Client id for CMR requests + * @param {string} [cmrConfig.passphraseSecretName] - Launchpad passphrase secret name + * @param {string} [cmrConfig.api] - Launchpad api + * @param {string} [cmrConfig.certificate] - Launchpad certificate + * @param {string} [cmrConfig.username] - EDL username + * @param {string} [cmrConfig.passwordSecretName] - CMR password secret name + * @returns {Promise} + * object to + * create CMR instance - contains the * provider, clientId, and either launchpad token or EDL username and * password */ diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index b26a673067c..c14385d500f 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -115,6 +115,7 @@ export { export { getCollectionsByGranuleIds, + getUniqueCollectionsByGranuleFilter, } from './lib/collection'; export { diff --git a/packages/db/src/lib/QuerySearchClient.ts b/packages/db/src/lib/QuerySearchClient.ts index acb890e144d..470c7b30e9c 100644 --- a/packages/db/src/lib/QuerySearchClient.ts +++ b/packages/db/src/lib/QuerySearchClient.ts @@ -43,7 +43,6 @@ class QuerySearchClient { * * This does not remove the object from the queue. 
* - * @returns {Promise} - record from PostgreSQL table */ async peek() { if (this.records.length === 0) await this.fetchRecords(); @@ -53,7 +52,6 @@ class QuerySearchClient { /** * Remove and return the next item in the results * - * @returns {Promise} - record from PostgreSQL table */ async shift() { if (this.records.length === 0) await this.fetchRecords(); diff --git a/packages/db/src/lib/collection.ts b/packages/db/src/lib/collection.ts index f8b1db297f8..d43c1ab269d 100644 --- a/packages/db/src/lib/collection.ts +++ b/packages/db/src/lib/collection.ts @@ -1,6 +1,8 @@ import { Knex } from 'knex'; import Logger from '@cumulus/logger'; +import { deconstructCollectionId } from '@cumulus/message/Collections'; + import { RetryOnDbConnectionTerminateError } from './retry'; import { TableNames } from '../tables'; @@ -27,3 +29,56 @@ export const getCollectionsByGranuleIds = async ( .groupBy(`${collectionsTable}.cumulus_id`); return await RetryOnDbConnectionTerminateError(query, {}, log); }; + +// TODO - This function is going to be super-non-performant +// We need to identify the specific need here and see if we can optimize + +export const getUniqueCollectionsByGranuleFilter = async (params: { + startTimestamp?: string, + endTimestamp?: string, + collectionIds?: string[], + granuleIds?: string[], + providers?: string[], + knex: Knex, +}) => { + const { knex } = params; + const collectionsTable = TableNames.collections; + const granulesTable = TableNames.granules; + const providersTable = TableNames.providers; + + const query = knex(collectionsTable) + .distinct(`${collectionsTable}.*`) + .innerJoin(granulesTable, `${collectionsTable}.cumulus_id`, `${granulesTable}.collection_cumulus_id`); + + if (params.startTimestamp) { + query.where(`${granulesTable}.updated_at`, '>=', params.startTimestamp); + } + if (params.endTimestamp) { + query.where(`${granulesTable}.updated_at`, '<=', params.endTimestamp); + } + + // Filter by collectionIds + if (params.collectionIds && params.collectionIds.length > 0) { + const collectionNameVersionPairs = params.collectionIds.map((id) => + deconstructCollectionId(id)); + + query.whereIn( + [`${collectionsTable}.name`, `${collectionsTable}.version`], + collectionNameVersionPairs.map(({ name, version }) => [name, version]) + ); + } + + // Filter by granuleIds + if (params.granuleIds && params.granuleIds.length > 0) { + query.whereIn(`${granulesTable}.granule_id`, params.granuleIds); + } + + // Filter by provider names + if (params.providers && params.providers.length > 0) { + query.innerJoin(providersTable, `${granulesTable}.provider_cumulus_id`, `${providersTable}.cumulus_id`); + query.whereIn(`${providersTable}.name`, params.providers); + } + + query.orderBy([`${collectionsTable}.name`, `${collectionsTable}.version`]); + return query; +}; diff --git a/packages/db/src/lib/granule.ts b/packages/db/src/lib/granule.ts index 05da2df9ec3..9bc0878cc47 100644 --- a/packages/db/src/lib/granule.ts +++ b/packages/db/src/lib/granule.ts @@ -204,19 +204,6 @@ export const getApiGranuleExecutionCumulusIds = async ( /** * Helper to build a query to search granules by various API granule record properties. 
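An illustrative sketch of how reconciliation-report code might call the new getUniqueCollectionsByGranuleFilter helper above to scope a report to the collections that actually have matching granules; recReportParams and collectionIdsForReport are hypothetical names, and every filter field is optional:

const { getKnexClient, getUniqueCollectionsByGranuleFilter } = require('@cumulus/db');
const { constructCollectionId } = require('@cumulus/message/Collections');

async function collectionIdsForReport(recReportParams) {
  const knex = recReportParams.knex || await getKnexClient();
  // Returns distinct collection rows joined through their granules, already
  // de-duplicated and ordered by name and version.
  const collections = await getUniqueCollectionsByGranuleFilter({
    knex,
    startTimestamp: recReportParams.startTimestamp,
    endTimestamp: recReportParams.endTimestamp,
    collectionIds: recReportParams.collectionIds,
    granuleIds: recReportParams.granuleIds,
    providers: recReportParams.providers,
  });
  return collections.map((c) => constructCollectionId(c.name, c.version));
}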
- * - * @param params - * @param params.knex - DB client - * @param params.searchParams - * @param [params.searchParams.collectionIds] - Collection ID - * @param [params.searchParams.granuleIds] - array of granule IDs - * @param [params.searchParams.providerNames] - Provider names - * @param [params.searchParams.updatedAtRange] - Date range for updated_at column - * @param [params.searchParams.status] - Granule status to search by - * @param [params.sortByFields] - Field(s) to sort by - * @param params.temporalBoundByCreatedAt -- If true, temporal bounds - * are applied to created_at column instead of updated_at column - * @returns {Knex.QueryBuilder} */ export const getGranulesByApiPropertiesQuery = ({ knex, @@ -226,14 +213,15 @@ export const getGranulesByApiPropertiesQuery = ({ } : { knex: Knex, searchParams: { + collate?: string, collectionIds?: string | string[], granuleIds?: string | string[], providerNames?: string[], - updatedAtRange?: UpdatedAtRange, status?: string + updatedAtRange?: UpdatedAtRange, }, sortByFields?: string | string[], - temporalBoundByCreatedAt: boolean, + temporalBoundByCreatedAt?: boolean, }) : Knex.QueryBuilder => { const { granules: granulesTable, @@ -281,7 +269,13 @@ export const getGranulesByApiPropertiesQuery = ({ queryBuilder.where(`${granulesTable}.status`, searchParams.status); } if (sortByFields) { - queryBuilder.orderBy([sortByFields].flat()); + if (!searchParams.collate) { + queryBuilder.orderBy([sortByFields].flat()); + } else { + [sortByFields].flat().forEach((field) => { + queryBuilder.orderByRaw(`${field} collate \"${searchParams.collate}\"`); + }); + } } }) .groupBy(`${granulesTable}.cumulus_id`) diff --git a/packages/db/src/models/reconciliation_report.ts b/packages/db/src/models/reconciliation_report.ts index fc5c19a37a9..a8ff040d943 100644 --- a/packages/db/src/models/reconciliation_report.ts +++ b/packages/db/src/models/reconciliation_report.ts @@ -19,13 +19,13 @@ class ReconciliationReportPgModel extends BasePgModel; } upsert( knexOrTransaction: Knex | Knex.Transaction, reconciliationReport: PostgresReconciliationReport - ) { + ): Promise { return knexOrTransaction(this.tableName) .insert(reconciliationReport) .onConflict('name') diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 22a966c2e2a..9c9b123c233 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -458,7 +458,13 @@ abstract class BaseSearch { const { sort } = dbQueryParameters || this.dbQueryParameters; sort?.forEach((key) => { if (key.column.startsWith('error')) { - searchQuery.orderByRaw(`${this.tableName}.error ->> 'Error' ${key.order}`); + searchQuery.orderByRaw( + `${this.tableName}.error ->> 'Error' ${key.order}` + ); + } else if (dbQueryParameters?.collate) { + searchQuery.orderByRaw( + `${key} collate \"${dbQueryParameters.collate}\"` + ); } else { searchQuery.orderBy([key]); } diff --git a/packages/db/src/search/queries.ts b/packages/db/src/search/queries.ts index ed041e277e0..dd290973ca0 100644 --- a/packages/db/src/search/queries.ts +++ b/packages/db/src/search/queries.ts @@ -243,9 +243,10 @@ export const convertQueryStringToDbQueryParameters = ( const dbQueryParameters: DbQueryParameters = {}; dbQueryParameters.page = Number.parseInt(page ?? '1', 10); - dbQueryParameters.limit = Number.parseInt(limit ?? '10', 10); - dbQueryParameters.offset = (dbQueryParameters.page - 1) * dbQueryParameters.limit; - + if (limit !== null) { + dbQueryParameters.limit = Number.parseInt(limit ?? 
'10', 10); + dbQueryParameters.offset = (dbQueryParameters.page - 1) * dbQueryParameters.limit; + } if (typeof infix === 'string') dbQueryParameters.infix = infix; if (typeof prefix === 'string') dbQueryParameters.prefix = prefix; if (typeof fields === 'string') dbQueryParameters.fields = fields.split(','); diff --git a/packages/db/src/types/search.ts b/packages/db/src/types/search.ts index 8d129082544..5011167965b 100644 --- a/packages/db/src/types/search.ts +++ b/packages/db/src/types/search.ts @@ -2,14 +2,14 @@ export type QueryStringParameters = { field?: string, fields?: string, infix?: string, - limit?: string, + limit?: string | null, page?: string, order?: string, prefix?: string, includeFullRecord?: string, sort_by?: string, sort_key?: string[], - [key: string]: string | string[] | undefined, + [key: string]: string | string[] | undefined | null, }; export type QueryEvent = { @@ -29,6 +29,7 @@ export type SortType = { }; export type DbQueryParameters = { + collate?: string, fields?: string[], infix?: string, limit?: number, diff --git a/packages/db/tests/lib/test-collection.js b/packages/db/tests/lib/test-collection.js index eeed7fc7a17..928273b723c 100644 --- a/packages/db/tests/lib/test-collection.js +++ b/packages/db/tests/lib/test-collection.js @@ -5,56 +5,85 @@ const sinon = require('sinon'); const cryptoRandomString = require('crypto-random-string'); const { - destroyLocalTestDb, - generateLocalTestDb, - GranulePgModel, CollectionPgModel, + destroyLocalTestDb, fakeCollectionRecordFactory, fakeGranuleRecordFactory, + fakeProviderRecordFactory, + generateLocalTestDb, getCollectionsByGranuleIds, + getUniqueCollectionsByGranuleFilter, + GranulePgModel, migrationDir, + ProviderPgModel, } = require('../../dist'); -const testDbName = `collection_${cryptoRandomString({ length: 10 })}`; - -test.before(async (t) => { +test.beforeEach(async (t) => { + t.context.testDbName = `collection_${cryptoRandomString({ length: 10 })}`; const { knexAdmin, knex } = await generateLocalTestDb( - testDbName, + t.context.testDbName, migrationDir ); t.context.knexAdmin = knexAdmin; t.context.knex = knex; t.context.collectionPgModel = new CollectionPgModel(); + t.context.providerPgModel = new ProviderPgModel(); t.context.granulePgModel = new GranulePgModel(); -}); - -test.after.always(async (t) => { - await destroyLocalTestDb({ - ...t.context, - testDbName, - }); -}); -test('getCollectionsByGranuleIds() returns collections for given granule IDs', async (t) => { - const collection1 = fakeCollectionRecordFactory(); - const collection2 = fakeCollectionRecordFactory(); + t.context.oldTimeStamp = '1950-01-01T00:00:00Z'; + t.context.newTimeStamp = '2020-01-01T00:00:00Z'; - const pgCollections = await t.context.collectionPgModel.insert( + t.context.collections = Array.from({ length: 3 }, (_, index) => { + const name = `collection${index + 1}`; + return fakeCollectionRecordFactory({ name, version: '001' }); + }); + t.context.pgCollections = await t.context.collectionPgModel.insert( t.context.knex, - [collection1, collection2], + t.context.collections, '*' ); + t.context.providers = Array.from({ length: 2 }, (_, index) => { + const name = `provider${index + 1}`; + return fakeProviderRecordFactory({ name }); + }); + t.context.pgProviders = await t.context.providerPgModel.create( + t.context.knex, + t.context.providers + ); - const granules = [ - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[0].cumulus_id }), - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[1].cumulus_id }), + 
t.context.granules = [ + fakeGranuleRecordFactory({ + collection_cumulus_id: t.context.pgCollections[0].cumulus_id, + provider_cumulus_id: t.context.pgProviders[0].cumulus_id, + updated_at: t.context.oldTimeStamp, + }), + fakeGranuleRecordFactory({ + collection_cumulus_id: t.context.pgCollections[1].cumulus_id, + provider_cumulus_id: t.context.pgProviders[1].cumulus_id, + updated_at: t.context.oldTimeStamp, + }), + fakeGranuleRecordFactory({ + collection_cumulus_id: t.context.pgCollections[2].cumulus_id, + provider_cumulus_id: t.context.pgProviders[1].cumulus_id, + updated_at: t.context.newTimeStamp, + }), ]; + await t.context.granulePgModel.insert( t.context.knex, - granules + t.context.granules ); +}); +test.afterEach.always(async (t) => { + await destroyLocalTestDb({ + ...t.context, + }); +}); + +test('getCollectionsByGranuleIds() returns collections for given granule IDs', async (t) => { + const { pgCollections, granules } = t.context; const collections = await getCollectionsByGranuleIds( t.context.knex, granules.map((granule) => granule.granule_id) @@ -64,25 +93,17 @@ test('getCollectionsByGranuleIds() returns collections for given granule IDs', a }); test('getCollectionsByGranuleIds() only returns unique collections', async (t) => { - const collection1 = fakeCollectionRecordFactory(); - const collection2 = fakeCollectionRecordFactory(); - - const pgCollections = await t.context.collectionPgModel.insert( - t.context.knex, - [collection1, collection2], - '*' - ); - - const granules = [ - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[0].cumulus_id }), - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[1].cumulus_id }), - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[1].cumulus_id }), - ]; + const { pgCollections } = t.context; + const testGranule = fakeGranuleRecordFactory({ + collection_cumulus_id: pgCollections[1].cumulus_id, + }); await t.context.granulePgModel.insert( t.context.knex, - granules + [testGranule] ); + const granules = [...t.context.granules, testGranule]; + const collections = await getCollectionsByGranuleIds( t.context.knex, granules.map((granule) => granule.granule_id) @@ -92,21 +113,15 @@ test('getCollectionsByGranuleIds() only returns unique collections', async (t) = }); test.serial('getCollectionsByGranuleIds() retries on connection terminated unexpectedly error', async (t) => { - const { knex } = t.context; - const collection1 = fakeCollectionRecordFactory(); - const collection2 = fakeCollectionRecordFactory(); - - const pgCollections = await t.context.collectionPgModel.insert( - knex, - [collection1, collection2], - '*' + const { knex, pgCollections } = t.context; + const testGranule = fakeGranuleRecordFactory({ + collection_cumulus_id: pgCollections[1].cumulus_id, + }); + await t.context.granulePgModel.insert( + t.context.knex, + [testGranule] ); - - const granules = [ - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[0].cumulus_id }), - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[1].cumulus_id }), - fakeGranuleRecordFactory({ collection_cumulus_id: pgCollections[1].cumulus_id }), - ]; + const granules = [...t.context.granules, testGranule]; const knexStub = sinon.stub(knex, 'select').returns({ select: sinon.stub().returnsThis(), @@ -127,3 +142,100 @@ test.serial('getCollectionsByGranuleIds() retries on connection terminated unexp ); t.is(error.attemptNumber, 4); }); + +test('getUniqueCollectionsByGranuleFilter filters by startTimestamp', async (t) => { + const { knex } = 
t.context; + const params = { + startTimestamp: '2005-01-01T00:00:00Z', + knex, + }; + + const result = await getUniqueCollectionsByGranuleFilter(params); + t.is(result.length, 1); +}); + +test('getUniqueCollectionsByGranuleFilter filters by endTimestamp', async (t) => { + const { knex } = t.context; + const params = { + endTimestamp: '2005-01-01T00:00:00Z', + knex, + }; + const result = await getUniqueCollectionsByGranuleFilter(params); + t.is(result.length, 2); + t.is(result[0].name, 'collection1'); + t.is(result[1].name, 'collection2'); +}); + +test('getUniqueCollectionsByGranuleFilter filters by collectionIds', async (t) => { + const { knex } = t.context; + const params = { + collectionIds: ['collection1___001', 'collection2___001'], + knex, + }; + + const result = await getUniqueCollectionsByGranuleFilter(params); + t.is(result.length, 2); + t.is(result[0].name, 'collection1'); + t.is(result[0].version, '001'); + t.is(result[1].name, 'collection2'); + t.is(result[1].version, '001'); +}); + +test('getUniqueCollectionsByGranuleFilter filters by granuleIds', async (t) => { + const { knex, granules } = t.context; + const params = { + granuleIds: [granules[0].granule_id], + knex, + }; + + const result = await getUniqueCollectionsByGranuleFilter(params); + t.is(result.length, 1); + t.is(result[0].name, 'collection1'); + t.is(result[0].version, '001'); +}); + +test('getUniqueCollectionsByGranuleFilter filters by providers', async (t) => { + const { knex, providers } = t.context; + const params = { + providers: [providers[0].name], + knex, + }; + + const result = await getUniqueCollectionsByGranuleFilter(params); + t.is(result.length, 1); + t.is(result[0].name, 'collection1'); + t.is(result[0].version, '001'); +}); + +test('getUniqueCollectionsByGranuleFilter orders collections by name', async (t) => { + const { knex } = t.context; + const params = { + knex, + }; + + const result = await getUniqueCollectionsByGranuleFilter(params); + t.is(result.length, 3); + t.is(result[0].name, 'collection1'); + t.is(result[1].name, 'collection2'); + t.is(result[2].name, 'collection3'); +}); + +test('getUniqueCollectionsByGranuleFilter returns distinct collections', async (t) => { + const { knex } = t.context; + const params = { + knex, + }; + + const granule = fakeGranuleRecordFactory({ + collection_cumulus_id: t.context.pgCollections[0].cumulus_id, + provider_cumulus_id: t.context.pgProviders[0].cumulus_id, + updated_at: t.context.oldTimeStamp, + }); + await t.context.granulePgModel.insert( + t.context.knex, + [granule] + ); + + const result = await getUniqueCollectionsByGranuleFilter(params); + t.is(result.length, 3); +}); diff --git a/packages/db/tests/lib/test-granule.js b/packages/db/tests/lib/test-granule.js index abc68a8d67d..ff9797d771b 100644 --- a/packages/db/tests/lib/test-granule.js +++ b/packages/db/tests/lib/test-granule.js @@ -728,6 +728,53 @@ test.serial('getGranulesByApiPropertiesQuery returns correct granules by provide ); }); +test.serial('getGranulesByApiPropertiesQuery returns results POSIX/ASCII sorted when collition is set to "C"', async (t) => { + const { + collectionCumulusId, + knex, + granulePgModel, + providerPgModel, + } = t.context; + + const fakeProvider = fakeProviderRecordFactory(); + const [provider] = await providerPgModel.create(knex, fakeProvider); + + const granules = await granulePgModel.insert( + knex, + [ + fakeGranuleRecordFactory({ + collection_cumulus_id: collectionCumulusId, + provider_cumulus_id: provider.cumulus_id, + status: 'completed', + granule_id: 
'MYDGRANULE', + }), + fakeGranuleRecordFactory({ + collection_cumulus_id: collectionCumulusId, + provider_cumulus_id: provider.cumulus_id, + status: 'completed', + granule_id: 'lowerCaseGranuleShouldGoLast', + }), + ], + '*' + ); + t.teardown(() => Promise.all(granules.map( + (granule) => + granulePgModel.delete(knex, { cumulus_id: granule.cumulus_id }) + ))); + const query = getGranulesByApiPropertiesQuery({ + knex, + searchParams: { + collate: 'C', + status: 'completed', + }, + sortByFields: ['granule_id'], + }); + const records = await query; + t.is(records.length, 2); + t.is(records[0].granule_id, 'MYDGRANULE'); + t.is(records[1].granule_id, 'lowerCaseGranuleShouldGoLast'); +}); + test.serial('getGranulesByApiPropertiesQuery returns correct granules by status', async (t) => { const { collectionCumulusId, diff --git a/packages/db/tests/search/test-queries.js b/packages/db/tests/search/test-queries.js index 0a1ecfff67e..9f3f7d1db27 100644 --- a/packages/db/tests/search/test-queries.js +++ b/packages/db/tests/search/test-queries.js @@ -77,6 +77,16 @@ test('convertQueryStringToDbQueryParameters correctly converts api query string t.deepEqual(dbQueryParams, expectedDbQueryParameters); }); +test('convertQueryStringToDbQueryParameters does not include limit/offset parameters if limit is explicitly set to null', (t) => { + const queryStringParameters = { + limit: null, + offset: 3, + }; + const dbQueryParams = convertQueryStringToDbQueryParameters('granule', queryStringParameters); + t.is(dbQueryParams.limit, undefined); + t.is(dbQueryParams.offset, undefined); +}); + test('convertQueryStringToDbQueryParameters correctly converts sortby error parameter to db query parameters', (t) => { const queryStringParameters = { sort_by: 'error.Error.keyword', From 1e1af2eadab9e915532046ab3827f0e9371e53bc Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Wed, 23 Oct 2024 22:39:04 -0600 Subject: [PATCH 53/61] CUMULUS-3806-3 -- Remove internal rec report references/cleanup (#3822) * Remove internal/ES specific recon report functions * Jk/cumulus 3806 2 (#3811) * Update ORCA report to only read from postgres database * Parameterize getGranulesByApiPropertiesQuery * Fix unit bugs * Fix unexplained lint error :( * Fix granule translation for report output * Fix/improve spec tests * Update typings for orca-backup-reconciliation-report * Pre-review tag minor fixes * Remove configuration TODO, in favor of PR comment * Update typings * Remove premature mods * Update CHANGELOG * Fix recon report spec * Minor typedef refactor * Fix units * WIP - fixup * re-enable disabled test, remove unused code * Dedupe knex objects/queues * Fix linting :bell: * Update db package typing * Update cmr-client typings * Update test with bad test parameters/setup * Add typing fixes/etc to cmr-utils js * Add basic recon report typings * Fix annotations for normalizeEvent * Minor lint fix * Additional typing fixes * Fix knex typings * Remove remaining ES refs, tidy up * Remove shouldAggregateGranulesForCollections test * Fix/update unit tests broken due to normalizeEvent mods * Updated test-collections to create database per test, use common fixtures * Add tests, refactor collection lib queries * Re-order CHANGELOG * Fix merge mangle * Updating default configuration to modify RDS cert configuration * Revert "Updating default configuration to modify RDS cert configuration" This reverts commit 7e77b2d413a20d9a080ce5a1fcb94749eefd2709. 
* re-activate integration tests, add collation fix to granule sorting * Add FileReport typings * Misc typing fixes/updates * Update ORCA typings * Minor comment update * Remove TODO * Types refactor * Add types file * Update DB package to allow for limit:null/unbounded queries * Minor typing refactor * Fix param arguments * Add collation options * Minor comment update * Refactor collection query to use @cumulus/db/search * Remove debugging log output * Remove type-bound error conditional * Minor typing update * Add missing unit tests * Add CHANGELOG updates * Split defs out into seperate file * Fix unneeded newline/lint * Remove excess typedef * Fix bad param passing, update typing in createReconcilationReportForBucket * Fix docstring typing * Seperate typedefs into seperate file * Update error types * Fix typings * Fix unit test error type mistake * Remove internal recon report units --- .../CreateReconciliationReportSpec.js | 1 - .../lambdas/create-reconciliation-report.js | 1 - .../lambdas/internal-reconciliation-report.js | 461 ----------------- packages/api/lib/reconciliationReport.js | 70 --- .../test-internal-reconciliation-report.js | 483 ------------------ .../tests/lib/test-reconciliationReport.js | 94 ---- tf-modules/archive/reconciliation_report.tf | 1 - 7 files changed, 1111 deletions(-) delete mode 100644 packages/api/lambdas/internal-reconciliation-report.js delete mode 100644 packages/api/tests/lambdas/test-internal-reconciliation-report.js diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index e52dc8a6966..3cad6f13204 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -734,7 +734,6 @@ describe('When there are granule differences and granule reconciliation is run', }); }); - // TODO: fix tests in CUMULUS-3806 when CreateReconciliationReport lambda is changed to query postgres describe('Create an ORCA Backup Reconciliation Report to monitor ORCA backup discrepancies', () => { // report record in db and report in s3 let reportRecord; diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index e404ea0a32b..3822420a9fe 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -920,7 +920,6 @@ async function processRequest(params) { log.error( 'Internal Reconciliation Reports are no longer valid, as Cumulus is no longer utilizing Elasticsearch' ); - //TODO remove internal rec report code throw new Error('Internal Reconciliation Reports are no longer valid'); } else if (reportType === 'Granule Inventory') { await createGranuleInventoryReport(recReportParams); diff --git a/packages/api/lambdas/internal-reconciliation-report.js b/packages/api/lambdas/internal-reconciliation-report.js deleted file mode 100644 index 0eb926628fa..00000000000 --- a/packages/api/lambdas/internal-reconciliation-report.js +++ /dev/null @@ -1,461 +0,0 @@ -'use strict'; - -const chunk = require('lodash/chunk'); -const cloneDeep = require('lodash/cloneDeep'); -const pick = require('lodash/pick'); -const sortBy = require('lodash/sortBy'); -const isEqual = require('lodash/isEqual'); -const intersection = require('lodash/intersection'); -const union = require('lodash/union'); -const omit = require('lodash/omit'); 
-const moment = require('moment'); -const pMap = require('p-map'); - -const Logger = require('@cumulus/logger'); -const { constructCollectionId } = require('@cumulus/message/Collections'); -const { s3 } = require('@cumulus/aws-client/services'); -const { ESSearchQueue } = require('@cumulus/es-client/esSearchQueue'); -const { - CollectionPgModel, - translatePostgresCollectionToApiCollection, - getKnexClient, - getCollectionsByGranuleIds, - getGranulesByApiPropertiesQuery, - QuerySearchClient, - translatePostgresGranuleResultToApiGranule, -} = require('@cumulus/db'); - -const { - convertToDBCollectionSearchObject, - convertToESCollectionSearchParams, - convertToESGranuleSearchParams, - convertToDBGranuleSearchParams, - filterDBCollections, - initialReportHeader, - compareEsGranuleAndApiGranule, -} = require('../lib/reconciliationReport'); - -const log = new Logger({ sender: '@api/lambdas/internal-reconciliation-report' }); - -/** - * Compare the collection holdings in Elasticsearch with Database - * - * @param {Object} recReportParams - lambda's input filtering parameters to - * narrow limit of report. - * @returns {Promise} an object with the okCount, onlyInEs, onlyInDb - * and withConfilcts - */ -async function internalRecReportForCollections(recReportParams) { - log.info(`internalRecReportForCollections (${JSON.stringify(recReportParams)})`); - // compare collection holdings: - // Get collection list in ES ordered by granuleId - // Get collection list in PostgreSQL ordered by granuleId - // Report collections only in ES - // Report collections only in PostgreSQL - // Report collections with different contents - - const searchParams = convertToESCollectionSearchParams(recReportParams); - const esCollectionsIterator = new ESSearchQueue( - { ...searchParams, sort_key: ['name', 'version'] }, 'collection', process.env.ES_INDEX - ); - - const collectionPgModel = new CollectionPgModel(); - const knex = recReportParams.knex || await getKnexClient(); - - // get collections from database and sort them, since the scan result is not ordered - const [ - updatedAtRangeParams, - dbSearchParams, - ] = convertToDBCollectionSearchObject(recReportParams); - - const dbCollectionsSearched = await collectionPgModel.searchWithUpdatedAtRange( - knex, - dbSearchParams, - updatedAtRangeParams - ); - - // TODO - improve this sort - const dbCollectionItems = sortBy( - filterDBCollections(dbCollectionsSearched, recReportParams), - ['name', 'version'] - ); - - let okCount = 0; - const withConflicts = []; - let onlyInEs = []; - let onlyInDb = []; - - const fieldsIgnored = ['timestamp', 'updatedAt', 'createdAt']; - let nextEsItem = await esCollectionsIterator.peek(); - let nextDbItem = dbCollectionItems.length !== 0 - ? 
translatePostgresCollectionToApiCollection(dbCollectionItems[0]) - : undefined; - - while (nextEsItem && nextDbItem) { - const esCollectionId = constructCollectionId(nextEsItem.name, nextEsItem.version); - const dbCollectionId = constructCollectionId(nextDbItem.name, nextDbItem.version); - - if (esCollectionId < dbCollectionId) { - // Found an item that is only in ES and not in DB - onlyInEs.push(esCollectionId); - await esCollectionsIterator.shift(); // eslint-disable-line no-await-in-loop - } else if (esCollectionId > dbCollectionId) { - // Found an item that is only in DB and not in ES - onlyInDb.push(dbCollectionId); - dbCollectionItems.shift(); - } else { - // Found an item that is in both ES and DB - if ( - isEqual( - omit(nextEsItem, fieldsIgnored), - omit( - nextDbItem, - fieldsIgnored - ) - ) - ) { - okCount += 1; - } else { - withConflicts.push({ es: nextEsItem, db: nextDbItem }); - } - await esCollectionsIterator.shift(); // eslint-disable-line no-await-in-loop - dbCollectionItems.shift(); - } - - nextEsItem = await esCollectionsIterator.peek(); // eslint-disable-line no-await-in-loop - nextDbItem = dbCollectionItems.length !== 0 - ? translatePostgresCollectionToApiCollection(dbCollectionItems[0]) - : undefined; - } - - // Add any remaining ES items to the report - onlyInEs = onlyInEs.concat( - (await esCollectionsIterator.empty()) - .map((item) => constructCollectionId(item.name, item.version)) - ); - - // Add any remaining DB items to the report - onlyInDb = onlyInDb - .concat(dbCollectionItems.map((item) => constructCollectionId(item.name, item.version))); - - return { okCount, withConflicts, onlyInEs, onlyInDb }; -} - -/** - * Get all collectionIds from ES and database combined - * - * @returns {Promise>} list of collectionIds - */ -async function getAllCollections() { - const collectionPgModel = new CollectionPgModel(); - const knex = await getKnexClient(); - - const dbCollections = (await collectionPgModel.search(knex, {})) - .map((collection) => constructCollectionId(collection.name, collection.version)); - - const esCollectionsIterator = new ESSearchQueue( - { sort_key: ['name', 'version'], fields: ['name', 'version'] }, 'collection', process.env.ES_INDEX - ); - const esCollections = (await esCollectionsIterator.empty()) - .map((item) => constructCollectionId(item.name, item.version)); - - return union(dbCollections, esCollections); -} - -async function getAllCollectionIdsByGranuleIds({ - granuleIds, - knex, - concurrency, -}) { - const collectionIds = new Set(); - await pMap( - chunk(granuleIds, 100), - async (granuleIdsBatch) => { - const collections = await getCollectionsByGranuleIds(knex, granuleIdsBatch); - collections.forEach( - (collection) => { - const collectionId = constructCollectionId(collection.name, collection.version); - collectionIds.add(collectionId); - } - ); - }, - { - concurrency, - } - ); - return [...collectionIds]; -} - -/** - * Get list of collections for the given granuleIds - * - * @param {Object} recReportParams - * @param {Array} recReportParams.granuleIds - list of granuleIds - * @returns {Promise>} list of collectionIds - */ -async function getCollectionsForGranules(recReportParams) { - const { - granuleIds, - } = recReportParams; - let dbCollectionIds = []; - log.info('Getting collection IDs by Granule IDs'); - dbCollectionIds = await getAllCollectionIdsByGranuleIds(recReportParams); - - log.info('Completed getting collection IDs'); - - const esGranulesIterator = new ESSearchQueue( - { granuleId__in: granuleIds.join(','), sort_key: 
['collectionId'], fields: ['collectionId'] }, 'granule', process.env.ES_INDEX - ); - const esCollections = (await esGranulesIterator.empty()) - .map((granule) => (granule ? granule.collectionId : undefined)); - - return union(dbCollectionIds, esCollections); -} - -/** - * Get list of collections for granule search based on input filtering parameters - * - * @param {Object} recReportParams - lambda's input filtering parameters - * @returns {Promise>} list of collectionIds - */ -async function getCollectionsForGranuleSearch(recReportParams) { - const { collectionIds, granuleIds } = recReportParams; - let collections = []; - if (granuleIds) { - const collectionIdsForGranules = await getCollectionsForGranules(recReportParams); - collections = (collectionIds) - ? intersection(collectionIds, collectionIdsForGranules) - : collectionIdsForGranules; - } else { - collections = collectionIds || await getAllCollections(); - } - return collections; -} - -/** - * Compare the granule holdings for a given collection - * - * @param {string} collectionId - collection id - * @param {Object} recReportParams - lambda's input filtering parameters - * @returns {Promise} an object with the okCount, onlyInEs, onlyInDb - * and withConfilcts - */ -async function reportForGranulesByCollectionId(collectionId, recReportParams) { - // For each collection, - // Get granule list in ES ordered by granuleId - // Get granule list in PostgreSQL ordered by granuleId - // Report granules only in ES - // Report granules only in PostgreSQL - // Report granules with different contents - - const esSearchParams = convertToESGranuleSearchParams(recReportParams); - const esGranulesIterator = new ESSearchQueue( - { - ...esSearchParams, - collectionId, - sort_key: ['granuleId'], - }, - 'granule', - process.env.ES_INDEX - ); - - const searchParams = convertToDBGranuleSearchParams({ - ...recReportParams, - collectionIds: collectionId, - }); - const granulesSearchQuery = getGranulesByApiPropertiesQuery({ - knex: recReportParams.knex, - searchParams, - sortByFields: ['collectionName', 'collectionVersion', 'granule_id'], - }); - const pgGranulesSearchClient = new QuerySearchClient( - granulesSearchQuery, - 100 // arbitrary limit on how items are fetched at once - ); - - let okCount = 0; - const withConflicts = []; - const onlyInEs = []; - const onlyInDb = []; - const granuleFields = ['granuleId', 'collectionId', 'provider', 'createdAt', 'updatedAt']; - - let [nextEsItem, nextDbItem] = await Promise.all([esGranulesIterator.peek(), pgGranulesSearchClient.peek()]); // eslint-disable-line max-len - - /* eslint-disable no-await-in-loop */ - while (nextEsItem && nextDbItem) { - if (nextEsItem.granuleId < nextDbItem.granule_id) { - // Found an item that is only in ES and not in DB - onlyInEs.push(pick(nextEsItem, granuleFields)); - await esGranulesIterator.shift(); - } else if (nextEsItem.granuleId > nextDbItem.granule_id) { - const apiGranule = await translatePostgresGranuleResultToApiGranule( - recReportParams.knex, - nextDbItem - ); - - // Found an item that is only in DB and not in ES - onlyInDb.push(pick(apiGranule, granuleFields)); - await pgGranulesSearchClient.shift(); - } else { - const apiGranule = await translatePostgresGranuleResultToApiGranule( - recReportParams.knex, - nextDbItem - ); - - // Found an item that is in both ES and DB - if (compareEsGranuleAndApiGranule(nextEsItem, apiGranule)) { - okCount += 1; - } else { - withConflicts.push({ es: nextEsItem, db: apiGranule }); - } - await Promise.all([esGranulesIterator.shift(), 
pgGranulesSearchClient.shift()]); - } - - [nextEsItem, nextDbItem] = await Promise.all([esGranulesIterator.peek(), pgGranulesSearchClient.peek()]); // eslint-disable-line max-len - } - - // Add any remaining ES items to the report - while (await esGranulesIterator.peek()) { - const item = await esGranulesIterator.shift(); - onlyInEs.push(pick(item, granuleFields)); - } - - // Add any remaining DB items to the report - while (await pgGranulesSearchClient.peek()) { - const item = await pgGranulesSearchClient.shift(); - const apiGranule = await translatePostgresGranuleResultToApiGranule(recReportParams.knex, item); - onlyInDb.push(pick(apiGranule, granuleFields)); - } - /* eslint-enable no-await-in-loop */ - - return { okCount, withConflicts, onlyInEs, onlyInDb }; -} - -/** - * Compare the granule holdings in Elasticsearch with Database - * - * @param {Object} recReportParams - lambda's input filtering parameters to - * narrow limit of report. - * @returns {Promise} an object with the okCount, onlyInEs, onlyInDb - * and withConfilcts - */ -async function internalRecReportForGranules(recReportParams) { - log.debug('internal-reconciliation-report internalRecReportForGranules'); - log.info(`internalRecReportForGranules (${JSON.stringify(recReportParams)})`); - // To avoid 'scan' granules table, we query a Global Secondary Index(GSI) in granules - // table with collectionId. - // compare granule holdings: - // Get collections list from db and es based on request parameters or use the collectionId - // from the request - // For each collection, - // compare granule holdings and get report - // Report granules only in ES - // Report granules only in PostgreSQL - // Report granules with different contents - - const collections = await getCollectionsForGranuleSearch(recReportParams); - - const searchParams = omit(recReportParams, ['collectionIds']); - - const reports = await pMap( - collections, - (collectionId) => reportForGranulesByCollectionId(collectionId, searchParams), - { - concurrency: recReportParams.concurrency, - } - ); - - const report = {}; - report.okCount = reports - .reduce((accumulator, currentValue) => accumulator + currentValue.okCount, 0); - report.withConflicts = reports - .reduce((accumulator, currentValue) => accumulator.concat(currentValue.withConflicts), []); - report.onlyInEs = reports - .reduce((accumulator, currentValue) => accumulator.concat(currentValue.onlyInEs), []); - report.onlyInDb = reports - .reduce((accumulator, currentValue) => accumulator.concat(currentValue.onlyInDb), []); - - return report; -} - -/** - * Create a Internal Reconciliation report and save it to S3 - * - * @param {Object} recReportParams - params - * @param {Object} recReportParams.collectionIds - array of collectionIds - * @param {Object} recReportParams.reportType - the report type - * @param {moment} recReportParams.createStartTime - when the report creation was begun - * @param {moment} recReportParams.endTimestamp - ending report datetime ISO Timestamp - * @param {string} recReportParams.reportKey - the s3 report key - * @param {string} recReportParams.stackName - the name of the CUMULUS stack - * @param {moment} recReportParams.startTimestamp - beginning report datetime ISO timestamp - * @param {string} recReportParams.systemBucket - the name of the CUMULUS system bucket - * @returns {Promise} a Promise that resolves when the report has been - * uploaded to S3 - */ -async function createInternalReconciliationReport(recReportParams) { - log.info(`createInternalReconciliationReport parameters 
${JSON.stringify(recReportParams)}`); - const { - reportKey, - systemBucket, - } = recReportParams; - - // Write an initial report to S3 - const initialReportFormat = { - okCount: 0, - withConflicts: [], - onlyInEs: [], - onlyInDb: [], - }; - - let report = { - ...initialReportHeader(recReportParams), - collections: cloneDeep(initialReportFormat), - granules: cloneDeep(initialReportFormat), - }; - - try { - await s3().putObject({ - Bucket: systemBucket, - Key: reportKey, - Body: JSON.stringify(report, undefined, 2), - }); - - const [collectionsReport, granulesReport] = await Promise.all([ - internalRecReportForCollections(recReportParams), - internalRecReportForGranules(recReportParams), - ]); - report = Object.assign(report, { collections: collectionsReport, granules: granulesReport }); - - // Create the full report - report.createEndTime = moment.utc().toISOString(); - report.status = 'SUCCESS'; - - // Write the full report to S3 - return s3().putObject({ - Bucket: systemBucket, - Key: reportKey, - Body: JSON.stringify(report, undefined, 2), - }); - } catch (error) { - log.error(`Error caught in createInternalReconciliationReport. ${error}`); - // Create the full report - report.createEndTime = moment.utc().toISOString(); - report.status = 'Failed'; - - // Write the full report to S3 - await s3().putObject({ - Bucket: systemBucket, - Key: reportKey, - Body: JSON.stringify(report, undefined, 2), - }); - throw error; - } -} - -module.exports = { - compareEsGranuleAndApiGranule, - internalRecReportForCollections, - internalRecReportForGranules, - createInternalReconciliationReport, -}; diff --git a/packages/api/lib/reconciliationReport.js b/packages/api/lib/reconciliationReport.js index 7cebd39e0f6..fdacf692709 100644 --- a/packages/api/lib/reconciliationReport.js +++ b/packages/api/lib/reconciliationReport.js @@ -33,18 +33,6 @@ function cmrGranuleSearchParams(recReportParams) { return []; } -// TODO: remove -/** - * Prepare a list of collectionIds into an _id__in object - * - * @param {Array} collectionIds - Array of collectionIds in the form 'name___ver' - * @returns {Object} - object that will return the correct terms search when - * passed to the query command. - */ -function searchParamsForCollectionIdArray(collectionIds) { - return { _id__in: collectionIds.join(',') }; -} - /** * @param {string} dateable - any input valid for a JS Date contstructor. * @returns {number | undefined} - primitive value of input date string or undefined, if @@ -60,25 +48,6 @@ function dateStringToDateOrNull(dateable) { return !Number.isNaN(date.valueOf()) ? date : undefined; } -// TODO - Remove this in 3806 PR #3 -/** - * - * @param {Object} params - request params to convert to Elasticsearch params - * @returns {Object} object of desired parameters formatted for Elasticsearch collection search - */ -function convertToESCollectionSearchParams(params) { - const { collectionIds, startTimestamp, endTimestamp } = params; - const idsIn = collectionIds - ? 
searchParamsForCollectionIdArray(collectionIds) - : undefined; - const searchParams = { - updatedAt__from: dateToValue(startTimestamp), - updatedAt__to: dateToValue(endTimestamp), - ...idsIn, - }; - return removeNilProperties(searchParams); -} - /** * convertToDBCollectionSearchObject - Creates Postgres search object from * InternalRecReport Parameters @@ -111,25 +80,6 @@ function convertToDBCollectionSearchObject(params) { return searchParams; } -/** - * - * @param {Object} params - request params to convert to Elasticsearch params - * @returns {Object} object of desired parameters formated for Elasticsearch. - */ -function convertToESGranuleSearchParams(params) { - const { collectionIds, granuleIds, providers, startTimestamp, endTimestamp } = params; - const collectionIdIn = collectionIds ? collectionIds.join(',') : undefined; - const granuleIdIn = granuleIds ? granuleIds.join(',') : undefined; - const providerIn = providers ? providers.join(',') : undefined; - return removeNilProperties({ - updatedAt__from: dateToValue(startTimestamp), - updatedAt__to: dateToValue(endTimestamp), - collectionId__in: collectionIdIn, - granuleId__in: granuleIdIn, - provider__in: providerIn, - }); -} - /** * Convert reconciliation report parameters to PostgreSQL database search params. * @@ -160,22 +110,6 @@ function convertToDBGranuleSearchParams(params) { return removeNilProperties(searchParams); } -/** - * convert to es search parameters using createdAt for report time range - * - * @param {Object} params - request params to convert to Elasticsearch params - * @returns {Object} object of desired parameters formatted for Elasticsearch. - */ -function convertToESGranuleSearchParamsWithCreatedAtRange(params) { - const searchParamsWithUpdatedAt = convertToESGranuleSearchParams(params); - const searchParamsWithCreatedAt = { - createdAt__from: searchParamsWithUpdatedAt.updatedAt__from, - createdAt__to: searchParamsWithUpdatedAt.updatedAt__to, - ...omit(searchParamsWithUpdatedAt, ['updatedAt__from', 'updatedAt__to']), - }; - return removeNilProperties(searchParamsWithCreatedAt); -} - /** * * @param {Object} params - request params to convert to orca params @@ -296,12 +230,8 @@ module.exports = { cmrGranuleSearchParams, convertToDBCollectionSearchObject, convertToDBGranuleSearchParams, - convertToESCollectionSearchParams, - convertToESGranuleSearchParams, - convertToESGranuleSearchParamsWithCreatedAtRange, convertToOrcaGranuleSearchParams, filterDBCollections, initialReportHeader, - searchParamsForCollectionIdArray, compareEsGranuleAndApiGranule, }; diff --git a/packages/api/tests/lambdas/test-internal-reconciliation-report.js b/packages/api/tests/lambdas/test-internal-reconciliation-report.js deleted file mode 100644 index 00d90ba8d66..00000000000 --- a/packages/api/tests/lambdas/test-internal-reconciliation-report.js +++ /dev/null @@ -1,483 +0,0 @@ -'use strict'; - -const test = require('ava'); -const moment = require('moment'); -const flatten = require('lodash/flatten'); -const range = require('lodash/range'); -const cryptoRandomString = require('crypto-random-string'); - -const { - recursivelyDeleteS3Bucket, -} = require('@cumulus/aws-client/S3'); -const awsServices = require('@cumulus/aws-client/services'); -const { randomId } = require('@cumulus/common/test-utils'); -const { constructCollectionId, deconstructCollectionId } = require('@cumulus/message/Collections'); -const { generateGranuleApiRecord } = require('@cumulus/message/Granules'); -const { bootstrapElasticSearch } = 
require('@cumulus/es-client/bootstrap'); -const indexer = require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); - -const { - CollectionPgModel, - destroyLocalTestDb, - generateLocalTestDb, - localStackConnectionEnv, - translateApiCollectionToPostgresCollection, - migrationDir, - translateApiGranuleToPostgresGranule, - GranulePgModel, - fakeProviderRecordFactory, - ProviderPgModel, - upsertGranuleWithExecutionJoinRecord, - fakeExecutionRecordFactory, - ExecutionPgModel, - FilePgModel, - translateApiFiletoPostgresFile, -} = require('@cumulus/db'); - -const { - fakeCollectionFactory, - // fakeFileFactory, - fakeGranuleFactoryV2, - fakeFileFactory, -} = require('../../lib/testUtils'); -const { - internalRecReportForCollections, - internalRecReportForGranules, -} = require('../../lambdas/internal-reconciliation-report'); -const { normalizeEvent } = require('../../lib/reconciliationReport/normalizeEvent'); - -let esAlias; -let esIndex; -let esClient; - -test.before((t) => { - t.context.collectionPgModel = new CollectionPgModel(); - t.context.granulePgModel = new GranulePgModel(); - t.context.providerPgModel = new ProviderPgModel(); - t.context.executionPgModel = new ExecutionPgModel(); - t.context.filePgModel = new FilePgModel(); -}); - -test.beforeEach(async (t) => { - t.context.bucketsToCleanup = []; - t.context.stackName = randomId('stack'); - t.context.systemBucket = randomId('bucket'); - process.env.system_bucket = t.context.systemBucket; - - await awsServices.s3().createBucket({ Bucket: t.context.systemBucket }) - .then(() => t.context.bucketsToCleanup.push(t.context.systemBucket)); - - esAlias = randomId('esalias'); - esIndex = randomId('esindex'); - process.env.ES_INDEX = esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); - esClient = await getEsClient(); - - t.context.testDbName = `test_internal_recon_${cryptoRandomString({ length: 10 })}`; - const { knex, knexAdmin } = await generateLocalTestDb(t.context.testDbName, migrationDir); - t.context.knex = knex; - t.context.knexAdmin = knexAdmin; - process.env = { - ...process.env, - ...localStackConnectionEnv, - PG_DATABASE: t.context.testDbName, - }; -}); - -test.afterEach.always(async (t) => { - await destroyLocalTestDb({ - knex: t.context.knex, - knexAdmin: t.context.knexAdmin, - testDbName: t.context.testDbName, - }); - await Promise.all(flatten(t.context.bucketsToCleanup.map(recursivelyDeleteS3Bucket))); - await esClient.client.indices.delete({ index: esIndex }); -}); - -test.serial('internalRecReportForCollections reports discrepancy of collection holdings in ES and DB', async (t) => { - const { knex, collectionPgModel } = t.context; - - const matchingColls = range(10).map(() => fakeCollectionFactory()); - const extraDbColls = range(2).map(() => fakeCollectionFactory()); - const extraEsColls = range(2).map(() => fakeCollectionFactory()); - - const conflictCollInDb = fakeCollectionFactory({ meta: { flag: 'db' } }); - const conflictCollInEs = { ...conflictCollInDb, meta: { flag: 'es' } }; - - const esCollections = matchingColls.concat(extraEsColls, conflictCollInEs); - const dbCollections = matchingColls.concat(extraDbColls, conflictCollInDb); - - await Promise.all( - esCollections.map((collection) => indexer.indexCollection(esClient, collection, esAlias)) - ); - - await Promise.all( - dbCollections.map((collection) => - collectionPgModel.create( - knex, - translateApiCollectionToPostgresCollection(collection) - )) - ); - - let report = 
await internalRecReportForCollections({}); - - t.is(report.okCount, 10); - t.is(report.onlyInEs.length, 2); - t.deepEqual(report.onlyInEs.sort(), - extraEsColls.map((coll) => constructCollectionId(coll.name, coll.version)).sort()); - t.is(report.onlyInDb.length, 2); - t.deepEqual(report.onlyInDb.sort(), - extraDbColls.map((coll) => constructCollectionId(coll.name, coll.version)).sort()); - t.is(report.withConflicts.length, 1); - t.deepEqual(report.withConflicts[0].es.collectionId, conflictCollInEs.collectionId); - t.deepEqual(report.withConflicts[0].db.collectionId, conflictCollInDb.collectionId); - - // start/end time include all the collections - const searchParams = { - startTimestamp: moment.utc().subtract(1, 'hour').format(), - endTimestamp: moment.utc().add(1, 'hour').format(), - }; - report = await internalRecReportForCollections(normalizeEvent({ ...searchParams, stackName: 'testStack' })); - t.is(report.okCount, 10); - t.is(report.onlyInEs.length, 2); - t.is(report.onlyInDb.length, 2); - t.is(report.withConflicts.length, 1); - - // start/end time has no matching collections - const paramsTimeOutOfRange = { - startTimestamp: moment.utc().add(1, 'hour').format(), - endTimestamp: moment.utc().add(2, 'hour').format(), - }; - - report = await internalRecReportForCollections(normalizeEvent({ ...paramsTimeOutOfRange, stackName: 'testStack' })); - t.is(report.okCount, 0); - t.is(report.onlyInEs.length, 0); - t.is(report.onlyInDb.length, 0); - t.is(report.withConflicts.length, 0); - - // collectionId matches the collection with conflicts - const collectionId = constructCollectionId(conflictCollInDb.name, conflictCollInDb.version); - const paramsCollectionId = { ...searchParams, collectionId: [collectionId, randomId('c')] }; - - report = await internalRecReportForCollections(normalizeEvent({ ...paramsCollectionId, stackName: 'testStack' })); - t.is(report.okCount, 0); - t.is(report.onlyInEs.length, 0); - t.is(report.onlyInDb.length, 0); - t.is(report.withConflicts.length, 1); -}); - -test.serial('internalRecReportForGranules reports discrepancy of granule holdings in ES and DB', async (t) => { - const { - knex, - collectionPgModel, - providerPgModel, - executionPgModel, - } = t.context; - - // Create collection in PG/ES - const collectionId = constructCollectionId(randomId('name'), randomId('version')); - - // Create provider in PG - const provider = fakeProviderRecordFactory(); - await providerPgModel.create(knex, provider); - - const matchingGrans = range(10).map(() => fakeGranuleFactoryV2({ - collectionId, - provider: provider.name, - })); - const additionalMatchingGrans = range(10).map(() => fakeGranuleFactoryV2({ - provider: provider.name, - })); - const extraDbGrans = range(2).map(() => fakeGranuleFactoryV2({ - collectionId, - provider: provider.name, - })); - const additionalExtraDbGrans = range(2).map(() => fakeGranuleFactoryV2()); - const extraEsGrans = range(2).map(() => fakeGranuleFactoryV2({ - provider: provider.name, - })); - const additionalExtraEsGrans = range(2) - .map(() => fakeGranuleFactoryV2({ - collectionId, - provider: provider.name, - })); - const conflictGranInDb = fakeGranuleFactoryV2({ collectionId, status: 'completed' }); - const conflictGranInEs = { ...conflictGranInDb, status: 'failed' }; - - const esGranules = matchingGrans - .concat(additionalMatchingGrans, extraEsGrans, additionalExtraEsGrans, conflictGranInEs); - const dbGranules = matchingGrans - .concat(additionalMatchingGrans, extraDbGrans, additionalExtraDbGrans, conflictGranInDb); - - // add granules and 
related collections to es and db - await Promise.all( - esGranules.map(async (granule) => { - const collection = fakeCollectionFactory({ - ...deconstructCollectionId(granule.collectionId), - }); - await indexer.indexCollection(esClient, collection, esAlias); - await collectionPgModel.upsert( - knex, - translateApiCollectionToPostgresCollection(collection) - ); - await indexer.indexGranule(esClient, granule, esAlias); - }) - ); - - await Promise.all( - dbGranules.map(async (granule) => { - const pgGranule = await translateApiGranuleToPostgresGranule({ - dynamoRecord: granule, - knexOrTransaction: knex, - }); - let pgExecution = {}; - if (granule.execution) { - const pgExecutionData = fakeExecutionRecordFactory({ - url: granule.execution, - }); - ([pgExecution] = await executionPgModel.create(knex, pgExecutionData)); - } - return upsertGranuleWithExecutionJoinRecord({ - executionCumulusId: pgExecution.cumulus_id, - granule: pgGranule, - knexTransaction: knex, - }); - }) - ); - - let report = await internalRecReportForGranules({ knex }); - t.is(report.okCount, 20); - t.is(report.onlyInEs.length, 4); - t.deepEqual(report.onlyInEs.map((gran) => gran.granuleId).sort(), - extraEsGrans.concat(additionalExtraEsGrans).map((gran) => gran.granuleId).sort()); - t.is(report.onlyInDb.length, 4); - t.deepEqual(report.onlyInDb.map((gran) => gran.granuleId).sort(), - extraDbGrans.concat(additionalExtraDbGrans).map((gran) => gran.granuleId).sort()); - t.is(report.withConflicts.length, 1); - t.deepEqual(report.withConflicts[0].es.granuleId, conflictGranInEs.granuleId); - t.deepEqual(report.withConflicts[0].db.granuleId, conflictGranInDb.granuleId); - - // start/end time include all the collections and granules - const searchParams = { - reportType: 'Internal', - startTimestamp: moment.utc().subtract(1, 'hour').format(), - endTimestamp: moment.utc().add(1, 'hour').format(), - }; - report = await internalRecReportForGranules({ - ...normalizeEvent({ ...searchParams, stackName: 'testStack' }), - knex, - }); - t.is(report.okCount, 20); - t.is(report.onlyInEs.length, 4); - t.is(report.onlyInDb.length, 4); - t.is(report.withConflicts.length, 1); - - // start/end time has no matching collections and granules - const outOfRangeParams = { - startTimestamp: moment.utc().add(1, 'hour').format(), - endTimestamp: moment.utc().add(2, 'hour').format(), - }; - - report = await internalRecReportForGranules({ - ...normalizeEvent({ ...outOfRangeParams, stackName: 'testStack' }), - knex, - }); - t.is(report.okCount, 0); - t.is(report.onlyInEs.length, 0); - t.is(report.onlyInDb.length, 0); - t.is(report.withConflicts.length, 0); - - // collectionId, provider parameters - const collectionProviderParams = { ...searchParams, collectionId, provider: provider.name }; - report = await internalRecReportForGranules({ - ...normalizeEvent({ ...collectionProviderParams, stackName: 'testStack' }), - knex, - }); - t.is(report.okCount, 10); - t.is(report.onlyInEs.length, 2); - t.deepEqual(report.onlyInEs.map((gran) => gran.granuleId).sort(), - additionalExtraEsGrans.map((gran) => gran.granuleId).sort()); - t.is(report.onlyInDb.length, 2); - t.deepEqual(report.onlyInDb.map((gran) => gran.granuleId).sort(), - extraDbGrans.map((gran) => gran.granuleId).sort()); - t.is(report.withConflicts.length, 0); - - // provider parameter - const providerParams = { ...searchParams, provider: [randomId('p'), provider.name] }; - report = await internalRecReportForGranules({ - ...normalizeEvent({ ...providerParams, stackName: 'testStack' }), - knex, - }); - 
t.is(report.okCount, 20); - t.is(report.onlyInEs.length, 4); - t.deepEqual(report.onlyInEs.map((gran) => gran.granuleId).sort(), - extraEsGrans.concat(additionalExtraEsGrans).map((gran) => gran.granuleId).sort()); - t.is(report.onlyInDb.length, 2); - t.deepEqual(report.onlyInDb.map((gran) => gran.granuleId).sort(), - extraDbGrans.map((gran) => gran.granuleId).sort()); - t.is(report.withConflicts.length, 0); - - // collectionId, granuleId parameters - const granuleId = conflictGranInDb.granuleId; - const granuleIdParams = { - ...searchParams, - granuleId: [granuleId, extraEsGrans[0].granuleId, randomId('g')], - collectionId: [collectionId, extraEsGrans[0].collectionId, extraEsGrans[1].collectionId], - }; - report = await internalRecReportForGranules({ - ...normalizeEvent({ ...granuleIdParams, stackName: 'testStack' }), - knex, - }); - t.is(report.okCount, 0); - t.is(report.onlyInEs.length, 1); - t.is(report.onlyInEs[0].granuleId, extraEsGrans[0].granuleId); - t.is(report.onlyInDb.length, 0); - t.is(report.withConflicts.length, 1); -}); - -test.serial('internalRecReportForGranules handles generated granules with custom timestamps', async (t) => { - const { - knex, - collectionPgModel, - providerPgModel, - executionPgModel, - } = t.context; - - // Create collection in PG/ES - const collectionId = constructCollectionId(randomId('name'), randomId('version')); - const collection = fakeCollectionFactory({ - ...deconstructCollectionId(collectionId), - }); - await indexer.indexCollection(esClient, collection, esAlias); - await collectionPgModel.upsert( - knex, - translateApiCollectionToPostgresCollection(collection) - ); - - // Create provider in PG - const provider = fakeProviderRecordFactory(); - await providerPgModel.create(knex, provider); - - // Use date string with extra precision to make sure it is saved - // correctly in dynamo, PG, an Elasticsearch - const dateString = '2018-04-25T21:45:45.524053'; - - await Promise.all(range(5).map(async () => { - const fakeGranule = fakeGranuleFactoryV2({ - collectionId, - provider: provider.name, - }); - - const processingTimeInfo = { - processingStartDateTime: dateString, - processingEndDateTime: dateString, - }; - - const cmrTemporalInfo = { - beginningDateTime: dateString, - endingDateTime: dateString, - productionDateTime: dateString, - lastUpdateDateTime: dateString, - }; - - const apiGranule = await generateGranuleApiRecord({ - ...fakeGranule, - granule: fakeGranule, - executionUrl: fakeGranule.execution, - processingTimeInfo, - cmrTemporalInfo, - }); - const pgGranule = await translateApiGranuleToPostgresGranule({ - dynamoRecord: apiGranule, - knexOrTransaction: knex, - }); - - let pgExecution = {}; - if (apiGranule.execution) { - const pgExecutionData = fakeExecutionRecordFactory({ - url: apiGranule.execution, - }); - ([pgExecution] = await executionPgModel.create(knex, pgExecutionData)); - } - await upsertGranuleWithExecutionJoinRecord({ - executionCumulusId: pgExecution.cumulus_id, - granule: pgGranule, - knexTransaction: knex, - }); - await indexer.indexGranule(esClient, apiGranule, esAlias); - })); - - const report = await internalRecReportForGranules({ knex }); - t.is(report.okCount, 5); - t.is(report.onlyInEs.length, 0); - t.is(report.onlyInDb.length, 0); -}); - -test.serial('internalRecReportForGranules handles granules with files', async (t) => { - const { - knex, - collectionPgModel, - executionPgModel, - filePgModel, - } = t.context; - - // Create collection in PG/ES - const collectionId = constructCollectionId(randomId('name'), 
randomId('version')); - const collection = fakeCollectionFactory({ - ...deconstructCollectionId(collectionId), - }); - await indexer.indexCollection(esClient, collection, esAlias); - await collectionPgModel.upsert( - knex, - translateApiCollectionToPostgresCollection(collection) - ); - await Promise.all(range(2).map(async () => { - const fakeGranule = fakeGranuleFactoryV2({ - collectionId, - files: [fakeFileFactory(), fakeFileFactory(), fakeFileFactory()], - }); - - const fakeCmrUtils = { - getGranuleTemporalInfo: () => Promise.resolve({}), - }; - const apiGranule = await generateGranuleApiRecord({ - ...fakeGranule, - granule: fakeGranule, - executionUrl: fakeGranule.execution, - cmrUtils: fakeCmrUtils, - }); - const pgGranule = await translateApiGranuleToPostgresGranule({ - dynamoRecord: apiGranule, - knexOrTransaction: knex, - }); - - const pgExecutionData = fakeExecutionRecordFactory({ - url: apiGranule.execution, - }); - const [pgExecution] = await executionPgModel.create(knex, pgExecutionData); - - const [pgGranuleRecord] = await upsertGranuleWithExecutionJoinRecord({ - executionCumulusId: pgExecution.cumulus_id, - granule: pgGranule, - knexTransaction: knex, - }); - await Promise.all(apiGranule.files.map(async (file) => { - const pgFile = translateApiFiletoPostgresFile(file); - await filePgModel.create(knex, { - ...pgFile, - granule_cumulus_id: pgGranuleRecord.cumulus_id, - }); - })); - await indexer.indexGranule(esClient, apiGranule, esAlias); - })); - - const report = await internalRecReportForGranules({ knex }); - t.is(report.okCount, 2); - t.is(report.onlyInEs.length, 0); - t.is(report.onlyInDb.length, 0); -}); diff --git a/packages/api/tests/lib/test-reconciliationReport.js b/packages/api/tests/lib/test-reconciliationReport.js index 3722a3bdb11..2c7255475c7 100644 --- a/packages/api/tests/lib/test-reconciliationReport.js +++ b/packages/api/tests/lib/test-reconciliationReport.js @@ -8,12 +8,8 @@ const { constructCollectionId } = require('@cumulus/message/Collections'); const sortBy = require('lodash/sortBy'); const { convertToDBCollectionSearchObject, - convertToESCollectionSearchParams, - convertToESGranuleSearchParams, - convertToESGranuleSearchParamsWithCreatedAtRange, convertToOrcaGranuleSearchParams, filterDBCollections, - searchParamsForCollectionIdArray, compareEsGranuleAndApiGranule, } = require('../../lib/reconciliationReport'); const { fakeCollectionFactory } = require('../../lib/testUtils'); @@ -37,63 +33,6 @@ test('dateToValue returns undefined for any string that cannot be converted to a testStrings.map((testVal) => t.is(dateToValue(testVal), undefined)); }); -test('convertToESCollectionSearchParams returns correct search object.', (t) => { - const startTimestamp = '2000-10-31T15:00:00.000Z'; - const endTimestamp = '2001-10-31T15:00:00.000Z'; - const testObj = { - startTimestamp, - endTimestamp, - anotherKey: 'anything', - anotherKey2: 'they are ignored', - }; - - const expected = { - updatedAt__from: 973004400000, - updatedAt__to: 1004540400000, - }; - - const actual = convertToESCollectionSearchParams(testObj); - t.deepEqual(actual, expected); -}); - -test('convertToESGranuleSearchParams returns correct search object.', (t) => { - const startTimestamp = '2010-01-01T00:00:00.000Z'; - const endTimestamp = '2011-10-01T12:00:00.000Z'; - const testObj = { - startTimestamp, - endTimestamp, - anotherKey: 'anything', - anotherKey2: 'they are ignored', - }; - - const expected = { - updatedAt__from: 1262304000000, - updatedAt__to: 1317470400000, - }; - - const actual = 
convertToESGranuleSearchParams(testObj); - t.deepEqual(actual, expected); -}); - -test('convertToESGranuleSearchParamsWithCreatedAtRange returns correct search object.', (t) => { - const startTimestamp = '2010-01-01T00:00:00.000Z'; - const endTimestamp = '2011-10-01T12:00:00.000Z'; - const testObj = { - startTimestamp, - endTimestamp, - anotherKey: 'anything', - anotherKey2: 'they are ignored', - }; - - const expected = { - createdAt__from: 1262304000000, - createdAt__to: 1317470400000, - }; - - const actual = convertToESGranuleSearchParamsWithCreatedAtRange(testObj); - t.deepEqual(actual, expected); -}); - test('convertToOrcaGranuleSearchParams returns correct search object.', (t) => { const startTimestamp = '2010-01-01T00:00:00.000Z'; const endTimestamp = '2011-10-01T12:00:00.000Z'; @@ -119,28 +58,6 @@ test('convertToOrcaGranuleSearchParams returns correct search object.', (t) => { t.deepEqual(actual, expected); }); -test('convertToESCollectionSearchParams returns correct search object with collectionIds.', (t) => { - const startTimestamp = '2000-10-31T15:00:00.000Z'; - const endTimestamp = '2001-10-31T15:00:00.000Z'; - const collectionIds = ['name____version', 'name2___version']; - const testObj = { - startTimestamp, - endTimestamp, - collectionIds, - anotherKey: 'anything', - anotherKey2: 'they are ignored', - }; - - const expected = { - updatedAt__from: 973004400000, - updatedAt__to: 1004540400000, - _id__in: 'name____version,name2___version', - }; - - const actual = convertToESCollectionSearchParams(testObj); - t.deepEqual(actual, expected); -}); - test('convertToDBCollectionSearchParams returns correct search object with collectionIds.', (t) => { const startTimestamp = '2000-10-31T15:00:00.000Z'; const endTimestamp = '2001-10-31T15:00:00.000Z'; @@ -230,17 +147,6 @@ test("filterDBCollections filters collections by recReportParams's collectionIds t.deepEqual(actual, expected); }); -test('searchParamsForCollectionIdArray converts array of collectionIds to a proper object to pass to the query command.', (t) => { - const collectionIds = ['col1___ver1', 'col1___ver2', 'col2___ver1']; - - const expectedInputQueryParams = { - _id__in: 'col1___ver1,col1___ver2,col2___ver1', - }; - - const actualSearchParams = searchParamsForCollectionIdArray(collectionIds); - t.deepEqual(actualSearchParams, expectedInputQueryParams); -}); - test('compareEsGranuleAndApiGranule returns true for matching granules', (t) => { const granule = { granuleId: cryptoRandomString({ length: 5 }), diff --git a/tf-modules/archive/reconciliation_report.tf b/tf-modules/archive/reconciliation_report.tf index 36089d77949..b09ef5a337f 100644 --- a/tf-modules/archive/reconciliation_report.tf +++ b/tf-modules/archive/reconciliation_report.tf @@ -17,7 +17,6 @@ resource "aws_lambda_function" "create_reconciliation_report" { ES_HOST = var.elasticsearch_hostname ES_SCROLL = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_duration", "6m") ES_SCROLL_SIZE = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_size", 1000) - ReconciliationReportsTable = var.dynamo_tables.reconciliation_reports.name stackName = var.prefix system_bucket = var.system_bucket cmr_client_id = var.cmr_client_id From d548a74eab49a6b1c943dda05c4831a4d9387547 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Fri, 1 Nov 2024 14:20:50 -0400 Subject: [PATCH 54/61] CUMULUS-3718: Update Reconciliation Report List endpoints to query postgres (#3829) * first pass at making this work, tests fail * CUMULUS-3718: 
make recon reports list endpoint work, add recon reports to stats endpoint * make db/search/ReconciliationReportSearch work * make api list endpoint for recon reports work using the fixed search * fix/clean test file for recon reports endpoints - list test works, remove ES references * add recon reports to stats endpoint * add tests for recon reports stats endpoint * add test for recon reports aggregate in test-StatsSearch * CUMULUS-3718: update changelog * CUMULUS-3718: fix lint errors * CUMULUS-3718: edits for PR comments, add field mapping and test, add test file for ReconciliationReportSearch * CUMULUS-3718: fix lint issues * CUMULUS-3718: remove commented out test that won't work, modifymethods using map * CUMULUS-3718: fix more lint issues, from modifying methods using map --------- Co-authored-by: etcart --- CHANGELOG.md | 3 + audit-ci.json | 6 - .../api/endpoints/reconciliation-reports.js | 27 +- packages/api/endpoints/stats.js | 1 + .../endpoints/test-reconciliation-reports.js | 36 +-- .../endpoints/{stats.js => test-stats.js} | 123 ++++++--- packages/db/src/index.ts | 3 + packages/db/src/search/BaseSearch.ts | 1 + .../src/search/ReconciliationReportSearch.ts | 88 +++++++ packages/db/src/search/StatsSearch.ts | 1 + packages/db/src/search/field-mapping.ts | 28 ++ .../db/tests/search/test-ProviderSearch.js | 34 +-- .../search/test-ReconciliationReportSearch.js | 246 ++++++++++++++++++ packages/db/tests/search/test-RuleSearch.js | 41 ++- packages/db/tests/search/test-StatsSearch.js | 199 +++++++------- .../db/tests/search/test-field-mapping.js | 28 ++ 16 files changed, 624 insertions(+), 241 deletions(-) rename packages/api/tests/endpoints/{stats.js => test-stats.js} (70%) create mode 100644 packages/db/src/search/ReconciliationReportSearch.ts create mode 100644 packages/db/tests/search/test-ReconciliationReportSearch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index d9e317a905e..b514adcf66a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,9 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE - Created api types for `reconciliation_reports` in `@cumulus/types/api` - Updated reconciliation reports lambda to write to new RDS table instead of Dynamo - Updated `@cumulus/api/endpoints/reconciliation-reports` `getReport` and `deleteReport` to work with the new RDS table instead of Dynamo +- **CUMULUS-3718** + - Updated `reconciliation_reports` list api endpoint and added `ReconciliationReportSearch` class to query postgres + - Added `reconciliationReports` type to stats endpoint, so `aggregate` query will work for reconciliation reports ## [Unreleased] diff --git a/audit-ci.json b/audit-ci.json index ada4b945034..939eba33e1e 100644 --- a/audit-ci.json +++ b/audit-ci.json @@ -3,12 +3,6 @@ "pass-enoaudit": true, "retry-count": 20, "allowlist": [ - { - "GHSA-776f-qx25-q3cc": { - "active": true, - "expiry": "1 July 2023 11:00" - } - }, "jsonpath-plus", "semver" ] diff --git a/packages/api/endpoints/reconciliation-reports.js b/packages/api/endpoints/reconciliation-reports.js index 7c97183cfbb..68467065668 100644 --- a/packages/api/endpoints/reconciliation-reports.js +++ b/packages/api/endpoints/reconciliation-reports.js @@ -16,11 +16,10 @@ const { const S3ObjectStore = require('@cumulus/aws-client/S3ObjectStore'); const { s3 } = require('@cumulus/aws-client/services'); -const { inTestMode } = require('@cumulus/common/test-utils'); const { RecordDoesNotExist } = require('@cumulus/errors'); const Logger = require('@cumulus/logger'); -const { Search, 
getEsClient } = require('@cumulus/es-client/search'); -const indexer = require('@cumulus/es-client/indexer'); + +const { ReconciliationReportSearch } = require('@cumulus/db'); const { ReconciliationReportPgModel, @@ -48,14 +47,11 @@ const maxResponsePayloadSizeBytes = 6 * 1000 * 1000; * @returns {Promise} the promise of express response object */ async function listReports(req, res) { - const search = new Search( - { queryStringParameters: req.query }, - 'reconciliationReport', - process.env.ES_INDEX + const dbSearch = new ReconciliationReportSearch( + { queryStringParameters: req.query } ); - - const response = await search.query(); - return res.send(response); + const result = await dbSearch.query(); + return res.send(result); } /** @@ -160,17 +156,6 @@ async function deleteReport(req, res) { await reconciliationReportPgModel.delete(knex, { name }); }); - if (inTestMode()) { - const esClient = await getEsClient(process.env.ES_HOST); - await indexer.deleteRecord({ - esClient, - id: name, - type: 'reconciliationReport', - index: process.env.ES_INDEX, - ignore: [404], - }); - } - return res.send({ message: 'Report deleted' }); } diff --git a/packages/api/endpoints/stats.js b/packages/api/endpoints/stats.js index 8a27b380246..1ee9a521a98 100644 --- a/packages/api/endpoints/stats.js +++ b/packages/api/endpoints/stats.js @@ -21,6 +21,7 @@ function getType(req) { logs: 'logs', providers: 'provider', executions: 'execution', + reconciliationReports: 'reconciliationReport', }; const typeRequested = get(req, 'params.type') || get(req, 'query.type'); diff --git a/packages/api/tests/endpoints/test-reconciliation-reports.js b/packages/api/tests/endpoints/test-reconciliation-reports.js index b6d6455c4f6..e22a9302121 100644 --- a/packages/api/tests/endpoints/test-reconciliation-reports.js +++ b/packages/api/tests/endpoints/test-reconciliation-reports.js @@ -26,9 +26,6 @@ const { recursivelyDeleteS3Bucket, } = require('@cumulus/aws-client/S3'); const { randomId } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const indexer = require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); const startAsyncOperation = require('../../lib/startAsyncOperation'); const { @@ -57,9 +54,6 @@ const { normalizeEvent } = require('../../lib/reconciliationReport/normalizeEven const { buildFakeExpressResponse } = require('./utils'); -let esClient; -const esIndex = randomId('esindex'); - const testDbName = `test_recon_reports_${cryptoRandomString({ length: 10 })}`; let jwtAuthToken; @@ -67,18 +61,6 @@ let accessTokenModel; let fakeReportRecords = []; test.before(async (t) => { - // create esClient - esClient = await getEsClient('fakehost'); - - const esAlias = randomId('esalias'); - process.env.ES_INDEX = esAlias; - - // add fake elasticsearch index - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); accessTokenModel = new models.AccessToken(); await accessTokenModel.createTable(); @@ -131,27 +113,11 @@ test.before(async (t) => { }), }))); - // add records to es - await Promise.all( - fakeReportRecords.map((reportRecord) => - t.context.reconciliationReportPgModel - .create(knex, reportRecord) - .then( - ([reportPgRecord]) => - translatePostgresReconReportToApiReconReport(reportPgRecord) - ) - .then( - (reportApiRecord) => - indexer.indexReconciliationReport(esClient, reportApiRecord, esAlias) - )) - ); + await t.context.reconciliationReportPgModel.insert(t.context.knex, 
fakeReportRecords); }); test.after.always(async (t) => { await accessTokenModel.deleteTable(); - await esClient.client.indices.delete({ - index: esIndex, - }); await recursivelyDeleteS3Bucket(process.env.system_bucket); await destroyLocalTestDb({ knex: t.context.knex, diff --git a/packages/api/tests/endpoints/stats.js b/packages/api/tests/endpoints/test-stats.js similarity index 70% rename from packages/api/tests/endpoints/stats.js rename to packages/api/tests/endpoints/test-stats.js index 1853a7c53ac..f2168a34c81 100644 --- a/packages/api/tests/endpoints/stats.js +++ b/packages/api/tests/endpoints/test-stats.js @@ -9,12 +9,6 @@ const awsServices = require('@cumulus/aws-client/services'); const s3 = require('@cumulus/aws-client/S3'); const { randomId } = require('@cumulus/common/test-utils'); -const models = require('../../models'); -const { - createFakeJwtAuthToken, - setAuthorizedOAuthUsers, -} = require('../../lib/testUtils'); - const { destroyLocalTestDb, generateLocalTestDb, @@ -24,7 +18,15 @@ const { fakeGranuleRecordFactory, migrationDir, localStackConnectionEnv, -} = require('../../../db/dist'); + fakeReconciliationReportRecordFactory, + ReconciliationReportPgModel, +} = require('@cumulus/db'); + +const models = require('../../models'); +const { + createFakeJwtAuthToken, + setAuthorizedOAuthUsers, +} = require('../../lib/testUtils'); const testDbName = randomId('collection'); @@ -74,41 +76,39 @@ test.before(async (t) => { t.context.collectionPgModel = new CollectionPgModel(); t.context.granulePgModel = new GranulePgModel(); + t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); const statuses = ['queued', 'failed', 'completed', 'running']; const errors = [{ Error: 'UnknownError' }, { Error: 'CumulusMessageAdapterError' }, { Error: 'IngestFailure' }, { Error: 'CmrFailure' }, {}]; - const granules = []; - const collections = []; - - range(20).map((num) => ( - collections.push(fakeCollectionRecordFactory({ - name: `testCollection${num}`, - cumulus_id: num, - })) - )); - - range(100).map((num) => ( - granules.push(fakeGranuleRecordFactory({ - collection_cumulus_id: num % 20, - status: statuses[num % 4], - created_at: num === 99 - ? new Date() : (new Date(2018 + (num % 6), (num % 12), (num % 30))), - updated_at: num === 99 - ? new Date() : (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), - error: errors[num % 5], - duration: num + (num / 10), - })) - )); - - await t.context.collectionPgModel.insert( - t.context.knex, - collections - ); - - await t.context.granulePgModel.insert( - t.context.knex, - granules - ); + const reconReportTypes = ['Granule Inventory', 'Granule Not Found', 'Inventory', 'ORCA Backup']; + const reconReportStatuses = ['Generated', 'Pending', 'Failed']; + + const collections = range(20).map((num) => fakeCollectionRecordFactory({ + name: `testCollection${num}`, + cumulus_id: num, + })); + + const granules = range(100).map((num) => fakeGranuleRecordFactory({ + collection_cumulus_id: num % 20, + status: statuses[num % 4], + created_at: num === 99 + ? new Date() : (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: num === 99 + ? 
new Date() : (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), + error: errors[num % 5], + duration: num + (num / 10), + })); + + const reconReports = range(24).map((num) => fakeReconciliationReportRecordFactory({ + type: reconReportTypes[num % 4], + status: reconReportStatuses[num % 3], + created_at: (new Date(2024 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2024 + (num % 6), (num % 12), ((num + 1) % 29))), + })); + + await t.context.collectionPgModel.insert(t.context.knex, collections); + await t.context.granulePgModel.insert(t.context.knex, granules); + await t.context.reconciliationReportPgModel.insert(t.context.knex, reconReports); }); test.after.always(async (t) => { @@ -187,6 +187,12 @@ test('getType gets correct type for providers', (t) => { t.is(type, 'provider'); }); +test('getType gets correct type for reconciliation reports', (t) => { + const type = getType({ params: { type: 'reconciliationReports' } }); + + t.is(type, 'reconciliationReport'); +}); + test('getType returns undefined if type is not supported', (t) => { const type = getType({ params: { type: 'provide' } }); @@ -237,7 +243,7 @@ test('GET /stats returns correct response with date params filters values correc t.is(response.body.granules.value, 17); }); -test('GET /stats/aggregate returns correct response', async (t) => { +test('GET /stats/aggregate with type `granules` returns correct response', async (t) => { const response = await request(app) .get('/stats/aggregate?type=granules') .set('Accept', 'application/json') @@ -254,7 +260,7 @@ test('GET /stats/aggregate returns correct response', async (t) => { t.deepEqual(response.body.count, expectedCount); }); -test('GET /stats/aggregate filters correctly by date', async (t) => { +test('GET /stats/aggregate with type `granules` filters correctly by date', async (t) => { const response = await request(app) .get(`/stats/aggregate?type=granules×tamp__from=${(new Date(2020, 11, 28)).getTime()}×tamp__to=${(new Date(2023, 8, 30)).getTime()}`) .set('Accept', 'application/json') @@ -270,3 +276,38 @@ test('GET /stats/aggregate filters correctly by date', async (t) => { t.is(response.body.meta.count, 40); t.deepEqual(response.body.count, expectedCount); }); + +test('GET /stats/aggregate with type `reconciliationReports` and field `type` returns the correct response', async (t) => { + const response = await request(app) + .get('/stats/aggregate?type=reconciliationReports&field=type') + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const expectedCount = [ + { key: 'Granule Inventory', count: 6 }, + { key: 'Granule Not Found', count: 6 }, + { key: 'Inventory', count: 6 }, + { key: 'ORCA Backup', count: 6 }, + ]; + + t.is(response.body.meta.count, 24); + t.deepEqual(response.body.count, expectedCount); +}); + +test('GET /stats/aggregate with type `reconciliationReports` and field `status` returns the correct response', async (t) => { + const response = await request(app) + .get('/stats/aggregate?type=reconciliationReports&field=status') + .set('Accept', 'application/json') + .set('Authorization', `Bearer ${jwtAuthToken}`) + .expect(200); + + const expectedCount = [ + { key: 'Failed', count: 8 }, + { key: 'Generated', count: 8 }, + { key: 'Pending', count: 8 }, + ]; + + t.is(response.body.meta.count, 24); + t.deepEqual(response.body.count, expectedCount); +}); diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index c14385d500f..806bca82169 100644 --- a/packages/db/src/index.ts +++ 
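With `listReports` now delegating to `ReconciliationReportSearch` and `getType` recognizing `reconciliationReports`, both the list and aggregate paths for reconciliation reports are served from PostgreSQL rather than Elasticsearch. A minimal sketch of how a client might exercise the two endpoints, assuming the stack's usual `/reconciliationReports` route, the supertest `app` export used by these test files, and a valid JWT token (all assumptions, not part of the hunks above):

// Illustrative sketch only: route path, app export, and jwtAuthToken are assumed.
const request = require('supertest');

const { app } = require('../../app');

async function listAndAggregateReports(jwtAuthToken) {
  // List endpoint, now backed by ReconciliationReportSearch (PostgreSQL)
  const listResponse = await request(app)
    .get('/reconciliationReports?limit=10&status=Generated')
    .set('Accept', 'application/json')
    .set('Authorization', `Bearer ${jwtAuthToken}`)
    .expect(200);
  // Response shape follows the other list endpoints: { meta: { count, ... }, results: [...] }
  console.log(listResponse.body.meta.count, listResponse.body.results.length);

  // Stats aggregation now recognizes the reconciliationReports type
  const aggregateResponse = await request(app)
    .get('/stats/aggregate?type=reconciliationReports&field=status')
    .set('Accept', 'application/json')
    .set('Authorization', `Bearer ${jwtAuthToken}`)
    .expect(200);
  // e.g. [{ key: 'Generated', count: ... }, { key: 'Pending', count: ... }, ...]
  console.log(aggregateResponse.body.count);
}

module.exports = { listAndAggregateReports };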
b/packages/db/src/index.ts @@ -171,6 +171,9 @@ export { export { StatsSearch, } from './search/StatsSearch'; +export { + ReconciliationReportSearch, +} from './search/ReconciliationReportSearch'; export { AsyncOperationPgModel } from './models/async_operation'; export { BasePgModel } from './models/base'; diff --git a/packages/db/src/search/BaseSearch.ts b/packages/db/src/search/BaseSearch.ts index 9c9b123c233..5e6177454a0 100644 --- a/packages/db/src/search/BaseSearch.ts +++ b/packages/db/src/search/BaseSearch.ts @@ -28,6 +28,7 @@ export const typeToTable: { [key: string]: string } = { pdr: TableNames.pdrs, provider: TableNames.providers, rule: TableNames.rules, + reconciliationReport: TableNames.reconciliationReports, }; /** diff --git a/packages/db/src/search/ReconciliationReportSearch.ts b/packages/db/src/search/ReconciliationReportSearch.ts new file mode 100644 index 00000000000..a14bb282426 --- /dev/null +++ b/packages/db/src/search/ReconciliationReportSearch.ts @@ -0,0 +1,88 @@ +import { Knex } from 'knex'; +import Logger from '@cumulus/logger'; +import pick from 'lodash/pick'; + +import { ApiReconciliationReportRecord } from '@cumulus/types/api/reconciliation_reports'; +import { BaseSearch } from './BaseSearch'; +import { DbQueryParameters, QueryEvent } from '../types/search'; +import { translatePostgresReconReportToApiReconReport } from '../translate/reconciliation_reports'; +import { PostgresReconciliationReportRecord } from '../types/reconciliation_report'; +import { TableNames } from '../tables'; + +const log = new Logger({ sender: '@cumulus/db/ReconciliationReportSearch' }); + +/** + * Class to build and execute db search query for granules + */ +export class ReconciliationReportSearch extends BaseSearch { + constructor(event: QueryEvent) { + super(event, 'reconciliationReport'); + } + + /** + * Build basic query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + protected buildBasicQuery(knex: Knex) + : { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const { + reconciliationReports: reconciliationReportsTable, + } = TableNames; + const countQuery = knex(this.tableName) + .count('*'); + + const searchQuery = knex(this.tableName) + .select(`${this.tableName}.*`) + .select({ + reconciliationReportsName: `${reconciliationReportsTable}.name`, + }); + return { countQuery, searchQuery }; + } + + /** + * Build queries for infix and prefix + * + * @param params + * @param params.countQuery - query builder for getting count + * @param params.searchQuery - query builder for search + * @param [params.dbQueryParameters] - db query parameters + */ + protected buildInfixPrefixQuery(params: { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + dbQueryParameters?: DbQueryParameters, + }) { + const { countQuery, searchQuery, dbQueryParameters } = params; + const { infix, prefix } = dbQueryParameters ?? 
this.dbQueryParameters; + if (infix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `%${infix}%`)); + } + if (prefix) { + [countQuery, searchQuery].forEach((query) => query.whereLike(`${this.tableName}.name`, `${prefix}%`)); + } + } + + /** + * Translate postgres records to api records + * + * @param pgRecords - postgres records returned from query + * @returns translated api records + */ + protected translatePostgresRecordsToApiRecords(pgRecords: PostgresReconciliationReportRecord[]) + : Partial[] { + log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `); + const { fields } = this.dbQueryParameters; + + const apiRecords = pgRecords.map((pgRecord) => { + const apiRecord = translatePostgresReconReportToApiReconReport(pgRecord); + return fields ? pick(apiRecord, fields) : apiRecord; + }); + + return apiRecords; + } +} diff --git a/packages/db/src/search/StatsSearch.ts b/packages/db/src/search/StatsSearch.ts index 04457d476ed..0b45814b036 100644 --- a/packages/db/src/search/StatsSearch.ts +++ b/packages/db/src/search/StatsSearch.ts @@ -59,6 +59,7 @@ const infixMapping: { [key: string]: string } = { providers: 'name', executions: 'arn', pdrs: 'name', + reconciliationReports: 'name', }; /** diff --git a/packages/db/src/search/field-mapping.ts b/packages/db/src/search/field-mapping.ts index 41a64bb0530..5ad62ae22ce 100644 --- a/packages/db/src/search/field-mapping.ts +++ b/packages/db/src/search/field-mapping.ts @@ -354,6 +354,33 @@ const ruleMapping : { [key: string]: Function } = { }), }; +const reconciliationReportMapping: { [key: string]: Function } = { + name: (value?: string) => ({ + name: value, + }), + type: (value?: string) => ({ + type: value, + }), + status: (value?: string) => ({ + status: value, + }), + location: (value?: string) => ({ + location: value, + }), + error: (value?: string) => ({ + error: value, + }), + createdAt: (value?: string) => ({ + created_at: value && new Date(Number(value)), + }), + updatedAt: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), + timestamp: (value?: string) => ({ + updated_at: value && new Date(Number(value)), + }), +}; + // type and its mapping const supportedMappings: { [key: string]: any } = { granule: granuleMapping, @@ -363,6 +390,7 @@ const supportedMappings: { [key: string]: any } = { pdr: pdrMapping, provider: providerMapping, rule: ruleMapping, + reconciliationReport: reconciliationReportMapping, }; /** diff --git a/packages/db/tests/search/test-ProviderSearch.js b/packages/db/tests/search/test-ProviderSearch.js index 3530118f0ca..57d126bda33 100644 --- a/packages/db/tests/search/test-ProviderSearch.js +++ b/packages/db/tests/search/test-ProviderSearch.js @@ -25,25 +25,19 @@ test.before(async (t) => { t.context.knex = knex; t.context.providerPgModel = new ProviderPgModel(); - const providers = []; - t.context.providerSearchTmestamp = 1579352700000; + t.context.providerSearchTimestamp = 1579352700000; - range(100).map((num) => ( - providers.push(fakeProviderRecordFactory({ - cumulus_id: num, - updated_at: new Date(t.context.providerSearchTmestamp + (num % 2)), - created_at: new Date(t.context.providerSearchTmestamp - (num % 2)), - name: num % 2 === 0 ? `testProvider${num}` : `fakeProvider${num}`, - host: num % 2 === 0 ? 'cumulus-sit' : 'cumulus-uat', - global_connection_limit: num % 2 === 0 ? 0 : 10, - private_key: num % 2 === 0 ? 
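The new `ReconciliationReportSearch` class follows the same pattern as the other `BaseSearch` subclasses: query-string parameters are mapped to PostgreSQL columns through the `reconciliationReportMapping` added to field-mapping.ts, and matching rows are translated back to API-shaped records. A minimal usage sketch, assuming a configured Knex client and seeded reconciliation report rows, using only parameter names exercised by the tests added below:

// Illustrative sketch only; assumes a Knex client and existing report rows.
const { ReconciliationReportSearch } = require('@cumulus/db');

async function findFailedInventoryReports(knex) {
  const dbSearch = new ReconciliationReportSearch({
    queryStringParameters: {
      limit: 20,
      type: 'Inventory',
      status: 'Failed',
      sort_by: 'type',
      order: 'asc',
      fields: 'name,type,status',
    },
  });
  // query() resolves to { meta: { count, ... }, results: [...] }, with results
  // translated from PostgreSQL records to API records and limited to `fields`.
  const { meta, results } = await dbSearch.query(knex);
  return { count: meta.count, results };
}

module.exports = { findFailedInventoryReports };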
`fakeKey${num}` : undefined, - })) - )); + const providers = range(100).map((num) => fakeProviderRecordFactory({ + cumulus_id: num, + updated_at: new Date(t.context.providerSearchTimestamp + (num % 2)), + created_at: new Date(t.context.providerSearchTimestamp - (num % 2)), + name: num % 2 === 0 ? `testProvider${num}` : `fakeProvider${num}`, + host: num % 2 === 0 ? 'cumulus-sit' : 'cumulus-uat', + global_connection_limit: num % 2 === 0 ? 0 : 10, + private_key: num % 2 === 0 ? `fakeKey${num}` : undefined, + })); - await t.context.providerPgModel.insert( - t.context.knex, - providers - ); + await t.context.providerPgModel.insert(t.context.knex, providers); }); test.after.always(async (t) => { @@ -119,7 +113,7 @@ test('ProviderSearch supports prefix search', async (t) => { test('ProviderSearch supports term search for date field', async (t) => { const { knex } = t.context; - const testUpdatedAt = t.context.providerSearchTmestamp + 1; + const testUpdatedAt = t.context.providerSearchTimestamp + 1; const queryStringParameters = { limit: 200, updatedAt: `${testUpdatedAt}`, @@ -159,8 +153,8 @@ test('ProviderSearch supports term search for string field', async (t) => { test('ProviderSearch supports range search', async (t) => { const { knex } = t.context; - const timestamp1 = t.context.providerSearchTmestamp + 1; - const timestamp2 = t.context.providerSearchTmestamp + 2; + const timestamp1 = t.context.providerSearchTimestamp + 1; + const timestamp2 = t.context.providerSearchTimestamp + 2; const queryStringParameters = { limit: 200, timestamp__from: `${timestamp1}`, diff --git a/packages/db/tests/search/test-ReconciliationReportSearch.js b/packages/db/tests/search/test-ReconciliationReportSearch.js new file mode 100644 index 00000000000..e4a728e025a --- /dev/null +++ b/packages/db/tests/search/test-ReconciliationReportSearch.js @@ -0,0 +1,246 @@ +'use strict'; + +const test = require('ava'); +const cryptoRandomString = require('crypto-random-string'); +const range = require('lodash/range'); +const { ReconciliationReportSearch } = require('../../dist/search/ReconciliationReportSearch'); + +const { + ReconciliationReportPgModel, + fakeReconciliationReportRecordFactory, + generateLocalTestDb, + destroyLocalTestDb, + migrationDir, +} = require('../../dist'); + +const testDbName = `reconReport_${cryptoRandomString({ length: 10 })}`; + +test.before(async (t) => { + const { knexAdmin, knex } = await generateLocalTestDb( + testDbName, + migrationDir + ); + + t.context.knexAdmin = knexAdmin; + t.context.knex = knex; + t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); + const reconReportTypes = ['Granule Inventory', 'Granule Not Found', 'Inventory', 'ORCA Backup']; + const reconReportStatuses = ['Generated', 'Pending', 'Failed']; + t.context.reconReportSearchTimestamp = 1704100000000; + t.context.reportBucket = cryptoRandomString({ length: 8 }); + t.context.reportKey = cryptoRandomString({ length: 8 }); + + const reconReports = range(50).map((num) => fakeReconciliationReportRecordFactory({ + name: `fakeReconReport-${num + 1}`, + type: reconReportTypes[num % 4], + status: reconReportStatuses[num % 3], + location: `s3://fakeBucket${t.context.reportBucket}/fakeKey${t.context.reportKey}`, + updated_at: new Date(t.context.reconReportSearchTimestamp + (num % 2)), + created_at: new Date(t.context.reconReportSearchTimestamp - (num % 2)), + })); + + await t.context.reconciliationReportPgModel.insert(t.context.knex, reconReports); +}); + +test.after.always(async (t) => { + await 
destroyLocalTestDb({ + ...t.context, + testDbName, + }); +}); + +test('ReconciliationReportSearch returns the correct response for a basic query', async (t) => { + const { knex } = t.context; + const dbSearch = new ReconciliationReportSearch({}); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results.length, 10); + + const expectedResponse1 = { + name: 'fakeReconReport-1', + type: 'Granule Inventory', + status: 'Generated', + location: `s3://fakeBucket${t.context.reportBucket}/fakeKey${t.context.reportKey}`, + updatedAt: t.context.reconReportSearchTimestamp, + createdAt: t.context.reconReportSearchTimestamp, + }; + + const expectedResponse10 = { + name: 'fakeReconReport-10', + type: 'Granule Not Found', + status: 'Generated', + location: `s3://fakeBucket${t.context.reportBucket}/fakeKey${t.context.reportKey}`, + updatedAt: t.context.reconReportSearchTimestamp + 1, + createdAt: t.context.reconReportSearchTimestamp - 1, + }; + + t.deepEqual(response.results[0], expectedResponse1); + t.deepEqual(response.results[9], expectedResponse10); +}); + +test('ReconciliationReportSearch supports page and limit params', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + limit: 25, + page: 2, + }; + let dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 25); + + queryStringParameters = { + limit: 10, + page: 5, + }; + dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + + queryStringParameters = { + limit: 10, + page: 11, + }; + dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 0); +}); + +test('ReconciliationReportSearch supports prefix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + prefix: 'fakeReconReport-1', + }; + const dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 11); + t.is(response.results?.length, 11); +}); + +test('ReconciliationReportSearch supports infix search', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + infix: 'conReport-2', + }; + const dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 11); + t.is(response.results?.length, 11); +}); + +test('ReconciliationReportSearch supports sorting', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + sort_by: 'type', + order: 'asc', + }; + const dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results[0].type < response.results[15].type); + t.true(response.results[16].type < response.results[30].type); + t.true(response.results[31].type < response.results[45].type); +}); + +test('ReconciliationReportSearch supports term search for string fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + status: 'Generated', + }; + const dbSearch = new 
ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 17); + t.is(response.results?.length, 17); + t.true(response.results?.every((result) => result.status === 'Generated')); +}); + +test('ReconciliationReportSearch supports term search for date fields', async (t) => { + const { knex } = t.context; + const testUpdatedAt = t.context.reconReportSearchTimestamp + 1; + const queryStringParameters = { + limit: 100, + updatedAt: `${testUpdatedAt}`, + }; + const dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 25); + t.is(response.results?.length, 25); + t.true(response.results?.every((report) => report.updatedAt === testUpdatedAt)); +}); + +test('ReconciliationReportSearch supports range search', async (t) => { + const { knex } = t.context; + const timestamp1 = t.context.reconReportSearchTimestamp - 1; + const timestamp2 = t.context.reconReportSearchTimestamp + 1; + const queryStringParameters = { + limit: 100, + timestamp__from: `${timestamp1}`, + timestamp__to: `${timestamp2}`, + }; + const dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); + t.true(response.results?.every((report) => report.updatedAt >= timestamp1 + && report.updatedAt <= timestamp2)); +}); + +test('ReconciliationReportSearch supports search for multiple fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 50, + type: 'Inventory', + status: 'Failed', + }; + + const dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 4); + t.is(response.results?.length, 4); + t.true(response.results?.every((report) => + report.type === 'Inventory' && report.status === 'Failed')); +}); + +test('ReconciliationReportSearch returns fields specified', async (t) => { + const { knex } = t.context; + let fields = 'name'; + let queryStringParameters = { + fields, + }; + let dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + let response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + response.results.forEach((report) => t.deepEqual(Object.keys(report), fields.split(','))); + + fields = 'name,type,status'; + queryStringParameters = { + fields, + }; + dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 10); + response.results.forEach((report) => t.deepEqual(Object.keys(report), fields.split(','))); +}); + +test('ReconciliationReportSearch ignores non-existing fields', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 100, + non_existing_field: `non_exist_${cryptoRandomString({ length: 5 })}`, + non_existing_field__from: `non_exist_${cryptoRandomString({ length: 5 })}`, + }; + const dbSearch = new ReconciliationReportSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); +}); diff --git a/packages/db/tests/search/test-RuleSearch.js b/packages/db/tests/search/test-RuleSearch.js index 7ecbcdc7008..d448a1d05b6 100644 --- a/packages/db/tests/search/test-RuleSearch.js +++ 
b/packages/db/tests/search/test-RuleSearch.js @@ -106,30 +106,21 @@ test.before(async (t) => { updatedAt: new Date(2018, 0, 1), updatedAt2: new Date(2018, 0, 2), }; - - const rules = []; t.context.rulePgModel = new RulePgModel(); - - range(50).map((num) => ( - rules.push(fakeRuleRecordFactory({ - name: `fakeRule-${num}`, - created_at: t.context.ruleSearchFields.createdAt, - updated_at: (num % 2) ? - t.context.ruleSearchFields.updatedAt : t.context.ruleSearchFields.updatedAt2, - enabled: num % 2 === 0, - workflow: `testWorkflow-${num}`, - queue_url: (num % 2) ? 'https://sqs.us-east-1.amazonaws.com/123/456' : null, - collection_cumulus_id: (num % 2) - ? t.context.collectionCumulusId : t.context.collectionCumulusId2, - provider_cumulus_id: (num % 2) - ? t.context.providerCumulusId : t.context.providerCumulusId2, - })) - )); - - await t.context.rulePgModel.insert( - t.context.knex, - rules - ); + const rules = range(50).map((num) => fakeRuleRecordFactory({ + name: `fakeRule-${num}`, + created_at: t.context.ruleSearchFields.createdAt, + updated_at: (num % 2) ? + t.context.ruleSearchFields.updatedAt : t.context.ruleSearchFields.updatedAt2, + enabled: num % 2 === 0, + workflow: `testWorkflow-${num}`, + queue_url: (num % 2) ? 'https://sqs.us-east-1.amazonaws.com/123/456' : null, + collection_cumulus_id: (num % 2) + ? t.context.collectionCumulusId : t.context.collectionCumulusId2, + provider_cumulus_id: (num % 2) + ? t.context.providerCumulusId : t.context.providerCumulusId2, + })); + await t.context.rulePgModel.insert(t.context.knex, rules); }); test.after.always(async (t) => { @@ -139,7 +130,7 @@ test.after.always(async (t) => { }); }); -test('RuleSearch returns correct response for basic query', async (t) => { +test('RuleSearch returns the correct response for a basic query', async (t) => { const { knex } = t.context; const dbSearch = new RuleSearch({}); const results = await dbSearch.query(knex); @@ -183,7 +174,7 @@ test('RuleSearch returns correct response for basic query', async (t) => { t.deepEqual(results.results[9], expectedResponse10); }); -test('RuleSearchsupports page and limit params', async (t) => { +test('RuleSearch supports page and limit params', async (t) => { const { knex } = t.context; let queryStringParameters = { limit: 25, diff --git a/packages/db/tests/search/test-StatsSearch.js b/packages/db/tests/search/test-StatsSearch.js index a2a7faba6dc..825d0f4af74 100644 --- a/packages/db/tests/search/test-StatsSearch.js +++ b/packages/db/tests/search/test-StatsSearch.js @@ -8,17 +8,19 @@ const { StatsSearch } = require('../../dist/search/StatsSearch'); const { destroyLocalTestDb, generateLocalTestDb, - GranulePgModel, CollectionPgModel, + GranulePgModel, + ExecutionPgModel, + PdrPgModel, + ProviderPgModel, + ReconciliationReportPgModel, fakeCollectionRecordFactory, fakeGranuleRecordFactory, + fakeExecutionRecordFactory, + fakePdrRecordFactory, fakeProviderRecordFactory, + fakeReconciliationReportRecordFactory, migrationDir, - fakePdrRecordFactory, - fakeExecutionRecordFactory, - PdrPgModel, - ExecutionPgModel, - ProviderPgModel, } = require('../../dist'); const testDbName = `collection_${cryptoRandomString({ length: 10 })}`; @@ -34,88 +36,67 @@ test.before(async (t) => { t.context.collectionPgModel = new CollectionPgModel(); t.context.granulePgModel = new GranulePgModel(); - t.context.providerPgModel = new ProviderPgModel(); - t.context.pdrPgModel = new PdrPgModel(); t.context.executionPgModel = new ExecutionPgModel(); + t.context.pdrPgModel = new PdrPgModel(); + 
t.context.providerPgModel = new ProviderPgModel(); + t.context.reconciliationReportPgModel = new ReconciliationReportPgModel(); const statuses = ['queued', 'failed', 'completed', 'running']; const errors = [{ Error: 'UnknownError' }, { Error: 'CumulusMessageAdapterError' }, { Error: 'IngestFailure' }, { Error: 'CmrFailure' }, {}]; - const granules = []; - const collections = []; - const executions = []; - const pdrs = []; - const providers = []; - - range(20).map((num) => ( - collections.push(fakeCollectionRecordFactory({ - name: 'testCollection', - version: `${num}`, - cumulus_id: num, - })) - )); - - range(10).map((num) => ( - providers.push(fakeProviderRecordFactory({ - cumulus_id: num, - name: `testProvider${num}`, - })) - )); - - range(100).map((num) => ( - granules.push(fakeGranuleRecordFactory({ - collection_cumulus_id: num % 20, - granule_id: num % 2 === 0 ? `testGranule${num}` : `query__Granule${num}`, - status: statuses[num % 4], - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), - error: errors[num % 5], - duration: num + (num / 10), - provider_cumulus_id: num % 10, - })) - )); - - range(20).map((num) => ( - pdrs.push(fakePdrRecordFactory({ - collection_cumulus_id: num, - status: statuses[(num % 3) + 1], - provider_cumulus_id: num % 10, - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), - // eslint-disable-next-line no-sequences - })), - executions.push(fakeExecutionRecordFactory({ - collection_cumulus_id: num, - status: statuses[(num % 3) + 1], - error: errors[num % 5], - created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), - updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), - })) - )); - - await t.context.collectionPgModel.insert( - t.context.knex, - collections - ); - - await t.context.providerPgModel.insert( - t.context.knex, - providers - ); - - await t.context.granulePgModel.insert( - t.context.knex, - granules - ); - - await t.context.executionPgModel.insert( - t.context.knex, - executions - ); - - await t.context.pdrPgModel.insert( - t.context.knex, - pdrs - ); + const reconReportTypes = ['Granule Inventory', 'Granule Not Found', 'Inventory', 'ORCA Backup']; + const reconReportStatuses = ['Generated', 'Pending', 'Failed']; + + const collections = range(20).map((num) => fakeCollectionRecordFactory({ + name: 'testCollection', + version: `${num}`, + cumulus_id: num, + })); + + const providers = range(10).map((num) => fakeProviderRecordFactory({ + cumulus_id: num, + name: `testProvider${num}`, + })); + + const granules = range(100).map((num) => fakeGranuleRecordFactory({ + collection_cumulus_id: num % 20, + granule_id: num % 2 === 0 ? 
`testGranule${num}` : `query__Granule${num}`, + status: statuses[num % 4], + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), + error: errors[num % 5], + duration: num + (num / 10), + provider_cumulus_id: num % 10, + })); + + const pdrs = range(20).map((num) => fakePdrRecordFactory({ + collection_cumulus_id: num, + status: statuses[(num % 3) + 1], + provider_cumulus_id: num % 10, + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), + })); + + const executions = range(20).map((num) => fakeExecutionRecordFactory({ + collection_cumulus_id: num, + status: statuses[(num % 3) + 1], + error: errors[num % 5], + created_at: (new Date(2018 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2018 + (num % 6), (num % 12), ((num + 1) % 29))), + })); + + const reconReports = range(24).map((num) => fakeReconciliationReportRecordFactory({ + type: reconReportTypes[(num % 4)], + status: reconReportStatuses[(num % 3)], + created_at: (new Date(2024 + (num % 6), (num % 12), (num % 30))), + updated_at: (new Date(2024 + (num % 6), (num % 12), ((num + 1) % 29))), + })); + + await t.context.collectionPgModel.insert(t.context.knex, collections); + await t.context.providerPgModel.insert(t.context.knex, providers); + await t.context.granulePgModel.insert(t.context.knex, granules); + await t.context.executionPgModel.insert(t.context.knex, executions); + await t.context.pdrPgModel.insert(t.context.knex, pdrs); + await t.context.reconciliationReportPgModel.insert(t.context.knex, reconReports); }); test.after.always(async (t) => { @@ -125,7 +106,7 @@ test.after.always(async (t) => { }); }); -test('StatsSearch returns correct response for basic granules query', async (t) => { +test('StatsSearch aggregate returns correct response for basic query with type granules', async (t) => { const { knex } = t.context; const AggregateSearch = new StatsSearch({}, 'granule'); const results = await AggregateSearch.aggregate(knex); @@ -139,7 +120,7 @@ test('StatsSearch returns correct response for basic granules query', async (t) t.deepEqual(results.count, expectedResponse); }); -test('StatsSearch filters correctly by date', async (t) => { +test('StatsSearch aggregate filters granules correctly by date', async (t) => { const { knex } = t.context; const queryStringParameters = { timestamp__from: `${(new Date(2020, 1, 28)).getTime()}`, @@ -158,7 +139,7 @@ test('StatsSearch filters correctly by date', async (t) => { t.deepEqual(results.count, expectedResponse); }); -test('StatsSearch filters executions correctly', async (t) => { +test('StatsSearch aggregate filters executions correctly', async (t) => { const { knex } = t.context; let queryStringParameters = { field: 'status', @@ -205,7 +186,7 @@ test('StatsSearch filters executions correctly', async (t) => { t.is(results3.meta.count, 1); }); -test('StatsSearch filters PDRs correctly', async (t) => { +test('StatsSearch aggregate filters PDRs correctly', async (t) => { const { knex } = t.context; let queryStringParameters = { field: 'status', @@ -247,7 +228,39 @@ test('StatsSearch filters PDRs correctly', async (t) => { t.deepEqual(results3.count, expectedResponse3); }); -test('StatsSearch returns correct response when queried by provider', async (t) => { +test('StatsSearch aggregate filters Reconciliation Reports correctly', async (t) => { + const { knex } = t.context; + let queryStringParameters = { + 
field: 'type', + }; + + const AggregateSearch = new StatsSearch({ queryStringParameters }, 'reconciliationReport'); + const results = await AggregateSearch.aggregate(knex); + const expectedResponse = [ + { key: 'Granule Inventory', count: 6 }, + { key: 'Granule Not Found', count: 6 }, + { key: 'Inventory', count: 6 }, + { key: 'ORCA Backup', count: 6 }, + ]; + t.is(results.meta.count, 24); + t.deepEqual(results.count, expectedResponse); + + queryStringParameters = { + field: 'status', + }; + + const AggregateSearch2 = new StatsSearch({ queryStringParameters }, 'reconciliationReport'); + const results2 = await AggregateSearch2.aggregate(knex); + const expectedResponse2 = [ + { key: 'Failed', count: 8 }, + { key: 'Generated', count: 8 }, + { key: 'Pending', count: 8 }, + ]; + t.is(results2.meta.count, 24); + t.deepEqual(results2.count, expectedResponse2); +}); + +test('StatsSearch returns correct aggregate response for type granule when queried by provider', async (t) => { const { knex } = t.context; const queryStringParameters = { field: 'status', @@ -261,7 +274,7 @@ test('StatsSearch returns correct response when queried by provider', async (t) t.deepEqual(results.count, expectedResponse); }); -test('StatsSearch returns correct response when queried by collection', async (t) => { +test('StatsSearch returns correct aggregate response for type granule when queried by collection', async (t) => { const { knex } = t.context; const queryStringParameters = { field: 'status', @@ -275,7 +288,7 @@ test('StatsSearch returns correct response when queried by collection', async (t t.deepEqual(results.count, expectedResponse); }); -test('StatsSearch returns correct response when queried by collection and provider', async (t) => { +test('StatsSearch returns correct aggregate response for type granule when queried by collection and provider', async (t) => { const { knex } = t.context; let queryStringParameters = { field: 'status', @@ -318,7 +331,7 @@ test('StatsSearch returns correct response when queried by collection and provid t.deepEqual(results3.count, expectedResponse3); }); -test('StatsSearch returns correct response when queried by error', async (t) => { +test('StatsSearch returns correct aggregate response for type granule when queried by error', async (t) => { const { knex } = t.context; let queryStringParameters = { field: 'error.Error.keyword', @@ -396,7 +409,7 @@ test('StatsSearch can query by infix and prefix when type is defined', async (t) t.deepEqual(results3.count, expectedResponse3); }); -test('StatsSummary works', async (t) => { +test('StatsSearch summary works', async (t) => { const { knex } = t.context; const StatsSummary = new StatsSearch({}, 'granule'); const results = await StatsSummary.summary(knex); diff --git a/packages/db/tests/search/test-field-mapping.js b/packages/db/tests/search/test-field-mapping.js index cccfccfde28..2d1af820556 100644 --- a/packages/db/tests/search/test-field-mapping.js +++ b/packages/db/tests/search/test-field-mapping.js @@ -239,3 +239,31 @@ test('mapQueryStringFieldToDbField correctly converts all rule api fields to db }, {}); t.deepEqual(dbQueryParams, expectedDbParameters); }); + +test('mapQueryStringFieldToDbField correctly converts all reconciliation report api fields to db fields', (t) => { + const queryStringParameters = { + name: 'some report name', + type: 'Granule Not Found', + status: 'Generated', + location: 's3://exampleBucket/examplePath', + createdAt: '1704100000000', + updatedAt: 1704100000000, + }; + + const expectedDbParameters = { + 
name: 'some report name', + type: 'Granule Not Found', + status: 'Generated', + location: 's3://exampleBucket/examplePath', + created_at: new Date(1704100000000), + updated_at: new Date(1704100000000), + }; + + const apiFieldsList = Object.entries(queryStringParameters) + .map(([name, value]) => ({ name, value })); + const dbQueryParams = apiFieldsList.reduce((acc, queryField) => { + const queryParam = mapQueryStringFieldToDbField('reconciliationReport', queryField); + return { ...acc, ...queryParam }; + }, {}); + t.deepEqual(dbQueryParams, expectedDbParameters); +}); From 34a92056b2f7cd14ed8ca95c8dd23712eac78883 Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Mon, 4 Nov 2024 12:05:02 -0500 Subject: [PATCH 55/61] CUMULUS-3859: Update @cumulus/api/bin/serveUtils to no longer add records to ElasticSearch (#3854) --- CHANGELOG.md | 2 + .../package.json | 16 +++---- packages/api/bin/serveUtils.js | 44 +++++++------------ 3 files changed, 25 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b514adcf66a..be8416bcd21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,8 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE - **CUMULUS-3718** - Updated `reconciliation_reports` list api endpoint and added `ReconciliationReportSearch` class to query postgres - Added `reconciliationReports` type to stats endpoint, so `aggregate` query will work for reconciliation reports +- **CUMULUS-3859** + - Updated `@cumulus/api/bin/serveUtils` to no longer add records to ElasticSearch ## [Unreleased] diff --git a/lambdas/reconciliation-report-migration/package.json b/lambdas/reconciliation-report-migration/package.json index be99e45c024..98bcf797212 100644 --- a/lambdas/reconciliation-report-migration/package.json +++ b/lambdas/reconciliation-report-migration/package.json @@ -1,6 +1,6 @@ { "name": "@cumulus/reconciliation-report-migration", - "version": "19.0.0", + "version": "19.1.0", "description": "Lambda function for reconciliation report migration from DynamoDB to Postgres", "author": "Cumulus Authors", "license": "Apache-2.0", @@ -31,13 +31,13 @@ "failFast": true }, "dependencies": { - "@cumulus/api": "19.0.0", - "@cumulus/aws-client": "19.0.0", - "@cumulus/common": "19.0.0", - "@cumulus/db": "19.0.0", - "@cumulus/errors": "19.0.0", - "@cumulus/logger": "19.0.0", - "@cumulus/types": "19.0.0", + "@cumulus/api": "19.1.0", + "@cumulus/aws-client": "19.1.0", + "@cumulus/common": "19.1.0", + "@cumulus/db": "19.1.0", + "@cumulus/errors": "19.1.0", + "@cumulus/logger": "19.1.0", + "@cumulus/types": "19.1.0", "knex": "2.4.1", "lodash": "^4.17.21", "pg": "~8.12" diff --git a/packages/api/bin/serveUtils.js b/packages/api/bin/serveUtils.js index 76c96a4e73a..4cba4f4babe 100644 --- a/packages/api/bin/serveUtils.js +++ b/packages/api/bin/serveUtils.js @@ -1,7 +1,6 @@ 'use strict'; const pEachSeries = require('p-each-series'); -const indexer = require('@cumulus/es-client/indexer'); const { AsyncOperationPgModel, CollectionPgModel, @@ -16,21 +15,20 @@ const { migrationDir, PdrPgModel, ProviderPgModel, + ReconciliationReportPgModel, RulePgModel, translateApiCollectionToPostgresCollection, translateApiExecutionToPostgresExecution, translateApiGranuleToPostgresGranule, translateApiPdrToPostgresPdr, translateApiProviderToPostgresProvider, + translateApiReconReportToPostgresReconReport, translateApiRuleToPostgresRule, - translatePostgresExecutionToApiExecution, upsertGranuleWithExecutionJoinRecord, } = 
require('@cumulus/db'); const { log } = require('console'); -const models = require('../models'); const { createRuleTrigger } = require('../lib/rulesHelpers'); const { fakeGranuleFactoryV2 } = require('../lib/testUtils'); -const { getESClientAndIndex } = require('./local-test-defaults'); /** * Remove all records from api-related postgres tables @@ -89,11 +87,9 @@ async function addCollections(collections) { }, }); - const es = await getESClientAndIndex(); const collectionPgModel = new CollectionPgModel(); return await Promise.all( collections.map(async (c) => { - await indexer.indexCollection(es.client, c, es.index); const dbRecord = await translateApiCollectionToPostgresCollection(c); await collectionPgModel.create(knex, dbRecord); }) @@ -109,7 +105,6 @@ async function addGranules(granules) { }); const executionPgModel = new ExecutionPgModel(); - const es = await getESClientAndIndex(); return await Promise.all( granules.map(async (apiGranule) => { const newGranule = fakeGranuleFactoryV2( @@ -117,7 +112,6 @@ async function addGranules(granules) { ...apiGranule, } ); - await indexer.indexGranule(es.client, newGranule, es.index); const dbRecord = await translateApiGranuleToPostgresGranule({ dynamoRecord: newGranule, knexOrTransaction: knex, @@ -143,11 +137,9 @@ async function addProviders(providers) { }, }); - const es = await getESClientAndIndex(); const providerPgModel = new ProviderPgModel(); return await Promise.all( providers.map(async (provider) => { - await indexer.indexProvider(es.client, provider, es.index); const dbRecord = await translateApiProviderToPostgresProvider(provider); await providerPgModel.create(knex, dbRecord); }) @@ -162,12 +154,10 @@ async function addRules(rules) { }, }); - const es = await getESClientAndIndex(); const rulePgModel = new RulePgModel(); return await Promise.all( rules.map(async (r) => { const ruleRecord = await createRuleTrigger(r); - await indexer.indexRule(es.client, ruleRecord, es.index); const dbRecord = await translateApiRuleToPostgresRule(ruleRecord, knex); await rulePgModel.create(knex, dbRecord); }) @@ -182,8 +172,6 @@ async function addExecutions(executions) { }, }); - const es = await getESClientAndIndex(); - executions.sort((firstEl, secondEl) => { if (!firstEl.parentArn && !secondEl.parentArn) { return 0; @@ -199,12 +187,7 @@ async function addExecutions(executions) { const executionPgModel = new ExecutionPgModel(); const executionsIterator = async (execution) => { const dbRecord = await translateApiExecutionToPostgresExecution(execution, knex); - const [writtenPostgresDbRecord] = await executionPgModel.create(knex, dbRecord); - const apiExecutionRecord = await translatePostgresExecutionToApiExecution( - writtenPostgresDbRecord, - knex - ); - await indexer.indexExecution(es.client, apiExecutionRecord, es.index); + await executionPgModel.create(knex, dbRecord); }; await pEachSeries(executions, executionsIterator); @@ -218,26 +201,29 @@ async function addPdrs(pdrs) { }, }); - const es = await getESClientAndIndex(); const pdrPgModel = new PdrPgModel(); return await Promise.all( pdrs.map(async (p) => { - await indexer.indexPdr(es.client, p, es.index); const dbRecord = await translateApiPdrToPostgresPdr(p, knex); await pdrPgModel.create(knex, dbRecord); }) ); } +// TODO this is dynamodb async function addReconciliationReports(reconciliationReports) { - const reconciliationReportModel = new models.ReconciliationReport(); - const es = await getESClientAndIndex(); + const knex = await getKnexClient({ + env: { + ...envParams, + 
...localStackConnectionEnv,
+    },
+  });
+  const reconciliationReportPgModel = new ReconciliationReportPgModel();
   return await Promise.all(
-    reconciliationReports.map((r) =>
-      reconciliationReportModel
-        .create(r)
-        .then((reconciliationReport) =>
-          indexer.indexReconciliationReport(es.client, reconciliationReport, es.index)))
+    reconciliationReports.map(async (r) => {
+      const dbRecord = await translateApiReconReportToPostgresReconReport(r, knex);
+      await reconciliationReportPgModel.create(knex, dbRecord);
+    })
   );
 }

From 37e639fc44e73213bac1e4c2e1f9ecad9fe622bb Mon Sep 17 00:00:00 2001
From: etcart <37375117+etcart@users.noreply.github.com>
Date: Thu, 7 Nov 2024 11:51:13 -0500
Subject: [PATCH 56/61] Ecarton/cumulus 3698 granules list files executions
 (#3853)

* files in GranuleSearch
* changelog
* changelog linting
* changelog in the right place
* files and inclusion in tests
* add changelog
* linter updates
* integration test bits turned back on with this functionality
* revert unneeded change to outflow from translation
* fixes to execution handling, maybe too clunky
* remove leftover reference comment
* clean up unnecessary join and unused function
* allow for flexible request of file by granule cumulus ids
* making sure typing is right
* memory optimizations in execution handling
* linting
* add unit tests and a discovered fix
* linter cleanup
* fix some docstrings in executionSearch
* check that limit is with respect to granules and that files are translated to api format
* linter error
* only include files and executions if full record requested
* adding integration tests for includeFullRecord
* need to request full record in recon report int test
* linter errors
* add changelog line about includeFullRecord
* review grammar and style nits
* clean up bare granule distinction
* add breaking change entry to changelog
* linter fix
* changelog breaking change about getExecutionArnsByGranuleCumulusId
---
 CHANGELOG.md                                  |   8 +
 example/spec/helpers/granuleUtils.js          |   5 +-
 .../CreateReconciliationReportSpec.js         |  30 +++-
 example/spec/parallel/testAPI/granuleSpec.js  |  22 ++-
 packages/db/src/lib/execution.ts              |  70 ++++----
 packages/db/src/models/file.ts                |  12 ++
 packages/db/src/search/ExecutionSearch.ts     |   8 +-
 packages/db/src/search/GranuleSearch.ts       |  46 ++++-
 packages/db/tests/models/test-file-model.js   | 135 +++++++++++++-
 .../db/tests/search/test-GranuleSearch.js     | 164 +++++++++++++++++-
 10 files changed, 438 insertions(+), 62 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index be8416bcd21..43d7845fc4e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -76,6 +76,11 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE
 
 ### Breaking Changes
 
+- **CUMULUS-3698**
+  - Retrieval of files/execution by GranuleSearch is toggled
+  by setting the "includeFullRecord" field to 'true' in relevant api endpoint params
+  - GranuleSearch does *not* retrieve files/execution by default unless includeFullRecord is set to 'true'
+  - @cumulus/db function getExecutionArnsByGranuleCumulusId is removed. To replace this function, use getExecutionInfoByGranuleCumulusId with the parameter executionColumns set to ['arn'] or unset (['arn'] is the default argument)
 - **CUMULUS-2564**
   - Updated `sync-granule` task to add `useGranIdPath` as a configuration flag.
This modifies the task behavior to stage granules to @@ -92,6 +97,9 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE ### Added +- **CUMULUS-3698** + - GranuleSearch now can retrieve associated files for granules + - GranuleSearch now can retrieve latest associated execution for granules - **CUMULUS-3919** - Added terraform variables `disableSSL` and `rejectUnauthorized` to `tf-modules/cumulus-rds-tf` module. diff --git a/example/spec/helpers/granuleUtils.js b/example/spec/helpers/granuleUtils.js index 8e9e5af55ab..0cdd838b86c 100644 --- a/example/spec/helpers/granuleUtils.js +++ b/example/spec/helpers/granuleUtils.js @@ -238,8 +238,6 @@ const waitForGranuleRecordUpdatedInList = async (stackName, granule, additionalQ 'beginningDateTime', 'endingDateTime', 'error', - 'execution', // TODO remove after CUMULUS-3698 - 'files', // TODO -2714 this should be removed 'lastUpdateDateTime', 'productionDateTime', 'updatedAt', @@ -255,7 +253,8 @@ const waitForGranuleRecordUpdatedInList = async (stackName, granule, additionalQ }); const results = JSON.parse(resp.body).results; if (results && results.length === 1) { - // TODO - CUMULUS-2714 key sort both files objects for comparison + results[0].files.sort((a, b) => a.cumulus_id - b.cumulus_id); + granule.files.sort((a, b) => a.cumulus_id - b.cumulus_id); const granuleMatches = isEqual(omit(results[0], fieldsIgnored), omit(granule, fieldsIgnored)); if (!granuleMatches) { diff --git a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js index 3cad6f13204..16301e7b2ff 100644 --- a/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js +++ b/example/spec/parallel/createReconciliationReport/CreateReconciliationReportSpec.js @@ -282,8 +282,10 @@ async function updateGranuleFile(prefix, granule, regex, replacement) { const waitForCollectionRecordsInList = async (stackName, collectionIds, additionalQueryParams = {}) => await pWaitFor( async () => { // Verify the collection is returned when listing collections - const collsResp = await getCollections({ prefix: stackName, - query: { _id__in: collectionIds.join(','), ...additionalQueryParams, limit: 30 } }); + const collsResp = await getCollections({ + prefix: stackName, + query: { _id__in: collectionIds.join(','), ...additionalQueryParams, limit: 30 }, + }); const results = get(JSON.parse(collsResp.body), 'results', []); const ids = results.map((c) => constructCollectionId(c.name, c.version)); return isEqual(ids.sort(), collectionIds.sort()); @@ -357,7 +359,9 @@ describe('When there are granule differences and granule reconciliation is run', const testId = createTimestampedTestId(config.stackName, 'CreateReconciliationReport'); testSuffix = createTestSuffix(testId); testDataFolder = createTestDataPath(testId); - + const apiParams = { + includeFullRecord: 'true', + }; console.log('XXX Waiting for setupCollectionAndTestData'); await setupCollectionAndTestData(config, testSuffix, testDataFolder); console.log('XXX Completed setupCollectionAndTestData'); @@ -405,8 +409,12 @@ describe('When there are granule differences and granule reconciliation is run', granuleId: publishedGranuleId, collectionId, }); - console.log('XXXXX Completed getGranule()'); - await waitForGranuleRecordUpdatedInList(config.stackName, granuleBeforeUpdate); + console.log('XXXXX Completed for getGranule()'); + await waitForGranuleRecordUpdatedInList( + config.stackName, + 
granuleBeforeUpdate, + apiParams + ); console.log(`XXXXX Waiting for updateGranuleFile(${publishedGranuleId})`); ({ originalGranuleFile, updatedGranuleFile } = await updateGranuleFile( config.stackName, @@ -422,8 +430,16 @@ describe('When there are granule differences and granule reconciliation is run', ]); console.log('XXXX Waiting for granules updated in list'); await Promise.all([ - waitForGranuleRecordUpdatedInList(config.stackName, dbGranule), - waitForGranuleRecordUpdatedInList(config.stackName, granuleAfterUpdate), + waitForGranuleRecordUpdatedInList( + config.stackName, + dbGranule, + apiParams + ), + waitForGranuleRecordUpdatedInList( + config.stackName, + granuleAfterUpdate, + apiParams + ), ]); console.log('XXXX Completed granules updated in list'); } catch (error) { diff --git a/example/spec/parallel/testAPI/granuleSpec.js b/example/spec/parallel/testAPI/granuleSpec.js index e9d170fa9e6..afce7a9412c 100644 --- a/example/spec/parallel/testAPI/granuleSpec.js +++ b/example/spec/parallel/testAPI/granuleSpec.js @@ -183,8 +183,26 @@ describe('The Granules API', () => { }); const searchedGranule = JSON.parse(searchResults.body).results[0]; - // TODO CUMULUS-3698 includes files - expect(searchedGranule).toEqual(jasmine.objectContaining(omit(randomGranuleRecord, 'files'))); + expect(searchedGranule).toEqual(jasmine.objectContaining({ + ...randomGranuleRecord, + files: [], + })); + }); + it('can search the granule including files via the API.', async () => { + if (beforeAllError) { + fail(beforeAllError); + } + + const searchResults = await waitForListGranulesResult({ + prefix, + query: { + granuleId: randomGranuleRecord.granuleId, + includeFullRecord: 'true', + }, + }); + + const searchedGranule = JSON.parse(searchResults.body).results[0]; + expect(searchedGranule).toEqual(jasmine.objectContaining(randomGranuleRecord)); }); it('can modify the granule via API.', async () => { diff --git a/packages/db/src/lib/execution.ts b/packages/db/src/lib/execution.ts index dd824e52966..7970a4fb881 100644 --- a/packages/db/src/lib/execution.ts +++ b/packages/db/src/lib/execution.ts @@ -17,42 +17,30 @@ export interface ArnRecord { const log = new Logger({ sender: '@cumulus/db/lib/execution' }); /** - * Returns execution info sorted by most recent first for an input - * Granule Cumulus ID. - * - * @param {Object} params - * @param {Knex | Knex.Transaction} params.knexOrTransaction - * Knex client for reading from RDS database - * @param {Array} params.executionColumns - Columns to return from executions table - * @param {number} params.granuleCumulusId - The primary ID for a Granule - * @param {number} [params.limit] - limit to number of executions to query - * @returns {Promise[]>} - * Array of arn objects with the most recent first. + * Returns execution records sorted by most recent first for an input + * set of Granule Cumulus IDs. + * @returns Array of arn objects with the most recent first. 
 */
-export const getExecutionInfoByGranuleCumulusId = async ({
+export const getExecutionInfoByGranuleCumulusIds = async ({
   knexOrTransaction,
-  granuleCumulusId,
-  executionColumns = ['arn'],
+  granuleCumulusIds,
   limit,
 }: {
   knexOrTransaction: Knex | Knex.Transaction,
-  granuleCumulusId: number,
-  executionColumns: string[],
+  granuleCumulusIds: number[],
   limit?: number
-}): Promise<Partial<PostgresExecutionRecord>[]> => {
+}): Promise<{ granule_cumulus_id: number, url: string }[]> => {
   const knexQuery = knexOrTransaction(TableNames.executions)
-    .column(executionColumns.map((column) => `${TableNames.executions}.${column}`))
-    .where(`${TableNames.granules}.cumulus_id`, granuleCumulusId)
+    .column([
+      `${TableNames.executions}.url`,
+      `${TableNames.granulesExecutions}.granule_cumulus_id`,
+    ])
+    .whereIn(`${TableNames.granulesExecutions}.granule_cumulus_id`, granuleCumulusIds)
     .join(
       TableNames.granulesExecutions,
       `${TableNames.executions}.cumulus_id`,
       `${TableNames.granulesExecutions}.execution_cumulus_id`
     )
-    .join(
-      TableNames.granules,
-      `${TableNames.granules}.cumulus_id`,
-      `${TableNames.granulesExecutions}.granule_cumulus_id`
-    )
     .orderBy(`${TableNames.executions}.timestamp`, 'desc');
   if (limit) {
     knexQuery.limit(limit);
@@ -61,33 +49,37 @@ export const getExecutionInfoByGranuleCumulusId = async ({
 };
 
 /**
- * Returns a list of executionArns sorted by most recent first, for an input
+ * Returns execution records sorted by most recent first for an input
  * Granule Cumulus ID.
  *
- * @param {Knex | Knex.Transaction} knexOrTransaction
+ * @param {Object} params
+ * @param {Knex | Knex.Transaction} params.knexOrTransaction
  *   Knex client for reading from RDS database
- * @param {number} granuleCumulusId - The primary ID for a Granule
- * @param {number} limit - limit to number of executions to query
- * @returns {Promise} - Array of arn objects with the most recent first.
+ * @param {Array} params.executionColumns - Columns to return from executions table
+ * @param {number} params.granuleCumulusId - The primary ID for a Granule
+ * @param {number} [params.limit] - limit to number of executions to query
+ * @returns {Promise<Partial<PostgresExecutionRecord>[]>}
+ * Array of arn objects with the most recent first.
 */
-export const getExecutionArnsByGranuleCumulusId = async (
+export const getExecutionInfoByGranuleCumulusId = async ({
+  knexOrTransaction,
+  granuleCumulusId,
+  executionColumns = ['arn'],
+  limit,
+}: {
   knexOrTransaction: Knex | Knex.Transaction,
-  granuleCumulusId: Number,
+  granuleCumulusId: number,
+  executionColumns: string[],
   limit?: number
-): Promise<ArnRecord[]> => {
+}): Promise<Partial<PostgresExecutionRecord>[]> => {
   const knexQuery = knexOrTransaction(TableNames.executions)
-    .select(`${TableNames.executions}.arn`)
-    .where(`${TableNames.granules}.cumulus_id`, granuleCumulusId)
+    .column(executionColumns.map((column) => `${TableNames.executions}.${column}`))
+    .where(`${TableNames.granulesExecutions}.granule_cumulus_id`, granuleCumulusId)
     .join(
       TableNames.granulesExecutions,
       `${TableNames.executions}.cumulus_id`,
       `${TableNames.granulesExecutions}.execution_cumulus_id`
     )
-    .join(
-      TableNames.granules,
-      `${TableNames.granules}.cumulus_id`,
-      `${TableNames.granulesExecutions}.granule_cumulus_id`
-    )
     .orderBy(`${TableNames.executions}.timestamp`, 'desc');
   if (limit) {
     knexQuery.limit(limit);
   }
diff --git a/packages/db/src/models/file.ts b/packages/db/src/models/file.ts
index 472f9f4c68a..01de6c6d22e 100644
--- a/packages/db/src/models/file.ts
+++ b/packages/db/src/models/file.ts
@@ -22,6 +22,18 @@ class FilePgModel extends BasePgModel<PostgresFile, PostgresFileRecord> {
       .merge()
       .returning('*');
   }
+  /**
+   * Retrieves all files for all granules given
+   */
+  searchByGranuleCumulusIds(
+    knexOrTrx: Knex | Knex.Transaction,
+    granule_cumulus_ids: number[],
+    columns: string | string[] = '*'
+  ): Promise<PostgresFileRecord[]> {
+    return knexOrTrx(this.tableName)
+      .select(columns)
+      .whereIn('granule_cumulus_id', granule_cumulus_ids);
+  }
 }
 
 export { FilePgModel };
diff --git a/packages/db/src/search/ExecutionSearch.ts b/packages/db/src/search/ExecutionSearch.ts
index 07e66e85d2f..a9e49d4a118 100644
--- a/packages/db/src/search/ExecutionSearch.ts
+++ b/packages/db/src/search/ExecutionSearch.ts
@@ -33,9 +33,9 @@ export class ExecutionSearch extends BaseSearch {
   }
 
   /**
-   * check if joined async_ops table search is needed
+   * check if joined async_operations table search is needed
    *
-   * @returns whether collection search is needed
+   * @returns whether async_operations search is needed
    */
   protected searchAsync(): boolean {
     const { not, term, terms } = this.dbQueryParameters;
@@ -43,9 +43,9 @@
   }
 
   /**
-   * check if joined async_ops table search is needed
+   * check if joined parent execution table search is needed
    *
-   * @returns whether collection search is needed
+   * @returns whether parent execution search is needed
    */
   protected searchParent(): boolean {
     const { not, term, terms } = this.dbQueryParameters;
diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts
index 0584efc4c3b..7ff78db5e7b 100644
--- a/packages/db/src/search/GranuleSearch.ts
+++ b/packages/db/src/search/GranuleSearch.ts
@@ -11,6 +11,9 @@ import { DbQueryParameters, QueryEvent } from '../types/search';
 import { PostgresGranuleRecord } from '../types/granule';
 import { translatePostgresGranuleToApiGranuleWithoutDbQuery } from '../translate/granules';
 import { TableNames } from '../tables';
+import { FilePgModel } from '../models/file';
+import { PostgresFileRecord } from '../types/file';
+import { getExecutionInfoByGranuleCumulusIds } from '../lib/execution';
 
 const log = new Logger({ sender: '@cumulus/db/GranuleSearch' });
 
@@ -111,11 +114,37 @@ export class GranuleSearch extends BaseSearch {
    * @param pgRecords - postgres records
returned from query
   * @returns translated api records
   */
-  protected translatePostgresRecordsToApiRecords(pgRecords: GranuleRecord[])
-    : Partial<ApiGranuleRecord>[] {
+  protected async translatePostgresRecordsToApiRecords(pgRecords: GranuleRecord[], knex: Knex)
+    : Promise<Partial<ApiGranuleRecord>[]> {
     log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `);
-    const { fields } = this.dbQueryParameters;
+    const { fields, includeFullRecord } = this.dbQueryParameters;
+
+    const fileMapping: { [key: number]: PostgresFileRecord[] } = {};
+    const executionMapping: { [key: number]: { url: string, granule_cumulus_id: number } } = {};
+    const cumulusIds = pgRecords.map((record) => record.cumulus_id);
+    if (includeFullRecord) {
+      //get Files
+      const fileModel = new FilePgModel();
+      const files = await fileModel.searchByGranuleCumulusIds(knex, cumulusIds);
+      files.forEach((file) => {
+        if (!(file.granule_cumulus_id in fileMapping)) {
+          fileMapping[file.granule_cumulus_id] = [];
+        }
+        fileMapping[file.granule_cumulus_id].push(file);
+      });
+
+      //get Executions
+      const executions = await getExecutionInfoByGranuleCumulusIds({
+        knexOrTransaction: knex,
+        granuleCumulusIds: cumulusIds,
+      });
+      executions.forEach((execution) => {
+        if (!(execution.granule_cumulus_id in executionMapping)) {
+          executionMapping[execution.granule_cumulus_id] = execution;
+        }
+      });
+    }
     const apiRecords = pgRecords.map((item: GranuleRecord) => {
       const granulePgRecord = item;
       const collectionPgRecord = {
@@ -123,10 +152,19 @@ export class GranuleSearch extends BaseSearch {
         name: item.collectionName,
         version: item.collectionVersion,
       };
+      const executionUrls = executionMapping[item.cumulus_id]?.url
+        ? [{ url: executionMapping[item.cumulus_id].url }]
+        : [];
       const pdr = item.pdrName ? { name: item.pdrName } : undefined;
       const providerPgRecord = item.providerName ? { name: item.providerName } : undefined;
+      const fileRecords = fileMapping[granulePgRecord.cumulus_id] || [];
       const apiRecord = translatePostgresGranuleToApiGranuleWithoutDbQuery({
-        granulePgRecord, collectionPgRecord, pdr, providerPgRecord,
+        granulePgRecord,
+        collectionPgRecord,
+        pdr,
+        providerPgRecord,
+        files: fileRecords,
+        executionUrls,
       });
       return fields ?
pick(apiRecord, fields) : apiRecord; }); diff --git a/packages/db/tests/models/test-file-model.js b/packages/db/tests/models/test-file-model.js index af1ffec0ed7..1d90161c1df 100644 --- a/packages/db/tests/models/test-file-model.js +++ b/packages/db/tests/models/test-file-model.js @@ -1,6 +1,6 @@ const test = require('ava'); const cryptoRandomString = require('crypto-random-string'); - +const range = require('lodash/range'); const { CollectionPgModel, GranulePgModel, @@ -107,3 +107,136 @@ test('FilePgModel.upsert() overwrites a file record', async (t) => { updatedFile ); }); + +test('FilePgModel.searchByGranuleCumulusIds() returns relevant files', async (t) => { + const usedGranuleCumulusIds = await Promise.all(range(5).map(() => ( + createFakeGranule(t.context.knex) + ))); + const unUsedGranuleCumulusIds = await Promise.all(range(5).map(() => ( + createFakeGranule(t.context.knex) + ))); + const relevantFiles = await t.context.filePgModel.insert( + t.context.knex, + usedGranuleCumulusIds.map((granuleCumulusId) => ( + fakeFileRecordFactory({ + granule_cumulus_id: granuleCumulusId, + }) + )) + ); + const irrelevantFiles = await t.context.filePgModel.insert( + t.context.knex, + unUsedGranuleCumulusIds.map((granuleCumulusId) => ( + fakeFileRecordFactory({ + granule_cumulus_id: granuleCumulusId, + }) + )) + ); + const searched = await t.context.filePgModel.searchByGranuleCumulusIds( + t.context.knex, + usedGranuleCumulusIds + ); + + const foundFileCumulusIds = searched.map((file) => file.cumulus_id); + const foundGranuleCumulusIds = searched.map((file) => file.granule_cumulus_id); + relevantFiles.forEach((relevantFile) => { + t.true(foundFileCumulusIds.includes(relevantFile.cumulus_id)); + }); + irrelevantFiles.forEach((irrelevantFile) => { + t.false(foundFileCumulusIds.includes(irrelevantFile.cumulus_id)); + }); + usedGranuleCumulusIds.forEach((usedGranuleCumulusId) => { + t.true(foundGranuleCumulusIds.includes(usedGranuleCumulusId)); + }); + unUsedGranuleCumulusIds.forEach((unUsedGranuleCumulusId) => { + t.false(foundGranuleCumulusIds.includes(unUsedGranuleCumulusId)); + }); +}); + +test('FilePgModel.searchByGranuleCumulusIds() allows to specify desired columns', async (t) => { + const usedGranuleCumulusIds = await Promise.all(range(5).map(() => ( + createFakeGranule(t.context.knex) + ))); + const unUsedGranuleCumulusIds = await Promise.all(range(5).map(() => ( + createFakeGranule(t.context.knex) + ))); + const relevantFiles = await t.context.filePgModel.insert( + t.context.knex, + usedGranuleCumulusIds.map((granuleCumulusId) => ( + fakeFileRecordFactory({ + granule_cumulus_id: granuleCumulusId, + }) + )) + ); + const irrelevantFiles = await t.context.filePgModel.insert( + t.context.knex, + unUsedGranuleCumulusIds.map((granuleCumulusId) => ( + fakeFileRecordFactory({ + granule_cumulus_id: granuleCumulusId, + }) + )) + ); + let searched = await t.context.filePgModel.searchByGranuleCumulusIds( + t.context.knex, + usedGranuleCumulusIds, + 'cumulus_id' + ); + + searched.forEach((file) => { + t.true(file.granule_cumulus_id === undefined); + t.true(file.created_at === undefined); + t.true(file.updated_at === undefined); + t.true(file.file_size === undefined); + t.true(file.bucket === undefined); + t.true(file.checksum_type === undefined); + t.true(file.checksum_value === undefined); + t.true(file.file_name === undefined); + t.true(file.key === undefined); + t.true(file.path === undefined); + t.true(file.source === undefined); + t.true(file.type === undefined); + }); + + let foundFileCumulusIds = 
searched.map((file) => file.cumulus_id); + relevantFiles.forEach((relevantFile) => { + t.true(foundFileCumulusIds.includes(relevantFile.cumulus_id)); + }); + irrelevantFiles.forEach((irrelevantFile) => { + t.false(foundFileCumulusIds.includes(irrelevantFile.cumulus_id)); + }); + + searched = await t.context.filePgModel.searchByGranuleCumulusIds( + t.context.knex, + usedGranuleCumulusIds, + ['cumulus_id', 'granule_cumulus_id'] + ); + + searched.forEach((file) => { + t.true(file.created_at === undefined); + t.true(file.updated_at === undefined); + t.true(file.file_size === undefined); + t.true(file.bucket === undefined); + t.true(file.checksum_type === undefined); + t.true(file.checksum_value === undefined); + t.true(file.file_name === undefined); + t.true(file.key === undefined); + t.true(file.path === undefined); + t.true(file.source === undefined); + t.true(file.type === undefined); + }); + + foundFileCumulusIds = searched.map((file) => file.cumulus_id); + const foundGranuleCumulusIds = searched.map((file) => file.granule_cumulus_id); + relevantFiles.forEach((relevantFile) => { + t.true(foundFileCumulusIds.includes(relevantFile.cumulus_id)); + }); + irrelevantFiles.forEach((irrelevantFile) => { + t.false(foundFileCumulusIds.includes(irrelevantFile.cumulus_id)); + }); + + usedGranuleCumulusIds.forEach((usedGranuleCumulusId) => { + t.true(foundGranuleCumulusIds.includes(usedGranuleCumulusId)); + }); + unUsedGranuleCumulusIds.forEach((unUsedGranuleCumulusId) => { + t.false(foundGranuleCumulusIds.includes(unUsedGranuleCumulusId)); + }); +}); diff --git a/packages/db/tests/search/test-GranuleSearch.js b/packages/db/tests/search/test-GranuleSearch.js index 5d055c47a53..29db5323e01 100644 --- a/packages/db/tests/search/test-GranuleSearch.js +++ b/packages/db/tests/search/test-GranuleSearch.js @@ -3,7 +3,7 @@ const cryptoRandomString = require('crypto-random-string'); const range = require('lodash/range'); const { constructCollectionId } = require('@cumulus/message/Collections'); - +const { sleep } = require('@cumulus/common'); const { CollectionPgModel, fakeCollectionRecordFactory, @@ -16,6 +16,11 @@ const { PdrPgModel, ProviderPgModel, migrationDir, + FilePgModel, + fakeFileRecordFactory, + ExecutionPgModel, + fakeExecutionRecordFactory, + GranulesExecutionsPgModel, } = require('../../dist'); const testDbName = `granule_${cryptoRandomString({ length: 10 })}`; @@ -139,7 +144,7 @@ test.before(async (t) => { ? t.context.granuleSearchFields.lastUpdateDateTime : undefined, published: !!(num % 2), product_volume: Math.round(Number(t.context.granuleSearchFields.productVolume) - * (1 / (num + 1))).toString(), + * (1 / (num + 1))).toString(), time_to_archive: !(num % 10) ? 
Number(t.context.granuleSearchFields.timeToArchive) : undefined, time_to_process: !(num % 20) @@ -148,6 +153,73 @@ test.before(async (t) => { updated_at: new Date(t.context.granuleSearchFields.timestamp + (num % 2) * 1000), })) ); + + const filePgModel = new FilePgModel(); + await filePgModel.insert( + knex, + t.context.pgGranules.map((granule) => fakeFileRecordFactory( + { + granule_cumulus_id: granule.cumulus_id, + path: 'a.txt', + checksum_type: 'md5', + } + )) + ); + await filePgModel.insert( + knex, + t.context.pgGranules.map((granule) => fakeFileRecordFactory( + { + granule_cumulus_id: granule.cumulus_id, + path: 'b.txt', + checksum_type: 'sha256', + } + )) + ); + + const executionPgModel = new ExecutionPgModel(); + const granuleExecutionPgModel = new GranulesExecutionsPgModel(); + + let executionRecords = await executionPgModel.insert( + knex, + t.context.pgGranules.map((_, i) => fakeExecutionRecordFactory({ + url: `earlierUrl${i}`, + })) + ); + await granuleExecutionPgModel.insert( + knex, + t.context.pgGranules.map((granule, i) => ({ + granule_cumulus_id: granule.cumulus_id, + execution_cumulus_id: executionRecords[i].cumulus_id, + })) + ); + executionRecords = []; + // it's important for later testing that these are uploaded strictly in order + for (const i of range(100)) { + const [executionRecord] = await executionPgModel.insert( // eslint-disable-line no-await-in-loop + knex, + [fakeExecutionRecordFactory({ + url: `laterUrl${i}`, + })] + ); + executionRecords.push(executionRecord); + //ensure that timestamp in execution record is distinct + await sleep(1); // eslint-disable-line no-await-in-loop + } + + await granuleExecutionPgModel.insert( + knex, + t.context.pgGranules.map((granule, i) => ({ + granule_cumulus_id: granule.cumulus_id, + execution_cumulus_id: executionRecords[i].cumulus_id, + })) + ); + await granuleExecutionPgModel.insert( + knex, + t.context.pgGranules.map((granule, i) => ({ + granule_cumulus_id: granule.cumulus_id, + execution_cumulus_id: executionRecords[99 - i].cumulus_id, + })) + ); }); test('GranuleSearch returns 10 granule records by default', async (t) => { @@ -859,3 +931,91 @@ test('GranuleSearch estimates the rowcount of the table by default', async (t) = t.true(response.meta.count > 0); t.is(response.results?.length, 50); }); + +test('GranuleSearch with includeFullRecord true retrieves associated file objects for granules', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + includeFullRecord: 'true', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.results?.length, 100); + response.results.forEach((granuleRecord) => { + t.is(granuleRecord.files?.length, 2); + t.true('bucket' in granuleRecord.files[0]); + t.true('key' in granuleRecord.files[0]); + t.true('bucket' in granuleRecord.files[1]); + t.true('key' in granuleRecord.files[1]); + }); +}); +test('GranuleSearch with includeFullRecord true retrieves associated file translated to api key format', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + includeFullRecord: 'true', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.results?.length, 100); + response.results.forEach((granuleRecord) => { + t.is(granuleRecord.files?.length, 2); + t.true('bucket' in granuleRecord.files[0]); + t.true('key' in granuleRecord.files[0]); + t.true('checksumType' in 
granuleRecord.files[0]); + t.true('bucket' in granuleRecord.files[1]); + t.true('key' in granuleRecord.files[1]); + t.true('checksumType' in granuleRecord.files[1]); + }); +}); + +test('GranuleSearch with includeFullRecord true retrieves one associated Url object for granules', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + includeFullRecord: 'true', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.results?.length, 100); + response.results.forEach((granuleRecord) => { + t.true('execution' in granuleRecord); + }); +}); + +test('GranuleSearch with includeFullRecord true retrieves latest associated Url object for granules', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 200, + includeFullRecord: 'true', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.results?.length, 100); + response.results.sort((a, b) => a.cumulus_id - b.cumulus_id); + // these executions are loaded from lowest to highest number + // but each granule is associated with multiple executions: + // earlierUrl${i}, laterUrl${i}, and laterUrl${99-i} + // hence `laterUrl${max(i, 99-i)}` is the most recently updated execution + response.results.forEach((granuleRecord, i) => { + t.is(granuleRecord.execution, `laterUrl${Math.max(i, 99 - i)}`); + }); +}); + +test('GranuleSearch with includeFullRecord true retrieves granules, files and executions, with limit specifying number of granules', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: 4, + includeFullRecord: 'true', + }; + const dbSearch = new GranuleSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + t.is(response.results?.length, 4); + response.results.forEach((granuleRecord) => { + t.is(granuleRecord.files?.length, 2); + t.true('bucket' in granuleRecord.files[0]); + t.true('key' in granuleRecord.files[0]); + t.true('bucket' in granuleRecord.files[1]); + t.true('key' in granuleRecord.files[1]); + }); +}); From 0d7afd10c3a9077011cdcb80ce9cb73746fedb9d Mon Sep 17 00:00:00 2001 From: jennyhliu <34660846+jennyhliu@users.noreply.github.com> Date: Wed, 20 Nov 2024 14:45:14 -0500 Subject: [PATCH 57/61] CUMULUS-3859-1: Remove es from local api code and update active collection search (#3861) * CUMULUS-3859: Remove elasticsearch from api server * Updated CollectionSearch to filter additional granule fields for active collections * fix intermittent test error * granuleId field * refactor * update changelog * update octokit/graphql to fix cross-spawn * addAsyncOperations to serveUtils * clean variables * refactor --- CHANGELOG.md | 2 + package.json | 2 +- packages/api/bin/cli.js | 2 +- packages/api/bin/serve.js | 56 +------- packages/api/bin/serveUtils.js | 21 ++- packages/db/src/search/CollectionSearch.ts | 102 +++++++++------ packages/db/src/search/GranuleSearch.ts | 23 ++++ packages/db/src/search/StatsSearch.ts | 3 +- packages/db/src/translate/async_operations.ts | 3 +- packages/db/tests/lib/test-granule.js | 8 +- .../db/tests/search/test-CollectionSearch.js | 123 ++++++++++++++++-- 11 files changed, 227 insertions(+), 118 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43d7845fc4e..790d017db4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,6 +71,8 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE - Added 
`reconciliationReports` type to stats endpoint, so `aggregate` query will work for reconciliation reports - **CUMULUS-3859** - Updated `@cumulus/api/bin/serveUtils` to no longer add records to ElasticSearch + - Removed ElasticSearch from local API server code + - Updated CollectionSearch to filter granule fields in addition to time frame for active collections ## [Unreleased] diff --git a/package.json b/package.json index 6fd084be558..149e9ae5efb 100644 --- a/package.json +++ b/package.json @@ -79,7 +79,7 @@ "@babel/eslint-parser": "^7.24.1", "@babel/preset-env": "^7.24.4", "@docusaurus/eslint-plugin": "^2.3.0", - "@octokit/graphql": "2.1.1", + "@octokit/graphql": "^2.3.0", "@smithy/types": "^2.11.0", "@types/aws-lambda": "^8.10.58", "@types/lodash": "^4.14.150", diff --git a/packages/api/bin/cli.js b/packages/api/bin/cli.js index cac11b2a74c..b5eea458b8a 100755 --- a/packages/api/bin/cli.js +++ b/packages/api/bin/cli.js @@ -54,7 +54,7 @@ program program .command('serve') .option('--stackName ', 'stackname to serve (defaults to "localrun")', undefined) - .option('--no-reseed', 'do not reseed dynamoDB and Elasticsearch with new data on start.') + .option('--no-reseed', 'do not reseed data stores with new data on start.') .description('Serves the local version of the Cumulus API') .action((cmd) => { serveApi(process.env.USERNAME, cmd.stackName, cmd.reseed).catch(console.error); diff --git a/packages/api/bin/serve.js b/packages/api/bin/serve.js index 591f04ae227..78d609fe7b6 100644 --- a/packages/api/bin/serve.js +++ b/packages/api/bin/serve.js @@ -19,18 +19,14 @@ const { const { constructCollectionId } = require('@cumulus/message/Collections'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); - const { ReconciliationReport } = require('../models'); const testUtils = require('../lib/testUtils'); const serveUtils = require('./serveUtils'); const { - setLocalEsVariables, localStackName, localSystemBucket, localUserName, - getESClientAndIndex, } = require('./local-test-defaults'); const workflowList = testUtils.getWorkflowList(); @@ -58,12 +54,6 @@ async function populateBucket(bucket, stackName) { } async function prepareServices(stackName, bucket) { - setLocalEsVariables(stackName); - console.log(process.env.ES_HOST); - await bootstrapElasticSearch({ - host: process.env.ES_HOST, - index: process.env.ES_INDEX, - }); await s3().createBucket({ Bucket: bucket }); const { TopicArn } = await createSnsTopic(randomId('topicName')); @@ -96,35 +86,7 @@ function checkEnvVariablesAreSet(moreRequiredEnvVars) { } /** - * erases Elasticsearch index - * @param {any} esClient - Elasticsearch client - * @param {any} esIndex - index to delete - */ -async function eraseElasticsearchIndices(esClient, esIndex) { - try { - await esClient.client.indices.delete({ index: esIndex }); - } catch (error) { - if (error.message !== 'index_not_found_exception') throw error; - } -} - -/** - * resets Elasticsearch and returns the client and index. - * - * @param {string} stackName - The name of local stack. Used to prefix stack resources. - * @returns {Object} - Elasticsearch client and index - */ -async function initializeLocalElasticsearch(stackName) { - const es = await getESClientAndIndex(stackName); - await eraseElasticsearchIndices(es.client, es.index); - return bootstrapElasticSearch({ - host: process.env.ES_HOST, - index: es.index, - }); -} - -/** - * Fill Postgres and Elasticsearch with fake records for testing. + * Fill Postgres with fake records for testing. 
* @param {string} stackName - The name of local stack. Used to prefix stack resources. * @param {string} user - username * @param {Object} knexOverride - Used to override knex object for testing @@ -140,7 +102,6 @@ async function createDBRecords(stackName, user, knexOverride) { const providerPgModel = new ProviderPgModel(); const rulePgModel = new RulePgModel(); - await initializeLocalElasticsearch(stackName); await serveUtils.resetPostgresDb(); if (user) { @@ -219,7 +180,7 @@ async function createDBRecords(stackName, user, knexOverride) { * @param {string} user - A username to add as an authorized user for the API. * @param {string} stackName - The name of local stack. Used to prefix stack resources. * @param {bool} reseed - boolean to control whether to load new data into - * Postgres and Elasticsearch. + * Postgres. */ async function serveApi(user, stackName = localStackName, reseed = true) { const port = process.env.PORT || 5001; @@ -320,18 +281,6 @@ async function serveDistributionApi(stackName = localStackName, done) { return distributionApp.listen(port, done); } -/** - * Resets Elasticsearch - * - * @param {string} stackName - defaults to local stack, 'localrun' - * @param {string} systemBucket - defaults to 'localbucket' - */ -function eraseDataStack( - stackName = localStackName -) { - return initializeLocalElasticsearch(stackName); -} - /** * Removes all additional data from tables and repopulates with original data. * @@ -353,7 +302,6 @@ async function resetTables( } module.exports = { - eraseDataStack, serveApi, serveDistributionApi, resetTables, diff --git a/packages/api/bin/serveUtils.js b/packages/api/bin/serveUtils.js index 4cba4f4babe..f3d6f277987 100644 --- a/packages/api/bin/serveUtils.js +++ b/packages/api/bin/serveUtils.js @@ -17,6 +17,7 @@ const { ProviderPgModel, ReconciliationReportPgModel, RulePgModel, + translateApiAsyncOperationToPostgresAsyncOperation, translateApiCollectionToPostgresCollection, translateApiExecutionToPostgresExecution, translateApiGranuleToPostgresGranule, @@ -44,6 +45,7 @@ async function erasePostgresTables(knex) { const granulesExecutionsPgModel = new GranulesExecutionsPgModel(); const pdrPgModel = new PdrPgModel(); const providerPgModel = new ProviderPgModel(); + const reconReportPgModel = new ReconciliationReportPgModel(); const rulePgModel = new RulePgModel(); await granulesExecutionsPgModel.delete(knex, {}); @@ -56,6 +58,7 @@ async function erasePostgresTables(knex) { await rulePgModel.delete(knex, {}); await collectionPgModel.delete(knex, {}); await providerPgModel.delete(knex, {}); + await reconReportPgModel.delete(knex, {}); } async function resetPostgresDb() { @@ -79,6 +82,22 @@ async function resetPostgresDb() { await erasePostgresTables(knex); } +async function addAsyncOperations(asyncOperations) { + const knex = await getKnexClient({ + env: { + ...envParams, + ...localStackConnectionEnv, + }, + }); + const asyncOperationPgModel = new AsyncOperationPgModel(); + return await Promise.all( + asyncOperations.map(async (r) => { + const dbRecord = await translateApiAsyncOperationToPostgresAsyncOperation(r, knex); + await asyncOperationPgModel.create(knex, dbRecord); + }) + ); +} + async function addCollections(collections) { const knex = await getKnexClient({ env: { @@ -210,7 +229,6 @@ async function addPdrs(pdrs) { ); } -// TODO this is dynamodb async function addReconciliationReports(reconciliationReports) { const knex = await getKnexClient({ env: { @@ -229,6 +247,7 @@ async function addReconciliationReports(reconciliationReports) { 
module.exports = { resetPostgresDb, + addAsyncOperations, addProviders, addCollections, addExecutions, diff --git a/packages/db/src/search/CollectionSearch.ts b/packages/db/src/search/CollectionSearch.ts index a2f638616ba..a1d891d0aac 100644 --- a/packages/db/src/search/CollectionSearch.ts +++ b/packages/db/src/search/CollectionSearch.ts @@ -1,9 +1,12 @@ import { Knex } from 'knex'; +import omitBy from 'lodash/omitBy'; import pick from 'lodash/pick'; import Logger from '@cumulus/logger'; import { CollectionRecord } from '@cumulus/types/api/collections'; import { BaseSearch } from './BaseSearch'; +import { convertQueryStringToDbQueryParameters } from './queries'; +import { GranuleSearch } from './GranuleSearch'; import { DbQueryParameters, QueryEvent } from '../types/search'; import { translatePostgresCollectionToApiCollection } from '../translate/collections'; import { PostgresCollectionRecord } from '../types/collection'; @@ -27,6 +30,10 @@ interface CollectionRecordApi extends CollectionRecord { stats?: Statuses, } +const granuleFields = ['createdAt', 'granuleId', 'timestamp', 'updatedAt']; +const isGranuleField = (_value: any, key: string): boolean => + granuleFields.includes(key.split('__')[0]); + /** * Class to build and execute db search query for collections */ @@ -39,6 +46,13 @@ export class CollectionSearch extends BaseSearch { super({ queryStringParameters }, 'collection'); this.active = (active === 'true'); this.includeStats = (includeStats === 'true'); + + // for active collection search, omit the fields which are for searching granules + if (this.active) { + this.dbQueryParameters = convertQueryStringToDbQueryParameters( + this.type, omitBy(this.queryStringParameters, isGranuleField) + ); + } } /** @@ -65,43 +79,47 @@ export class CollectionSearch extends BaseSearch { } /** - * Build queries for range fields + * Build subquery for active collections + * The subquery will search granules * - * @param params - * @param params.knex - db client - * @param [params.countQuery] - query builder for getting count - * @param params.searchQuery - query builder for search - * @param [params.dbQueryParameters] - db query parameters + * @param knex - db client + * @returns granule query */ - protected buildRangeQuery(params: { - knex: Knex, - countQuery: Knex.QueryBuilder, - searchQuery: Knex.QueryBuilder, - dbQueryParameters?: DbQueryParameters, - }) { - if (!this.active) { - super.buildRangeQuery(params); - return; - } - + private buildSubQueryForActiveCollections(knex: Knex): Knex.QueryBuilder { const granulesTable = TableNames.granules; - const { knex, countQuery, searchQuery, dbQueryParameters } = params; - const { range = {} } = dbQueryParameters ?? 
this.dbQueryParameters; + const granuleSearch = new GranuleSearch({ queryStringParameters: this.queryStringParameters }); + const { countQuery: subQuery } = granuleSearch.buildSearchForActiveCollections(knex); + + subQuery + .clear('select') + .select(1) + .where(`${granulesTable}.collection_cumulus_id`, knex.raw(`${this.tableName}.cumulus_id`)) + .limit(1); + return subQuery; + } - const subQuery = knex.select(1).from(granulesTable) - .where(`${granulesTable}.collection_cumulus_id`, knex.raw(`${this.tableName}.cumulus_id`)); + /** + * Build the search query + * + * @param knex - DB client + * @returns queries for getting count and search result + */ + protected buildSearch(knex: Knex) + : { + countQuery?: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const queries = super.buildSearch(knex); + if (!this.active) { + return queries; + } - Object.entries(range).forEach(([name, rangeValues]) => { - if (rangeValues.gte) { - subQuery.where(`${granulesTable}.${name}`, '>=', rangeValues.gte); - } - if (rangeValues.lte) { - subQuery.where(`${granulesTable}.${name}`, '<=', rangeValues.lte); - } - }); - subQuery.limit(1); + const subQuery = this.buildSubQueryForActiveCollections(knex); + const { countQuery, searchQuery } = queries; + [countQuery, searchQuery].forEach((query) => query?.whereExists(subQuery)); - [countQuery, searchQuery].forEach((query) => query.whereExists(subQuery)); + log.debug(`buildSearch returns countQuery: ${countQuery?.toSQL().sql}, searchQuery: ${searchQuery.toSQL().sql}`); + return { countQuery, searchQuery }; } /** @@ -114,22 +132,22 @@ export class CollectionSearch extends BaseSearch { private async retrieveGranuleStats(collectionCumulusIds: number[], knex: Knex) : Promise { const granulesTable = TableNames.granules; - const statsQuery = knex(granulesTable) + let statsQuery = knex(granulesTable); + + if (this.active) { + const granuleSearch = new GranuleSearch({ + queryStringParameters: this.queryStringParameters, + }); + const { countQuery } = granuleSearch.buildSearchForActiveCollections(knex); + statsQuery = countQuery.clear('select'); + } + + statsQuery .select(`${granulesTable}.collection_cumulus_id`, `${granulesTable}.status`) .count('*') .groupBy(`${granulesTable}.collection_cumulus_id`, `${granulesTable}.status`) .whereIn(`${granulesTable}.collection_cumulus_id`, collectionCumulusIds); - if (this.active) { - Object.entries(this.dbQueryParameters?.range ?? {}).forEach(([name, rangeValues]) => { - if (rangeValues.gte) { - statsQuery.where(`${granulesTable}.${name}`, '>=', rangeValues.gte); - } - if (rangeValues.lte) { - statsQuery.where(`${granulesTable}.${name}`, '<=', rangeValues.lte); - } - }); - } log.debug(`retrieveGranuleStats statsQuery: ${statsQuery?.toSQL().sql}`); const results = await statsQuery; const reduced = results.reduce((acc, record) => { diff --git a/packages/db/src/search/GranuleSearch.ts b/packages/db/src/search/GranuleSearch.ts index 7ff78db5e7b..1ff9a909435 100644 --- a/packages/db/src/search/GranuleSearch.ts +++ b/packages/db/src/search/GranuleSearch.ts @@ -108,10 +108,33 @@ export class GranuleSearch extends BaseSearch { } } + /** + * Build the search query for active collections. + * If time params are specified the query will search granules that have been updated + * in that time frame. If granuleId or providerId are provided, it will filter those as well. 
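In combination with the constructor change above (granule-level parameters are omitted from the collection query itself), an active-collection search pushes granuleId/provider/time-frame filters into this granule subquery and into the stats query. A minimal usage sketch, assuming a configured Knex client and the `field__operator` query-string convention the tests use for `__in` and range parameters:

```js
// Hypothetical usage sketch, not part of this patch; `knex` is assumed to be a
// configured client for the Cumulus Postgres database.
const { CollectionSearch } = require('@cumulus/db');

const listActiveCollections = async (knex) => {
  const queryStringParameters = {
    active: 'true',                        // keep only collections with matching granules
    includeStats: 'true',                  // attach per-status granule counts to each result
    granuleId__in: 'granule-1,granule-2',  // granule-level filter, applied via the subquery
    timestamp__from: '1688888800000',      // assumed range parameter: granules updated after this time
    limit: '50',
  };
  const search = new CollectionSearch({ queryStringParameters });
  const { meta, results } = await search.query(knex);
  return { count: meta.count, stats: results.map((collection) => collection.stats) };
};
```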
+ * + * @param knex - DB client + * @returns queries for getting count and search result + */ + public buildSearchForActiveCollections(knex: Knex) + : { + countQuery: Knex.QueryBuilder, + searchQuery: Knex.QueryBuilder, + } { + const { countQuery, searchQuery } = this.buildBasicQuery(knex); + this.buildTermQuery({ countQuery, searchQuery }); + this.buildTermsQuery({ countQuery, searchQuery }); + this.buildRangeQuery({ knex, countQuery, searchQuery }); + + log.debug(`buildSearchForActiveCollections returns countQuery: ${countQuery?.toSQL().sql}, searchQuery: ${searchQuery.toSQL().sql}`); + return { countQuery, searchQuery }; + } + /** * Translate postgres records to api records * * @param pgRecords - postgres records returned from query + * @param knex - DB client * @returns translated api records */ protected async translatePostgresRecordsToApiRecords(pgRecords: GranuleRecord[], knex: Knex) diff --git a/packages/db/src/search/StatsSearch.ts b/packages/db/src/search/StatsSearch.ts index 0b45814b036..9d31efa4c9e 100644 --- a/packages/db/src/search/StatsSearch.ts +++ b/packages/db/src/search/StatsSearch.ts @@ -252,7 +252,6 @@ class StatsSearch extends BaseSearch { * @param params * @param params.searchQuery - the search query * @param [params.dbQueryParameters] - the db query parameters - * @returns the updated search query based on queryStringParams */ protected buildTermQuery(params: { searchQuery: Knex.QueryBuilder, @@ -265,7 +264,7 @@ class StatsSearch extends BaseSearch { searchQuery.whereRaw(`${this.tableName}.error ->> 'Error' is not null`); } - return super.buildTermQuery({ + super.buildTermQuery({ ...params, dbQueryParameters: { term: omit(term, 'error.Error') }, }); diff --git a/packages/db/src/translate/async_operations.ts b/packages/db/src/translate/async_operations.ts index f87255d14c7..f454fb14497 100644 --- a/packages/db/src/translate/async_operations.ts +++ b/packages/db/src/translate/async_operations.ts @@ -1,3 +1,4 @@ +import omit from 'lodash/omit'; import { toSnake } from 'snake-camel'; import { ApiAsyncOperation } from '@cumulus/types/api/async_operations'; import Logger from '@cumulus/logger'; @@ -38,7 +39,7 @@ export const translateApiAsyncOperationToPostgresAsyncOperation = ( record: ApiAsyncOperation ): PostgresAsyncOperation => { // fix for old implementation of async-operation output assignment - const translatedRecord = toSnake(record); + const translatedRecord = toSnake(omit(record, 'timestamp')); if (record.output === 'none') { delete translatedRecord.output; } else if (record.output !== undefined) { diff --git a/packages/db/tests/lib/test-granule.js b/packages/db/tests/lib/test-granule.js index ff9797d771b..6386b512899 100644 --- a/packages/db/tests/lib/test-granule.js +++ b/packages/db/tests/lib/test-granule.js @@ -178,14 +178,14 @@ test('upsertGranuleWithExecutionJoinRecord() handles multiple executions for a g } ); t.deepEqual( - await granulesExecutionsPgModel.search( + orderBy(await granulesExecutionsPgModel.search( knex, { granule_cumulus_id: granuleCumulusId } - ), - [executionCumulusId, secondExecutionCumulusId].map((executionId) => ({ + ), 'execution_cumulus_id'), + orderBy([executionCumulusId, secondExecutionCumulusId].map((executionId) => ({ granule_cumulus_id: granuleCumulusId, execution_cumulus_id: executionId, - })) + })), 'execution_cumulus_id') ); }); diff --git a/packages/db/tests/search/test-CollectionSearch.js b/packages/db/tests/search/test-CollectionSearch.js index 595ebb81cb0..cf6e4c54be7 100644 --- 
a/packages/db/tests/search/test-CollectionSearch.js +++ b/packages/db/tests/search/test-CollectionSearch.js @@ -10,8 +10,10 @@ const { generateLocalTestDb, CollectionPgModel, GranulePgModel, + ProviderPgModel, fakeCollectionRecordFactory, fakeGranuleRecordFactory, + fakeProviderRecordFactory, migrationDir, } = require('../../dist'); @@ -27,11 +29,10 @@ test.before(async (t) => { t.context.knex = knex; t.context.collectionPgModel = new CollectionPgModel(); - const collections = []; t.context.collectionSearchTmestamp = 1579352700000; - range(100).map((num) => ( - collections.push(fakeCollectionRecordFactory({ + const collections = range(100).map((num) => ( + fakeCollectionRecordFactory({ name: num % 2 === 0 ? 'testCollection' : 'fakeCollection', version: num, cumulus_id: num, @@ -39,27 +40,38 @@ test.before(async (t) => { process: num % 2 === 0 ? 'ingest' : 'publish', report_to_ems: num % 2 === 0, url_path: num % 2 === 0 ? 'https://fakepath.com' : undefined, - })) + granule_id_validation_regex: num % 2 === 0 ? 'testGranuleId' : 'fakeGranuleId', + }) )); + // Create provider + t.context.providerPgModel = new ProviderPgModel(); + t.context.provider = fakeProviderRecordFactory(); + + const [pgProvider] = await t.context.providerPgModel.create( + t.context.knex, + t.context.provider + ); + t.context.providerCumulusId = pgProvider.cumulus_id; + t.context.granulePgModel = new GranulePgModel(); - const granules = []; const statuses = ['queued', 'failed', 'completed', 'running']; t.context.granuleSearchTmestamp = 1688888800000; - - range(1000).map((num) => ( - granules.push(fakeGranuleRecordFactory({ + t.context.granules = range(1000).map((num) => ( + fakeGranuleRecordFactory({ // collection with cumulus_id 0-9 each has 11 granules, // collection 10-98 has 10 granules, and collection 99 has 0 granule collection_cumulus_id: num % 99, cumulus_id: 100 + num, + // when collection_cumulus_id is odd number(1,3,5...97), its granules have provider + provider_cumulus_id: (num % 99 % 2) ? t.context.providerCumulusId : undefined, status: statuses[num % 4], // granule with collection_cumulus_id n has timestamp granuleSearchTmestamp + n, // except granule 98 (with collection 98 ) which has timestamp granuleSearchTmestamp - 1 updated_at: num === 98 ? 
new Date(t.context.granuleSearchTmestamp - 1) : new Date(t.context.granuleSearchTmestamp + (num % 99)), - })) + }) )); await t.context.collectionPgModel.insert( @@ -69,7 +81,7 @@ test.before(async (t) => { await t.context.granulePgModel.insert( t.context.knex, - granules + t.context.granules ); }); @@ -197,6 +209,15 @@ test('CollectionSearch supports term search for string field', async (t) => { const response3 = await dbSearch3.query(knex); t.is(response3.meta.count, 50); t.is(response3.results?.length, 50); + + queryStringParameters = { + limit: 200, + granuleId: 'testGranuleId', + }; + const dbSearch4 = new CollectionSearch({ queryStringParameters }); + const response4 = await dbSearch4.query(knex); + t.is(response4.meta.count, 50); + t.is(response4.results?.length, 50); }); test('CollectionSearch supports range search', async (t) => { @@ -320,6 +341,15 @@ test('CollectionSearch supports terms search', async (t) => { response = await dbSearch.query(knex); t.is(response.meta.count, 1); t.is(response.results?.length, 1); + + queryStringParameters = { + limit: 200, + granuleId__in: ['testGranuleId', 'non-existent'].join(','), + }; + dbSearch = new CollectionSearch({ queryStringParameters }); + response = await dbSearch.query(knex); + t.is(response.meta.count, 50); + t.is(response.results?.length, 50); }); test('CollectionSearch supports search when collection field does not match the given value', async (t) => { @@ -396,6 +426,29 @@ test('CollectionSearch supports search for active collections', async (t) => { t.deepEqual(response.results[98].stats, expectedStats98); }); +test('CollectionSearch supports search for active collections by infix/prefix', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: '200', + active: 'true', + includeStats: 'true', + infix: 'Collection', + prefix: 'fake', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + // collection_cumulus_id 1 + const expectedStats0 = { queued: 3, completed: 2, failed: 3, running: 3, total: 11 }; + // collection_cumulus_id 97 + const expectedStats48 = { queued: 3, completed: 2, failed: 3, running: 2, total: 10 }; + + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); + t.deepEqual(response.results[0].stats, expectedStats0); + t.deepEqual(response.results[48].stats, expectedStats48); +}); + test('CollectionSearch support search for active collections and stats with granules updated in the given time frame', async (t) => { const { knex } = t.context; const queryStringParameters = { @@ -409,13 +462,59 @@ test('CollectionSearch support search for active collections and stats with gran const dbSearch = new CollectionSearch({ queryStringParameters }); const response = await dbSearch.query(knex); - const expectedStats10 = { queued: 2, completed: 3, failed: 3, running: 2, total: 10 }; + const expectedStats0 = { queued: 2, completed: 3, failed: 3, running: 2, total: 10 }; // collection with cumulus_id 98 has 9 granules in the time frame const expectedStats98 = { queued: 2, completed: 2, failed: 3, running: 2, total: 9 }; // collections with cumulus_id 0-9 are filtered out t.is(response.meta.count, 89); t.is(response.results?.length, 89); - t.deepEqual(response.results[0].stats, expectedStats10); + t.deepEqual(response.results[0].stats, expectedStats0); t.deepEqual(response.results[88].stats, expectedStats98); }); + +test('CollectionSearch support search for active collections and stats with granules from a given 
provider', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: '200', + active: 'true', + includeStats: 'true', + provider: t.context.provider.name, + sort_by: 'version', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + // collection_cumulus_id 1 + const expectedStats0 = { queued: 3, completed: 2, failed: 3, running: 3, total: 11 }; + // collection_cumulus_id 97 + const expectedStats48 = { queued: 3, completed: 2, failed: 3, running: 2, total: 10 }; + + t.is(response.meta.count, 49); + t.is(response.results?.length, 49); + t.deepEqual(response.results[0].stats, expectedStats0); + t.deepEqual(response.results[48].stats, expectedStats48); +}); + +test('CollectionSearch support search for active collections and stats with granules in the granuleId list', async (t) => { + const { knex } = t.context; + const queryStringParameters = { + limit: '200', + active: 'true', + includeStats: 'true', + granuleId__in: [t.context.granules[0].granule_id, t.context.granules[5].granule_id].join(','), + sort_by: 'version', + }; + const dbSearch = new CollectionSearch({ queryStringParameters }); + const response = await dbSearch.query(knex); + + // collection_cumulus_id 0 + const expectedStats0 = { queued: 1, completed: 0, failed: 0, running: 0, total: 1 }; + // collection_cumulus_id 5 + const expectedStats1 = { queued: 0, completed: 0, failed: 1, running: 0, total: 1 }; + + t.is(response.meta.count, 2); + t.is(response.results?.length, 2); + t.deepEqual(response.results[0].stats, expectedStats0); + t.deepEqual(response.results[1].stats, expectedStats1); +}); From 6a1cf9581127ac034d6412b7ea997feb26a79539 Mon Sep 17 00:00:00 2001 From: Jonathan Kovarik Date: Mon, 25 Nov 2024 10:07:35 -0700 Subject: [PATCH 58/61] CUMULUS-3697: Remove searchContext from granules LIST endpoint ( Jk/cumulus 3697 update api) (#3863) * Update granule list endpoint/tests to no longer use searchContext * Fix unit test title/.only status * Remove unneeded test --- CHANGELOG.md | 8 ++ packages/api/package.json | 1 + .../granules/test-searchafter-10k.js | 121 ++++++++++-------- .../api/tests/endpoints/test-granules-get.js | 80 +----------- packages/api/tests/endpoints/test-granules.js | 32 ----- .../api/tests/helpers/create-test-data.js | 3 +- 6 files changed, 77 insertions(+), 168 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e04f9ede9f1..88bae24053f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,8 +27,16 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE - `PREFIX` is your Cumulus deployment prefix. - `OUTFILE` (**optional**) is the filepath where the Lambda output will be saved. + +#### CUMULUS-3967 + +External tooling making use of `searchContext` in the `GET` `/granules/` endpoint will need to update to make use of standard pagination via `limit` and `page` scrolling, as `searchContext` is no longer supported/is an ES specific feature. + ### Replace ElasticSearch Phase 2 +- **CUMULUS-3967** + - Remove `searchContext` from API granules GET `/granules` endpoint. 
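The CUMULUS-3967 migration note above tells external tooling to switch from `searchContext` to plain `limit`/`page` pagination. A hedged client-side sketch of that loop (the API base URL and bearer token are assumed inputs, and the `meta`/`results` response shape matches the list responses exercised in the endpoint tests):

```js
// Hypothetical external-tooling sketch, not part of this patch: page through
// GET /granules with limit/page now that searchContext is no longer returned.
// Requires Node 18+ for the global fetch; apiBaseUrl and token are assumed inputs.
const fetchAllGranules = async (apiBaseUrl, token, limit = 100) => {
  const granules = [];
  let page = 1;
  let batch;
  do {
    const response = await fetch(
      `${apiBaseUrl}/granules?limit=${limit}&page=${page}`,
      { headers: { Accept: 'application/json', Authorization: `Bearer ${token}` } }
    );
    ({ results: batch } = await response.json());
    granules.push(...batch);
    page += 1;
  } while (batch.length === limit);
  return granules;
};
```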
+ - Update relevant tests to validate expected behavior utilizing postgres pagination - **CUMULUS-3229** - Remove ElasticSearch queries from Rule LIST endpoint - **CUMULUS-3230** diff --git a/packages/api/package.json b/packages/api/package.json index ec592ddcbd6..8ffa64a8f1c 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -106,6 +106,7 @@ "p-map": "^4.0.0", "p-retry": "^2.0.0", "p-wait-for": "^2.0.1", + "p-times": "^4.0.0", "querystring": "^0.2.0", "saml2-js": "^4.0.0", "semver": "^7.3.2", diff --git a/packages/api/tests/endpoints/granules/test-searchafter-10k.js b/packages/api/tests/endpoints/granules/test-searchafter-10k.js index ccc927c01ee..8966033d661 100644 --- a/packages/api/tests/endpoints/granules/test-searchafter-10k.js +++ b/packages/api/tests/endpoints/granules/test-searchafter-10k.js @@ -2,10 +2,9 @@ const test = require('ava'); const request = require('supertest'); -const { randomId } = require('@cumulus/common/test-utils'); -const { getEsClient } = require('@cumulus/es-client/search'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const { loadGranules, granuleFactory } = require('@cumulus/es-client/tests/helpers/helpers'); +const cryptoRandomString = require('crypto-random-string'); + +const { randomId, randomString } = require('@cumulus/common/test-utils'); process.env.AccessTokensTable = randomId('token'); process.env.stackName = randomId('stackname'); @@ -13,70 +12,80 @@ process.env.system_bucket = randomId('system-bucket'); process.env.TOKEN_SECRET = randomId('secret'); process.env.backgroundQueueUrl = randomId('backgroundQueueUrl'); +const { + CollectionPgModel, + destroyLocalTestDb, + fakeCollectionRecordFactory, + fakeGranuleRecordFactory, + generateLocalTestDb, + GranulePgModel, + localStackConnectionEnv, + migrationDir, +} = require('@cumulus/db'); + // import the express app after setting the env variables const { app } = require('../../../app'); test.before(async (t) => { + const concurrency = 200; + const granuleTotal = 10001; + const { default: pTimes } = await import('p-times'); process.env.NODE_ENV = 'test'; - t.context.esAlias = randomId('esalias'); - t.context.esIndex = randomId('esindex'); - process.env.ES_INDEX = t.context.esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: t.context.esIndex, - alias: t.context.esAlias, + process.env.auth_mode = 'private'; + process.env.dbMaxPool = concurrency; + + // Generate a local test postGres database + t.context.testDbName = `granules_${cryptoRandomString({ length: 10 })}`; + const { knex, knexAdmin } = await generateLocalTestDb(t.context.testDbName, migrationDir); + t.context.knex = knex; + t.context.knexAdmin = knexAdmin; + process.env = { + ...process.env, + ...localStackConnectionEnv, + PG_DATABASE: t.context.testDbName, + }; + + const granulePgModel = new GranulePgModel(); + + const collectionName = randomString(5); + const collectionVersion = randomString(3); + const testPgCollection = fakeCollectionRecordFactory({ + name: collectionName, + version: collectionVersion, }); - t.context.esClient = await getEsClient(); - process.env.auth_mode = 'private'; + const collectionPgModel = new CollectionPgModel(); + const collectionPgRecords = await collectionPgModel.create( + knex, + testPgCollection + ); + // iterate 10k times + await pTimes(granuleTotal, ((index) => { + if (index % 1000 === 0 && index !== 0) { + console.log('Creating granule', index); + } + const newPgGranule = fakeGranuleRecordFactory({ + granule_id: randomString(25), + 
collection_cumulus_id: collectionPgRecords[0].cumulus_id, + }); + return granulePgModel.create(knex, newPgGranule); + }), { concurrency }); }); test.after.always(async (t) => { delete process.env.auth_mode; - await t.context.esClient.client.indices.delete({ index: t.context.esIndex }); + await destroyLocalTestDb({ + knex: t.context.knex, + knexAdmin: t.context.knexAdmin, + testDbName: t.context.testDbName, + }); }); -// TODO postgres query doesn't return searchContext -test.serial.skip('CUMULUS-2930 /GET granules allows searching past 10K results windows with searchContext', async (t) => { - const numGranules = 12 * 1000; - - // create granules in batches of 1000 - for (let i = 0; i < numGranules; i += 1000) { - const granules = granuleFactory(1000); - // eslint-disable-next-line no-await-in-loop - await loadGranules(granules, t); - console.log(`${i} of ${numGranules} loaded`); - } - console.log('Granules loaded.'); - - // expect numGranules / 100 loops since the api limit is 100; - const expectedLoops = 1 + (numGranules / 100); - let actualLoops = 0; - let lastResults = []; - let queryString = ''; - let searchContext = ''; - - do { - actualLoops += 1; - // eslint-disable-next-line no-await-in-loop - const response = await request(app) - .get(`/granules?limit=100${queryString}`) - .set('Accept', 'application/json') - .expect(200); - - const results = response.body.results; - t.notDeepEqual(results, lastResults); - lastResults = results; - - searchContext = response.body.meta.searchContext; - if (searchContext) { - t.is(results.length, 100); - } else { - t.is(results.length, 0); - } - queryString = `&searchContext=${response.body.meta.searchContext}`; - } while (searchContext !== undefined); +test.serial('CUMULUS-2930/3967 /GET granules allows searching past 10K results windows using pagination', async (t) => { + const response = await request(app) + .get('/granules?limit=100&page=101') + .set('Accept', 'application/json') + .expect(200); - t.is(lastResults.length, 0); - t.is(actualLoops, expectedLoops); + t.is(response.body.results.length, 1); }); diff --git a/packages/api/tests/endpoints/test-granules-get.js b/packages/api/tests/endpoints/test-granules-get.js index 493bf89fd69..adc725cb4da 100644 --- a/packages/api/tests/endpoints/test-granules-get.js +++ b/packages/api/tests/endpoints/test-granules-get.js @@ -293,52 +293,6 @@ test.afterEach(async (t) => { }); }); -// TODO postgres query doesn't return searchContext -test.serial.skip('default lists and paginates correctly with search_after', async (t) => { - const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); - const response = await request(app) - .get('/granules') - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const { meta, results } = response.body; - t.is(results.length, 3); - t.is(meta.stack, process.env.stackName); - t.is(meta.table, 'granule'); - t.is(meta.count, 3); - results.forEach((r) => { - t.true(granuleIds.includes(r.granuleId)); - }); - // default paginates correctly with search_after - const firstResponse = await request(app) - .get('/granules?limit=1') - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const { meta: firstMeta, results: firstResults } = firstResponse.body; - t.is(firstResults.length, 1); - t.is(firstMeta.page, 1); - t.truthy(firstMeta.searchContext); - - const newResponse = await request(app) - .get(`/granules?limit=1&page=2&searchContext=${firstMeta.searchContext}`) - 
.set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const { meta: newMeta, results: newResults } = newResponse.body; - t.is(newResults.length, 1); - t.is(newMeta.page, 2); - t.truthy(newMeta.searchContext); - - t.true(granuleIds.includes(results[0].granuleId)); - t.true(granuleIds.includes(newResults[0].granuleId)); - t.not(results[0].granuleId, newResults[0].granuleId); - t.not(meta.searchContext === newMeta.searchContext); -}); - test.serial('default lists and paginates correctly from querying database', async (t) => { const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); const response = await request(app) @@ -543,39 +497,7 @@ test.serial('GET returns a 404 response if the granule is not found', async (t) t.is(message, 'Granule not found'); }); -// TODO postgres query doesn't return searchContext -test.serial.skip('default paginates correctly with search_after', async (t) => { - const response = await request(app) - .get('/granules?limit=1') - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); - - const { meta, results } = response.body; - t.is(results.length, 1); - t.is(meta.page, 1); - t.truthy(meta.searchContext); - - const newResponse = await request(app) - .get(`/granules?limit=1&page=2&searchContext=${meta.searchContext}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const { meta: newMeta, results: newResults } = newResponse.body; - t.is(newResults.length, 1); - t.is(newMeta.page, 2); - t.truthy(newMeta.searchContext); - console.log(`default paginates granuleIds: ${JSON.stringify(granuleIds)}, results: ${results[0].granuleId}, ${newResults[0].granuleId}`); - t.true(granuleIds.includes(results[0].granuleId)); - t.true(granuleIds.includes(newResults[0].granuleId)); - t.not(results[0].granuleId, newResults[0].granuleId); - t.not(meta.searchContext === newMeta.searchContext); -}); - -test.only('LIST endpoint returns search result correctly', async (t) => { +test.serial('LIST endpoint returns search result correctly', async (t) => { const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); const searchParams = new URLSearchParams({ granuleId: granuleIds[3], diff --git a/packages/api/tests/endpoints/test-granules.js b/packages/api/tests/endpoints/test-granules.js index bf4d8e29c9e..605cde38e8e 100644 --- a/packages/api/tests/endpoints/test-granules.js +++ b/packages/api/tests/endpoints/test-granules.js @@ -3352,38 +3352,6 @@ test.serial('PUT returns 404 if collection is not part of URI', async (t) => { t.is(response.statusCode, 404); }); -// TODO postgres query doesn't return searchContext -test.serial.skip('default paginates correctly with search_after', async (t) => { - const response = await request(app) - .get('/granules?limit=1') - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const granuleIds = t.context.fakePGGranules.map((i) => i.granule_id); - - const { meta, results } = response.body; - t.is(results.length, 1); - t.is(meta.page, 1); - t.truthy(meta.searchContext); - - const newResponse = await request(app) - .get(`/granules?limit=1&page=2&searchContext=${meta.searchContext}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const { meta: newMeta, results: newResults } = newResponse.body; - 
t.is(newResults.length, 1); - t.is(newMeta.page, 2); - t.truthy(newMeta.searchContext); - console.log(`default paginates granuleIds: ${JSON.stringify(granuleIds)}, results: ${results[0].granuleId}, ${newResults[0].granuleId}`); - t.true(granuleIds.includes(results[0].granuleId)); - t.true(granuleIds.includes(newResults[0].granuleId)); - t.not(results[0].granuleId, newResults[0].granuleId); - t.not(meta.searchContext === newMeta.searchContext); -}); - test.serial('PUT returns 400 for version value less than the configured value', async (t) => { const granuleId = t.context.createGranuleId(); const response = await request(app) diff --git a/packages/api/tests/helpers/create-test-data.js b/packages/api/tests/helpers/create-test-data.js index 1867f7a66a7..09697ce08e8 100644 --- a/packages/api/tests/helpers/create-test-data.js +++ b/packages/api/tests/helpers/create-test-data.js @@ -47,8 +47,9 @@ const metadataFileFixture = fs.readFileSync(path.resolve(__dirname, '../data/met * @param {Knex} params.dbClient - Knex client * @param {number} params.executionCumulusId - executionId for execution record to link * @param {number} params.collectionId - collectionId for the granule's parent collection - * @param {number} params.collectionCumulusId - cumulus_id for the granule's parent collection * @param {boolean} params.published - if the granule should be marked published to CMR + * @param {Object} [params.granuleParams] - additional granule parameters + * @param {number} [params.collectionCumulusId] - cumulus_id for the granule's parent collection * @returns {Object} fake granule object */ async function createGranuleAndFiles({ From 2e86f6fc450a44bcdc766171fd59996370247d35 Mon Sep 17 00:00:00 2001 From: jennyhliu Date: Tue, 7 Jan 2025 19:51:41 -0500 Subject: [PATCH 59/61] update test coverage --- packages/async-operations/.nycrc.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/async-operations/.nycrc.json b/packages/async-operations/.nycrc.json index 6f46a3ffad1..3729b12e3da 100644 --- a/packages/async-operations/.nycrc.json +++ b/packages/async-operations/.nycrc.json @@ -2,6 +2,6 @@ "extends": "../../nyc.config.js", "statements": 97.0, "functions": 97.0, - "branches": 88.85, + "branches": 88.4, "lines": 97.0 } \ No newline at end of file From 230bf5ba07cbc730e505e0904ff0f23f59589b8a Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Mon, 27 Jan 2025 12:08:48 -0500 Subject: [PATCH 60/61] CUMULUS-3847: Remove any remaining ES indexing functions and tests (#3897) * CUMULUS-3847: first round of changes, remove es indexing except for Cloud Metrics usage * CUMULUS-3847: add changelog placeholder * CUMULUS-3847: fix lint issues, more index removal * CUMULUS-3874: add a few questions as comments * CUMULUS-3847: more lint fixes * CUMULUS-3847: more removals, add back lambda to modify/comment instead of full removal * CUMULUS-3847: remove lambda exports, comment out cleanExecutions lambda and test code * CUMULUS-3847: more removals. 
try to disable max-len on commented out files * CUMULUS-3847: remove tf for removed bootstrap lambda * CUMULUS-3847: modify eslint rule disable * CUMULUS-3847: remove unused var * CUMULUS-3847: remove resource reference * CUMULUS-3847: remove extra line * CUMULUS-3847: fill in changelog, clear out commented code in cleanExecutions, remove test file * CUMULUS-3847: another lint fix * CUMULUS-3847: pr feedback - clarify comments, modify and add logging * CUMULUS-3847: remove es references from tf-inventory * CUMULUS-3847: fix lint error * CUMULUS-3847: remove additional references --- CHANGELOG.md | 5 + example/deployments/cumulus/sandbox.tfvars | 5 - .../AsyncOperationRunnerFailingLambdaSpec.js | 2 +- ...yncOperationRunnerNonExistentLambdaSpec.js | 2 +- ...ncOperationRunnerNonExistentPayloadSpec.js | 2 +- .../AsyncOperationRunnerNonJsonPayloadSpec.js | 2 +- ...syncOperationRunnerSuccessfulLambdaSpec.js | 2 +- packages/api/app/routes.js | 3 - .../ecs/async-operation/tests/test-index.js | 2 +- packages/api/endpoints/elasticsearch.js | 233 ------ packages/api/lambdas/bootstrap.js | 35 - packages/api/lambdas/bulk-operation.js | 8 +- packages/api/lambdas/cleanExecutions.js | 133 +--- .../create-reconciliation-report-types.js | 1 - .../lambdas/create-reconciliation-report.js | 26 +- packages/api/lambdas/index-from-database.js | 324 -------- .../lambdas/process-s3-dead-letter-archive.js | 8 +- .../sf-event-sqs-to-db-records/index.js | 5 - .../sf-event-sqs-to-db-records/write-pdr.js | 27 - packages/api/lib/granules.js | 12 +- packages/api/lib/mmt.js | 16 +- packages/api/lib/orca.js | 2 +- packages/api/lib/reconciliationReport.js | 47 -- packages/api/lib/testUtils.js | 22 +- .../endpoints/providers/delete-provider.js | 22 - .../api/tests/endpoints/test-elasticsearch.js | 698 ------------------ .../api/tests/endpoints/test-executions.js | 2 +- .../sf-event-sqs-to-db-records/test-index.js | 28 - .../test-write-pdr.js | 100 +-- packages/api/tests/lambdas/test-bootstrap.js | 43 -- .../tests/lambdas/test-bulk-granule-delete.js | 23 +- .../api/tests/lambdas/test-bulk-operation.js | 2 +- .../api/tests/lambdas/test-cleanExecutions.js | 553 -------------- .../test-create-reconciliation-report.js | 128 ++-- .../tests/lambdas/test-index-from-database.js | 512 ------------- packages/api/tests/lib/test-ingest.js | 12 - .../tests/lib/test-reconciliationReport.js | 89 --- .../performance/lib/test-write-granules.js | 22 +- packages/api/webpack.config.js | 2 - .../tests/test-async_operations.js | 26 +- packages/aws-client/src/services.ts | 2 - packages/aws-client/tests/test-services.js | 22 - .../db/src/models/reconciliation_report.ts | 2 +- packages/db/src/test-utils.ts | 2 +- .../tests/translate/test-async-operations.js | 14 +- packages/tf-inventory/src/inventory.js | 6 +- packages/tf-inventory/tests/inventory.js | 20 +- tf-modules/archive/api.tf | 1 - tf-modules/archive/bootstrap.tf | 40 - tf-modules/archive/index_from_database.tf | 112 --- tf-modules/archive/reconciliation_report.tf | 2 - 51 files changed, 123 insertions(+), 3286 deletions(-) delete mode 100644 packages/api/endpoints/elasticsearch.js delete mode 100644 packages/api/lambdas/bootstrap.js delete mode 100644 packages/api/lambdas/index-from-database.js delete mode 100644 packages/api/tests/endpoints/test-elasticsearch.js delete mode 100644 packages/api/tests/lambdas/test-bootstrap.js delete mode 100644 packages/api/tests/lambdas/test-cleanExecutions.js delete mode 100644 packages/api/tests/lambdas/test-index-from-database.js delete mode 100644 
tf-modules/archive/bootstrap.tf delete mode 100644 tf-modules/archive/index_from_database.tf diff --git a/CHANGELOG.md b/CHANGELOG.md index a17db9f820c..e08043eaa04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,11 @@ External tooling making use of `searchContext` in the `GET` `/granules/` endpoin - Updated `@cumulus/api/bin/serveUtils` to no longer add records to ElasticSearch - Removed ElasticSearch from local API server code - Updated CollectionSearch to filter granule fields in addition to time frame for active collections +- **CUMULUS-3847** + - remove remaining ES indexing in code and tests + - for asyncOperations test data, change any ES related values to other options + - remove code from `@cumulus/api/lambdas/cleanExecutions` leaving a dummy handler, as the code worked with ES. lambda will be rewritten with CUMULUS-3982 + - remove `@cumulus/api/endpoints/elasticsearch`, `@cumulus/api/lambdas/bootstrap`, and `@cumulus/api/lambdas/index-from-database` ## [Unreleased] diff --git a/example/deployments/cumulus/sandbox.tfvars b/example/deployments/cumulus/sandbox.tfvars index 106ee0f8c1c..680caba85e5 100644 --- a/example/deployments/cumulus/sandbox.tfvars +++ b/example/deployments/cumulus/sandbox.tfvars @@ -45,11 +45,6 @@ csdap_host_url = "https://auth.csdap.uat.earthdatacloud.nasa.gov" default_s3_multipart_chunksize_mb = 128 -elasticsearch_client_config = { - create_reconciliation_report_es_scroll_duration = "8m" - create_reconciliation_report_es_scroll_size = 1500 -} - launchpad_api = "https://api.launchpad.nasa.gov/icam/api/sm/v1" launchpad_certificate = "launchpad.pfx" diff --git a/example/spec/serial/AsyncOperationRunnerFailingLambdaSpec.js b/example/spec/serial/AsyncOperationRunnerFailingLambdaSpec.js index adcea147676..29f5e5bfd63 100644 --- a/example/spec/serial/AsyncOperationRunnerFailingLambdaSpec.js +++ b/example/spec/serial/AsyncOperationRunnerFailingLambdaSpec.js @@ -49,7 +49,7 @@ describe('The AsyncOperation task runner executing a failing lambda function', ( id: asyncOperationId, taskArn: randomString(), description: 'Some description', - operationType: 'ES Index', + operationType: 'Bulk Granules', status: 'RUNNING', }; diff --git a/example/spec/serial/AsyncOperationRunnerNonExistentLambdaSpec.js b/example/spec/serial/AsyncOperationRunnerNonExistentLambdaSpec.js index 7306851040c..10e98ff91fe 100644 --- a/example/spec/serial/AsyncOperationRunnerNonExistentLambdaSpec.js +++ b/example/spec/serial/AsyncOperationRunnerNonExistentLambdaSpec.js @@ -43,7 +43,7 @@ describe('The AsyncOperation task runner running a non-existent lambda function' id: asyncOperationId, taskArn: randomString(), description: 'Some description', - operationType: 'ES Index', + operationType: 'Bulk Granules', status: 'RUNNING', }; diff --git a/example/spec/serial/AsyncOperationRunnerNonExistentPayloadSpec.js b/example/spec/serial/AsyncOperationRunnerNonExistentPayloadSpec.js index b12dc77c5aa..c9676b7449a 100644 --- a/example/spec/serial/AsyncOperationRunnerNonExistentPayloadSpec.js +++ b/example/spec/serial/AsyncOperationRunnerNonExistentPayloadSpec.js @@ -43,7 +43,7 @@ describe('The AsyncOperation task runner with a non-existent payload', () => { id: asyncOperationId, taskArn: randomString(), description: 'Some description', - operationType: 'ES Index', + operationType: 'Bulk Granules', status: 'RUNNING', }; diff --git a/example/spec/serial/AsyncOperationRunnerNonJsonPayloadSpec.js b/example/spec/serial/AsyncOperationRunnerNonJsonPayloadSpec.js index 77403068625..c05dbfc9ab9 100644 --- 
a/example/spec/serial/AsyncOperationRunnerNonJsonPayloadSpec.js +++ b/example/spec/serial/AsyncOperationRunnerNonJsonPayloadSpec.js @@ -52,7 +52,7 @@ describe('The AsyncOperation task runner with a non-JSON payload', () => { id: asyncOperationId, taskArn: randomString(), description: 'Some description', - operationType: 'ES Index', + operationType: 'Kinesis Replay', status: 'RUNNING', }; diff --git a/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js b/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js index 198dc8b8a9a..c497b91dbfc 100644 --- a/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js +++ b/example/spec/serial/AsyncOperationRunnerSuccessfulLambdaSpec.js @@ -50,7 +50,7 @@ describe('The AsyncOperation task runner executing a successful lambda function' const asyncOperationObject = { description: 'Some description', - operationType: 'ES Index', + operationType: 'Bulk Granules', id: asyncOperationId, taskArn: randomString(), status: 'RUNNING', diff --git a/packages/api/app/routes.js b/packages/api/app/routes.js index cf655198c86..25a8f76de78 100644 --- a/packages/api/app/routes.js +++ b/packages/api/app/routes.js @@ -23,7 +23,6 @@ const stats = require('../endpoints/stats'); const version = require('../endpoints/version'); const workflows = require('../endpoints/workflows'); const dashboard = require('../endpoints/dashboard'); -const elasticsearch = require('../endpoints/elasticsearch'); const deadLetterArchive = require('../endpoints/dead-letter-archive'); const { launchpadProtectedAuth } = require('./launchpadAuth'); const launchpadSaml = require('../endpoints/launchpadSaml'); @@ -110,8 +109,6 @@ router.delete('/tokenDelete/:token', token.deleteTokenEndpoint); router.use('/dashboard', dashboard); -router.use('/elasticsearch', ensureAuthorized, elasticsearch.router); - // Catch and send the error message down (instead of just 500: internal server error) router.use(defaultErrorHandler); diff --git a/packages/api/ecs/async-operation/tests/test-index.js b/packages/api/ecs/async-operation/tests/test-index.js index 97649c89bda..4475bef1e9c 100644 --- a/packages/api/ecs/async-operation/tests/test-index.js +++ b/packages/api/ecs/async-operation/tests/test-index.js @@ -32,7 +32,7 @@ test.beforeEach(async (t) => { t.context.testAsyncOperation = { id: t.context.asyncOperationId, description: 'test description', - operationType: 'ES Index', + operationType: 'Reconciliation Report', status: 'RUNNING', createdAt: Date.now(), }; diff --git a/packages/api/endpoints/elasticsearch.js b/packages/api/endpoints/elasticsearch.js deleted file mode 100644 index b09bb81572e..00000000000 --- a/packages/api/endpoints/elasticsearch.js +++ /dev/null @@ -1,233 +0,0 @@ -'use strict'; - -const router = require('express-promise-router')(); -const { v4: uuidv4 } = require('uuid'); - -const log = require('@cumulus/common/log'); -const { IndexExistsError } = require('@cumulus/errors'); -const { defaultIndexAlias, getEsClient } = require('@cumulus/es-client/search'); -const { createIndex } = require('@cumulus/es-client/indexer'); - -const { asyncOperationEndpointErrorHandler } = require('../app/middleware'); -const { getFunctionNameFromRequestContext } = require('../lib/request'); -const startAsyncOperation = require('../lib/startAsyncOperation'); - -// const snapshotRepoName = 'cumulus-es-snapshots'; - -function timestampedIndexName() { - const date = new Date(); - return `cumulus-${date.getFullYear()}-${date.getMonth() + 1}-${date.getDate()}`; -} - -function 
createEsSnapshot(req, res) { - return res.boom.badRequest('Functionality not yet implemented'); -} - -async function reindex(req, res) { - let sourceIndex = req.body.sourceIndex; - let destIndex = req.body.destIndex; - const aliasName = req.body.aliasName || defaultIndexAlias; - - const esClient = await getEsClient(); - - if (!sourceIndex) { - const alias = await esClient.client.indices.getAlias({ - name: aliasName, - }).then((response) => response.body); - - // alias keys = index name - const indices = Object.keys(alias); - - if (indices.length > 1) { - // We don't know which index to use as the source, throw error - return res.boom.badRequest(`Multiple indices found for alias ${aliasName}. Specify source index as one of [${indices.sort().join(', ')}].`); - } - - sourceIndex = indices[0]; - } else { - const sourceExists = await esClient.client.indices.exists({ index: sourceIndex }) - .then((response) => response.body); - - if (!sourceExists) { - return res.boom.badRequest(`Source index ${sourceIndex} does not exist.`); - } - } - - if (!destIndex) { - destIndex = timestampedIndexName(); - } - - if (sourceIndex === destIndex) { - return res.boom.badRequest(`source index(${sourceIndex}) and destination index(${destIndex}) must be different.`); - } - - const destExists = await esClient.client.indices.exists({ index: destIndex }) - .then((response) => response.body); - - if (!destExists) { - try { - await createIndex(esClient, destIndex); - log.info(`Created destination index ${destIndex}.`); - } catch (error) { - return res.boom.badRequest(`Error creating index ${destIndex}: ${error.message}`); - } - } - - // reindex - esClient.client.reindex({ - body: { - source: { index: sourceIndex }, - dest: { index: destIndex }, - }, - }); - - const message = `Reindexing to ${destIndex} from ${sourceIndex}. 
Check the reindex-status endpoint for status.`; - - return res.status(200).send({ message }); -} - -async function reindexStatus(req, res) { - const esClient = await getEsClient(); - - const reindexTaskStatus = await esClient.client.tasks.list({ actions: ['*reindex'] }) - .then((response) => response.body); - - await esClient.client.indices.refresh(); - - const indexStatus = await esClient.client.indices.stats({ - metric: 'docs', - }).then((response) => response.body); - - const status = { - reindexStatus: reindexTaskStatus, - indexStatus, - }; - - return res.send(status); -} - -async function changeIndex(req, res) { - const deleteSource = req.body.deleteSource; - const aliasName = req.body.aliasName || defaultIndexAlias; - const currentIndex = req.body.currentIndex; - const newIndex = req.body.newIndex; - - const esClient = await getEsClient(); - - if (!currentIndex || !newIndex) { - return res.boom.badRequest('Please explicity specify a current and new index.'); - } - - if (currentIndex === newIndex) { - return res.boom.badRequest('The current index cannot be the same as the new index.'); - } - - const currentExists = await esClient.client.indices.exists({ index: currentIndex }) - .then((response) => response.body); - - if (!currentExists) { - return res.boom.badRequest(`Current index ${currentIndex} does not exist.`); - } - - const destExists = await esClient.client.indices.exists({ index: newIndex }) - .then((response) => response.body); - - if (!destExists) { - try { - await createIndex(esClient, newIndex); - log.info(`Created destination index ${newIndex}.`); - } catch (error) { - return res.boom.badRequest(`Error creating index ${newIndex}: ${error.message}`); - } - } - - try { - await esClient.client.indices.updateAliases({ - body: { - actions: [ - { remove: { index: currentIndex, alias: aliasName } }, - { add: { index: newIndex, alias: aliasName } }, - ], - }, - }); - - log.info(`Removed alias ${aliasName} from index ${currentIndex} and added alias to ${newIndex}`); - } catch (error) { - return res.boom.badRequest( - `Error removing alias ${aliasName} from index ${currentIndex} and adding alias to ${newIndex}: ${error}` - ); - } - - let message = `Change index success - alias ${aliasName} now pointing to ${newIndex}`; - - if (deleteSource) { - await esClient.client.indices.delete({ index: currentIndex }); - log.info(`Deleted index ${currentIndex}`); - message = `${message} and index ${currentIndex} deleted`; - } - - return res.send({ message }); -} - -async function indicesStatus(req, res) { - const esClient = await getEsClient(); - - return res.send(await esClient.client.cat.indices({})); -} - -async function indexFromDatabase(req, res) { - const esClient = await getEsClient(); - const indexName = req.body.indexName || timestampedIndexName(); - const { postgresResultPageSize, postgresConnectionPoolSize, esRequestConcurrency } = req.body; - - await createIndex(esClient, indexName) - .catch((error) => { - if (!(error instanceof IndexExistsError)) throw error; - }); - - const asyncOperationId = uuidv4(); - const asyncOperationEvent = { - asyncOperationId, - callerLambdaName: getFunctionNameFromRequestContext(req), - lambdaName: process.env.IndexFromDatabaseLambda, - description: 'Elasticsearch index from database', - operationType: 'ES Index', - payload: { - indexName, - reconciliationReportsTable: process.env.ReconciliationReportsTable, - esHost: process.env.ES_HOST, - esRequestConcurrency: esRequestConcurrency || process.env.ES_CONCURRENCY, - postgresResultPageSize, - 
postgresConnectionPoolSize, - }, - }; - - log.debug(`About to invoke lambda to start async operation ${asyncOperationId}`); - await startAsyncOperation.invokeStartAsyncOperationLambda(asyncOperationEvent); - return res.send({ message: `Indexing database to ${indexName}. Operation id: ${asyncOperationId}` }); -} - -async function getCurrentIndex(req, res) { - const esClient = await getEsClient(); - const alias = req.params.alias || defaultIndexAlias; - - const aliasIndices = await esClient.client.indices.getAlias({ name: alias }) - .then((response) => response.body); - - return res.send(Object.keys(aliasIndices)); -} - -// express routes -router.put('/create-snapshot', createEsSnapshot); -router.post('/reindex', reindex); -router.get('/reindex-status', reindexStatus); -router.post('/change-index', changeIndex); -router.post('/index-from-database', indexFromDatabase, asyncOperationEndpointErrorHandler); -router.get('/indices-status', indicesStatus); -router.get('/current-index/:alias', getCurrentIndex); -router.get('/current-index', getCurrentIndex); - -module.exports = { - indexFromDatabase, - router, -}; diff --git a/packages/api/lambdas/bootstrap.js b/packages/api/lambdas/bootstrap.js deleted file mode 100644 index 5a038058070..00000000000 --- a/packages/api/lambdas/bootstrap.js +++ /dev/null @@ -1,35 +0,0 @@ -/* this module is intended to be used for bootstraping - * the cloudformation deployment of a DAAC. - * - * It helps: - * - adding ElasticSearch index mapping when a new index is created - */ - -'use strict'; - -const log = require('@cumulus/common/log'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); - -/** - * Bootstrap Elasticsearch indexes - * - * @param {Object} event - AWS Lambda event input - * @returns {Promise} a Terraform Lambda invocation response - */ -const handler = async ({ elasticsearchHostname, removeAliasConflict, testContext = {} }) => { - const bootstrapFunction = testContext.bootstrapFunction || bootstrapElasticSearch; - try { - await bootstrapFunction({ - host: elasticsearchHostname, - removeAliasConflict, - }); - return { Status: 'SUCCESS', Data: {} }; - } catch (error) { - log.error(error); - throw error; - } -}; - -module.exports = { - handler, -}; diff --git a/packages/api/lambdas/bulk-operation.js b/packages/api/lambdas/bulk-operation.js index 2b19194a705..763c97e685a 100644 --- a/packages/api/lambdas/bulk-operation.js +++ b/packages/api/lambdas/bulk-operation.js @@ -93,8 +93,8 @@ async function applyWorkflowToGranules({ * Defaults to `concurrency` * @param {number} [payload.concurrency] * granule concurrency for the bulk deletion operation. Defaults to 10 - * @param {Object} [payload.query] - Optional parameter of query to send to ES - * @param {string} [payload.index] - Optional parameter of ES index to query. + * @param {Object} [payload.query] - Optional parameter of query to send to ES (Cloud Metrics) + * @param {string} [payload.index] - Optional parameter of ES index to query (Cloud Metrics). * Must exist if payload.query exists. * @param {Object} [payload.granules] - Optional list of granule unique IDs to bulk operate on * e.g. { granuleId: xxx, collectionID: xxx } @@ -178,8 +178,8 @@ async function bulkGranuleDelete( * @param {string} payload.workflowName - name of the workflow that will be applied to each granule. 
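The bulk-delete payload documented above is easier to see as a concrete object. A hedged sketch, assuming the `granuleId`/`collectionId` casing used by API granule records elsewhere in the codebase; the identifiers are made up:

```js
// Hypothetical payload sketch, not part of this patch: an explicit granule list
// for the bulk granule delete operation, instead of an ES (Cloud Metrics) query.
// Granule and collection identifiers below are made up.
const bulkDeletePayload = {
  concurrency: 10,        // granule concurrency for the deletion (documented default: 10)
  maxDbConnections: 10,   // defaults to `concurrency` when omitted
  granules: [
    { granuleId: 'granule-0001', collectionId: 'MOD09GQ___006' },
    { granuleId: 'granule-0002', collectionId: 'MOD09GQ___006' },
  ],
};
```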
* @param {Object} [payload.meta] - Optional meta to add to workflow input * @param {string} [payload.queueUrl] - Optional name of queue that will be used to start workflows - * @param {Object} [payload.query] - Optional parameter of query to send to ES - * @param {string} [payload.index] - Optional parameter of ES index to query. + * @param {Object} [payload.query] - Optional parameter of query to send to ES (Cloud Metrics) + * @param {string} [payload.index] - Optional parameter of ES index to query (Cloud Metrics). * Must exist if payload.query exists. * @param {Object} [payload.granules] - Optional list of granule unique IDs to bulk operate on * e.g. { granuleId: xxx, collectionID: xxx } diff --git a/packages/api/lambdas/cleanExecutions.js b/packages/api/lambdas/cleanExecutions.js index 9699b0ce9d7..01051970ff6 100644 --- a/packages/api/lambdas/cleanExecutions.js +++ b/packages/api/lambdas/cleanExecutions.js @@ -2,107 +2,17 @@ 'use strict'; -const { getEsClient, esConfig } = require('@cumulus/es-client/search'); -const moment = require('moment'); +/** + * This lambda has a dummy handler because it needs to be rewritten for PG instead of running + * in ElasticSearch. This will be done in CUMULUS-3982. + * When this is being rewritten, redo the test file also. + */ + const Logger = require('@cumulus/logger'); -const { sleep } = require('@cumulus/common'); const log = new Logger({ sender: '@cumulus/api/lambdas/cleanExecutions', }); -/** - * @typedef {import('@cumulus/db').PostgresExecutionRecord} PostgresExecutionRecord - * @typedef {import('knex').Knex} Knex - */ - -/** - * Extract expiration dates and identify greater and lesser bounds - * - * @param {number} payloadTimeout - Maximum number of days a record should be held onto - * @returns {Date} - */ -const getExpirationDate = ( - payloadTimeout -) => moment().subtract(payloadTimeout, 'days').toDate(); - -/** - * Clean up Elasticsearch executions that have expired - * - * @param {number} payloadTimeout - Maximum number of days a record should be held onto - * @param {boolean} cleanupRunning - Enable removal of running execution - * payloads - * @param {boolean} cleanupNonRunning - Enable removal of execution payloads for - * statuses other than 'running' - * @param {number} updateLimit - maximum number of records to update - * @param {string} index - Elasticsearch index to cleanup - * @returns {Promise} -*/ -const cleanupExpiredESExecutionPayloads = async ( - payloadTimeout, - cleanupRunning, - cleanupNonRunning, - updateLimit, - index -) => { - const _expiration = getExpirationDate(payloadTimeout); - const expiration = _expiration.getTime(); - - const must = [ - { range: { updatedAt: { lte: expiration } } }, - { - bool: { - should: [ - { exists: { field: 'finalPayload' } }, - { exists: { field: 'originalPayload' } }, - ], - }, - }, - ]; - const mustNot = []; - - if (cleanupRunning && !cleanupNonRunning) { - must.push({ term: { status: 'running' } }); - } else if (!cleanupRunning && cleanupNonRunning) { - mustNot.push({ term: { status: 'running' } }); - } - const removePayloadScript = "ctx._source.remove('finalPayload'); ctx._source.remove('originalPayload')"; - - const script = { inline: removePayloadScript }; - const body = { - query: { - bool: { - must, - mustNot, - }, - }, - script: script, - }; - const esClient = await getEsClient(); - const [{ node }] = await esConfig(); - // this launches the job for ES to perform, asynchronously - const updateTask = await esClient._client.updateByQuery({ - index, - type: 'execution', - size: 
updateLimit, - body, - conflicts: 'proceed', - wait_for_completion: false, - refresh: true, - }); - let taskStatus; - // this async and poll method allows us to avoid http timeouts - // and persist in case of lambda timeout - log.info(`launched async elasticsearch task id ${updateTask.body.task} - to check on this task outside this lambda, or to stop this task run the following`); - log.info(` > curl --request GET ${node}/_tasks/${updateTask.body.task}`); - log.info(` > curl --request POST ${node}/_tasks/${updateTask.body.task}/_cancel`); - do { - sleep(10000); - // eslint-disable-next-line no-await-in-loop - taskStatus = await esClient._client?.tasks.get({ task_id: updateTask.body.task }); - } while (taskStatus?.body.completed === false); - log.info(`elasticsearch task completed with status ${JSON.stringify(taskStatus?.body.task.status)}`); -}; /** * parse out environment variable configuration * @returns {{ @@ -135,33 +45,9 @@ const parseEnvironment = () => { }; }; -/** - * parse environment variables to extract configuration and run cleanup of ES executions - * - * @returns {Promise} - */ -async function cleanExecutionPayloads() { +function handler(_event) { const envConfig = parseEnvironment(); - log.info(`running cleanExecutions with configuration ${JSON.stringify(envConfig)}`); - const { - updateLimit, - cleanupRunning, - cleanupNonRunning, - payloadTimeout, - esIndex, - } = envConfig; - - await cleanupExpiredESExecutionPayloads( - payloadTimeout, - cleanupRunning, - cleanupNonRunning, - updateLimit, - esIndex - ); -} - -async function handler(_event) { - return await cleanExecutionPayloads(); + log.info(`running empty (to be updated) cleanExecutions with configuration ${JSON.stringify(envConfig)}`); } if (require.main === module) { @@ -176,7 +62,4 @@ if (require.main === module) { module.exports = { handler, - cleanExecutionPayloads, - getExpirationDate, - cleanupExpiredESExecutionPayloads, }; diff --git a/packages/api/lambdas/create-reconciliation-report-types.js b/packages/api/lambdas/create-reconciliation-report-types.js index 2ba35b18596..eac05e81296 100644 --- a/packages/api/lambdas/create-reconciliation-report-types.js +++ b/packages/api/lambdas/create-reconciliation-report-types.js @@ -5,7 +5,6 @@ /** * @typedef {Object} Env * @property {string} [CONCURRENCY] - The concurrency level for processing. - * @property {string} [ES_INDEX] - The Elasticsearch index. * @property {string} [AWS_REGION] - The AWS region. * @property {string} [AWS_ACCESS_KEY_ID] - The AWS access key ID. * @property {string} [AWS_SECRET_ACCESS_KEY] - The AWS secret access key. 
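The next hunk removes ES indexing from the reconciliation-report workflow, so the report record's `Pending` → `Generated`/`Failed` lifecycle is tracked only in Postgres and translated to the API shape for logging and the return value. A hedged sketch of reading that status back, assuming the model and translator named in the hunk are exported from `@cumulus/db` and that the usual connection environment variables are set:

```js
// Hypothetical sketch, not part of this patch: after processRequest() resolves,
// the report status lives only in Postgres; read it back through the PG model
// and the Postgres -> API translator used by the reconciliation-report lambda.
const {
  getKnexClient,
  ReconciliationReportPgModel,
  translatePostgresReconReportToApiReconReport,
} = require('@cumulus/db');

const getReportStatus = async (reportName) => {
  const knex = await getKnexClient({ env: process.env });
  const reconciliationReportPgModel = new ReconciliationReportPgModel();
  const pgRecord = await reconciliationReportPgModel.get(knex, { name: reportName });
  const apiRecord = translatePostgresReconReportToApiReconReport(pgRecord);
  return apiRecord.status; // 'Pending', 'Generated', or 'Failed'
};
```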
diff --git a/packages/api/lambdas/create-reconciliation-report.js b/packages/api/lambdas/create-reconciliation-report.js index 3822420a9fe..0269898bd06 100644 --- a/packages/api/lambdas/create-reconciliation-report.js +++ b/packages/api/lambdas/create-reconciliation-report.js @@ -29,8 +29,6 @@ const { translatePostgresFileToApiFile, } = require('@cumulus/db'); const Logger = require('@cumulus/logger'); -const { getEsClient } = require('@cumulus/es-client/search'); -const { indexReconciliationReport } = require('@cumulus/es-client/indexer'); const { ReconciliationReportPgModel, @@ -54,7 +52,6 @@ const isDataBucket = (bucketConfig) => ['private', 'public', 'protected'].includ /** * @typedef {typeof process.env } ProcessEnv * @typedef {import('knex').Knex} Knex - * @typedef {import('@cumulus/es-client/search').EsClient} EsClient * @typedef {import('../lib/types').NormalizedRecReportParams } NormalizedRecReportParams * @typedef {import('../lib/types').EnhancedNormalizedRecReportParams} * EnhancedNormalizedRecReportParams @@ -870,7 +867,6 @@ async function createReconciliationReport(recReportParams) { * @param {string} params.reportName - the name of the report * @param {Env} params.env - the environment variables * @param {Knex} params.knex - Optional Instance of a Knex client for testing - * @param {EsClient} params.esClient - Optional Instance of an Elasticsearch client for testing * @returns {Promise} report record saved to the database */ async function processRequest(params) { @@ -882,7 +878,6 @@ async function processRequest(params) { systemBucket, stackName, knex = await getKnexClient({ env }), - esClient = await getEsClient(), } = params; const createStartTime = moment.utc(); const reportRecordName = reportName @@ -900,9 +895,9 @@ async function processRequest(params) { location: buildS3Uri(systemBucket, reportKey), }; let [reportPgRecord] = await reconciliationReportPgModel.create(knex, builtReportRecord); + // api format was being logged prior to ES removal, so keeping format for consistency let reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); - await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); - log.info(`Report added to database as pending: ${JSON.stringify(reportApiRecord)}.`); + log.info(`Report added to database as Pending: ${JSON.stringify(reportApiRecord)}.`); const concurrency = env.CONCURRENCY || '3'; @@ -936,8 +931,6 @@ async function processRequest(params) { status: 'Generated', }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, generatedRecord); - reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); - await indexReconciliationReport(esClient, reportApiRecord, process.env.ES_INDEX); } catch (error) { log.error(`Error caught in createReconciliationReport creating ${reportType} report ${reportRecordName}. 
${error}`); // eslint-disable-line max-len const erroredRecord = { @@ -951,16 +944,14 @@ async function processRequest(params) { }; [reportPgRecord] = await reconciliationReportPgModel.upsert(knex, erroredRecord); reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); - await indexReconciliationReport( - esClient, - reportApiRecord, - process.env.ES_INDEX - ); + log.error(`Report updated in database as Failed including error: ${JSON.stringify(reportApiRecord)}`); throw error; } reportPgRecord = await reconciliationReportPgModel.get(knex, { name: builtReportRecord.name }); - return translatePostgresReconReportToApiReconReport(reportPgRecord); + reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); + log.info(`Report updated in database as Generated: ${JSON.stringify(reportApiRecord)}.`); + return reportApiRecord; } async function handler(event) { @@ -968,10 +959,9 @@ async function handler(event) { process.env.CMR_LIMIT = process.env.CMR_LIMIT || '5000'; process.env.CMR_PAGE_SIZE = process.env.CMR_PAGE_SIZE || '200'; - //TODO: Remove irrelevant env vars from terraform after ES reports are removed - const varsToLog = ['CMR_LIMIT', 'CMR_PAGE_SIZE', 'ES_SCROLL', 'ES_SCROLL_SIZE']; + const varsToLog = ['CMR_LIMIT', 'CMR_PAGE_SIZE']; const envsToLog = pickBy(process.env, (value, key) => varsToLog.includes(key)); - log.info(`CMR and ES Environment variables: ${JSON.stringify(envsToLog)}`); + log.info(`CMR Environment variables: ${JSON.stringify(envsToLog)}`); return await processRequest(event); } diff --git a/packages/api/lambdas/index-from-database.js b/packages/api/lambdas/index-from-database.js deleted file mode 100644 index e0666992a18..00000000000 --- a/packages/api/lambdas/index-from-database.js +++ /dev/null @@ -1,324 +0,0 @@ -'use strict'; - -const isNil = require('lodash/isNil'); -const pLimit = require('p-limit'); - -const DynamoDbSearchQueue = require('@cumulus/aws-client/DynamoDbSearchQueue'); -const log = require('@cumulus/common/log'); - -const { getEsClient } = require('@cumulus/es-client/search'); -const { - CollectionPgModel, - ExecutionPgModel, - AsyncOperationPgModel, - GranulePgModel, - ProviderPgModel, - RulePgModel, - PdrPgModel, - getKnexClient, - translatePostgresCollectionToApiCollection, - translatePostgresExecutionToApiExecution, - translatePostgresAsyncOperationToApiAsyncOperation, - translatePostgresGranuleToApiGranule, - translatePostgresProviderToApiProvider, - translatePostgresPdrToApiPdr, - translatePostgresRuleToApiRule, -} = require('@cumulus/db'); -const indexer = require('@cumulus/es-client/indexer'); - -/** - * Return specified concurrency for ES requests. - * - * Returned value is used with [p-limit](https://github.com/sindresorhus/p-limit), which - * does not accept 0. - * - * @param {Object} event - Incoming Lambda event - * @returns {number} - Specified request concurrency. Defaults to 10. 
- * @throws {TypeError} - */ -const getEsRequestConcurrency = (event) => { - if (!isNil(event.esRequestConcurrency)) { - const parsedValue = Number.parseInt(event.esRequestConcurrency, 10); - - if (Number.isInteger(parsedValue) && parsedValue > 0) { - return parsedValue; - } - - throw new TypeError('event.esRequestConcurrency must be an integer greater than 0'); - } - - if (!isNil(process.env.ES_CONCURRENCY)) { - const parsedValue = Number.parseInt(process.env.ES_CONCURRENCY, 10); - - if (Number.isInteger(parsedValue) && parsedValue > 0) { - return parsedValue; - } - - throw new TypeError('The ES_CONCURRENCY environment variable must be an integer greater than 0'); - } - - return 10; -}; - -// Legacy method used for indexing Reconciliation Reports only -async function indexReconciliationReports({ - esClient, - tableName, - esIndex, - indexFn, - limitEsRequests, -}) { - const scanQueue = new DynamoDbSearchQueue({ - TableName: tableName, - }); - - let itemsComplete = false; - let totalItemsIndexed = 0; - - /* eslint-disable no-await-in-loop */ - while (itemsComplete === false) { - await scanQueue.fetchItems(); - - itemsComplete = scanQueue.items[scanQueue.items.length - 1] === null; - - if (itemsComplete) { - // pop the null item off - scanQueue.items.pop(); - } - - if (scanQueue.items.length === 0) { - log.info(`No records to index for ${tableName}`); - return true; - } - - log.info(`Attempting to index ${scanQueue.items.length} records from ${tableName}`); - - const input = scanQueue.items.map( - (item) => limitEsRequests( - async () => { - try { - return await indexFn(esClient, item, esIndex); - } catch (error) { - log.error(`Error indexing record ${JSON.stringify(item)}, error: ${error}`); - return false; - } - } - ) - ); - const results = await Promise.all(input); - const successfulResults = results.filter((result) => result !== false); - totalItemsIndexed += successfulResults; - - log.info(`Completed index of ${successfulResults.length} records from ${tableName}`); - } - /* eslint-enable no-await-in-loop */ - - return totalItemsIndexed; -} - -/** -* indexModel - Index a postgres RDS table's contents to ElasticSearch -* -* @param {Object} params -- parameters -* @param {any} params.esClient -- ElasticSearch client -* @param {any} params.postgresModel -- @cumulus/db model -* @param {string} params.esIndex -- esIndex to write records to -* @param {any} params.indexFn -- Indexer function that maps to the database model -* @param {any} params.limitEsRequests -- limitEsRequests method (used for testing) -* @param {Knex} params.knex -- configured knex instance -* @param {any} params.translationFunction -- function to translate postgres record -* to API record for ES -* @param {number} params.pageSize -- Page size for postgres pagination -* @returns {number} -- number of items indexed -*/ -async function indexModel({ - esClient, - postgresModel, - esIndex, - indexFn, - limitEsRequests, - knex, - translationFunction, - pageSize, -}) { - let startId = 1; - let totalItemsIndexed = 0; - let done; - let maxIndex = await postgresModel.getMaxCumulusId(knex); - let failCount = 0; - - log.info(`Starting index of ${postgresModel.tableName} with max cumulus_id of ${maxIndex}`); - /* eslint-disable no-await-in-loop */ - while (done !== true && maxIndex > 0) { - const pageResults = await postgresModel.paginateByCumulusId(knex, startId, pageSize); - log.info( - `Attempting to index ${pageResults.length} records from ${postgresModel.tableName}` - ); - - const indexPromises = pageResults.map((pageResult) => 
limitEsRequests(async () => { - let translationResult; - try { - translationResult = await translationFunction(pageResult); - await esClient.refreshClient(); - return await indexFn(esClient, translationResult, esIndex); - } catch (error) { - log.error( - `Error indexing record ${JSON.stringify(translationResult)}, error: ${error.message}` - ); - return false; - } - })); - - const results = await Promise.all(indexPromises); - const successfulResults = results.filter((result) => result !== false); - failCount += (results.length - successfulResults.length); - - totalItemsIndexed += successfulResults.length; - - log.info(`Completed index of ${successfulResults.length} records from ${postgresModel.tableName}`); - startId += pageSize; - if (startId > maxIndex) { - startId = maxIndex; - log.info(`Continuing indexing from cumulus_id ${startId} to account for new rows from ${postgresModel.tableName}`); - const oldMaxIndex = maxIndex; - maxIndex = await postgresModel.getMaxCumulusId(knex); - if (maxIndex <= oldMaxIndex) { - done = true; - } - } - } - /* eslint-enable no-await-in-loop */ - log.info(`Completed successful index of ${totalItemsIndexed} records from ${postgresModel.tableName}`); - if (failCount) { - log.warn(`${failCount} records failed indexing from ${postgresModel.tableName}`); - } - return totalItemsIndexed; -} - -async function indexFromDatabase(event) { - const { - indexName: esIndex, - esHost = process.env.ES_HOST, - reconciliationReportsTable = process.env.ReconciliationReportsTable, - postgresResultPageSize, - postgresConnectionPoolSize, - } = event; - const esClient = await getEsClient(esHost); - const knex = event.knex || (await getKnexClient({ - env: { - dbMaxPool: Number.parseInt(postgresConnectionPoolSize, 10) || 10, - ...process.env, - }, - })); - - const pageSize = Number.parseInt(postgresResultPageSize, 10) || 1000; - const esRequestConcurrency = getEsRequestConcurrency(event); - log.info( - `Tuning configuration: esRequestConcurrency: ${esRequestConcurrency}, postgresResultPageSize: ${pageSize}, postgresConnectionPoolSize: ${postgresConnectionPoolSize}` - ); - - const limitEsRequests = pLimit(esRequestConcurrency); - - await Promise.all([ - indexModel({ - esClient, - esIndex, - indexFn: indexer.indexCollection, - limitEsRequests, - postgresModel: new CollectionPgModel(), - translationFunction: translatePostgresCollectionToApiCollection, - knex, - pageSize, - }), - indexModel({ - esClient, - esIndex, - indexFn: indexer.indexExecution, - limitEsRequests, - postgresModel: new ExecutionPgModel(), - translationFunction: (record) => - translatePostgresExecutionToApiExecution(record, knex), - knex, - pageSize, - }), - indexModel({ - esClient, - esIndex, - indexFn: indexer.indexAsyncOperation, - limitEsRequests, - postgresModel: new AsyncOperationPgModel(), - translationFunction: translatePostgresAsyncOperationToApiAsyncOperation, - knex, - pageSize, - }), - indexModel({ - esClient, - esIndex, - indexFn: indexer.indexGranule, - limitEsRequests, - postgresModel: new GranulePgModel(), - translationFunction: (record) => - translatePostgresGranuleToApiGranule({ - granulePgRecord: record, - knexOrTransaction: knex, - }), - knex, - pageSize, - }), - indexModel({ - esClient, - esIndex, - indexFn: indexer.indexPdr, - limitEsRequests, - postgresModel: new PdrPgModel(), - translationFunction: (record) => - translatePostgresPdrToApiPdr(record, knex), - knex, - pageSize, - }), - indexModel({ - esClient, - esIndex, - indexFn: indexer.indexProvider, - limitEsRequests, - postgresModel: new 
ProviderPgModel(), - translationFunction: translatePostgresProviderToApiProvider, - knex, - pageSize, - }), - indexReconciliationReports({ - esClient, - tableName: reconciliationReportsTable, - esIndex, - indexFn: indexer.indexReconciliationReport, - limitEsRequests, - }), - indexModel({ - esClient, - esIndex, - indexFn: indexer.indexRule, - limitEsRequests, - postgresModel: new RulePgModel(), - translationFunction: (record) => - translatePostgresRuleToApiRule(record, knex), - knex, - pageSize, - }), - ]); -} - -async function handler(event) { - log.info(`Starting index from database for index ${event.indexName}`); - - await indexFromDatabase(event); - - log.info('Index from database complete'); - - return 'Index from database complete'; -} - -module.exports = { - handler, - indexFromDatabase, - getEsRequestConcurrency, -}; diff --git a/packages/api/lambdas/process-s3-dead-letter-archive.js b/packages/api/lambdas/process-s3-dead-letter-archive.js index ea0dc3d5541..fedec5b4cf9 100644 --- a/packages/api/lambdas/process-s3-dead-letter-archive.js +++ b/packages/api/lambdas/process-s3-dead-letter-archive.js @@ -4,9 +4,6 @@ const pSettle = require('p-settle'); const log = require('@cumulus/common/log'); -const { - getEsClient, -} = require('@cumulus/es-client/search'); const S3 = require('@cumulus/aws-client/S3'); const { s3 } = require('@cumulus/aws-client/services'); const { getJsonS3Object, deleteS3Object } = require('@cumulus/aws-client/S3'); @@ -101,13 +98,10 @@ async function processDeadLetterArchive({ let continuationToken; let allSuccessKeys = []; const allFailedKeys = []; - const esClient = await getEsClient(); let batchNumber = 1; /* eslint-disable no-await-in-loop */ do { log.info(`Processing batch ${batchNumber}`); - // Refresh ES client to avoid credentials timeout for long running processes - esClient.refreshClient(); listObjectsResponse = await s3().listObjectsV2({ Bucket: bucket, Prefix: path, @@ -120,7 +114,7 @@ async function processDeadLetterArchive({ const deadLetterMessage = await getJsonS3Object(bucket, deadLetterObject.Key); const cumulusMessage = await unwrapDeadLetterCumulusMessage(deadLetterMessage); try { - await writeRecordsFunction({ cumulusMessage, knex, esClient }); + await writeRecordsFunction({ cumulusMessage, knex }); return deadLetterObject.Key; } catch (error) { log.error(`Failed to write records from cumulusMessage for dead letter ${deadLetterObject.Key} due to '${error}'`); diff --git a/packages/api/lambdas/sf-event-sqs-to-db-records/index.js b/packages/api/lambdas/sf-event-sqs-to-db-records/index.js index e1609c90bc9..411079ae937 100644 --- a/packages/api/lambdas/sf-event-sqs-to-db-records/index.js +++ b/packages/api/lambdas/sf-event-sqs-to-db-records/index.js @@ -52,14 +52,12 @@ const log = new Logger({ sender: '@cumulus/api/lambdas/sf-event-sqs-to-db-record * @param {Object} params * @param {Object} params.cumulusMessage - Cumulus workflow message * @param {Knex} params.knex - Knex client - * @param {EsClient} params.esClient - Elasticsearch client * @param {Object} [params.testOverrides] * Optional override/mock object used for testing */ const writeRecords = async ({ cumulusMessage, knex, - esClient, testOverrides = {}, }) => { const messageCollectionNameVersion = getCollectionNameAndVersionFromMessage(cumulusMessage); @@ -103,7 +101,6 @@ const writeRecords = async ({ asyncOperationCumulusId, parentExecutionCumulusId, knex, - esClient, }); const providerCumulusId = await getMessageProviderCumulusId(cumulusMessage, knex); @@ -114,13 +111,11 @@ const 
writeRecords = async ({ providerCumulusId, knex, executionCumulusId, - esClient, }); return writeGranulesFromMessage({ cumulusMessage, executionCumulusId, - esClient, knex, testOverrides, }); diff --git a/packages/api/lambdas/sf-event-sqs-to-db-records/write-pdr.js b/packages/api/lambdas/sf-event-sqs-to-db-records/write-pdr.js index 26fb565ac0e..262b7377a5d 100644 --- a/packages/api/lambdas/sf-event-sqs-to-db-records/write-pdr.js +++ b/packages/api/lambdas/sf-event-sqs-to-db-records/write-pdr.js @@ -5,8 +5,6 @@ const { PdrPgModel, translatePostgresPdrToApiPdr, } = require('@cumulus/db'); -const { upsertPdr } = require('@cumulus/es-client/indexer'); -const { getEsClient } = require('@cumulus/es-client/search'); const { getMessagePdrName, messageHasPdr, @@ -14,7 +12,6 @@ const { getMessagePdrPANSent, getMessagePdrPANMessage, getPdrPercentCompletion, - generatePdrApiRecordFromMessage, } = require('@cumulus/message/PDRs'); const { getMetaStatus, @@ -87,23 +84,6 @@ const writePdrViaTransaction = async ({ return pdr; }; -const writePdrToEs = async (params) => { - const { - cumulusMessage, - updatedAt = Date.now(), - esClient = await getEsClient(), - } = params; - const pdrApiRecord = generatePdrApiRecordFromMessage(cumulusMessage, updatedAt); - if (!pdrApiRecord) { - return; - } - await upsertPdr({ - esClient, - updates: pdrApiRecord, - index: process.env.ES_INDEX, - }); -}; - const writePdr = async ({ cumulusMessage, collectionCumulusId, @@ -111,7 +91,6 @@ const writePdr = async ({ executionCumulusId, knex, updatedAt = Date.now(), - esClient, }) => { let pgPdr; // If there is no PDR in the message, then there's nothing to do here, which is fine @@ -133,11 +112,6 @@ const writePdr = async ({ executionCumulusId, updatedAt, }); - await writePdrToEs({ - cumulusMessage, - updatedAt, - esClient, - }); return pgPdr.cumulus_id; }); const pdrToPublish = await translatePostgresPdrToApiPdr(pgPdr, knex); @@ -149,5 +123,4 @@ module.exports = { generatePdrRecord, writePdrViaTransaction, writePdr, - writePdrToEs, }; diff --git a/packages/api/lib/granules.js b/packages/api/lib/granules.js index 1a9fbfc4e1f..ee0b908d0e8 100644 --- a/packages/api/lib/granules.js +++ b/packages/api/lib/granules.js @@ -234,10 +234,10 @@ function getTotalHits(bodyHits) { } /** - * Returns an array of granules from ElasticSearch query + * Returns an array of granules from an ElasticSearch query * * @param {Object} payload - * @param {string} [payload.index] - ES index to query + * @param {string} [payload.index] - ES index to query (Cloud Metrics) * @param {string} [payload.query] - ES query * @param {Object} [payload.source] - List of IDs to operate on * @param {Object} [payload.testBodyHits] - Optional body.hits for testing. @@ -284,12 +284,12 @@ async function granuleEsQuery({ index, query, source, testBodyHits }) { /** * Return a unique list of granules based on the provided list or the response from the - * query to ES using the provided query and index. + * query to ES (Cloud Metrics) using the provided query and index. * * @param {Object} payload * @param {Object} [payload.granules] - Optional list of granules with granuleId and collectionId - * @param {Object} [payload.query] - Optional parameter of query to send to ES - * @param {string} [payload.index] - Optional parameter of ES index to query. + * @param {Object} [payload.query] - Optional parameter of query to send to ES (Cloud Metrics) + * @param {string} [payload.index] - Optional parameter of ES index to query (Cloud Metrics). * Must exist if payload.query exists. 
* @returns {Promise>} */ @@ -297,7 +297,7 @@ async function getGranulesForPayload(payload) { const { granules, index, query } = payload; const queryGranules = granules || []; - // query ElasticSearch if needed + // query ElasticSearch (Cloud Metrics) if needed if (queryGranules.length === 0 && query) { log.info('No granules detected. Searching for granules in ElasticSearch.'); diff --git a/packages/api/lib/mmt.js b/packages/api/lib/mmt.js index d37ddc4db77..68581bcc2f0 100644 --- a/packages/api/lib/mmt.js +++ b/packages/api/lib/mmt.js @@ -42,16 +42,16 @@ const buildMMTLink = (conceptId, cmrEnv = process.env.CMR_ENVIRONMENT) => { }; /** - * Updates the Collection query results from ES with an MMTLink when the + * Updates the Collection query results with a MMTLink when the * matching CMR entry contains a collection_id. * - * @param {Array} esResults - collection query results from Cumulus' elasticsearch + * @param {Array} queryResults - collection query results from Cumulus DB * @param {Array} cmrEntries - cmr response feed entry that should match the * results collections - * @returns {Array} - Array of shallow clones of esResults objects with + * @returns {Array} - Array of shallow clones of queryResults objects with * MMTLinks added to them */ -const updateResponseWithMMT = (esResults, cmrEntries) => esResults.map((res) => { +const updateResponseWithMMT = (queryResults, cmrEntries) => queryResults.map((res) => { const matchedCmr = cmrEntries.filter( (entry) => entry.short_name === res.name && entry.version_id === res.version ); @@ -61,7 +61,7 @@ const updateResponseWithMMT = (esResults, cmrEntries) => esResults.map((res) => }); /** - * Simplifies and transforms The returned ES results from a collection query + * Simplifies and transforms the results from a collection query * into a list of objects suitable for a compound call to CMR to retrieve * collection_id information. * Transforms each object in the results array into an new object. @@ -69,7 +69,7 @@ const updateResponseWithMMT = (esResults, cmrEntries) => esResults.map((res) => * inputObject.version => outputObject.version * all other input object keys are dropped. * - * @param {Object} results - The elasticsearch results array returned from either + * @param {Object} results - The results array returned from either * Collection.query() or Collection.queryCollectionsWithActiveGranules() * @returns {Arary} - list of Objects with two keys (short_name and version). */ @@ -80,10 +80,10 @@ const parseResults = (results) => })); /** - * parses the elasticsearch collection lists and for each result inserts a "MMTLink" + * parses the query collection lists and for each result inserts a "MMTLink" * into the collection object. 
* - * @param {Object} inputResponse - an elasticsearch reponse returned from either + * @param {Object} inputResponse - a reponse returned from either * Collection.query() or Collection.queryCollectionsWithActiveGranules() * @returns {Object} a copy of input response object where each collection * has been updated to include a link to the Metadata Management Tool diff --git a/packages/api/lib/orca.js b/packages/api/lib/orca.js index dcc22aa148f..ea075c0cc7b 100644 --- a/packages/api/lib/orca.js +++ b/packages/api/lib/orca.js @@ -85,7 +85,7 @@ const getOrcaRecoveryStatusByGranuleIdAndCollection = async (granuleId, collecti /** * add recovery status for each granule in the granule list response * - * @param {Object} inputResponse - an elasticsearch response returned from granules query + * @param {Object} inputResponse - a response returned from a granules query * @returns {Object} a copy of input response object where each granule * has been updated to include orca recovery status */ diff --git a/packages/api/lib/reconciliationReport.js b/packages/api/lib/reconciliationReport.js index fdacf692709..f063d3112ad 100644 --- a/packages/api/lib/reconciliationReport.js +++ b/packages/api/lib/reconciliationReport.js @@ -2,9 +2,6 @@ 'use strict'; -const isEqual = require('lodash/isEqual'); -const omit = require('lodash/omit'); - const { removeNilProperties } = require('@cumulus/common/util'); const { constructCollectionId, deconstructCollectionId } = require('@cumulus/message/Collections'); const Logger = require('@cumulus/logger'); @@ -183,49 +180,6 @@ function filterDBCollections(collections, recReportParams) { return collections; } -/** - * Compare granules from Elasticsearch and API for deep equality. - * - * @param {Object} esGranule - Granule from Elasticsearch - * @param {Object} apiGranule - API Granule (translated from PostgreSQL) - * @returns {boolean} - */ -function compareEsGranuleAndApiGranule(esGranule, apiGranule) { - // Ignore files in initial comparison so we can ignore file order - // in comparison - const fieldsIgnored = ['timestamp', 'updatedAt', 'files']; - // "dataType" and "version" fields do not exist in the PostgreSQL database - // granules table which is now the source of truth - const esFieldsIgnored = [...fieldsIgnored, 'dataType', 'version']; - const granulesAreEqual = isEqual( - omit(esGranule, esFieldsIgnored), - omit(apiGranule, fieldsIgnored) - ); - - if (granulesAreEqual === false) return granulesAreEqual; - - const esGranulesHasFiles = esGranule.files !== undefined; - const apiGranuleHasFiles = apiGranule.files.length !== 0; - - // If neither granule has files, then return the previous equality result - if (!esGranulesHasFiles && !apiGranuleHasFiles) return granulesAreEqual; - // If either ES or PG granule does not have files, but the other granule does - // have files, then the granules don't match, so return false - if ((esGranulesHasFiles && !apiGranuleHasFiles) - || (!esGranulesHasFiles && apiGranuleHasFiles)) { - return false; - } - - // Compare files one-by-one to ignore sort order for comparison - return esGranule.files.every((esFile) => { - const matchingFile = apiGranule.files.find( - (apiFile) => apiFile.bucket === esFile.bucket && apiFile.key === esFile.key - ); - if (!matchingFile) return false; - return isEqual(esFile, matchingFile); - }); -} - module.exports = { cmrGranuleSearchParams, convertToDBCollectionSearchObject, @@ -233,5 +187,4 @@ module.exports = { convertToOrcaGranuleSearchParams, filterDBCollections, initialReportHeader, - 
compareEsGranuleAndApiGranule, }; diff --git a/packages/api/lib/testUtils.js b/packages/api/lib/testUtils.js index e13ba2132ea..506948315ac 100644 --- a/packages/api/lib/testUtils.js +++ b/packages/api/lib/testUtils.js @@ -21,9 +21,6 @@ const { translateApiRuleToPostgresRuleRaw, translatePostgresRuleToApiRule, } = require('@cumulus/db'); -const { - deleteExecution, -} = require('@cumulus/es-client/indexer'); const { constructCollectionId, } = require('@cumulus/message/Collections'); @@ -233,7 +230,7 @@ function fakeAsyncOperationFactory(params = {}) { taskArn: randomId('arn'), id: uuidv4(), description: randomId('description'), - operationType: 'ES Index', + operationType: 'Reconciliation Report', status: 'SUCCEEDED', createdAt: Date.now() - 180.5 * 1000, updatedAt: Date.now(), @@ -620,22 +617,6 @@ const createAsyncOperationTestRecords = async (context) => { }; }; -const cleanupExecutionTestRecords = async (context, { arn }) => { - const { - knex, - executionPgModel, - esClient, - esIndex, - } = context; - - await executionPgModel.delete(knex, { arn }); - await deleteExecution({ - esClient, - arn, - index: esIndex, - }); -}; - module.exports = { createFakeJwtAuthToken, createSqsQueues, @@ -666,6 +647,5 @@ module.exports = { createRuleTestRecords, createPdrTestRecords, createExecutionTestRecords, - cleanupExecutionTestRecords, createAsyncOperationTestRecords, }; diff --git a/packages/api/tests/endpoints/providers/delete-provider.js b/packages/api/tests/endpoints/providers/delete-provider.js index 70e9f06e34f..ae3dcdd88a2 100644 --- a/packages/api/tests/endpoints/providers/delete-provider.js +++ b/packages/api/tests/endpoints/providers/delete-provider.js @@ -133,28 +133,6 @@ test('Deleting a provider removes the provider from postgres', async (t) => { t.false(await providerPgModel.exists(t.context.testKnex, { name })); }); -test('Deleting a provider that exists in PostgreSQL and not Elasticsearch succeeds', async (t) => { - const testPgProvider = fakeProviderRecordFactory(); - await t.context.providerPgModel - .create( - t.context.testKnex, - testPgProvider - ); - - await request(app) - .delete(`/providers/${testPgProvider.name}`) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.false( - await t.context.providerPgModel.exists( - t.context.testKnex, - { name: testPgProvider.name } - ) - ); -}); - test('Deleting a provider that does not exist in PostgreSQL returns a 404', async (t) => { const { status } = await request(app) .delete(`/providers/${randomString}`) diff --git a/packages/api/tests/endpoints/test-elasticsearch.js b/packages/api/tests/endpoints/test-elasticsearch.js deleted file mode 100644 index 811587e93c3..00000000000 --- a/packages/api/tests/endpoints/test-elasticsearch.js +++ /dev/null @@ -1,698 +0,0 @@ -'use strict'; - -const request = require('supertest'); -const test = require('ava'); -const get = require('lodash/get'); -const sinon = require('sinon'); - -const { - localStackConnectionEnv, - generateLocalTestDb, - destroyLocalTestDb, - migrationDir, -} = require('@cumulus/db'); -const awsServices = require('@cumulus/aws-client/services'); -const { - recursivelyDeleteS3Bucket, -} = require('@cumulus/aws-client/S3'); -const { randomString, randomId } = require('@cumulus/common/test-utils'); -const { IndexExistsError } = require('@cumulus/errors'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const { getEsClient, defaultIndexAlias } = require('@cumulus/es-client/search'); -const mappings 
= require('@cumulus/es-client/config/mappings.json'); -const startAsyncOperation = require('../../lib/startAsyncOperation'); - -const models = require('../../models'); -const assertions = require('../../lib/assertions'); -const { - createFakeJwtAuthToken, - setAuthorizedOAuthUsers, -} = require('../../lib/testUtils'); - -const esIndex = randomId('esindex'); - -process.env.AccessTokensTable = randomString(); -process.env.TOKEN_SECRET = randomString(); -process.env.stackName = randomString(); -process.env.system_bucket = randomString(); - -// import the express app after setting the env variables -const { app } = require('../../app'); -const { indexFromDatabase } = require('../../endpoints/elasticsearch'); - -let jwtAuthToken; -let accessTokenModel; -let esClient; - -/** - * Index fake data - * - * @returns {undefined} - none - */ -async function indexData() { - const rules = [ - { name: 'Rule1' }, - { name: 'Rule2' }, - { name: 'Rule3' }, - ]; - - await Promise.all(rules.map(async (rule) => { - await esClient.client.index({ - index: esIndex, - type: 'rule', - id: rule.name, - body: rule, - }); - })); - - await esClient.client.indices.refresh(); -} - -/** - * Create and alias index by going through ES bootstrap - * - * @param {string} indexName - index name - * @param {string} aliasName - alias name - * @returns {undefined} - none - */ -async function createIndex(indexName, aliasName) { - await bootstrapElasticSearch({ - host: 'fakehost', - index: indexName, - alias: aliasName, - }); - esClient = await getEsClient(); -} - -const testDbName = randomId('elasticsearch'); - -test.before(async (t) => { - await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); - - const username = randomString(); - await setAuthorizedOAuthUsers([username]); - - accessTokenModel = new models.AccessToken(); - await accessTokenModel.createTable(); - - jwtAuthToken = await createFakeJwtAuthToken({ accessTokenModel, username }); - - t.context.esAlias = randomString(); - process.env.ES_INDEX = t.context.esAlias; - process.env = { - ...process.env, - ...localStackConnectionEnv, - PG_DATABASE: testDbName, - }; - - const { knex, knexAdmin } = await generateLocalTestDb(testDbName, migrationDir); - t.context.testKnex = knex; - t.context.testKnexAdmin = knexAdmin; - - // create the elasticsearch index and add mapping - await createIndex(esIndex, t.context.esAlias); - - await indexData(); -}); - -test.after.always(async (t) => { - await accessTokenModel.deleteTable(); - await esClient.client.indices.delete({ index: esIndex }); - await destroyLocalTestDb({ - knex: t.context.testKnex, - knexAdmin: t.context.testKnexAdmin, - testDbName, - }); - await recursivelyDeleteS3Bucket(process.env.system_bucket); -}); - -test('PUT snapshot without an Authorization header returns an Authorization Missing response', async (t) => { - const response = await request(app) - .post('/elasticsearch/create-snapshot') - .set('Accept', 'application/json') - .expect(401); - - assertions.isAuthorizationMissingResponse(t, response); -}); - -test('PUT snapshot with an invalid access token returns an unauthorized response', async (t) => { - const response = await request(app) - .post('/elasticsearch/create-snapshot') - .set('Accept', 'application/json') - .set('Authorization', 'Bearer ThisIsAnInvalidAuthorizationToken') - .expect(401); - - assertions.isInvalidAccessTokenResponse(t, response); -}); - -test.serial('Reindex - multiple aliases found', async (t) => { - // Prefixes for error message predictability - const indexName = 
`z-${randomString()}`; - const otherIndexName = `a-${randomString()}`; - - const aliasName = randomString(); - - await esClient.client.indices.create({ - index: indexName, - body: { mappings }, - }); - - await esClient.client.indices.putAlias({ - index: indexName, - name: aliasName, - }); - - await esClient.client.indices.create({ - index: otherIndexName, - body: { mappings }, - }); - - await esClient.client.indices.putAlias({ - index: otherIndexName, - name: aliasName, - }); - - const response = await request(app) - .post('/elasticsearch/reindex') - .send({ aliasName }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, `Multiple indices found for alias ${aliasName}. Specify source index as one of [${otherIndexName}, ${indexName}].`); - - await esClient.client.indices.delete({ index: indexName }); - await esClient.client.indices.delete({ index: otherIndexName }); -}); - -test.serial('Reindex - specify a source index that does not exist', async (t) => { - const { esAlias } = t.context; - - const response = await request(app) - .post('/elasticsearch/reindex') - .send({ aliasName: esAlias, sourceIndex: 'source-index' }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, 'Source index source-index does not exist.'); -}); - -test.serial('Reindex - specify a source index that is not aliased', async (t) => { - const { esAlias } = t.context; - const indexName = 'source-index'; - const destIndex = randomString(); - - await esClient.client.indices.create({ - index: indexName, - body: { mappings }, - }); - - const response = await request(app) - .post('/elasticsearch/reindex') - .send({ - aliasName: esAlias, - sourceIndex: indexName, - destIndex, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.is(response.body.message, `Reindexing to ${destIndex} from ${indexName}. 
Check the reindex-status endpoint for status.`); - - // Check the reindex status endpoint to see if the operation has completed - let statusResponse = await request(app) - .get('/elasticsearch/reindex-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - /* eslint-disable no-await-in-loop */ - while (Object.keys(statusResponse.body.reindexStatus.nodes).length > 0) { - statusResponse = await request(app) - .get('/elasticsearch/reindex-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - } - /* eslint-enable no-await-in-loop */ - - await esClient.client.indices.delete({ index: indexName }); - await esClient.client.indices.delete({ index: destIndex }); -}); - -test.serial('Reindex request returns 400 with the expected message when source index matches destination index.', async (t) => { - const indexName = randomId('index'); - await esClient.client.indices.create({ - index: indexName, - body: { mappings }, - }); - - const response = await request(app) - .post('/elasticsearch/reindex') - .send({ destIndex: indexName, sourceIndex: indexName }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, `source index(${indexName}) and destination index(${indexName}) must be different.`); - await esClient.client.indices.delete({ index: indexName }); -}); - -test.serial('Reindex request returns 400 with the expected message when source index matches the default destination index.', async (t) => { - const date = new Date(); - const defaultIndexName = `cumulus-${date.getFullYear()}-${date.getMonth() + 1}-${date.getDate()}`; - - try { - await createIndex(defaultIndexName); - } catch (error) { - if (!(error instanceof IndexExistsError)) throw error; - } - - t.teardown(async () => { - await esClient.client.indices.delete({ index: defaultIndexName }); - }); - - const response = await request(app) - .post('/elasticsearch/reindex') - .send({ sourceIndex: defaultIndexName }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, `source index(${defaultIndexName}) and destination index(${defaultIndexName}) must be different.`); -}); - -test.serial('Reindex success', async (t) => { - const { esAlias } = t.context; - const destIndex = randomString(); - - const response = await request(app) - .post('/elasticsearch/reindex') - .send({ - aliasName: esAlias, - destIndex, - sourceIndex: esIndex, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.is(response.body.message, `Reindexing to ${destIndex} from ${esIndex}. 
Check the reindex-status endpoint for status.`); - - // Check the reindex status endpoint to see if the operation has completed - let statusResponse = await request(app) - .get('/elasticsearch/reindex-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - /* eslint-disable no-await-in-loop */ - while (Object.keys(statusResponse.body.reindexStatus.nodes).length > 0) { - statusResponse = await request(app) - .get('/elasticsearch/reindex-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - } - /* eslint-enable no-await-in-loop */ - - const indexStatus = statusResponse.body.indexStatus.indices[destIndex]; - - t.is(3, indexStatus.primaries.docs.count); - - // Validate destination index mappings are correct - const fieldMappings = await esClient.client.indices.getMapping() - .then((mappingsResponse) => mappingsResponse.body); - - const sourceMapping = get(fieldMappings, esIndex); - const destMapping = get(fieldMappings, destIndex); - - t.deepEqual(sourceMapping.mappings, destMapping.mappings); - - await esClient.client.indices.delete({ index: destIndex }); -}); - -test.serial('Reindex - destination index exists', async (t) => { - const { esAlias } = t.context; - const destIndex = randomString(); - const newAlias = randomString(); - - await createIndex(destIndex, newAlias); - - const response = await request(app) - .post('/elasticsearch/reindex') - .send({ - aliasName: esAlias, - destIndex: destIndex, - sourceIndex: esIndex, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.is(response.body.message, `Reindexing to ${destIndex} from ${esIndex}. Check the reindex-status endpoint for status.`); - - // Check the reindex status endpoint to see if the operation has completed - let statusResponse = await request(app) - .get('/elasticsearch/reindex-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - /* eslint-disable no-await-in-loop */ - while (Object.keys(statusResponse.body.reindexStatus.nodes).length > 0) { - statusResponse = await request(app) - .get('/elasticsearch/reindex-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - } - /* eslint-enable no-await-in-loop */ - - await esClient.client.indices.delete({ index: destIndex }); -}); - -test.serial('Reindex status, no task running', async (t) => { - const response = await request(app) - .get('/elasticsearch/reindex-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.deepEqual(response.body.reindexStatus, { nodes: {} }); -}); - -test.serial('Change index - no current', async (t) => { - const { esAlias } = t.context; - - const response = await request(app) - .post('/elasticsearch/change-index') - .send({ - aliasName: esAlias, - newIndex: 'dest-index', - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, 'Please explicity specify a current and new index.'); -}); - -test.serial('Change index - no new', async (t) => { - const { esAlias } = t.context; - - const response = await request(app) - .post('/elasticsearch/change-index') - .send({ - aliasName: esAlias, - currentIndex: 'source-index', - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, 'Please explicity specify a current and new index.'); -}); - -test.serial('Change index - current index does not exist', async (t) => { - const { esAlias } 
= t.context; - - const currentIndex = 'source-index'; - - const response = await request(app) - .post('/elasticsearch/change-index') - .send({ - aliasName: esAlias, - currentIndex, - newIndex: 'dest-index', - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, `Current index ${currentIndex} does not exist.`); -}); - -test.serial('Change index - new index does not exist', async (t) => { - const { esAlias } = t.context; - - const newIndex = 'dest-index'; - - const response = await request(app) - .post('/elasticsearch/change-index') - .send({ - aliasName: esAlias, - currentIndex: esIndex, - newIndex, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.is(response.body.message, `Change index success - alias ${esAlias} now pointing to ${newIndex}`); - - await esClient.client.indices.delete({ index: newIndex }); -}); - -test.serial('Change index - current index same as new index', async (t) => { - const { esAlias } = t.context; - - const response = await request(app) - .post('/elasticsearch/change-index') - .send({ - aliasName: esAlias, - currentIndex: 'source', - newIndex: 'source', - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(400); - - t.is(response.body.message, 'The current index cannot be the same as the new index.'); -}); - -test.serial('Change index', async (t) => { - const sourceIndex = randomString(); - const aliasName = randomString(); - const destIndex = randomString(); - - await createIndex(sourceIndex, aliasName); - - await request(app) - .post('/elasticsearch/reindex') - .send({ - aliasName, - sourceIndex, - destIndex, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const response = await request(app) - .post('/elasticsearch/change-index') - .send({ - aliasName, - currentIndex: sourceIndex, - newIndex: destIndex, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.is(response.body.message, - `Change index success - alias ${aliasName} now pointing to ${destIndex}`); - - const alias = await esClient.client.indices.getAlias({ name: aliasName }) - .then((aliasResponse) => aliasResponse.body); - - // Test that the only index connected to the alias is the destination index - t.deepEqual(Object.keys(alias), [destIndex]); - - t.is((await esClient.client.indices.exists({ index: sourceIndex })).body, true); - - await esClient.client.indices.delete({ index: destIndex }); -}); - -test.serial('Change index and delete source index', async (t) => { - const sourceIndex = randomString(); - const aliasName = randomString(); - const destIndex = randomString(); - - await createIndex(sourceIndex, aliasName); - - await request(app) - .post('/elasticsearch/reindex') - .send({ - aliasName, - sourceIndex, - destIndex, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - const response = await request(app) - .post('/elasticsearch/change-index') - .send({ - aliasName, - currentIndex: sourceIndex, - newIndex: destIndex, - deleteSource: true, - }) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.is(response.body.message, - `Change index success - alias ${aliasName} now pointing to ${destIndex} and index ${sourceIndex} deleted`); - t.is((await 
esClient.client.indices.exists({ index: sourceIndex })).body, false); - - await esClient.client.indices.delete({ index: destIndex }); -}); - -test.serial('Reindex from database - startAsyncOperation is called with expected payload', async (t) => { - const indexName = randomString(); - const processEnv = { ...process.env }; - process.env.ES_HOST = 'fakeEsHost'; - process.env.ReconciliationReportsTable = 'fakeReportsTable'; - - const asyncOperationsStub = sinon.stub(startAsyncOperation, 'invokeStartAsyncOperationLambda'); - const payload = { - indexName, - esRequestConcurrency: 'fakeEsRequestConcurrency', - postgresResultPageSize: 'fakePostgresResultPageSize', - postgresConnectionPoolSize: 'fakePostgresConnectionPoolSize', - }; - - try { - await request(app) - .post('/elasticsearch/index-from-database') - .send( - payload - ) - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - t.deepEqual(asyncOperationsStub.getCall(0).args[0].payload, { - ...payload, - esHost: process.env.ES_HOST, - reconciliationReportsTable: process.env.ReconciliationReportsTable, - }); - } finally { - process.env = processEnv; - await esClient.client.indices.delete({ index: indexName }); - asyncOperationsStub.restore(); - } -}); - -test.serial('Indices status', async (t) => { - const indexName = `z-${randomString()}`; - const otherIndexName = `a-${randomString()}`; - - await esClient.client.indices.create({ - index: indexName, - body: { mappings }, - }); - - await esClient.client.indices.create({ - index: otherIndexName, - body: { mappings }, - }); - - const response = await request(app) - .get('/elasticsearch/indices-status') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.true(response.text.includes(indexName)); - t.true(response.text.includes(otherIndexName)); - - await esClient.client.indices.delete({ index: indexName }); - await esClient.client.indices.delete({ index: otherIndexName }); -}); - -test.serial('Current index - default alias', async (t) => { - const indexName = randomString(); - await createIndex(indexName, defaultIndexAlias); - t.teardown(() => esClient.client.indices.delete({ index: indexName })); - - const response = await request(app) - .get('/elasticsearch/current-index') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.true(response.body.includes(indexName)); -}); - -test.serial('Current index - custom alias', async (t) => { - const indexName = randomString(); - const customAlias = randomString(); - await createIndex(indexName, customAlias); - - const response = await request(app) - .get(`/elasticsearch/current-index/${customAlias}`) - .set('Authorization', `Bearer ${jwtAuthToken}`) - .expect(200); - - t.deepEqual(response.body, [indexName]); - - await esClient.client.indices.delete({ index: indexName }); -}); - -test.serial('request to /elasticsearch/index-from-database endpoint returns 500 if invoking StartAsyncOperation lambda throws unexpected error', async (t) => { - const asyncOperationStartStub = sinon.stub(startAsyncOperation, 'invokeStartAsyncOperationLambda').throws( - new Error('failed to start') - ); - - try { - const response = await request(app) - .post('/elasticsearch/index-from-database') - .set('Accept', 'application/json') - .set('Authorization', `Bearer ${jwtAuthToken}`) - .send({}); - t.is(response.status, 500); - } finally { - asyncOperationStartStub.restore(); - } -}); - -test.serial('indexFromDatabase request completes successfully', async (t) => { - const stub = 
sinon.stub(startAsyncOperation, 'invokeStartAsyncOperationLambda'); - const functionName = randomId('lambda'); - const fakeRequest = { - apiGateway: { - context: { - functionName, - }, - }, - body: { - indexName: t.context.esAlias, - }, - }; - - const fakeResponse = { - send: sinon.stub(), - }; - - await t.notThrowsAsync(indexFromDatabase(fakeRequest, fakeResponse)); - t.true(fakeResponse.send.called); - stub.restore(); -}); - -test.serial('indexFromDatabase uses correct caller lambda function name', async (t) => { - const stub = sinon.stub(startAsyncOperation, 'invokeStartAsyncOperationLambda'); - const functionName = randomId('lambda'); - const fakeRequest = { - apiGateway: { - context: { - functionName, - }, - }, - body: { - indexName: randomId('index'), - }, - }; - const fakeResponse = { - send: sinon.stub(), - }; - - await indexFromDatabase(fakeRequest, fakeResponse); - t.is(stub.getCall(0).firstArg.callerLambdaName, functionName); - stub.restore(); -}); diff --git a/packages/api/tests/endpoints/test-executions.js b/packages/api/tests/endpoints/test-executions.js index 96439c58b26..fac59d3f4d5 100644 --- a/packages/api/tests/endpoints/test-executions.js +++ b/packages/api/tests/endpoints/test-executions.js @@ -233,7 +233,7 @@ test.beforeEach(async (t) => { ]; // create fake Postgres granule records - // es records are for Metrics search + // es records are for Cloud Metrics search t.context.fakePGGranules = await Promise.all(t.context.fakeGranules.map(async (fakeGranule) => { await indexer.indexGranule(esClient, fakeGranule, esIndex); const granulePgRecord = await translateApiGranuleToPostgresGranule({ diff --git a/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-index.js b/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-index.js index dfa6d152e0b..15fafbe37ad 100644 --- a/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-index.js +++ b/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-index.js @@ -29,13 +29,6 @@ const { const { UnmetRequirementsError, } = require('@cumulus/errors'); -const { - Search, -} = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { constructCollectionId, } = require('@cumulus/message/Collections'); @@ -140,26 +133,6 @@ test.before(async (t) => { t.context.testKnex = knex; t.context.testKnexAdmin = knexAdmin; - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - - t.context.esExecutionsClient = new Search( - {}, - 'execution', - t.context.esIndex - ); - t.context.esPdrsClient = new Search( - {}, - 'pdr', - t.context.esIndex - ); - t.context.esGranulesClient = new Search( - {}, - 'granule', - t.context.esIndex - ); - t.context.collectionPgModel = new CollectionPgModel(); t.context.executionPgModel = new ExecutionPgModel(); t.context.granulePgModel = new GranulePgModel(); @@ -283,7 +256,6 @@ test.after.always(async (t) => { knexAdmin: t.context.testKnexAdmin, testDbName: t.context.testDbName, }); - await cleanupTestIndex(t.context); await sns().send(new DeleteTopicCommand({ TopicArn: ExecutionsTopicArn })); await sns().send(new DeleteTopicCommand({ TopicArn: PdrsTopicArn })); }); diff --git a/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-write-pdr.js b/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-write-pdr.js index 0dbe9e54272..8a44fcf3e4d 100644 --- a/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-write-pdr.js +++ 
b/packages/api/tests/lambdas/sf-event-sqs-to-db-records/test-write-pdr.js @@ -17,7 +17,6 @@ const { translatePostgresPdrToApiPdr, migrationDir, } = require('@cumulus/db'); -const { Search } = require('@cumulus/es-client/search'); const { createSnsTopic } = require('@cumulus/aws-client/SNS'); const { sns, sqs } = require('@cumulus/aws-client/services'); const { @@ -25,10 +24,6 @@ const { DeleteTopicCommand, } = require('@aws-sdk/client-sns'); const { ReceiveMessageCommand } = require('@aws-sdk/client-sqs'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { generatePdrRecord, @@ -45,15 +40,6 @@ test.before(async (t) => { ); t.context.knexAdmin = knexAdmin; t.context.knex = knex; - - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esPdrClient = new Search( - {}, - 'pdr', - t.context.esIndex - ); }); test.beforeEach(async (t) => { @@ -166,7 +152,6 @@ test.after.always(async (t) => { await destroyLocalTestDb({ ...t.context, }); - await cleanupTestIndex(t.context); }); test('generatePdrRecord() generates correct PDR record', (t) => { @@ -320,7 +305,6 @@ test.serial('writePdr() does not update PDR record if update is from an older ex }); const pgRecord = await pdrPgModel.get(knex, { name: pdr.name }); - const esRecord = await t.context.esPdrClient.get(pdr.name); const stats = { processing: 0, @@ -330,10 +314,6 @@ test.serial('writePdr() does not update PDR record if update is from an older ex status: 'completed', stats, }); - t.like(esRecord, { - status: 'completed', - stats, - }); cumulusMessage.meta.status = 'running'; cumulusMessage.payload.running = ['arn2']; @@ -349,18 +329,13 @@ test.serial('writePdr() does not update PDR record if update is from an older ex }); const updatedPgRecord = await pdrPgModel.get(knex, { name: pdr.name }); - const updatedEsRecord = await t.context.esPdrClient.get(pdr.name); t.like(updatedPgRecord, { status: 'completed', stats, }); - t.like(updatedEsRecord, { - status: 'completed', - stats, - }); }); -test.serial('writePdr() saves a PDR record to PostgreSQL/Elasticsearch if PostgreSQL write is enabled', async (t) => { +test.serial('writePdr() saves a PDR record to PostgreSQL', async (t) => { const { cumulusMessage, knex, @@ -380,35 +355,9 @@ test.serial('writePdr() saves a PDR record to PostgreSQL/Elasticsearch if Postgr }); t.true(await pdrPgModel.exists(knex, { name: pdr.name })); - t.true(await t.context.esPdrClient.exists(pdr.name)); }); -test.serial('writePdr() saves a PDR record to PostgreSQL/Elasticsearch with same timestamps', async (t) => { - const { - cumulusMessage, - knex, - collectionCumulusId, - providerCumulusId, - executionCumulusId, - pdr, - pdrPgModel, - } = t.context; - - await writePdr({ - cumulusMessage, - collectionCumulusId, - providerCumulusId, - executionCumulusId: executionCumulusId, - knex, - }); - - const pgRecord = await pdrPgModel.get(knex, { name: pdr.name }); - const esRecord = await t.context.esPdrClient.get(pdr.name); - t.is(pgRecord.created_at.getTime(), esRecord.createdAt); - t.is(pgRecord.updated_at.getTime(), esRecord.updatedAt); -}); - -test.serial('writePdr() does not write to PostgreSQL/Elasticsearch if PostgreSQL write fails', async (t) => { +test.serial('writePdr() does not write to PostgreSQL if PostgreSQL write fails', async (t) => { const { cumulusMessage, knex, @@ -450,51 +399,6 @@ test.serial('writePdr() does not write to PostgreSQL/Elasticsearch if PostgreSQL ); t.false(await 
pdrPgModel.exists(knex, { name: pdr.name })); - t.false(await t.context.esPdrClient.exists(pdr.name)); -}); - -test.serial('writePdr() does not write to PostgreSQL/Elasticsearch if Elasticsearch write fails', async (t) => { - const { - cumulusMessage, - knex, - collectionCumulusId, - providerCumulusId, - pdrPgModel, - } = t.context; - - const pdr = { - name: cryptoRandomString({ length: 5 }), - PANSent: false, - PANmessage: 'test', - }; - cumulusMessage.payload = { - pdr, - }; - - cumulusMessage.meta.status = 'completed'; - - const fakeEsClient = { - initializeEsClient: () => Promise.resolve(), - client: { - update: () => { - throw new Error('PDR ES error'); - }, - }, - }; - - await t.throwsAsync( - writePdr({ - cumulusMessage, - collectionCumulusId, - providerCumulusId, - knex, - esClient: fakeEsClient, - }), - { message: 'PDR ES error' } - ); - - t.false(await pdrPgModel.exists(knex, { name: pdr.name })); - t.false(await t.context.esPdrClient.exists(pdr.name)); }); test.serial('writePdr() successfully publishes an SNS message', async (t) => { diff --git a/packages/api/tests/lambdas/test-bootstrap.js b/packages/api/tests/lambdas/test-bootstrap.js deleted file mode 100644 index c0797c6496b..00000000000 --- a/packages/api/tests/lambdas/test-bootstrap.js +++ /dev/null @@ -1,43 +0,0 @@ -const test = require('ava'); -const sinon = require('sinon'); - -const { handler } = require('../../lambdas/bootstrap'); - -test('handler calls bootstrapFunction with expected values', async (t) => { - const bootstrapFunctionStub = sinon.stub(); - const testContext = { - bootstrapFunction: bootstrapFunctionStub, - }; - - const hostName = 'fakehost'; - - const actual = await handler({ - testContext, - removeAliasConflict: true, - elasticsearchHostname: hostName, - }); - - t.deepEqual(actual, { Data: {}, Status: 'SUCCESS' }); - t.true(bootstrapFunctionStub.calledWith({ - host: hostName, - removeAliasConflict: true, - })); -}); - -test('handler throws with error/status on bootstrap function failure', async (t) => { - const errorMessage = 'Fake Error'; - const bootstrapFunctionStub = () => { - throw new Error(errorMessage); - }; - const testContext = { - bootstrapFunction: bootstrapFunctionStub, - }; - - const hostName = 'fakehost'; - - await t.throwsAsync(handler({ - testContext, - removeAliasConflict: true, - elasticsearchHostname: hostName, - }), { message: errorMessage }); -}); diff --git a/packages/api/tests/lambdas/test-bulk-granule-delete.js b/packages/api/tests/lambdas/test-bulk-granule-delete.js index c1f16faaad8..e42afe304dd 100644 --- a/packages/api/tests/lambdas/test-bulk-granule-delete.js +++ b/packages/api/tests/lambdas/test-bulk-granule-delete.js @@ -13,11 +13,7 @@ const { } = require('@cumulus/db'); const { createBucket, deleteS3Buckets } = require('@cumulus/aws-client/S3'); const { randomId, randomString } = require('@cumulus/common/test-utils'); -const { Search } = require('@cumulus/es-client/search'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); + const { sns, sqs } = require('@cumulus/aws-client/services'); const { SubscribeCommand, @@ -44,11 +40,6 @@ test.before(async (t) => { const { knex, knexAdmin } = await generateLocalTestDb(testDbName, migrationDir); t.context.knex = knex; t.context.knexAdmin = knexAdmin; - - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esGranulesClient = new Search({}, 'granule', t.context.esIndex); }); test.beforeEach(async (t) => { @@ 
-87,7 +78,6 @@ test.after.always(async (t) => { knexAdmin: t.context.knexAdmin, testDbName, }); - await cleanupTestIndex(t.context); }); test('bulkGranuleDelete does not fail on published granules if payload.forceRemoveFromCmr is true', async (t) => { @@ -164,17 +154,6 @@ test('bulkGranuleDelete does not fail on published granules if payload.forceRemo { granule_id: pgGranuleId2, collection_cumulus_id: pgCollectionCumulusId2 } )); - t.false( - await t.context.esGranulesClient.exists( - pgGranuleId1 - ) - ); - t.false( - await t.context.esGranulesClient.exists( - pgGranuleId2 - ) - ); - const s3Buckets = granules[0].s3Buckets; t.teardown(() => deleteS3Buckets([ s3Buckets.protected.name, diff --git a/packages/api/tests/lambdas/test-bulk-operation.js b/packages/api/tests/lambdas/test-bulk-operation.js index 3b28d46c994..f858da57e74 100644 --- a/packages/api/tests/lambdas/test-bulk-operation.js +++ b/packages/api/tests/lambdas/test-bulk-operation.js @@ -49,7 +49,6 @@ const esSearchStub = sandbox.stub(); const esScrollStub = sandbox.stub(); FakeEsClient.prototype.scroll = esScrollStub; FakeEsClient.prototype.search = esSearchStub; - const bulkOperation = proxyquire('../../lambdas/bulk-operation', { '../lib/granules': proxyquire('../../lib/granules', { '@cumulus/es-client/search': { @@ -392,6 +391,7 @@ test.serial('bulk operation BULK_GRANULE applies workflow to granules returned b }); await verifyGranulesQueuedStatus(t); }); + test.serial('applyWorkflowToGranules sets the granules status to queued', async (t) => { await setUpExistingDatabaseRecords(t); const workflowName = 'test-workflow'; diff --git a/packages/api/tests/lambdas/test-cleanExecutions.js b/packages/api/tests/lambdas/test-cleanExecutions.js deleted file mode 100644 index e19e251b058..00000000000 --- a/packages/api/tests/lambdas/test-cleanExecutions.js +++ /dev/null @@ -1,553 +0,0 @@ -/* eslint-disable no-await-in-loop */ -const test = require('ava'); -const moment = require('moment'); -const clone = require('lodash/clone'); -const { - translatePostgresExecutionToApiExecution, - fakeExecutionRecordFactory, - localStackConnectionEnv, -} = require('@cumulus/db'); -const { cleanupTestIndex, createTestIndex } = require('@cumulus/es-client/testUtils'); -const { handler, getExpirationDate, cleanupExpiredESExecutionPayloads } = require('../../lambdas/cleanExecutions'); -test.beforeEach(async (t) => { - const { esIndex, esClient, searchClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.searchClient = searchClient; - - const records = []; - for (let i = 0; i < 20; i += 2) { - records.push(await translatePostgresExecutionToApiExecution(fakeExecutionRecordFactory({ - updated_at: moment().subtract(i, 'days').toDate(), - final_payload: '{"a": "b"}', - original_payload: '{"b": "c"}', - status: 'completed', - cumulus_id: i, - }))); - records.push(await translatePostgresExecutionToApiExecution(fakeExecutionRecordFactory({ - updated_at: moment().subtract(i, 'days').toDate(), - final_payload: '{"a": "b"}', - original_payload: '{"b": "c"}', - status: 'running', - cumulus_id: i + 1, - }))); - } - for (const record of records) { - await t.context.esClient.client.index({ - body: record, - id: record.cumulusId, - index: t.context.esIndex, - type: 'execution', - refresh: true, - }); - } -}); - -test.afterEach.always(async (t) => { - await cleanupTestIndex(t.context); -}); - -const esPayloadsEmpty = (entry) => !entry.finalPayload && !entry.orginalPayload; - -test.serial('handler() handles running 
expiration', async (t) => { - const env = clone(process.env); - process.env = localStackConnectionEnv; - process.env.PG_DATABASE = t.context.testDbName; - process.env.ES_INDEX = t.context.esIndex; - process.env.LOCAL_ES_HOST = 'localhost'; - let expirationDays = 4; - let expirationDate = getExpirationDate(expirationDays); - process.env.CLEANUP_NON_RUNNING = 'false'; - process.env.CLEANUP_RUNNING = 'true'; - process.env.PAYLOAD_TIMEOUT = expirationDays; - - await handler(); - - let massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - massagedEsExecutions.results.forEach((massagedExecution) => { - if (massagedExecution.updatedAt <= expirationDate && massagedExecution.status === 'running') { - t.true(esPayloadsEmpty(massagedExecution)); - } else { - t.false(esPayloadsEmpty(massagedExecution)); - } - }); - - expirationDays = 2; - expirationDate = getExpirationDate(expirationDays); - process.env.PAYLOAD_TIMEOUT = expirationDays; - - await handler(); - - massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - massagedEsExecutions.results.forEach((massagedExecution) => { - if (massagedExecution.updatedAt <= expirationDate.getTime() && massagedExecution.status === 'running') { - t.true(esPayloadsEmpty(massagedExecution)); - } else { - t.false(esPayloadsEmpty(massagedExecution)); - } - }); - process.env = env; -}); - -test.serial('handler() handles non running expiration', async (t) => { - const env = clone(process.env); - process.env = localStackConnectionEnv; - process.env.PG_DATABASE = t.context.testDbName; - process.env.ES_INDEX = t.context.esIndex; - let expirationDays = 5; - let expirationDate = getExpirationDate(expirationDays); - process.env.CLEANUP_NON_RUNNING = 'true'; - process.env.CLEANUP_RUNNING = 'false'; - process.env.PAYLOAD_TIMEOUT = expirationDays; - await handler(); - - let massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - - massagedEsExecutions.results.forEach((massagedExecution) => { - if (massagedExecution.updatedAt <= expirationDate && massagedExecution.status !== 'running') { - t.true(esPayloadsEmpty(massagedExecution)); - } else { - t.false(esPayloadsEmpty(massagedExecution)); - } - }); - - expirationDays = 3; - expirationDate = getExpirationDate(expirationDays); - process.env.PAYLOAD_TIMEOUT = expirationDays; - - await handler(); - - massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - massagedEsExecutions.results.forEach((massagedExecution) => { - if (massagedExecution.updatedAt <= expirationDate.getTime() && massagedExecution.status !== 'running') { - t.true(esPayloadsEmpty(massagedExecution)); - } else { - t.false(esPayloadsEmpty(massagedExecution)); - } - }); - process.env = env; -}); - -test.serial('handler() handles both expirations', async (t) => { - const env = clone(process.env); - process.env = localStackConnectionEnv; - process.env.PG_DATABASE = t.context.testDbName; - process.env.ES_INDEX = t.context.esIndex; - process.env.LOCAL_ES_HOST = 'localhost'; - let payloadTimeout = 9; - let payloadExpiration = getExpirationDate(payloadTimeout); - - process.env.CLEANUP_RUNNING = 'true'; - process.env.CLEANUP_NON_RUNNING = 'true'; - process.env.PAYLOAD_TIMEOUT = payloadTimeout; - - await handler(); - - let 
massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - massagedEsExecutions.results.forEach((massagedExecution) => { - if (massagedExecution.updatedAt <= payloadExpiration.getTime()) { - t.true(esPayloadsEmpty(massagedExecution)); - } else { - t.false(esPayloadsEmpty(massagedExecution)); - } - }); - payloadTimeout = 8; - - payloadExpiration = getExpirationDate(payloadTimeout); - process.env.PAYLOAD_TIMEOUT = payloadTimeout; - - await handler(); - - massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - massagedEsExecutions.results.forEach((massagedExecution) => { - if (massagedExecution.updatedAt <= payloadExpiration.getTime()) { - t.true(esPayloadsEmpty(massagedExecution)); - } else { - t.false(esPayloadsEmpty(massagedExecution)); - } - }); - process.env = env; -}); - -test.serial('handler() throws errors when misconfigured', async (t) => { - const env = clone(process.env); - process.env.CLEANUP_RUNNING = 'false'; - process.env.CLEANUP_NON_RUNNING = 'false'; - - await t.throwsAsync(handler(), { - message: 'running and non-running executions configured to be skipped, nothing to do', - }); - - process.env.CLEANUP_RUNNING = 'false'; - process.env.CLEANUP_NON_RUNNING = 'true'; - process.env.PAYLOAD_TIMEOUT = 'frogs'; - await t.throwsAsync(handler(), { - message: 'Invalid number of days specified in configuration for payloadTimeout: frogs', - }); - process.env = env; -}); - -test.serial('handler() iterates through data in batches when updateLimit is set low', async (t) => { - const env = clone(process.env); - - process.env = localStackConnectionEnv; - process.env.PG_DATABASE = t.context.testDbName; - process.env.ES_INDEX = t.context.esIndex; - process.env.LOCAL_ES_HOST = 'localhost'; - - process.env.CLEANUP_RUNNING = 'true'; - process.env.CLEANUP_NON_RUNNING = 'true'; - process.env.PAYLOAD_TIMEOUT = 2; - - process.env.UPDATE_LIMIT = 2; - - await handler(); - - let massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - let esCleanedCount = 0; - massagedEsExecutions.results.forEach((massagedExecution) => { - if (esPayloadsEmpty(massagedExecution)) esCleanedCount += 1; - }); - t.is(esCleanedCount, 2); - - await handler(); - - massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - esCleanedCount = 0; - massagedEsExecutions.results.forEach((massagedExecution) => { - if (esPayloadsEmpty(massagedExecution)) esCleanedCount += 1; - }); - t.is(esCleanedCount, 4); - - process.env.UPDATE_LIMIT = 12; - - await handler(); - - massagedEsExecutions = await t.context.searchClient.query({ - index: t.context.esIndex, - type: 'execution', - body: {}, - size: 30, - }); - esCleanedCount = 0; - massagedEsExecutions.results.forEach((massagedExecution) => { - if (esPayloadsEmpty(massagedExecution)) esCleanedCount += 1; - }); - t.is(esCleanedCount, 16); - - process.env = env; -}); - -test('cleanupExpiredEsExecutionPayloads() for just running removes expired running executions', async (t) => { - let timeoutDays = 6; - await cleanupExpiredESExecutionPayloads( - timeoutDays, - true, - false, - 100, - t.context.esIndex - ); - // await es refresh - - let expiration = moment().subtract(timeoutDays, 'days').toDate().getTime(); - let relevantExecutions = await t.context.searchClient.query( 
- { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - lte: expiration, - }, - }, - }, - }, - } - ); - for (const execution of relevantExecutions.results) { - if (execution.status === 'running') { - t.true(execution.finalPayload === undefined); - t.true(execution.originalPayload === undefined); - } else { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } - } - let irrelevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - gt: expiration, - }, - }, - }, - }, - } - ); - for (const execution of irrelevantExecutions.results) { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } - - timeoutDays = 2; - await cleanupExpiredESExecutionPayloads( - timeoutDays, - true, - false, - 100, - t.context.esIndex - ); - - expiration = moment().subtract(timeoutDays, 'days').toDate().getTime(); - relevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - lte: expiration, - }, - }, - }, - }, - } - ); - for (const execution of relevantExecutions.results) { - if (execution.status === 'running') { - t.true(execution.finalPayload === undefined); - t.true(execution.originalPayload === undefined); - } else { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } - } - irrelevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - gt: expiration, - }, - }, - }, - }, - } - ); - for (const execution of irrelevantExecutions.results) { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } -}); - -test('cleanupExpiredEsExecutionPayloads() for just nonRunning removes expired non running executions', async (t) => { - let timeoutDays = 6; - await cleanupExpiredESExecutionPayloads( - timeoutDays, - false, - true, - 100, - t.context.esIndex - ); - - let expiration = moment().subtract(timeoutDays, 'days').toDate().getTime(); - - let relevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - lte: expiration, - }, - }, - }, - }, - } - ); - for (const execution of relevantExecutions.results) { - if (execution.status !== 'running') { - t.true(execution.finalPayload === undefined); - t.true(execution.originalPayload === undefined); - } else { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } - } - let irrelevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - gt: expiration, - }, - }, - }, - }, - } - ); - for (const execution of irrelevantExecutions.results) { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } - - timeoutDays = 2; - await cleanupExpiredESExecutionPayloads( - timeoutDays, - false, - true, - 100, - t.context.esIndex - ); - - expiration = moment().subtract(timeoutDays, 'days').toDate().getTime(); - relevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - lte: 
expiration, - }, - }, - }, - }, - } - ); - for (const execution of relevantExecutions.results) { - if (execution.status !== 'running') { - t.true(execution.finalPayload === undefined); - t.true(execution.originalPayload === undefined); - } else { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } - } - irrelevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - gt: expiration, - }, - }, - }, - }, - } - ); - for (const execution of irrelevantExecutions.results) { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } -}); - -test('cleanupExpiredEsExecutionPayloads() for running and nonRunning executions', async (t) => { - const timeoutDays = 5; - await cleanupExpiredESExecutionPayloads( - timeoutDays, - true, - true, - 100, - t.context.esIndex - ); - - const expiration = moment().subtract(timeoutDays, 'days').toDate().getTime(); - - const relevant = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - lte: expiration, - }, - }, - }, - }, - } - ); - for (const execution of relevant.results) { - t.true(execution.finalPayload === undefined); - t.true(execution.originalPayload === undefined); - } - const irrelevantExecutions = await t.context.searchClient.query( - { - index: t.context.esIndex, - type: 'execution', - body: { - query: { - range: { - updatedAt: { - gt: expiration, - }, - }, - }, - }, - } - ); - for (const execution of irrelevantExecutions.results) { - t.false(execution.finalPayload === undefined); - t.false(execution.originalPayload === undefined); - } -}); diff --git a/packages/api/tests/lambdas/test-create-reconciliation-report.js b/packages/api/tests/lambdas/test-create-reconciliation-report.js index fead1d4b13b..25b988d21bd 100644 --- a/packages/api/tests/lambdas/test-create-reconciliation-report.js +++ b/packages/api/tests/lambdas/test-create-reconciliation-report.js @@ -46,8 +46,6 @@ const { translatePostgresReconReportToApiReconReport, } = require('@cumulus/db'); const { getDistributionBucketMapKey } = require('@cumulus/distribution-utils'); -const { Search } = require('@cumulus/es-client/search'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); const { fakeGranuleFactoryV2, @@ -62,9 +60,6 @@ const ORCASearchCatalogQueue = require('../../lib/ORCASearchCatalogQueue'); // Call normalize event on all input events before calling the handler. const handler = (event) => unwrappedHandler(normalizeEvent(event)); -let esAlias; -let esIndex; - const createBucket = (Bucket) => awsServices.s3().createBucket({ Bucket }); const requiredStaticCollectionFields = { granuleIdExtraction: randomString(), @@ -253,16 +248,16 @@ const randomBetween = (a, b) => Math.floor(Math.random() * (b - a + 1) + a); const randomTimeBetween = (t1, t2) => randomBetween(t1, t2); /** - * Prepares localstack with a number of active granules. Sets up ES with + * Prepares localstack with a number of active granules. Sets up pg with * random collections where some fall within the start and end timestamps. - * Also creates a number that are only in ES, as well as some that are only + * Also creates a number that are only in pg, as well as some that are only * "returned by CMR" (as a stubbed function) * * @param t.t * @param {object} t - AVA test context. 
* @param t.params * @returns {object} setupVars - Object with information about the current - * state of elasticsearch and CMR mock. + * state of pg and CMR mock. * The object returned has: * + startTimestamp - beginning of matching timerange * + endTimestamp - end of matching timerange @@ -270,11 +265,11 @@ const randomTimeBetween = (t1, t2) => randomBetween(t1, t2); * timestamps and included in the CMR mock * + matchingCollectionsOutsiderange - active collections dated not between the * start and end timestamps and included in the CMR mock - * + extraESCollections - collections within the timestamp range, but excluded - * from CMR mock. (only in ES) - * + extraESCollectionsOutOfRange - collections outside the timestamp range and - * excluded from CMR mock. (only in ES out of range) - * + extraCmrCollections - collections not in ES but returned by the CMR mock. + * + extraPgCollections - collections within the timestamp range, but excluded + * from CMR mock + * + extraPgCollectionsOutOfRange - collections outside the timestamp range and + * excluded from CMR mock + * + extraCmrCollections - collections not in pg but returned by the CMR mock */ const setupDatabaseAndCMRForTests = async ({ t, params = {} }) => { const dataBuckets = range(2).map(() => randomId('bucket')); @@ -294,8 +289,8 @@ const setupDatabaseAndCMRForTests = async ({ t, params = {} }) => { const { numMatchingCollections = randomBetween(10, 15), numMatchingCollectionsOutOfRange = randomBetween(5, 10), - numExtraESCollections = randomBetween(5, 10), - numExtraESCollectionsOutOfRange = randomBetween(5, 10), + numExtraPgCollections = randomBetween(5, 10), + numExtraPgCollectionsOutOfRange = randomBetween(5, 10), numExtraCmrCollections = randomBetween(5, 10), } = params; @@ -304,31 +299,31 @@ const setupDatabaseAndCMRForTests = async ({ t, params = {} }) => { const endTimestamp = new Date('2020-07-01T00:00:00.000Z').getTime(); const monthLater = moment(endTimestamp).add(1, 'month').valueOf(); - // Create collections that are in sync ES/CMR during the time period + // Create collections that are in sync pg/CMR during the time period const matchingCollections = range(numMatchingCollections).map((r) => ({ ...requiredStaticCollectionFields, name: randomId(`name${r}-`), version: randomId('vers'), updatedAt: randomTimeBetween(startTimestamp, endTimestamp), })); - // Create collections in sync ES/CMR outside of the timestamps range + // Create collections in sync pg/CMR outside of the timestamps range const matchingCollectionsOutsideRange = range(numMatchingCollectionsOutOfRange).map((r) => ({ ...requiredStaticCollectionFields, name: randomId(`name${r}-`), version: randomId('vers'), updatedAt: randomTimeBetween(monthEarlier, startTimestamp - 1), })); - // Create collections in ES only within the timestamp range - const extraESCollections = range(numExtraESCollections).map((r) => ({ + // Create collections in pg only within the timestamp range + const extraPgCollections = range(numExtraPgCollections).map((r) => ({ ...requiredStaticCollectionFields, - name: randomId(`extraES${r}-`), + name: randomId(`extraPg${r}-`), version: randomId('vers'), updatedAt: randomTimeBetween(startTimestamp, endTimestamp), })); - // Create collections in ES only outside of the timestamp range - const extraESCollectionsOutOfRange = range(numExtraESCollectionsOutOfRange).map((r) => ({ + // Create collections in pg only outside of the timestamp range + const extraPgCollectionsOutOfRange = range(numExtraPgCollectionsOutOfRange).map((r) => ({ 
...requiredStaticCollectionFields, - name: randomId(`extraES${r}-`), + name: randomId(`extraPg${r}-`), version: randomId('vers'), updatedAt: randomTimeBetween(endTimestamp + 1, monthLater), })); @@ -360,8 +355,8 @@ const setupDatabaseAndCMRForTests = async ({ t, params = {} }) => { await storeCollectionsWithGranuleToPostgres( matchingCollections .concat(matchingCollectionsOutsideRange) - .concat(extraESCollections) - .concat(extraESCollectionsOutOfRange), + .concat(extraPgCollections) + .concat(extraPgCollectionsOutOfRange), t.context ); @@ -376,8 +371,8 @@ const setupDatabaseAndCMRForTests = async ({ t, params = {} }) => { endTimestamp, matchingCollections, matchingCollectionsOutsideRange, - extraESCollections, - extraESCollectionsOutOfRange, + extraPgCollections, + extraPgCollectionsOutOfRange, extraCmrCollections, collectionGranules, mappedProviders, @@ -421,20 +416,6 @@ test.beforeEach(async (t) => { cmrSearchStub.withArgs('collections').resolves([]); cmrSearchStub.withArgs('granules').resolves([]); - esAlias = randomId('esalias'); - esIndex = randomId('esindex'); - process.env.ES_INDEX = esAlias; - await bootstrapElasticSearch({ - host: 'fakehost', - index: esIndex, - alias: esAlias, - }); - t.context.esReportClient = new Search( - {}, - 'reconciliationReport', - process.env.ES_INDEX - ); - // write 4 providers to the database t.context.providers = await Promise.all(new Array(4).fill().map(async () => { const [pgProvider] = await t.context.providerPgModel.create( @@ -514,12 +495,8 @@ test.serial('Generates valid reconciliation report for no buckets', async (t) => t.true(createStartTime <= createEndTime); t.is(report.reportStartTime, (new Date(startTimestamp)).toISOString()); t.is(report.reportEndTime, (new Date(endTimestamp)).toISOString()); - - const esRecord = await t.context.esReportClient.get(reportRecord.name); - t.like(esRecord, reportRecord); }); -// TODO - use this to make generic the data to PG test.serial('Generates valid GNF reconciliation report when everything is in sync', async (t) => { const { files, matchingColls } = await generateRandomGranules(t); const event = { @@ -554,9 +531,6 @@ test.serial('Generates valid GNF reconciliation report when everything is in syn const createStartTime = moment(report.createStartTime); const createEndTime = moment(report.createEndTime); t.true(createStartTime <= createEndTime); - - const esRecord = await t.context.esReportClient.get(reportRecord.name); - t.like(esRecord, reportRecord); }); test.serial('Generates a valid Inventory reconciliation report when everything is in sync', async (t) => { @@ -787,7 +761,7 @@ test.serial('Generates valid reconciliation report when internally, there are bo test.serial('Generates valid reconciliation report when there are both extra postGres and CMR collections', async (t) => { const params = { numMatchingCollectionsOutOfRange: 0, - numExtraESCollectionsOutOfRange: 0, + numExtraPgCollectionsOutOfRange: 0, }; const setupVars = await setupDatabaseAndCMRForTests({ t, params }); @@ -807,8 +781,8 @@ test.serial('Generates valid reconciliation report when there are both extra pos t.is(report.error, undefined); t.is(collectionsInCumulusCmr.okCount, setupVars.matchingCollections.length); - t.is(collectionsInCumulusCmr.onlyInCumulus.length, setupVars.extraESCollections.length); - setupVars.extraESCollections.map((collection) => + t.is(collectionsInCumulusCmr.onlyInCumulus.length, setupVars.extraPgCollections.length); + setupVars.extraPgCollections.map((collection) => 
t.true(collectionsInCumulusCmr.onlyInCumulus .includes(constructCollectionId(collection.name, collection.version)))); @@ -843,14 +817,14 @@ test.serial( t.is(report.error, undefined); t.is(collectionsInCumulusCmr.okCount, setupVars.matchingCollections.length); - t.is(collectionsInCumulusCmr.onlyInCumulus.length, setupVars.extraESCollections.length); + t.is(collectionsInCumulusCmr.onlyInCumulus.length, setupVars.extraPgCollections.length); // Each extra collection in timerange is included - setupVars.extraESCollections.map((collection) => + setupVars.extraPgCollections.map((collection) => t.true(collectionsInCumulusCmr.onlyInCumulus .includes(constructCollectionId(collection.name, collection.version)))); // No collections that were out of timestamp are included - setupVars.extraESCollectionsOutOfRange.map((collection) => + setupVars.extraPgCollectionsOutOfRange.map((collection) => t.false(collectionsInCumulusCmr.onlyInCumulus .includes(constructCollectionId(collection.name, collection.version)))); @@ -949,7 +923,7 @@ test.serial( async (t) => { const params = { numMatchingCollectionsOutOfRange: 0, - numExtraESCollectionsOutOfRange: 0, + numExtraPgCollectionsOutOfRange: 0, }; const setupVars = await setupDatabaseAndCMRForTests({ t, params }); @@ -970,8 +944,8 @@ test.serial( t.is(collectionsInCumulusCmr.okCount, setupVars.matchingCollections.length); t.is(report.filesInCumulus.okCount, 0); - t.is(collectionsInCumulusCmr.onlyInCumulus.length, setupVars.extraESCollections.length); - setupVars.extraESCollections.map((collection) => + t.is(collectionsInCumulusCmr.onlyInCumulus.length, setupVars.extraPgCollections.length); + setupVars.extraPgCollections.map((collection) => t.true(collectionsInCumulusCmr.onlyInCumulus .includes(constructCollectionId(collection.name, collection.version)))); @@ -1009,12 +983,12 @@ test.serial( // all extra DB collections are found t.is( collectionsInCumulusCmr.onlyInCumulus.length, - setupVars.extraESCollections.length + setupVars.extraESCollectionsOutOfRange.length + setupVars.extraPgCollections.length + setupVars.extraPgCollectionsOutOfRange.length ); - setupVars.extraESCollections.map((collection) => + setupVars.extraPgCollections.map((collection) => t.true(collectionsInCumulusCmr.onlyInCumulus .includes(constructCollectionId(collection.name, collection.version)))); - setupVars.extraESCollectionsOutOfRange.map((collection) => + setupVars.extraPgCollectionsOutOfRange.map((collection) => t.true(collectionsInCumulusCmr.onlyInCumulus .includes(constructCollectionId(collection.name, collection.version)))); @@ -1037,8 +1011,8 @@ test.serial( const testCollection = [ setupVars.matchingCollections[3], setupVars.extraCmrCollections[1], - setupVars.extraESCollections[1], - setupVars.extraESCollectionsOutOfRange[0], + setupVars.extraPgCollections[1], + setupVars.extraPgCollectionsOutOfRange[0], ]; const collectionId = testCollection.map((c) => constructCollectionId(c.name, c.version)); @@ -1120,7 +1094,7 @@ test.serial( const testCollection = [ setupVars.extraCmrCollections[3], setupVars.matchingCollections[2], - setupVars.extraESCollections[1], + setupVars.extraPgCollections[1], ]; const collectionId = testCollection.map((c) => constructCollectionId(c.name, c.version)); console.log(`testCollection: ${JSON.stringify(collectionId)}`); @@ -1155,7 +1129,7 @@ test.serial( async (t) => { const setupVars = await setupDatabaseAndCMRForTests({ t }); - const testCollection = setupVars.extraESCollections[3]; + const testCollection = setupVars.extraPgCollections[3]; 
console.log(`testCollection: ${JSON.stringify(testCollection)}`); const event = { @@ -1187,15 +1161,15 @@ test.serial( ); test.serial( - 'Generates valid ONE WAY reconciliation report with time params and filters by granuleIds when there are extra cumulus/ES and CMR collections', + 'Generates valid ONE WAY reconciliation report with time params and filters by granuleIds when there are extra cumulus/pg and CMR collections', async (t) => { const { startTimestamp, endTimestamp, ...setupVars } = await setupDatabaseAndCMRForTests({ t }); const testCollection = [ setupVars.matchingCollections[3], setupVars.extraCmrCollections[1], - setupVars.extraESCollections[1], - setupVars.extraESCollectionsOutOfRange[0], + setupVars.extraPgCollections[1], + setupVars.extraPgCollectionsOutOfRange[0], ]; const testCollectionIds = testCollection.map((c) => constructCollectionId(c.name, c.version)); @@ -1251,7 +1225,7 @@ test.serial( const testCollection = [ setupVars.extraCmrCollections[3], setupVars.matchingCollections[2], - setupVars.extraESCollections[1], + setupVars.extraPgCollections[1], ]; const testCollectionIds = testCollection.map((c) => constructCollectionId(c.name, c.version)); @@ -1296,7 +1270,7 @@ test.serial( const testCollection = [ setupVars.extraCmrCollections[3], setupVars.matchingCollections[2], - setupVars.extraESCollections[1], + setupVars.extraPgCollections[1], ]; const testCollectionIds = testCollection.map((c) => constructCollectionId(c.name, c.version)); @@ -1765,7 +1739,7 @@ test.serial('When report creation fails, reconciliation report status is set to t.context.knex, { name: reportName } ); // reconciliation report lambda outputs the translated API version, not the PG version, so - // it should be translated for comparison, at least for the comparison with the ES (API) version + // it should be translated for comparison const reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); t.is(reportApiRecord.status, 'Failed'); t.is(reportApiRecord.type, 'Inventory'); @@ -1774,9 +1748,6 @@ test.serial('When report creation fails, reconciliation report status is set to const report = await getJsonS3Object(t.context.systemBucket, reportKey); t.is(report.status, 'Failed'); t.truthy(report.error); - - const esRecord = await t.context.esReportClient.get(reportName); - t.like(esRecord, reportApiRecord); }); test.serial('Creates a valid Granule Inventory report', async (t) => { @@ -1824,9 +1795,6 @@ test.serial('Creates a valid Granule Inventory report', async (t) => { const header = '"granuleUr","collectionId","createdAt","startDateTime","endDateTime","status","updatedAt","published","provider"'; t.is(reportHeader, header); t.is(reportRows.length, 10); - - const esRecord = await t.context.esReportClient.get(reportRecord.name); - t.like(esRecord, reportRecord); }); test.serial('A valid ORCA Backup reconciliation report is generated', async (t) => { @@ -1920,9 +1888,6 @@ test.serial('A valid ORCA Backup reconciliation report is generated', async (t) t.is(report.granules.onlyInCumulus.length, 0); t.is(report.granules.onlyInOrca.length, 0); t.is(report.granules.withConflicts.length, 0); - - const esRecord = await t.context.esReportClient.get(reportRecord.name); - t.like(esRecord, reportRecord); }); test.serial('Inventory reconciliation report JSON is formatted', async (t) => { @@ -2022,7 +1987,7 @@ test.serial('When there is an error for an ORCA backup report, it throws', async t.context.knex, { name: reportName } ); // reconciliation report lambda outputs the translated API 
version, not the PG version, so - // it should be translated for comparison, at least for the comparison with the ES (API) version + // it should be translated for comparison const reportApiRecord = translatePostgresReconReportToApiReconReport(reportPgRecord); t.is(reportApiRecord.status, 'Failed'); t.is(reportApiRecord.type, event.reportType); @@ -2031,9 +1996,6 @@ test.serial('When there is an error for an ORCA backup report, it throws', async const report = await getJsonS3Object(t.context.systemBucket, reportKey); t.is(report.status, 'Failed'); t.is(report.reportType, event.reportType); - - const esRecord = await t.context.esReportClient.get(reportName); - t.like(esRecord, reportApiRecord); }); test.serial('Internal reconciliation report type throws an error', async (t) => { diff --git a/packages/api/tests/lambdas/test-index-from-database.js b/packages/api/tests/lambdas/test-index-from-database.js deleted file mode 100644 index 8e647736154..00000000000 --- a/packages/api/tests/lambdas/test-index-from-database.js +++ /dev/null @@ -1,512 +0,0 @@ -'use strict'; - -const cryptoRandomString = require('crypto-random-string'); -const sinon = require('sinon'); -const test = require('ava'); -const omit = require('lodash/omit'); - -const awsServices = require('@cumulus/aws-client/services'); -const { - promiseS3Upload, - recursivelyDeleteS3Bucket, -} = require('@cumulus/aws-client/S3'); -const { randomString } = require('@cumulus/common/test-utils'); -const { bootstrapElasticSearch } = require('@cumulus/es-client/bootstrap'); -const indexer = require('@cumulus/es-client/indexer'); -const { EsClient, Search } = require('@cumulus/es-client/search'); -const { - CollectionPgModel, - destroyLocalTestDb, - ExecutionPgModel, - fakeCollectionRecordFactory, - fakeExecutionRecordFactory, - fakeGranuleRecordFactory, - fakePdrRecordFactory, - fakeProviderRecordFactory, - generateLocalTestDb, - GranulePgModel, - migrationDir, - PdrPgModel, - ProviderPgModel, - translatePostgresCollectionToApiCollection, - translatePostgresExecutionToApiExecution, - translatePostgresGranuleToApiGranule, - translatePostgresPdrToApiPdr, - translatePostgresProviderToApiProvider, -} = require('@cumulus/db'); - -const { - fakeReconciliationReportFactory, -} = require('../../lib/testUtils'); - -const models = require('../../models'); -const indexFromDatabase = require('../../lambdas/index-from-database'); -const { - getWorkflowList, -} = require('../../lib/testUtils'); - -const workflowList = getWorkflowList(); -process.env.ReconciliationReportsTable = randomString(); -const reconciliationReportModel = new models.ReconciliationReport(); - -// create all the variables needed across this test -process.env.system_bucket = randomString(); -process.env.stackName = randomString(); - -const reconciliationReportsTable = process.env.ReconciliationReportsTable; - -function sortAndFilter(input, omitList, sortKey) { - return input.map((r) => omit(r, omitList)) - .sort((a, b) => (a[sortKey] > b[sortKey] ? 
1 : -1)); -} - -async function addFakeDynamoData(numItems, factory, model, factoryParams = {}) { - const items = []; - - /* eslint-disable no-await-in-loop */ - for (let i = 0; i < numItems; i += 1) { - const item = factory(factoryParams); - items.push(item); - await model.create(item); - } - /* eslint-enable no-await-in-loop */ - - return items; -} - -async function addFakeData(knex, numItems, factory, model, factoryParams = {}) { - const items = []; - for (let i = 0; i < numItems; i += 1) { - const item = factory(factoryParams); - items.push(model.create(knex, item, '*')); - } - return (await Promise.all(items)).map((result) => result[0]); -} - -function searchEs(type, index, limit = 10) { - const executionQuery = new Search({ queryStringParameters: { limit } }, type, index); - return executionQuery.query(); -} - -test.before(async (t) => { - t.context.esIndices = []; - - await awsServices.s3().createBucket({ Bucket: process.env.system_bucket }); - await reconciliationReportModel.createTable(); - - const wKey = `${process.env.stackName}/workflows/${workflowList[0].name}.json`; - const tKey = `${process.env.stackName}/workflow_template.json`; - await Promise.all([ - promiseS3Upload({ - params: { - Bucket: process.env.system_bucket, - Key: wKey, - Body: JSON.stringify(workflowList[0]), - }, - }), - promiseS3Upload({ - params: { - Bucket: process.env.system_bucket, - Key: tKey, - Body: JSON.stringify({}), - }, - }), - ]); -}); - -test.beforeEach(async (t) => { - t.context.testDbName = `test_index_${cryptoRandomString({ length: 10 })}`; - const { knex, knexAdmin } = await generateLocalTestDb(t.context.testDbName, migrationDir); - t.context.knex = knex; - t.context.knexAdmin = knexAdmin; - t.context.esIndex = randomString(); - t.context.esAlias = randomString(); - await bootstrapElasticSearch({ - host: 'fakehost', - index: t.context.esIndex, - alias: t.context.esAlias, - }); - - t.context.esClient = new EsClient('fakehost'); - await t.context.esClient.initializeEsClient(); -}); - -test.afterEach.always(async (t) => { - const { esClient, esIndex, testDbName } = t.context; - await esClient.client.indices.delete({ index: esIndex }); - await destroyLocalTestDb({ - knex: t.context.knex, - knexAdmin: t.context.knexAdmin, - testDbName, - }); -}); - -test.after.always(async () => { - await recursivelyDeleteS3Bucket(process.env.system_bucket); -}); - -test('getEsRequestConcurrency respects concurrency value in payload', (t) => { - t.is(indexFromDatabase.getEsRequestConcurrency({ - esRequestConcurrency: 5, - }), 5); -}); - -test.serial('getEsRequestConcurrency respects ES_CONCURRENCY environment variable', (t) => { - process.env.ES_CONCURRENCY = 35; - t.is(indexFromDatabase.getEsRequestConcurrency({}), 35); - delete process.env.ES_CONCURRENCY; -}); - -test('getEsRequestConcurrency correctly returns 10 when nothing is specified', (t) => { - t.is(indexFromDatabase.getEsRequestConcurrency({}), 10); -}); - -test.serial('getEsRequestConcurrency throws an error when -1 is specified', (t) => { - t.throws( - () => indexFromDatabase.getEsRequestConcurrency({ - esRequestConcurrency: -1, - }), - { instanceOf: TypeError } - ); - - process.env.ES_CONCURRENCY = -1; - t.teardown(() => { - delete process.env.ES_CONCURRENCY; - }); - t.throws( - () => indexFromDatabase.getEsRequestConcurrency({}), - { instanceOf: TypeError } - ); -}); - -test.serial('getEsRequestConcurrency throws an error when "asdf" is specified', (t) => { - t.throws( - () => indexFromDatabase.getEsRequestConcurrency({ - esRequestConcurrency: 'asdf', 
- }), - { instanceOf: TypeError } - ); - - process.env.ES_CONCURRENCY = 'asdf'; - t.teardown(() => { - delete process.env.ES_CONCURRENCY; - }); - t.throws( - () => indexFromDatabase.getEsRequestConcurrency({}), - { instanceOf: TypeError } - ); -}); - -test.serial('getEsRequestConcurrency throws an error when 0 is specified', (t) => { - t.throws( - () => indexFromDatabase.getEsRequestConcurrency({ - esRequestConcurrency: 0, - }), - { instanceOf: TypeError } - ); - - process.env.ES_CONCURRENCY = 0; - t.teardown(() => { - delete process.env.ES_CONCURRENCY; - }); - t.throws( - () => indexFromDatabase.getEsRequestConcurrency({}), - { instanceOf: TypeError } - ); -}); - -test('No error is thrown if nothing is in the database', async (t) => { - const { esAlias, knex } = t.context; - - await t.notThrowsAsync(() => indexFromDatabase.indexFromDatabase({ - indexName: esAlias, - reconciliationReportsTable, - knex, - })); -}); - -test.serial('Lambda successfully indexes records of all types', async (t) => { - const knex = t.context.knex; - const { esAlias } = t.context; - - const numItems = 20; - - const fakeData = []; - const dateObject = { created_at: new Date(), updated_at: new Date() }; - const fakeCollectionRecords = await addFakeData( - knex, - numItems, - fakeCollectionRecordFactory, - new CollectionPgModel(), - dateObject - ); - fakeData.push(fakeCollectionRecords); - - const fakeExecutionRecords = await addFakeData( - knex, - numItems, - fakeExecutionRecordFactory, - new ExecutionPgModel(), - { ...dateObject } - ); - - const fakeGranuleRecords = await addFakeData( - knex, - numItems, - fakeGranuleRecordFactory, - new GranulePgModel(), - { collection_cumulus_id: fakeCollectionRecords[0].cumulus_id, ...dateObject } - ); - - const fakeProviderRecords = await addFakeData( - knex, - numItems, - fakeProviderRecordFactory, - new ProviderPgModel(), - dateObject - ); - - const fakePdrRecords = await addFakeData(knex, numItems, fakePdrRecordFactory, new PdrPgModel(), { - collection_cumulus_id: fakeCollectionRecords[0].cumulus_id, - provider_cumulus_id: fakeProviderRecords[0].cumulus_id, - ...dateObject, - }); - - const fakeReconciliationReportRecords = await addFakeDynamoData( - numItems, - fakeReconciliationReportFactory, - reconciliationReportModel - ); - - await indexFromDatabase.handler({ - indexName: esAlias, - pageSize: 6, - knex, - }); - - const searchResults = await Promise.all([ - searchEs('collection', esAlias, '20'), - searchEs('execution', esAlias, '20'), - searchEs('granule', esAlias, '20'), - searchEs('pdr', esAlias, '20'), - searchEs('provider', esAlias, '20'), - searchEs('reconciliationReport', esAlias, '20'), - ]); - - searchResults.map((res) => t.is(res.meta.count, numItems)); - - const collectionResults = await Promise.all( - fakeCollectionRecords.map((r) => - translatePostgresCollectionToApiCollection(r)) - ); - const executionResults = await Promise.all( - fakeExecutionRecords.map((r) => translatePostgresExecutionToApiExecution(r)) - ); - const granuleResults = await Promise.all( - fakeGranuleRecords.map((r) => - translatePostgresGranuleToApiGranule({ - granulePgRecord: r, - knexOrTransaction: knex, - })) - ); - const pdrResults = await Promise.all( - fakePdrRecords.map((r) => translatePostgresPdrToApiPdr(r, knex)) - ); - const providerResults = await Promise.all( - fakeProviderRecords.map((r) => translatePostgresProviderToApiProvider(r)) - ); - - t.deepEqual( - searchResults[0].results - .map((r) => omit(r, ['timestamp'])) - .sort((a, b) => (a.name > b.name ? 
1 : -1)), - collectionResults - .sort((a, b) => (a.name > b.name ? 1 : -1)) - ); - - t.deepEqual( - sortAndFilter(searchResults[1].results, ['timestamp'], 'name'), - sortAndFilter(executionResults, ['timestamp'], 'name') - ); - - t.deepEqual( - sortAndFilter(searchResults[2].results, ['timestamp'], 'granuleId'), - sortAndFilter(granuleResults, ['timestamp'], 'granuleId') - ); - - t.deepEqual( - sortAndFilter(searchResults[3].results, ['timestamp'], 'pdrName'), - sortAndFilter(pdrResults, ['timestamp'], 'pdrName') - ); - - t.deepEqual( - sortAndFilter(searchResults[4].results, ['timestamp'], 'id'), - sortAndFilter(providerResults, ['timestamp'], 'id') - ); - - t.deepEqual( - sortAndFilter(searchResults[5].results, ['timestamp'], 'name'), - sortAndFilter(fakeReconciliationReportRecords, ['timestamp'], 'name') - ); -}); - -test.serial('failure in indexing record of specific type should not prevent indexing of other records with same type', async (t) => { - const { esAlias, esClient, knex } = t.context; - const granulePgModel = new GranulePgModel(); - const numItems = 7; - const collectionRecord = await addFakeData( - knex, - 1, - fakeCollectionRecordFactory, - new CollectionPgModel() - ); - const fakeData = await addFakeData(knex, numItems, fakeGranuleRecordFactory, granulePgModel, { - collection_cumulus_id: collectionRecord[0].cumulus_id, - created_at: new Date(), - updated_at: new Date(), - }); - - let numCalls = 0; - const originalIndexGranule = indexer.indexGranule; - const successCount = 4; - const indexGranuleStub = sinon.stub(indexer, 'indexGranule') - .callsFake(( - esClientArg, - payload, - index - ) => { - numCalls += 1; - if (numCalls <= successCount) { - return originalIndexGranule(esClientArg, payload, index); - } - throw new Error('fake error'); - }); - - let searchResults; - try { - await indexFromDatabase.handler({ - indexName: esAlias, - reconciliationReportsTable, - knex, - }); - - searchResults = await searchEs('granule', esAlias); - - t.is(searchResults.meta.count, successCount); - - searchResults.results.forEach((result) => { - const sourceData = fakeData.find((data) => data.granule_id === result.granuleId); - const expected = { - collectionId: `${collectionRecord[0].name}___${collectionRecord[0].version}`, - granuleId: sourceData.granule_id, - status: sourceData.status, - }; - const actual = { - collectionId: result.collectionId, - granuleId: result.granuleId, - status: result.status, - }; - - t.deepEqual(expected, actual); - }); - } finally { - indexGranuleStub.restore(); - await Promise.all(fakeData.map( - // eslint-disable-next-line camelcase - ({ granule_id }) => granulePgModel.delete(knex, { granule_id }) - )); - await Promise.all(searchResults.results.map( - (result) => - esClient.client.delete({ - index: esAlias, - type: 'granule', - id: result.granuleId, - parent: result.collectionId, - refresh: true, - }) - )); - } -}); - -test.serial( - 'failure in indexing record of one type should not prevent indexing of other records with different type', - async (t) => { - const { esAlias, esClient, knex } = t.context; - const numItems = 2; - const collectionRecord = await addFakeData( - knex, - 1, - fakeCollectionRecordFactory, - new CollectionPgModel() - ); - const [fakeProviderData, fakeGranuleData] = await Promise.all([ - addFakeData( - knex, - numItems, - fakeProviderRecordFactory, - new ProviderPgModel() - ), - addFakeData( - knex, - numItems, - fakeGranuleRecordFactory, - new GranulePgModel(), - { collection_cumulus_id: collectionRecord[0].cumulus_id } - ), - ]); - - 
const indexGranuleStub = sinon - .stub(indexer, 'indexGranule') - .throws(new Error('error')); - - let searchResults; - try { - await indexFromDatabase.handler({ - indexName: esAlias, - reconciliationReportsTable, - knex, - }); - - searchResults = await searchEs('provider', esAlias); - - t.is(searchResults.meta.count, numItems); - - searchResults.results.forEach((result) => { - const sourceData = fakeProviderData.find( - (data) => data.name === result.id - ); - t.deepEqual( - { host: result.host, id: result.id, protocol: result.protocol }, - { - host: sourceData.host, - id: sourceData.name, - protocol: sourceData.protocol, - } - ); - }); - } finally { - indexGranuleStub.restore(); - await Promise.all( - fakeProviderData.map(({ name }) => { - const pgModel = new ProviderPgModel(); - return pgModel.delete(knex, { name }); - }) - ); - await Promise.all( - fakeGranuleData.map( - // eslint-disable-next-line camelcase - ({ granule_id }) => new GranulePgModel().delete(knex, { granule_id }) - ) - ); - await Promise.all( - searchResults.results.map((result) => - esClient.client.delete({ - index: esAlias, - type: 'provider', - id: result.id, - refresh: true, - })) - ); - } - } -); diff --git a/packages/api/tests/lib/test-ingest.js b/packages/api/tests/lib/test-ingest.js index 521948ab899..2673f3f4756 100644 --- a/packages/api/tests/lib/test-ingest.js +++ b/packages/api/tests/lib/test-ingest.js @@ -22,9 +22,6 @@ const { fakeCollectionRecordFactory, getUniqueGranuleByGranuleId, } = require('@cumulus/db'); -const { - createTestIndex, -} = require('@cumulus/es-client/testUtils'); const { fakeGranuleFactoryV2, fakeCollectionFactory, @@ -37,11 +34,6 @@ const { const testDbName = randomString(12); const sandbox = sinon.createSandbox(); -const FakeEsClient = sandbox.stub(); -const esSearchStub = sandbox.stub(); -const esScrollStub = sandbox.stub(); -FakeEsClient.prototype.scroll = esScrollStub; -FakeEsClient.prototype.search = esSearchStub; let fakeExecution; let testCumulusMessage; @@ -64,10 +56,6 @@ test.before(async (t) => { t.context.knexAdmin = knexAdmin; t.context.granuleId = randomString(); - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - const { TopicArn } = await createSnsTopic(randomString()); t.context.granules_sns_topic_arn = TopicArn; process.env.granule_sns_topic_arn = t.context.granules_sns_topic_arn; diff --git a/packages/api/tests/lib/test-reconciliationReport.js b/packages/api/tests/lib/test-reconciliationReport.js index 2c7255475c7..e707df916de 100644 --- a/packages/api/tests/lib/test-reconciliationReport.js +++ b/packages/api/tests/lib/test-reconciliationReport.js @@ -1,5 +1,4 @@ const test = require('ava'); -const cryptoRandomString = require('crypto-random-string'); const rewire = require('rewire'); const range = require('lodash/range'); @@ -10,7 +9,6 @@ const { convertToDBCollectionSearchObject, convertToOrcaGranuleSearchParams, filterDBCollections, - compareEsGranuleAndApiGranule, } = require('../../lib/reconciliationReport'); const { fakeCollectionFactory } = require('../../lib/testUtils'); @@ -146,90 +144,3 @@ test("filterDBCollections filters collections by recReportParams's collectionIds t.deepEqual(actual, expected); }); - -test('compareEsGranuleAndApiGranule returns true for matching granules', (t) => { - const granule = { - granuleId: cryptoRandomString({ length: 5 }), - }; - const granule2 = { ...granule, files: [] }; - t.true(compareEsGranuleAndApiGranule(granule, granule2)); -}); - 
-test('compareEsGranuleAndApiGranule returns false for granules with different values', (t) => { - const granule = { - granuleId: cryptoRandomString({ length: 5 }), - }; - const granule2 = { ...granule, foo: 'bar' }; - t.false(compareEsGranuleAndApiGranule(granule, granule2)); -}); - -test('compareEsGranuleAndApiGranule returns false if one granule has files and other does not', (t) => { - const granule = { - granuleId: cryptoRandomString({ length: 5 }), - }; - const granule2 = { - ...granule, - files: [{ - bucket: 'bucket', - key: 'key', - }], - }; - t.false(compareEsGranuleAndApiGranule(granule, granule2)); -}); - -test('compareEsGranuleAndApiGranule returns false if granule file is missing from second granule', (t) => { - const granule = { - granuleId: cryptoRandomString({ length: 5 }), - files: [{ - bucket: 'bucket', - key: 'key', - }], - }; - const granule2 = { - ...granule, - files: [{ - bucket: 'bucket', - key: 'key2', - }], - }; - t.false(compareEsGranuleAndApiGranule(granule, granule2)); -}); - -test('compareEsGranuleAndApiGranule returns false if granule files have different properties', (t) => { - const granule = { - granuleId: cryptoRandomString({ length: 5 }), - files: [{ - bucket: 'bucket', - key: 'key', - }], - }; - const granule2 = { - ...granule, - files: [{ - bucket: 'bucket', - key: 'key', - size: 5, - }], - }; - t.false(compareEsGranuleAndApiGranule(granule, granule2)); -}); - -test('compareEsGranuleAndApiGranule returns false if granule files have different values for same property', (t) => { - const granule = { - granuleId: cryptoRandomString({ length: 5 }), - files: [{ - bucket: 'bucket', - key: 'key', - size: 1, - }], - }; - const granule2 = { - ...granule, - files: [{ - bucket: 'bucket', - key: 'key', - size: 5, - }], - }; - t.false(compareEsGranuleAndApiGranule(granule, granule2)); -}); diff --git a/packages/api/tests/performance/lib/test-write-granules.js b/packages/api/tests/performance/lib/test-write-granules.js index fb6aced84bf..2a123c7ef27 100644 --- a/packages/api/tests/performance/lib/test-write-granules.js +++ b/packages/api/tests/performance/lib/test-write-granules.js @@ -6,10 +6,6 @@ const pSettle = require('p-settle'); const cryptoRandomString = require('crypto-random-string'); const cloneDeep = require('lodash/cloneDeep'); -const { - getEsClient, - Search, -} = require('@cumulus/es-client/search'); const { createSnsTopic } = require('@cumulus/aws-client/SNS'); const StepFunctions = require('@cumulus/aws-client/StepFunctions'); @@ -39,10 +35,6 @@ const { const { getExecutionUrlFromArn, } = require('@cumulus/message/Executions'); -const { - createTestIndex, - cleanupTestIndex, -} = require('@cumulus/es-client/testUtils'); const { writeGranulesFromMessage, @@ -71,15 +63,6 @@ test.before(async (t) => { t.context.knex = knex; console.log(`Test DB max connection pool: ${t.context.knex.client.pool.max}`); - - const { esIndex, esClient } = await createTestIndex(); - t.context.esIndex = esIndex; - t.context.esClient = esClient; - t.context.esGranulesClient = new Search( - {}, - 'granule', - t.context.esIndex - ); }); test.beforeEach(async (t) => { @@ -220,13 +203,12 @@ test.after.always(async (t) => { await destroyLocalTestDb({ ...t.context, }); - await cleanupTestIndex(t.context); }); // This test is a performance test designed to run with a large number of messages // in a memory constrained test environment, it is not intended to run as part of // the normal unit test suite. 
-test('writeGranulesFromMessage operates on 2k granules with 10 files each within 1GB of ram when an instance of EsClient is passed in and concurrency is set to 60 and db connections are set to 60', async (t) => { +test('writeGranulesFromMessage operates on 2k granules with 10 files each within 1GB of ram when concurrency is set to 60 and db connections are set to 60', async (t) => { const { cumulusMessages, knex, @@ -237,13 +219,11 @@ test('writeGranulesFromMessage operates on 2k granules with 10 files each within // Message must be completed or files will not update - const esClient = await getEsClient(); await pSettle(cumulusMessages.map((cumulusMessage) => () => writeGranulesFromMessage({ cumulusMessage, executionCumulusId, providerCumulusId, knex, - esClient, testOverrides: { stepFunctionUtils }, })), { concurrency: t.context.concurrency }); diff --git a/packages/api/webpack.config.js b/packages/api/webpack.config.js index b33246ed82a..ee2f8d27ad7 100644 --- a/packages/api/webpack.config.js +++ b/packages/api/webpack.config.js @@ -24,12 +24,10 @@ module.exports = { mode: process.env.PRODUCTION ? 'production' : 'development', entry: { app: './app/index.js', - bootstrap: './lambdas/bootstrap.js', bulkOperation: './lambdas/bulk-operation.js', cleanExecutions: './lambdas/cleanExecutions.js', createReconciliationReport: './lambdas/create-reconciliation-report.js', distribution: './app/distribution.js', - indexFromDatabase: './lambdas/index-from-database.js', manualConsumer: './lambdas/manual-consumer.js', messageConsumer: './lambdas/message-consumer.js', payloadLogger: './lambdas/payload-logger.js', diff --git a/packages/async-operations/tests/test-async_operations.js b/packages/async-operations/tests/test-async_operations.js index 3aceaa599af..a04caac38c5 100644 --- a/packages/async-operations/tests/test-async_operations.js +++ b/packages/async-operations/tests/test-async_operations.js @@ -61,7 +61,7 @@ test.before(async (t) => { t.context.functionConfig = { Environment: { Variables: { - ES_HOST: 'es-host', + Timeout: 300, }, }, }; @@ -80,7 +80,7 @@ test.beforeEach((t) => { status: 'RUNNING', taskArn: cryptoRandomString({ length: 5 }), description: 'testing', - operationType: 'ES Index', + operationType: 'Reconciliation Report', createdAt: Date.now(), updatedAt: Date.now(), }; @@ -110,7 +110,7 @@ test.serial('startAsyncOperation uploads the payload to S3', async (t) => { callerLambdaName: randomString(), lambdaName: randomString(), description: randomString(), - operationType: 'ES Index', + operationType: 'Reconciliation Report', payload, stackName, knexConfig: knexConfig, @@ -142,7 +142,7 @@ test.serial('The AsyncOperation start method starts an ECS task with the correct lambdaName, callerLambdaName, description: randomString(), - operationType: 'ES Index', + operationType: 'Reconciliation Report', payload, stackName, knexConfig: knexConfig, @@ -188,7 +188,7 @@ test.serial('The AsyncOperation start method starts an ECS task with the asyncOp lambdaName, callerLambdaName, description: randomString(), - operationType: 'ES Index', + operationType: 'Reconciliation Report', payload, stackName, knexConfig: knexConfig, @@ -225,7 +225,7 @@ test.serial('The startAsyncOperation method throws error and calls createAsyncOp callerLambdaName: randomString(), lambdaName: randomString(), description: randomString(), - operationType: 'ES Index', + operationType: 'Reconciliation Report', payload: {}, stackName: randomString(), knexConfig: knexConfig, @@ -263,7 +263,7 @@ test.serial('The startAsyncOperation 
method throws error and calls createAsyncOp test('The startAsyncOperation writes records to the database', async (t) => { const description = randomString(); const stackName = randomString(); - const operationType = 'ES Index'; + const operationType = 'Reconciliation Report'; const taskArn = randomString(); stubbedEcsRunTaskResult = { @@ -291,7 +291,7 @@ test('The startAsyncOperation writes records to the database', async (t) => { const expected = { description, id, - operationType: 'ES Index', + operationType: 'Reconciliation Report', status: 'RUNNING', taskArn, }; @@ -317,7 +317,7 @@ test.serial('The startAsyncOperation method returns the newly-generated record', callerLambdaName: randomString(), lambdaName: randomString(), description: randomString(), - operationType: 'ES Index', + operationType: 'Reconciliation Report', payload: {}, stackName, knexConfig: knexConfig, @@ -340,7 +340,7 @@ test.serial('The startAsyncOperation method throws error if callerLambdaName par cluster: randomString, lambdaName: randomString, description: randomString(), - operationType: 'ES Index', + operationType: 'Reconciliation Report', payload: { x: randomString() }, stackName: randomString, knexConfig: knexConfig, @@ -360,7 +360,7 @@ test('getLambdaEnvironmentVariables returns expected environment variables', (t) const vars = getLambdaEnvironmentVariables(t.context.functionConfig); t.deepEqual(new Set(vars), new Set([ - { name: 'ES_HOST', value: 'es-host' }, + { name: 'Timeout', value: 300 }, ])); }); @@ -378,7 +378,7 @@ test.serial('ECS task params contain lambda environment variables when useLambda callerLambdaName: randomString(), lambdaName: randomString(), description: randomString(), - operationType: 'ES Index', + operationType: 'Reconciliation Report', payload: {}, useLambdaEnvironmentVariables: true, stackName, @@ -391,7 +391,7 @@ test.serial('ECS task params contain lambda environment variables when useLambda environmentOverrides[env.name] = env.value; }); - t.is(environmentOverrides.ES_HOST, 'es-host'); + t.is(environmentOverrides.Timeout, 300); }); test.serial('createAsyncOperation throws if stackName is not provided', async (t) => { diff --git a/packages/aws-client/src/services.ts b/packages/aws-client/src/services.ts index ff6bfd5572c..04f97ee8ea2 100644 --- a/packages/aws-client/src/services.ts +++ b/packages/aws-client/src/services.ts @@ -15,7 +15,6 @@ import { SNS } from '@aws-sdk/client-sns'; import { STS } from '@aws-sdk/client-sts'; import { ECS } from '@aws-sdk/client-ecs'; import { EC2 } from '@aws-sdk/client-ec2'; -import { ElasticsearchService } from '@aws-sdk/client-elasticsearch-service'; import awsClient from './client'; @@ -31,7 +30,6 @@ export const dynamodbDocClient = (docClientOptions?: TranslateConfig, dynamoOpti docClientOptions ); export const cf = awsClient(CloudFormation, '2010-05-15'); -export const es = awsClient(ElasticsearchService, '2015-01-01'); export const kinesis = awsClient(Kinesis, '2013-12-02'); export const kms = awsClient(KMS, '2014-11-01'); export const lambda = awsClient(Lambda, '2015-03-31'); diff --git a/packages/aws-client/tests/test-services.js b/packages/aws-client/tests/test-services.js index 994d27a68a0..9677f99ed58 100644 --- a/packages/aws-client/tests/test-services.js +++ b/packages/aws-client/tests/test-services.js @@ -6,7 +6,6 @@ const { CloudFormation } = require('@aws-sdk/client-cloudformation'); const { DynamoDB } = require('@aws-sdk/client-dynamodb'); const { ECS } = require('@aws-sdk/client-ecs'); const { EC2 } = 
require('@aws-sdk/client-ec2'); -const { ElasticsearchService } = require('@aws-sdk/client-elasticsearch-service'); const { Kinesis } = require('@aws-sdk/client-kinesis'); const { Lambda } = require('@aws-sdk/client-lambda'); const { S3 } = require('@aws-sdk/client-s3'); @@ -188,27 +187,6 @@ test('ec2() service defaults to localstack in test mode', async (t) => { ); }); -test('es() service defaults to localstack in test mode', async (t) => { - const es = services.es(); - const { - credentials, - endpoint, - } = localStackAwsClientOptions(ElasticsearchService); - t.like( - await es.config.credentials(), - credentials - ); - const esEndpoint = await es.config.endpoint(); - const localSatckEndpoint = new URL(endpoint); - t.like( - esEndpoint, - { - hostname: localSatckEndpoint.hostname, - port: Number.parseInt(localSatckEndpoint.port, 10), - } - ); -}); - test('kinesis() service defaults to localstack in test mode', async (t) => { const kinesis = services.kinesis(); const { diff --git a/packages/db/src/models/reconciliation_report.ts b/packages/db/src/models/reconciliation_report.ts index a8ff040d943..b9cf548f8ca 100644 --- a/packages/db/src/models/reconciliation_report.ts +++ b/packages/db/src/models/reconciliation_report.ts @@ -18,7 +18,7 @@ class ReconciliationReportPgModel extends BasePgModel { return super.create(knexOrTransaction, item, '*') as Promise; } diff --git a/packages/db/src/test-utils.ts b/packages/db/src/test-utils.ts index 6870a80d9c5..ae57ccd6451 100644 --- a/packages/db/src/test-utils.ts +++ b/packages/db/src/test-utils.ts @@ -138,7 +138,7 @@ export const fakeAsyncOperationRecordFactory = ( ): PostgresAsyncOperation => ({ id: uuidv4(), description: cryptoRandomString({ length: 10 }), - operation_type: 'ES Index', + operation_type: 'Reconciliation Report', status: 'RUNNING', output: { test: 'output' }, task_arn: cryptoRandomString({ length: 3 }), diff --git a/packages/db/tests/translate/test-async-operations.js b/packages/db/tests/translate/test-async-operations.js index d4030b2f7f1..f824497cc72 100644 --- a/packages/db/tests/translate/test-async-operations.js +++ b/packages/db/tests/translate/test-async-operations.js @@ -14,7 +14,7 @@ test('translateApiAsyncOperationToPostgresAsyncOperation converts a camelCase re status: 'RUNNING', taskArn: 'aws:arn:ecs:task:someTask', description: 'dummy operation', - operationType: 'ES Index', + operationType: 'Reconciliation Report', }; const expected = { @@ -38,7 +38,7 @@ test('translateApiAsyncOperationToPostgresAsyncOperation parses output from JSON status: 'SUCCEEDED', taskArn: 'aws:arn:ecs:task:someTask', description: 'dummy operation', - operationType: 'ES Index', + operationType: 'Reconciliation Report', output: JSON.stringify(operationOutput), }; @@ -60,7 +60,7 @@ test('translateApiAsyncOperationToPostgresAsyncOperation parses output from JSON status: 'SUCCEEDED', taskArn: 'aws:arn:ecs:task:someTask', description: 'dummy operation', - operationType: 'ES Index', + operationType: 'Reconciliation Report', output: operationOutput, }; @@ -83,7 +83,7 @@ test('translateApiAsyncOperationToPostgresAsyncOperation parses output from stri status: 'SUCCEEDED', taskArn: 'aws:arn:ecs:task:someTask', description: 'dummy operation', - operationType: 'ES Index', + operationType: 'Reconciliation Report', output: operationOutput, }; @@ -106,7 +106,7 @@ test('translateApiAsyncOperationToPostgresAsyncOperation parses output from JSON status: 'SUCCEEDED', taskArn: 'aws:arn:ecs:task:someTask', description: 'dummy operation', - operationType: 'ES 
Index', + operationType: 'Reconciliation Report', output: operationOutput, }; @@ -127,7 +127,7 @@ test('translateApiAsyncOperationToPostgresAsyncOperation discards \'none\' outpu status: 'SUCCEEDED', taskArn: 'aws:arn:ecs:task:someTask', description: 'dummy operation', - operationType: 'ES Index', + operationType: 'Reconciliation Report', output: 'none', }; @@ -162,7 +162,7 @@ test('translatePostgresAsyncOperationToApiAsyncOperation translates PostgreSQL r status: 'RUNNING', taskArn, description, - operationType: 'ES Index', + operationType: 'Reconciliation Report', output: JSON.stringify({ test: 'output' }), createdAt: createdAt.getTime(), updatedAt: updatedAt.getTime(), diff --git a/packages/tf-inventory/src/inventory.js b/packages/tf-inventory/src/inventory.js index da596087d63..b1764938880 100644 --- a/packages/tf-inventory/src/inventory.js +++ b/packages/tf-inventory/src/inventory.js @@ -1,6 +1,6 @@ 'use strict'; -const { ecs, ec2, es } = require('@cumulus/aws-client/services'); +const { ecs, ec2 } = require('@cumulus/aws-client/services'); const mergeWith = require('lodash/mergeWith'); const difference = require('lodash/difference'); @@ -80,13 +80,9 @@ async function listAwsResources() { ec2Instances = [].concat(...ec2Instances.Reservations.map((e) => e.Instances)); ec2Instances = ec2Instances.map((inst) => inst.InstanceId); - let esDomainNames = await es().listDomainNames(); - esDomainNames = esDomainNames.DomainNames.map((e) => e.DomainName); - return { ecsClusters: ecsClusters.clusterArns, ec2Instances, - esDomainNames, }; } diff --git a/packages/tf-inventory/tests/inventory.js b/packages/tf-inventory/tests/inventory.js index 3ef54f23341..960406631c8 100644 --- a/packages/tf-inventory/tests/inventory.js +++ b/packages/tf-inventory/tests/inventory.js @@ -3,7 +3,7 @@ const test = require('ava'); const rewire = require('rewire'); const sinon = require('sinon'); -const { ecs, ec2, es } = require('@cumulus/aws-client/services'); +const { ecs, ec2 } = require('@cumulus/aws-client/services'); const inventory = rewire('../src/inventory'); const mergeResourceLists = inventory.__get__('mergeResourceLists'); const resourceDiff = inventory.__get__('resourceDiff'); @@ -15,7 +15,6 @@ let listResourcesForFileStub; let listTfStateFilesStub; let ecsStub; let ec2Stub; -let esStub; /** * @@ -28,7 +27,6 @@ function resourcesForStateFile(sf) { return { ecsClusters: ['clusterArn1', 'clusterArn2'], ec2Instances: ['i-000'], - esDomainNames: ['cumulus-1-es5vpc'], }; } @@ -36,7 +34,6 @@ function resourcesForStateFile(sf) { return { ecsClusters: ['clusterArn3'], ec2Instances: ['i-111', 'i-222'], - esDomainNames: ['cumulus-2-es5vpc'], }; } @@ -80,17 +77,6 @@ test.before(() => { ], }), }); - - esStub = sinon.stub(es(), 'listDomainNames') - .returns( - Promise.resolve({ - DomainNames: [ - { DomainName: 'cumulus-es5vpc' }, - { DomainName: 'cumulus-1-es5vpc' }, - { DomainName: 'cumulus-2-es5vpc' }, - ], - }) - ); }); test.after.always(() => { @@ -98,7 +84,6 @@ test.after.always(() => { listTfStateFilesStub.restore(); ecsStub.restore(); ec2Stub.restore(); - esStub.restore(); }); test('mergeResourceLists merges resource object by key', (t) => { @@ -235,7 +220,6 @@ test('listTfResources merges resources correctly', async (t) => { t.deepEqual(tfResources, { ecsClusters: ['clusterArn1', 'clusterArn2', 'clusterArn3'], ec2Instances: ['i-000', 'i-111', 'i-222'], - esDomainNames: ['cumulus-1-es5vpc', 'cumulus-2-es5vpc'], }); }); @@ -246,7 +230,6 @@ test('listAwsResources properly combines ec2 intsances', async (t) => { { 
ecsClusters: ['clusterArn1', 'clusterArn2', 'clusterArn3', 'clusterArn4'], ec2Instances: ['i-000', 'i-111', 'i-222', 'i-333'], - esDomainNames: ['cumulus-es5vpc', 'cumulus-1-es5vpc', 'cumulus-2-es5vpc'], }); }); @@ -257,6 +240,5 @@ test('reconcileResources returns only resources not specified in TF files', asyn { ecsClusters: ['clusterArn4'], ec2Instances: ['i-333'], - esDomainNames: ['cumulus-es5vpc'], }); }); diff --git a/tf-modules/archive/api.tf b/tf-modules/archive/api.tf index a1f12066b19..47044ca26ee 100644 --- a/tf-modules/archive/api.tf +++ b/tf-modules/archive/api.tf @@ -56,7 +56,6 @@ locals { execution_sns_topic_arn = aws_sns_topic.report_executions_topic.arn idleTimeoutMillis = var.rds_connection_timing_configuration.idleTimeoutMillis IDP_LOGIN = var.saml_idp_login - IndexFromDatabaseLambda = aws_lambda_function.index_from_database.arn invoke = var.schedule_sf_function_arn invokeArn = var.schedule_sf_function_arn invokeReconcileLambda = aws_lambda_function.create_reconciliation_report.arn diff --git a/tf-modules/archive/bootstrap.tf b/tf-modules/archive/bootstrap.tf deleted file mode 100644 index 00468fe6d19..00000000000 --- a/tf-modules/archive/bootstrap.tf +++ /dev/null @@ -1,40 +0,0 @@ -resource "aws_lambda_function" "custom_bootstrap" { - function_name = "${var.prefix}-CustomBootstrap" - filename = "${path.module}/../../packages/api/dist/bootstrap/lambda.zip" - source_code_hash = filebase64sha256("${path.module}/../../packages/api/dist/bootstrap/lambda.zip") - handler = "index.handler" - role = var.lambda_processing_role_arn - runtime = "nodejs20.x" - timeout = lookup(var.lambda_timeouts, "CustomBootstrap", 300) - memory_size = lookup(var.lambda_memory_sizes, "CustomBootstrap", 512) - environment { - variables = { - stackName = var.prefix - system_bucket = var.system_bucket - ES_INDEX_SHARDS = var.es_index_shards - } - } - - tags = var.tags - - dynamic "vpc_config" { - for_each = length(var.lambda_subnet_ids) == 0 ? [] : [1] - content { - subnet_ids = var.lambda_subnet_ids - security_group_ids = local.lambda_security_group_ids - } - } -} - -data "aws_lambda_invocation" "custom_bootstrap" { - count = var.elasticsearch_hostname != null ? 
1 : 0 - depends_on = [aws_lambda_function.custom_bootstrap] - function_name = aws_lambda_function.custom_bootstrap.function_name - - input = jsonencode( - { - elasticsearchHostname = var.elasticsearch_hostname - removeAliasConflict = var.elasticsearch_remove_index_alias_conflict - replacementTrigger = timestamp() - }) -} diff --git a/tf-modules/archive/index_from_database.tf b/tf-modules/archive/index_from_database.tf deleted file mode 100644 index 61138dd4664..00000000000 --- a/tf-modules/archive/index_from_database.tf +++ /dev/null @@ -1,112 +0,0 @@ -resource "aws_lambda_function" "index_from_database" { - function_name = "${var.prefix}-IndexFromDatabase" - filename = "${path.module}/../../packages/api/dist/indexFromDatabase/lambda.zip" - source_code_hash = filebase64sha256("${path.module}/../../packages/api/dist/indexFromDatabase/lambda.zip") - handler = "index.handler" - role = aws_iam_role.index_from_database.arn - runtime = "nodejs20.x" - timeout = lookup(var.lambda_timeouts, "IndexFromDatabase", 300) - memory_size = lookup(var.lambda_memory_sizes, "IndexFromDatabase", 512) - environment { - variables = { - CMR_ENVIRONMENT = var.cmr_environment - CMR_HOST = var.cmr_custom_host - databaseCredentialSecretArn = var.rds_user_access_secret_arn - ES_CONCURRENCY = var.es_request_concurrency - ES_HOST = var.elasticsearch_hostname - ReconciliationReportsTable = var.dynamo_tables.reconciliation_reports.name - stackName = var.prefix - } - } - tags = var.tags - - dynamic "vpc_config" { - for_each = length(var.lambda_subnet_ids) == 0 ? [] : [1] - content { - subnet_ids = var.lambda_subnet_ids - security_group_ids = concat(local.lambda_security_group_ids, [var.rds_security_group]) - } - } -} - - -resource "aws_iam_role" "index_from_database" { - name = "${var.prefix}-index_from_database" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json - permissions_boundary = var.permissions_boundary_arn - - tags = var.tags -} - - -resource "aws_iam_role_policy" "index_from_database" { - name = "${var.prefix}_index_from_database_policy" - role = aws_iam_role.index_from_database.id - policy = data.aws_iam_policy_document.index_from_database.json -} - - -data "aws_iam_policy_document" "index_from_database" { - statement { - actions = ["ecs:RunTask"] - resources = [aws_ecs_task_definition.async_operation.arn] - } - - statement { - actions = [ - "ec2:CreateNetworkInterface", - "ec2:DeleteNetworkInterface", - "ec2:DescribeNetworkInterfaces", - "logs:DescribeLogStreams", - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents", - ] - resources = ["*"] - } - - statement { - actions = [ - "dynamodb:GetItem", - "dynamodb:Scan", - ] - resources = [for k, v in var.dynamo_tables : v.arn] - } - - statement { - actions = ["dynamodb:Query"] - resources = [for k, v in var.dynamo_tables : "${v.arn}/index/*"] - } - - statement { - actions = [ - "dynamodb:GetRecords", - "dynamodb:GetShardIterator", - "dynamodb:DescribeStream", - "dynamodb:ListStreams" - ] - resources = [for k, v in var.dynamo_tables : "${v.arn}/stream/*"] - } - - statement { - actions = ["dynamodb:ListTables"] - resources = ["*"] - } - - statement { - actions = ["secretsmanager:GetSecretValue"] - resources = [ - aws_secretsmanager_secret.api_cmr_password.arn, - aws_secretsmanager_secret.api_launchpad_passphrase.arn, - var.rds_user_access_secret_arn - ] - } - - statement { - actions = [ - "ssm:GetParameter" - ] - resources = [aws_ssm_parameter.dynamo_table_names.arn] - } -} - diff --git 
a/tf-modules/archive/reconciliation_report.tf b/tf-modules/archive/reconciliation_report.tf index b09ef5a337f..6db11219711 100644 --- a/tf-modules/archive/reconciliation_report.tf +++ b/tf-modules/archive/reconciliation_report.tf @@ -15,8 +15,6 @@ resource "aws_lambda_function" "create_reconciliation_report" { CMR_HOST = var.cmr_custom_host DISTRIBUTION_ENDPOINT = var.distribution_url ES_HOST = var.elasticsearch_hostname - ES_SCROLL = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_duration", "6m") - ES_SCROLL_SIZE = lookup(var.elasticsearch_client_config, "create_reconciliation_report_es_scroll_size", 1000) stackName = var.prefix system_bucket = var.system_bucket cmr_client_id = var.cmr_client_id From a08d6ef49d1b002ee62beabf7ed27a83224810e3 Mon Sep 17 00:00:00 2001 From: Naga Nages <66387215+Nnaga1@users.noreply.github.com> Date: Thu, 30 Jan 2025 13:25:06 -0500 Subject: [PATCH 61/61] first commit - updated branch (#3909) --- CHANGELOG.md | 2 + example/cumulus-tf/main.tf | 15 ------ example/cumulus-tf/variables.tf | 24 --------- .../migration-helper-async-operation/main.tf | 2 - .../variables.tf | 8 --- tf-modules/archive/api.tf | 3 -- tf-modules/archive/async_operation.tf | 8 --- tf-modules/archive/bulk_operation.tf | 1 - tf-modules/archive/clean_executions.tf | 10 ++-- tf-modules/archive/main.tf | 1 - .../archive/process_dead_letter_archive.tf | 1 - tf-modules/archive/reconciliation_report.tf | 1 - .../archive/sf_event_sqs_to_db_records.tf | 1 - .../archive/start_async_operation_lambda.tf | 1 - tf-modules/archive/variables.tf | 45 ---------------- tf-modules/cumulus/README.md | 6 --- tf-modules/cumulus/archive.tf | 10 ---- tf-modules/cumulus/ecs_cluster.tf | 23 -------- .../migration_helper_async_operation.tf | 3 -- tf-modules/cumulus/monitoring.tf | 1 - tf-modules/cumulus/variables.tf | 54 ------------------- .../monitoring/.terraform.tfvars.sample | 11 ---- tf-modules/monitoring/cloudwatch-dashboard.tf | 32 ----------- tf-modules/monitoring/variables.tf | 7 --- 24 files changed, 5 insertions(+), 265 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24a2b840b36..083ccd6ffdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -97,6 +97,8 @@ External tooling making use of `searchContext` in the `GET` `/granules/` endpoin - for asyncOperations test data, change any ES related values to other options - remove code from `@cumulus/api/lambdas/cleanExecutions` leaving a dummy handler, as the code worked with ES. 
lambda will be rewritten with CUMULUS-3982 - remove `@cumulus/api/endpoints/elasticsearch`, `@cumulus/api/lambdas/bootstrap`, and `@cumulus/api/lambdas/index-from-database` +- **CUMULUS-3983** + - Removed elasticsearch references used in in cumulus `tf-modules` ## [Unreleased] diff --git a/example/cumulus-tf/main.tf b/example/cumulus-tf/main.tf index b799378ac90..ef68e6e5c2c 100644 --- a/example/cumulus-tf/main.tf +++ b/example/cumulus-tf/main.tf @@ -32,10 +32,6 @@ provider "aws" { locals { tags = merge(var.tags, { Deployment = var.prefix }) - elasticsearch_alarms = lookup(data.terraform_remote_state.data_persistence.outputs, "elasticsearch_alarms", []) - elasticsearch_domain_arn = lookup(data.terraform_remote_state.data_persistence.outputs, "elasticsearch_domain_arn", null) - elasticsearch_hostname = lookup(data.terraform_remote_state.data_persistence.outputs, "elasticsearch_hostname", null) - elasticsearch_security_group_id = lookup(data.terraform_remote_state.data_persistence.outputs, "elasticsearch_security_group_id", "") protected_bucket_names = [for k, v in var.buckets : v.name if v.type == "protected"] public_bucket_names = [for k, v in var.buckets : v.name if v.type == "public"] rds_security_group = lookup(data.terraform_remote_state.data_persistence.outputs, "rds_security_group", "") @@ -110,8 +106,6 @@ module "cumulus" { urs_client_id = var.urs_client_id urs_client_password = var.urs_client_password - es_request_concurrency = var.es_request_concurrency - metrics_es_host = var.metrics_es_host metrics_es_password = var.metrics_es_password metrics_es_username = var.metrics_es_username @@ -157,13 +151,6 @@ module "cumulus" { system_bucket = var.system_bucket buckets = var.buckets - elasticsearch_remove_index_alias_conflict = var.elasticsearch_remove_index_alias_conflict - elasticsearch_alarms = local.elasticsearch_alarms - elasticsearch_domain_arn = local.elasticsearch_domain_arn - elasticsearch_hostname = local.elasticsearch_hostname - elasticsearch_security_group_id = local.elasticsearch_security_group_id - es_index_shards = var.es_index_shards - dynamo_tables = merge(data.terraform_remote_state.data_persistence.outputs.dynamo_tables, var.optional_dynamo_tables) default_log_retention_days = var.default_log_retention_days cloudwatch_log_retention_periods = var.cloudwatch_log_retention_periods @@ -195,8 +182,6 @@ module "cumulus" { api_gateway_stage = var.api_gateway_stage archive_api_reserved_concurrency = var.api_reserved_concurrency - elasticsearch_client_config = var.elasticsearch_client_config - # Thin Egress App settings. Uncomment to use TEA. # must match stage_name variable for thin-egress-app module # tea_api_gateway_stage = local.tea_stage_name diff --git a/example/cumulus-tf/variables.tf b/example/cumulus-tf/variables.tf index ae1daa1e31e..e428178c264 100644 --- a/example/cumulus-tf/variables.tf +++ b/example/cumulus-tf/variables.tf @@ -215,24 +215,6 @@ variable "ecs_include_docker_cleanup_cronjob" { default = false } -variable "elasticsearch_client_config" { - description = "Configuration parameters for Elasticsearch client for cumulus tasks" - type = map(string) - default = {} -} - -variable "elasticsearch_remove_index_alias_conflict" { - type = bool - default = true - description = "NOTE -- THIS SHOULD NEVER BE SET TO TRUE BY DEFAULT IN PRODUCTION SITUATIONS, we've set it to true here for dev only -- Set to false to not allow cumulus deployment bootstrap lambda to remove existing ES index named 'cumulus-alias'." 
-} - -variable "es_request_concurrency" { - type = number - default = 10 - description = "Maximum number of concurrent requests to send to Elasticsearch. Used in index-from-database operation" -} - variable "key_name" { type = string default = null @@ -318,12 +300,6 @@ variable "tags" { default = {} } -variable "es_index_shards" { - description = "The number of shards for the Elasticsearch index" - type = number - default = 2 -} - variable "pdr_node_name_provider_bucket" { type = string description = "The name of the common bucket used as an S3 provider for PDR NODE_NAME tests" diff --git a/lambdas/migration-helper-async-operation/main.tf b/lambdas/migration-helper-async-operation/main.tf index c89559717b5..bdf16782287 100644 --- a/lambdas/migration-helper-async-operation/main.tf +++ b/lambdas/migration-helper-async-operation/main.tf @@ -19,7 +19,6 @@ resource "aws_lambda_function" "migration_helper_async_operation" { createTimeoutMillis = var.rds_connection_timing_configuration.createTimeoutMillis databaseCredentialSecretArn = var.rds_user_access_secret_arn EcsCluster = var.ecs_cluster_name - ES_HOST = var.elasticsearch_hostname idleTimeoutMillis = var.rds_connection_timing_configuration.idleTimeoutMillis DlaMigrationLambda = var.dla_migration_function_arn reapIntervalMillis = var.rds_connection_timing_configuration.reapIntervalMillis @@ -35,7 +34,6 @@ resource "aws_lambda_function" "migration_helper_async_operation" { security_group_ids = compact([ aws_security_group.migration_helper_async_operation[0].id, var.rds_security_group_id, - var.elasticsearch_security_group_id ]) } } diff --git a/lambdas/migration-helper-async-operation/variables.tf b/lambdas/migration-helper-async-operation/variables.tf index 1c033d09c27..9696f6cf1d9 100644 --- a/lambdas/migration-helper-async-operation/variables.tf +++ b/lambdas/migration-helper-async-operation/variables.tf @@ -14,14 +14,6 @@ variable "ecs_cluster_name" { type = string } -variable "elasticsearch_hostname" { - type = string -} - -variable "elasticsearch_security_group_id" { - description = "Security Group ID For Elasticsearch (OpenSearch)" -} - variable "ecs_execution_role_arn" { description = "ARN of IAM role for initializing ECS tasks" type = string diff --git a/tf-modules/archive/api.tf b/tf-modules/archive/api.tf index 47044ca26ee..7cf1456366f 100644 --- a/tf-modules/archive/api.tf +++ b/tf-modules/archive/api.tf @@ -49,9 +49,6 @@ locals { EARTHDATA_CLIENT_PASSWORD = var.urs_client_password EcsCluster = var.ecs_cluster_name ENTITY_ID = var.saml_entity_id - ES_CONCURRENCY = var.es_request_concurrency - ES_HOST = var.elasticsearch_hostname - ES_INDEX_SHARDS = var.es_index_shards granule_sns_topic_arn = aws_sns_topic.report_granules_topic.arn execution_sns_topic_arn = aws_sns_topic.report_executions_topic.arn idleTimeoutMillis = var.rds_connection_timing_configuration.idleTimeoutMillis diff --git a/tf-modules/archive/async_operation.tf b/tf-modules/archive/async_operation.tf index 5783a37e831..25985b01f1b 100644 --- a/tf-modules/archive/async_operation.tf +++ b/tf-modules/archive/async_operation.tf @@ -31,10 +31,6 @@ resource "aws_ecs_task_definition" "async_operation" { { "name": "databaseCredentialSecretArn", "value": "${var.rds_user_access_secret_arn}" - }, - { - "name": "ES_HOST", - "value": "${var.elasticsearch_hostname}" } ], "image": "${var.async_operation_image}", @@ -74,10 +70,6 @@ resource "aws_ecs_task_definition" "dead_letter_recovery_operation" { { "name": "databaseCredentialSecretArn", "value": 
"${var.rds_user_access_secret_arn}" - }, - { - "name": "ES_HOST", - "value": "${var.elasticsearch_hostname}" } ], "image": "${var.async_operation_image}", diff --git a/tf-modules/archive/bulk_operation.tf b/tf-modules/archive/bulk_operation.tf index 989c675b23b..17cc48889da 100644 --- a/tf-modules/archive/bulk_operation.tf +++ b/tf-modules/archive/bulk_operation.tf @@ -12,7 +12,6 @@ resource "aws_lambda_function" "bulk_operation" { acquireTimeoutMillis = var.rds_connection_timing_configuration.acquireTimeoutMillis createRetryIntervalMillis = var.rds_connection_timing_configuration.createRetryIntervalMillis createTimeoutMillis = var.rds_connection_timing_configuration.createTimeoutMillis - ES_HOST = var.elasticsearch_hostname granule_sns_topic_arn = aws_sns_topic.report_granules_topic.arn idleTimeoutMillis = var.rds_connection_timing_configuration.idleTimeoutMillis invoke = var.schedule_sf_function_arn diff --git a/tf-modules/archive/clean_executions.tf b/tf-modules/archive/clean_executions.tf index 6a55d920fe4..398c6658304 100644 --- a/tf-modules/archive/clean_executions.tf +++ b/tf-modules/archive/clean_executions.tf @@ -22,13 +22,9 @@ source_code_hash = filebase64sha256("${path.module}/../../packages/api/dist/clea environment { variables = { stackName = var.prefix - ES_HOST = var.elasticsearch_hostname - CLEANUP_RUNNING = var.cleanup_running - CLEANUP_NON_RUNNING = var.cleanup_non_running - - PAYLOAD_TIMEOUT = var.payload_timeout - - ES_INDEX = var.es_index + CLEANUP_RUNNING = var.cleanup_running + CLEANUP_NON_RUNNING = var.cleanup_non_running + PAYLOAD_TIMEOUT = var.payload_timeout UPDATE_LIMIT = var.update_limit } } diff --git a/tf-modules/archive/main.tf b/tf-modules/archive/main.tf index 890d48d84ba..e352a6c547b 100644 --- a/tf-modules/archive/main.tf +++ b/tf-modules/archive/main.tf @@ -10,7 +10,6 @@ terraform { locals { lambda_security_group_ids = compact([ aws_security_group.no_ingress_all_egress[0].id, - var.elasticsearch_security_group_id ]) all_bucket_names = [for k, v in var.buckets : v.name] all_non_internal_buckets = [for k, v in var.buckets : v.name if v.type != "internal"] diff --git a/tf-modules/archive/process_dead_letter_archive.tf b/tf-modules/archive/process_dead_letter_archive.tf index 4c6b198f655..c5b2448d459 100644 --- a/tf-modules/archive/process_dead_letter_archive.tf +++ b/tf-modules/archive/process_dead_letter_archive.tf @@ -22,7 +22,6 @@ resource "aws_lambda_function" "process_dead_letter_archive" { stackName = var.prefix system_bucket = var.system_bucket RDS_DEPLOYMENT_CUMULUS_VERSION = "9.0.0" - ES_HOST = var.elasticsearch_hostname } } diff --git a/tf-modules/archive/reconciliation_report.tf b/tf-modules/archive/reconciliation_report.tf index 6db11219711..cc04ae81cfe 100644 --- a/tf-modules/archive/reconciliation_report.tf +++ b/tf-modules/archive/reconciliation_report.tf @@ -14,7 +14,6 @@ resource "aws_lambda_function" "create_reconciliation_report" { CMR_ENVIRONMENT = var.cmr_environment CMR_HOST = var.cmr_custom_host DISTRIBUTION_ENDPOINT = var.distribution_url - ES_HOST = var.elasticsearch_hostname stackName = var.prefix system_bucket = var.system_bucket cmr_client_id = var.cmr_client_id diff --git a/tf-modules/archive/sf_event_sqs_to_db_records.tf b/tf-modules/archive/sf_event_sqs_to_db_records.tf index 2ae35f465ff..e3cdb49b5aa 100644 --- a/tf-modules/archive/sf_event_sqs_to_db_records.tf +++ b/tf-modules/archive/sf_event_sqs_to_db_records.tf @@ -192,7 +192,6 @@ resource "aws_lambda_function" "sf_event_sqs_to_db_records" { pdr_sns_topic_arn = 
aws_sns_topic.report_pdrs_topic.arn RDS_DEPLOYMENT_CUMULUS_VERSION = "9.0.0" reapIntervalMillis = var.rds_connection_timing_configuration.reapIntervalMillis - ES_HOST = var.elasticsearch_hostname } } diff --git a/tf-modules/archive/start_async_operation_lambda.tf b/tf-modules/archive/start_async_operation_lambda.tf index 0957481916a..b05a7714358 100644 --- a/tf-modules/archive/start_async_operation_lambda.tf +++ b/tf-modules/archive/start_async_operation_lambda.tf @@ -15,7 +15,6 @@ resource "aws_lambda_function" "start_async_operation" { createTimeoutMillis = var.rds_connection_timing_configuration.createTimeoutMillis databaseCredentialSecretArn = var.rds_user_access_secret_arn EcsCluster = var.ecs_cluster_name - ES_HOST = var.elasticsearch_hostname idleTimeoutMillis = var.rds_connection_timing_configuration.idleTimeoutMillis reapIntervalMillis = var.rds_connection_timing_configuration.reapIntervalMillis stackName = var.prefix diff --git a/tf-modules/archive/variables.tf b/tf-modules/archive/variables.tf index 07af3668100..74ab989ec42 100644 --- a/tf-modules/archive/variables.tf +++ b/tf-modules/archive/variables.tf @@ -71,27 +71,6 @@ variable "ecs_task_role" { type = object({ name = string, arn = string}) } -variable "elasticsearch_domain_arn" { - type = string - default = null -} - -variable "elasticsearch_hostname" { - type = string - default = null -} - -variable "elasticsearch_security_group_id" { - type = string - default = "" -} - -variable "elasticsearch_remove_index_alias_conflict" { - type = bool - default = false - description = "Set to true to allow cumulus deployment bootstrap lambda to remove existing ES index named 'cumulus-alias' if it exists. Setting to false will cause deployment to fail on existing index" -} - variable "kinesis_inbound_event_logger_lambda_function_arn" { type = string } @@ -198,18 +177,6 @@ variable "cmr_search_client_config" { default = {} } -variable "elasticsearch_client_config" { - description = "Configuration parameters for Elasticsearch client for cumulus tasks" - type = map(string) - default = {} -} - -variable "es_request_concurrency" { - type = number - default = 10 - description = "Maximum number of concurrent requests to send to Elasticsearch. 
Used in index-from-database operation" -} - variable "lambda_memory_sizes" { description = "Configurable map of memory sizes for lambdas" type = map(number) @@ -358,12 +325,6 @@ variable "payload_timeout" { description = "Number of days to retain execution payload records in the database" } -variable "es_index" { - type = string - default = "cumulus" - description = "elasticsearch index to be affected" -} - variable "update_limit" { type = number default = 10000 @@ -376,12 +337,6 @@ variable "log_destination_arn" { description = "A shared AWS:Log:Destination that receives logs from log_groups" } -variable "es_index_shards" { - description = "The number of shards for the Elasticsearch index" - type = number - default = 2 -} - variable "cloudwatch_log_retention_periods" { type = map(number) description = "retention periods for the respective cloudwatch log group, these values will be used instead of default retention days" diff --git a/tf-modules/cumulus/README.md b/tf-modules/cumulus/README.md index a87d787c78a..94fd96de919 100644 --- a/tf-modules/cumulus/README.md +++ b/tf-modules/cumulus/README.md @@ -70,11 +70,5 @@ module "cumulus" { archive_api_users = ["urs-user1", "urs-user2"] sts_credentials_lambda_function_arn = "arn:aws:lambda:us-east-1:1234567890:function:sts-lambda" - - # Optional - elasticsearch_alarms = ["arn:aws:cloudwatch:us-east-1:12345:alarm:prefix-es-NodesLowAlarm"] - elasticsearch_domain_arn = "arn:aws:es:us-east-1:12345:domain/prefix-es" - elasticsearch_hostname = "prefix-es-abcdef.us-east-1.es.amazonaws.com" - elasticsearch_security_group_id = ["sg-12345"] } ``` diff --git a/tf-modules/cumulus/archive.tf b/tf-modules/cumulus/archive.tf index 84a4818efd0..a99b269d305 100644 --- a/tf-modules/cumulus/archive.tf +++ b/tf-modules/cumulus/archive.tf @@ -17,15 +17,6 @@ module "archive" { default_log_retention_days = var.default_log_retention_days cloudwatch_log_retention_periods = var.cloudwatch_log_retention_periods - elasticsearch_client_config = var.elasticsearch_client_config - elasticsearch_domain_arn = var.elasticsearch_domain_arn - elasticsearch_hostname = var.elasticsearch_hostname - elasticsearch_security_group_id = var.elasticsearch_security_group_id - elasticsearch_remove_index_alias_conflict = var.elasticsearch_remove_index_alias_conflict - - es_index_shards = var.es_index_shards - es_request_concurrency = var.es_request_concurrency - system_bucket = var.system_bucket buckets = var.buckets @@ -92,7 +83,6 @@ module "archive" { payload_timeout = var.payload_timeout - es_index = var.es_index update_limit = var.update_limit background_queue_url = module.ingest.background_queue_url diff --git a/tf-modules/cumulus/ecs_cluster.tf b/tf-modules/cumulus/ecs_cluster.tf index 438f0175bf7..0be8ea6c562 100644 --- a/tf-modules/cumulus/ecs_cluster.tf +++ b/tf-modules/cumulus/ecs_cluster.tf @@ -154,28 +154,6 @@ resource "aws_iam_role_policy" "ecs_cluster_instance" { policy = data.aws_iam_policy_document.ecs_cluster_instance_policy.json } -# Give ECS permission to access ES, if necessary -data "aws_iam_policy_document" "ecs_cluster_access_es_document" { - count = var.elasticsearch_domain_arn != null ? 1 : 0 - statement { - actions = [ - "es:ESHttpDelete", - "es:ESHttpGet", - "es:ESHttpHead", - "es:ESHttpPost", - "es:ESHttpPut" - ] - resources = [var.elasticsearch_domain_arn] - } -} - -resource "aws_iam_role_policy" "ecs_cluster_access_es_policy" { - name = "${var.prefix}_ecs_cluster_access_es_policy" - count = var.elasticsearch_domain_arn != null ? 
1 : 0 - role = aws_iam_role.ecs_cluster_instance.id - policy = data.aws_iam_policy_document.ecs_cluster_access_es_document[0].json -} - resource "aws_iam_role_policy_attachment" "NGAPProtAppInstanceMinimalPolicy" { count = var.deploy_to_ngap ? 1 : 0 policy_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/NGAPProtAppInstanceMinimalPolicy" @@ -235,7 +213,6 @@ locals { security_group_ids = compact(concat( [ aws_security_group.ecs_cluster_instance.id, - var.elasticsearch_security_group_id, var.rds_security_group ], var.ecs_custom_sg_ids diff --git a/tf-modules/cumulus/migration_helper_async_operation.tf b/tf-modules/cumulus/migration_helper_async_operation.tf index 65955e311c9..ce898cfdaea 100644 --- a/tf-modules/cumulus/migration_helper_async_operation.tf +++ b/tf-modules/cumulus/migration_helper_async_operation.tf @@ -12,9 +12,6 @@ module "migration_helper_async_operation" { ecs_execution_role_arn = aws_iam_role.ecs_execution_role.arn ecs_task_role_arn = aws_iam_role.ecs_task_role.arn - elasticsearch_hostname = var.elasticsearch_hostname - elasticsearch_security_group_id = var.elasticsearch_security_group_id - lambda_subnet_ids = var.lambda_subnet_ids prefix = var.prefix diff --git a/tf-modules/cumulus/monitoring.tf b/tf-modules/cumulus/monitoring.tf index 61d40fcf1a2..fd112c944f1 100644 --- a/tf-modules/cumulus/monitoring.tf +++ b/tf-modules/cumulus/monitoring.tf @@ -2,7 +2,6 @@ module "monitoring" { source = "../monitoring" prefix = var.prefix - elasticsearch_alarms = var.elasticsearch_alarms ecs_service_alarms = var.ecs_service_alarms system_bucket = var.system_bucket } diff --git a/tf-modules/cumulus/variables.tf b/tf-modules/cumulus/variables.tf index 0c3bb7a1113..24274b95633 100644 --- a/tf-modules/cumulus/variables.tf +++ b/tf-modules/cumulus/variables.tf @@ -82,30 +82,6 @@ variable "ecs_cluster_min_size" { type = number } -variable "elasticsearch_remove_index_alias_conflict" { - type = bool - default = false - description = "Set to true to allow cumulus deployment bootstrap lambda to remove existing ES index named 'cumulus-alias' if it exists. Setting to false will cause deployment to fail on existing index" -} - -variable "elasticsearch_domain_arn" { - description = "The ARN of an Elasticsearch domain to use for storing data" - type = string - default = null -} - -variable "elasticsearch_hostname" { - description = "The hostname of an Elasticsearch domain to use for storing data" - type = string - default = null -} - -variable "elasticsearch_security_group_id" { - description = "The ID of the security group for the Elasticsearch domain specified by `elasticsearch_domain_arn`" - type = string - default = "" -} - variable "lambda_memory_sizes" { description = "Configurable map of memory sizes for lambdas" type = map(number) @@ -181,12 +157,6 @@ variable "cmr_search_client_config" { default = {} } -variable "elasticsearch_client_config" { - description = "Configuration parameters for Elasticsearch client" - type = map(string) - default = {} -} - variable "archive_api_port" { description = "Port number that should be used for archive API requests" type = number @@ -321,18 +291,6 @@ variable "ecs_service_alarms" { default = [] } -variable "elasticsearch_alarms" { - description = "List of Cloudwatch alarms monitoring Elasticsearch domain" - type = list(object({ name = string, arn = string })) - default = [] -} - -variable "es_request_concurrency" { - type = number - default = 10 - description = "Maximum number of concurrent requests to send to Elasticsearch. 
Used in index-from-database operation" -} - variable "key_name" { description = "Name of EC2 key pair for accessing EC2 instances" type = string @@ -568,12 +526,6 @@ variable "payload_timeout" { description = "Number of days to retain execution payload records in the database" } -variable "es_index" { - type = string - default = "cumulus" - description = "elasticsearch index to be affected" -} - variable "update_limit" { type = number default = 10000 @@ -592,12 +544,6 @@ variable "additional_log_groups_to_elk" { default = {} } -variable "es_index_shards" { - description = "The number of shards for the Elasticsearch index" - type = number - default = 2 -} - variable "ecs_custom_sg_ids" { description = "User defined security groups to add to the Core ECS cluster" type = list(string) diff --git a/tf-modules/monitoring/.terraform.tfvars.sample b/tf-modules/monitoring/.terraform.tfvars.sample index d8bd52aac6b..a500bc7c179 100644 --- a/tf-modules/monitoring/.terraform.tfvars.sample +++ b/tf-modules/monitoring/.terraform.tfvars.sample @@ -1,17 +1,6 @@ # Required prefix = "myprefix" -elasticsearch_alarms = [ - { - "arn" = "es-alarm1-arn" - "name" = "es-alarm1" - }, - { - "arn" = "alarm2-arn" - "name" = "alarm2" - } -] - ecs_service_alarms = [ { "arn" = "ecs-alarm1-arn" diff --git a/tf-modules/monitoring/cloudwatch-dashboard.tf b/tf-modules/monitoring/cloudwatch-dashboard.tf index e19fe409080..c8fe7d7637b 100644 --- a/tf-modules/monitoring/cloudwatch-dashboard.tf +++ b/tf-modules/monitoring/cloudwatch-dashboard.tf @@ -12,38 +12,6 @@ resource "aws_cloudwatch_dashboard" "cloudwatch_dashboard" { dashboard_body = <