From c4a363f9f905393f5683e33844e7d329d41eae43 Mon Sep 17 00:00:00 2001 From: Michael Farrell Date: Tue, 17 Sep 2024 22:47:29 -0700 Subject: [PATCH] Temporary ability to pull salesforce ids (#355) * Temporary ability to pull salesforce ids * :uniq --- package.json | 3 +- src/cli-cron-pull-identifiers.ts | 9 ++ src/cli-cron-pull-profiles.ts | 171 +++++++++++++++++++++ src/graphql/fetchRequestFilesForRequest.ts | 54 +++++++ src/graphql/gqls/index.ts | 1 + src/graphql/gqls/requestFile.ts | 27 ++++ src/graphql/index.ts | 1 + yarn.lock | 1 + 8 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 src/cli-cron-pull-profiles.ts create mode 100644 src/graphql/fetchRequestFilesForRequest.ts create mode 100644 src/graphql/gqls/requestFile.ts diff --git a/package.json b/package.json index cb8326ab..c23cd642 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "author": "Transcend Inc.", "name": "@transcend-io/cli", "description": "Small package containing useful typescript utilities.", - "version": "6.3.0", + "version": "6.4.0", "homepage": "https://github.com/transcend-io/cli", "repository": { "type": "git", @@ -17,6 +17,7 @@ "tr-create-assessment": "./build/cli-create-assessment.js", "tr-cron-mark-identifiers-completed": "./build/cli-cron-mark-identifiers-completed.js", "tr-cron-pull-identifiers": "./build/cli-cron-pull-identifiers.js", + "tr-cron-pull-profiles": "./build/cli-cron-pull-profiles.js", "tr-derive-data-silos-from-data-flows": "./build/cli-derive-data-silos-from-data-flows.js", "tr-derive-data-silos-from-data-flows-cross-instance": "./build/cli-derive-data-silos-from-data-flows-cross-instance.js", "tr-discover-silos": "./build/cli-discover-silos.js", diff --git a/src/cli-cron-pull-identifiers.ts b/src/cli-cron-pull-identifiers.ts index d3817107..8c42d695 100644 --- a/src/cli-cron-pull-identifiers.ts +++ b/src/cli-cron-pull-identifiers.ts @@ -49,6 +49,15 @@ async function main(): Promise { process.exit(1); } + if (!dataSiloId) { + 
logger.error( + colors.red( + 'A data silo ID must be provided. You can specify using --dataSiloId=92636cda-b7c6-48c6-b1b1-2df574596cbc', + ), + ); + process.exit(1); + } + if (!actions) { logger.error( colors.red( diff --git a/src/cli-cron-pull-profiles.ts b/src/cli-cron-pull-profiles.ts new file mode 100644 index 00000000..7c15455a --- /dev/null +++ b/src/cli-cron-pull-profiles.ts @@ -0,0 +1,171 @@ +#!/usr/bin/env node + +import yargs from 'yargs-parser'; +import colors from 'colors'; + +import { logger } from './logger'; +import uniq from 'lodash/uniq'; +import { pullCustomSiloOutstandingIdentifiers, writeCsv } from './cron'; +import { RequestAction } from '@transcend-io/privacy-types'; +import { DEFAULT_TRANSCEND_API } from './constants'; +import { splitCsvToList } from './requests'; +import { map } from 'bluebird'; +import { + buildTranscendGraphQLClient, + fetchRequestFilesForRequest, +} from './graphql'; + +/** + * This is a temporary script that can be removed after the launch of workflows v2 + * TODO: https://transcend.height.app/T-39035 - remove this + * + * Requires an API key with: + * - scope for "View the Request Compilation" + * + * Dev Usage: + * yarn ts-node ./src/cli-cron-pull-profiles.ts --auth=$TRANSCEND_API_KEY \ + * --cronDataSiloId=92636cda-b7c6-48c6-b1b1-2df574596cbc \ + * --targetDataSiloId=40ec5df2-61f7-41e6-80d7-afe7c2f0e390 \ + * --actions=ERASURE \ + * --file=/Users/michaelfarrell/Desktop/test.csv \ + * --fileTarget=/Users/michaelfarrell/Desktop/test-target.csv + * + * Standard usage: + * yarn tr-cron-pull-profiles --auth=$TRANSCEND_API_KEY \ + * --cronDataSiloId=92636cda-b7c6-48c6-b1b1-2df574596cbc \ + * --targetDataSiloId=40ec5df2-61f7-41e6-80d7-afe7c2f0e390 \ + * --actions=ERASURE \ + * --file=/Users/michaelfarrell/Desktop/test.csv \ + * --fileTarget=/Users/michaelfarrell/Desktop/test-target.csv + */ +async function main(): Promise { + // Parse command line arguments + const { + file = './cron-identifiers.csv', + fileTarget = 
'./cron-identifiers-target.csv', + transcendUrl = DEFAULT_TRANSCEND_API, + auth, + sombraAuth, + cronDataSiloId, + targetDataSiloId, + actions, + pageLimit = '100', + } = yargs(process.argv.slice(2)) as { [k in string]: string }; + + // Ensure auth is passed + if (!auth) { + logger.error( + colors.red( + 'A Transcend API key must be provided. You can specify using --auth=$TRANSCEND_API_KEY', + ), + ); + process.exit(1); + } + + // Ensure cronDataSiloId + if (!cronDataSiloId) { + logger.error( + colors.red( + 'A cronDataSiloId must be provided. You can specify using --cronDataSiloId=92636cda-b7c6-48c6-b1b1-2df574596cbc', + ), + ); + process.exit(1); + } + + // Ensure targetDataSiloId + if (!targetDataSiloId) { + logger.error( + colors.red( + 'A targetDataSiloId must be provided. You can specify using --targetDataSiloId=40ec5df2-61f7-41e6-80d7-afe7c2f0e390', + ), + ); + process.exit(1); + } + + // Ensure actions + if (!actions) { + logger.error( + colors.red( + 'At least one action must be provided. 
You can specify using --actions=ERASURE', + ), + ); + process.exit(1); + } + + // Validate actions + const parsedActions = splitCsvToList(actions) as RequestAction[]; + const invalidActions = parsedActions.filter( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (type) => !Object.values(RequestAction).includes(type as any), + ); + if (invalidActions.length > 0) { + logger.error( + colors.red( + `Failed to parse actions:"${invalidActions.join(',')}".\n` + + `Expected one of: \n${Object.values(RequestAction).join('\n')}`, + ), + ); + process.exit(1); + } + + // Pull down outstanding identifiers + const { identifiersFormattedForCsv } = + await pullCustomSiloOutstandingIdentifiers({ + transcendUrl, + pageLimit: parseInt(pageLimit, 10), + actions: parsedActions, + auth, + sombraAuth, + dataSiloId: cronDataSiloId, + }); + + // Grab the requestIds from the list of silos to process + const requestIds = identifiersFormattedForCsv.map( + (d) => d.requestId as string, + ); + + // Create GraphQL client to connect to Transcend backend + const client = buildTranscendGraphQLClient(transcendUrl, auth); + + // Pull down target identifiers + const results = await map( + uniq(requestIds), + async (requestId) => { + const results = await fetchRequestFilesForRequest(client, { requestId }); + return results.map(({ fileName, ...res }) => ({ + ...res, + requestId, + datapointName: fileName + .replace('.json', '') + .split('/') + .pop() + ?.replace(' Information', ''), + })); + }, + { + concurrency: 10, + }, + ); + + // Write CSV + const headers = uniq( + identifiersFormattedForCsv.map((d) => Object.keys(d)).flat(), + ); + writeCsv(file, identifiersFormattedForCsv, headers); + logger.info( + colors.green( + `Successfully wrote ${identifiersFormattedForCsv.length} identifiers to file "${file}"`, + ), + ); + + const targetIdentifiers = results.flat(); + const headers2 = uniq(targetIdentifiers.map((d) => Object.keys(d)).flat()); + writeCsv(fileTarget, targetIdentifiers, 
headers2); + logger.info( + colors.green( + `Successfully wrote ${targetIdentifiers.length} identifiers to file "${fileTarget}"`, + ), + ); +} + +main(); diff --git a/src/graphql/fetchRequestFilesForRequest.ts b/src/graphql/fetchRequestFilesForRequest.ts new file mode 100644 index 00000000..b321e855 --- /dev/null +++ b/src/graphql/fetchRequestFilesForRequest.ts @@ -0,0 +1,54 @@ +import { GraphQLClient } from 'graphql-request'; +import { REQUEST_FILES } from './gqls'; +import { makeGraphQLRequest } from './makeGraphQLRequest'; + +export interface RequestFile { + /** The remote ID */ + remoteId: string; + /** The file name */ + fileName: string; +} + +const PAGE_SIZE = 20; + +/** + * Fetch all RequestFiles for a single request + * + * @param client - GraphQL client + * @param filterBy - Filter by + * @returns All RequestFiles for the given request, sorted by remoteId + */ +export async function fetchRequestFilesForRequest( + client: GraphQLClient, + filterBy: { + /** Filter by request ID */ + requestId: string; + }, +): Promise { + const requestFiles: RequestFile[] = []; + let offset = 0; + + // Whether to continue looping + let shouldContinue = false; + do { + const { + requestFiles: { nodes }, + // eslint-disable-next-line no-await-in-loop + } = await makeGraphQLRequest<{ + /** RequestFiles */ + requestFiles: { + /** List */ + nodes: RequestFile[]; + }; + }>(client, REQUEST_FILES, { + first: PAGE_SIZE, + offset, + filterBy, + }); + requestFiles.push(...nodes); + offset += PAGE_SIZE; + shouldContinue = nodes.length === PAGE_SIZE; + } while (shouldContinue); + + return requestFiles.sort((a, b) => a.remoteId.localeCompare(b.remoteId)); +} diff --git a/src/graphql/gqls/index.ts b/src/graphql/gqls/index.ts index f7ab82d9..447bc781 100644 --- a/src/graphql/gqls/index.ts +++ b/src/graphql/gqls/index.ts @@ -23,6 +23,7 @@ export * from './RequestEnricher'; export * from './RequestDataSilo'; export * from './team'; export * from './user'; +export * from './requestFile'; export * from 
'./promptRun'; export * from './actionItemCollection'; export * from './attribute'; diff --git a/src/graphql/gqls/requestFile.ts b/src/graphql/gqls/requestFile.ts new file mode 100644 index 00000000..033107c5 --- /dev/null +++ b/src/graphql/gqls/requestFile.ts @@ -0,0 +1,27 @@ +import { gql } from 'graphql-request'; + +// TODO: https://transcend.height.app/T-27909 - enable optimizations +// isExportCsv: true +// useMaster: false +export const REQUEST_FILES = gql` + query TranscendCliRequestFiles( + $first: Int! + $offset: Int! + $filterBy: RequestFileFiltersInput! + ) { + requestFiles( + filterBy: $filterBy + first: $first + offset: $offset + orderBy: [ + { field: createdAt, direction: ASC } + { field: id, direction: ASC } + ] + ) { + nodes { + remoteId + fileName + } + } + } +`; diff --git a/src/graphql/index.ts b/src/graphql/index.ts index 1f4f60f0..5131e8ee 100644 --- a/src/graphql/index.ts +++ b/src/graphql/index.ts @@ -61,6 +61,7 @@ export * from './fetchPromptGroups'; export * from './fetchPromptPartials'; export * from './fetchPromptGroups'; export * from './syncPrompts'; +export * from './fetchRequestFilesForRequest'; export * from './deployConsentManager'; export * from './reportPromptRun'; export * from './addMessagesToPromptRun'; diff --git a/yarn.lock b/yarn.lock index 0406b605..c36d85e1 100644 --- a/yarn.lock +++ b/yarn.lock @@ -580,6 +580,7 @@ __metadata: tr-create-assessment: ./build/cli-create-assessment.js tr-cron-mark-identifiers-completed: ./build/cli-cron-mark-identifiers-completed.js tr-cron-pull-identifiers: ./build/cli-cron-pull-identifiers.js + tr-cron-pull-profiles: ./build/cli-cron-pull-profiles.js tr-derive-data-silos-from-data-flows: ./build/cli-derive-data-silos-from-data-flows.js tr-derive-data-silos-from-data-flows-cross-instance: ./build/cli-derive-data-silos-from-data-flows-cross-instance.js tr-discover-silos: ./build/cli-discover-silos.js