Skip to content

Commit

Permalink
Refactor so that export returns immediately then client polls status …
Browse files Browse the repository at this point in the history
…endpoint
  • Loading branch information
philmcmahon committed Jan 9, 2025
1 parent a4dc93c commit d6baa92
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 132 deletions.
48 changes: 37 additions & 11 deletions packages/api/src/export.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@ import {
TranscriptionConfig,
TranscriptionDynamoItem,
} from '@guardian/transcription-service-backend-common';
import { ZTokenResponse } from '@guardian/transcription-service-common';
import {
ExportItems,
ExportStatus,
ExportStatuses,
ExportType,
ZTokenResponse,
} from '@guardian/transcription-service-common';
import {
uploadFileToGoogleDrive,
uploadToGoogleDocs,
Expand All @@ -24,7 +30,7 @@ export const exportMediaToDrive = async (
item: TranscriptionDynamoItem,
oAuthTokenResponse: ZTokenResponse,
folderId: string,
): Promise<{ statusCode: number; fileId?: string; message?: string }> => {
): Promise<ExportStatus> => {
const mediaSize = await getObjectSize(
s3Client,
config.app.sourceMediaBucket,
Expand All @@ -33,7 +39,8 @@ export const exportMediaToDrive = async (
if (mediaSize && mediaSize > LAMBDA_MAX_EPHEMERAL_STORAGE_BYTES) {
const msg = `Media file too large to export to google drive. Please manually download the file and upload using the google drive UI`;
return {
statusCode: 400,
exportType: 'source-media',
status: 'failure',
message: msg,
};
}
Expand Down Expand Up @@ -62,8 +69,9 @@ export const exportMediaToDrive = async (
folderId,
);
return {
fileId: id,
statusCode: 200,
exportType: 'source-media',
id,
status: 'success',
};
};

Expand All @@ -75,7 +83,7 @@ export const exportTranscriptToDoc = async (
folderId: string,
drive: Drive,
docs: Docs,
): Promise<{ statusCode: number; message?: string; documentId?: string }> => {
): Promise<ExportStatus> => {
const transcriptS3Key = item.transcriptKeys[format];
const transcriptText = await getObjectText(
s3Client,
Expand All @@ -86,14 +94,16 @@ export const exportTranscriptToDoc = async (
if (transcriptText.failureReason === 'NoSuchKey') {
const msg = `Failed to export transcript - file has expired. Please re-upload the file and try again.`;
return {
statusCode: 410,
status: 'failure',
message: msg,
exportType: format,
};
}
const msg = `Failed to fetch transcript. Please contact the digital investigations team for support`;
return {
statusCode: 500,
status: 'failure',
message: msg,
exportType: format,
};
}
const exportResult = await uploadToGoogleDocs(
Expand All @@ -107,12 +117,28 @@ export const exportTranscriptToDoc = async (
const msg = `Failed to create google document for item with id ${item.id}`;
logger.error(msg);
return {
statusCode: 500,
status: 'failure',
message: msg,
exportType: format,
};
}
return {
statusCode: 200,
documentId: exportResult,
status: 'success',
id: exportResult,
exportType: format,
};
};

/**
 * Builds the initial status list for an export request.
 *
 * Produces one entry per requested export item, in the same order as
 * `items`, each marked with the status `'in-progress'`.
 *
 * @param items - The export types that were requested.
 * @returns A new list containing one in-progress status per requested item.
 */
export const exportStatusInProgress = (items: ExportItems): ExportStatuses => {
  const pending: ExportStatuses = [];
  for (const exportType of items) {
    // Key order (status, then exportType) is kept stable for serialised output.
    pending.push({ status: 'in-progress', exportType });
  }
  return pending;
};

/**
 * Returns a copy of `statuses` in which every entry that has the same
 * `exportType` as `status` is replaced by `status`.
 *
 * The input list is not mutated. Entries with a different `exportType` are
 * carried over unchanged; if no entry matches, the result has the same
 * contents as the input.
 *
 * @param status - The latest status for a single export type.
 * @param statuses - The current list of statuses.
 * @returns A new list with the matching entries swapped for `status`.
 */
export const updateStatus = (
  status: ExportStatus,
  statuses: ExportStatuses,
): ExportStatuses => {
  const targetType = status.exportType;
  return statuses.map((existing) => {
    if (existing.exportType === targetType) {
      return status;
    }
    return existing;
  });
};
135 changes: 80 additions & 55 deletions packages/api/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
logger,
getS3Client,
sendMessage,
writeTranscriptionItem,
} from '@guardian/transcription-service-backend-common';
import {
ClientConfig,
Expand All @@ -27,8 +28,10 @@ import {
transcribeUrlRequestBody,
MediaDownloadJob,
CreateFolderRequest,
ExportResponse,
signedUrlRequestBody,
ExportStatus,
ExportStatuses,
ExportType,
} from '@guardian/transcription-service-common';
import type { SignedUrlResponseBody } from '@guardian/transcription-service-common';
import {
Expand All @@ -37,7 +40,13 @@ import {
} from '@guardian/transcription-service-backend-common/src/dynamodb';
import { createExportFolder, getDriveClients } from './services/googleDrive';
import { v4 as uuid4 } from 'uuid';
import { exportMediaToDrive, exportTranscriptToDoc } from './export';
import {
exportMediaToDrive,
exportStatusInProgress,
exportTranscriptToDoc,
updateStatus,
} from './export';
import { DynamoDBDocumentClient } from '@aws-sdk/lib-dynamodb';

const runningOnAws = process.env['AWS_EXECUTION_ENV'];
const emulateProductionLocally =
Expand All @@ -56,7 +65,7 @@ const getApp = async () => {
);

const s3Client = getS3Client(config.aws.region);
const dynamoClient = getDynamoClient(
const dynamoClient: DynamoDBDocumentClient = getDynamoClient(
config.aws.region,
config.aws.localstackEndpoint,
);
Expand Down Expand Up @@ -195,6 +204,30 @@ const getApp = async () => {
}),
]);

apiRouter.get('/export/status', [
checkAuth,
asyncHandler(async (req, res) => {
const id = req.query.id as string;
if (!id) {
res
.status(400)
.send('You must provide the transcript id in the query string');
return;
}
const { item, errorMessage } = await getTranscriptionItem(
dynamoClient,
config.app.tableName,
id,
);
if (!item) {
res.status(500).send(errorMessage);
return;
}
res.send(JSON.stringify(item.exportStatus));
return;
}),
]);

apiRouter.post('/export/create-folder', [
checkAuth,
asyncHandler(async (req, res) => {
Expand Down Expand Up @@ -261,59 +294,51 @@ const getApp = async () => {
config,
exportRequest.data.oAuthTokenResponse,
);
const exportResult: ExportResponse = {
textDocumentId: undefined,
srtDocumentId: undefined,
sourceMediaFileId: undefined,
};
if (exportRequest.data.items.transcriptText) {
const textResult = await exportTranscriptToDoc(
config,
s3Client,
item,
'text',
exportRequest.data.folderId,
driveClients.drive,
driveClients.docs,
);
if (textResult.statusCode !== 200) {
res.status(textResult.statusCode).send(textResult.message);
return;
}
exportResult.textDocumentId = textResult.documentId;
}
if (exportRequest.data.items.transcriptSrt) {
const srtResult = await exportTranscriptToDoc(
config,
s3Client,
item,
'srt',
exportRequest.data.folderId,
driveClients.drive,
driveClients.docs,
);
if (srtResult.statusCode !== 200) {
res.status(srtResult.statusCode).send(srtResult.message);
return;
}
exportResult.srtDocumentId = srtResult.documentId;
}
if (exportRequest.data.items.sourceMedia) {
const mediaResult = await exportMediaToDrive(
config,
s3Client,
item,
exportRequest.data.oAuthTokenResponse,
exportRequest.data.folderId,
let currentStatuses: ExportStatuses = exportStatusInProgress(
exportRequest.data.items,
);
await writeTranscriptionItem(dynamoClient, config.app.tableName, {
...item,
exportStatus: currentStatuses,
});
const exportPromises: Promise<void>[] = exportRequest.data.items
.map((exportType: ExportType) => {
if (exportType === 'text' || exportType === 'srt') {
return exportTranscriptToDoc(
config,
s3Client,
item,
exportType,
exportRequest.data.folderId,
driveClients.drive,
driveClients.docs,
);
} else {
return exportMediaToDrive(
config,
s3Client,
item,
exportRequest.data.oAuthTokenResponse,
exportRequest.data.folderId,
);
}
})
.map((exportResult: Promise<ExportStatus>) =>
exportResult.then(async (result: ExportStatus) => {
if (result.status === 'failure') {
logger.error(result.message);
} else {
logger.info(`Transcript ${result.exportType} export complete`);
}
currentStatuses = updateStatus(result, currentStatuses);
await writeTranscriptionItem(dynamoClient, config.app.tableName, {
...item,
exportStatus: currentStatuses,
});
}),
);
if (mediaResult.statusCode !== 200) {
logger.error('Failed to export media to google drive');
res.status(mediaResult.statusCode).send(mediaResult.message);
return;
}
exportResult.sourceMediaFileId = mediaResult.fileId;
}
res.send(JSON.stringify(exportResult));
res.send(JSON.stringify(currentStatuses));
await Promise.all(exportPromises);
return;
}),
]);
Expand Down
2 changes: 2 additions & 0 deletions packages/backend-common/src/dynamodb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {

import { z } from 'zod';
import { logger } from '@guardian/transcription-service-backend-common';
import { ExportStatus } from '@guardian/transcription-service-common';

export const getDynamoClient = (
region: string,
Expand Down Expand Up @@ -39,6 +40,7 @@ export const TranscriptionDynamoItem = z.object({
userEmail: z.string(),
completedAt: z.optional(z.string()), // dynamodb can't handle dates so we need to use an ISO date
isTranslation: z.boolean(),
exportStatus: z.optional(z.array(ExportStatus)),
});

export type TranscriptionDynamoItem = z.infer<typeof TranscriptionDynamoItem>;
Expand Down
Loading

0 comments on commit d6baa92

Please sign in to comment.