Skip to content

Commit

Permalink
Move uploadFileToGoogleDrive to media-export service
Browse files Browse the repository at this point in the history
  • Loading branch information
philmcmahon committed Jan 13, 2025
1 parent f2e538f commit ebe74ed
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 83 deletions.
82 changes: 0 additions & 82 deletions packages/api/src/services/googleDrive.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
TranscriptionConfig,
} from '@guardian/transcription-service-backend-common';
import { ZTokenResponse } from '@guardian/transcription-service-common';
import * as fs from 'node:fs';
import Drive = drive_v3.Drive;

const ROOT_FOLDER_NAME = 'Guardian Transcribe Tool';
Expand Down Expand Up @@ -130,84 +129,3 @@ export const createExportFolder = async (drive: Drive, name: string) => {
}
return folderId;
};

export const uploadFileToGoogleDrive = async (
fileName: string,
oAuthTokenResponse: ZTokenResponse,
filePath: string,
mimeType: string,
folderId: string,
) => {
const fileSize = fs.statSync(filePath).size;

const startResumableSessionResponse = await fetch(
'https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable',
{
method: 'POST',
headers: {
'X-Upload-Content-Length': `${fileSize}`,
'X-Upload-Content-Type': mimeType,
'Content-Type': 'application/json',
Authorization: `Bearer ${oAuthTokenResponse.access_token}`,
},
body: JSON.stringify({
name: fileName,
mimeType,
parents: [folderId],
}),
},
);

const uploadUrl = startResumableSessionResponse.headers.get('location');

if (!uploadUrl) {
throw new Error('Failed to start resumable upload session');
}

//when changing this value consider the amount of memory allocated to the API lambda function
const CHUNK_SIZE = 128 * 1024 * 1024; // 128MB -
const fileStream = fs.createReadStream(filePath, {
highWaterMark: CHUNK_SIZE,
});

let offset = 0;

for await (const chunk of fileStream) {
// pause the stream to prevent node from buffering any more data whilst we upload
fileStream.pause();
const chunkSize = chunk.length;
const range = `bytes ${offset}-${offset + chunkSize - 1}/${fileSize}`;

logger.info(
`Uploading chunk: ${range} (Upload ${Math.floor((offset / fileSize) * 100)}% complete)`,
);

const response = await fetch(uploadUrl, {
method: 'PUT',
headers: {
'Content-Range': range,
'Content-Length': chunkSize,
},
body: chunk,
});

if (response.ok) {
// Response status is 308 until the final chunk. Final response includes file metadata
return ((await response.json()) as { id: string }).id;
}
if (response.status === 308) {
//continue
} else {
const text = await response.text();
logger.error(`Received ${response.status} from google, error: ${text}`);
throw new Error(
`Failed to upload chunk: ${response.status} ${response.statusText}`,
);
}

offset += chunkSize;
fileStream.resume();
}

throw new Error('Failed to upload file');
};
84 changes: 84 additions & 0 deletions packages/media-export/src/googleDrive.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import { ZTokenResponse } from '@guardian/transcription-service-common';
import fs from 'node:fs';
import { logger } from '@guardian/transcription-service-backend-common';

/**
 * Uploads a local file to Google Drive using the Drive v3 resumable upload
 * protocol: one POST to open an upload session, then sequential PUTs of
 * large chunks streamed from disk, so memory use is bounded by CHUNK_SIZE.
 *
 * @param fileName - Name the file will have in Drive.
 * @param oAuthTokenResponse - OAuth token response; only `access_token` is read here.
 * @param filePath - Path of the local file to upload.
 * @param mimeType - MIME type reported to Drive for the new file.
 * @param folderId - Drive folder id the file is created under.
 * @returns The id of the created Drive file.
 * @throws If the upload session cannot be started, a chunk upload fails, or
 *   the stream ends without Google returning the final file metadata.
 */
export const uploadFileToGoogleDrive = async (
  fileName: string,
  oAuthTokenResponse: ZTokenResponse,
  filePath: string,
  mimeType: string,
  folderId: string,
): Promise<string> => {
  // Total size is required up front for the Content-Range headers below.
  const fileSize = fs.statSync(filePath).size;

  // Open a resumable upload session. File metadata (name, type, parent
  // folder) is sent here; Google returns the session URI in the 'Location'
  // response header rather than the body.
  const startResumableSessionResponse = await fetch(
    'https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable',
    {
      method: 'POST',
      headers: {
        'X-Upload-Content-Length': `${fileSize}`,
        'X-Upload-Content-Type': mimeType,
        'Content-Type': 'application/json',
        Authorization: `Bearer ${oAuthTokenResponse.access_token}`,
      },
      body: JSON.stringify({
        name: fileName,
        mimeType,
        parents: [folderId],
      }),
    },
  );

  // Surface Google's error body instead of failing later on a missing header.
  if (!startResumableSessionResponse.ok) {
    const text = await startResumableSessionResponse.text();
    logger.error(
      `Received ${startResumableSessionResponse.status} starting upload session, error: ${text}`,
    );
    throw new Error('Failed to start resumable upload session');
  }

  const uploadUrl = startResumableSessionResponse.headers.get('location');

  if (!uploadUrl) {
    throw new Error('Failed to start resumable upload session');
  }

  // When changing this value consider the amount of memory allocated to the
  // lambda function running this export — each chunk is held in memory
  // whilst it uploads.
  const CHUNK_SIZE = 128 * 1024 * 1024; // 128MB
  // highWaterMark makes the stream yield buffers of (up to) CHUNK_SIZE bytes.
  const fileStream = fs.createReadStream(filePath, {
    highWaterMark: CHUNK_SIZE,
  });

  // Byte offset of the start of the current chunk within the file.
  let offset = 0;

  try {
    for await (const chunk of fileStream) {
      // Pause the stream to prevent node from buffering any more data whilst
      // we upload this chunk.
      fileStream.pause();
      const chunkSize = chunk.length;
      // Content-Range format required by the resumable protocol:
      // bytes start-end/total.
      const range = `bytes ${offset}-${offset + chunkSize - 1}/${fileSize}`;

      logger.info(
        `Uploading chunk: ${range} (Upload ${Math.floor((offset / fileSize) * 100)}% complete)`,
      );

      const response = await fetch(uploadUrl, {
        method: 'PUT',
        headers: {
          'Content-Range': range,
          // Header values must be strings; passing the raw number relied on
          // implicit coercion.
          'Content-Length': `${chunkSize}`,
        },
        body: chunk,
      });

      if (response.ok) {
        // Response status is 308 until the final chunk. The final response
        // includes the created file's metadata.
        return ((await response.json()) as { id: string }).id;
      }
      if (response.status !== 308) {
        const text = await response.text();
        logger.error(`Received ${response.status} from google, error: ${text}`);
        throw new Error(
          `Failed to upload chunk: ${response.status} ${response.statusText}`,
        );
      }

      // 308 'Resume Incomplete' acknowledges the chunk; send the next one.
      offset += chunkSize;
      fileStream.resume();
    }
  } finally {
    // Release the underlying file handle even when a chunk upload throws.
    fileStream.destroy();
  }

  // Reached only if the stream ends without Google returning file metadata.
  throw new Error('Failed to upload file');
};
2 changes: 1 addition & 1 deletion packages/media-export/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ import {
TranscriptExportRequest,
ZTokenResponse,
} from '@guardian/transcription-service-common';
import { uploadFileToGoogleDrive } from 'api/src/services/googleDrive';
import { updateStatus } from 'api/src/export';
import { LAMBDA_MAX_EPHEMERAL_STORAGE_BYTES } from 'api/src/services/lambda';
import { uploadFileToGoogleDrive } from './googleDrive';

export const exportMediaToDrive = async (
config: TranscriptionConfig,
Expand Down

0 comments on commit ebe74ed

Please sign in to comment.