Skip to content

Commit

Permalink
Files health check rewrite and tests (#7223)
Browse files Browse the repository at this point in the history
* WIP, Use case with same functionality as old script

* WIP, S3FileStorage v2

* WIP, v2 fileshealthcheck script

* added custom upload type

* Fixed lint errors

* Fix type errors
  • Loading branch information
daneryl authored Sep 15, 2024
1 parent 1c8e48f commit 895a492
Show file tree
Hide file tree
Showing 33 changed files with 981 additions and 319 deletions.
68 changes: 68 additions & 0 deletions app/api/files.v2/FilesHealthCheck.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { FilesDataSource } from './contracts/FilesDataSource';
import { FileStorage } from './contracts/FileStorage';
import { URLAttachment } from './model/URLAttachment';

/**
 * Removes activity-log entries from a storage listing so they are not
 * compared against the files collection.
 *
 * @param files - Paths as returned by the storage listing.
 * @returns A new array with every path ending in `activity.log` left out.
 */
function filterFilesInStorage(files: string[]) {
  const isActivityLog = (storedPath: string) => storedPath.endsWith('activity.log');
  return files.filter(storedPath => !isActivityLog(storedPath));
}

/**
 * Cross-checks the files recorded in the database against the paths listed by
 * the file storage and reports what each side is missing.
 *
 * Callers may register callbacks (`onMissingInDB`, `onMissingInStorage`) to be
 * notified of every discrepancy while `execute` runs; by default both are
 * no-ops.
 */
export class FilesHealthCheck {
  // eslint-disable-next-line class-methods-use-this
  private onMissingInDBCB: (filename: string) => void = () => {};

  // eslint-disable-next-line class-methods-use-this
  private onMissingInStorageCB: (fileDTO: { _id: string; filename: string }) => void = () => {};

  private fileStorage: FileStorage;

  private filesDS: FilesDataSource;

  constructor(fileStorage: FileStorage, filesDS: FilesDataSource) {
    this.fileStorage = fileStorage;
    this.filesDS = filesDS;
  }

  /**
   * Runs the comparison.
   *
   * @returns
   * - `missingInStorageList` / `missingInStorage`: storage paths (and their
   *   count) of database files that are not present in the storage listing.
   * - `missingInDbList` / `missingInDb`: storage paths (and their count) that
   *   no database file resolves to.
   * - `countInDb`: number of database files inspected (URL attachments included).
   * - `countInStorage`: number of distinct storage paths, activity logs excluded.
   */
  async execute() {
    const allFilesInDb = await this.filesDS.getAll().all();
    const allFilesInStorage = await this.fileStorage.list();

    // `storedPaths` is never mutated so it can answer "does this path exist?"
    // for every database record, even when several records share one path.
    const storedPaths = new Set(filterFilesInStorage(allFilesInStorage));
    // Paths still unclaimed once every database record has been visited are
    // the ones missing in the database.
    const unclaimedPaths = new Set(storedPaths);
    const missingInStorageList: string[] = [];
    let countInDb = 0;

    allFilesInDb.forEach(file => {
      countInDb += 1;

      // URL attachments are never reported as missing in storage, so they are
      // counted but not resolved to a storage path.
      if (file instanceof URLAttachment) {
        return;
      }

      const storagePath = this.fileStorage.getPath(file);
      unclaimedPaths.delete(storagePath);

      if (!storedPaths.has(storagePath)) {
        missingInStorageList.push(storagePath);
        this.onMissingInStorageCB({ _id: file.id, filename: file.filename });
      }
    });

    const missingInDbList = [...unclaimedPaths];
    missingInDbList.forEach(storagePath => {
      this.onMissingInDBCB(storagePath);
    });

    return {
      missingInStorageList,
      missingInStorage: missingInStorageList.length,
      missingInDbList,
      missingInDb: missingInDbList.length,
      countInDb,
      countInStorage: storedPaths.size,
    };
  }

  /**
   * Registers the callback invoked once per storage path that no database
   * file resolves to. Replaces any previously registered callback.
   */
  onMissingInDB(cb: (filename: string) => void) {
    this.onMissingInDBCB = cb;
  }

  /**
   * Registers the callback invoked once per database file whose storage path
   * is absent from the storage listing. Replaces any previously registered
   * callback.
   */
  onMissingInStorage(cb: (fileDTO: { _id: string; filename: string }) => void) {
    this.onMissingInStorageCB = cb;
  }
}
6 changes: 6 additions & 0 deletions app/api/files.v2/contracts/FileStorage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { UwaziFile } from '../model/UwaziFile';

/**
 * Contract for a backend that stores the physical files.
 */
export interface FileStorage {
// Resolves to the paths of the files currently held by the storage.
list(): Promise<string[]>;
// Returns the storage path for the given file, in the same form as the
// entries produced by `list()` so the two can be compared.
getPath(file: UwaziFile): string;
}
4 changes: 4 additions & 0 deletions app/api/files.v2/contracts/FilesDataSource.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import { ResultSet } from 'api/common.v2/contracts/ResultSet';
import { UwaziFile } from '../model/UwaziFile';

/**
 * Contract for reading file records from the persistence layer.
 */
export interface FilesDataSource {
// Takes (entity, _id) pairs and resolves to a boolean.
// NOTE(review): presumably true only when every listed file exists for its
// entity — confirm against the implementation.
filesExistForEntities(files: { entity: string; _id: string }[]): Promise<boolean>;
// Returns a result set over every file record, mapped to the UwaziFile model.
getAll(): ResultSet<UwaziFile>;
}
53 changes: 53 additions & 0 deletions app/api/files.v2/database/FilesMappers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// import { OptionalId } from 'mongodb';

import { FileDBOType } from './schemas/filesTypes';
import { UwaziFile } from '../model/UwaziFile';
import { Document } from '../model/Document';
import { URLAttachment } from '../model/URLAttachment';
import { Attachment } from '../model/Attachment';
import { CustomUpload } from '../model/CustomUpload';

/**
 * Conversions between the database representation of a file and the domain
 * model classes.
 */
export const FileMappers = {
  // toDBO(file: UwaziFile): OptionalId<FileDBOType> {
  //   return {
  //     filename: file.filename,
  //     entity: file.entity,
  //     type: 'document',
  //     totalPages: file.totalPages,
  //   };
  // },

  /**
   * Builds the model instance matching the record's `type`:
   * - `attachment` with a truthy `url` becomes a `URLAttachment` (built from the url),
   * - `attachment` without one becomes an `Attachment`,
   * - `custom` becomes a `CustomUpload`,
   * - anything else becomes a `Document`.
   *
   * Does not rely on `this`, so it can be passed around as a bare function.
   */
  toModel(fileDBO: FileDBOType): UwaziFile {
    const id = fileDBO._id.toString();
    const { entity, totalPages } = fileDBO;

    switch (fileDBO.type) {
      case 'attachment':
        return fileDBO.url
          ? new URLAttachment(id, entity, totalPages, fileDBO.url)
          : new Attachment(id, entity, totalPages, fileDBO.filename);
      case 'custom':
        return new CustomUpload(id, entity, totalPages, fileDBO.filename);
      default:
        return new Document(id, entity, totalPages, fileDBO.filename);
    }
  },
};
10 changes: 10 additions & 0 deletions app/api/files.v2/database/MongoFilesDataSource.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
import { MongoDataSource } from 'api/common.v2/database/MongoDataSource';
import { MongoResultSet } from 'api/common.v2/database/MongoResultSet';
import { ObjectId } from 'mongodb';
import { FilesDataSource } from '../contracts/FilesDataSource';
import { FileMappers } from './FilesMappers';
import { FileDBOType } from './schemas/filesTypes';
import { UwaziFile } from '../model/UwaziFile';

export class MongoFilesDataSource extends MongoDataSource<FileDBOType> implements FilesDataSource {
/**
 * Returns a result set over every record in the collection, with the
 * `fullText` field left out of the projection and each record converted
 * through `FileMappers.toModel`.
 */
getAll() {
  const cursor = this.getCollection().find({}, { projection: { fullText: 0 } });
  return new MongoResultSet<FileDBOType, UwaziFile>(cursor, FileMappers.toModel);
}

protected collectionName = 'files';

async filesExistForEntities(files: { entity: string; _id: string }[]) {
Expand Down
4 changes: 3 additions & 1 deletion app/api/files.v2/database/schemas/filesTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ import { ObjectId } from 'mongodb';
// Fields shared by every file record as stored in the database.
interface BaseFileDBOType {
_id: ObjectId;
entity: string;
filename: string;
// NOTE(review): declared as required, but FileMappers.toModel tests `url` for
// truthiness to tell URL attachments apart — confirm whether it should be optional.
url: string;
}

/**
 * File record with a page count. `type` covers every kind that
 * `FileMappers.toModel` distinguishes: documents, attachments and custom
 * uploads.
 */
interface DocumentFileDBOType extends BaseFileDBOType {
  type: 'document' | 'attachment' | 'custom';
  totalPages: number;
}

Expand Down
6 changes: 3 additions & 3 deletions app/api/files.v2/database/specs/MongoFilesDataSource.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ const factory = getFixturesFactory();

// Database fixtures for this spec: file records for entity1..entity3.
// NOTE(review): both the `factory.file` and the `factory.fileDeprecated`
// variants of the same three files are listed here; this looks like the
// replaced and replacing lines of a change shown together — confirm that only
// one set is meant to remain.
const fixtures = {
files: [
factory.file('file1', 'entity1', 'document', 'file1.pdf'),
factory.file('file2', 'entity2', 'document', 'file2.pdf'),
factory.file('file3', 'entity3', 'document', 'file3.pdf'),
factory.fileDeprecated('file1', 'entity1', 'document', 'file1.pdf'),
factory.fileDeprecated('file2', 'entity2', 'document', 'file2.pdf'),
factory.fileDeprecated('file3', 'entity3', 'document', 'file3.pdf'),
],
};

Expand Down
57 changes: 57 additions & 0 deletions app/api/files.v2/infrastructure/S3FileStorage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { _Object, ListObjectsV2Command, S3Client } from '@aws-sdk/client-s3';
import { config } from 'api/config';
import { Tenant } from 'api/tenants/tenantContext';
import path from 'path';
import { FileStorage } from '../contracts/FileStorage';
import { Attachment } from '../model/Attachment';
import { UwaziFile } from '../model/UwaziFile';
import { URLAttachment } from '../model/URLAttachment';
import { CustomUpload } from '../model/CustomUpload';

/**
 * `FileStorage` backed by an S3 bucket. Storage paths are S3 object keys
 * built from the tenant's configured folders.
 */
export class S3FileStorage implements FileStorage {
  private s3Client: S3Client;

  private tenant: Tenant;

  constructor(s3Client: S3Client, tenant: Tenant) {
    this.s3Client = s3Client;
    this.tenant = tenant;
  }

  /**
   * Returns the object key under which the given file is expected to live.
   *
   * Attachments resolve under `tenant.attachments`, custom uploads under
   * `tenant.customUploads` and every other stored file under
   * `tenant.uploadedDocuments`. URL attachments are not handled yet and
   * yield the literal placeholder `'not implemented'`.
   */
  getPath(file: UwaziFile): string {
    // Object keys always use forward slashes, so join with the POSIX flavour
    // of `path` instead of the platform-dependent one.
    if (file instanceof Attachment) {
      return path.posix.join(this.tenant.attachments, file.filename);
    }
    if (file instanceof CustomUpload) {
      return path.posix.join(this.tenant.customUploads, file.filename);
    }
    if (file instanceof URLAttachment) {
      return 'not implemented';
    }
    return path.posix.join(this.tenant.uploadedDocuments, file.filename);
  }

  /**
   * Lists the keys of every object in the configured bucket whose key starts
   * with the tenant name, following continuation tokens until the listing is
   * exhausted. Each request asks for at most `config.s3.batchSize` keys.
   *
   * @returns The object keys, in the order the pages returned them. Entries
   * that carry no key are skipped.
   */
  async list(): Promise<string[]> {
    const keys: string[] = [];
    let continuationToken: string | undefined;

    do {
      // Each page needs the token returned by the previous one, so the
      // requests cannot run in parallel.
      // eslint-disable-next-line no-await-in-loop
      const response = await this.s3Client.send(
        new ListObjectsV2Command({
          Bucket: config.s3.bucket,
          // NOTE(review): a bare tenant name also matches keys of any other
          // tenant whose name merely starts with it — confirm whether a
          // trailing '/' is wanted here.
          Prefix: this.tenant.name,
          ContinuationToken: continuationToken,
          MaxKeys: config.s3.batchSize,
        })
      );

      (response.Contents ?? []).forEach(({ Key }) => {
        if (Key !== undefined) {
          keys.push(Key);
        }
      });
      continuationToken = response.NextContinuationToken;
    } while (continuationToken);

    return keys;
  }
}
125 changes: 125 additions & 0 deletions app/api/files.v2/infrastructure/specs/S3FileStorage.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import {
CreateBucketCommand,
DeleteBucketCommand,
DeleteObjectCommand,
ListObjectsCommand,
PutObjectCommand,
S3Client,
} from '@aws-sdk/client-s3';
import { config } from 'api/config';
import { Attachment } from 'api/files.v2/model/Attachment';
import { Document } from 'api/files.v2/model/Document';
import { Tenant } from 'api/tenants/tenantContext';
import { S3FileStorage } from '../S3FileStorage';

// Integration spec for S3FileStorage. It talks to a real S3-compatible
// endpoint at http://127.0.0.1:9000 (minio, per the forcePathStyle note
// below), so that service must be running for the spec to pass.
describe('S3FileStorage', () => {
let s3Client: S3Client;
let s3fileStorage: S3FileStorage;
let tenant: Tenant;

// Each test gets a freshly created bucket and a storage bound to a test tenant.
beforeEach(async () => {
config.s3 = {
endpoint: 'http://127.0.0.1:9000',
bucket: 'uwazi-development',
credentials: {
accessKeyId: 'minioadmin',
secretAccessKey: 'minioadmin',
},
// S3FileStorage.list passes batchSize as MaxKeys; presumably set to 1 so
// that listing more than one object exercises pagination.
batchSize: 1,
};

s3Client = new S3Client({
apiVersion: 'latest',
region: 'region',
forcePathStyle: true, // needed for minio
...config.s3,
});
await s3Client.send(new CreateBucketCommand({ Bucket: 'uwazi-development' }));

tenant = {
name: 'test-tenant',
dbName: 'test-tenant',
indexName: 'test-tenant',
uploadedDocuments: 'test-tenant/documents',
attachments: 'test-tenant/attachments',
customUploads: 'test-tenant/customUploads',
activityLogs: 'test-tenant/log',
};

s3fileStorage = new S3FileStorage(s3Client, tenant);
});

// Teardown order matters: every object is deleted first, then the bucket,
// then the client is destroyed (presumably the bucket must be empty before it
// can be deleted).
afterEach(async () => {
const allBucketKeys = (
(
await s3Client.send(
new ListObjectsCommand({
Bucket: 'uwazi-development',
})
)
).Contents || []
).map(content => content.Key);

await Promise.all(
allBucketKeys.map(async key =>
s3Client.send(
new DeleteObjectCommand({
Bucket: 'uwazi-development',
Key: key,
})
)
)
);
await s3Client.send(new DeleteBucketCommand({ Bucket: 'uwazi-development' }));
s3Client.destroy();
});

describe('list', () => {
// Uploads two objects under the tenant prefix and expects both keys back;
// both sides are sorted so the comparison does not depend on listing order.
it('should list all s3 keys', async () => {
await s3Client.send(
new PutObjectCommand({
Bucket: 'uwazi-development',
Key: 'test-tenant/documents/document1',
Body: 'body',
})
);

await s3Client.send(
new PutObjectCommand({
Bucket: 'uwazi-development',
Key: 'test-tenant/documents/document2',
Body: 'body',
})
);

const listedFiles = await s3fileStorage.list();

expect(listedFiles.sort()).toEqual(
['test-tenant/documents/document1', 'test-tenant/documents/document2'].sort()
);
});
});

describe('getPath', () => {
// Table-driven: each case pairs a model instance with the key expected from
// the tenant folders configured in beforeEach.
// NOTE(review): S3FileStorage.getPath also has a CustomUpload branch that has
// no case here; the URLAttachment case is disabled because getPath does not
// implement it yet.
it.each([
{
file: new Document('id', 'entity', 1, 'document'),
expected: 'test-tenant/documents/document',
},
{
file: new Attachment('id', 'entity', 1, 'attachment'),
expected: 'test-tenant/attachments/attachment',
},
// {
// file: new URLAttachment('id', 'filename', 'entity', 1, 'url'),
// expected: 'test-tenant/?????/filename',
// },
])(
'should use dinamic paths based on tenant ($file.filename -> $expected)',
async ({ file, expected }) => {
const key = s3fileStorage.getPath(file);
expect(key).toBe(expected);
}
);
});
});
10 changes: 10 additions & 0 deletions app/api/files.v2/model/Attachment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { BaseFile } from './BaseFile';

/**
 * A file model that, on top of the BaseFile fields, carries the filename it
 * is stored under.
 */
export class Attachment extends BaseFile {
// Filename of the attachment; S3FileStorage.getPath joins it to the tenant's
// attachments folder.
readonly filename: string;

// Forwards id, entity and totalPages to BaseFile and records the filename.
constructor(id: string, entity: string, totalPages: number, filename: string) {
super(id, entity, totalPages);
this.filename = filename;
}
}
13 changes: 13 additions & 0 deletions app/api/files.v2/model/BaseFile.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Immutable fields common to every file model: all three are readonly and
 * assigned once in the constructor.
 */
export class BaseFile {
// Identifier of the file record (FileMappers.toModel passes the stringified `_id`).
readonly id: string;

// Value of the record's `entity` field.
// NOTE(review): presumably the identifier of the owning entity — confirm.
readonly entity: string;

// Value of the record's `totalPages` field.
readonly totalPages: number;

constructor(id: string, entity: string, totalPages: number) {
this.id = id;
this.entity = entity;
this.totalPages = totalPages;
}
}
Loading

0 comments on commit 895a492

Please sign in to comment.