diff --git a/README.md b/README.md index d9522557..db5091eb 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ See full schema: [test_report.proto](./proto/test_report.proto) - Export - BigQuery - Local file (output JSON or JSON Lines) + - Google Cloud Storage (GCS) # USAGE ```bash @@ -118,8 +119,9 @@ Most recommend tag for user is `v{major}`. If you prefere more conservetive vers - LastRunStore - GOOGLE_APPLICATION_CREDENTIALS -## Setup BigQuery (Recommend) -If you want to use `bigquery_exporter`, you have to create dataset and table that CIAnalyzer will export data to it. +## Setup Exporter +### Setup BigQuery table (Recommend) +If you want to use `exporter.bigquery`, you have to create dataset and table that CIAnalyzer will export data to it. ```bash # Prepare bigquery schema json files @@ -153,7 +155,24 @@ bq mk \ And also GCP service account used for CIAnalyzer needs some BigQuery permissions. Please attach `roles/bigquery.dataEditor` and `roles/bigquery.jobUser`. More detail, check [BigQuery access control document](https://cloud.google.com/bigquery/docs/access-control). -## Setup GCS bucket (Recommend) +### Setup GCS +If you want to use `exporter.gcs`, you have to create a bucket that CIAnalyzer will export data to. + +BigQuery can also read JSONL formatted data stored in GCS as [external tables](https://cloud.google.com/bigquery/docs/external-data-cloud-storage), so it is useful to save data to GCS instead of exporting directly to a BigQuery table. In that case, it is recommended to save data in a path that includes the DATE to be recognized as a Hive partition for efficient querying from BigQuery. 
+ +see: https://cloud.google.com/bigquery/docs/hive-partitioned-queries + +CIAnalyzer can save data to a path with date partitions by specifying a `prefixTemplate` in the configuration file as follows: + +```yaml +exporter: + gcs: + project: $GCP_PROJECT_ID + bucket: $BUCKET_NAME + prefixTemplate: ci_analyzer/{reportType}/dt={YYYY}-{MM}-{DD}/ +``` + +## Setup LastRunStore ### What is LastRunStore CIAnalyzer collects build data from each CI service API, but there may be duplicates of the previously collected data. To remove the duplicate, it is necessary to save the last build number of the previous run and output only the difference from the previous run. @@ -163,7 +182,7 @@ By default, CIAnalyzer uses a local JSON file as a backend for LastRunStore. How Resolving these problems, CIAnalyzer can use GCS as LastRunStore to read/write the last build number from any machine. It inspired by [Terraform backend](https://www.terraform.io/docs/backends/index.html). -### Create GCS bucket +### Setup GCS bucket (Recommend) If you want to use `lastRunStore.backend: gcs`, you have to create GCS bucket before execute CIAnalyzer. ```bash @@ -385,15 +404,25 @@ To load your custom schema JSON from CIAnalyzer that runs inside of container, y See sample [cron.jenkinsfile](./sample/cron.jenkinsfile). 
-# Roadmap -- [x] Collect test data -- [x] Collect any of JSON format from build artifacts -- [x] Support Bitrise -- [x] Support CircleCI API v2 -- [x] Implement better logger -- [x] Better error message -- [x] Export commit message -- [x] Export executor data (CircleCI, Bitrise) +# Roadmap and features +- Supported CI services + - [x] GitHub Actions + - [x] CircleCI API v2 + - [x] Bitrise + - [x] Jenkins +- Supported data + - [x] Workflow, Job + - [x] Test data (JUnit format) + - [x] Any of JSON format from build artifacts +- Supported exporters + - [x] Local file + - [x] BigQuery + - [x] Google Cloud Storage + - [ ] S3/S3 compatible storage +- Supported LastRunStore + - [x] Local file + - [x] Google Cloud Storage + - [ ] S3/S3 compatible storage # Debug options - Fetch only selected service @@ -405,7 +434,7 @@ See sample [cron.jenkinsfile](./sample/cron.jenkinsfile). - Enable debug mode - `--debug` - Limit fetching build results only 10 by each services - - Export result to local only + - Export result to local only if `--only-exporters` is omitted - Don't loading and storing last build number - Enable debug log - `export CI_ANALYZER_DEBUG=1` diff --git a/__tests__/exporter/bigquery_exporter.test.ts b/__tests__/exporter/bigquery_exporter.test.ts index 470284a9..726629c3 100644 --- a/__tests__/exporter/bigquery_exporter.test.ts +++ b/__tests__/exporter/bigquery_exporter.test.ts @@ -1,7 +1,7 @@ import path from "node:path"; import { vi, describe, it, expect, beforeEach } from "vitest"; import { BigqueryExporter } from "../../src/exporter/bigquery_exporter"; -import type { BigqueryExporterConfig } from "../../src/config/config"; +import type { BigqueryExporterConfig } from "../../src/config/schema"; import { CustomReportCollection } from "../../src/custom_report_collection"; import { Logger } from "tslog"; diff --git a/__tests__/exporter/gcs_exporter.test.ts b/__tests__/exporter/gcs_exporter.test.ts new file mode 100644 index 00000000..ae585854 --- /dev/null +++ 
b/__tests__/exporter/gcs_exporter.test.ts @@ -0,0 +1,137 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; +import { GcsExporter } from "../../src/exporter/gcs_exporter"; +import type { GcsExporterConfig } from "../../src/config/schema"; +import { Logger } from "tslog"; + +const mockStorage = { + bucket: vi.fn().mockReturnThis(), + file: vi.fn().mockReturnThis(), + save: vi.fn(), +}; +const logger = new Logger({ type: "hidden" }); + +describe("GcsExporter", () => { + const baseConfig: GcsExporterConfig = { + project: "project", + bucket: "bucket", + prefixTemplate: "ci_analyzer/{reportType}/dt={YYYY}-{MM}-{DD}/", + }; + + beforeEach(() => { + // Mock the current time for `now = dayjs()` + vi.useFakeTimers(); + vi.setSystemTime(new Date("2023-01-01T12:34:56Z")); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe("new", () => { + it("should not throw when all required params are provided", () => { + expect(() => { + new GcsExporter(logger, "github", baseConfig); + }).not.toThrow(); + }); + + it("should throw when prefixTemplate does not include {reportType}", () => { + const config = { + ...baseConfig, + prefixTemplate: "ci_analyzer/dt={YYYY}-{MM}-{DD}/", + }; + expect(() => { + new GcsExporter(logger, "github", config); + }).toThrow(); + }); + }); + + describe("export", () => { + let exporter: GcsExporter; + + beforeEach(() => { + exporter = new GcsExporter(logger, "github", baseConfig); + exporter.storage = mockStorage as any; + }); + + it("exportWorkflowReports should create correct file path when all reports have the same createdAt", async () => { + const report = [{ createdAt: "2023-01-01T12:34:56Z" }]; + await exporter.exportWorkflowReports(report as any); + + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/workflow/dt=2023-01-01/20230101-123456-workflow-github.json", + ); + }); + + it("exportWorkflowReports should create correct file paths when reports have different createdAt", async () => { + const 
reports = [ + { createdAt: "2023-01-01T12:34:56Z" }, + { createdAt: "2022-12-31T12:34:56Z" }, + { createdAt: "2023-01-01T12:34:56Z" }, + ]; + await exporter.exportWorkflowReports(reports as any); + + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/workflow/dt=2023-01-01/20230101-123456-workflow-github.json", + ); + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/workflow/dt=2022-12-31/20230101-123456-workflow-github.json", + ); + }); + + it("exportTestReports should create correct file path when all reports have the same createdAt", async () => { + const report = [{ createdAt: "2023-01-01T12:34:56Z" }]; + await exporter.exportTestReports(report as any); + + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/test/dt=2023-01-01/20230101-123456-test-github.json", + ); + }); + + it("exportTestReports should create correct file paths when reports have different createdAt", async () => { + const reports = [ + { createdAt: "2023-01-01T12:34:56Z" }, + { createdAt: "2022-12-31T12:34:56Z" }, + { createdAt: "2023-01-01T12:34:56Z" }, + ]; + await exporter.exportTestReports(reports as any); + + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/test/dt=2023-01-01/20230101-123456-test-github.json", + ); + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/test/dt=2022-12-31/20230101-123456-test-github.json", + ); + }); + + it("exportCustomReports should create correct file path when all reports have the same createdAt", async () => { + const report = [{ createdAt: "2023-01-01T12:34:56Z" }]; + const customReportCollection = { + customReports: new Map([["custom", report]]), + }; + await exporter.exportCustomReports(customReportCollection as any); + + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/custom/dt=2023-01-01/20230101-123456-custom-github.json", + ); + }); + + it("exportCustomReports should create correct file paths when reports have different createdAt", async () => { + const reports = [ + { 
createdAt: "2023-01-01T12:34:56Z" }, + { createdAt: "2022-12-31T12:34:56Z" }, + { createdAt: "2023-01-01T12:34:56Z" }, + ]; + const customReportCollection = { + customReports: new Map([["custom", reports]]), + }; + await exporter.exportCustomReports(customReportCollection as any); + + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/custom/dt=2023-01-01/20230101-123456-custom-github.json", + ); + expect(mockStorage.file).toHaveBeenCalledWith( + "ci_analyzer/custom/dt=2022-12-31/20230101-123456-custom-github.json", + ); + }); + }); +}); diff --git a/__tests__/exporter/local_exporter.test.ts b/__tests__/exporter/local_exporter.test.ts index ca411a17..3d780f56 100644 --- a/__tests__/exporter/local_exporter.test.ts +++ b/__tests__/exporter/local_exporter.test.ts @@ -1,7 +1,7 @@ import { vi, describe, it, expect, beforeEach } from "vitest"; import { LocalExporter } from "../../src/exporter/local_exporter"; import path from "node:path"; -import type { LocalExporterConfig } from "../../src/config/config"; +import type { LocalExporterConfig } from "../../src/config/schema"; import { Logger } from "tslog"; const mockFsPromises = { diff --git a/ci_analyzer.yaml b/ci_analyzer.yaml index 88bd2eca..18719e6a 100644 --- a/ci_analyzer.yaml +++ b/ci_analyzer.yaml @@ -31,6 +31,13 @@ github: table: $CUSTOM_REPORT_TABLE schema: ./$SCHEMA_DIR/$CUSTOM_REPORT_TABLE_SCHEMA.json # It accepts absolute path or relative path from this config yaml. maxBadRecords: 0 # (Optional) default: 0. If set > 0, skip bad record. This option should only be used for workaround. + gcs: + project: $GCP_PROJECT_ID + bucket: $BUCKET_NAME + # {reportType} placeholder is Required. + # {YYYY}, {MM}, {DD} placeholders are optional. 
+ # If you want to use BigQuery external tables, the GCS path should be in a format supported by Hive partitions like this + prefixTemplate: ci_analyzer/{reportType}/dt={YYYY}-{MM}-{DD}/ lastRunStore: backend: gcs # Recommend using 'gcs' backend project: $GCP_PROJECT_ID diff --git a/package.json b/package.json index b697ee82..29739e94 100644 --- a/package.json +++ b/package.json @@ -32,8 +32,8 @@ "biome:ci": "biome ci .", "lint:fix": "biome lint --apply-unsafe .", "fmt:fix": "biome format --write .", - "test": "vitest", - "test:ci": "vitest --run --coverage", + "test": "TZ=UTC vitest", + "test:ci": "TZ=UTC vitest --run --coverage", "proto": "earthly --strict --remote-cache=ghcr.io/kesin11/ci_analyzer_earthly:cache +proto", "docker": "earthly --strict --remote-cache=ghcr.io/kesin11/ci_analyzer_earthly:cache +docker", "schema": "earthly --strict --remote-cache=ghcr.io/kesin11/ci_analyzer_earthly:cache +schema" diff --git a/schema.json b/schema.json index 118e6aa2..c0161c42 100644 --- a/schema.json +++ b/schema.json @@ -89,6 +89,26 @@ } }, "additionalProperties": false + }, + "gcs": { + "type": "object", + "properties": { + "project": { + "type": "string" + }, + "bucket": { + "type": "string" + }, + "prefixTemplate": { + "type": "string" + } + }, + "required": [ + "project", + "bucket", + "prefixTemplate" + ], + "additionalProperties": false } }, "additionalProperties": false diff --git a/src/config/schema.ts b/src/config/schema.ts index 96c45045..7086e329 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -28,9 +28,17 @@ const bigqueryExporterSchema = z.object({ }); export type BigqueryExporterConfig = z.infer; +const gcsExporterSchema = z.object({ + project: z.string(), + bucket: z.string(), + prefixTemplate: z.string(), +}); +export type GcsExporterConfig = z.infer; + const exporterSchema = z.object({ local: localExporterSchema.optional(), bigquery: bigqueryExporterSchema.optional(), + gcs: gcsExporterSchema.optional(), }); export type ExporterConfig = 
z.infer; diff --git a/src/exporter/exporter.ts b/src/exporter/exporter.ts index 36aed560..59eb4bd9 100644 --- a/src/exporter/exporter.ts +++ b/src/exporter/exporter.ts @@ -4,11 +4,13 @@ import type { ExporterConfig, LocalExporterConfig, BigqueryExporterConfig, + GcsExporterConfig, } from "../config/schema.js"; import { BigqueryExporter } from "./bigquery_exporter.js"; import type { CustomReportCollection } from "../custom_report_collection.js"; import type { ArgumentOptions } from "../arg_options.js"; import type { Logger } from "tslog"; +import { GcsExporter } from "./gcs_exporter.js"; export interface Exporter { exportWorkflowReports(reports: WorkflowReport[]): Promise; @@ -26,7 +28,7 @@ export class CompositExporter implements Exporter { service: string, config?: ExporterConfig, ) { - if (options.debug || !config) { + if ((options.debug && options.onlyExporters === undefined) || !config) { this.exporters = [ new LocalExporter(logger, service, options.configDir, {}), ]; @@ -41,7 +43,10 @@ export class CompositExporter implements Exporter { this.exporters = exporters .map((exporter) => { - let _config: LocalExporterConfig | BigqueryExporterConfig; + let _config: + | LocalExporterConfig + | BigqueryExporterConfig + | GcsExporterConfig; switch (exporter) { case "local": _config = config[exporter] ?? {}; @@ -54,6 +59,13 @@ export class CompositExporter implements Exporter { case "bigquery": _config = config[exporter] ?? {}; return new BigqueryExporter(logger, _config, options.configDir); + case "gcs": + _config = config[exporter] ?? 
{}; + return new GcsExporter( + logger, + service, + _config as GcsExporterConfig, + ); } }) .filter((exporter) => exporter !== undefined); diff --git a/src/exporter/gcs_exporter.ts b/src/exporter/gcs_exporter.ts new file mode 100644 index 00000000..ebfefa97 --- /dev/null +++ b/src/exporter/gcs_exporter.ts @@ -0,0 +1,103 @@ +import path from "node:path"; +import { Storage } from "@google-cloud/storage"; +import type { Logger } from "tslog"; +import dayjs from "dayjs"; +import type { Exporter } from "./exporter.js"; +import type { WorkflowReport, TestReport } from "../analyzer/analyzer.js"; +import type { GcsExporterConfig } from "../config/schema.js"; +import type { + CustomReport, + CustomReportCollection, +} from "../custom_report_collection.js"; + +export class GcsExporter implements Exporter { + service: string; + storage: Storage; + bucketName: string; + prefixTemplate: string; + logger: Logger; + + constructor( + logger: Logger, + service: string, + config: GcsExporterConfig, + ) { + if (!config.project || !config.bucket || !config.prefixTemplate) { + throw new Error( + "Must need 'project', 'bucket', and 'prefixTemplate' parameters in exporter.gcs config.", + ); + } + if (!config.prefixTemplate.includes("{reportType}")) { + throw new Error( + "prefixTemplate must include '{reportType}' placeholder.", + ); + } + this.service = service; + this.logger = logger.getSubLogger({ name: GcsExporter.name }); + this.storage = new Storage({ projectId: config.project }); + this.bucketName = config.bucket; + this.prefixTemplate = config.prefixTemplate; + } + + private formatJsonLines(reports: unknown[]): string { + return reports.map((report) => JSON.stringify(report)).join("\n"); + } + + private async export( + reportType: string, + reports: WorkflowReport[] | TestReport[] | CustomReport[], + ) { + // NOTE: these can replace with `Object.groupBy` from Node.js v21 + const groupedReports = reports.reduce( + (acc, report) => { + const createdAt = dayjs(report.createdAt); + 
const dirPath = this.prefixTemplate + .replace("{reportType}", reportType) + .replace("{YYYY}", createdAt.format("YYYY")) + .replace("{MM}", createdAt.format("MM")) + .replace("{DD}", createdAt.format("DD")); + + if (!acc[dirPath]) { + acc[dirPath] = []; + } + acc[dirPath].push(report); + return acc; + }, + {} as Record, + ); + + const now = dayjs(); + for (const [dirPath, reports] of Object.entries(groupedReports)) { + const filePath = path.join( + dirPath, + `${now.format("YYYYMMDD-HHmmss")}-${reportType}-${this.service}.json`, + ); + const file = this.storage.bucket(this.bucketName).file(filePath); + const reportJson = this.formatJsonLines(reports); + + this.logger.info( + `Uploading ${reportType} reports to gs://${this.bucketName}/${filePath}`, + ); + + await file.save(reportJson); + + this.logger.info( + `Successfully uploaded to gs://${this.bucketName}/${filePath}`, + ); + } + } + + async exportWorkflowReports(reports: WorkflowReport[]) { + await this.export("workflow", reports); + } + + async exportTestReports(reports: TestReport[]) { + await this.export("test", reports); + } + + async exportCustomReports(customReportCollection: CustomReportCollection) { + for (const [name, reports] of customReportCollection.customReports) { + await this.export(name, reports); + } + } +} diff --git a/src/exporter/local_exporter.ts b/src/exporter/local_exporter.ts index 4bd2f272..0e4be4e8 100644 --- a/src/exporter/local_exporter.ts +++ b/src/exporter/local_exporter.ts @@ -4,7 +4,10 @@ import dayjs from "dayjs"; import type { WorkflowReport, TestReport } from "../analyzer/analyzer.js"; import type { Exporter } from "./exporter.js"; import type { LocalExporterConfig } from "../config/schema.js"; -import type { CustomReportCollection } from "../custom_report_collection.js"; +import type { + CustomReport, + CustomReportCollection, +} from "../custom_report_collection.js"; import type { Logger } from "tslog"; const defaultOutDir = "output"; @@ -32,32 +35,35 @@ export class 
LocalExporter implements Exporter { this.logger = logger.getSubLogger({ name: LocalExporter.name }); } - private async exportReports(type: string, reports: unknown[]) { + private async export( + reportType: string, + reports: WorkflowReport[] | TestReport[] | CustomReport[], + ) { await this.fsPromises.mkdir(this.outDir, { recursive: true }); const now = dayjs(); const outputPath = path.join( this.outDir, - `${now.format("YYYYMMDD-HHmm")}-${type}-${this.service}.json`, + `${now.format("YYYYMMDD-HHmm")}-${reportType}-${this.service}.json`, ); const formated = this.formatter(reports); await this.fsPromises.writeFile(outputPath, formated, { encoding: "utf8" }); - this.logger.info(`Export ${type} reports to ${outputPath}`); + this.logger.info(`Export ${reportType} reports to ${outputPath}`); } async exportWorkflowReports(reports: WorkflowReport[]) { - await this.exportReports("workflow", reports); + await this.export("workflow", reports); } async exportTestReports(reports: TestReport[]) { - await this.exportReports("test", reports); + await this.export("test", reports); } async exportCustomReports(customReportCollection: CustomReportCollection) { for (const [name, reports] of customReportCollection.customReports) { - await this.exportReports(name, reports); + await this.export(name, reports); } }