Skip to content

Commit

Permalink
Merge pull request #6 from zazuko/s3
Browse files Browse the repository at this point in the history
S3 support
  • Loading branch information
ludovicm67 authored Aug 19, 2024
2 parents 1121b80 + 70a083d commit 38c1c24
Show file tree
Hide file tree
Showing 6 changed files with 1,808 additions and 158 deletions.
34 changes: 23 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,29 @@ npm run start # Start the script

## Environment variables

| Name | Description | Default Value |
| ------------------------ | ------------------------------------------------------------------------- | ------------- |
| **CACHE_ENDPOINT** | The URL of the cache endpoint | `""` |
| CACHE_ENDPOINT_USERNAME | The username for the cache endpoint | `""` |
| CACHE_ENDPOINT_PASSWORD | The password for the cache endpoint | `""` |
| CACHE_DEFAULT_ENTRY_NAME | The default entry name for the cache | `"default"` |
| CACHE_TAG_HEADER | The header name for the cache tag | `"xkey"` |
| SUPPORT_URL_ENCODED | Whether to clear the cache for the URL-encoded version of the dataset URI | `"true"` |
| **SPARQL_ENDPOINT_URL** | The URL of the SPARQL endpoint | `""` |
| SPARQL_USERNAME | The username for the SPARQL endpoint | `""` |
| SPARQL_PASSWORD | The password for the SPARQL endpoint | `""` |
| Name | Description | Default Value |
| ------------------------ | ------------------------------------------------------------------------- | ---------------------- |
| **CACHE_ENDPOINT** | The URL of the cache endpoint | `""` |
| CACHE_ENDPOINT_USERNAME | The username for the cache endpoint | `""` |
| CACHE_ENDPOINT_PASSWORD | The password for the cache endpoint | `""` |
| CACHE_DEFAULT_ENTRY_NAME | The default entry name for the cache | `"default"` |
| CACHE_TAG_HEADER | The header name for the cache tag | `"xkey"` |
| SUPPORT_URL_ENCODED | Whether to clear the cache for the URL-encoded version of the dataset URI | `"true"` |
| **SPARQL_ENDPOINT_URL** | The URL of the SPARQL endpoint | `""` |
| SPARQL_USERNAME | The username for the SPARQL endpoint | `""` |
| SPARQL_PASSWORD | The password for the SPARQL endpoint | `""` |
| S3_ENABLED | Whether to store the timestamp of the last run in S3 | `"false"` |
| S3_LAST_TIMESTAMP_KEY | The key for the last timestamp file in S3 | `"last_timestamp.txt"` |
| S3_BUCKET | The S3 bucket name | `"default"` |
| S3_ACCESS_KEY_ID | The S3 access key ID | `""` |
| S3_SECRET_ACCESS_KEY | The S3 secret access key | `""` |
| S3_REGION | The S3 region | `"default"` |
| S3_ENDPOINT | The S3 endpoint | `""` |
| S3_SSL_ENABLED | Whether to use SSL for S3 | `"false"` |
| S3_FORCE_PATH_STYLE | Whether to force path style for S3 | `"false"` |

If `S3_ENABLED` is set to `true`, the first time you run the script you might see an error message saying that the last timestamp file does not exist. This is expected: the script creates the file automatically at the end of the first run and updates it every time it runs.
You will not see this error message again after the first run.

## License

Expand Down
11 changes: 11 additions & 0 deletions example.env
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,14 @@ SUPPORT_URL_ENCODED="true"
SPARQL_ENDPOINT_URL="" # Required!
SPARQL_USERNAME=""
SPARQL_PASSWORD=""

# S3 configuration
S3_ENABLED="true"
S3_LAST_TIMESTAMP_KEY="last_timestamp.txt"
S3_ACCESS_KEY_ID="admin"
S3_SECRET_ACCESS_KEY="thisisasecret"
S3_REGION="default"
S3_ENDPOINT="http://localhost:9000"
S3_SSL_ENABLED="false"
S3_FORCE_PATH_STYLE="true"
S3_BUCKET="default"
32 changes: 31 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
// @ts-check

import "dotenv/config";
import { ParsingClient } from "sparql-http-client";
import { getObject, saveObject } from "./lib/s3.js";

// ISO timestamp taken when the script starts
const currentDateTime = new Date().toISOString();

// Fallback comparison date used when no date is provided: exactly 24 hours ago
const oneDayInMs = 24 * 60 * 60 * 1000;
const yesterday = new Date(Date.now() - oneDayInMs);
const yesterdayStr = yesterday.toISOString();

// S3 configuration: only the exact string "true" enables S3, and an unset or
// empty key name falls back to "last_timestamp.txt"
const s3Enabled = process.env.S3_ENABLED === "true";
const s3LastTimestampKey = process.env.S3_LAST_TIMESTAMP_KEY || "last_timestamp.txt";

// Cache entry name for unnamed cache entries ; it will be cleared if any of the named cache entries are cleared
const cacheEndpoint = process.env.CACHE_ENDPOINT || "";
const cacheEndpointUsername = process.env.CACHE_ENDPOINT_USERNAME || "";
Expand All @@ -21,7 +30,23 @@ const sparqlUsername = process.env.SPARQL_USERNAME || "";
const sparqlPassword = process.env.SPARQL_PASSWORD || "";

// Get the date to compare with
const previousDateStr = process.env.DEFAULT_PREVIOUS_DATE || yesterdayStr; // 1 day ago
// Start from the configured default (or 1 day ago) and, when S3 is enabled,
// prefer the timestamp stored in S3.
let previousDateStr = process.env.DEFAULT_PREVIOUS_DATE || yesterdayStr; // 1 day ago
if (s3Enabled) {
  try {
    const lastTimestamp = await getObject(s3LastTimestampKey);
    let trimmed;
    if (lastTimestamp.Body) {
      const bodyAsString = await lastTimestamp.Body.transformToString();
      trimmed = bodyAsString.trim();
    }
    if (!trimmed) {
      // Empty or missing body: keep the default date
    } else if (Number.isNaN(Date.parse(trimmed))) {
      // A corrupt timestamp file would otherwise turn `previousDate` into an
      // Invalid Date, so ignore it and keep the default date instead.
      console.error(`Ignoring invalid last timestamp found in S3: ${trimmed}`);
    } else {
      console.log(`Last timestamp found in S3: ${trimmed}`);
      previousDateStr = trimmed;
    }
  } catch (error) {
    // Best effort: any failure to read the object is logged and the default
    // date is used.
    console.error(`Failed to get last timestamp from S3: ${error}`);
  }
}
const previousDate = new Date(previousDateStr);

// Tell the user that some required environment variables are missing
Expand Down Expand Up @@ -119,6 +144,11 @@ const promises = await Promise.allSettled(entriesToClearArray.map(async (entry)
}
}));

// Store the start time of this run as the new last timestamp in the S3 bucket
if (s3Enabled) {
  const timestampContentType = "text/plain";
  await saveObject(s3LastTimestampKey, currentDateTime, timestampContentType);
}

// Return the right status code
const failedPromises = promises.filter((p) => p.status === "rejected");
if (failedPromises.length > 0) {
Expand Down
57 changes: 57 additions & 0 deletions lib/s3.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// @ts-check

import { GetObjectCommand, PutObjectCommand, S3Client } from "@aws-sdk/client-s3";

/**
 * Name of the S3 bucket used by the helpers in this module.
 * Read from the `S3_BUCKET` environment variable; an unset or empty value
 * falls back to "default".
 * @type {string}
 * @default "default"
 */
export const s3Bucket = process.env.S3_BUCKET || "default";

/**
 * The S3 client, configured from the `S3_*` environment variables.
 *
 * Variables are read directly instead of through a template literal:
 * interpolating an unset variable produces the truthy string "undefined",
 * which would bypass every `||` fallback below.
 *
 * - `S3_ACCESS_KEY_ID` / `S3_SECRET_ACCESS_KEY`: credentials (default `""`).
 * - `S3_REGION`: region (default `"default"`).
 * - `S3_ENDPOINT`: custom endpoint; left undefined when unset or empty so the
 *   string "undefined" is never passed to the client as an endpoint.
 * - `S3_SSL_ENABLED` / `S3_FORCE_PATH_STYLE`: enabled only by the exact
 *   string "true".
 *
 * @type {S3Client}
 */
export const s3Client = new S3Client({
  credentials: {
    accessKeyId: process.env.S3_ACCESS_KEY_ID || "",
    secretAccessKey: process.env.S3_SECRET_ACCESS_KEY || "",
  },
  region: process.env.S3_REGION || "default",
  endpoint: process.env.S3_ENDPOINT || undefined,
  tls: process.env.S3_SSL_ENABLED === "true",
  forcePathStyle: process.env.S3_FORCE_PATH_STYLE === "true",
});

/**
 * Fetch one object from the configured bucket.
 *
 * Builds a `GetObjectCommand` for `s3Bucket` and the given key and sends it
 * through `s3Client`.
 *
 * @param {string} key The key of the object to get.
 * @returns {Promise<import('@aws-sdk/client-s3').GetObjectCommandOutput>}
 */
export const getObject = async (key) => {
  const request = { Bucket: s3Bucket, Key: key };
  return s3Client.send(new GetObjectCommand(request));
};

/**
 * Store one object in the configured bucket.
 *
 * Builds a `PutObjectCommand` for `s3Bucket` with the given key, body and
 * content type, and sends it through `s3Client`.
 *
 * @param {string} key The key of the object to save.
 * @param {string} body The body of the object to save.
 * @param {string} [contentType="text/plain"] The content type of the object to save.
 * @returns {Promise<import('@aws-sdk/client-s3').PutObjectCommandOutput>}
 */
export const saveObject = async (key, body, contentType = "text/plain") => {
  const request = {
    Bucket: s3Bucket,
    Key: key,
    Body: body,
    ContentType: contentType,
  };
  return s3Client.send(new PutObjectCommand(request));
};
Loading

0 comments on commit 38c1c24

Please sign in to comment.