Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: implement feature for retrieving additional content #203

Merged
merged 12 commits into from
Jul 30, 2024
37 changes: 37 additions & 0 deletions actions/StringUtils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright 2023 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
/**
 * Shortens `text` to roughly `maxLength` characters, preferring a natural break.
 *
 * Text that already fits is returned unchanged. Otherwise the first `maxLength`
 * UTF-16 code units are taken and then cut back:
 *  - at the last period, which is kept (no ellipsis is added);
 *  - otherwise at the last space, which is dropped and replaced by '...';
 *  - otherwise '...' is appended to the cut text as-is.
 * Because the ellipsis is appended after cutting, the result can be up to three
 * characters longer than `maxLength`.
 *
 * @param {string} text - The text to shorten.
 * @param {number} maxLength - Number of UTF-16 code units to keep before any ellipsis.
 * @returns {string} The original text, or its shortened form.
 */
export function truncateText(text, maxLength) {
  if (text.length <= maxLength) {
    return text;
  }

  let truncated = text.substring(0, maxLength);

  // substring() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). Drop the dangling high
  // surrogate rather than emitting a broken character before the ellipsis.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  const nextUnit = text.charCodeAt(truncated.length);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  const nextIsLowSurrogate = nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
  if (endsOnHighSurrogate && nextIsLowSurrogate) {
    truncated = truncated.slice(0, -1);
  }

  // Find the last period in the truncated string
  const lastPeriod = truncated.lastIndexOf('.');

  // If there's a period, cut off at that point (the period itself is kept)
  if (lastPeriod !== -1) {
    truncated = truncated.substring(0, lastPeriod + 1);
  } else {
    // If no period is found, look for the last space
    const lastSpace = truncated.lastIndexOf(' ');
    if (lastSpace !== -1) {
      truncated = `${truncated.substring(0, lastSpace)}...`;
    } else {
      // If no space is found, just add ellipsis to the end
      truncated += '...';
    }
  }

  return truncated;
}
51 changes: 51 additions & 0 deletions actions/StringUtils.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2023 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
const { truncateText } = require('./StringUtils.js');

// Table-driven suite for truncateText: each entry becomes one Jest test whose
// checks are [input, maxLength, expected] triples asserted in order.
describe('truncateText', () => {
  const scenarios = [
    {
      title: 'returns the original text if text length is less than or equal to maxLength',
      checks: [
        ['Hello', 10, 'Hello'],
        ['Short text.', 11, 'Short text.'],
      ],
    },
    {
      title: 'truncates the text to maxLength and appends ellipsis if there is no period or space',
      checks: [['ThisIsAVeryLongWordThatExceedsMaxLength', 10, 'ThisIsAVer...']],
    },
    {
      title: 'truncates the text at the last period if it is within maxLength',
      checks: [['This is a test. This should be truncated.', 20, 'This is a test.']],
    },
    {
      title: 'truncates the text at the last space within maxLength and appends ellipsis',
      checks: [['This is a test of truncation', 20, 'This is a test of...']],
    },
    {
      title: 'adds ellipsis if the truncated text has no space or period',
      checks: [['HelloWorld', 5, 'Hello...']],
    },
    {
      title: 'handles empty string',
      checks: [['', 5, '']],
    },
    {
      title: 'handles text with no spaces or periods',
      checks: [['abcdefghij', 5, 'abcde...']],
    },
    {
      title: 'handles text with multiple periods',
      checks: [['Hello. This is a test. Here is another sentence.', 25, 'Hello. This is a test.']],
    },
    {
      title: 'handles text with periods at the very end of maxLength',
      checks: [['Hello. This is a test.', 22, 'Hello. This is a test.']],
    },
  ];

  scenarios.forEach(({ title, checks }) => {
    test(title, () => {
      checks.forEach(([input, maxLength, expected]) => {
        expect(truncateText(input, maxLength)).toBe(expected);
      });
    });
  });
});
40 changes: 40 additions & 0 deletions actions/scraper/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright 2023 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
const { JSDOM } = require('jsdom');
const wretch = require('../Network.js');
const { asGenericAction } = require('../GenericAction.js');
const { asFirefallAction } = require('../FirefallAction.js');
const { asAuthZAction } = require('../AuthZAction.js');
const { asAuthNAction } = require('../AuthNAction.js');
const { truncateText } = require('../StringUtils.js');

// Scraped text chunks must be longer than this (after whitespace collapsing) to be kept.
const MIN_CHUNK_LENGTH = 10;
// Length limit handed to truncateText for the combined scraped text.
const MAX_CONTENT_LENGTH = 1500;

/**
 * Scrapes text from a web page and sends it, prefixed by a prompt, to the
 * Firefall client for completion.
 *
 * Steps: fetch the HTML at `url`, parse it with JSDOM, collect the text content
 * of every element matching `selector` (whitespace runs collapsed to a single
 * space, chunks of MIN_CHUNK_LENGTH characters or fewer dropped), join the
 * chunks with newlines, truncate to MAX_CONTENT_LENGTH via truncateText, and
 * request a completion for `${prompt}:\n\n${truncatedText}`.
 *
 * @param {object} params - Action parameters.
 * @param {string} params.url - Address of the page to fetch.
 * @param {string} params.selector - CSS selector passed to querySelectorAll.
 * @param {string} params.prompt - Text placed before the scraped content.
 * @param {object} params.firefallClient - Object exposing `completion(prompt, n)`;
 *   presumably supplied by asFirefallAction — confirm in FirefallAction.js.
 * @returns {Promise<string>} `message.content` of the first generation returned.
 */
async function main({
  url, selector, prompt, firefallClient,
}) {
  console.log(`Scraping URL: ${url}`);
  const html = await wretch(url).get().text();
  const dom = new JSDOM(html);
  console.log(`Using selector: ${selector}`);
  const text = Array.from(dom.window.document.querySelectorAll(selector))
    // Collapse every run of whitespace (including newlines) into one space.
    .map((node) => node.textContent.replace(/\s+/g, ' '))
    // Keep only chunks longer than the minimum length.
    .filter((textChunk) => textChunk.length > MIN_CHUNK_LENGTH)
    .join('\n');
  const truncatedText = truncateText(text, MAX_CONTENT_LENGTH);
  console.log(`Scraper output: ${truncatedText}`);
  // NOTE(review): the second argument (0) looks like a temperature setting — confirm against the Firefall client.
  const { generations } = await firefallClient.completion(`${prompt}:\n\n${truncatedText}`, 0);
  return generations[0][0].message.content;
}

// Exported action entry point: `main` wrapped, from innermost to outermost, by
// asFirefallAction, asAuthZAction, asAuthNAction and asGenericAction.
exports.main = asGenericAction(asAuthNAction(asAuthZAction(asFirefallAction(main))));
15 changes: 15 additions & 0 deletions app.config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@ application:
IMS_PRODUCT_CONTEXT: $IMS_PRODUCT_CONTEXT
FT_EARLY_ACCESS: $FT_EARLY_ACCESS
LD_SDK_KEY: $LD_SDK_KEY
# Scraper action: served from actions/scraper/index.js as a web action on nodejs:18;
# each input below is populated from the environment variable of the same name.
scraper:
function: actions/scraper/index.js
web: true
runtime: nodejs:18
inputs:
FIREFALL_ENDPOINT: $FIREFALL_ENDPOINT
FIREFALL_API_KEY: $FIREFALL_API_KEY
IMS_ENDPOINT: $IMS_ENDPOINT
IMS_CLIENT_ID: $IMS_CLIENT_ID
IMS_SERVICE_CLIENT_ID: $IMS_SERVICE_CLIENT_ID
IMS_SERVICE_CLIENT_SECRET: $IMS_SERVICE_CLIENT_SECRET
IMS_SERVICE_PERM_AUTH_CODE: $IMS_SERVICE_PERM_AUTH_CODE
IMS_PRODUCT_CONTEXT: $IMS_PRODUCT_CONTEXT
FT_EARLY_ACCESS: $FT_EARLY_ACCESS
LD_SDK_KEY: $LD_SDK_KEY
hooks:
post-app-run: ./hooks/post-app-run.js
pre-app-deploy: ./hooks/pre-app-deploy.js
Loading
Loading