Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing Uqload scraper with new TLD #103

Merged
merged 4 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions src/core/scraper/uqload.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @module
* @license MIT
* @see https://uqload.co/
* @see https://uqload.com/
* @author Sébastien Règne
*/

Expand All @@ -17,27 +17,27 @@ const DATA_REGEXP = /sources: \["(?<source>.*\/v.mp4)"\],/u;
/**
* Extrait les informations nécessaires pour lire une vidéo sur Kodi.
*
* @param {URL} _url L'URL d'une vidéo de Uqload.
* @param {URL} url L'URL d'une vidéo de Uqload.
* @param {Object} metadata Les métadonnées de l'URL.
* @param {Function} metadata.html La fonction retournant la promesse contenant
* le document HTML.
* @returns {Promise<string|undefined>} Une promesse contenant le lien du
* <em>fichier</em> ou
* <code>undefined</code>.
*/
const action = async function (_url, metadata) {
const action = async function (url, metadata) {
const doc = await metadata.html();
if (undefined === doc) {
return undefined;
}

for (const script of doc.querySelectorAll("script:not([src])")) {
const result = DATA_REGEXP.exec(script.text);
if (null !== result) {
return result.groups.source + "|Referer=https://uqload.co/";
return result.groups.source + `|Referer=${url.href}`;
}
}
return undefined;
};
export const extract = matchPattern(
action,
"*://uqload.co/*.html",
// Ajouter aussi l'ancien nom de domaine (qui redirige vers le nouveau).
"*://uqload.com/*.html",
);
// Ne pas filtrer sur le TLD car il change régulièrement.
export const extract = matchPattern(action, "*://uqload.*/*.html");
40 changes: 10 additions & 30 deletions test/integration/scraper/uqload.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,65 +9,45 @@ import { extract } from "../../../src/core/scrapers.js";

describe("Scraper: Uqload", function () {
it("should return undefined when it isn't a video", async function () {
const url = new URL("https://uqload.co/checkfiles.html");
const url = new URL("https://uqload.com/checkfiles.html");
const context = { depth: false, incognito: false };

const file = await extract(url, context);
assert.equal(file, undefined);
});

it("should return undefined when video was deleted", async function () {
const url = new URL("https://uqload.co/k1phujbh3t7d.html");
const url = new URL("https://uqload.com/k1phujbh3t7d.html");
const context = { depth: false, incognito: false };

const file = await extract(url, context);
assert.equal(file, undefined);
});

it("should return video URL", async function () {
const url = new URL("https://uqload.co/5x0cgygu2bgg.html");
const context = { depth: false, incognito: false };

const file = await extract(url, context);
assert.ok(
undefined !== file &&
file.endsWith("/v.mp4|Referer=https://uqload.co/"),
`"${file}".endsWith(...)`,
);
});

it("should return video URL when protocol is HTTP", async function () {
const url = new URL("http://uqload.co/5x0cgygu2bgg.html");
const url = new URL("https://uqload.com/5x0cgygu2bgg.html");
const context = { depth: false, incognito: false };

const file = await extract(url, context);
assert.ok(
undefined !== file &&
file.endsWith("/v.mp4|Referer=https://uqload.co/"),
file.endsWith(
"/v.mp4|Referer=https://uqload.com/5x0cgygu2bgg.html",
),
`"${file}".endsWith(...)`,
);
});

it("should return video URL from embed", async function () {
const url = new URL("https://uqload.co/embed-5x0cgygu2bgg.html");
const context = { depth: false, incognito: false };

const file = await extract(url, context);
assert.ok(
undefined !== file &&
file.endsWith("/v.mp4|Referer=https://uqload.co/"),
`"${file}".endsWith(...)`,
);
});

it("should return video URL from old TLD", async function () {
const url = new URL("https://uqload.com/5x0cgygu2bgg.html");
const url = new URL("https://uqload.com/embed-5x0cgygu2bgg.html");
const context = { depth: false, incognito: false };

const file = await extract(url, context);
assert.ok(
undefined !== file &&
file.endsWith("/v.mp4|Referer=https://uqload.co/"),
file.endsWith(
"/v.mp4|Referer=https://uqload.com/embed-5x0cgygu2bgg.html",
),
`"${file}".endsWith(...)`,
);
});
Expand Down
50 changes: 16 additions & 34 deletions test/unit/core/scraper/uqload.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,22 @@ import * as scraper from "../../../../src/core/scraper/uqload.js";
describe("core/scraper/uqload.js", function () {
describe("extract()", function () {
it("shouldn't handle when it's a unsupported URL", async function () {
const url = new URL("https://uqload.co/faq");
const url = new URL("https://uqload.foo/faq");

const file = await scraper.extract(url);
assert.equal(file, undefined);
});

it("should return undefined when no html", async function () {
const url = new URL("https://uqload.foo/bar.html");
const metadata = { html: () => Promise.resolve(undefined) };

const file = await scraper.extract(url, metadata);
assert.equal(file, undefined);
});

it("should return undefined when no script", async function () {
const url = new URL("https://uqload.co/foo.html");
const url = new URL("https://uqload.foo/bar.html");
const metadata = {
html: () =>
Promise.resolve(
Expand All @@ -33,14 +41,13 @@ describe("core/scraper/uqload.js", function () {
});

it("should return undefined when no inline script", async function () {
const url = new URL("https://uqload.co/foo.html");
const url = new URL("https://uqload.foo/bar.html");
const metadata = {
html: () =>
Promise.resolve(
new DOMParser().parseFromString(
`<html><body>
<script src="https://uqload.co/script.js"` +
`></script>
<script src="https://uqload.foo/baz.js"></script>
</body></html>`,
"text/html",
),
Expand All @@ -52,7 +59,7 @@ describe("core/scraper/uqload.js", function () {
});

it("should return undefined when no sources", async function () {
const url = new URL("https://uqload.co/foo.html");
const url = new URL("https://uqload.foo/bar.html");
const metadata = {
html: () =>
Promise.resolve(
Expand All @@ -72,40 +79,15 @@ describe("core/scraper/uqload.js", function () {
});

it("should return video URL", async function () {
const url = new URL("https://uqload.co/foo.html");
const metadata = {
html: () =>
Promise.resolve(
new DOMParser().parseFromString(
`<html><body>
<script>
var player = new Clappr.Player({
sources: ["https://bar.com/baz/v.mp4"],
});
</script>
</body></html>`,
"text/html",
),
),
};

const file = await scraper.extract(url, metadata);
assert.equal(
file,
"https://bar.com/baz/v.mp4|Referer=https://uqload.co/",
);
});

it("should return video URL from old TLD", async function () {
const url = new URL("https://uqload.com/foo.html");
const url = new URL("https://uqload.foo/bar.html");
const metadata = {
html: () =>
Promise.resolve(
new DOMParser().parseFromString(
`<html><body>
<script>
var player = new Clappr.Player({
sources: ["https://bar.com/baz/v.mp4"],
sources: ["https://baz.com/qux/v.mp4"],
});
</script>
</body></html>`,
Expand All @@ -117,7 +99,7 @@ describe("core/scraper/uqload.js", function () {
const file = await scraper.extract(url, metadata);
assert.equal(
file,
"https://bar.com/baz/v.mp4|Referer=https://uqload.co/",
"https://baz.com/qux/v.mp4|Referer=https://uqload.foo/bar.html",
);
});
});
Expand Down
2 changes: 1 addition & 1 deletion test/unit/core/scraper/vudeo.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ describe("core/scraper/vudeo.js", function () {
Promise.resolve(
new DOMParser().parseFromString(
`<html><body>
<script src="https://vudeo.foo/bar.js"></script>
<script src="https://vudeo.foo/baz.js"></script>
</body></html>`,
"text/html",
),
Expand Down
Loading