From 5d331127ebbf4190b1b95f891adb763f27580d99 Mon Sep 17 00:00:00 2001 From: Francisco Abarzua Date: Tue, 7 May 2024 16:45:42 -0400 Subject: [PATCH] fixes issues with query strings in pytesseract --- package.json | 2 +- src/pytesseract/adapter.ts | 25 ++--- tests/pytesseract/datasource.spec.js | 132 ++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 18 deletions(-) diff --git a/package.json b/package.json index e604a20..74c9c59 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ "scripts": { "build": "tsup", "pretest": "npm run build", - "test": "mocha ./tests", + "test": "mocha ./tests/**/*.js", "prepublishOnly": "npm run test" }, "author": "Francisco Abarzua ", diff --git a/src/pytesseract/adapter.ts b/src/pytesseract/adapter.ts index 00db8d1..70e6fc1 100644 --- a/src/pytesseract/adapter.ts +++ b/src/pytesseract/adapter.ts @@ -51,28 +51,29 @@ export function buildSearchParams(query: Query): TesseractDataRequest { return { cube: query.cube.name, locale: query.getParam("locale"), - drilldowns: query.getParam("drilldowns").map(getName), - measures: query.getParam("measures").map(getName), - properties: query.getParam("properties").map(getName), + drilldowns: query.getParam("drilldowns").map(getName).join(","), + measures: query.getParam("measures").map(getName).join(","), + properties: query.getParam("properties").map(getName).join(","), exclude: filterMap(cuts, (item) => { if (!item.isExclusive) return null; return `${item.drillable.name}:${item.members.join(",")}`; - }), + }).join(","), include: filterMap(cuts, (item) => { if (item.isExclusive) return null; return `${item.drillable.name}:${item.members.join(",")}`; - }), + }).join(","), filters: query.getParam("filters").map((item) => { - const filter = `${item.measure}.${item.const1.join(".")}`; + const measure = typeof item.measure === "string" ? item.measure : item.measure.name; + const filter = `${measure}.${item.const1.join(".")}`; return item.const2 ? `${filter}.${item.joint}.${item.const2.join(".")}` : filter; - }), + }).join(","), limit: `${pagination.limit},${pagination.offset}`, sort: !sorting.property ? undefined : typeof sorting.property === "string" ? `${sorting.property}.${sorting.direction}` : `${sorting.property.name}.${sorting.direction}`, - time: time.value ? `${time.value}.${time.precision}` : undefined, + time: time.precision ? `${time.precision}.${time.value}` : undefined, parents: options.parents || undefined, }; } @@ -200,15 +201,15 @@ export function hydrateQueryFromRequest( }); const [pagiLimit = "0", pagiOffset = "0"] = splitTokens(request.limit); - const [sortProp, sortDir] = splitTokens(request.sort); - const [timeScale, timeAge] = splitTokens(request.time); + const [sortProp, sortDir] = splitTokens(request.sort, "."); + const [timeScale, timeAge] = splitTokens(request.time, "."); const params: Partial = { cube: request.cube, locale: request.locale, - drilldowns: splitTokens(request.drilldowns).map((level) => ({level})), + drilldowns: splitTokens(request.drilldowns).map((level) => ({level, toString: () => level})), measures: splitTokens(request.measures), - properties: splitTokens(request.properties).map((property) => ({property})), + properties: splitTokens(request.properties).map((property) => ({property, toString: () => property})), page_limit: Number.parseInt(pagiLimit), page_offset: Number.parseInt(pagiOffset), cuts: cutsInclude.concat(cutsExclude), diff --git a/tests/pytesseract/datasource.spec.js b/tests/pytesseract/datasource.spec.js index 9d80017..014ebd0 100644 --- a/tests/pytesseract/datasource.spec.js +++ b/tests/pytesseract/datasource.spec.js @@ -61,9 +61,11 @@ describe("PyTesseractDataSource", function () { query = await ds.fetchCube("indicators_i_wdi_a").then((plainCube) => { const cube = new Cube(plainCube, ds); return cube.query - .addDrilldown("Country") + .setLocale("en") .addMeasure("Measure") - .addCut("Continent", ["eu"]) + .addDrilldown("Year") + .addDrilldown("Country") + .addCut("Continent", ["na"]) .setFormat("jsonrecords"); }); }); @@ -73,7 +75,13 @@ describe("PyTesseractDataSource", function () { assert.match(res.url, /\/data\.jsonrecords\?/); assert.strictEqual(res.status, 200); - assert.strictEqual(res.data.length, 48); + assert.strictEqual(res.data.length, 2205); + + const columns = Object.keys(res.data[0]); + assert.ok(columns.includes("Year")); + assert.ok(columns.includes("Country")); + assert.ok(columns.includes("Country ID")); + assert.ok(columns.includes("Measure")); }); }); @@ -158,7 +166,121 @@ describe("PyTesseractDataSource", function () { }); }); - describe("#parseQueryURL", () => {}); + describe("#parseQueryURL", () => { + const ds = new PyTesseractDataSource(PYTESSERACT_SERVER); + const search = new URLSearchParams([ + ["cube", "indicators_i_wdi_a"], + ["locale", "es"], + ["drilldowns", "Year,Country"], + ["measures", "Measure"], + ["properties", "ISO 3"], + ["include", "Year:2020,2021"], + ["exclude", "Continent:af,as"], + ["filters", "Measure.lte.100000"], + ["limit", "1,2"], + ["sort", "ISO 3.asc"], + ["time", "year.latest"], + ["parents", "false"], + ]); + let cube; + + this.beforeAll(async () => { + cube = await ds.fetchCube("indicators_i_wdi_a").then((cube) => new Cube(cube, ds)); + }); + + it("should parse a search string into a Query", () => { + let query; + + assert.doesNotThrow(() => { + query = ds.parseQueryURL(cube.query, `http://testserver/?${search}`); + }); + + assert.strictEqual(query.cube.name, "indicators_i_wdi_a"); + assert.strictEqual(query.getParam("locale"), "es"); + assert.deepEqual(query.getParam("drilldowns"), [ + cube.getLevel("Year"), + cube.getLevel("Country"), + ]); + assert.deepEqual(query.getParam("measures"), [cube.getMeasure("Measure")]); + assert.deepEqual(query.getParam("properties"), [cube.getProperty("ISO 3")]); + assert.deepEqual(query.getParam("cuts"), [ + { + drillable: cube.getLevel("Year"), + members: ["2020", "2021"], + isExclusive: false, + isForMatch: undefined, + }, + { + drillable: cube.getLevel("Continent"), + members: ["af", "as"], + isExclusive: true, + isForMatch: undefined, + }, + ]); + assert.deepEqual(query.getParam("filters"), [ + { + measure: cube.getMeasure("Measure"), + const1: ["lte", 100000], + joint: undefined, + const2: undefined, + }, + ]); + assert.deepEqual(query.getParam("pagination"), {limit: 1, offset: 2}); + assert.deepEqual(query.getParam("sorting"), { + property: cube.getProperty("ISO 3"), + direction: "asc", + }); + assert.deepEqual(query.getParam("time"), {precision: "year", value: "latest"}); + assert.deepEqual(query.getParam("options"), {parents: false}); + }); + }); + + describe("#stringifyQueryURL", () => { + const ds = new PyTesseractDataSource(PYTESSERACT_SERVER); + let cube; + + this.beforeAll(async () => { + cube = await ds.fetchCube("indicators_i_wdi_a").then((cube) => new Cube(cube, ds)); + }); - describe("#stringifyQueryURL", () => {}); + it("should stringify a Query into a search string", () => { + const query = cube.query + .setFormat("jsonrecords") + .setLocale("es") + .addMeasure("Measure") + .addDrilldown("Year") + .addDrilldown("Country") + .addProperty("Country.ISO 3") + .addCut("Year", ["2020", "2021"], {exclusive: false}) + .addCut("Continent", ["af", "as"], {exclusive: true}) + .addFilter("Measure", ["lte", 100000]) + .setPagination(1, 2) + .setSorting("Country.ISO 3", "asc") + .setTime("year", "latest") + .setOption("parents", false); + + const url = ds.stringifyQueryURL(query, "csv"); + + const parsedURL = new URL(url); + const parsedSearch = Object.fromEntries(parsedURL.searchParams); + + assert.strictEqual( + parsedURL.origin + parsedURL.pathname, + `${new URL("data.csv", PYTESSERACT_SERVER)}`, + ); + assert.deepEqual(parsedSearch, { + cube: "indicators_i_wdi_a", + locale: "es", + drilldowns: "Year,Country", + measures: "Measure", + properties: "ISO 3", + include: "Year:2020,2021", + exclude: "Continent:af,as", + filters: "Measure.lte.100000", + limit: "1,2", + sort: "ISO 3.asc", + time: "year.latest", + }); + }); + }); });