From f40150115f876f6494ca0057e4c673222e33b965 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 15 Oct 2023 15:27:21 -0400 Subject: [PATCH 01/12] update yarn lock file --- yarn.lock | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/yarn.lock b/yarn.lock index a987def3..54db8048 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9,25 +9,6 @@ dependencies: "@jridgewell/trace-mapping" "^0.3.0" -"@apollo/protobufjs@1.2.2": - version "1.2.2" - resolved "https://registry.yarnpkg.com/@apollo/protobufjs/-/protobufjs-1.2.2.tgz#4bd92cd7701ccaef6d517cdb75af2755f049f87c" - integrity sha512-vF+zxhPiLtkwxONs6YanSt1EpwpGilThpneExUN5K3tCymuxNnVq2yojTvnpRjv2QfsEIt/n7ozPIIzBLwGIDQ== - dependencies: - "@protobufjs/aspromise" "^1.1.2" - "@protobufjs/base64" "^1.1.2" - "@protobufjs/codegen" "^2.0.4" - "@protobufjs/eventemitter" "^1.1.0" - "@protobufjs/fetch" "^1.1.0" - "@protobufjs/float" "^1.0.2" - "@protobufjs/inquire" "^1.1.0" - "@protobufjs/path" "^1.1.2" - "@protobufjs/pool" "^1.1.0" - "@protobufjs/utf8" "^1.1.0" - "@types/long" "^4.0.0" - "@types/node" "^10.1.0" - long "^4.0.0" - "@apollo/protobufjs@1.2.6": version "1.2.6" resolved "https://registry.yarnpkg.com/@apollo/protobufjs/-/protobufjs-1.2.6.tgz#d601e65211e06ae1432bf5993a1a0105f2862f27" @@ -2104,20 +2085,13 @@ apollo-datasource@^3.3.2: "@apollo/utils.keyvaluecache" "^1.0.1" apollo-server-env "^4.2.1" -apollo-reporting-protobuf@^3.3.0, apollo-reporting-protobuf@^3.4.0: +apollo-reporting-protobuf@^3.4.0: version "3.4.0" resolved "https://registry.yarnpkg.com/apollo-reporting-protobuf/-/apollo-reporting-protobuf-3.4.0.tgz#6edd31f09d4a3704d9e808d1db30eca2229ded26" integrity sha512-h0u3EbC/9RpihWOmcSsvTW2O6RXVaD/mPEjfrPkxRPTEPWqncsgOoRJw+wih4OqfH3PvTJvoEIf4LwKrUaqWog== dependencies: "@apollo/protobufjs" "1.2.6" -apollo-server-caching@^3.3.0: - version "3.3.0" - resolved "https://registry.yarnpkg.com/apollo-server-caching/-/apollo-server-caching-3.3.0.tgz#f501cbeb820a4201d98c2b768c085f22848d9dc5" - integrity sha512-Wgcb0ArjZ5DjQ7ID+tvxUcZ7Yxdbk5l1MxZL8D8gkyjooOkhPNzjRVQ7ubPoXqO54PrOMOTm1ejVhsF+AfIirQ== - dependencies: - lru-cache "^6.0.0" - apollo-server-core@^3.6.2: version "3.12.0" resolved "https://registry.yarnpkg.com/apollo-server-core/-/apollo-server-core-3.12.0.tgz#8aa2a7329ce6fe1823290c45168c749db01548df" From 9473d0ecf20224a89535c3a111965468fac6940c Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 15 Oct 2023 15:27:34 -0400 Subject: [PATCH 02/12] add column to termInfo schema --- .../20231015192547_add_active_field_to_term_info/migration.sql | 2 ++ prisma/schema.prisma | 1 + 2 files changed, 3 insertions(+) create mode 100644 prisma/migrations/20231015192547_add_active_field_to_term_info/migration.sql diff --git a/prisma/migrations/20231015192547_add_active_field_to_term_info/migration.sql b/prisma/migrations/20231015192547_add_active_field_to_term_info/migration.sql new file mode 100644 index 00000000..c9e33f46 --- /dev/null +++ b/prisma/migrations/20231015192547_add_active_field_to_term_info/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "term_ids" ADD COLUMN "active" BOOLEAN NOT NULL DEFAULT true; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 5eda00c8..288bbf79 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -121,6 +121,7 @@ model TermInfo { termId String @unique(map: "term_ids.term_id_unique") @map("term_id") subCollege String @map("sub_college") text String + active Boolean @default(true) @@map("term_ids") } From e36d62c4ef91a4f599112ceb91fc067b1b2d6d51 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 15 Oct 2023 15:53:38 -0400 Subject: [PATCH 03/12] logic for determining active terms --- scrapers/classes/parsersxe/termListParser.ts | 10 ++++++++++ services/dumpProcessor.ts | 6 ++++-- types/types.ts | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/scrapers/classes/parsersxe/termListParser.ts b/scrapers/classes/parsersxe/termListParser.ts index a29e713e..ed786f79 100644 --- a/scrapers/classes/parsersxe/termListParser.ts +++ b/scrapers/classes/parsersxe/termListParser.ts @@ -9,6 +9,15 @@ class TermListParser { serializeTermsList( termsFromBanner: { code: string; description: string }[] ): TermInfo[] { + const activeTermInfos = termsFromBanner.filter( + (term) => !term.description.includes("View Only") + ); + const activeTermIds = activeTermInfos.map((termInfo) => + Number(termInfo.code) + ); + /* The smallest active termInfo code. + All termInfo's with codes greater than or equal to this are considered active.*/ + const minActiveTermInfoCode = Math.min(...activeTermIds); return termsFromBanner.map((term) => { const subCollege = this.determineSubCollegeName(term.description); @@ -24,6 +33,7 @@ class TermListParser { termId: term.code, text: text, subCollege: subCollege, + active: Number(term.code) >= minActiveTermInfoCode, }; }); } diff --git a/services/dumpProcessor.ts b/services/dumpProcessor.ts index f727c018..4460c2e5 100644 --- a/services/dumpProcessor.ts +++ b/services/dumpProcessor.ts @@ -223,18 +223,20 @@ class DumpProcessor { termIdsWithData.includes(t.termId) ); - // Upsert new term IDs, along with their names and sub college - for (const { termId, subCollege, text } of termInfosWithData) { + // Upsert new term IDs, along with their names, sub college, and active status + for (const { termId, subCollege, text, active } of termInfosWithData) { await prisma.termInfo.upsert({ where: { termId }, update: { text, subCollege, + active, }, create: { termId, text, subCollege, + active, }, }); } diff --git a/types/types.ts b/types/types.ts index ab799605..d18db386 100644 --- a/types/types.ts +++ b/types/types.ts @@ -264,6 +264,7 @@ export interface TermInfo { termId: string; subCollege: string; text: string; + active: boolean; } export interface CourseRef { From 4edc8552eb3561c062a9a176954a4562856de2bf Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 15 Oct 2023 15:54:12 -0400 Subject: [PATCH 04/12] script for testing logic --- .github/workflows/test.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4f19fb47..aaead070 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -105,6 +105,11 @@ jobs: psql -U postgres -h localhost -p 5432 -d searchneu_dev -c \ "INSERT INTO sections (class_hash, id, crn, last_update_time) VALUES ('neu.edu/202240/CS/2501', 'neu.edu/202240/CS/2501/987654321', '987654321', '1999-04-03 18:34:35.882');" + # Set all Term IDS to active for testing purposes + - run: |- + psql -U postgres -h localhost -p 5432 -d searchneu_dev -c \ + "UPDATE term_ids SET active = true;" + - name: Run the updater ONLY ONCE, so that it removes the newly-inserted section with an outdated lastUpdateTime run: UPDATE_ONLY_ONCE=true LOG_LEVEL=VERBOSE yarn updater | tee _updater.log From 11040a7ba8578451777d04f9220262f9a7991b9d Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 15 Oct 2023 16:00:00 -0400 Subject: [PATCH 05/12] add field to tests --- scrapers/classes/parsersxe/tests/bannerv9Parser.test.ts | 3 +++ tests/database/dumpProcessor.test.seq.ts | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/scrapers/classes/parsersxe/tests/bannerv9Parser.test.ts b/scrapers/classes/parsersxe/tests/bannerv9Parser.test.ts index 5e7f45ea..273e0fe6 100644 --- a/scrapers/classes/parsersxe/tests/bannerv9Parser.test.ts +++ b/scrapers/classes/parsersxe/tests/bannerv9Parser.test.ts @@ -38,18 +38,21 @@ describe("getAllTermInfos", () => { it("serializes the term list", async () => { expect(await bannerv9.getAllTermInfos()).toEqual([ { + active: true, host: "neu.edu", subCollege: "NEU", termId: "3", text: "Fall 2022 Semester", }, { + active: true, host: "neu.edu", subCollege: "LAW", termId: "2", text: "Summer 2022 Semester", }, { + active: true, host: "neu.edu", subCollege: "CPS", termId: "1", diff --git a/tests/database/dumpProcessor.test.seq.ts b/tests/database/dumpProcessor.test.seq.ts index 151e6afb..079cfe68 100644 --- a/tests/database/dumpProcessor.test.seq.ts +++ b/tests/database/dumpProcessor.test.seq.ts @@ -25,11 +25,13 @@ afterAll(async () => { const termInfos: TermInfo[] = [ { + active: true, termId: "123456", subCollege: "NEU", text: "This is some text", }, { + active: true, termId: "654321", subCollege: "LAW", text: "This is some more text", @@ -103,6 +105,7 @@ describe("with termInfos", () => { it("deletes old termInfos", async () => { await prisma.termInfo.create({ data: { + active: true, termId: "1", subCollege: "NEU", text: "hello", @@ -122,6 +125,7 @@ describe("with termInfos", () => { it("doesn't delete old termInfos if deleteOutdatedData is false", async () => { await prisma.termInfo.create({ data: { + active: true, termId: "1", subCollege: "NEU", text: "hello", @@ -141,6 +145,7 @@ describe("with termInfos", () => { it("updates existing termInfos", async () => { await prisma.termInfo.create({ data: { + active: true, termId: "654321", subCollege: "fake college", text: "This is some more text", From 496ce564b406d52325faa9b168b9ba4c9f497256 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Wed, 25 Oct 2023 17:56:01 -0400 Subject: [PATCH 06/12] update snapshot --- .../parsersxe/tests/__snapshots__/termListParser.test.js.snap | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scrapers/classes/parsersxe/tests/__snapshots__/termListParser.test.js.snap b/scrapers/classes/parsersxe/tests/__snapshots__/termListParser.test.js.snap index 347d3529..193f43b5 100644 --- a/scrapers/classes/parsersxe/tests/__snapshots__/termListParser.test.js.snap +++ b/scrapers/classes/parsersxe/tests/__snapshots__/termListParser.test.js.snap @@ -3,18 +3,21 @@ exports[`termListParser pulls out relevant data 1`] = ` Array [ Object { + "active": true, "host": "neu.edu", "subCollege": "CPS", "termId": "202034", "text": "Spring 2020 Semester", }, Object { + "active": true, "host": "neu.edu", "subCollege": "LAW", "termId": "202032", "text": "Spring 2020 Semester", }, Object { + "active": true, "host": "neu.edu", "subCollege": "NEU", "termId": "202030", From 17b50a599ff578ef673076fff4b062d447ca1f84 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 29 Oct 2023 17:26:30 -0400 Subject: [PATCH 07/12] Update main.ts --- scrapers/main.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapers/main.ts b/scrapers/main.ts index 58bd5515..a412da7d 100644 --- a/scrapers/main.ts +++ b/scrapers/main.ts @@ -55,8 +55,10 @@ class Main { const start = Date.now(); // Get the TermInfo information from Banner const allTermInfos = await bannerv9parser.getAllTermInfos(); + // Only scrape active terms + const allActiveTermInfos = allTermInfos.filter((t) => t.active); const termsToScrape = await this.getTermIdsToScrape( - allTermInfos.map((t) => t.termId) + allActiveTermInfos.map((t) => t.termId) ); // Scraping should NOT be resolved simultaneously (eg. via p-map): From 48b4b3e011024a34aa93c144a82fdceca448abf1 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 29 Oct 2023 23:44:22 -0400 Subject: [PATCH 08/12] revert previous change --- scrapers/main.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scrapers/main.ts b/scrapers/main.ts index a412da7d..58bd5515 100644 --- a/scrapers/main.ts +++ b/scrapers/main.ts @@ -55,10 +55,8 @@ class Main { const start = Date.now(); // Get the TermInfo information from Banner const allTermInfos = await bannerv9parser.getAllTermInfos(); - // Only scrape active terms - const allActiveTermInfos = allTermInfos.filter((t) => t.active); const termsToScrape = await this.getTermIdsToScrape( - allActiveTermInfos.map((t) => t.termId) + allTermInfos.map((t) => t.termId) ); // Scraping should NOT be resolved simultaneously (eg. via p-map): From 0a9b433fa20ef4920d6708534bae03d15746ee9b Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 29 Oct 2023 23:44:42 -0400 Subject: [PATCH 09/12] only update active term ids --- services/updater.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/updater.ts b/services/updater.ts index ac5dee16..4352c98f 100644 --- a/services/updater.ts +++ b/services/updater.ts @@ -59,9 +59,10 @@ class Updater { return termsStr.split(","); } - // Get term IDs from our database + // Get active term IDs from our database const termInfos = await prisma.termInfo.findMany({ orderBy: { termId: "desc" }, + where: { active: true }, take: NUMBER_OF_TERMS_TO_UPDATE, }); From c42fd65ac4bb54aef96d33891e208c818430c950 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 19 Nov 2023 17:42:53 -0500 Subject: [PATCH 10/12] test to ensure the updater only maintains a list of active termIds, ignoring the inactive terms in the database. --- tests/database/updater.test.seq.ts | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/database/updater.test.seq.ts b/tests/database/updater.test.seq.ts index 51a0da6d..a7c6e5ce 100644 --- a/tests/database/updater.test.seq.ts +++ b/tests/database/updater.test.seq.ts @@ -314,6 +314,42 @@ describe("Updater", () => { ); }); + it("inactive terms aren't saved in the updater", async () => { + const INACTIVE_TERM = "000000"; + const ACTIVE_TERM = "000001"; + let termIdsToUpdate = await Updater.getTermIdsToUpdate(); + // Shouldn't include these termId + expect(termIdsToUpdate).not.toContain(INACTIVE_TERM); + expect(termIdsToUpdate).not.toContain(ACTIVE_TERM); + + // Create an inactive term in database + await prisma.termInfo.create({ + data: { + termId: INACTIVE_TERM, + subCollege: "NEU", + text: "description", + active: false, + }, + }); + + termIdsToUpdate = await Updater.getTermIdsToUpdate(); + + // Still shouldn't include this inactive term + expect(termIdsToUpdate).not.toContain(INACTIVE_TERM); + // Create an active term + await prisma.termInfo.create({ + data: { + termId: ACTIVE_TERM, + subCollege: "NEU", + text: "description", + active: true, + }, + }); + termIdsToUpdate = await Updater.getTermIdsToUpdate(); + // Should include the active term + expect(termIdsToUpdate).toContain(ACTIVE_TERM); + }); + describe("getNotificationInfo", () => { let FUNDIES_ONE_COURSE; let FUNDIES_TWO_COURSE; From 95e0c2cc90447c665473dada1286738f283c1232 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 19 Nov 2023 17:43:16 -0500 Subject: [PATCH 11/12] remove unused import --- services/dumpProcessor.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/services/dumpProcessor.ts b/services/dumpProcessor.ts index 4460c2e5..e7fbd685 100644 --- a/services/dumpProcessor.ts +++ b/services/dumpProcessor.ts @@ -16,10 +16,7 @@ import { Section, convertSectionToPrismaType, } from "../types/types"; -import { - ParsedCourseSR, - convertCourseToPrismaType, -} from "../types/scraperTypes"; +import { convertCourseToPrismaType } from "../types/scraperTypes"; class DumpProcessor { /** From 266dcff5b4a3d43b30d79218ac2b19d9d9043492 Mon Sep 17 00:00:00 2001 From: Pranav Phadke Date: Sun, 19 Nov 2023 17:44:32 -0500 Subject: [PATCH 12/12] revert yarn.lock changes --- yarn.lock | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/yarn.lock b/yarn.lock index 54db8048..a987def3 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9,6 +9,25 @@ dependencies: "@jridgewell/trace-mapping" "^0.3.0" +"@apollo/protobufjs@1.2.2": + version "1.2.2" + resolved "https://registry.yarnpkg.com/@apollo/protobufjs/-/protobufjs-1.2.2.tgz#4bd92cd7701ccaef6d517cdb75af2755f049f87c" + integrity sha512-vF+zxhPiLtkwxONs6YanSt1EpwpGilThpneExUN5K3tCymuxNnVq2yojTvnpRjv2QfsEIt/n7ozPIIzBLwGIDQ== + dependencies: + "@protobufjs/aspromise" "^1.1.2" + "@protobufjs/base64" "^1.1.2" + "@protobufjs/codegen" "^2.0.4" + "@protobufjs/eventemitter" "^1.1.0" + "@protobufjs/fetch" "^1.1.0" + "@protobufjs/float" "^1.0.2" + "@protobufjs/inquire" "^1.1.0" + "@protobufjs/path" "^1.1.2" + "@protobufjs/pool" "^1.1.0" + "@protobufjs/utf8" "^1.1.0" + "@types/long" "^4.0.0" + "@types/node" "^10.1.0" + long "^4.0.0" + "@apollo/protobufjs@1.2.6": version "1.2.6" resolved "https://registry.yarnpkg.com/@apollo/protobufjs/-/protobufjs-1.2.6.tgz#d601e65211e06ae1432bf5993a1a0105f2862f27" @@ -2085,13 +2104,20 @@ apollo-datasource@^3.3.2: "@apollo/utils.keyvaluecache" "^1.0.1" apollo-server-env "^4.2.1" -apollo-reporting-protobuf@^3.4.0: +apollo-reporting-protobuf@^3.3.0, apollo-reporting-protobuf@^3.4.0: version "3.4.0" resolved "https://registry.yarnpkg.com/apollo-reporting-protobuf/-/apollo-reporting-protobuf-3.4.0.tgz#6edd31f09d4a3704d9e808d1db30eca2229ded26" integrity sha512-h0u3EbC/9RpihWOmcSsvTW2O6RXVaD/mPEjfrPkxRPTEPWqncsgOoRJw+wih4OqfH3PvTJvoEIf4LwKrUaqWog== dependencies: "@apollo/protobufjs" "1.2.6" +apollo-server-caching@^3.3.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/apollo-server-caching/-/apollo-server-caching-3.3.0.tgz#f501cbeb820a4201d98c2b768c085f22848d9dc5" + integrity sha512-Wgcb0ArjZ5DjQ7ID+tvxUcZ7Yxdbk5l1MxZL8D8gkyjooOkhPNzjRVQ7ubPoXqO54PrOMOTm1ejVhsF+AfIirQ== + dependencies: + lru-cache "^6.0.0" + apollo-server-core@^3.6.2: version "3.12.0" resolved "https://registry.yarnpkg.com/apollo-server-core/-/apollo-server-core-3.12.0.tgz#8aa2a7329ce6fe1823290c45168c749db01548df"