Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Youngj/deep data source scraper #20

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 47 additions & 16 deletions backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,33 @@ const _PASSWORD = '799Ln38Lsjqs!yg^99wfs*9ahYo6L8';
interface DeedData {
county: string;
dateOfSale: Date;
deedBook: number;
deedPage: number;
deedBook: number | string;
deedPage: number | string;
propertyAddress: string;
propertyValue: number;
propertyTaxDue: number;
propertyValue: number | string;
propertyTaxDue: number | string;
propertyLandDistrict: string;
propertyLandLot: number;
propertyLandLot: number | string;
buyerName: string;
buyerAddress: string;
sellerName: string;
sellerAddress: string;
}

/**
 * Launches a browser, navigates to the deed data page for the given
 * address, and scrapes the deed data from it.
 *
 * The browser is always closed before this function settles, including when
 * navigation or scraping throws, so a failed run does not leave a browser
 * process behind.
 *
 * @param address Property address passed on to navigation and scraping.
 * @returns A promise that resolves once scraping finished and the browser closed.
 * @throws Whatever `navigateDataSource` or `scrapeData` throws; the error is
 *   rethrown after the browser has been closed.
 */
async function navigateToSource(address: string): Promise<void> {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    //goto deed data site
    await navigateDataSource(page, address);

    await scrapeData(page, address);
  } finally {
    // Await the close so callers do not observe completion while the
    // browser is still shutting down.
    await browser.close();
  }
}

//navigate to page with deed data
async function navigateDataSource(page: puppeteer.Page, address: string) {
await page.goto(_DATASOURCEPAGELOGIN);

//click login [top right]
Expand Down Expand Up @@ -58,16 +67,39 @@ async function navigateToSource(address: string): Promise<void> {
page.waitForNavigation(),
]);

//wait for deed data selections to load
await page.waitForSelector('#BodyContent_lvDashboard_btnViewPT61_0');
//click 'view pt-61 information'
await page.click('#BodyContent_lvDashboard_btnViewPT61_0');

scrapeData(page, address);
const deedIndex = await getLatestDeedIndex(page);
//click 'view pt-61 information' for deed data page
await page.click(`#BodyContent_lvDashboard_btnViewPT61_${deedIndex}`);
}

browser.close();
/**
 * Finds which dashboard row holds the deed with the most recent sale date.
 *
 * Rows are identified by the numeric suffix of their
 * `#BodyContent_lvDashboard_btnViewPT61_<n>` button; the matching sale date is
 * read from `#BodyContent_lvDashboard_lblDashboardSaleDate_<n>`.
 *
 * @param page Page currently showing the deed dashboard.
 * @returns The row suffix of the latest deed. Returns 0 when only one deed is
 *   listed or when no row has a parseable sale date. When several rows share
 *   the latest date, the highest row suffix wins.
 */
async function getLatestDeedIndex(page: puppeteer.Page) {
  // A button with suffix 1 only exists when more than one deed is listed;
  // otherwise the only deed available is row 0.
  if (!(await page.$('#BodyContent_lvDashboard_btnViewPT61_1'))) {
    return 0;
  }

  let latestIndex = 0;
  let latestTime = Number.NEGATIVE_INFINITY;

  // Only the first 10 rows are inspected.
  // NOTE(review): confirm the dashboard never lists more than 10 deeds.
  for (let deedCounter = 0; deedCounter < 10; deedCounter++) {
    if (!(await page.$(`#BodyContent_lvDashboard_btnViewPT61_${deedCounter}`))) {
      continue;
    }

    const element = await page.waitForSelector(
      `#BodyContent_lvDashboard_lblDashboardSaleDate_${deedCounter}`
    );
    const dateText = await element.evaluate((e) => e.textContent);
    const time = new Date(dateText).getTime();

    // An unparseable date yields NaN, which would defeat every comparison
    // below; skip the row instead of letting it win.
    if (Number.isNaN(time)) {
      continue;
    }

    // Track the row suffix itself (not a position in a side array) so the
    // result stays correct even if some suffixes are missing. `>=` keeps the
    // later row on ties.
    if (time >= latestTime) {
      latestTime = time;
      latestIndex = deedCounter;
    }
  }

  return latestIndex;
}

async function scrapeData(page: Page, address: string): Promise<DeedData> {
//?? is there a more elegant way to do this?
const county = await page.waitForSelector(
`#BodyContent_lvFinalViews_ucCombinedQuickView_0_ucPT61QuickView_0_lblCountyName_0`
);
Expand Down Expand Up @@ -110,26 +142,25 @@ async function scrapeData(page: Page, address: string): Promise<DeedData> {
const DeedData = {
county: await county.evaluate((e) => e.textContent),
dateOfSale: new Date(await dateOfSale.evaluate((e) => e.textContent)),
deedBook: parseInt(await deedBook.evaluate((e) => e.textContent)),
deedPage: parseInt(await deedPage.evaluate((e) => e.textContent)),
deedBook: parseInt(await deedBook.evaluate((e) => e.textContent)) || '',
deedPage: parseInt(await deedPage.evaluate((e) => e.textContent)) || '',
propertyAddress: address,
propertyValue: parseInt(
await propertyValue.evaluate((e) => e.textContent.replace(/[^0-9]/g, ''))
),
) || '',
propertyTaxDue: parseInt(
await propertyTaxDue.evaluate((e) => e.textContent.replace(/[^0-9]/g, ''))
),
) || '',
propertyLandDistrict: await propertyLandDistrict.evaluate(
(e) => e.textContent
),
propertyLandLot: parseInt(
await propertyLandLot.evaluate((e) => e.textContent)
),
) || '',
buyerName: await buyerName.evaluate((e) => e.textContent),
buyerAddress: await buyerAddress.evaluate((e) => e.textContent),
sellerName: await sellerName.evaluate((e) => e.textContent),
sellerAddress: await sellerAddress.evaluate((e) => e.textContent),
};
console.log(DeedData);
return DeedData;
}
3 changes: 2 additions & 1 deletion backend/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
/* Type Checking */
"strict": false, /* Enable all strict type-checking options. */
"noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
"skipLibCheck": true, /* Skip type checking all .d.ts files. */
"sourceMap": true
}
}