// index.js
/**
* Scrapes Job Postings based on an input value
*/
const puppeteer = require('puppeteer');
const delay = require('delay');
const fs = require('fs');
const { format } = require('date-fns');
const XLSX = require('xlsx');
const Common = require('./common');
const Config = require('./config');
// Accumulated scrape results shared by every search in this run, keyed by company name.
// Each value has the shape { count, postings } (built in handleResults). The binding is
// reassigned by handleResults and handed to writeResultsExcel once all searches are done.
let companyResults = {};
/**
 * Reads a text file synchronously and returns its lines.
 * Both LF and CRLF line endings are accepted so that a file saved on Windows
 * does not leave a trailing "\r" on every returned line.
 * @param path String location of the file to read
 * @returns {string[]} one entry per line; a trailing newline yields a final empty string
 */
const readFile = (path) => {
  return fs.readFileSync(path).toString().split(/\r?\n/);
};
/**
 * Flattens the per-company results map into an array of row objects, one per
 * company, suitable for conversion into a worksheet.
 * @param result Object keyed by company name, each value holding `count` and `postings`
 * @returns {Array<{company: string, count: *, postings: string}>} rows in key order,
 *          with `postings` serialized to a JSON string
 */
const formatToExcel = (result) => {
  const rows = [];
  for (const company of Object.keys(result)) {
    const entry = result[company];
    rows.push({
      company,
      count: entry.count,
      postings: JSON.stringify(entry.postings),
    });
  }
  return rows;
};
/**
 * Writes the collected results to a timestamped .xlsx workbook under ./output.
 * Failures are logged rather than thrown, so a write error never aborts the run.
 * @param results Object keyed by company name (see formatToExcel for the row shape)
 * @returns {void}
 */
const writeResultsExcel = (results) => {
  // Only hyphens in the timestamp: ':' is not a legal file-name character on Windows.
  const fileName = `${format(new Date(), 'MM-dd-yyyy-k-mm-ss')}_total.xlsx`;
  try {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync('./output', { recursive: true });
    const ws = XLSX.utils.json_to_sheet(formatToExcel(results));
    const wb = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(wb, ws, `RESULTS`);
    XLSX.writeFile(wb, `./output/${fileName}`);
  } catch (err) {
    console.log('Failed to write results to excel: ', err);
  }
};
/**
 * Points the page at a URL and waits for the `goto` call to settle.
 * The value resolved by `page.goto` is discarded.
 * @param page browser.Page
 * @param url String
 * @returns {Promise<void>}
 */
const navigateTo = async function (page, url) {
  await page.goto(url);
};
/**
 * Locates the pagination "next" control, clicks it through Common.click and
 * then pauses for two seconds before returning.
 * @param page browser.Page
 * @param selector Object forwarded unchanged to getNext
 * @returns {Promise<void>}
 */
const clickNext = async (page, selector) => {
  const nextControl = await getNext(page, selector);
  await Common.click(nextControl, 1);
  // fixed pause after the click before the caller carries on
  const pauseMs = 2000;
  await delay(pauseMs);
};
/**
 * Waits two seconds, then dismisses the results popover if one is present.
 * A failure while closing is logged and otherwise ignored.
 * @param page browser.Page
 * @returns {Promise<boolean>} true when a popover was detected (whether or not
 *          closing it succeeded), false when none was found
 */
const handlePopover = async (page) => {
  await delay(2000);
  const popoverPresent = await checkExists(page, Config.selectors.indeed.results.popover);
  if (!popoverPresent) {
    return false;
  }
  try {
    await closePopover(page, Config.selectors.indeed.results.closePopover);
  } catch (err) {
    console.log('Error closing popover');
  }
  await delay(1000);
  return true;
};
/**
 * Collects every element on the page that matches the selector.
 * @param page browser.Page
 * @param selector String
 * @returns {Promise<*>} whatever `page.$$` resolves to for the selector
 */
const getAllElements = async (page, selector) => {
  const matches = await page.$$(selector);
  return matches;
};
/**
 * Looks up the first element on the page that matches the selector.
 * @param page browser.Page
 * @param selector String
 * @returns {Promise<*>} whatever `page.$` resolves to for the selector
 */
const getElement = async (page, selector) => {
  const match = await page.$(selector);
  return match;
};
/**
 * Finds the popover's close control and clicks it with a click count of two.
 * Rejects if no element matches, since the click is attempted on the lookup result as-is.
 * @param page browser.Page
 * @param selector String selector of the close control
 * @returns {Promise<void>}
 */
const closePopover = async (page, selector) => {
  const closeControl = await getElement(page, selector);
  const doubleClick = { clickCount: 2 };
  await closeControl.click(doubleClick);
};
/**
 * Reports whether at least one element matches the selector.
 * @param page browser.Page
 * @param selector String
 * @returns {Promise<boolean>} true when getElement resolves to a truthy value
 */
const checkExists = async (page, selector) => {
  return Boolean(await getElement(page, selector));
};
/**
 * Finds the control that leads to the next page of results.
 * The arrow selector is tried first. If nothing matches, the span candidates are
 * scanned in order and the first whose text contains "next" (case-insensitive) wins.
 * @param page browser.Page
 * @param selector Object with `arrow` and `span` selector strings
 * @returns {Promise<*>} the matching element, or null when there is none
 */
const getNext = async (page, selector) => {
  const arrow = await getElement(page, selector.arrow);
  if (arrow) {
    return arrow;
  }

  // fall back to a text match on the span candidates
  const candidates = await getAllElements(page, selector.span);
  for (const candidate of candidates) {
    const label = (await Common.getElementInnerText(candidate)).toLowerCase();
    if (label.includes('next')) {
      return candidate;
    }
  }
  return null;
};
/**
 * Reports whether a "next page" control is currently available.
 * @param page browser.Page
 * @param selector Object forwarded unchanged to getNext
 * @returns {Promise<boolean>} true when getNext resolves to a truthy value
 */
const checkNext = async (page, selector) => {
  return Boolean(await getNext(page, selector));
};
/**
 * Scrapes the result rows of the current page and merges them into the
 * module-level `companyResults` accumulator.
 *
 * For every row that yields a company name, the company's `count` is incremented
 * (or initialised to 1) and one posting object is appended. The posting holds one
 * key per entry of the `details` selector map whose element produced non-empty text.
 * Driving the properties from that map means fields can be added or removed in the
 * config without touching this function.
 *
 * If the in-page evaluation fails, the error is logged and the results gathered so
 * far are kept untouched; a failed page never wipes the accumulator.
 *
 * @param page browser.Page
 * @returns {Promise<void>}
 */
const handleResults = async (page) => {
  try {
    // The callback runs inside the page, so everything it needs is passed in as
    // arguments and the merged accumulator is handed back as the return value.
    const updated = await page.evaluate((resultSelectors, accumulated) => {
      // Text of the first descendant matching `selector`, or '' when the lookup fails.
      const textOf = (root, selector) => {
        try {
          return root.querySelector(selector).innerText;
        } catch (err) {
          return '';
        }
      };

      const details = resultSelectors.details;
      const detailKeys = Object.keys(details);

      for (const row of document.querySelectorAll(resultSelectors.row)) {
        const company = textOf(row, resultSelectors.key);
        if (!company) continue; // rows without a company name are skipped

        const existing = accumulated[company];
        if (existing && existing.count >= 1) {
          existing.count += 1;
        } else {
          // create default company
          accumulated[company] = { count: 1, postings: [] };
        }

        const posting = {};
        for (const key of detailKeys) {
          const value = textOf(row, details[key]);
          if (value) posting[key] = value;
        }
        accumulated[company].postings.push(posting);
      }
      return accumulated;
    }, Config.selectors.indeed.results, companyResults);

    // Only replace the accumulator when the page actually returned one.
    if (updated) {
      companyResults = updated;
    }
  } catch (err) {
    console.log('No results found: ', err);
  }
};
/**
 * Fills in the two search fields: the search term and the location.
 * Each field is focused, cleared and then typed into. The whole sequence runs
 * twice; per the original author's note this is to get rid of cached values.
 * Every step is awaited so clearing and typing never race the focus change.
 * @param page browser.Page
 * @param selector Object with `main` (selector matching both inputs, in order),
 *        `desc` and `loc` (selectors handed to Common.clearTextField)
 * @param input String typed into the first field
 * @param location String typed into the second field
 * @returns {Promise<void>}
 * @throws {Error} when fewer than two inputs match `selector.main`
 */
const inputSearchParams = async (page, selector, input, location) => {
  const searchInputs = await getAllElements(page, selector.main);
  const found = searchInputs ? searchInputs.length : 0;
  if (found < 2) {
    throw new Error(`Expected 2 search inputs for "${selector.main}", found ${found}`);
  }
  const [descInput, locInput] = searchInputs;

  // focus clear type
  for (let pass = 0; pass < 2; pass++) { // do it twice to remove cached
    await descInput.focus();
    await Common.clearTextField(page, selector.desc);
    await Common.type(descInput, input);

    await locInput.focus();
    await Common.clearTextField(page, selector.loc);
    await Common.type(locInput, location);
  }
};
/**
 * Triggers the search by clicking the submit control through Common.navigateClick,
 * passing along the configured results-row selector and a `true` flag.
 * @param page browser.Page
 * @param selector String selector of the submit control
 * @returns {Promise<void>}
 */
const submit = async (page, selector) => {
  const button = await getElement(page, selector);
  const { row } = Config.selectors.indeed.results;
  await Common.navigateClick(page, button, row, true);
};
/**
 * Entry point. Reads the search list from ./input/input.txt (one "term+location"
 * per line), runs every search against the configured landing page, walks through
 * the result pages, and finally writes the accumulated results to a spreadsheet.
 * The browser is always closed, even when a step fails.
 */
(async () => {
  // get .txt values
  const args = readFile('./input/input.txt');

  // establish browser and page
  const browser = await puppeteer.launch({
    headless: true
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1000,
      height: 1000
    });

    // navigate to indeed landing page
    await navigateTo(page, Config.urls.indeed);

    // loop through and perform search
    for (const arg of args) {
      // Trimming also drops a stray "\r" left over from CRLF input files.
      const line = arg.trim();
      if (!line) continue; // skip empty lines

      // assign search inputs
      const [input, location] = line.split('+');

      try {
        // grab input fields
        await inputSearchParams(page, Config.selectors.indeed.inputs.search, input, location);

        // submit result
        await submit(page, Config.selectors.indeed.inputs.submit);

        // handle popup
        await handlePopover(page);

        // gather results, one page at a time
        let advanced;
        do {
          // Only loop again when the click to the next page actually went through;
          // otherwise a failing click would re-scrape the same page forever.
          advanced = false;

          let hasNext = false;
          try {
            hasNext = await checkNext(page, Config.selectors.indeed.results.next);
          } catch (err) {
            console.log(err);
          }

          try {
            await handleResults(page);
            if (hasNext) {
              await clickNext(page, Config.selectors.indeed.results.next);
              advanced = true;
              await handlePopover(page);
            }
          } catch (err) {
            console.log('Error: ', err);
          }
        } while (advanced);
      } catch (err) {
        console.log('Error: ', err);
      }
    }

    // handle results
    writeResultsExcel(companyResults);
  } finally {
    // close browser
    await browser.close();
  }
})().catch((err) => {
  // Anything that escaped the handlers above (e.g. launch or navigation failure).
  console.log('Error: ', err);
  process.exitCode = 1;
});