Skip to content

Commit

Permalink
V1.3 ~90% success rate when searching for a product in different stores
Browse files Browse the repository at this point in the history
  • Loading branch information
chriSmile0 committed Mar 27, 2024
1 parent 06d5d20 commit 84e871c
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 173 deletions.
64 changes: 10 additions & 54 deletions src/scrape_su.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,79 +44,39 @@ async function scrape(url,town,product) {
await page.goto(url);
const url_ = await page.url();
console.log(url_);
/*console.log(town);
console.log(product);*/
console.log(town);
console.log(product);
const dD = url_.indexOf("on/demand");
console.log(dD);
if(dD != -1) {
console.log("DataDome activate");
break_js.loadedBrk(page,url,'#captcha__puzzle','.slider',"canva_rd.png","screen_su.png");
// -- TEST IN NEXT COMMIT -- //
}
else {
console.log("No DataDome");
}
//await page.screenshot({ path: 'screen_t.png', fullPage: true });
// -----------------------NO DETECTION BOT USAGE (15 hit OK) ----------------------------//
await page.waitForTimeout(2000);
await page.waitForSelector('#popin_tc_privacy_button_2');
await page.click('#popin_tc_privacy_button_2');
// -----------------------NO DETECTION BOT USAGE (15 hit OK)----------------------------//

await page.waitForSelector('#store-search');
await page.type('#store-search',town,{delay: 100});

// -----------------------NO DETECTION BOT USAGE (15 hit OK)----------------------------//
// -- WAITED ISSUE (1/2 try is waited issue)-- //
await page.waitForTimeout(3000);
//await page.screenshot({ path: 'screen_ab_prehome.png', fullPage: true });
await page.waitForTimeout(3000); //-> necessary ? delay ?
await page.waitForSelector('.ab-prehome-search-suggestion');
await page.click('.ab-prehome-search-suggestion');

// -----------------------NO DETECTION BOT USAGE(12 hit OK)----------------------------//

// PRECISE RESEARCH WITH ONE ELEMENT RESULT, MAYBE MORE IN THE FUTURE
await page.waitForTimeout(5000);
//await page.screenshot({ path: 'screen_store_delivery_mode_arrow.png', fullPage: true });
await page.waitForSelector('.store-delivery-mode-arrow'); // click on the store
//CHECK FOR HAVE ALL ARROW WITH THE TEXT TO SELECT THE STORE !!!!
await page.click('.store-delivery-mode-arrow');

// -----------------------NO DETECTION BOT USAGE(15 hit OK)----------------------------//

await page.waitForTimeout(3000);
await page.screenshot({ path: 'screen_new_client_alert.png', fullPage: true }); // necessary Idk why
const wq = await page.$('xpath//html/body/div[7]/div[1]/button');
//console.log(wq);
//const wq_pos = await wq.boundingBox();
//console.log(wq_pos);
if(wq === null) { // - NO Question
console.log("no wq"); // OK
}
else {
//await page.waitForSelector('#q'); // click on the store
//await page.click('#q');
const wq_pos = await wq.boundingBox();
console.log(wq_pos);
console.log("message to quit");
if(wq_pos !== null)
wq.click();
}

// -----------------------NO DETECTION BOT USAGE(15 hit OK)----------------------------//
// -- TEXT ITERABLE ISSUE -- //

/*const cookie2 = await page.$('#popin_tc_privacy_button_2');
if(cookie2 === null)
console.log("no cookie");
else
await page.click('#popin_tc_privacy_button_2');*/

await page.waitForTimeout(2000);
await page.screenshot({ path: 'screen_search2_.png', fullPage: true });
const wq2 = await page.$('xpath//html/body/div[7]/div[1]/button');
// console.log(wq2);
if(wq2 === null) { // - NO Question
await page.waitForSelector('.ui-button');
const wq2 = await page.$('.ui-button');
wq2.click();
/*if(wq2 === null) { // - NO Question
console.log("no wq2"); // OK
}
else {
Expand All @@ -127,19 +87,17 @@ async function scrape(url,town,product) {
console.log("message to quit2");
if(wq2_pos !== null)
wq2.click();
}
await page.waitForTimeout(2000);
}*/
await page.waitForTimeout(1000);
await page.type('#q',product,{delay: 100});
//await page.keyboard.press('Enter'); -> error detection bot activate with that
// test first suggestions
await page.waitForTimeout(1000);
const suggets1 = await page.$('xpath//html/body/div[3]/main/div[1]/header/div[4]/div[2]/div/div[1]/div[1]/div[1]/a');
if(suggets1 !== null) {
const suggets1_bouding = await suggets1.boundingBox();
console.log(suggets1_bouding);
suggets1.click();
}
await page.waitForTimeout(3000);
await page.waitForTimeout(5000);

// -----------------------NO DETECTION BOT USAGE(n hit OK ?) ----------------------------//

Expand Down Expand Up @@ -168,8 +126,6 @@ async function scrape(url,town,product) {
retour += sub_line.substring(0,sub_line.indexOf("page_filter")) + "\n";
}
console.log(retour);
await page.waitForTimeout(7000);
await page.screenshot({ path: 'screen_f.png', fullPage: true });
await browser.close();
}
url = "https://www.coursesu.com/drive/home";
Expand Down
40 changes: 23 additions & 17 deletions src/scrapper_auchan.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ function generate_driver_a() {
$capabilities = DesiredCapabilities::firefox();
$firefoxOptions = new FirefoxOptions;
$firefoxOptions->addArguments(['-headless']);
$firefoxOptions->addArguments(['--headless']);
$capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions);
try {
return RemoteWebDriver::create($host,$capabilities);
Expand All @@ -76,10 +76,12 @@ function generate_driver_a() {
}*/

//------------FirefoxDriver, geckodriver directly on this process--------//
shell_exec("kill -s kill `ps -e | grep -e geckodriver | grep -Eo '[0-9]{1,10}' | head -n 1`");
sleep(1);
$firefoxOptions = new FirefoxOptions();
$firefoxOptions->setProfile(new FirefoxProfile());
$capabilities = DesiredCapabilities::firefox();
$firefoxOptions->addArguments(['-headless']);
$firefoxOptions->addArguments(['--headless']);
$capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions);
try {
return FirefoxDriver::start($capabilities);
Expand Down Expand Up @@ -112,9 +114,9 @@ function findElement_a($driver, string $type, string $path, string $error, strin
case 'id':
try {
if($type_2 === "presence")
$driver->wait()->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::id($path))); // this or sleep
$driver->wait(1)->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::id($path))); // this or sleep
else
$driver->wait()->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::id($path)));
$driver->wait(1)->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::id($path)));
$elem = $driver->findElement(WebDriverBy::id($path));
}
catch (Exception $e) {
Expand All @@ -124,9 +126,9 @@ function findElement_a($driver, string $type, string $path, string $error, strin
case 'tag':
try {
if($type_2 === "presence")
$driver->wait()->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::tagName($path))); // this or sleep
$driver->wait(1)->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::tagName($path))); // this or sleep
else
$driver->wait()->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::tagName($path)));
$driver->wait(1)->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::tagName($path)));
$elem = $driver->findElement(WebDriverBy::tagName($path));
}
catch (Exception $e) {
Expand All @@ -136,9 +138,9 @@ function findElement_a($driver, string $type, string $path, string $error, strin
case 'class':
try {
if($type_2 === "presence")
$driver->wait()->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::className($path))); // this or sleep
$driver->wait(1)->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::className($path))); // this or sleep
else
$driver->wait()->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::className($path)));
$driver->wait(1)->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::className($path)));
$elem = $driver->findElement(WebDriverBy::className($path));
}
catch (Exception $e) {
Expand All @@ -148,9 +150,9 @@ function findElement_a($driver, string $type, string $path, string $error, strin
case 'xpath';
try {
if($type_2 === "presence")
$driver->wait()->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::xpath($path))); // this or sleep
$driver->wait(1)->until(WebDriverExpectedCondition::presenceOfElementLocated(WebDriverBy::xpath($path))); // this or sleep
else
$driver->wait()->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::xpath($path)));
$driver->wait(1)->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::xpath($path)));
$elem = $driver->findElement(WebDriverBy::xpath($path));
}
catch (Exception $e) {
Expand All @@ -163,7 +165,7 @@ function findElement_a($driver, string $type, string $path, string $error, strin
break;
}
}
var_dump($error);
//var_dump($error);
return [$elem,$error];
}

Expand Down Expand Up @@ -292,10 +294,13 @@ function extract_source_auchan(string $url,$driver, string $town, string $target
}
$res_find = findElement_a($driver,"xpath","/html/body/div[13]/div[1]/main/div[1]/div[2]/div[2]/section/div[1]/div/div/div[2]/form/button",$res_find[1]);
if($res_find[0]!=="") $res_find[0]->submit();

echo "submit form\n";

try {
sleep(1);
sleep(4); // necessary for load all products
$prods = $driver->findElements(WebDriverBy::xpath('/html/body/div[3]/div[2]/div[2]/div[4]/article'));
sleep(1);
}
catch(Exception $e) {
$res_find[1] = $e->getMessage();
Expand All @@ -318,14 +323,15 @@ function extract_source_auchan(string $url,$driver, string $town, string $target

/**
* [BRIEF] The main procedure -> intended to be included and called from other files
* @param string $url the url to scrap
*
* @param string $target_product the target product
* @param string $town the research area
* @example content_scrap_auchan("lardons","Paris")
* @author chriSmile0
* @return array array of all product with specific information that we needed
*/
function content_scrap_auchan(string $url, string $target_product, string $town) : array {
function content_scrap_auchan(string $target_product, string $town) : array {
$url = "https://www.auchan.fr/";
$driver = generate_driver_a();
if($driver === NULL)
return array();
Expand Down Expand Up @@ -368,15 +374,15 @@ function content_scrap_auchan(string $url, string $target_product, string $town)
* test or if the scraping failed
*/
function main_a($argc, $argv) : bool {
if($argc == 5) {
if(empty(content_scrap_auchan($argv[1],$argv[2],$argv[3]))) {
if($argc == 4) {
if(empty(content_scrap_auchan($argv[1],$argv[2]))) {
echo "NO CORRESPONDENCE FOUND \n";
return 0;
}
return 1;
}
else {
echo "ERROR : format : ". $argv[0] . " [url] [research_product_type] [town] --with-openssl\n";
echo "ERROR : format : ". $argv[0] . "[research_product_type] [town] --with-openssl\n";
return 0;
}
echo "EXECUTION FINISH WITH SUCCESS \n";
Expand Down
Loading

0 comments on commit 84e871c

Please sign in to comment.