From ff88caff491350c6acac6f9e3d139506a9e155ef Mon Sep 17 00:00:00 2001 From: Joellensilva Date: Tue, 27 Feb 2024 15:52:49 -0300 Subject: [PATCH] atualizando url e adicionando user-agent --- crawler.go | 16 +++++++++------- main.go | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/crawler.go b/crawler.go index 7985d42..8570357 100644 --- a/crawler.go +++ b/crawler.go @@ -16,6 +16,7 @@ import ( type crawler struct { collectionTimeout time.Duration timeBetweenSteps time.Duration + downloadTimeout time.Duration year string month string output string @@ -34,6 +35,7 @@ func (c crawler) crawl() ([]string, error) { chromedp.Flag("headless", true), // mude para false para executar com navegador visível. chromedp.NoSandbox, chromedp.DisableGPU, + chromedp.UserAgent("Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36"), )..., ) defer allocCancel() @@ -108,11 +110,11 @@ func (c crawler) abreCaixaDialogo(ctx context.Context, tipo string) error { var baseURL string var selectYear string if tipo == "contra" { - baseURL = "http://www.mpap.mp.br/transparencia/index.php?pg=consulta_folha_membros_ativos" - selectYear = `//select[@id="ano"]` + baseURL = "https://portal.mpap.mp.br/transparencia/index.php?pg=consulta_folha_membros_ativos" + selectYear = `//*[@id="ano"]` } else { - baseURL = "https://www.mpap.mp.br/transparencia/index.php?pg=consulta_verbas_indenizatorias" - selectYear = `//select[@id="ano_verbas"]` + baseURL = "https://portal.mpap.mp.br/transparencia/index.php?pg=consulta_verbas_indenizatorias" + selectYear = `//*[@id="ano_verbas"]` } return chromedp.Run(ctx, @@ -124,7 +126,7 @@ func (c crawler) abreCaixaDialogo(ctx context.Context, tipo string) error { chromedp.Sleep(c.timeBetweenSteps), // Seleciona mes - chromedp.SetValue(`//select[@id="mes"]`, strings.TrimPrefix(c.month, "0"), chromedp.BySearch, chromedp.NodeVisible), + 
+ chromedp.SetValue(`//*[@id="mes"]`, strings.TrimPrefix(c.month, "0"), chromedp.BySearch, chromedp.NodeVisible), chromedp.Sleep(c.timeBetweenSteps), // Busca @@ -144,13 +146,13 @@ func (c crawler) exportaPlanilha(ctx context.Context, fName string) error { chromedp.Run(ctx, // Clica no botão de download chromedp.Click(`/html/body/div[1]/center/div/fieldset/div[3]/form/button[2]`, chromedp.BySearch, chromedp.NodeVisible), - chromedp.Sleep(c.timeBetweenSteps), + chromedp.Sleep(c.downloadTimeout), ) } else { chromedp.Run(ctx, // Clica no botão de download chromedp.Click(`/html/body/div[1]/center/div/fieldset/div/form/button[2]`, chromedp.BySearch, chromedp.NodeVisible), - chromedp.Sleep(c.timeBetweenSteps), + chromedp.Sleep(c.downloadTimeout), ) } diff --git a/main.go b/main.go index 3f9aa07..c3afd35 100644 --- a/main.go +++ b/main.go @@ -11,7 +11,8 @@ import ( const ( defaultGeneralTimeout = 4 * time.Minute // Duração máxima total da coleta de todos os arquivos. Valor padrão calculado a partir de uma média de execuções ~4.5min - defaulTimeBetweenSteps = 5 * time.Second //Tempo de espera entre passos do coletor." + defaulTimeBetweenSteps = 5 * time.Second //Tempo de espera entre passos do coletor." 
+ defaultFileDownloadTimeout = 20 * time.Second // Duração que o coletor deve esperar até que o download de cada um dos arquivos seja concluído ) func main() { @@ -51,9 +52,18 @@ func main() { log.Fatalf("Invalid TIME_BETWEEN_STEPS (\"%s\"): %q", os.Getenv("TIME_BETWEEN_STEPS"), err) } } + downloadTimeout := defaultFileDownloadTimeout + if os.Getenv("DOWNLOAD_TIMEOUT") != "" { + var err error + downloadTimeout, err = time.ParseDuration(os.Getenv("DOWNLOAD_TIMEOUT")) + if err != nil { + log.Fatalf("Invalid DOWNLOAD_TIMEOUT (\"%s\"): %q", os.Getenv("DOWNLOAD_TIMEOUT"), err) + } + } c := crawler{ collectionTimeout: generalTimeout, timeBetweenSteps: timeBetweenSteps, + downloadTimeout: downloadTimeout, year: year, month: month, output: outputFolder, @@ -66,4 +76,4 @@ func main() { // O parser do MPAP espera os arquivos separados por \n. Mudanças aqui tem // refletir as expectativas lá. fmt.Println(strings.Join(downloads, "\n")) -} \ No newline at end of file +}