forked from royopa/anbima_scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdebentures_mercado_secundario.py
109 lines (84 loc) · 3.21 KB
/
debentures_mercado_secundario.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import csv
import os
import time
from datetime import datetime, timedelta
import pandas as pd
import pyexcel_xls
import requests
from tqdm import tqdm
import utils
def remove_old_files():
    """Delete stale .xls files from downloads/ that are not dated today.

    File names are expected to end with a DD.MM.YYYY date right before the
    '.xls' suffix; any .xls whose date differs from today is removed.
    Non-.xls files are left alone.
    """
    path_download = os.path.join('downloads')
    # exist_ok avoids the check-then-create race of the original
    os.makedirs(path_download, exist_ok=True)
    # BUG FIX: original called datetime.datetime.now(), which raises
    # AttributeError under `from datetime import datetime`.
    # Also hoisted out of the loop: today does not change per file.
    today = datetime.now().strftime('%d.%m.%Y')
    for file_name in os.listdir(path_download):
        if not file_name.endswith('.xls'):
            continue
        # last 10 chars before '.xls' carry the DD.MM.YYYY stamp
        data_arquivo = file_name.split('.xls')[-2][-10:]
        if today != data_arquivo:
            os.remove(os.path.join(path_download, file_name))
def download_file(url, dt_referencia, file_name):
    """Download the ANBIMA daily file for *dt_referencia* into *file_name*.

    The final URL is *url* + YYMMDD + '.txt'. Returns False when the server
    does not answer 200, True on a completed download (the original
    implicitly returned None on success).
    """
    url = url + dt_referencia.strftime('%y%m%d') + '.txt'
    response = requests.get(url, stream=True)
    if response.status_code != 200:
        print(url, 'Erro', response.status_code)
        return False
    # `with` already closes the file — the original's explicit
    # handle.close() inside the block was redundant and is removed.
    with open(file_name, "wb") as handle:
        for data in tqdm(response.iter_content()):
            handle.write(data)
    return True
def generate_csv_base(df, path_file_base):
    """Rewrite the base CSV at *path_file_base* sorted by 'dt_referencia'.

    NOTE: *df* is ignored — the original immediately shadowed it by
    re-reading the file from disk. The parameter is kept only so existing
    callers keep working; the data of record is the file itself.
    The file is read, sorted, indexed on 'dt_referencia' and written back
    in place (';'-separated, as elsewhere in this script).
    """
    base = pd.read_csv(path_file_base, sep=';')
    base = base.sort_values('dt_referencia')
    base.set_index('dt_referencia', inplace=True)
    base.to_csv(path_file_base, sep=';')
def generate_xlsx_base(df, path_saida):
    """Write DataFrame *df* to the .xlsx file *path_saida* (sheet 'Sheet1').

    Uses the xlsxwriter engine, as the original did.
    """
    # Context manager closes and saves the workbook on exit;
    # ExcelWriter.save() was deprecated and removed in pandas 2.0.
    with pd.ExcelWriter(path_saida, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Sheet1')
def xrange(x):
    """Python-2-style helper: lazily yield the integers 0 .. x-1."""
    for i in range(x):
        yield i
def datetime_range(start=None, end=None):
    """Yield every date from *start* to *end*, inclusive of both ends.

    *start* and *end* are date/datetime objects whose difference supports
    `.days`. Yields start, start+1 day, ..., end.
    """
    span = end - start
    # Use the builtin range directly instead of going through the local
    # Python-2 `xrange` shim — same iteration, no needless indirection.
    for i in range(span.days + 1):
        yield start + timedelta(days=i)
def main():
    """Download ANBIMA secondary-market debenture files for each day in the
    window [last base date .. today], skipping files already on disk.
    """
    # apaga arquivos antigos
    remove_old_files()

    # última data disponível na base: 6 business days back, per the
    # project calendar (utils.get_calendar)
    today = datetime.now().date()
    cal = utils.get_calendar()
    ultima_data_base = cal.offset(today, -6)
    dates_range = list(utils.datetime_range(start=ultima_data_base, end=today))

    # faz o download do arquivo diário no site da ANBIMA
    url = 'https://www.anbima.com.br/informacoes/merc-sec-debentures/arqs/db'
    # Hoisted out of the loop: the target directory never changes per
    # iteration, and makedirs(exist_ok=True) also creates 'downloads'.
    path_download = os.path.join('downloads', 'debentures')
    os.makedirs(path_download, exist_ok=True)
    for dt_referencia in reversed(dates_range):
        file_path = os.path.join(
            path_download,
            dt_referencia.strftime('%y%m%d') + '.txt'
        )
        # baixa apenas se ainda não tiver sido baixado
        if not os.path.exists(file_path):
            download_file(url, dt_referencia, file_path)
    print("Arquivos baixados com sucesso")


if __name__ == '__main__':
    main()