Skip to content

Commit

Permalink
Merge pull request #3 from dadosjusbr/atualizando-parser
Browse files Browse the repository at this point in the history
atualizando parser - mudança na estrutura das planilhas
  • Loading branch information
joellensilva authored Jul 2, 2024
2 parents 0461410 + bd9b75d commit c22da11
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 3 deletions.
5 changes: 4 additions & 1 deletion src/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ def catch(month, year):
# A planilha de indenizações de maio de 2020 é diferente dos demais meses, tendo mais de 200 colunas.
# A planilha de indenizações de junho de 2020 possui menos colunas
# As planilhas seguem um padrão diferente a partir de junho de 2020
if year == 2020 and month in [5, 6, 7] or year == 2019 and month == 6:
if (year == 2020 and month in [5, 6, 7]) or (year == 2019 and month == 6) or (year == 2024 and month == 1):
metadata.formato_consistente = False
else:
metadata.formato_consistente = True

if year >= 2024:
metadata.estritamente_tabular = True

return metadata
48 changes: 46 additions & 2 deletions src/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import number
import re
import numpy as np

from coleta import coleta_pb2 as Coleta

Expand Down Expand Up @@ -165,6 +166,30 @@ def remunerations_2019_2020(row):
remuneration_array.remuneracao.append(rem)
return remuneration_array


def remunerations_2024(row):
    """Build the remuneration entries carried by one row of the 2024 layout.

    The row is read as two (item, value) column pairs:

    * columns 4 and 5 -> category "Verbas indenizatórias"
    * columns 6 and 7 -> category "Outras Remunerações Temporárias"

    A pair is skipped when its item cell is NaN (as judged by
    ``number.is_nan``) or is the literal string ``"N/A"``.

    Args:
        row: indexable row of the indemnities sheet; positions 4-7 are read.

    Returns:
        A ``Coleta.Remuneracoes`` message holding zero, one or two
        ``Remuneracao`` entries, in column order.
    """
    # (item column, value column, category label), in spreadsheet order.
    column_pairs = (
        (4, 5, "Verbas indenizatórias"),
        (6, 7, "Outras Remunerações Temporárias"),
    )

    result = Coleta.Remuneracoes()
    for item_col, value_col, category in column_pairs:
        label = row[item_col]
        # An empty or "N/A" item cell means there is nothing to record.
        if number.is_nan(label) or label == "N/A":
            continue

        entry = Coleta.Remuneracao()
        entry.natureza = Coleta.Remuneracao.Natureza.Value("R")
        entry.categoria = category
        entry.item = label
        entry.valor = float(number.format_value(row[value_col]))
        entry.tipo_receita = Coleta.Remuneracao.TipoReceita.Value("O")
        result.remuneracao.append(entry)
    return result


# A planilha mantém esse formato até 05/2019
def remunerations_2018(row):
remuneration_array = Coleta.Remuneracoes()
Expand Down Expand Up @@ -236,6 +261,24 @@ def update_employees_2020_06(data, employees):
employees[registration] = emp
return employees


# The spreadsheets follow a different layout from 2024 onwards.
def update_employees_2024(data, employees):
    """Merge the indemnities sheet (2024 layout) into the known employees.

    Each row of ``data.indenizacoes`` is attributed to a registration taken
    from its first cell. When that cell stringifies to ``"nan"`` the row
    reuses the registration of the most recent row that had one
    (presumably continuation rows leave the cell blank -- confirm against
    a real spreadsheet). Rows whose registration is not a key of
    ``employees`` are ignored.

    Args:
        data: object exposing ``indenizacoes``, an iterable of indexable rows.
        employees: mapping of registration string -> employee message with a
            ``remuneracoes`` field; matching entries are updated in place.

    Returns:
        The same ``employees`` mapping that was passed in.
    """
    # NaN is never a key of ``employees``, so rows that appear before the
    # first registration is seen are skipped.
    registration = np.nan
    for row in data.indenizacoes:
        if str(row[0]) != "nan":
            registration = str(row[0])
        if registration in employees:
            # MergeFrom mutates the stored message, so no re-assignment into
            # the mapping is needed. The 2020/06 special case that used to
            # live here was removed: that period is dispatched to
            # update_employees_2020_06 and never reaches this function.
            employees[registration].remuneracoes.MergeFrom(
                remunerations_2024(row)
            )
    return employees


# As planilhas de indenizações possuem um padrão de 2018 a maio de 2019
def update_employees_2018(data, employees):
for row in data:
Expand Down Expand Up @@ -269,9 +312,10 @@ def parse(data, colect_key):
elif int(data.year) == 2020 and int(data.month) == 5:
update_employees_05_20(data, employees)

# Reference period: June 2020 through December 2023; later periods fall through to the final else branch.
else:
elif (int(data.year) > 2020 and int(data.year) < 2024) or int(data.year) == 2020 and int(data.month) >= 6:
update_employees_2020_06(data, employees)
else:
update_employees_2024(data, employees)

for i in employees.values():
payroll.contra_cheque.append(i)
Expand Down

0 comments on commit c22da11

Please sign in to comment.