From 813df478880a558a4eda82ccdb708b091f8dd545 Mon Sep 17 00:00:00 2001 From: Joellensilva Date: Thu, 30 Nov 2023 15:17:58 -0300 Subject: [PATCH 1/6] agregando rubricas --- main.go | 162 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 112 insertions(+), 50 deletions(-) diff --git a/main.go b/main.go index d89d520..826f2f2 100644 --- a/main.go +++ b/main.go @@ -1,9 +1,11 @@ package main import ( + "encoding/json" "fmt" "io/ioutil" "math" + "net/http" "os" "regexp" "strings" @@ -18,6 +20,7 @@ import ( "github.com/dadosjusbr/storage/repo/database" "github.com/dadosjusbr/storage/repo/file_storage" "github.com/kelseyhightower/envconfig" + "golang.org/x/exp/slices" "golang.org/x/text/runes" "golang.org/x/text/transform" "golang.org/x/text/unicode/norm" @@ -118,60 +121,13 @@ func main() { status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to store Remunerations zip in Postgres: %v, error: %v", er.Pr.Remuneracoes, err))) } - agmi := models.AgencyMonthlyInfo{ - AgencyID: er.Rc.Coleta.Orgao, - Month: int(er.Rc.Coleta.Mes), - Year: int(er.Rc.Coleta.Ano), - CrawlerRepo: er.Rc.Coleta.RepositorioColetor, - CrawlerVersion: er.Rc.Coleta.VersaoColetor, - ParserRepo: er.Rc.Coleta.RepositorioParser, - ParserVersion: er.Rc.Coleta.VersaoParser, - CrawlingTimestamp: er.Rc.Coleta.TimestampColeta, - Summary: summary(er.Rc.Folha.ContraCheque), - Backups: []models.Backup{*s3Backups}, - Meta: &models.Meta{ - OpenFormat: er.Rc.Metadados.FormatoAberto, - Access: er.Rc.Metadados.Acesso.String(), - Extension: er.Rc.Metadados.Extensao.String(), - StrictlyTabular: er.Rc.Metadados.EstritamenteTabular, - ConsistentFormat: er.Rc.Metadados.FormatoConsistente, - HaveEnrollment: er.Rc.Metadados.TemMatricula, - ThereIsACapacity: er.Rc.Metadados.TemLotacao, - HasPosition: er.Rc.Metadados.TemCargo, - BaseRevenue: er.Rc.Metadados.ReceitaBase.String(), - OtherRecipes: er.Rc.Metadados.OutrasReceitas.String(), - Expenditure: er.Rc.Metadados.Despesas.String(), - }, - Score: &models.Score{ - Score: float64(er.Rc.Metadados.IndiceTransparencia), - EasinessScore: float64(er.Rc.Metadados.IndiceFacilidade), - CompletenessScore: float64(er.Rc.Metadados.IndiceCompletude), - }, - ProcInfo: er.Rc.Procinfo, - Package: s3Backup, - } - // Calculando o tempo de execução da coleta - if c.StartTime != "" { - layout := "2006-01-02 15:04:05.000000" // formato data-hora - t, err := time.Parse(layout, c.StartTime) // transformando a hora (string) para o tipo time.Time - if err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error calculating collection time: %v", err))) - } - Duration := time.Since(t) // Calcula a diferença da hora dada com a hora atual (UTC+0) - agmi.Duration = Duration.Seconds() - } - if er.Rc.Procinfo != nil && er.Rc.Procinfo.Status != 0 { - agmi.ProcInfo = er.Rc.Procinfo - } - - if err = pgS3Client.Store(agmi); err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to store 'coleta': %v", err))) - } - var paychecks []models.Paycheck var remunerations []models.PaycheckItem m, _ := regexp.Compile("[A-Za-z]") + // Mapeando as rubricas distintas da folha de contracheque + valor_por_rubrica := make(map[string]float64) + // Contracheques for id, p := range er.Rc.Folha.ContraCheque { salary, benefits, discounts, remuneration := calcBaseSalary(*p) @@ -216,12 +172,70 @@ func main() { } else { // Se a rubrica não for inconsistente, faremos uma cópia sanitizada na coluna item_sanitizado. itemSanitizado := sanitizarItem(r.Item) + // agregamos o valor por rubrica (não considerando descontos) + if r.Natureza != coleta.Remuneracao_D { + valor_por_rubrica[itemSanitizado] += math.Abs(r.Valor) + } remunerations[len(remunerations)-1].SanitizedItem = &itemSanitizado } i++ } } } + + itemSummary := agregandoRubricas(valor_por_rubrica) + + agmi := models.AgencyMonthlyInfo{ + AgencyID: er.Rc.Coleta.Orgao, + Month: int(er.Rc.Coleta.Mes), + Year: int(er.Rc.Coleta.Ano), + CrawlerRepo: er.Rc.Coleta.RepositorioColetor, + CrawlerVersion: er.Rc.Coleta.VersaoColetor, + ParserRepo: er.Rc.Coleta.RepositorioParser, + ParserVersion: er.Rc.Coleta.VersaoParser, + CrawlingTimestamp: er.Rc.Coleta.TimestampColeta, + Summary: summary(er.Rc.Folha.ContraCheque), + Backups: []models.Backup{*s3Backups}, + Meta: &models.Meta{ + OpenFormat: er.Rc.Metadados.FormatoAberto, + Access: er.Rc.Metadados.Acesso.String(), + Extension: er.Rc.Metadados.Extensao.String(), + StrictlyTabular: er.Rc.Metadados.EstritamenteTabular, + ConsistentFormat: er.Rc.Metadados.FormatoConsistente, + HaveEnrollment: er.Rc.Metadados.TemMatricula, + ThereIsACapacity: er.Rc.Metadados.TemLotacao, + HasPosition: er.Rc.Metadados.TemCargo, + BaseRevenue: er.Rc.Metadados.ReceitaBase.String(), + OtherRecipes: er.Rc.Metadados.OutrasReceitas.String(), + Expenditure: er.Rc.Metadados.Despesas.String(), + }, + Score: &models.Score{ + Score: float64(er.Rc.Metadados.IndiceTransparencia), + EasinessScore: float64(er.Rc.Metadados.IndiceFacilidade), + CompletenessScore: float64(er.Rc.Metadados.IndiceCompletude), + }, + ProcInfo: er.Rc.Procinfo, + Package: s3Backup, + ItemSummary: &itemSummary, + } + // Calculando o tempo de execução da coleta + if c.StartTime != "" { + layout := "2006-01-02 15:04:05.000000" // formato data-hora + t, err := time.Parse(layout, c.StartTime) // transformando a hora (string) para o tipo time.Time + if err != nil { + status.ExitFromError(status.NewError(2, fmt.Errorf("error calculating collection time: %v", err))) + } + Duration := time.Since(t) // Calcula a diferença da hora dada com a hora atual (UTC+0) + agmi.Duration = Duration.Seconds() + } + if er.Rc.Procinfo != nil && er.Rc.Procinfo.Status != 0 { + agmi.ProcInfo = er.Rc.Procinfo + } + + if err = pgS3Client.Store(agmi); err != nil { + status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to store 'coleta': %v", err))) + } + if err := pgS3Client.StorePaychecks(paychecks, remunerations); err != nil { status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to store 'contracheques' and 'remuneracoes': %v", err))) } @@ -341,3 +355,51 @@ func sanitizarItem(item string) string { return item } + +// Realiza o download do json com as rubricas desambiguadas +func listarRubricas() map[string][]string { + // json com rubricas desambiguadas + url := "https://raw.githubusercontent.com/dadosjusbr/desambiguador/main/rubricas.json" + + res, err := http.Get(url) + if err != nil { + status.ExitFromError(status.NewError(status.ConnectionError, err)) + } + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + status.ExitFromError(status.NewError(status.SystemError, err)) + } + + var rubricas map[string][]string + + // unmarshall + json.Unmarshal(body, &rubricas) + + return rubricas +} + +// Com a lista de rubricas distintas da folha de contracheque (e seu somatório), +// comparamos com a lista de rubricas desambiguadas e criamos o json da coluna 'resumo' +// alocando o valor de cada rubrica a seu respectivo grupo. +func agregandoRubricas(valor_por_rubrica map[string]float64) models.ItemSummary { + rubricas := listarRubricas() + var itemSummary models.ItemSummary + var others float64 + + for rubrica, valor := range valor_por_rubrica { + for chave, listaRubricas := range rubricas { + others = valor + if slices.Contains(listaRubricas, rubrica) { + switch chave { + case "auxilio-alimentacao": + itemSummary.FoodAllowance += valor + others = 0 + } + break + } + } + itemSummary.Others += others + } + return itemSummary +} From 8a8d4f166ce3250db5a91833ab0f10e6f5626c56 Mon Sep 17 00:00:00 2001 From: Joellensilva Date: Mon, 4 Dec 2023 13:46:12 -0300 Subject: [PATCH 2/6] add campo 'resumo_rubricas' a coluna sumario --- go.mod | 5 +++-- go.sum | 4 ++++ main.go | 13 ++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 6483190..8b32012 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,10 @@ module armazenador -go 1.17 +go 1.18 require ( github.com/dadosjusbr/proto v0.0.0-20221212025627-91c60aa3cd12 - github.com/dadosjusbr/storage v0.0.0-20231026210858-4a308036eb85 + github.com/dadosjusbr/storage v0.0.0-20231202000028-bee3d3aef9a9 github.com/kelseyhightower/envconfig v1.4.0 google.golang.org/protobuf v1.28.1 ) @@ -30,6 +30,7 @@ require ( github.com/newrelic/go-agent/v3 v3.19.2 // indirect github.com/newrelic/go-agent/v3/integrations/nrpq v1.1.1 // indirect golang.org/x/crypto v0.1.0 // indirect + golang.org/x/exp v0.0.0-20231127185646-65229373498e golang.org/x/net v0.1.0 // indirect golang.org/x/sys v0.1.0 // indirect golang.org/x/text v0.4.0 diff --git a/go.sum b/go.sum index b622cbd..7486827 100644 --- a/go.sum +++ b/go.sum @@ -31,6 +31,8 @@ github.com/dadosjusbr/status v0.0.0-20230428151814-b605fe0e598f h1:MHtPcEeoJ4CCO github.com/dadosjusbr/status v0.0.0-20230428151814-b605fe0e598f/go.mod h1:7M8ds2L3u+rHuiP+IImNTx7bSlIUz6hXzW0ZTBOeHzw= github.com/dadosjusbr/storage v0.0.0-20231026210858-4a308036eb85 h1:WL3E1SK52OwLoq4pEZwjV4C8P7HyfmwmYumu9KGA/L4= github.com/dadosjusbr/storage v0.0.0-20231026210858-4a308036eb85/go.mod h1:JTg/Wu2+wn9SV1J19dDeEronive9J6QelMA7K6vLO8I= +github.com/dadosjusbr/storage v0.0.0-20231202000028-bee3d3aef9a9 h1:nn0jgylIokUL72rv3raZA+Ih5fuSyCO8zg0nw72PPXk= +github.com/dadosjusbr/storage v0.0.0-20231202000028-bee3d3aef9a9/go.mod h1:PszGy6CDoG3kNLjIsCmwD3MAWED7xL7U/OWj7ajsiHc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -252,6 +254,8 @@ golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0 golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20231127185646-65229373498e h1:Gvh4YaCaXNs6dKTlfgismwWZKyjVZXwOPfIyUaqU3No= +golang.org/x/exp v0.0.0-20231127185646-65229373498e/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= diff --git a/main.go b/main.go index 826f2f2..4c6487e 100644 --- a/main.go +++ b/main.go @@ -183,8 +183,6 @@ func main() { } } - itemSummary := agregandoRubricas(valor_por_rubrica) - agmi := models.AgencyMonthlyInfo{ AgencyID: er.Rc.Coleta.Orgao, Month: int(er.Rc.Coleta.Mes), @@ -194,7 +192,7 @@ func main() { ParserRepo: er.Rc.Coleta.RepositorioParser, ParserVersion: er.Rc.Coleta.VersaoParser, CrawlingTimestamp: er.Rc.Coleta.TimestampColeta, - Summary: summary(er.Rc.Folha.ContraCheque), + Summary: summary(er.Rc.Folha.ContraCheque, valor_por_rubrica), Backups: []models.Backup{*s3Backups}, Meta: &models.Meta{ OpenFormat: er.Rc.Metadados.FormatoAberto, @@ -214,9 +212,8 @@ func main() { EasinessScore: float64(er.Rc.Metadados.IndiceFacilidade), CompletenessScore: float64(er.Rc.Metadados.IndiceCompletude), }, - ProcInfo: er.Rc.Procinfo, - Package: s3Backup, - ItemSummary: &itemSummary, + ProcInfo: er.Rc.Procinfo, + Package: s3Backup, } // Calculando o tempo de execução da coleta if c.StartTime != "" { @@ -244,9 +241,11 @@ func main() { } // summary aux func to make all necessary calculations to DataSummary Struct -func summary(employees []*coleta.ContraCheque) *models.Summary { +func summary(employees []*coleta.ContraCheque, valor_por_rubrica map[string]float64) *models.Summary { + itemSummary := agregandoRubricas(valor_por_rubrica) memberActive := models.Summary{ IncomeHistogram: map[int]int{10000: 0, 20000: 0, 30000: 0, 40000: 0, 50000: 0, -1: 0}, + ItemSummary: itemSummary, } for _, emp := range employees { // checking if the employee instance has the required data to build the summary From f625082ce73f5c60018f820d0025b0bc35fd7eb3 Mon Sep 17 00:00:00 2001 From: Joellensilva Date: Mon, 11 Dec 2023 17:18:26 -0300 Subject: [PATCH 3/6] =?UTF-8?q?endere=C3=A7ando=20coment=C3=A1rios?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.go | 83 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/main.go b/main.go index 4c6487e..45a3afe 100644 --- a/main.go +++ b/main.go @@ -20,7 +20,6 @@ import ( "github.com/dadosjusbr/storage/repo/database" "github.com/dadosjusbr/storage/repo/file_storage" "github.com/kelseyhightower/envconfig" - "golang.org/x/exp/slices" "golang.org/x/text/runes" "golang.org/x/text/transform" "golang.org/x/text/unicode/norm" @@ -48,34 +47,34 @@ type config struct { func main() { var c config if err := envconfig.Process("", &c); err != nil { - status.ExitFromError(status.NewError(4, fmt.Errorf("error loading config values from .env: %v", err.Error()))) + status.ExitFromError(status.NewError(status.DataUnavailable, fmt.Errorf("error loading config values from .env: %v", err.Error()))) } // Criando o client do Postgres postgresDB, err := database.NewPostgresDB(c.PostgresUser, c.PostgresPassword, c.PostgresDBName, c.PostgresHost, c.PostgresPort) if err != nil { - status.ExitFromError(status.NewError(4, fmt.Errorf("error creating PostgresDB client: %v", err.Error()))) + status.ExitFromError(status.NewError(status.DataUnavailable, fmt.Errorf("error creating PostgresDB client: %v", err.Error()))) } // Criando o client do S3 s3Client, err := file_storage.NewS3Client(c.AWSRegion, c.S3Bucket) if err != nil { - status.ExitFromError(status.NewError(4, fmt.Errorf("error creating S3 client: %v", err.Error()))) + status.ExitFromError(status.NewError(status.DataUnavailable, fmt.Errorf("error creating S3 client: %v", err.Error()))) } // Criando client do storage a partir do banco postgres e do client do s3 pgS3Client, err := storage.NewClient(postgresDB, s3Client) if err != nil { - status.ExitFromError(status.NewError(3, fmt.Errorf("error setting up postgres storage client: %s", err))) + status.ExitFromError(status.NewError(status.ConnectionError, fmt.Errorf("error setting up postgres storage client: %s", err))) } defer pgS3Client.Db.Disconnect() var er pipeline.ResultadoExecucao erIN, err := ioutil.ReadAll(os.Stdin) if err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error reading execution result: %v", err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error reading execution result: %v", err))) } if err = prototext.Unmarshal(erIN, &er); err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error reading execution result: %v", err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error reading execution result: %v", err))) } // Package. @@ -83,7 +82,7 @@ func main() { status.ExitFromError(status.NewError(status.InvalidInput, fmt.Errorf("there is no package to store. PackageResult:%+v", er.Pr))) } - //Armazenando os datapackages no S3 + // Armazenando os datapackages no S3 dstKey := fmt.Sprintf("%s/datapackage/%s-%d-%d.zip", er.Rc.Coleta.Orgao, er.Rc.Coleta.Orgao, er.Rc.Coleta.Ano, er.Rc.Coleta.Mes) s3Backup, err := pgS3Client.Cloud.UploadFile(er.Pr.Pacote, dstKey) if err != nil { @@ -95,18 +94,18 @@ func main() { status.ExitFromError(status.NewError(2, fmt.Errorf("no backup files found: CrawlingResult:%+v", er.Cr))) } */ - //Armazenando os backups no S3 + // Armazenando os backups no S3 dstKey = fmt.Sprintf("%s/backups/%s-%d-%d.zip", er.Rc.Coleta.Orgao, er.Rc.Coleta.Orgao, er.Rc.Coleta.Ano, er.Rc.Coleta.Mes) s3Backups, err := pgS3Client.Cloud.UploadFile(er.Rc.Coleta.Arquivos[0], dstKey) if err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to get Backup files from S3: %v, error: %v", er.Rc.Coleta.Arquivos, err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error trying to get Backup files from S3: %v, error: %v", er.Rc.Coleta.Arquivos, err))) } //Armazenando as remuneracoes no S3 e no postgres dstKey = fmt.Sprintf("%s/remuneracoes/%s-%d-%d.zip", er.Rc.Coleta.Orgao, er.Rc.Coleta.Orgao, er.Rc.Coleta.Ano, er.Rc.Coleta.Mes) _, err = pgS3Client.Cloud.UploadFile(er.Pr.Remuneracoes.ZipUrl, dstKey) if err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to upload Remunerations zip in S3: %v, error: %v", er.Pr.Remuneracoes, err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error trying to upload Remunerations zip in S3: %v, error: %v", er.Pr.Remuneracoes, err))) } err = pgS3Client.StoreRemunerations(models.Remunerations{ AgencyID: er.Rc.Coleta.Orgao, @@ -118,7 +117,7 @@ func main() { ZipUrl: fmt.Sprintf("https://%s.s3.amazonaws.com/%s", c.S3Bucket, dstKey), }) if err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to store Remunerations zip in Postgres: %v, error: %v", er.Pr.Remuneracoes, err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error trying to store Remunerations zip in Postgres: %v, error: %v", er.Pr.Remuneracoes, err))) } var paychecks []models.Paycheck @@ -126,7 +125,7 @@ func main() { m, _ := regexp.Compile("[A-Za-z]") // Mapeando as rubricas distintas da folha de contracheque - valor_por_rubrica := make(map[string]float64) + itemValues := make(map[string]float64) // Contracheques for id, p := range er.Rc.Folha.ContraCheque { @@ -174,7 +173,7 @@ func main() { itemSanitizado := sanitizarItem(r.Item) // agregamos o valor por rubrica (não considerando descontos) if r.Natureza != coleta.Remuneracao_D { - valor_por_rubrica[itemSanitizado] += math.Abs(r.Valor) + itemValues[itemSanitizado] += math.Abs(r.Valor) } remunerations[len(remunerations)-1].SanitizedItem = &itemSanitizado } @@ -192,7 +191,7 @@ func main() { ParserRepo: er.Rc.Coleta.RepositorioParser, ParserVersion: er.Rc.Coleta.VersaoParser, CrawlingTimestamp: er.Rc.Coleta.TimestampColeta, - Summary: summary(er.Rc.Folha.ContraCheque, valor_por_rubrica), + Summary: summary(er.Rc.Folha.ContraCheque, itemValues), Backups: []models.Backup{*s3Backups}, Meta: &models.Meta{ OpenFormat: er.Rc.Metadados.FormatoAberto, @@ -217,10 +216,10 @@ func main() { } // Calculando o tempo de execução da coleta if c.StartTime != "" { - layout := "2006-01-02 15:04:05.000000" // formato data-hora - t, err := time.Parse(layout, c.StartTime) // transformando a hora (string) para o tipo time.Time + const layout = "2006-01-02 15:04:05.000000" // formato data-hora + t, err := time.Parse(layout, c.StartTime) // transformando a hora (string) para o tipo time.Time if err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error calculating collection time: %v", err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error calculating collection time: %v", err))) } Duration := time.Since(t) // Calcula a diferença da hora dada com a hora atual (UTC+0) agmi.Duration = Duration.Seconds() @@ -230,19 +229,19 @@ func main() { } if err = pgS3Client.Store(agmi); err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to store 'coleta': %v", err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error trying to store 'coleta': %v", err))) } if err := pgS3Client.StorePaychecks(paychecks, remunerations); err != nil { - status.ExitFromError(status.NewError(2, fmt.Errorf("error trying to store 'contracheques' and 'remuneracoes': %v", err))) + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error trying to store 'contracheques' and 'remuneracoes': %v", err))) } fmt.Println("Store Executed...") } // summary aux func to make all necessary calculations to DataSummary Struct -func summary(employees []*coleta.ContraCheque, valor_por_rubrica map[string]float64) *models.Summary { - itemSummary := agregandoRubricas(valor_por_rubrica) +func summary(employees []*coleta.ContraCheque, itemValues map[string]float64) *models.Summary { + itemSummary := aggregatingItems(itemValues) memberActive := models.Summary{ IncomeHistogram: map[int]int{10000: 0, 20000: 0, 30000: 0, 40000: 0, 50000: 0, -1: 0}, ItemSummary: itemSummary, @@ -356,9 +355,10 @@ func sanitizarItem(item string) string { } // Realiza o download do json com as rubricas desambiguadas -func listarRubricas() map[string][]string { +// Ex. saída: map["auxilio-alimentacao":map["alimentacao":{}, "aux alimentacao":{}...]] +func getItems() map[string]map[string]struct{} { // json com rubricas desambiguadas - url := "https://raw.githubusercontent.com/dadosjusbr/desambiguador/main/rubricas.json" + const url = "https://raw.githubusercontent.com/dadosjusbr/desambiguador/main/rubricas.json" res, err := http.Get(url) if err != nil { @@ -370,29 +370,42 @@ func listarRubricas() map[string][]string { status.ExitFromError(status.NewError(status.SystemError, err)) } - var rubricas map[string][]string + var itemJson map[string][]string // unmarshall - json.Unmarshal(body, &rubricas) + if err := json.Unmarshal(body, &itemJson); err != nil { + status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error unmarshalling 'rubricas.json': %w", err))) + } + + // json: cannot unmarshal string into Go value of type struct {} + // Esse processo visa facilitar a iteração mútua de rubricas do contracheque <> rubricas desambiguadas + // E se faz necessário uma vez que não é possível formatar o json diretamente para esse formato/tipo. + itemStruct := make(map[string]map[string]struct{}) + for key, values := range itemJson { + itemStruct[key] = make(map[string]struct{}) + for _, value := range values { + itemStruct[key][value] = struct{}{} + } + } - return rubricas + return itemStruct } // Com a lista de rubricas distintas da folha de contracheque (e seu somatório), // comparamos com a lista de rubricas desambiguadas e criamos o json da coluna 'resumo' // alocando o valor de cada rubrica a seu respectivo grupo. -func agregandoRubricas(valor_por_rubrica map[string]float64) models.ItemSummary { - rubricas := listarRubricas() +func aggregatingItems(itemValues map[string]float64) models.ItemSummary { + items := getItems() var itemSummary models.ItemSummary var others float64 - for rubrica, valor := range valor_por_rubrica { - for chave, listaRubricas := range rubricas { - others = valor - if slices.Contains(listaRubricas, rubrica) { - switch chave { + for item, value := range itemValues { + for key, listItems := range items { + others = value + if _, ok := listItems[item]; ok { + switch key { case "auxilio-alimentacao": - itemSummary.FoodAllowance += valor + itemSummary.FoodAllowance += value others = 0 } break From cf4098fd72aa941485b61ddfadba41f3da0f22ca Mon Sep 17 00:00:00 2001 From: Joellensilva Date: Wed, 13 Dec 2023 17:02:25 -0300 Subject: [PATCH 4/6] desambiguando/agregando valor de rubricas --- go.mod | 1 - go.sum | 7 ------- main.go | 26 ++++++++++++-------------- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/go.mod b/go.mod index 8b32012..3b79aea 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,6 @@ require ( github.com/newrelic/go-agent/v3 v3.19.2 // indirect github.com/newrelic/go-agent/v3/integrations/nrpq v1.1.1 // indirect golang.org/x/crypto v0.1.0 // indirect - golang.org/x/exp v0.0.0-20231127185646-65229373498e golang.org/x/net v0.1.0 // indirect golang.org/x/sys v0.1.0 // indirect golang.org/x/text v0.4.0 diff --git a/go.sum b/go.sum index 7486827..adee21c 100644 --- a/go.sum +++ b/go.sum @@ -29,8 +29,6 @@ github.com/dadosjusbr/proto v0.0.0-20221212025627-91c60aa3cd12 h1:ufl8nbCEo6g2VH github.com/dadosjusbr/proto v0.0.0-20221212025627-91c60aa3cd12/go.mod h1:gPA7VxjEmyez/xtln4qBj+tM1NO0/zcw3ryjxTRNSco= github.com/dadosjusbr/status v0.0.0-20230428151814-b605fe0e598f h1:MHtPcEeoJ4CCOvFDzvYXU2AndoqiIvZdrOjWACaImLA= github.com/dadosjusbr/status v0.0.0-20230428151814-b605fe0e598f/go.mod h1:7M8ds2L3u+rHuiP+IImNTx7bSlIUz6hXzW0ZTBOeHzw= -github.com/dadosjusbr/storage v0.0.0-20231026210858-4a308036eb85 h1:WL3E1SK52OwLoq4pEZwjV4C8P7HyfmwmYumu9KGA/L4= -github.com/dadosjusbr/storage v0.0.0-20231026210858-4a308036eb85/go.mod h1:JTg/Wu2+wn9SV1J19dDeEronive9J6QelMA7K6vLO8I= github.com/dadosjusbr/storage v0.0.0-20231202000028-bee3d3aef9a9 h1:nn0jgylIokUL72rv3raZA+Ih5fuSyCO8zg0nw72PPXk= github.com/dadosjusbr/storage v0.0.0-20231202000028-bee3d3aef9a9/go.mod h1:PszGy6CDoG3kNLjIsCmwD3MAWED7xL7U/OWj7ajsiHc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -91,7 +89,6 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/jackc/chunkreader v1.0.0 h1:4s39bBR8ByfqH+DKm8rQA3E1LHZWB9XWcrz8fqaZbe0= github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= @@ -113,7 +110,6 @@ github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65 h1:DadwsjnMwFjfWc9y5W github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65/go.mod h1:5R2h2EEX+qri8jOWMbJCtaPWkrrNc7OHwsp2TCqp7ak= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= -github.com/jackc/pgproto3 v1.1.0 h1:FYYE4yRw+AgI8wXIinMlNjBbp/UitDJwfj5LqqewP1A= github.com/jackc/pgproto3 v1.1.0/go.mod h1:eR5FA3leWg7p9aeAqi37XOTgTIbkABlvcPB3E5rlc78= github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190420180111-c116219b62db/go.mod h1:bhq50y+xrl9n5mRYyCBFKkpRVTLYJVWeCc+mEAI3yXA= github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190609003834-432c2951c711/go.mod h1:uH0AWtUmuShn0bcesswc4aBTWGvw0cAxIJp+6OB//Wg= @@ -156,7 +152,6 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= -github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df/go.mod h1:mAVCUAYtW9NG31eB30umMSLKcDt6mCUWSjoSn5qBh0k= github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= github.com/kelseyhightower/envconfig v1.4.0/go.mod h1:cccZRl6mQpaq41TPp5QxidR+Sa3axMbJDNb//FQX6Gg= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= @@ -254,8 +249,6 @@ golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0 golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20231127185646-65229373498e h1:Gvh4YaCaXNs6dKTlfgismwWZKyjVZXwOPfIyUaqU3No= -golang.org/x/exp v0.0.0-20231127185646-65229373498e/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= diff --git a/main.go b/main.go index 45a3afe..b70cff3 100644 --- a/main.go +++ b/main.go @@ -356,7 +356,7 @@ func sanitizarItem(item string) string { // Realiza o download do json com as rubricas desambiguadas // Ex. saída: map["auxilio-alimentacao":map["alimentacao":{}, "aux alimentacao":{}...]] -func getItems() map[string]map[string]struct{} { +func getItems() map[string][]string { // json com rubricas desambiguadas const url = "https://raw.githubusercontent.com/dadosjusbr/desambiguador/main/rubricas.json" @@ -377,18 +377,7 @@ func getItems() map[string]map[string]struct{} { status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("error unmarshalling 'rubricas.json': %w", err))) } - // json: cannot unmarshal string into Go value of type struct {} - // Esse processo visa facilitar a iteração mútua de rubricas do contracheque <> rubricas desambiguadas - // E se faz necessário uma vez que não é possível formatar o json diretamente para esse formato/tipo. - itemStruct := make(map[string]map[string]struct{}) - for key, values := range itemJson { - itemStruct[key] = make(map[string]struct{}) - for _, value := range values { - itemStruct[key][value] = struct{}{} - } - } - - return itemStruct + return itemJson } // Com a lista de rubricas distintas da folha de contracheque (e seu somatório), @@ -399,8 +388,17 @@ func aggregatingItems(itemValues map[string]float64) models.ItemSummary { var itemSummary models.ItemSummary var others float64 + // Esse processo visa facilitar a iteração mútua de rubricas do contracheque <> rubricas desambiguadas + itemStruct := make(map[string]map[string]struct{}) + for key, values := range items { + itemStruct[key] = make(map[string]struct{}) + for _, value := range values { + itemStruct[key][value] = struct{}{} + } + } + for item, value := range itemValues { - for key, listItems := range items { + for key, listItems := range itemStruct { others = value if _, ok := listItems[item]; ok { switch key { From ca1f524f697deef10c7ce2a8ce4922f43e5c8e32 Mon Sep 17 00:00:00 2001 From: Joellensilva Date: Wed, 13 Dec 2023 17:04:52 -0300 Subject: [PATCH 5/6] =?UTF-8?q?removendo=20coment=C3=A1rio?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/main.go b/main.go index b70cff3..10308e5 100644 --- a/main.go +++ b/main.go @@ -355,7 +355,6 @@ func sanitizarItem(item string) string { } // Realiza o download do json com as rubricas desambiguadas -// Ex. saída: map["auxilio-alimentacao":map["alimentacao":{}, "aux alimentacao":{}...]] func getItems() map[string][]string { // json com rubricas desambiguadas const url = "https://raw.githubusercontent.com/dadosjusbr/desambiguador/main/rubricas.json" From 7d0c60a34b0fd8d2e10da1b1df51c51841a1dcfb Mon Sep 17 00:00:00 2001 From: Joellensilva Date: Thu, 14 Dec 2023 14:54:33 -0300 Subject: [PATCH 6/6] =?UTF-8?q?endere=C3=A7ando=20=20coment=C3=A1rios?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 10308e5..a9d124b 100644 --- a/main.go +++ b/main.go @@ -388,9 +388,9 @@ func aggregatingItems(itemValues map[string]float64) models.ItemSummary { var others float64 // Esse processo visa facilitar a iteração mútua de rubricas do contracheque <> rubricas desambiguadas - itemStruct := make(map[string]map[string]struct{}) + itemStruct := make(map[string]map[string]struct{}, len(items)) for key, values := range items { - itemStruct[key] = make(map[string]struct{}) + itemStruct[key] = make(map[string]struct{}, len(values)) for _, value := range values { itemStruct[key][value] = struct{}{} }