-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathexemplo-llm.r
117 lines (94 loc) · 2.44 KB
/
exemplo-llm.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# curl https://api.openai.com/v1/chat/completions \
# -H "Content-Type: application/json" \
# -H "Authorization: Bearer $OPENAI_API_KEY" \
# -d '{
# "model": "gpt-4o",
# "messages": [],
# "temperature": 0,
# "max_tokens": 1861,
# "top_p": 1,
# "frequency_penalty": 0,
# "presence_penalty": 0
# }'
# Fetch pages 1 to 20 of the search for subject code 3608 and store the
# downloaded files in the "live_llm" directory.
# NOTE(review): the meaning of code 3608 is not stated in this file --
# presumably a drug-related subject, given the prompt file name; confirm.
tjsp::baixar_cjpg(
  diretorio = "live_llm",
  assunto = 3608,
  paginas = 1:20
)

# Hand every path found in "live_llm" to the tjsp reader and open the
# resulting object in the data viewer for inspection.
dados_processos <- tjsp::tjsp_ler_cjpg(fs::dir_ls("live_llm"))
View(dados_processos)
#' Analyse one decision text with the OpenAI chat completions API
#'
#' Sends the contents of "prompt_drogas.md" (read from the working directory)
#' as the system message and `txt` as the user message to the "gpt-4o" model,
#' with temperature 0 and a JSON-object response format, then parses the
#' reply of the first choice.
#'
#' Errors are raised (rather than returned) when the `OPENAI_API_KEY`
#' environment variable is empty, when the API answers with an HTTP error
#' status, or when the response carries no message content.
#'
#' @param txt Text sent as the user message.
#' @return A tibble built from the JSON object returned by the model.
analise_gpt <- function(txt) {
  # Check the credential first: without it the request can only fail, so
  # avoid the file read and the network round trip.
  # The key is expected in the environment (see usethis::edit_r_environ()).
  api_key <- Sys.getenv("OPENAI_API_KEY")
  if (!nzchar(api_key)) {
    stop(
      "Environment variable OPENAI_API_KEY is not set.",
      call. = FALSE
    )
  }

  u_openai <- "https://api.openai.com/v1/chat/completions"

  messages <- list(
    list(
      role = "system",
      content = readr::read_file("prompt_drogas.md")
    ),
    list(
      role = "user",
      content = txt
    )
  )

  body <- list(
    model = "gpt-4o",
    messages = messages,
    temperature = 0,
    response_format = list("type" = "json_object")
  )

  headers <- httr::add_headers(
    "Authorization" = paste("Bearer", api_key)
  )

  res <- httr::POST(
    u_openai,
    body = body,
    headers,
    encode = "json"
  )

  # Surface HTTP failures (bad key, rate limit, server error) with the real
  # status instead of an unrelated JSON parsing error further down.
  httr::stop_for_status(res, task = "query the OpenAI chat completions API")

  conteudo <- res |>
    httr::content() |>
    purrr::pluck("choices", 1, "message", "content")

  if (is.null(conteudo)) {
    stop(
      "OpenAI response has no message content in the first choice.",
      call. = FALSE
    )
  }

  # The model was asked for a JSON object, so the content is itself JSON.
  conteudo |>
    jsonlite::fromJSON(simplifyDataFrame = TRUE) |>
    tibble::as_tibble()
}
# Dimensions of the full data set, shown as a quick sanity check.
dim(dados_processos)

# Fixed seed so the random sample drawn below is reproducible.
set.seed(42)

# Keep one row per value of `processo`, then draw 30 rows at random.
dados_processos_amostra <- dplyr::slice_sample(
  dplyr::distinct(dados_processos, processo, .keep_all = TRUE),
  n = 30
)

# Wrapper around `analise_gpt()` that returns a one-column "erro" tibble
# instead of raising, so a single failed call does not abort the whole run.
safe_analise_gpt <- purrr::possibly(
  analise_gpt,
  otherwise = tibble::tibble(erro = "erro")
)

# Apply the wrapper to each `julgado` text; the resulting list is named by
# the matching `processo` value.
resultado <- purrr::map(
  purrr::set_names(
    dados_processos_amostra$julgado,
    dados_processos_amostra$processo
  ),
  safe_analise_gpt,
  .progress = TRUE
)
# Stack the per-case results into a single table keyed by `processo`.
# Results without an `outras_drogas` element (which includes the "erro"
# fallback tibble) yield NULL and so contribute no rows.
base_final <- purrr::list_rbind(
  purrr::map(resultado, function(res) {
    if (is.null(res$outras_drogas)) {
      return(NULL)
    }
    expandido <- tidyr::unnest(res, outras_drogas)
    # Cast every column to character so results whose column types differ
    # can still be bound together.
    dplyr::mutate(
      expandido,
      dplyr::across(dplyr::everything(), as.character)
    )
  }),
  names_to = "processo"
)
View(base_final)
# Long-format table: one row per case and per column whose name ends in
# "em_g", keeping the decision and the sentence alongside.
base_tidy <- base_final |>
  dplyr::select(processo, dplyr::ends_with("em_g"), decisao, pena) |>
  tidyr::pivot_longer(
    cols = dplyr::ends_with("em_g"),
    names_to = "name",
    values_to = "value"
  ) |>
  # Drop entries explicitly marked as unspecified.
  dplyr::filter(!(value %in% "não especificado")) |>
  dplyr::mutate(
    # First run of lowercase letters in the original column name.
    tipo_droga = stringr::str_extract(name, "[a-z]+"),
    # Pull the numeric part out of the free-text quantity and sentence.
    value = readr::parse_number(value),
    pena = readr::parse_number(pena)
  )
# Scatter plot of `value` against `pena`, restricted to rows with `value`
# below 4000 (presumably grams, given the "em_g" column suffix -- confirm).
ggplot2::ggplot(
  dplyr::filter(base_tidy, value < 4000),
  ggplot2::aes(x = value, y = pena)
) +
  ggplot2::geom_point()