Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Member enrichment llm lfx 1712 #2688

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
-- History of LLM prompts: each row stores the prompt text, the answer text, the model used,
-- the input/output token counts, the response time and a creation timestamp.
-- "entityId" and metadata are the only nullable columns.
create table "llmPromptHistory" (
    id                    bigserial              not null,
    type                  character varying(255) not null,
    model                 text                   not null,
    "entityId"            text,
    metadata              jsonb,
    prompt                text                   not null,
    answer                text                   not null,
    "inputTokenCount"     integer                not null,
    "outputTokenCount"    integer                not null,
    -- presumably measured in seconds, per the column name; fractional values are allowed
    "responseTimeSeconds" numeric                not null,
    -- filled in automatically at insert time when not supplied
    "createdAt"           timestamptz            not null default now(),
    primary key (id)
);
Comment on lines +1 to +13
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider additional constraints and optimizations

  1. The type column could be more efficient as an enum type
  2. The model column should have a CHECK constraint to validate allowed models
  3. The metadata column should have validation to ensure JSON structure
+create type llm_prompt_type as enum (
+    'organization_merge_suggestion',
+    'member_merge_suggestion'
+    -- add other types as needed
+);
+
 create table "llmPromptHistory" (
     id                    bigserial primary key,
-    type                  varchar(255) not null,
+    type                  llm_prompt_type not null,
     model                 text         not null,
+                         check (model in ('anthropic.claude-v2', 'anthropic.claude-v1')),
     "entityId"            text         null,
-    metadata              jsonb        null,
+    metadata              jsonb        null check (jsonb_typeof(metadata) = 'object'),
     prompt                text         not null,
     answer                text         not null,
     "inputTokenCount"     int          not null,
     "outputTokenCount"    int          not null,
     "responseTimeSeconds" decimal      not null,
     "createdAt"           timestamptz  not null default now()
 );

Committable suggestion skipped: line range outside the PR's diff.


-- Lookup indexes for "llmPromptHistory".
-- The composite ("type", "entityId") B-tree also serves predicates on "type" alone
-- (leading-column rule), so a separate single-column index on "type" would only add
-- write and storage overhead to every insert; it is intentionally not created.
create index "ix_llmPromptHistory_type_entityId" on "llmPromptHistory" ("type", "entityId");

-- "entityId" is not the leading column of the composite index, so lookups by
-- "entityId" alone need their own index.
create index "ix_llmPromptHistory_entityId" on "llmPromptHistory" ("entityId");

-- Full-copy backups of members, "memberIdentities" and "memberOrganizations".
--
-- Each backup is a single `create table ... as select`, which creates the table and
-- copies every row in one statement (one snapshot per table), so a backup table can
-- never be left behind empty by a failure between a separate create and insert.
-- `select *` is deliberate here: a backup must carry every column of the source table.
-- Note: `create table as` copies column names, types and data only; indexes,
-- constraints and defaults of the source tables are not reproduced.

-- backup members table
create table members_backup_14_11_2024 as
select *
from members;

-- backup memberIdentities table
create table member_identities_backup_14_11_2024 as
select *
from "memberIdentities";

-- backup memberOrganizations table
create table member_organizations_backup_14_11_2024 as
select *
from "memberOrganizations";
Comment on lines +19 to +50
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve backup strategy

  1. Use dynamic date generation instead of hardcoding
  2. Add row count validation
  3. Consider cleanup strategy for old backups
+do $$
+declare
+    backup_date text := to_char(current_timestamp, 'DD_MM_YYYY');
+    members_count int;
+    member_identities_count int;
+    member_organizations_count int;
+begin
+    -- Get original counts
+    select count(*) into members_count from members;
+    select count(*) into member_identities_count from "memberIdentities";
+    select count(*) into member_organizations_count from "memberOrganizations";
+
     -- backup members table
-    create table members_backup_14_11_2024 as
+    execute format('create table members_backup_%s as
     select *
     from members
-        with no data;
+        with no data', backup_date);

     -- Copy all data
-    insert into members_backup_14_11_2024
+    execute format('insert into members_backup_%s
     select *
-    from members;
+    from members', backup_date);

+    -- Validate backup
+    execute format('
+        if (select count(*) from members_backup_%s) != $1 then
+            raise exception ''Members backup validation failed'';
+        end if
+    ', backup_date) using members_count;

     -- Similar changes for other tables...

+    -- Cleanup old backups (older than 30 days)
+    for backup_table in
+        select tablename 
+        from pg_tables 
+        where tablename like 'members_backup_%'
+           or tablename like 'member_identities_backup_%'
+           or tablename like 'member_organizations_backup_%'
+    loop
+        if to_date(right(backup_table, 10), 'DD_MM_YYYY') < current_date - interval '30 days' then
+            execute format('drop table if exists %I', backup_table);
+        end if;
+    end loop;
+end $$;

Committable suggestion skipped: line range outside the PR's diff.

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Remove the "oldEmails" column from members.
-- No `if exists`: the statement fails if the column is absent.
alter table members drop column "oldEmails";

-- Remove the "oldWeakIdentities" column from members (same failure behaviour as above).
alter table members drop column "oldWeakIdentities";
Loading
Loading