Skip to content

Commit

Permalink
Tons of work here on extract and some improvements to Django Vector Store.
Browse files Browse the repository at this point in the history
  • Loading branch information
JSv4 committed Jun 11, 2024
1 parent bdfe0eb commit 231f2fe
Show file tree
Hide file tree
Showing 36 changed files with 708 additions and 333 deletions.
9 changes: 7 additions & 2 deletions config/graphql/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,18 @@ def filter_by_labelset_id(self, queryset, name, value):
return queryset.filter(included_in_labelset__pk=django_pk)

def filter_by_used_in_labelset_for_corpus_id(self, queryset, name, value):

print(f"Raw corpus id: {value}")
django_pk = from_global_id(value)[1]
print("Lookup labels for pk", django_pk)
queryset = queryset.filter(
Q(included_in_labelset__used_by_corpus=django_pk)
)
print(
"Filtered to values",
queryset.filter(included_in_labelset__used_by_corpus_id=django_pk),
queryset,
)
return queryset.filter(included_in_labelset__used_by_corpus_id=django_pk)
return queryset.filter(included_in_labelset__used_by_corpus=django_pk)

class Meta:
model = AnnotationLabel
Expand Down
37 changes: 33 additions & 4 deletions config/graphql/mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,8 @@ class Arguments:
instructions = graphene.String(required=False)
language_model_id = graphene.ID(required=False)
agentic = graphene.Boolean(required=False)
extract_is_list = graphene.Boolean(required=False)
must_contain_text = graphene.String(required=False)

ok = graphene.Boolean()
message = graphene.String()
Expand All @@ -1552,8 +1554,10 @@ def mutate(
limit_to_label=None,
instructions=None,
agentic=None,
extract_is_list=None,
language_model_id=None,
fieldset_id=None,
must_contain_text=None
):

ok = False
Expand Down Expand Up @@ -1591,6 +1595,12 @@ def mutate(
if agentic is not None:
obj.agentic = agentic

if extract_is_list is not None:
obj.extract_is_list = extract_is_list

if must_contain_text is not None:
obj.must_contain_text = must_contain_text

obj.save()
message = "SUCCESS!"
ok = True
Expand All @@ -1611,6 +1621,8 @@ class Arguments:
instructions = graphene.String(required=False)
language_model_id = graphene.ID(required=True)
agentic = graphene.Boolean(required=False)
extract_is_list = graphene.Boolean(required=False)
must_contain_text = graphene.String(required=False)
name = graphene.String(required=True)

ok = graphene.Boolean()
Expand All @@ -1627,6 +1639,8 @@ def mutate(
output_type,
language_model_id,
agentic=None,
extract_is_list=None,
must_contain_text=None,
query=None,
match_text=None,
limit_to_label=None,
Expand All @@ -1648,7 +1662,9 @@ def mutate(
limit_to_label=limit_to_label,
instructions=instructions,
language_model=language_model,
must_contain_text=must_contain_text,
agentic=agentic if agentic is not None else False,
extract_is_list=extract_is_list if extract_is_list is not None else False,
creator=info.context.user,
)
column.save()
Expand Down Expand Up @@ -1702,7 +1718,7 @@ class CreateExtract(graphene.Mutation):
"""

class Arguments:
corpus_id = graphene.ID(required=True)
corpus_id = graphene.ID(required=False)
name = graphene.String(required=True)
fieldset_id = graphene.ID(required=False)
fieldset_name = graphene.String(required=False)
Expand All @@ -1717,23 +1733,29 @@ class Arguments:
def mutate(
root,
info,
corpus_id,
name,
corpus_id=None,
fieldset_id=None,
fieldset_name=None,
fieldset_description=None,
):

corpus = Corpus.objects.get(pk=from_global_id(corpus_id)[1])
corpus = None
if corpus_id is not None:
corpus = Corpus.objects.get(pk=from_global_id(corpus_id)[1])
print(f"Corpus is: {corpus}")

if fieldset_id is not None:
print(f"Fieldset id is not None: {fieldset_id}")
fieldset = Fieldset.objects.get(pk=from_global_id(fieldset_id)[1])
else:
if fieldset_name is None:
fieldset_name = f"{name} Fieldset"
print(f"Creating new fieldset... name will be: {fieldset_name}")

fieldset = Fieldset.objects.create(
name=fieldset_name,
description=fieldset_description,
description=fieldset_description if fieldset_description is not None else f"Autogenerated {fieldset_name}",
creator=info.context.user,
)
set_permissions_for_obj_to_user(
Expand All @@ -1747,6 +1769,13 @@ def mutate(
creator=info.context.user,
)
extract.save()

if corpus is not None:
print(f"Try to add corpus docs: {corpus.documents.all()}")
extract.documents.add(*corpus.documents.all())
else:
print(f"Corpus IS still None... no docs to add.")

set_permissions_for_obj_to_user(
info.context.user, extract, [PermissionTypes.CRUD]
)
Expand Down
3 changes: 3 additions & 0 deletions config/graphql/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ class Meta:
"instructions",
"language_model_id",
"agentic",
"extract_is_list",
"must_contain_text"
]
read_only_fields = ["id", "created"]

Expand All @@ -96,6 +98,7 @@ class Meta:
"icon",
"text",
"creator_id",
"read_only"
]
read_only_fields = ["id"]

Expand Down
1 change: 1 addition & 0 deletions frontend/src/components/annotations/AnnotationCards.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export const AnnotationCards = ({

const handleUpdate = () => {
if (!loading && pageInfo?.hasNextPage) {
console.log("Fetching more annotation cards...");
fetchMore({
variables: {
limit: 20,
Expand Down
94 changes: 18 additions & 76 deletions frontend/src/components/annotator/Annotator.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ import { SidebarContainer } from "../common";
import { getPawlsLayer } from "./api/rest";
import {
AnnotationLabelType,
CorpusQueryType,
CorpusType,
DocumentType,
LabelDisplayBehavior,
Expand All @@ -75,7 +74,7 @@ import { getPermissions } from "../../utils/transform";
import _ from "lodash";
import {
displayAnnotationOnAnnotatorLoad,
openedQueryObj,
pagesVisible,
selectedAnalysesIds,
} from "../../graphql/cache";
import useWindowDimensions from "../hooks/WindowDimensionHook";
Expand Down Expand Up @@ -115,7 +114,6 @@ interface AnnotatorProps {
show_selected_annotation_only: boolean;
show_annotation_bounding_boxes: boolean;
show_annotation_labels: LabelDisplayBehavior;
show_query?: CorpusQueryType | null;
onClose: (args?: any) => void | any;
}

Expand All @@ -128,13 +126,15 @@ export const Annotator = ({
show_selected_annotation_only,
show_annotation_bounding_boxes,
show_annotation_labels,
show_query,
onClose,
}: AnnotatorProps) => {
console.log("Opened document: ", openedDocument);
console.log("Opened corpus: ", openedCorpus);
console.log("scroll_to_annotation_on_open: ", scroll_to_annotation_on_open);

const { width } = useWindowDimensions();
const responsive_sidebar_width = width <= 1000 ? "0px" : "400px";

const opened_query = useReactiveVar(openedQueryObj);
const selected_analysis_ids = useReactiveVar(selectedAnalysesIds);
// console.log("selected_analysis_ids", selected_analysis_ids);

Expand Down Expand Up @@ -190,6 +190,18 @@ export const Annotator = ({
: {}),
};

const setPageVisible = (
page_number: number,
state: "VISIBLE" | "NOT VISIBLE"
) => {
setPagesVisible((old_pages_visible) => {
return {
...old_pages_visible,
[page_number]: state,
};
});
};

// Hold our query variables (using a state var lets us bundle updates to the
// query var in a single useEffect that prevents multiple re-renders)
const [annotator_query_vars, setAnnotatorQueryVars] =
Expand Down Expand Up @@ -444,72 +456,6 @@ export const Annotator = ({
}
}, [openedDocument]);

// If oquery we want to show changes, load it and its annotations into state store
useEffect(() => {
if (show_query && show_query.fullSourceList.length > 0) {
if (!read_only) {
throw new TypeError(
"read_only must be true when show_query is not null"
);
}

// First let's get all of the labels used in our answer by looking at the returned source annotation and getting unique list of labels by ids
const unique_annot_labels: AnnotationLabelType[] = _.uniqBy(
show_query.fullSourceList.map((source) => source.annotationLabel),
(label) => label.id
);
const span_label_lookup = unique_annot_labels
.filter((label) => label.labelType === LabelType.TokenLabel)
.reduce(function (obj: Record<string, any>, label) {
obj[label.id] = {
id: label.id,
color: label.color,
text: label.text,
icon: label.icon as SemanticICONS,
description: label.description,
labelType: label.labelType,
};
return obj;
}, {});

// TODO - store labels in state store
setSpanLabels(Object.values(span_label_lookup));

// We want to make sure we jump to the FIRST source
// We only want to load annotation page for selected annotation on load ONCE
const first_annotation = show_query.fullSourceList[0]; // TODO - make sure these are filtered by page on server
if (
loaded_page_for_annotation === null &&
jumped_to_annotation_on_load !== first_annotation.id
) {
setLoadedPageForAnnotation(scroll_to_annotation_on_open);
}

// This is the annotations start loading
// Turn existing annotation data into PDFAnnotations obj and inject into state:
let annotation_objs: ServerAnnotation[] = show_query.fullSourceList
.filter((annotation) => annotation.analysis !== null)
.map(
(annot) =>
new ServerAnnotation(
annot.page,
annot.annotationLabel,
annot.rawText ? annot.rawText : "",
annot.json ? annot.json : {},
annot.myPermissions ? getPermissions(annot.myPermissions) : [],
annot.id
)
);

// TODO - let queries label docs and create relationships
// For now, we're assuming relationships and doc type labels cannot come out of the query... there is no reasons for this to remain true. Just a lot of work :-)
setPdfAnnotations(new PdfAnnotations(annotation_objs, [], []));

// Set up contexts for annotations
setViewState(ViewState.LOADED);
}
}, [show_query]);

useEffect(() => {
// console.log("New Annotator data", annotator_data);

Expand All @@ -526,10 +472,7 @@ export const Annotator = ({
// );
}

// if annotator_data changes due to loading from graphql (and we didn't somehow also have show_query set)
if (annotator_data && !show_query) {
console.log("Processing annotator data", annotator_data);

if (annotator_data) {
// Build proper span label objs from GraphQL results
let span_label_lookup: LooseObject = {};
let human_span_label_lookup: LooseObject = {};
Expand All @@ -551,7 +494,6 @@ export const Annotator = ({
return obj;
}, {}),
};
setSpanLabels(Object.values(span_label_lookup));

// console.log(
// "Span choices",
Expand Down
24 changes: 0 additions & 24 deletions frontend/src/components/annotator/PDF.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -174,30 +174,6 @@ const Page = ({

useEffect(() => {
try {
// Saving this for now... is a great piece of code to determine if page is visible for page-by-page
// loading or rendering.
// const determinePageVisiblity = () => {

// if (canvasRef.current !== null && scrollContainerRef && scrollContainerRef.current !== null) {

// const scroll_window_bounding_rect = scrollContainerRef.current?.getBoundingClientRect()
// const page_bounding_rect = canvasRef.current.getBoundingClientRect();

// let pageVisibility = (
// (page_bounding_rect.top >= scroll_window_bounding_rect.top && page_bounding_rect.top <= scroll_window_bounding_rect.bottom) ||
// (page_bounding_rect.bottom >= scroll_window_bounding_rect.top && page_bounding_rect.bottom <= scroll_window_bounding_rect.bottom) ||
// (page_bounding_rect.top < scroll_window_bounding_rect.top && page_bounding_rect.bottom >= scroll_window_bounding_rect.bottom)
// );

// // if (pageVisibility) {
// // console.log(`Page ${pageInfo.page.pageNumber} is VISIBLE!`);
// // setPageVisible(pageInfo.page.pageNumber, "VISIBLE");
// // } else {
// // setPageVisible(pageInfo.page.pageNumber, "NOT VISIBLE");
// // }
// }
// };

if (canvasRef.current === null) {
onError(new Error("No canvas element"));
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ export const AnnotationStore = createContext<_AnnotationStore>({
pageSelectionQueue: [],
spanLabels: [],
humanSpanLabelChoices: [],
showStructuralLabels: false,
showStructuralLabels: true,
activeSpanLabel: undefined,
showOnlySpanLabels: [],
docText: undefined,
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/components/annotator/pages/PDFView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ export const PDFView = ({
Record<number, PDFPageInfo>
>([]);

const [showStructuralLabels, setShowStructuralLabels] = useState(false);
const [showStructuralLabels, setShowStructuralLabels] = useState(true);
const [activeSpanLabel, setActiveSpanLabel] = useState<
AnnotationLabelType | undefined
>(humanSpanLabelChoices.length > 0 ? humanSpanLabelChoices[0] : undefined);
Expand Down
13 changes: 10 additions & 3 deletions frontend/src/components/queries/NewQuerySearch.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import { useMutation } from "@apollo/client";
import React from "react";
import { Button, Container, Header, Image, Input } from "semantic-ui-react";
import {
Button,
Container,
Header,
Icon,
Image,
Input,
} from "semantic-ui-react";
import {
ASK_QUERY_OF_CORPUS,
AskQueryOfCorpusInputType,
Expand Down Expand Up @@ -62,9 +69,9 @@ export const NewQuerySearch: React.FC<NewQuerySearchProps> = ({
marginBottom: "1rem",
}}
>
<Image src="path/to/your/logo.png" size="small" />
<Icon name="search" size="huge" />
<Header as="h2" style={{ marginLeft: "1rem" }}>
Agentic Query
Corpus Query
<Header.Subheader>Query your document collection</Header.Subheader>
</Header>
</div>
Expand Down
Loading

0 comments on commit 231f2fe

Please sign in to comment.