JSv4 · JSv4 · Sep 22, 2024 · Sep 29, 2024 · Sep 29, 2024 · Sep 29, 2024
diff --git a/.ipython/profile_default/history.sqlite b/.ipython/profile_default/history.sqlite
diff --git a/config/graphql/graphene_types.py b/config/graphql/graphene_types.py
@@ -120,6 +120,7 @@ class LabelTypeEnum(graphene.Enum):
     DOC_TYPE_LABEL = "DOC_TYPE_LABEL"
     TOKEN_LABEL = "TOKEN_LABEL"
     METADATA_LABEL = "METADATA_LABEL"
+    SPAN_LABEL = "SPAN_LABEL"
 
 
 class AnnotationSummaryType(graphene.ObjectType):
@@ -412,7 +413,11 @@ def resolve_full_annotation_list(self, info, document_id=None):
         results = self.annotations.all()
         if document_id is not None:
             document_pk = from_global_id(document_id)[1]
+            logger.info(
+                f"Resolve full annotations for analysis {self.id} with doc {document_pk}"
+            )
             results = results.filter(document_id=document_pk)
+
         return results
 
     class Meta:
@@ -429,13 +434,22 @@ class Meta:
 
 
 class FieldsetType(AnnotatePermissionsForReadMixin, DjangoObjectType):
+    in_use = graphene.Boolean(
+        description="True if the fieldset is used in any extract that has started."
+    )
     full_column_list = graphene.List(ColumnType)
 
     class Meta:
         model = Fieldset
         interfaces = [relay.Node]
         connection_class = CountableConnection
 
+    def resolve_in_use(self, info) -> bool:
+        """
+        Returns True if any related extract has a non-null ``started`` value.
+        """
+        return self.extracts.filter(started__isnull=False).exists()
+
     def resolve_full_column_list(self, info):
         return self.columns.all()
 

diff --git a/config/graphql/mutations.py b/config/graphql/mutations.py
@@ -74,9 +74,10 @@
     make_corpus_public_task,
 )
 from opencontractserver.types.dicts import OpenContractsAnnotatedDocumentImportType
-from opencontractserver.types.enums import ExportType, PermissionTypes
+from opencontractserver.types.enums import ExportType, LabelType, PermissionTypes
 from opencontractserver.users.models import UserExport
 from opencontractserver.utils.etl import is_dict_instance_of_typed_dict
+from opencontractserver.utils.files import is_plaintext_content
 from opencontractserver.utils.permissioning import (
     set_permissions_for_obj_to_user,
     user_has_permission_for_obj,
@@ -814,6 +815,10 @@ class Arguments:
             description="If provided, successfully uploaded document will "
             "be uploaded to corpus with specified id",
         )
+        add_to_extract_id = graphene.ID(
+            required=False,
+            description="If provided, successfully uploaded document will be added to extract with specified id",
+        )
         make_public = graphene.Boolean(
             required=True,
             description="If True, document is immediately public. "
@@ -835,7 +840,14 @@ def mutate(
         custom_meta,
         make_public,
         add_to_corpus_id=None,
+        add_to_extract_id=None,
     ):
+        if add_to_corpus_id is not None and add_to_extract_id is not None:
+            return UploadDocument(
+                message="Cannot simultaneously add document to both corpus and extract",
+                ok=False,
+                document=None,
+            )
 
         ok = False
         document = None
@@ -860,36 +872,75 @@ def mutate(
             # Check file type
             kind = filetype.guess(file_bytes)
             if kind is None:
-                return UploadDocument(
-                    message="Unable to determine file type", ok=False, document=None
-                )
 
-            if kind.mime not in settings.ALLOWED_DOCUMENT_MIMETYPES:
+                if is_plaintext_content(file_bytes):
+                    kind = "application/txt"
+                else:
+                    return UploadDocument(
+                        message="Unable to determine file type", ok=False, document=None
+                    )
+            else:
+                kind = kind.mime
+
+            if kind not in settings.ALLOWED_DOCUMENT_MIMETYPES:
                 return UploadDocument(
-                    message=f"Unallowed filetype: {kind.mime}", ok=False, document=None
+                    message=f"Unallowed filetype: {kind}", ok=False, document=None
                 )
 
             user = info.context.user
-            pdf_file = ContentFile(file_bytes, name=filename)
-            document = Document(
-                creator=user,
-                title=title,
-                description=description,
-                custom_meta=custom_meta,
-                pdf_file=pdf_file,
-                backend_lock=True,
-                is_public=make_public,
-            )
-            document.save()
+
+            if kind in [
+                "application/pdf",
+                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            ]:
+                pdf_file = ContentFile(file_bytes, name=filename)
+                document = Document(
+                    creator=user,
+                    title=title,
+                    description=description,
+                    custom_meta=custom_meta,
+                    pdf_file=pdf_file,
+                    backend_lock=True,
+                    is_public=make_public,
+                    file_type=kind,  # Store filetype
+                )
+                document.save()
+            elif kind in ["application/txt"]:
+                txt_extract_file = ContentFile(file_bytes, name=filename)
+                document = Document(
+                    creator=user,
+                    title=title,
+                    description=description,
+                    custom_meta=custom_meta,
+                    txt_extract_file=txt_extract_file,
+                    backend_lock=True,
+                    is_public=make_public,
+                    file_type=kind,
+                )
+                document.save()
+
             set_permissions_for_obj_to_user(user, document, [PermissionTypes.CRUD])
 
-            # If add_to_corpus_id is not None, link uploaded document to corpus
+            # Handle linking to corpus or extract
             if add_to_corpus_id is not None:
                 try:
                     corpus = Corpus.objects.get(id=from_global_id(add_to_corpus_id)[1])
                     transaction.on_commit(lambda: corpus.documents.add(document))
                 except Exception as e:
                     message = f"Adding to corpus failed due to error: {e}"
+            elif add_to_extract_id is not None:
+                try:
+                    extract = Extract.objects.get(
+                        Q(pk=from_global_id(add_to_extract_id)[1]) 
+                        & (Q(creator=user) | Q(is_public=True))
+                    )
+                    if extract.finished is not None:
+                        raise ValueError("Cannot add document to a finished extract")
+                    transaction.on_commit(lambda: extract.documents.add(document))
+                except Exception as e:
+                    message = f"Adding to extract failed due to error: {e}"
 
             ok = True
 
@@ -1063,13 +1114,24 @@ class Arguments:
             required=True,
             description="Id of the label that is applied via this annotation.",
         )
+        annotation_type = graphene.Argument(
+            graphene.Enum.from_enum(LabelType), required=True
+        )
 
     ok = graphene.Boolean()
     annotation = graphene.Field(AnnotationType)
 
     @login_required
     def mutate(
-        root, info, json, page, raw_text, corpus_id, document_id, annotation_label_id
+        root,
+        info,
+        json,
+        page,
+        raw_text,
+        corpus_id,
+        document_id,
+        annotation_label_id,
+        annotation_type,
     ):
         corpus_pk = from_global_id(corpus_id)[1]
         document_pk = from_global_id(document_id)[1]
@@ -1085,6 +1147,7 @@ def mutate(
             annotation_label_id=label_pk,
             creator=user,
             json=json,
+            annotation_type=annotation_type.value,
         )
         annotation.save()
         set_permissions_for_obj_to_user(user, annotation, [PermissionTypes.CRUD])
@@ -1924,20 +1987,152 @@ def mutate(
         return CreateExtract(ok=True, msg="SUCCESS!", obj=extract)
 
 
-class UpdateExtractMutation(DRFMutation):
-    class IOSettings:
-        lookup_field = "id"
-        pk_fields = ["corpus", "fieldset", "creator"]
-        serializer = ExtractSerializer
-        model = Extract
-        graphene_model = ExtractType
+class UpdateExtractMutation(graphene.Mutation):
+    """
+    Mutation to update an existing Extract object.
 
+    Supports updating the name (title), corpus, fieldset, and error fields.
+    The caller needs UPDATE permission on the extract and READ permission on
+    any corpus or fieldset being attached.
+    """
+
     class Arguments:
-        id = graphene.String(required=True)
-        title = graphene.String(required=False)
-        description = graphene.String(required=False)
-        icon = graphene.String(required=False)
-        label_set = graphene.String(required=False)
+        id = graphene.ID(required=True, description="ID of the Extract to update.")
+        title = graphene.String(
+            required=False, description="New title for the Extract."
+        )
+        corpus_id = graphene.ID(
+            required=False,
+            description="ID of the Corpus to associate with the Extract.",
+        )
+        fieldset_id = graphene.ID(
+            required=False,
+            description="ID of the Fieldset to associate with the Extract.",
+        )
+        error = graphene.String(
+            required=False, description="Error message to update on the Extract."
+        )
+        # The Extract model does not have 'description', 'icon', or 'label_set' fields.
+        # If these fields are added to the model, they can be included here.
+
+    ok = graphene.Boolean()
+    message = graphene.String()
+    obj = graphene.Field(ExtractType)
+
+    @staticmethod
+    @login_required
+    def mutate(
+        root, info, id, title=None, corpus_id=None, fieldset_id=None, error=None
+    ):
+        """
+        Apply the supplied changes to an Extract and save it.
+
+        Arguments left as None are not applied; ``title`` is stored on the
+        extract's ``name`` attribute. On a missing object or a failed
+        permission check the payload has ``ok=False``, an explanatory
+        ``message`` and ``obj=None``, and nothing is saved.
+        """
+        # Imported here so the logging calls below need no new module-level import.
+        import logging
+
+        log = logging.getLogger(__name__)
+        log.debug(
+            "UpdateExtractMutation.mutate called with: id=%s, title=%s, "
+            "corpus_id=%s, fieldset_id=%s, error=%s",
+            id,
+            title,
+            corpus_id,
+            fieldset_id,
+            error,
+        )
+        user = info.context.user
+
+        try:
+            extract = Extract.objects.get(pk=from_global_id(id)[1])
+        except (Extract.DoesNotExist, ValueError):
+            # NOTE(review): ValueError is assumed to be what a malformed global ID
+            # or non-numeric key raises here - confirm against graphql-relay.
+            return UpdateExtractMutation(
+                ok=False, message="Extract not found.", obj=None
+            )
+
+        # Check if the user has permission to update the Extract object
+        if not user_has_permission_for_obj(
+            user_val=user,
+            instance=extract,
+            permission=PermissionTypes.UPDATE,
+            include_group_permissions=True,
+        ):
+            return UpdateExtractMutation(
+                ok=False,
+                message="You don't have permission to update this extract.",
+                obj=None,
+            )
+
+        # Update fields
+        if title is not None:
+            extract.name = title
+
+        if error is not None:
+            extract.error = error
+
+        if corpus_id is not None:
+            try:
+                corpus = Corpus.objects.get(pk=from_global_id(corpus_id)[1])
+            except (Corpus.DoesNotExist, ValueError):
+                return UpdateExtractMutation(
+                    ok=False, message="Corpus not found.", obj=None
+                )
+            # The caller must be able to read the corpus being attached
+            if not user_has_permission_for_obj(
+                user_val=user,
+                instance=corpus,
+                permission=PermissionTypes.READ,
+                include_group_permissions=True,
+            ):
+                return UpdateExtractMutation(
+                    ok=False,
+                    message="You don't have permission to use this corpus.",
+                    obj=None,
+                )
+            extract.corpus = corpus
+
+        if fieldset_id is not None:
+            try:
+                fieldset = Fieldset.objects.get(pk=from_global_id(fieldset_id)[1])
+            except (Fieldset.DoesNotExist, ValueError):
+                log.debug("Fieldset for global id %s not found", fieldset_id)
+                return UpdateExtractMutation(
+                    ok=False, message="Fieldset not found.", obj=None
+                )
+            # The caller must be able to read the fieldset being attached
+            if not user_has_permission_for_obj(
+                user_val=user,
+                instance=fieldset,
+                permission=PermissionTypes.READ,
+                include_group_permissions=True,
+            ):
+                log.debug(
+                    "User %s denied permission to use fieldset %s",
+                    user.id,
+                    fieldset.id,
+                )
+                return UpdateExtractMutation(
+                    ok=False,
+                    message="You don't have permission to use this fieldset.",
+                    obj=None,
+                )
+            log.debug(
+                "Updating extract %s fieldset to %s", extract.id, fieldset.id
+            )
+            extract.fieldset = fieldset
+
+        extract.save()
+        extract.refresh_from_db()
+
+        return UpdateExtractMutation(
+            ok=True, message="Extract updated successfully.", obj=extract
+        )
 
 
 class AddDocumentsToExtract(DRFMutation):

diff --git a/config/graphql/queries.py b/config/graphql/queries.py
@@ -200,18 +200,22 @@ def resolve_annotations(
 
         # Filter by annotation_label__label_type
         logger.info(
-            f"Queryset county before filtering by annotation_label__label_type: {queryset.count()}"
+            f"Queryset count before filtering by annotation_label__label_type: {queryset.count()}"
         )
         label_type = kwargs.get("annotation_label__label_type")
         if label_type:
             logger.info(f"Filtering by annotation_label__label_type: {label_type}")
             queryset = queryset.filter(annotation_label__label_type=label_type)
+        logger.info(f"Queryset count after filtering by label type: {queryset.count()}")
 
-        logger.info(f"QFilter value for analysis_isnull: {analysis_isnull}")
+        logger.info(f"Q Filter value for analysis_isnull: {analysis_isnull}")
         # Filter by analysis
         if analysis_isnull is not None:
-            logger.info(f"Filtering by analysis_isnull: {queryset.count()}")
+            logger.info(
+                f"QS count before filtering by analysis is null: {queryset.count()}"
+            )
             queryset = queryset.filter(analysis__isnull=analysis_isnull)
+            logger.info(f"Filtered by analysis_isnull: {queryset.count()}")
 
         # Filter by document_id
         document_id = kwargs.get("document_id")

diff --git a/config/settings/base.py b/config/settings/base.py
@@ -133,7 +133,13 @@
 
 # UPLOAD CONTROLS
 # ------------------------------------------------------------------------------
-ALLOWED_DOCUMENT_MIMETYPES = ["application/pdf"]
+ALLOWED_DOCUMENT_MIMETYPES = [
+    "application/pdf",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    "application/txt",  # set by the upload mutation for content detected as plain text
+]
 
 # AUTHENTICATION
 # ------------------------------------------------------------------------------