refactor: combine RawContent and TextContent into Content (#149)

Combine RawContent and TextContent into a single, unified Content model for storing both text and file-backed data. The reasoning: 1. It reduces confusion when extending the content with models in other apps, since you don't have to choose which one to attach to. 2. It reduces the total storage requirements by removing a table that would have grown to be very large. 3. The old way of having TextContent build on RawContent meant that OLX had to be stored in the file system, which is both redundant (we always want to access it in low-latency text form), and much slower when importing. 4. It gives us the flexibility to more seamlessly optimize the representation of content later, either shifting things from files to text or vice-versa as necessary. 5. It lets us punt the operation question of how we're configuring file storage for a little bit, until the static asset serving support is more fleshed out. It's also worth nothing that we're upgrading to Django 4.2 very shortly, and that the storage configuration options will change. The tradeoff for this is that Content has become more complex, being able to store things as text, a file, or both, and having to decide between them.
openedx · Feb 9, 2024 · b386858 · b386858
1 parent c827081
commit b386858
Show file tree

Hide file tree

Showing 22 changed files with 658 additions and 574 deletions.
diff --git a/docs/decisions/0015-serving-static-assets.rst b/docs/decisions/0015-serving-static-assets.rst
@@ -18,7 +18,7 @@ Data Storage Implementation
 
 The underlying data models live in the openedx-learning repo. The most relevant models are:
 
-* `RawContent in contents/models.py <https://github.com/openedx/openedx-learning/blob/main/openedx_learning/core/contents/models.py>`_
+* `Content in contents/models.py <https://github.com/openedx/openedx-learning/blob/main/openedx_learning/core/contents/models.py>`_
 * `Component and ComponentVersion in components/models.py <https://github.com/openedx/openedx-learning/blob/main/openedx_learning/core/components/models.py>`_
 
 Key takeaways about how this data is stored:

diff --git a/olx_importer/management/commands/load_components.py b/olx_importer/management/commands/load_components.py
@@ -11,7 +11,7 @@
 
 Open Question: If the data model is extensible, how do we know whether a change
 has really happened between what's currently stored/published for a particular
-item and the new value we want to set? For RawContent that's easy, because we
+item and the new value we want to set? For Content that's easy, because we
 have actual hashes of the data. But it's not clear how that would work for
 something like an ComponentVersion. We'd have to have some kind of mechanism where every
 pp that wants to attach data gets to answer the question of "has anything
@@ -116,20 +116,20 @@ def create_content(self, static_local_path, now, component_version):
             logger.warning(f'  Static reference not found: "{real_path}"')
             return  # Might as well bail if we can't find the file.
 
-        raw_content, _created = contents_api.get_or_create_raw_content(
+        content = contents_api.get_or_create_file_content(
             self.learning_package.id,
-            data_bytes=data_bytes,
+            data=data_bytes,
             mime_type=mime_type,
             created=now,
         )
-        components_api.add_content_to_component_version(
+        components_api.create_component_version_content(
             component_version,
-            raw_content_id=raw_content.id,
+            content.id,
             key=key,
             learner_downloadable=True,
         )
 
-    def import_block_type(self, block_type, now): # , publish_log_entry):
+    def import_block_type(self, block_type_name, now): # , publish_log_entry):
         components_found = 0
         components_skipped = 0
 
@@ -138,8 +138,8 @@ def import_block_type(self, block_type, now): # , publish_log_entry):
         # not fool-proof as it will match static file references that are
         # outside of tag declarations as well.
         static_files_regex = re.compile(r"""['"]\/static\/(.+?)["'\?]""")
-        block_data_path = self.course_data_path / block_type
-        namespace="xblock.v1"
+        block_data_path = self.course_data_path / block_type_name
+        block_type = components_api.get_or_create_component_type("xblock.v1", block_type_name)
 
         for xml_file_path in block_data_path.glob("*.xml"):
             components_found += 1
@@ -157,27 +157,26 @@ def import_block_type(self, block_type, now): # , publish_log_entry):
             display_name = block_root.attrib.get("display_name", "")
             _component, component_version = components_api.create_component_and_version(
                 self.learning_package.id,
-                namespace=namespace,
-                type_name=block_type,
+                component_type=block_type,
                 local_key=local_key,
                 title=display_name,
                 created=now,
                 created_by=None,
             )
 
-            # Create the RawContent entry for the raw data...
-            data_bytes = xml_file_path.read_bytes()
-            text_content, _created = contents_api.get_or_create_text_content_from_bytes(
+            # Create the Content entry for the raw data...
+            text = xml_file_path.read_text('utf-8')
+            text_content, _created = contents_api.get_or_create_text_content(
                 self.learning_package.id,
-                data_bytes=data_bytes,
-                mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml",
+                text=text,
+                mime_type=f"application/vnd.openedx.xblock.v1.{block_type_name}+xml",
                 created=now,
             )
             # Add the OLX source text to the ComponentVersion
-            components_api.add_content_to_component_version(
+            components_api.create_component_version_content(
                 component_version,
-                raw_content_id=text_content.pk,
-                key="source.xml",
+                text_content.pk,
+                key="block.xml",
                 learner_downloadable=False
             )
 

diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py
@@ -1,4 +1,4 @@
 """
 Open edX Learning ("Learning Core").
 """
-__version__ = "0.5.2"
+__version__ = "0.6.0"
diff --git a/openedx_learning/contrib/media_server/readme.rst b/openedx_learning/contrib/media_server/readme.rst
@@ -1,7 +1,7 @@
 Media Server App
 ================
 
-The ``media_server`` app exists to serve media files that are ultimately backed by the RawContent model, *for development purposes and for sites with light-to-moderate traffic*. It also provides an API that can be used by CDNs for high traffic sites.
+The ``media_server`` app exists to serve media files that are ultimately backed by the Content model, *for development purposes and for sites with light-to-moderate traffic*. It also provides an API that can be used by CDNs for high traffic sites.
 
 Motivation
 ----------

diff --git a/openedx_learning/contrib/media_server/views.py b/openedx_learning/contrib/media_server/views.py
@@ -8,7 +8,7 @@
 from django.core.exceptions import ObjectDoesNotExist, PermissionDenied
 from django.http import FileResponse, Http404
 
-from openedx_learning.core.components.api import get_component_version_content
+from openedx_learning.core.components.api import look_up_component_version_content
 
 
 def component_asset(
@@ -28,7 +28,7 @@ def component_asset(
     * Serving from a different domain than the rest of the service
     """
     try:
-        cvc = get_component_version_content(
+        cvc = look_up_component_version_content(
             learning_package_key, component_key, version_num, asset_path
         )
     except ObjectDoesNotExist:

diff --git a/openedx_learning/core/components/admin.py b/openedx_learning/core/components/admin.py
@@ -9,7 +9,7 @@
 
 from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin
 
-from .models import Component, ComponentVersion, ComponentVersionRawContent
+from .models import Component, ComponentVersion, ComponentVersionContent
 
 
 class ComponentVersionInline(admin.TabularInline):
@@ -48,18 +48,18 @@ class ComponentAdmin(ReadOnlyModelAdmin):
     inlines = [ComponentVersionInline]
 
 
-class RawContentInline(admin.TabularInline):
+class ContentInline(admin.TabularInline):
     """
-    Django admin configuration for RawContent
+    Django admin configuration for Content
     """
-    model = ComponentVersion.raw_contents.through
+    model = ComponentVersion.contents.through
 
     def get_queryset(self, request):
         queryset = super().get_queryset(request)
         return queryset.select_related(
-            "raw_content",
-            "raw_content__learning_package",
-            "raw_content__text_content",
+            "content",
+            "content__learning_package",
+            "content__media_type",
             "component_version",
             "component_version__publishable_entity_version",
             "component_version__component",
@@ -73,7 +73,7 @@ def get_queryset(self, request):
         "rendered_data",
     ]
     readonly_fields = [
-        "raw_content",
+        "content",
         "format_key",
         "format_size",
         "rendered_data",
@@ -85,7 +85,7 @@ def rendered_data(self, cvc_obj):
 
     @admin.display(description="Size")
     def format_size(self, cvc_obj):
-        return filesizeformat(cvc_obj.raw_content.size)
+        return filesizeformat(cvc_obj.content.size)
 
     @admin.display(description="Key")
     def format_key(self, cvc_obj):
@@ -108,7 +108,7 @@ class ComponentVersionAdmin(ReadOnlyModelAdmin):
         "title",
         "version_num",
         "created",
-        "raw_contents",
+        "contents",
     ]
     fields = [
         "component",
@@ -118,7 +118,7 @@ class ComponentVersionAdmin(ReadOnlyModelAdmin):
         "created",
     ]
     list_display = ["component", "version_num", "uuid", "created"]
-    inlines = [RawContentInline]
+    inlines = [ContentInline]
 
     def get_queryset(self, request):
         queryset = super().get_queryset(request)
@@ -129,12 +129,12 @@ def get_queryset(self, request):
         )
 
 
-def link_for_cvc(cvc_obj: ComponentVersionRawContent) -> str:
+def link_for_cvc(cvc_obj: ComponentVersionContent) -> str:
     """
-    Get the download URL for the given ComponentVersionRawContent instance
+    Get the download URL for the given ComponentVersionContent instance
     """
     return "/media_server/component_asset/{}/{}/{}/{}".format(
-        cvc_obj.raw_content.learning_package.key,
+        cvc_obj.content.learning_package.key,
         cvc_obj.component_version.component.key,
         cvc_obj.component_version.version_num,
         cvc_obj.key,
@@ -151,27 +151,18 @@ def format_text_for_admin_display(text: str) -> SafeText:
     )
 
 
-def content_preview(cvc_obj: ComponentVersionRawContent) -> SafeText:
+def content_preview(cvc_obj: ComponentVersionContent) -> SafeText:
     """
-    Get the HTML to display a preview of the given ComponentVersionRawContent
+    Get the HTML to display a preview of the given ComponentVersionContent
     """
-    raw_content_obj = cvc_obj.raw_content
+    content_obj = cvc_obj.content
 
-    if raw_content_obj.media_type.type == "image":
+    if content_obj.media_type.type == "image":
         return format_html(
             '<img src="{}" style="max-width: 100%;" />',
-            # TODO: configure with settings value:
-            "/media_server/component_asset/{}/{}/{}/{}".format(
-                cvc_obj.raw_content.learning_package.key,
-                cvc_obj.component_version.component.key,
-                cvc_obj.component_version.version_num,
-                cvc_obj.key,
-            ),
+            content_obj.file_url(),
         )
 
-    if hasattr(raw_content_obj, "text_content"):
-        return format_text_for_admin_display(
-            raw_content_obj.text_content.text,
-        )
-
-    return format_html("This content type cannot be displayed.")
+    return format_text_for_admin_display(
+        content_obj.text or ""
+    )