diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index 5bf29b11..7fde125a 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -1,4 +1,4 @@ """ Open edX Learning ("Learning Core"). """ -__version__ = "0.13.1" +__version__ = "0.14.0" diff --git a/openedx_learning/apps/authoring/components/admin.py b/openedx_learning/apps/authoring/components/admin.py index f09483e9..edb83921 100644 --- a/openedx_learning/apps/authoring/components/admin.py +++ b/openedx_learning/apps/authoring/components/admin.py @@ -1,6 +1,8 @@ """ Django admin for components models """ +import base64 + from django.contrib import admin from django.template.defaultfilters import filesizeformat from django.urls import reverse @@ -67,19 +69,22 @@ def get_queryset(self, request): ) fields = [ - "format_key", + "key", "format_size", "learner_downloadable", "rendered_data", ] readonly_fields = [ "content", - "format_key", + "key", "format_size", "rendered_data", ] extra = 0 + def has_file(self, cvc_obj): + return cvc_obj.content.has_file + def rendered_data(self, cvc_obj): return content_preview(cvc_obj) @@ -87,15 +92,6 @@ def rendered_data(self, cvc_obj): def format_size(self, cvc_obj): return filesizeformat(cvc_obj.content.size) - @admin.display(description="Key") - def format_key(self, cvc_obj): - return format_html( - '{}', - link_for_cvc(cvc_obj), - # reverse("admin:components_content_change", args=(cvc_obj.content_id,)), - cvc_obj.key, - ) - @admin.register(ComponentVersion) class ComponentVersionAdmin(ReadOnlyModelAdmin): @@ -129,18 +125,6 @@ def get_queryset(self, request): ) -def link_for_cvc(cvc_obj: ComponentVersionContent) -> str: - """ - Get the download URL for the given ComponentVersionContent instance - """ - return "/media_server/component_asset/{}/{}/{}/{}".format( - cvc_obj.content.learning_package.key, - cvc_obj.component_version.component.key, - cvc_obj.component_version.version_num, - cvc_obj.key, - ) - - def format_text_for_admin_display(text: str) -> SafeText: """ Get the HTML to display the given plain text (preserving formatting) @@ -158,9 +142,17 @@ def content_preview(cvc_obj: ComponentVersionContent) -> SafeText: content_obj = cvc_obj.content if content_obj.media_type.type == "image": + # This base64 encoding looks really goofy and is bad for performance, + # but image previews in the admin are extremely useful, and this lets us + # have them without creating a separate view in Learning Core. (Keep in + # mind that these assets are private, so they cannot be accessed via the + # MEDIA_URL like most Django uploaded assets.) + data = content_obj.read_file().read() return format_html( - '', - content_obj.file_url(), + '
{}
', + content_obj.mime_type, + base64.encodebytes(data).decode('utf8'), + content_obj.os_path(), ) return format_text_for_admin_display( diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py index a65eee80..3be562a8 100644 --- a/openedx_learning/apps/authoring/components/api.py +++ b/openedx_learning/apps/authoring/components/api.py @@ -269,7 +269,15 @@ def get_component_by_uuid(uuid: UUID) -> Component: def get_component_version_by_uuid(uuid: UUID) -> ComponentVersion: - return ComponentVersion.objects.get(publishable_entity_version__uuid=uuid) + return ( + ComponentVersion + .objects + .select_related( + "component", + "component__learning_package", + ) + .get(publishable_entity_version__uuid=uuid) + ) def component_exists_by_key( @@ -395,7 +403,21 @@ def create_component_version_content( ) -> ComponentVersionContent: """ Add a Content to the given ComponentVersion + + We don't allow keys that would be absolute paths, e.g. ones that start with + '/'. Storing these causes headaches with building relative paths and because + of mismatches with things that expect a leading slash and those that don't. + So for safety and consistency, we strip off leading slashes and emit a + warning when we do. """ + if key.startswith('/'): + logger.warning( + "Absolute paths are not supported: " + f"removed leading '/' from ComponentVersion {component_version_id} " + f"content key: {repr(key)} (content_id: {content_id})" + ) + key = key.lstrip('/') + cvrc, _created = ComponentVersionContent.objects.get_or_create( component_version_id=component_version_id, content_id=content_id, @@ -510,7 +532,12 @@ def _error_header(error: AssetError) -> dict[str, str]: # Check: Does the ComponentVersion exist? try: - component_version = get_component_version_by_uuid(component_version_uuid) + component_version = ( + ComponentVersion + .objects + .select_related("component", "component__learning_package") + .get(publishable_entity_version__uuid=component_version_uuid) + ) except ComponentVersion.DoesNotExist: # No need to add headers here, because no ComponentVersion was found. logger.error(f"Asset Not Found: No ComponentVersion with UUID {component_version_uuid}") @@ -567,10 +594,9 @@ def _error_header(error: AssetError) -> dict[str, str]: # At this point, we know that there is valid Content that we want to send. # This adds Content-level headers, like the hash/etag and content type. info_headers.update(contents_api.get_content_info_headers(content)) - stored_file_path = content.file_path() # Recompute redirect headers (reminder: this should never be cached). - redirect_headers = contents_api.get_redirect_headers(stored_file_path, public) + redirect_headers = contents_api.get_redirect_headers(content.path, public) logger.info( "Asset redirect (uncached metadata): " f"{component_version_uuid}/{asset_path} -> {redirect_headers}" diff --git a/openedx_learning/apps/authoring/contents/admin.py b/openedx_learning/apps/authoring/contents/admin.py index 036d171b..029ebfd8 100644 --- a/openedx_learning/apps/authoring/contents/admin.py +++ b/openedx_learning/apps/authoring/contents/admin.py @@ -1,6 +1,8 @@ """ Django admin for contents models """ +import base64 + from django.contrib import admin from django.utils.html import format_html @@ -16,7 +18,6 @@ class ContentAdmin(ReadOnlyModelAdmin): """ list_display = [ "hash_digest", - "file_link", "learning_package", "media_type", "size", @@ -29,24 +30,42 @@ class ContentAdmin(ReadOnlyModelAdmin): "media_type", "size", "created", - "file_link", - "text_preview", "has_file", + "path", + "os_path", + "text_preview", + "image_preview", ] list_filter = ("media_type", "learning_package") search_fields = ("hash_digest",) - def file_link(self, content: Content): - if not content.has_file: - return "" + @admin.display(description="OS Path") + def os_path(self, content: Content): + return content.os_path() or "" - return format_html( - 'Download', - content.file_url(), - ) + def path(self, content: Content): + return content.path if content.has_file else "" def text_preview(self, content: Content): + if not content.text: + return "" return format_html( '
\n{}\n
', content.text, ) + + def image_preview(self, content: Content): + """ + Return HTML for an image, if that is the underlying Content. + + Otherwise, just return a blank string. + """ + if content.media_type.type != "image": + return "" + + data = content.read_file().read() + return format_html( + '', + content.mime_type, + base64.encodebytes(data).decode('utf8'), + ) diff --git a/openedx_learning/apps/authoring/contents/models.py b/openedx_learning/apps/authoring/contents/models.py index 7e4ff8cc..214c2d27 100644 --- a/openedx_learning/apps/authoring/contents/models.py +++ b/openedx_learning/apps/authoring/contents/models.py @@ -7,11 +7,13 @@ from functools import cache, cached_property -from django.core.exceptions import ValidationError +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured, ValidationError from django.core.files.base import File -from django.core.files.storage import Storage, default_storage +from django.core.files.storage import Storage from django.core.validators import MaxValueValidator from django.db import models +from django.utils.module_loading import import_string from ....lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field from ....lib.managers import WithRelationsManager @@ -28,13 +30,25 @@ def get_storage() -> Storage: """ Return the Storage instance for our Content file persistence. - For right now, we're still only storing inline text and not static assets in - production, so just return the default_storage. We're also going through a - transition between Django 3.2 -> 4.2, where storage configuration has moved. + This will first search for an OPENEDX_LEARNING config dictionary and return + a Storage subclass based on that configuration. - Make this work properly as part of adding support for static assets. + If there is no value for the OPENEDX_LEARNING setting, we return the default + MEDIA storage class. TODO: Should we make it just error instead? """ - return default_storage + config_dict = getattr(settings, 'OPENEDX_LEARNING', {}) + + if 'MEDIA' in config_dict: + storage_cls = import_string(config_dict['MEDIA']['BACKEND']) + options = config_dict['MEDIA'].get('OPTIONS', {}) + return storage_cls(**options) + + raise ImproperlyConfigured( + "Cannot access file storage: Missing the OPENEDX_LEARNING['MEDIA'] " + "setting, which should have a storage BACKEND and OPTIONS values for " + "a Storage subclass. These files should be stored in a location that " + "is NOT publicly accessible to browsers (so not in the MEDIA_ROOT)." + ) class MediaType(models.Model): @@ -282,22 +296,53 @@ def mime_type(self) -> str: """ return str(self.media_type) - def file_path(self): + @cached_property + def path(self): + """ + Logical path at which this content is stored (or would be stored). + + This path is relative to OPENEDX_LEARNING['MEDIA'] configured storage + root. This file may not exist because has_file=False, or because we + haven't written the file yet (this is the method we call when trying to + figure out where the file *should* go). + """ + return f"content/{self.learning_package.uuid}/{self.hash_digest}" + + def os_path(self): + """ + The full OS path for the underlying file for this Content. + + This will not be supported by all Storage class types. + + This will return ``None`` if there is no backing file (has_file=False). """ - Path at which this content is stored (or would be stored). + if self.has_file: + return get_storage().path(self.path) + return None - This path is relative to configured storage root. + def read_file(self) -> File: """ - return f"{self.learning_package.uuid}/{self.hash_digest}" + Get a File object that has been open for reading. + + We intentionally don't expose an `open()` call where callers can open + this file in write mode. Writing a Content file should happen at most + once, and the logic is not obvious (see ``write_file``). + + At the end of the day, the caller can close the returned File and reopen + it in whatever mode they want, but we're trying to gently discourage + that kind of usage. + """ + return get_storage().open(self.path, 'rb') def write_file(self, file: File) -> None: """ Write file contents to the file storage backend. - This function does nothing if the file already exists. + This function does nothing if the file already exists. Note that Content + is supposed to be immutable, so this should normally only be called once + for a given Content row. """ storage = get_storage() - file_path = self.file_path() # There are two reasons why a file might already exist even if the the # Content row is new: @@ -314,15 +359,15 @@ def write_file(self, file: File) -> None: # 3. Similar to (2), but only part of the file was written before an # error occurred. This seems unlikely, but possible if the underlying # storage engine writes in chunks. - if storage.exists(file_path) and storage.size(file_path) == file.size: + if storage.exists(self.path) and storage.size(self.path) == file.size: return - storage.save(file_path, file) + storage.save(self.path, file) def file_url(self) -> str: """ This will sometimes be a time-limited signed URL. """ - return content_file_url(self.file_path()) + return get_storage().url(self.path) def clean(self): """ @@ -361,7 +406,3 @@ class Meta: ] verbose_name = "Content" verbose_name_plural = "Contents" - - -def content_file_url(file_path): - return get_storage().url(file_path) diff --git a/test_settings.py b/test_settings.py index db27f354..f5b154f5 100644 --- a/test_settings.py +++ b/test_settings.py @@ -69,3 +69,14 @@ def root(*args): 'DEFAULT_PAGINATION_CLASS': 'edx_rest_framework_extensions.paginators.DefaultPagination', 'PAGE_SIZE': 10, } + +######################## LEARNING CORE SETTINGS ######################## + +OPENEDX_LEARNING = { + 'MEDIA': { + 'BACKEND': 'django.core.files.storage.InMemoryStorage', + 'OPTIONS': { + 'location': MEDIA_ROOT + "_private" + } + } +} diff --git a/tests/openedx_learning/apps/authoring/components/test_api.py b/tests/openedx_learning/apps/authoring/components/test_api.py index 18e5d04d..1fd8f494 100644 --- a/tests/openedx_learning/apps/authoring/components/test_api.py +++ b/tests/openedx_learning/apps/authoring/components/test_api.py @@ -381,7 +381,7 @@ def test_add(self): components_api.create_component_version_content( new_version.pk, new_content.pk, - key="hello.txt", + key="my/path/to/hello.txt", learner_downloadable=False, ) # re-fetch from the database to check to see if we wrote it correctly @@ -390,7 +390,23 @@ def test_add(self): .get(publishable_entity_version__version_num=1) assert ( new_content == - new_version.contents.get(componentversioncontent__key="hello.txt") + new_version.contents.get(componentversioncontent__key="my/path/to/hello.txt") + ) + + # Write the same content again, but to an absolute path (should auto- + # strip) the leading '/'s. + components_api.create_component_version_content( + new_version.pk, + new_content.pk, + key="//nested/path/hello.txt", + learner_downloadable=False, + ) + new_version = components_api.get_component(self.problem.pk) \ + .versions \ + .get(publishable_entity_version__version_num=1) + assert ( + new_content == + new_version.contents.get(componentversioncontent__key="nested/path/hello.txt") ) def test_multiple_versions(self): diff --git a/tests/openedx_learning/apps/authoring/components/test_assets.py b/tests/openedx_learning/apps/authoring/components/test_assets.py index 6d631738..56bfda41 100644 --- a/tests/openedx_learning/apps/authoring/components/test_assets.py +++ b/tests/openedx_learning/apps/authoring/components/test_assets.py @@ -167,7 +167,7 @@ def _assert_html_content_headers(self, response): assert response.status_code == 200 assert response.headers["Etag"] == self.html_asset_content.hash_digest assert response.headers["Content-Type"] == "text/html" - assert response.headers["X-Accel-Redirect"] == self.html_asset_content.file_path() + assert response.headers["X-Accel-Redirect"] == self.html_asset_content.path assert "X-Open-edX-Error" not in response.headers def test_public_asset_response(self): diff --git a/tests/openedx_learning/apps/authoring/contents/test_file_storage.py b/tests/openedx_learning/apps/authoring/contents/test_file_storage.py new file mode 100644 index 00000000..a94b3157 --- /dev/null +++ b/tests/openedx_learning/apps/authoring/contents/test_file_storage.py @@ -0,0 +1,79 @@ +""" +Tests for file-backed Content +""" +from datetime import datetime, timezone + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.test import override_settings + +from openedx_learning.apps.authoring.contents import api as contents_api +from openedx_learning.apps.authoring.contents.models import get_storage +from openedx_learning.apps.authoring.publishing import api as publishing_api +from openedx_learning.lib.test_utils import TestCase + + +class ContentFileStorageTestCase(TestCase): + """ + Test the storage of files backing Content. + """ + def setUp(self) -> None: + """ + It's actually important that we use setUp and not setUpTestData here, + because at least one of our tests will clear the get_storage cache, + meaning that subsequent tests will get a new instance of the + InMemoryStorage backend–and consequently wouldn't see any data loaded + by setUpTestData. + + Recreating the test data for every test lets individual tests change the + storage configuration without creating side-effects for other tests. + """ + super().setUp() + learning_package = publishing_api.create_learning_package( + key="ContentFileStorageTestCase-test-key", + title="Content File Storage Test Case Learning Package", + ) + self.html_media_type = contents_api.get_or_create_media_type("text/html") + self.html_content = contents_api.get_or_create_file_content( + learning_package.id, + self.html_media_type.id, + data=b"hello world!", + created=datetime.now(tz=timezone.utc), + ) + + def test_file_path(self): + """ + Test that the file path doesn't change. + + If this test breaks, it means that we've changed the convention for + where we're storing the backing files for Content, which means we'll be + breaking backwards compatibility for everyone. Please be very careful if + you're updating this test. + """ + content = self.html_content + assert content.path == f"content/{content.learning_package.uuid}/{content.hash_digest}" + + storage_root = settings.OPENEDX_LEARNING['MEDIA']['OPTIONS']['location'] + assert content.os_path() == f"{storage_root}/{content.path}" + + def test_read(self): + """Make sure we can read the file data back.""" + assert b"hello world!" == self.html_content.read_file().read() + + @override_settings() + def test_misconfiguration(self): + """ + Require the OPENEDX_LEARNING setting for file operations. + + The main goal of this is that we don't want to store our files in the + default MEDIA_ROOT, because that would make them publicly accessible. + + We set our OPENEDX_LEARNING value in test_settings.py. We're going to + delete this setting in our test and make sure we raise the correct + exception (The @override_settings decorator will set everything back to + normal after the test completes.) + """ + get_storage.cache_clear() + del settings.OPENEDX_LEARNING + with self.assertRaises(ImproperlyConfigured): + self.html_content.read_file()