Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement usage_key_hashes and expose via sequence metadata api #28436

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@
key_supports_outlines,
replace_course_outline,
)
from .sequences import (
get_learning_sequence,
get_learning_sequence_by_hash,
)
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def get_course_outline(course_key: CourseKey) -> CourseOutlineData:

sequence_data = CourseLearningSequenceData(
usage_key=sequence_model.usage_key,
usage_key_hash=sequence_model.usage_key_hash,
title=sequence_model.title,
inaccessible_after_due=sec_seq_model.inaccessible_after_due,
visibility=VisibilityData(
Expand Down Expand Up @@ -474,7 +475,10 @@ def _update_sequences(course_outline: CourseOutlineData, course_context: CourseC
LearningSequence.objects.update_or_create(
learning_context=course_context.learning_context,
usage_key=sequence_data.usage_key,
defaults={'title': sequence_data.title}
defaults={
'usage_key_hash': sequence_data.usage_key_hash,
'title': sequence_data.title,
},
)
LearningSequence.objects \
.filter(learning_context=course_context.learning_context) \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
All business logic related to fetching generic Learning Sequences information.
By 'generic', we mean context-agnostic; the data returned by these functions
should not be specific to Courses, Libraries, Pathways, or any other context
in which Learning Sequences can exist.

Do not import from this module directly.
Use openedx.core.djangoapps.content.learning_sequences.api -- that
__init__.py imports from here, and is a more stable place to import from.
"""
from opaque_keys.edx.keys import UsageKey

from ..data import LearningSequenceData
from ..models import LearningSequence


def get_learning_sequence(sequence_key: UsageKey) -> LearningSequenceData:
"""
Load generic data for a learning sequence given its usage key.
"""
try:
sequence = LearningSequence.objects.get(usage_key=sequence_key)
except LearningSequence.DoesNotExist as exc:
raise LearningSequenceData.DoesNotExist(
f"no such sequence with usage_key='{sequence_key}'"
) from exc
return _make_sequence_data(sequence)


def get_learning_sequence_by_hash(sequence_key_hash: str) -> LearningSequenceData:
"""
Load generic data for a learning sequence given the hash of its usage key.

WARNING! This is an experimental API function!
We do not currently handle the case of usage key hash collisions.

Before considering this API method stable, will either need to:
1. confirm that the probability of usage key hash collision (accounting for
potentially multiple orders of magnitude of catalog growth) is acceptably
small, or
2. declare that hash keys are only unique within a given learning context,
and update this API function to require a `learning_context_key` argument.
See TNL-8638.
"""
sequences = LearningSequence.objects.filter(usage_key_hash=sequence_key_hash)
if not sequences:
raise LearningSequenceData.DoesNotExist(
f"no such sequence with usage_key_hash={sequence_key_hash!r}"
)
if len(sequences) > 1:
usage_keys_list = ', '.join([
str(sequence.usage_key) for sequence in sequences
])
raise Exception(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we throw something more specific here. ValueError maybe?

f"Two or more sequences' usage keys hash to {sequence_key_hash!r}! "
f"Colliding usage keys: [{usage_keys_list}]."
)
return _make_sequence_data(sequences[0])


def _make_sequence_data(sequence: LearningSequence) -> LearningSequenceData:
"""
Build a LearningSequenceData instance from a LearningSequence model instance.
"""
return LearningSequenceData(
usage_key=sequence.usage_key,
usage_key_hash=sequence.usage_key_hash,
title=sequence.title,
)
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
# lint-amnesty, pylint: disable=missing-module-docstring
import re
from datetime import datetime, timezone
from unittest import TestCase

import pytest
from opaque_keys.edx.keys import CourseKey
import attr
import pytest
from opaque_keys.edx.keys import CourseKey, UsageKey

from ...data import (
CourseOutlineData, CourseSectionData, CourseLearningSequenceData, VisibilityData, CourseVisibility
hash_usage_key,
CourseOutlineData,
CourseLearningSequenceData,
CourseSectionData,
CourseVisibility,
LearningSequenceData,
VisibilityData,
USAGE_KEY_HASH_LENGTH,
USAGE_KEY_HASH_PATTERN,
)


Expand Down Expand Up @@ -151,6 +160,55 @@ def test_empty_user_partition_groups(self):
)


class TestUsageKeyHashing(TestCase):
"""
Basic sanity validation for usage key hashing functionality.
"""
sequence_key = UsageKey.from_string('block-v1:A+B+C+type@sequential+block@D')
sequence_title = 'Sequence ABCD!'

def test_make_sequence_data_with_auto_hash(self):
"""
If no usage_key_hash is specified, we calculate a default using hash_usage_key.
"""
sequence_data = LearningSequenceData(
usage_key=self.sequence_key,
title=self.sequence_title,
)
assert sequence_data.usage_key == self.sequence_key
assert sequence_data.usage_key_hash == hash_usage_key(self.sequence_key)
kdmccormick marked this conversation as resolved.
Show resolved Hide resolved

def test_make_sequence_data_with_explicit_hash(self):
"""
An explicit hash key be can be specified instead, for whatever reason.
"""
sequence_data = LearningSequenceData(
usage_key=self.sequence_key,
usage_key_hash="c00lhash",
title=self.sequence_title,
)
assert sequence_data.usage_key == self.sequence_key
assert sequence_data.usage_key_hash == "c00lhash"

def test_hash_usage_key_output(self):
"""
Compare the hash of `self.sequence_key` against certain expections we
have of it, including its literal value.

This test is meant to fail if the algorithm behind `hash_usage_key`
is modified. It is not implausible that we will want to tweak the algorithm
one day, but this test exists to make sure that doing so is a conscious
decision, taking into account all the potential downstream effects
(URLs breaking, etc).
kdmccormick marked this conversation as resolved.
Show resolved Hide resolved
"""
hash_output = hash_usage_key(self.sequence_key)

assert len(hash_output) == USAGE_KEY_HASH_LENGTH
assert re.match(rf"^{USAGE_KEY_HASH_PATTERN}$", hash_output)

assert hash_output == 'n3Ayh9YV'


def generate_sections(course_key, num_sequences):
"""
Generate a list of CourseSectionData.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
Tests for generic sequence-featching API tests.

Use the learning_sequences outlines API to create test data.
Do not import/create/mock learning_sequences models directly.
"""
from datetime import datetime, timezone

from django.test import TestCase
from opaque_keys.edx.keys import CourseKey

from ...api import replace_course_outline
from ...data import (
hash_usage_key,
CourseLearningSequenceData,
CourseSectionData,
CourseOutlineData,
CourseVisibility,
LearningSequenceData,
VisibilityData,
)
from ..sequences import get_learning_sequence, get_learning_sequence_by_hash
from .test_data import generate_sections


class GetLearningSequenceTestCase(TestCase):
"""
Test get_learning_sequence and get_learning_sequence_by_hash.
"""

common_course_outline_fields = dict(
published_at=datetime(2021, 8, 12, tzinfo=timezone.utc),
entrance_exam_id=None,
days_early_for_beta=None,
self_paced=False,
course_visibility=CourseVisibility.PRIVATE,
)

@classmethod
def setUpTestData(cls):
"""
Set up test data, to be reusable across all tests in this class.
"""
super().setUpTestData()
cls.course_key = CourseKey.from_string("course-v1:Open-edX+Learn+GetSeq")
cls.course_outline = CourseOutlineData(
course_key=cls.course_key,
title="Get Learning Sequences Test Course!",
published_version="5ebece4b69cc593d82fe2021",
sections=generate_sections(cls.course_key, [0, 2, 1]),
**cls.common_course_outline_fields,
)
replace_course_outline(cls.course_outline)
cls.sequence_key = cls.course_outline.sections[1].sequences[1].usage_key
cls.sequence_key_hash = hash_usage_key(cls.sequence_key)
cls.fake_sequence_key = cls.course_key.make_usage_key('sequential', 'fake_sequence')
cls.fake_sequence_key_hash = hash_usage_key(cls.fake_sequence_key)

def test_get_learning_sequence_not_found(self):
with self.assertRaises(LearningSequenceData.DoesNotExist):
get_learning_sequence(self.fake_sequence_key)

def test_get_learning_sequence_by_hash_not_found(self):
with self.assertRaises(LearningSequenceData.DoesNotExist):
get_learning_sequence_by_hash(self.fake_sequence_key_hash)

def test_get_learning_sequence(self):
sequence = get_learning_sequence(self.sequence_key)
assert isinstance(sequence, LearningSequenceData)
assert sequence.usage_key == self.sequence_key
assert sequence.usage_key_hash == self.sequence_key_hash

def test_get_learning_sequence_by_hash(self):
sequence = get_learning_sequence_by_hash(self.sequence_key_hash)
assert isinstance(sequence, LearningSequenceData)
assert sequence.usage_key == self.sequence_key
assert sequence.usage_key_hash == self.sequence_key_hash

def test_get_learning_sequence_hash_collision(self):
normal_visibility = VisibilityData(
hide_from_toc=False,
visible_to_staff_only=False
)
course_key_1 = CourseKey.from_string("course-v1:Open-edX+Learn+Collide1")
outline_1 = CourseOutlineData(
course_key=course_key_1,
title="Learning Sequences - Collision Course 1",
published_version="5ebece4b79cc593d82fe2021",
sections=[
CourseSectionData(
usage_key=course_key_1.make_usage_key('chapter', 'ch_a'),
title="Chapter A",
visibility=normal_visibility,
sequences=[
CourseLearningSequenceData(
usage_key=course_key_1.make_usage_key('sequential', 'seq_a'),
usage_key_hash="2COLLIDE",
title="Seq A",
visibility=normal_visibility,
)
]
)
],
**self.common_course_outline_fields,
)
course_key_2 = CourseKey.from_string("course-v1:Open-edX+Learn+Collide2")
outline_2 = CourseOutlineData(
course_key=course_key_2,
title="Learning Sequences - Collision Course 2",
published_version="5ebece4b89cc593d82fe2021",
sections=[
CourseSectionData(
usage_key=course_key_2.make_usage_key('chapter', 'ch_a'),
title="Chapter A",
visibility=normal_visibility,
sequences=[
CourseLearningSequenceData(
usage_key=course_key_2.make_usage_key('sequential', 'seq_a'),
usage_key_hash="2COLLIDE",
title="Seq A",
visibility=normal_visibility,
)
]
)
],
**self.common_course_outline_fields,
)
replace_course_outline(outline_1)
replace_course_outline(outline_2)
with self.assertRaises(Exception) as exc:
get_learning_sequence_by_hash("2COLLIDE")
message = str(exc.exception)
assert "Two or more sequences" in message
assert str(outline_1.sections[0].sequences[0].usage_key) in message
assert str(outline_2.sections[0].sequences[0].usage_key) in message
Loading