Skip to content

Commit

Permalink
Merge pull request #3667 from rtibbles/autofix_my_messed_up_channels
Browse files Browse the repository at this point in the history
Add autofix for improperly imported content trees.
  • Loading branch information
rtibbles authored May 16, 2018
2 parents e815e92 + fe20d9e commit 4b8acc6
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 9 deletions.
158 changes: 150 additions & 8 deletions kolibri/content/test/test_annotation.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
import tempfile
import uuid

from django.core.management import call_command
from django.test import TransactionTestCase

from kolibri.content.models import ContentNode, File, LocalFile
from kolibri.content.utils.annotation import (
mark_local_files_as_available, set_local_file_availability_from_disk,
set_leaf_node_availability_from_local_file_availability, recurse_availability_up_tree
)

from le_utils.constants import content_kinds

from mock import call
from mock import patch

from .sqlalchemytesting import django_connection_engine
from kolibri.content.models import ChannelMetadata
from kolibri.content.models import CONTENT_SCHEMA_VERSION
from kolibri.content.models import ContentNode
from kolibri.content.models import File
from kolibri.content.models import LocalFile
from kolibri.content.utils.annotation import fix_multiple_trees_with_id_one
from kolibri.content.utils.annotation import mark_local_files_as_available
from kolibri.content.utils.annotation import recurse_availability_up_tree
from kolibri.content.utils.annotation import set_leaf_node_availability_from_local_file_availability
from kolibri.content.utils.annotation import set_local_file_availability_from_disk


def get_engine(connection_string):
    """
    Stand-in for ``sqlalchemybridge.get_engine`` that the test cases in this
    module patch in.

    The ``connection_string`` argument is accepted for signature compatibility
    but is not used; whatever ``django_connection_engine()`` returns is handed
    back instead.
    """
    engine = django_connection_engine()
    return engine


@patch('kolibri.content.utils.sqlalchemybridge.get_engine', new=get_engine)
class AnnotationFromLocalFileAvailability(TransactionTestCase):

Expand Down Expand Up @@ -112,6 +118,7 @@ def tearDown(self):

# Real temporary file on disk. tempfile.mkstemp() returns an (fd, path) tuple,
# so the file's path is element 1.
mock_content_file = tempfile.mkstemp()


@patch('kolibri.content.utils.sqlalchemybridge.get_engine', new=get_engine)
class LocalFileByDisk(TransactionTestCase):

Expand Down Expand Up @@ -188,3 +195,138 @@ def test_set_all_files_two_exist(self, path_mock):
def tearDown(self):
call_command('flush', interactive=False)
super(LocalFileByDisk, self).tearDown()


# Real temporary file on disk, presumably meant to stand in for a channel
# content database (TODO confirm). tempfile.mkstemp() returns an (fd, path)
# tuple, so the file's path is element 1.
mock_content_db_file = tempfile.mkstemp()


@patch('kolibri.content.utils.channel_import.import_channel_from_local_db')
class FixMultipleTreesWithIdOneTestCase(TransactionTestCase):
    """
    Tests for ``fix_multiple_trees_with_id_one``.

    Each test creates one or two extra channels next to the fixture data,
    forces every root node onto ``tree_id`` 1, runs the fix and then checks how
    many root nodes with ``tree_id`` 1 remain and which channels were reimported.

    The class-level patch replaces ``import_channel_from_local_db`` with a mock
    (passed to every test as ``import_mock``), so a "reimport" only records a
    call. ``get_content_database_file_path`` is patched per test (``path_mock``)
    to control whether a content database appears to exist for a channel.
    """

    fixtures = ['content_test.json']

    def _create_channel(self, with_child=False):
        """
        Create a root ContentNode with a matching ChannelMetadata record.

        :param with_child: when True, also create one child node under the root
            so that this channel contains more nodes than a bare one.
        :return: the root ContentNode of the new channel.
        """
        root_node = ContentNode.objects.create(
            title='test',
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=uuid.uuid4().hex,
        )
        if with_child:
            ContentNode.objects.create(
                title='test1',
                id=uuid.uuid4().hex,
                content_id=uuid.uuid4().hex,
                channel_id=root_node.channel_id,
                parent=root_node,
            )
        ChannelMetadata.objects.create(
            id=root_node.channel_id,
            root=root_node,
            name='test',
            min_schema_version=CONTENT_SCHEMA_VERSION,
        )
        return root_node

    def _force_roots_to_tree_id_one(self):
        """Give every root node tree_id 1, reproducing the broken state."""
        # Use a queryset update to side step django-mptt's auto tree_id code
        ContentNode.objects.filter(parent=None).update(tree_id=1)

    def _count_roots_with_tree_id_one(self):
        """Return the number of root nodes whose tree_id is 1."""
        return ContentNode.objects.filter(parent=None, tree_id=1).count()

    @patch('kolibri.content.utils.annotation.get_content_database_file_path', return_value=mock_content_db_file[1])
    def test_extra_channel_contentdb_exists(self, path_mock, import_mock):
        root_node = self._create_channel()
        self._force_roots_to_tree_id_one()
        self.assertEqual(self._count_roots_with_tree_id_one(), 2)
        fix_multiple_trees_with_id_one()
        self.assertEqual(self._count_roots_with_tree_id_one(), 1)
        # Exactly one reimport is expected: the extra channel.
        import_mock.assert_called_once_with(root_node.channel_id)

    @patch('kolibri.content.utils.annotation.get_content_database_file_path', return_value=mock_content_db_file[1])
    def test_two_extra_channels_contentdb_exists(self, path_mock, import_mock):
        root_node_1 = self._create_channel()
        # Add an additional node so that root_node_1 channel is processed first.
        root_node_2 = self._create_channel(with_child=True)
        self._force_roots_to_tree_id_one()
        self.assertEqual(self._count_roots_with_tree_id_one(), 3)
        fix_multiple_trees_with_id_one()
        self.assertEqual(self._count_roots_with_tree_id_one(), 1)
        import_mock.assert_has_calls([call(root_node_1.channel_id), call(root_node_2.channel_id)])

    @patch('kolibri.content.utils.annotation.get_content_database_file_path', return_value='')
    def test_extra_channel_no_contentdb_exists(self, path_mock, import_mock):
        self._create_channel()
        self._force_roots_to_tree_id_one()
        self.assertEqual(self._count_roots_with_tree_id_one(), 2)
        fix_multiple_trees_with_id_one()
        # Without a content database nothing may be deleted or reimported.
        self.assertEqual(self._count_roots_with_tree_id_one(), 2)
        import_mock.assert_not_called()

    @patch('kolibri.content.utils.annotation.get_content_database_file_path', side_effect=['', mock_content_db_file[1]])
    def test_two_extra_channels_one_contentdb_exists(self, path_mock, import_mock):
        # The side_effect list is consumed in processing order: the first
        # channel looked up has no content database, the second one does.
        self._create_channel()
        # Add an additional node so that root_node_1 channel is processed first.
        root_node_2 = self._create_channel(with_child=True)
        self._force_roots_to_tree_id_one()
        self.assertEqual(self._count_roots_with_tree_id_one(), 3)
        fix_multiple_trees_with_id_one()
        self.assertEqual(self._count_roots_with_tree_id_one(), 2)
        # assert_called_with only inspects the most recent call, so it cannot
        # show that the first channel was never imported. Requiring exactly one
        # call, made for the second channel, covers both expectations.
        import_mock.assert_called_once_with(root_node_2.channel_id)
40 changes: 39 additions & 1 deletion kolibri/content/utils/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sqlalchemy import select

from .channels import get_channel_ids_for_content_database_dir
from .paths import get_content_database_file_path
from .paths import get_content_file_name
from .paths import get_content_storage_file_path
from .sqlalchemybridge import Bridge
Expand All @@ -25,18 +26,55 @@

CHUNKSIZE = 10000


def update_channel_metadata():
    """
    Import any channel content databases that have not been imported yet, then
    repair known problems in the already imported content data.

    This covers upgrading from a version of Kolibri that did not import its
    content data: every channel content database found in
    settings.CONTENT_DATABASE_DIR that has no ChannelMetadata record is
    imported and has its availability set.
    Afterwards, issues that bugs in a previous version may have left in the
    current content database are fixed.
    """
    from .channel_import import import_channel_from_local_db
    for channel_id in get_channel_ids_for_content_database_dir(settings.CONTENT_DATABASE_DIR):
        already_imported = ChannelMetadata.objects.filter(id=channel_id).exists()
        if already_imported:
            continue
        import_channel_from_local_db(channel_id)
        set_availability(channel_id)
    fix_multiple_trees_with_id_one()


def fix_multiple_trees_with_id_one():
    """
    Repair ContentNode trees that were imported naively and therefore share
    tree_id 1.

    When more than one channel has a root node with tree_id 1, the channel with
    the most ContentNodes is left untouched. Every other such channel has its
    content tree and metadata deleted and is reimported from its content
    database. A channel is skipped, with a warning, when no content database
    file exists for it or when it has no ChannelMetadata record, so nothing is
    deleted that cannot be restored.
    """
    # Just check the root nodes to reduce the query size. A set is used so that
    # a channel owning several affected root nodes is only considered once.
    tree_id_one_channel_ids = set(
        ContentNode.objects.filter(parent=None, tree_id=1).values_list('channel_id', flat=True)
    )
    if len(tree_id_one_channel_ids) < 2:
        # At most one channel uses tree_id 1, so there is nothing to fix.
        return

    logging.warning("Improperly imported channels discovered")
    # Imported lazily inside the function, as update_channel_metadata does.
    from .channel_import import import_channel_from_local_db

    # Find which channel has the most content nodes; the rest get reimported.
    channel_sizes = {
        channel_id: ContentNode.objects.filter(channel_id=channel_id).count()
        for channel_id in tree_id_one_channel_ids
    }
    # Sorted by increasing number of nodes. Ties are broken by channel id so
    # that the choice of which channel is kept does not depend on dict ordering.
    sorted_channel_ids = sorted(
        channel_sizes,
        key=lambda channel_id: (channel_sizes[channel_id], channel_id),
    )

    count = 0
    # Loop through all but the largest channel, delete and reimport
    for channel_id in sorted_channel_ids[:-1]:
        # Double check that we have a content db to import from before deleting any metadata
        if not os.path.exists(get_content_database_file_path(channel_id)):
            logging.warning(
                "Attempted to reimport channel metadata for channel %s but no content database found",
                channel_id,
            )
            continue
        try:
            channel = ChannelMetadata.objects.get(id=channel_id)
        except ChannelMetadata.DoesNotExist:
            # A root node without a metadata record must not abort the fix for
            # the remaining channels; report it and carry on.
            logging.warning(
                "Attempted to reimport channel metadata for channel %s but no channel metadata found",
                channel_id,
            )
            continue
        logging.warning("Deleting and reimporting channel metadata for %s", channel_id)
        channel.delete_content_tree_and_files()
        import_channel_from_local_db(channel_id)
        logging.info("Successfully reimported channel metadata for %s", channel_id)
        count += 1

    if count:
        logging.info("Successfully reimported channel metadata for %s channels", count)
    # Every channel except the largest one was a candidate for reimport.
    failed_count = len(sorted_channel_ids) - 1 - count
    if failed_count:
        logging.warning("Failed to reimport channel metadata for %s channels", failed_count)


def set_leaf_node_availability_from_local_file_availability():
Expand Down

0 comments on commit 4b8acc6

Please sign in to comment.