Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sample script to update database character set #435

Merged
merged 2 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ sudo apt-get -y upgrade
sudo apt-get install -y openjdk-8-jre-headless mysql-server
sudo apt-get install -y elasticsearch

sudo mysql -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8 COLLATE utf8_unicode_ci;"
sudo mysql -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci;"
sudo mysql -e "CREATE USER 'archivematica'@'localhost' IDENTIFIED BY 'demo';"
sudo mysql -e "GRANT ALL ON SS.* TO 'archivematica'@'localhost';"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ sudo -u root systemctl start mariadb
sudo -u root systemctl enable gearmand
sudo -u root systemctl start gearmand

sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS MCP; CREATE DATABASE MCP CHARACTER SET utf8 COLLATE utf8_unicode_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8 COLLATE utf8_unicode_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS MCP; CREATE DATABASE MCP CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "DROP DATABASE IF EXISTS SS; CREATE DATABASE SS CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;"
sudo -H -u root mysql -hlocalhost -uroot -e "CREATE USER 'archivematica'@'localhost' IDENTIFIED BY 'demo';"
sudo -H -u root mysql -hlocalhost -uroot -e "GRANT ALL ON MCP.* TO 'archivematica'@'localhost';"
sudo -H -u root mysql -hlocalhost -uroot -e "GRANT ALL ON SS.* TO 'archivematica'@'localhost';"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env bash

set -o errexit # abort on nonzero exitstatus
set -o nounset # abort on unbound variable
set -o pipefail # do not hide errors within pipes

# Array of database names
DATABASES=(
MCP
SS
)

# Collation and CHARSET
CHARSET="utf8mb4"
COLLATION="utf8mb4_0900_ai_ci"

# MySQL authentication (optional, default no auth)
MYSQL_USE_AUTH=False
MYSQL_USER=root
MYSQL_PASSWORD="THE_PASSWORD"

# Function to execute a query
execute_query() {
local query="$1"
local db_name="$2"
local user_arg=""

if [ "$MYSQL_USE_AUTH" = "True" ]; then
user_arg="-u$MYSQL_USER"
export MYSQL_PWD="$MYSQL_PASSWORD"
fi

mysql -N -B $user_arg -e "$query" "$db_name"
}

# Function to fix database charset and collation
fix_database_charset() {
local query="ALTER DATABASE ${DB_NAME} CHARACTER SET $CHARSET COLLATE $COLLATION;"
echo "Fixing database charset and collation"
execute_query "$query" "$DB_NAME"
echo "Fixed database charset and collation"
}

# Function to fix tables charset and collation
fix_tables_charset() {
local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` CHARACTER SET $CHARSET COLLATE $COLLATION;') \
FROM information_schema.TABLES AS T, information_schema.\`COLLATION_CHARACTER_SET_APPLICABILITY\` AS C \
WHERE C.collation_name = T.table_collation \
AND T.table_schema = '$DB_NAME' \
AND (C.CHARACTER_SET_NAME != '$CHARSET' OR C.COLLATION_NAME != '$COLLATION');"

local alter_table_queries=$(execute_query "$query" "$DB_NAME")
alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;")
# echo "$alter_table_queries_no_foreign_key_checks"
echo "Fixing tables charset and collation"
execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME"
echo "Fixed tables charset and collation"
}

# Function to fix column collation for varchar columns
fix_varchar_columns_collation() {
local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` MODIFY \`', column_name, '\` ', DATA_TYPE, \
'(', CHARACTER_MAXIMUM_LENGTH, ') CHARACTER SET $CHARSET COLLATE $COLLATION', \
(CASE WHEN IS_NULLABLE = 'NO' THEN ' NOT NULL' ELSE '' END), ';') \
FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = '$DB_NAME' AND DATA_TYPE = 'varchar' AND \
( CHARACTER_SET_NAME != '$CHARSET' OR COLLATION_NAME != '$COLLATION');"

local alter_table_queries=$(execute_query "$query" "$DB_NAME")
alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;")
# echo "$alter_table_queries_no_foreign_key_checks"
echo "Fixing column collation for varchar columns"
execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME"
echo "Fixed column collation for varchar columns"
}

# Function to fix column collation for non-varchar columns
fix_non_varchar_columns_collation() {
local query="SELECT CONCAT('ALTER TABLE \`', table_name, '\` MODIFY \`', column_name, '\` ', DATA_TYPE, ' \
CHARACTER SET $CHARSET COLLATE $COLLATION', (CASE WHEN IS_NULLABLE = 'NO' THEN ' NOT NULL' ELSE '' END), ';') \
FROM information_schema.COLUMNS \
WHERE TABLE_SCHEMA = '$DB_NAME' \
AND DATA_TYPE != 'varchar' \
AND (CHARACTER_SET_NAME != '$CHARSET' OR COLLATION_NAME != '$COLLATION');"

local alter_table_queries=$(execute_query "$query" "$DB_NAME")
alter_table_queries_no_foreign_key_checks=$(echo -e "SET FOREIGN_KEY_CHECKS=0;\n$alter_table_queries\nSET FOREIGN_KEY_CHECKS=1;")
# echo "$alter_table_queries_no_foreign_key_checks"
echo "Fixing column collation for non-varchar columns"
execute_query "$alter_table_queries_no_foreign_key_checks" "$DB_NAME"
echo "Fixed column collation for non-varchar columns"
}

# Loop through each database in the array
for DB_NAME in "${DATABASES[@]}"; do
echo "Processing database: $DB_NAME"
fix_database_charset
fix_tables_charset
fix_varchar_columns_collation
fix_non_varchar_columns_collation
echo "Migration completed for $DB_NAME"
done

# Unset the MYSQL_PWD environment variable after executing the queries
unset MYSQL_PWD
57 changes: 57 additions & 0 deletions admin-manual/installation-setup/upgrading/upgrading.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Upgrade from Archivematica |previous_version|.x to |release|
* :ref:`Upgrade with output capturing disabled <upgrade-no-output-capture>`
* :ref:`Update search indices <update-search-indices>`
* :ref:`Review the processing configuration <review-processing-configuration>`
* :ref:`Migrate from MySQL 5.x to 8.x <migrate-mysql>`

.. note::

Expand Down Expand Up @@ -502,6 +503,62 @@ introduced in Archivematica 1.13.
The ``default`` and ``automated`` bundled configurations can be reset to the
Archivematica defaults.

.. _migrate-mysql:

Migrate from MySQL 5.x to 8.x
-----------------------------

It is recommended the MySQL databases for Archivematica and Storage Service use
the MySQL 8 ``utf8mb4`` character set and its default collation
``utf8mb4_0900_ai_ci`` (or ``utf8mb4_general_ci`` in MariaDB).

If you migrate your databases from MySQL 5.x you can check the character set
and encoding of their tables with:

.. code:: sql

SELECT
t.table_schema, t.table_name, c.character_set_name, t.table_collation
FROM
information_schema.tables t,
information_schema.collation_character_set_applicability c
WHERE
c.collation_name = t.table_collation
AND t.table_type = 'BASE TABLE'
AND (t.table_schema = 'MCP' OR t.table_schema = 'SS');

If they use the ``utf8mb3`` character set and collation you should update them
to avoid potential migration conflicts like this:

.. code:: bash

Running migrations:
Applying admin.0003_logentry_add_action_flag_choices... OK
Applying auth.0009_alter_user_last_name_max_length... OK
Applying auth.0010_alter_group_name_max_length... OK
Applying auth.0011_update_proxy_permissions... OK
Applying auth.0012_alter_user_first_name_max_length... OK
Applying locations.0031_rclone_space...Traceback (most recent call last):
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/django/db/backends/utils.py", line 84, in _execute
return self.cursor.execute(sql, params)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/django/db/backends/mysql/base.py", line 73, in execute
return self.cursor.execute(query, args)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/cursors.py", line 179, in execute
res = self._query(mogrified_query)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/cursors.py", line 330, in _query
db.query(q)
File "/pyenv/data/versions/3.9.18/lib/python3.9/site-packages/MySQLdb/connections.py", line 255, in query
_mysql.connection.query(self, query)
MySQLdb.OperationalError: (3780, "Referencing column 'space_id' and referenced column 'uuid' in foreign key constraint 'locations_rclone_space_id_adb7fd1d_fk_locations_space_uuid' are incompatible.")

django.db.utils.OperationalError: (3780, "Referencing column 'space_id' and referenced column 'uuid' in foreign key constraint 'locations_rclone_space_id_adb7fd1d_fk_locations_space_uuid' are incompatible.")

The following script can be used as a reference to update the character set of
the databases and their tables.

.. literalinclude:: scripts/mysql-change-encoding-collation.sh
:language: bash
:lines: 1-104

.. _`Elasticsearch 6.8 docs`: https://www.elastic.co/guide/en/elasticsearch/reference/6.8/modules-snapshots.html
.. _`release notes`: https://wiki.archivematica.org/Release_Notes
Expand Down
Loading