From bb6500730080b402608b706a0f9f876cb3369327 Mon Sep 17 00:00:00 2001 From: John Sharples <41682323+John-Sharples@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:50:56 -0600 Subject: [PATCH] Feature 318 add db for testing (#326) * 318: add mariadb gh actions * 318: Update tests to use actual db * Update conftest with fixtures to access and update test database mv_test * Add fixture to clean database (drop/recreate) * Add fixture to access test data in METreformat * Add example test of met_db_load.py * 318: remove METdbLoad tests from sonarqube gh workflow. * 318: fix test, pass tmpdir as list. --- .github/workflows/unit_tests.yaml | 12 +- METdbLoad/conftest.py | 135 ++++++++++++++++++--- METdbLoad/test/test_load_specification.xml | 56 ++++----- METdbLoad/test/test_met_db_load.py | 27 +++++ METdbLoad/test/test_read_data_files.py | 6 +- METdbLoad/test/test_xml.py | 12 +- METdbLoad/ush/met_db_load.py | 43 +++---- METdbLoad/ush/write_mode_sql.py | 4 +- METdbLoad/ush/write_mtd_sql.py | 4 +- METdbLoad/ush/write_tcst_sql.py | 4 +- 10 files changed, 219 insertions(+), 84 deletions(-) create mode 100644 METdbLoad/test/test_met_db_load.py diff --git a/.github/workflows/unit_tests.yaml b/.github/workflows/unit_tests.yaml index 10dddca6..b651b3fb 100644 --- a/.github/workflows/unit_tests.yaml +++ b/.github/workflows/unit_tests.yaml @@ -27,8 +27,17 @@ on: jobs: build: - runs-on: ubuntu-latest + + services: + mariadb: + image: mariadb:latest + env: + MARIADB_ROOT_PASSWORD: root_password + ports: + - 3306:3306 + options: --health-cmd="healthcheck.sh --connect --innodb_initialized" --health-interval=10s --health-timeout=5s --health-retries=3 + strategy: fail-fast: false matrix: @@ -67,6 +76,7 @@ jobs: - name: Test with pytest run: | + coverage run --append -m pytest METdbLoad/test coverage run --append -m pytest METreformat coverage run --append -m pytest METreadnc coverage report -m diff --git a/METdbLoad/conftest.py b/METdbLoad/conftest.py index e5ca6351..06fec51a 100644 ---
a/METdbLoad/conftest.py +++ b/METdbLoad/conftest.py @@ -1,12 +1,113 @@ import pytest import sys import os - +import pymysql +import logging from pathlib import Path +from unittest.mock import patch + +from METdataio.METdbLoad.ush.read_data_files import ReadDataFiles +from METdataio.METdbLoad.ush.run_sql import RunSql + # add METdataio directory to path so packages can be found -top_dir = str(Path(__file__).parents[1]) -sys.path.insert(0, os.path.abspath(top_dir)) +TOP_DIR = str(Path(__file__).parents[1]) +sys.path.insert(0, os.path.abspath(TOP_DIR)) + +def parse_sql(filename): + """Parse a .sql file and return a list of SQL statements""" + data = open(filename, 'r').readlines() + stmts = [] + DELIMITER = ';' + stmt = '' + + for line in data: + if not line.strip(): + continue + + if line.startswith('--'): + continue + + if (DELIMITER not in line): + stmt += line + continue + + if stmt: + stmt += line + stmts.append(stmt.strip()) + stmt = '' + else: + stmts.append(line.strip()) + return stmts + + +def maria_conn(): + """A databaseless connection to mariaDB server. + This will work even if no database has been created. + """ + try: + conn = pymysql.connect( + host='localhost', + port=3306, + user='root', + password='root_password', + ) + + except Exception as e: + # Test run will fail if db is not found. + # TODO: If we want to run tests that don't require a db when db is missing + # we could put pytest.skip here instead of raising the exception. + raise e + + return conn + + +@pytest.fixture +def emptyDB(): + """Drop and recreate the database. + Including this fixture in a test will DELETE all data from mv_test. 
+ """ + + conn = maria_conn() + with conn.cursor() as cur: + cur.execute("DROP DATABASE IF EXISTS mv_test;") + cur.execute("CREATE DATABASE mv_test;") + conn.commit() + conn.close() + + db_conn = pymysql.connect( + host='localhost', + port=3306, + user='root', + password='root_password', + database='mv_test', + autocommit=True, + ) + + sql_statements = parse_sql(Path(TOP_DIR) / 'METdbLoad/sql/mv_mysql.sql') + + with db_conn.cursor() as cur: + for stm in sql_statements: + cur.execute(stm) + + db_conn.close() + + +@pytest.fixture +def testRunSql(): + """Return an instance of RunSql with a connection. + """ + connection = { + 'db_host': 'localhost', + 'db_port': 3306, + 'db_user': 'root', + 'db_password': 'root_password', + 'db_database': 'mv_test', + } + + testRunSql = RunSql() + testRunSql.sql_on(connection) + return testRunSql # This is a sample of data copied from test file point_stat_DUP_SINGLE_120000L_20120409_120000V.stat @@ -25,7 +126,7 @@ def _populate_xml_load_spec(met_data_dir, met_tool="point_stat", - host="192.168.0.42"): + host="localhost"): """Return the xml load specification with substitute values. 
""" #TODO: determine if other tags require substitution as well @@ -33,9 +134,9 @@ def _populate_xml_load_spec(met_data_dir, mysql {host}:3306 - mv_load_test - user - user_pwd + mv_test + root + root_password {met_data_dir} @@ -47,10 +148,11 @@ def _populate_xml_load_spec(met_data_dir, false false true - false - false + true + true true true + true {met_tool} @@ -61,25 +163,20 @@ def _populate_xml_load_spec(met_data_dir, """ +# TODO: give access to the other test data @pytest.fixture -def stat_file_dir(tmp_path): +def point_stat_file_dir(tmp_path): """Write test stat file and return parent dir.""" - stat_files_dir = tmp_path / "stat_files" - stat_files_dir.mkdir() - - stat_file = stat_files_dir / "point_stat.stat" - with open(stat_file, "w") as text_file: - text_file.write(POINT_STAT_DATA) - return stat_files_dir + return str(Path(TOP_DIR) / 'METreformat/test/data/point_stat' ) #TODO: see if we can restrict the scope of this fixture. @pytest.fixture -def get_xml_test_file(tmp_path, stat_file_dir): +def get_xml_test_file(tmp_path, point_stat_file_dir): """Write test_load_specification.xml and return path""" xml_path = tmp_path / "test_load_specification.xml" with open(xml_path, "w") as text_file: - text_file.write(_populate_xml_load_spec(stat_file_dir)) + text_file.write(_populate_xml_load_spec(point_stat_file_dir)) return xml_path diff --git a/METdbLoad/test/test_load_specification.xml b/METdbLoad/test/test_load_specification.xml index 233fddfd..2823e5a1 100644 --- a/METdbLoad/test/test_load_specification.xml +++ b/METdbLoad/test/test_load_specification.xml @@ -1,30 +1,30 @@ - - mysql - localhost:3306 - mv_load_test - user - user_pwd - + + mysql + localhost:3306 + mv_test + root + root_password + - /path-to/test_data/load_data/load/met_data/point_stat/2011070812/metprd - true - 1 - true - false - false - false - false - true - true - true - true - true - - - point_stat - - - Testing - testing DB load - + /METdataio/METreformat/test/data/point_stat + true + 1 + true 
+ false + false + false + false + true + true + true + true + true + + + point_stat + + + Testing + testing DB load + diff --git a/METdbLoad/test/test_met_db_load.py b/METdbLoad/test/test_met_db_load.py new file mode 100644 index 00000000..11e7f48d --- /dev/null +++ b/METdbLoad/test/test_met_db_load.py @@ -0,0 +1,27 @@ +import argparse +from METdbLoad.conftest import TOP_DIR +from METdbLoad.ush.met_db_load import main as load_main +from METdbLoad.ush.run_sql import RunSql + +def test_met_db_load(emptyDB, get_xml_test_file, testRunSql, tmp_path): + + # TODO: parameterize this test data + test_data = { + "xmlfile": str(get_xml_test_file), + "index": True, + "tmpdir": [str(tmp_path)], + } + test_args = argparse.Namespace() + for k,v in test_data.items(): + setattr(test_args, k, v) + + load_main(test_args) + + # Check the correct number of rows written + testRunSql.cur.execute("SELECT * FROM line_data_cts") + cts_data = testRunSql.cur.fetchall() + + assert len(cts_data) == 24 + + #TODO: check all the other metrics and some values. 
+ diff --git a/METdbLoad/test/test_read_data_files.py b/METdbLoad/test/test_read_data_files.py index 869d00ba..8d2c467f 100644 --- a/METdbLoad/test/test_read_data_files.py +++ b/METdbLoad/test/test_read_data_files.py @@ -19,8 +19,8 @@ def test_counts(get_xml_loadfile): XML_LOADFILE.line_types) # number of files - assert len(XML_LOADFILE.load_files) == 1 + assert len(XML_LOADFILE.load_files) == 2 # number of lines of data - assert FILE_DATA.stat_data.shape[0] == 6 + assert FILE_DATA.stat_data.shape[0] == 94 # number of line types - assert FILE_DATA.stat_data.line_type.unique().size == 5 + assert FILE_DATA.stat_data.line_type.unique().size == 7 diff --git a/METdbLoad/test/test_xml.py b/METdbLoad/test/test_xml.py index 85bf660a..b0aa97d3 100644 --- a/METdbLoad/test/test_xml.py +++ b/METdbLoad/test/test_xml.py @@ -5,8 +5,8 @@ def test_loadflags(get_xml_loadfile): """Read various flags from XML file.""" XML_LOADFILE = get_xml_loadfile() assert XML_LOADFILE.flags['load_stat'] - assert not XML_LOADFILE.flags['load_mode'] - assert not XML_LOADFILE.flags['load_mtd'] + assert XML_LOADFILE.flags['load_mode'] + assert XML_LOADFILE.flags['load_mtd'] assert XML_LOADFILE.flags['load_mpr'] assert XML_LOADFILE.flags['load_orank'] assert XML_LOADFILE.flags['verbose'] @@ -15,7 +15,7 @@ def test_loadflags(get_xml_loadfile): assert XML_LOADFILE.flags['stat_header_db_check'] assert not XML_LOADFILE.flags['mode_header_db_check'] assert not XML_LOADFILE.flags['mtd_header_db_check'] - assert not XML_LOADFILE.flags['force_dup_file'] + assert XML_LOADFILE.flags['force_dup_file'] assert XML_LOADFILE.flags['load_xml'] def test_loadgroup(get_xml_loadfile): @@ -27,10 +27,10 @@ def test_loadgroup(get_xml_loadfile): def test_connection(get_xml_loadfile): """Read connection tags from XML file.""" XML_LOADFILE = get_xml_loadfile() - assert XML_LOADFILE.connection['db_host'] == "192.168.0.42" + assert XML_LOADFILE.connection['db_host'] == "localhost" assert XML_LOADFILE.connection['db_port'] == 3306 
- assert XML_LOADFILE.connection['db_database'] == "mv_load_test" - assert XML_LOADFILE.connection['db_user'] == "user" + assert XML_LOADFILE.connection['db_database'] == "mv_test" + assert XML_LOADFILE.connection['db_user'] == "root" assert XML_LOADFILE.connection['db_management_system'] == "mysql" def test_insertsize(get_xml_loadfile): diff --git a/METdbLoad/ush/met_db_load.py b/METdbLoad/ush/met_db_load.py index 10bdd01a..cf181abc 100644 --- a/METdbLoad/ush/met_db_load.py +++ b/METdbLoad/ush/met_db_load.py @@ -27,19 +27,19 @@ import os import getpass -import constants as CN +import METdbLoad.ush.constants as CN -from read_load_xml import XmlLoadFile -from read_data_files import ReadDataFiles -from run_sql import RunSql -from write_file_sql import WriteFileSql -from write_stat_sql import WriteStatSql -from write_mode_sql import WriteModeSql -from write_tcst_sql import WriteTcstSql -from write_mtd_sql import WriteMtdSql +from METdbLoad.ush.read_load_xml import XmlLoadFile +from METdbLoad.ush.read_data_files import ReadDataFiles +from METdbLoad.ush.run_sql import RunSql +from METdbLoad.ush.write_file_sql import WriteFileSql +from METdbLoad.ush.write_stat_sql import WriteStatSql +from METdbLoad.ush.write_mode_sql import WriteModeSql +from METdbLoad.ush.write_tcst_sql import WriteTcstSql +from METdbLoad.ush.write_mtd_sql import WriteMtdSql -def main(): +def main(args): """ Main program to load files into the METdataio/METviewer database Returns: N/A @@ -64,16 +64,6 @@ def main(): # time execution load_time_start = time.perf_counter() - parser = argparse.ArgumentParser() - # Allow user to choose dir for tmp files - default to user home - tmp_dir = [os.getenv('HOME')] - parser.add_argument("xmlfile", help="Please provide required xml load_spec filename") - parser.add_argument("-index", action="store_true", help="Only process index, do not load data") - parser.add_argument("tmpdir", nargs='*', default=tmp_dir, - help="Optional - when different directory wanted for tmp 
file") - - # get the command line arguments - args = parser.parse_args() # # Read the XML file @@ -405,4 +395,15 @@ def purge_files(load_files, xml_flags): if __name__ == '__main__': - main() + parser = argparse.ArgumentParser() + # Allow user to choose dir for tmp files - default to user home + tmp_dir = [os.getenv('HOME')] + parser.add_argument("xmlfile", help="Please provide required xml load_spec filename") + parser.add_argument("-index", action="store_true", help="Only process index, do not load data") + parser.add_argument("tmpdir", nargs='*', default=tmp_dir, + help="Optional - when different directory wanted for tmp file") + + # get the command line arguments + args = parser.parse_args() + + main(args) diff --git a/METdbLoad/ush/write_mode_sql.py b/METdbLoad/ush/write_mode_sql.py index f71bd349..3e3e69b4 100644 --- a/METdbLoad/ush/write_mode_sql.py +++ b/METdbLoad/ush/write_mode_sql.py @@ -21,9 +21,9 @@ from datetime import timedelta import pandas as pd -import constants as CN +import METdbLoad.ush.constants as CN -from run_sql import RunSql +from METdbLoad.ush.run_sql import RunSql class WriteModeSql: diff --git a/METdbLoad/ush/write_mtd_sql.py b/METdbLoad/ush/write_mtd_sql.py index da464d87..360254dd 100644 --- a/METdbLoad/ush/write_mtd_sql.py +++ b/METdbLoad/ush/write_mtd_sql.py @@ -21,9 +21,9 @@ from datetime import timedelta import pandas as pd -import constants as CN +import METdbLoad.ush.constants as CN -from run_sql import RunSql +from METdbLoad.ush.run_sql import RunSql class WriteMtdSql: diff --git a/METdbLoad/ush/write_tcst_sql.py b/METdbLoad/ush/write_tcst_sql.py index 07bad200..cbc1447f 100644 --- a/METdbLoad/ush/write_tcst_sql.py +++ b/METdbLoad/ush/write_tcst_sql.py @@ -22,9 +22,9 @@ from datetime import timedelta import pandas as pd -import constants as CN +import METdbLoad.ush.constants as CN -from run_sql import RunSql +from METdbLoad.ush.run_sql import RunSql class WriteTcstSql: