Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task/rdmp 271 Update catalogue overview #2107

Draft
wants to merge 14 commits into
base: develop
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ private T Activate<T, T2>(T2 databaseObject, Image<Rgba32> tabImage)

uiInstance.SetDatabaseObject(this, databaseObject);

if (insertIndex is not null)
if (insertIndex is not null && _mainDockPanel.ActivePane is not null)
{
_mainDockPanel.ActivePane.SetContentIndex(floatable, (int)insertIndex);
}
Expand Down
293 changes: 90 additions & 203 deletions Rdmp.Core/Curation/Data/Overview/OverviewModel.cs
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
// Copyright (c) The University of Dundee 2024-2024
// Copyright (c) The University of Dundee 2024-2025
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.

using MongoDB.Driver;
using Rdmp.Core.CommandExecution;
using Rdmp.Core.DataExport.Data;
using Rdmp.Core.DataLoad.Triggers;
using Rdmp.Core.DataViewing;
using Rdmp.Core.Logging;
using Rdmp.Core.QueryBuilding;
using Rdmp.Core.DataQualityEngine.Data;
using Rdmp.Core.Repositories;
using Rdmp.Core.ReusableLibraryCode.DataAccess;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;

Expand All @@ -24,244 +21,134 @@ namespace Rdmp.Core.Curation.Data.Overview;
/// </summary>
public class OverviewModel
{

private readonly ICatalogue _catalogue;
private readonly IBasicActivateItems _activator;

private DataTable _dataLoads;

private int _numberOfPeople;
private int _numberOfRecords;
private Evaluation _evaluation;
private readonly DataTable _dqeTable = new();
private readonly DateTime? _extractionDate;
private readonly DateTime? _dataLoadDate;

public OverviewModel(IBasicActivateItems activator, ICatalogue catalogue)
{
_activator = activator;
_catalogue = catalogue;
if (catalogue != null)
{
Regen("");
}
_dqeTable = GetCountsByDatePeriod();
_dataLoadDate = GetDataLoadDate();
_extractionDate = GetExtractionDate();
}

public void Regen(string whereClause)
{
using var dt = new DataTable();
var hasExtractionIdentifier = true;
var column = _catalogue.CatalogueItems.FirstOrDefault(static ci => ci.ExtractionInformation.IsExtractionIdentifier);
if (column is null)
{
column = _catalogue.CatalogueItems.FirstOrDefault();
hasExtractionIdentifier = false;
}

if (column is null) return;

var discoveredColumn = column.ColumnInfo.Discover(DataAccessContext.InternalDataProcessing);
var server = discoveredColumn.Table.Database.Server;
using var con = server.GetConnection();
con.Open();
var populatedWhere = !string.IsNullOrWhiteSpace(whereClause) ? $"WHERE {whereClause}" : "";
var sql = $"SELECT {column.ColumnInfo.GetRuntimeName()} FROM {discoveredColumn.Table.GetRuntimeName()} {populatedWhere}";
using var cmd = server.GetCommand(sql, con);
cmd.CommandTimeout = 30000;
using var da = server.GetDataAdapter(cmd);
dt.BeginLoadData();
da.Fill(dt);
dt.EndLoadData();
_numberOfRecords = dt.Rows.Count;
_numberOfPeople = hasExtractionIdentifier
? dt.AsEnumerable().Select(r => r[column.ColumnInfo.GetRuntimeName()]).Distinct().Count()
: 0;
GetDataLoads();
}

public int GetNumberOfRecords()
public bool HasDQEEvaluation()
{
return _numberOfRecords;
return _evaluation is not null;
}

public int GetNumberOfPeople()
public DataTable GetTableData()
{
return _numberOfPeople;
return _dqeTable;
}

public Tuple<DateTime, DateTime> GetStartEndDates(ColumnInfo dateColumn, string whereClause)
private DateTime? GetExtractionDate()
{
using var dt = new DataTable();

var discoveredColumn = _catalogue.CatalogueItems.First().ColumnInfo.Discover(DataAccessContext.InternalDataProcessing);
var server = discoveredColumn.Table.Database.Server;
var populatedWhereClause = !string.IsNullOrWhiteSpace(whereClause) ? $"WHERE {whereClause}" : "";
using var con = server.GetConnection();
con.Open();
if (server.DatabaseType == FAnsi.DatabaseType.MicrosoftSQLServer)
{
var sql = $@"
select min({dateColumn.GetRuntimeName()}) as min, max({dateColumn.GetRuntimeName()}) as max
from
(select {dateColumn.GetRuntimeName()},
count(1) over (partition by year({dateColumn.GetRuntimeName()})) as occurs
from {discoveredColumn.Table.GetRuntimeName()} {populatedWhereClause}) as t
where occurs >1
";

using var cmd = server.GetCommand(sql, con);
cmd.CommandTimeout = 30000;
using var da = server.GetDataAdapter(cmd);
dt.BeginLoadData();
da.Fill(dt);
dt.EndLoadData();
}
else
var extractableDataSets = _activator.RepositoryLocator.DataExportRepository.GetAllObjectsWhere<ExtractableDataSet>("Catalogue_ID", _catalogue.ID);
DateTime? maxDateOfExtraction = null;
foreach (var eds in extractableDataSets)
{
var repo = new MemoryCatalogueRepository();
var qb = new QueryBuilder(null, null);
qb.AddColumn(new ColumnInfoToIColumn(repo, dateColumn));
qb.AddCustomLine($"{dateColumn.Name} IS NOT NULL", FAnsi.Discovery.QuerySyntax.QueryComponent.WHERE);
var cmd = server.GetCommand(qb.SQL, con);
using var da = server.GetDataAdapter(cmd);
dt.BeginLoadData();
da.Fill(dt);
var latest = dt.AsEnumerable()
.Max(r => r.Field<DateTime>(dateColumn.Name));
var earliest = dt.AsEnumerable()
.Min(r => r.Field<DateTime>(dateColumn.Name));
dt.Rows.Clear();
dt.Rows.Add(earliest, latest);
var results = _activator.RepositoryLocator.DataExportRepository.GetAllObjectsWhere<CumulativeExtractionResults>("ExtractableDataSet_ID", eds.ID);
if (results.Length != 0)
{
var max = results.Select(cer => cer.DateOfExtraction).Max();
if (maxDateOfExtraction == null || max > maxDateOfExtraction)
{
maxDateOfExtraction = max;
}
}
}

return new Tuple<DateTime, DateTime>(DateTime.Parse(dt.Rows[0].ItemArray[0].ToString()), DateTime.Parse(dt.Rows[0].ItemArray[1].ToString()));
return maxDateOfExtraction;
}


public static DataTable GetCountsByDatePeriod(ColumnInfo dateColumn, string datePeriod, string optionalWhere = "")
private DateTime? GetDataLoadDate()
{
var dt = new DataTable();
if (!(new[] { "Day", "Month", "Year" }).Contains(datePeriod))
{
throw new Exception("Invalid Date period");
}

var discoveredColumn = dateColumn.Discover(DataAccessContext.InternalDataProcessing);
var server = discoveredColumn.Table.Database.Server;
using var con = server.GetConnection();
con.Open();
var dateString = datePeriod switch
if (_evaluation is null) return null;
int dataLoadID = _evaluation.RowStates.Max(rs => rs.DataLoadRunID);
if (dataLoadID > 0)
{
"Day" => "yyyy-MM-dd",
"Month" => "yyyy-MM",
"Year" => "yyyy",
_ => "yyyy-MM"
};

if (server.DatabaseType == FAnsi.DatabaseType.MicrosoftSQLServer)
{
var sql = @$"
SELECT format({dateColumn.GetRuntimeName()}, '{dateString}') as YearMonth, count(*) as '# Records'
FROM {discoveredColumn.Table.GetRuntimeName()}
WHERE {dateColumn.GetRuntimeName()} IS NOT NULL
{(optionalWhere != "" ? "AND" : "")} {optionalWhere.Replace('"', '\'')}
GROUP BY format({dateColumn.GetRuntimeName()}, '{dateString}')
ORDER BY 1
";

using var cmd = server.GetCommand(sql, con);
cmd.CommandTimeout = 30000;
using var da = server.GetDataAdapter(cmd);
dt.BeginLoadData();
da.Fill(dt);
dt.EndLoadData();
}
else
{
var repo = new MemoryCatalogueRepository();
var qb = new QueryBuilder(null, null);
qb.AddColumn(new ColumnInfoToIColumn(repo, dateColumn));
qb.AddCustomLine($"{dateColumn.Name} IS NOT NULL", FAnsi.Discovery.QuerySyntax.QueryComponent.WHERE);
var cmd = server.GetCommand(qb.SQL, con);
using var da = server.GetDataAdapter(cmd);
dt.BeginLoadData();
da.Fill(dt);
Dictionary<string, int> counts = [];
foreach (var key in dt.AsEnumerable().Select(row => DateTime.Parse(row.ItemArray[0].ToString()).ToString(dateString)))
var loggingDB = _catalogue.CatalogueRepository.GetAllObjectsWhere<ExternalDatabaseServer>("CreatedByAssembly", "Rdmp.Core/Databases.LoggingDatabase").FirstOrDefault();
if (loggingDB != null)
{
counts[key]++;
var discoveredDB = loggingDB.Discover(DataAccessContext.InternalDataProcessing);
var discoveredTable = discoveredDB.ExpectTable("TableLoadRun");
if (discoveredTable != null)
{
var conn = discoveredDB.Server.GetConnection();
conn.Open();
var cmd = discoveredTable.GetCommand($"SELECT startTime FROM {discoveredTable.GetFullyQualifiedName()} WHERE dataLoadRunID = {dataLoadID}", conn);
var result = cmd.ExecuteScalar();
conn.Close();
if (result != null)
{
return DateTime.Parse(result.ToString());
}
}
}

dt = new DataTable();
foreach (var item in counts)
{
var dr = dt.NewRow();
dr["YearMonth"] = item.Key;
dr["# Records"] = item.Value;
dt.Rows.Add(dr);
}

dt.EndLoadData();
}

return dt;
return null;
}

private void GetDataLoads()
public int GetNumberOfRecords()
{
_dataLoads = new DataTable();
var repo = new MemoryCatalogueRepository();
var qb = new QueryBuilder(null, null);
var columnInfo = _catalogue.CatalogueItems.Where(static c => c.Name == SpecialFieldNames.DataLoadRunID).Select(c => c.ColumnInfo).FirstOrDefault();
if (columnInfo == null) return;
return _dqeTable.AsEnumerable()
.Sum(x => int.Parse(x["# Records"].ToString()));
}

qb.AddColumn(new ColumnInfoToIColumn(repo, columnInfo));
qb.AddCustomLine($"{columnInfo.Name} IS NOT NULL", FAnsi.Discovery.QuerySyntax.QueryComponent.WHERE);
var sql = qb.SQL;
var server = columnInfo.Discover(DataAccessContext.InternalDataProcessing).Table.Database.Server;
using var con = server.GetConnection();
con.Open();
/// <summary>
/// Formats the extraction date captured when this model was constructed.
/// </summary>
/// <returns>
/// The stored extraction date rendered with the "dd/MM/yyyy" format string,
/// or <c>null</c> when no extraction date is held.
/// </returns>
// NOTE(review): no format provider is passed, so the '/' separator follows the current culture - confirm that is intended for display.
public string GetLatestExtraction() => _extractionDate?.ToString("dd/MM/yyyy");

using var cmd = server.GetCommand(sql, con);
cmd.CommandTimeout = 30000;
using var da = server.GetDataAdapter(cmd);
_dataLoads.BeginLoadData();
da.Fill(_dataLoads);
_dataLoads.EndLoadData();
/// <summary>
/// Formats the data load date captured when this model was constructed.
/// </summary>
/// <returns>
/// The stored data load date rendered with the "dd/MM/yyyy" format string,
/// or <c>null</c> when no data load date is held.
/// </returns>
// NOTE(review): no format provider is passed, so the '/' separator follows the current culture - confirm that is intended for display.
public string GetLatestDataLoad() => _dataLoadDate?.ToString("dd/MM/yyyy");

public DataTable GetMostRecentDataLoad()
public Tuple<DateTime?, DateTime?> GetStartEndDates()
{
if (_dataLoads == null) GetDataLoads();
if (_dataLoads.Rows.Count == 0) return null;
if (_dqeTable.Rows.Count == 0) return new Tuple<DateTime?, DateTime?>(null, null);
var start = DateTime.Parse(_dqeTable.AsEnumerable().First()["YearMonth"].ToString());
var end = DateTime.Parse(_dqeTable.AsEnumerable().Last()["YearMonth"].ToString());
return new Tuple<DateTime?, DateTime?>(start, end);
}

var maxDataLoadId = _dataLoads.AsEnumerable().Select(static r => int.Parse(r[0].ToString())).Max();
var loggingServers = _activator.RepositoryLocator.CatalogueRepository.GetAllObjectsWhere<ExternalDatabaseServer>("CreatedByAssembly", "Rdmp.Core/Databases.LoggingDatabase");
var columnInfo = _catalogue.CatalogueItems.Where(c => c.Name == SpecialFieldNames.DataLoadRunID).Select(c => c.ColumnInfo).First();
var server = columnInfo.Discover(DataAccessContext.InternalDataProcessing).Table.Database.Server;
private DataTable GetCountsByDatePeriod()
{

DataTable dt = new();
foreach (var loggingServer in loggingServers)
var dt = new DataTable();
try
{
var repo = new DQERepository(_catalogue.CatalogueRepository);
_evaluation = repo.GetAllObjectsWhere<Evaluation>("CatalogueID", _catalogue.ID).LastOrDefault();
}
catch (Exception)
{
return dt;
}
if (_evaluation != null)
{
var logCollection = new ViewLogsCollection(loggingServer, new LogViewerFilter(LoggingTables.DataLoadRun));
var dataLoadRunSql = $"{logCollection.GetSql()} WHERE ID={maxDataLoadId}";
var logServer = loggingServer.Discover(DataAccessContext.InternalDataProcessing).Server;
using var loggingCon = logServer.GetConnection();
loggingCon.Open();
using var loggingCmd = logServer.GetCommand(dataLoadRunSql, loggingCon);
loggingCmd.CommandTimeout = 30000;
using var loggingDa = server.GetDataAdapter(loggingCmd);
dt.BeginLoadData();
loggingDa.Fill(dt);
dt.EndLoadData();
if (dt.Rows.Count > 0)
dt = PeriodicityState.GetPeriodicityForDataTableForEvaluation(_evaluation, "ALL", true);
dt.Columns.Add("# Records");
foreach (DataRow row in dt.Rows)
{
break;
row["# Records"] = int.Parse(row["Correct"].ToString()) + int.Parse(row["Wrong"].ToString()) + int.Parse(row["Missing"].ToString()) + int.Parse(row["InvalidatesRow"].ToString());
}
dt.Columns.Remove("Year");
dt.Columns.Remove("Month");
dt.Columns.Remove("Correct");
dt.Columns.Remove("Wrong");
dt.Columns.Remove("Missing");
dt.Columns.Remove("InvalidatesRow");
}

return dt;
}

/// <summary>
/// Collects every <see cref="CumulativeExtractionResults"/> in the data export repository
/// whose <c>ExtractableDataSet_ID</c> belongs to an <see cref="ExtractableDataSet"/> of this catalogue.
/// </summary>
/// <returns>The matching extraction results; an empty list when none match.</returns>
public List<CumulativeExtractionResults> GetExtractions()
{
    var dataExportRepository = _activator.RepositoryLocator.DataExportRepository;

    // Materialise the dataset IDs once. The previous deferred Select was re-enumerated
    // by Contains for every extraction result; a set makes each membership check O(1).
    var datasetIds = dataExportRepository
        .GetAllObjectsWhere<ExtractableDataSet>("Catalogue_ID", _catalogue.ID)
        .Select(d => d.ID)
        .ToHashSet();

    return dataExportRepository
        .GetAllObjects<CumulativeExtractionResults>()
        .Where(result => datasetIds.Contains(result.ExtractableDataSet_ID))
        .ToList();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,6 @@ CONSTRAINT [PK_RegexRedactionKey] PRIMARY KEY CLUSTERED
END
GO


EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'Table ID' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'Catalogue', @level2type=N'COLUMN',@level2name=N'ID'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'‘SMR01’ for example' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'Catalogue', @level2type=N'COLUMN',@level2name=N'Acronym'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public ProposeExecutionWhenTargetIsCatalogue(IActivateItems itemActivator) : bas

public override void Activate(Catalogue c)
{
ItemActivator.Activate<CatalogueUI, Catalogue>(c);
ItemActivator.Activate<ViewCatalogueOverviewUI, Catalogue>(c);
}

public override ICommandExecution ProposeExecution(ICombineToMakeCommand cmd, Catalogue targetCatalogue,
Expand Down
Loading
Loading