Skip to content

Commit

Permalink
Mainly rework of escaping (#92)
Browse files Browse the repository at this point in the history
* 1. Reworked escaping to be more robust
2. Added new methods to the standard library
3. Fixes around ToString with DateTimeOffset.

* Update test results badge [skip ci]

---------

Co-authored-by: github-actions <[email protected]>
  • Loading branch information
Puchaczov and github-actions authored Jan 9, 2025
1 parent d209a7d commit 3637020
Show file tree
Hide file tree
Showing 17 changed files with 964 additions and 39 deletions.
274 changes: 274 additions & 0 deletions Musoq.Evaluator.Tests/EscapeTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Musoq.Evaluator.Tests.Schema.Basic;

namespace Musoq.Evaluator.Tests;

/// <summary>
/// Verifies how escape sequences inside single-quoted string literals of a query
/// are translated into the values returned by the evaluated query.
/// </summary>
[TestClass]
public class EscapeTests : BasicEntityTestBase
{
    [TestMethod]
    public void WhenBackslashEscaped_ShouldBePresent()
    {
        AssertSelectedValues("""select '\\' from #A.entities()""", @"\");
    }

    [TestMethod]
    public void WhenDoubleBackslashEscaped_ShouldBeSingleBackslash()
    {
        // Despite the method name, two escaped backslashes (\\\\) are expected
        // to yield two literal backslashes, i.e. one per escaped pair.
        AssertSelectedValues("""select '\\\\' from #A.entities()""", @"\\");
    }

    [TestMethod]
    public void WhenQuoteEscaped_ShouldBePresent()
    {
        AssertSelectedValues("""select '\'' from #A.entities()""", "'");
    }

    [TestMethod]
    public void WhenNewlineEscaped_ShouldBePresent()
    {
        AssertSelectedValues("""select '\n' from #A.entities()""", "\n");
    }

    [TestMethod]
    public void WhenTabEscaped_ShouldBePresent()
    {
        AssertSelectedValues("""select '\t' from #A.entities()""", "\t");
    }

    [TestMethod]
    public void WhenCarriageReturnEscaped_ShouldBePresent()
    {
        AssertSelectedValues("""select '\r' from #A.entities()""", "\r");
    }

    [TestMethod]
    public void WhenUnicodeEscaped_ShouldBePresent()
    {
        AssertSelectedValues("""select '\u0041' from #A.entities()""", "A");
    }

    [TestMethod]
    public void WhenHexEscaped_ShouldBePresent()
    {
        AssertSelectedValues("""select '\x41' from #A.entities()""", "A");
    }

    [TestMethod]
    public void WhenComplexMixedEscapes_ShouldBePresent()
    {
        AssertSelectedValues(
            """select 'Hello\nWorld\t\u0394\\test' from #A.entities()""",
            "Hello\nWorld\tΔ\\test");
    }

    [TestMethod]
    public void WhenEscapeAtStartAndEnd_ShouldBePresent()
    {
        AssertSelectedValues("""select '\\test\\' from #A.entities()""", @"\test\");
    }

    [TestMethod]
    public void WhenMultipleConsecutiveBackslashes_ShouldBePresent()
    {
        AssertSelectedValues("""select '\\\\\\\\' from #A.entities()""", @"\\\\");
    }

    [TestMethod]
    public void WhenSpecialCharactersEscaped_ShouldBePresent()
    {
        // \e is expected to map to the ESC control character (U+001B).
        AssertSelectedValues("""select '\0\b\f\e' from #A.entities()""", "\0\b\f\u001B");
    }

    [TestMethod]
    public void WhenUnknownEscapeSequence_ShouldRemoveBackslash()
    {
        // Despite the method name, the asserted contract is that unknown escape
        // sequences (and a \x without hex digits) are left untouched, backslash included.
        AssertSelectedValues("""select '\z\y\x' from #A.entities()""", "\\z\\y\\x");
    }

    [TestMethod]
    public void WhenQuoteWithBackslashCombinations_ShouldBePresent()
    {
        // Escaped backslash followed by escaped quote.
        AssertSelectedValues("""select '\\\'' from #A.entities()""", @"\'");
    }

    [TestMethod]
    public void WhenInvalidUnicodeSequences_ShouldHandleGracefully()
    {
        // Incomplete unicode escape (only three hex digits) must be passed through verbatim.
        AssertSelectedValues("""select '\u123' from #A.entities()""", "\\u123");
    }

    [TestMethod]
    public void WhenMultipleEscapedColumns_ShouldAllBeHandledCorrectly()
    {
        AssertSelectedValues(
            """select '\\', '\n', '\u0041' from #A.entities()""",
            @"\",
            "\n",
            "A");
    }

    [TestMethod]
    public void WhenConcatenatingEscapedStrings_ShouldBeHandledCorrectly()
    {
        AssertSelectedValues(
            """select '\\' + '\n' + '\u0041' from #A.entities()""",
            "\\\nA");
    }

    [TestMethod]
    public void WhenUsingExtendedUnicode_ShouldBeHandledCorrectly()
    {
        // Boundary code units expressed with \u escapes: U+0001, U+FFFF and U+0000.
        // (No surrogate pairs are involved here.)
        AssertSelectedValues(
            """select '\u0001\uFFFF\u0000' from #A.entities()""",
            "\u0001\uFFFF\u0000");
    }

    [TestMethod]
    public void WhenUsingLongStringWithEscapes_ShouldBeHandledCorrectly()
    {
        const int repetitions = 1000;

        // Each repetition is: escaped backslash, escaped quote, newline escape, tab escape.
        var longString = string.Join("", Enumerable.Repeat(@"\\\'\n\t", repetitions));
        var query = $"""select '{longString}' from #A.entities()""";

        // Every repetition must unescape to: backslash, quote, newline, tab.
        var expected = string.Concat(Enumerable.Repeat("\\'\n\t", repetitions));

        AssertSelectedValues(query, expected);
    }

    /// <summary>
    /// Runs <paramref name="query"/> against the source built by <see cref="CreateSource"/>
    /// and asserts that the result exposes exactly one column per expected value and that
    /// the first row contains those values in order.
    /// </summary>
    /// <param name="query">Query text to evaluate.</param>
    /// <param name="expected">Expected values of the first row, one per selected column.</param>
    private void AssertSelectedValues(string query, params string[] expected)
    {
        var sources = CreateSource();
        var vm = CreateAndRunVirtualMachine(query, sources);
        var table = vm.Run();

        Assert.AreEqual(expected.Length, table.Columns.Count());

        for (var i = 0; i < expected.Length; i++)
        {
            Assert.AreEqual(expected[i], table[0].Values[i]);
        }
    }

    /// <summary>
    /// Builds the data source used by every test: schema "#A" with a single entity.
    /// </summary>
    private static Dictionary<string, IEnumerable<BasicEntity>> CreateSource()
    {
        return new Dictionary<string, IEnumerable<BasicEntity>>
        {
            {
                "#A", [
                    new BasicEntity("test")
                ]
            }
        };
    }
}
2 changes: 1 addition & 1 deletion Musoq.Evaluator/Musoq.Evaluator.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<Version>7.5.2</Version>
<Version>7.5.3</Version>
<Authors>Jakub Puchała</Authors>
<Product>Musoq</Product>
<PackageProjectUrl>https://github.com/Puchaczov/Musoq</PackageProjectUrl>
Expand Down
16 changes: 2 additions & 14 deletions Musoq.Evaluator/Visitors/ToCSharpRewriteTreeVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,7 @@ public void Visit(StringNode node)
SyntaxFactory.LiteralExpression(
SyntaxKind.StringLiteralExpression,
SyntaxFactory.Literal(
$"@\"{EscapeQuoteString(node.Value, EscapeQuoteStringCharacterReplacement)}\"",
UnescapeLanguageSpecificString(node.Value))));
$"@\"{EscapeQuoteString(node.Value, EscapeQuoteStringCharacterReplacement)}\"", node.Value)));
}

public void Visit(DecimalNode node)
Expand Down Expand Up @@ -571,9 +570,7 @@ public void Visit(BooleanNode node)

public void Visit(WordNode node)
{
Nodes.Push(Generator.LiteralExpression(
UnescapeLanguageSpecificString(
EscapeQuoteString(node.Value, EscapeQuoteStringCharacterReplacement))));
Nodes.Push(Generator.LiteralExpression(node.Value));
}

public void Visit(NullNode node)
Expand Down Expand Up @@ -4000,15 +3997,6 @@ private StatementSyntax GetRowsSourceOrEmpty(string alias)
: SyntaxFactory.EmptyStatement();
}

// Removes every single quote that is not preceded by a backslash from the given
// value, then converts each remaining backslash-quote pair (\') into a plain quote.
// Returns the transformed string; the input is not modified.
private static string UnescapeLanguageSpecificString(string value)
{
// Negative lookbehind: matches a single quote only when the previous character is not a backslash.
const string pattern = @"(?<!\\)'";
// Drop all unescaped single quotes.
var result = Regex.Replace(value, pattern, string.Empty);
// What is left are escaped quotes; strip the escaping backslash from each.
result = result.Replace("\\'", "'");

return result;
}

private static bool CheckIfNullable(Type type)
{
if (type.IsValueType)
Expand Down
Loading

0 comments on commit 3637020

Please sign in to comment.