Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Parse functionality and some unit tests #3

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 193 additions & 0 deletions HexDump.Tests/HexDump_Format_Test.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
using System.Linq;
using Xunit;

namespace HexDump.Tests
{
/// <summary>
/// Round-trip tests for <c>HexDump.Format</c> and <c>HexDump.Parse</c>.
/// Every test formats a buffer, parses the resulting text back into bytes and
/// formats those bytes again; see <see cref="AssertRoundTrip"/>.
/// </summary>
/// <remarks>
/// The expected dumps are verbatim string literals, so every character between the
/// quotes (including leading whitespace on continuation lines and the trailing space
/// on partial rows) is part of the expected value. Keep those lines at column 0.
/// </remarks>
public class HexDump_Format_Test
{
    /// <summary>
    /// Shared body of every test in this class.
    /// </summary>
    /// <param name="data">Bytes to format.</param>
    /// <param name="expected">Exact text that formatting <paramref name="data"/> must produce.</param>
    private static void AssertRoundTrip(byte[] data, string expected)
    {
        var formatted = HexDump.Format(data);
        var parsed = HexDump.Parse(formatted);
        var reformatted = HexDump.Format(parsed);

        // Check the first formatting pass first: if it is wrong, the later checks are noise.
        Assert.Equal(expected, formatted);
        // Parsing must give back exactly the bytes that were formatted.
        Assert.Equal(data, parsed);
        Assert.Equal(expected, reformatted);
    }

    [Fact]
    public void When_Encode_16_Then_ok()
    {
        var expected = @"
0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
".Trim();

        AssertRoundTrip(new byte[16], expected);
    }

    [Fact]
    public void When_Encode_32_Then_ok()
    {
        var expected = @"
0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
".Trim();

        AssertRoundTrip(new byte[32], expected);
    }

    [Fact]
    public void When_Encode_48_Then_ok()
    {
        var expected = @"
0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0020 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
".Trim();

        AssertRoundTrip(new byte[48], expected);
    }

    [Fact]
    public void When_Encode_256_Then_ok()
    {
        var expected = @"
0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0020 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0060 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0080 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0090 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
00A0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
00B0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
00C0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
00D0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
00E0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
00F0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
".Trim();

        AssertRoundTrip(new byte[256], expected);
    }

    [Fact]
    public void When_Encode_8_Then_ok()
    {
        // Partial row: only the leading newline is trimmed, the trailing space is expected.
        var expected = @"
0000 00 00 00 00 00 00 00 00 ........ ".TrimStart();

        AssertRoundTrip(new byte[8], expected);
    }

    [Fact]
    public void When_Encode_0_Then_ok()
    {
        AssertRoundTrip(new byte[0], "");
    }

    [Fact]
    public void When_Encode_24_Then_ok()
    {
        var expected = @"
0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0010 00 00 00 00 00 00 00 00 ........ ".TrimStart();

        AssertRoundTrip(new byte[24], expected);
    }

    [Fact]
    public void When_Encode_40_Then_ok()
    {
        var expected = @"
0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ........ ........
0020 00 00 00 00 00 00 00 00 ........ ".TrimStart();

        AssertRoundTrip(new byte[40], expected);
    }

    [Fact]
    public void When_Encode_40_one_Then_ok()
    {
        var data = Enumerable.Repeat((byte) 1, 40).ToArray();

        var expected = @"
0000 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 ........ ........
0010 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 ........ ........
0020 01 01 01 01 01 01 01 01 ........ ".TrimStart();

        AssertRoundTrip(data, expected);
    }

    [Fact]
    public void When_Encode_40_61_Then_ok()
    {
        var data = Enumerable.Repeat((byte) 'a', 40).ToArray();

        var expected = @"
0000 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 aaaaaaaa aaaaaaaa
0010 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 aaaaaaaa aaaaaaaa
0020 61 61 61 61 61 61 61 61 aaaaaaaa ".TrimStart();

        AssertRoundTrip(data, expected);
    }
}
}
2 changes: 1 addition & 1 deletion HexDump/HexDump.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace HexDump
{
public static class HexDump
public static partial class HexDump
{
[Pure]
public static string Format(byte[] bytes, int columnWidth = 8, int columnCount = 2, bool includeOffset = true, bool includeAscii = true)
Expand Down
58 changes: 58 additions & 0 deletions HexDump/HexDump_Parse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright (c) Drew Noakes. All Rights Reserved. Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information.

using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace HexDump
{
public static partial class HexDump
{

// Matches a single row of a hex dump, anchored at both ends. Named groups:
//   offset - optional: a run of hex digits followed by whitespace.
//   hexa   - exactly 48 characters, each either a hex digit or whitespace
//            (presumably 16 bytes x 3 characters per byte; confirm against Format).
//   dump   - optional: whatever text follows the whitespace after the hex columns.
// Hex digits are matched case-insensitively; the expression is compiled once and reused.
private static readonly Regex _re = new Regex(
    @"^((?<offset>[0-9a-f]+)\s+)?(?<hexa>[0-9a-f\s]{48})\s+(?<dump>.+)?$",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Parses a hex dump into a byte array.
/// Only supports hex dumps that were formatted with these settings:
/// - columnWidth = 8
/// - columnCount = 2
lduchosal marked this conversation as resolved.
Show resolved Hide resolved
/// - includeOffset = true
/// - includeAscii = true
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These documented restrictions don't appear to match the regular expression pattern.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented in Parse so we have the same behaviour in both methods.

///
/// </summary>
/// <param name="dump"></param>
/// <returns></returns>
public static byte[] Parse(string dump )
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an FYI: This method allocates a lot of temporary memory on the heap. It is possible to write this without allocating anything other than the List<byte>, though the parsing has to be more manual. I can provide more details if you're interested.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

glad to see any improvement if you feel like it's worth it.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whether it's worth it depends upon the consumer's situation. If they're only parsing one short blob of text then it's probably fine as is. If they're running many operations concurrently or in an application that's sensitive to GC pauses, then the current implementation may be problematic. This is general library code, so I try to be as well behaved as possible as we cannot make many assumptions about the user's requirements.

At a high level, all of these can be avoided:

  • A string per line of the input
  • A Replace string per line
  • A string per hex character pair
  • Enumerators (via Linq) per line
  • StringReader per call

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ParseLookup implementation seems the most promising one.
Will have a look at a state machine to implement special cases.

{
//00000000 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
//00000000 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................

// Fail fast with a descriptive exception instead of a NullReferenceException from Split.
if (dump == null)
{
    throw new ArgumentNullException(nameof(dump));
}

var result = new List<byte>();

// Split on both line-ending characters explicitly. Environment.NewLine differs between
// platforms, so splitting on it gives different line boundaries for the same input
// depending on the machine. Empty entries (e.g. between '\r' and '\n') are dropped;
// they could never match the row pattern anyway.
var lines = dump.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using Environment.NewLine means you can get different behaviours on different machines for the same input. This kind of thing regularly breaks CI, for example. I'd rather see split explicitly on '\r' and '\n' and remove empty entries (though I'd rather not use String.Split at all as it allocates an array and temporary strings, both of which can be avoided).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed it to use StringReader and remove the ToArray() in linq query.

foreach (var line in lines)
{
    var match = _re.Match(line);
    if (!match.Success)
    {
        // Not a hex dump row (blank line, free text, ...): ignore it.
        continue;
    }

    var hexa = match.Groups["hexa"].Value;

    // High nibble of the byte currently being assembled, or -1 when between bytes.
    var pendingNibble = -1;

    foreach (var c in hexa)
    {
        int nibble;
        if (c >= '0' && c <= '9')
        {
            nibble = c - '0';
        }
        else if (c >= 'a' && c <= 'f')
        {
            nibble = c - 'a' + 10;
        }
        else if (c >= 'A' && c <= 'F')
        {
            nibble = c - 'A' + 10;
        }
        else
        {
            // The pattern only admits hex digits and whitespace in this group, so this
            // is a separator. Skipping every kind of whitespace (not just ' ') keeps
            // tab-separated rows from failing the conversion.
            continue;
        }

        if (pendingNibble < 0)
        {
            pendingNibble = nibble;
        }
        else
        {
            result.Add((byte) ((pendingNibble << 4) | nibble));
            pendingNibble = -1;
        }
    }

    if (pendingNibble >= 0)
    {
        // An odd number of hex digits cannot be split into whole bytes. Report it
        // explicitly rather than through an out-of-range substring error.
        throw new FormatException("Hex dump row contains an odd number of hex digits: " + line);
    }
}

return result.ToArray();
}

}
}