Skip to content

Commit

Permalink
Adds support for entity hierarchies in compare
Browse files Browse the repository at this point in the history
This change allows users to declaratively specify hierarchical entities in their expected utterance results. For example, a user may declare the following:

```json
{
  "text": "Order a pepperoni pizza",
  "intent": "OrderFood",
  "entities": [
    {
      "entity": "FoodItem",
      "startPos": 8,
      "endPos": 22,
      "children": [
        {
          "entity": "Topping",
          "startPos": 8,
          "endPos": 16
        },
        {
          "entity": "FoodType",
          "startPos": 18,
          "endPos": 22
        }
      ]
    }
  ]
}
```

This would result in three test cases: one for the parent entity (the "FoodItem" entity) and one for each of the two nested entities ("FoodItem::Topping" and "FoodItem::FoodType").

Child entity type names are prefixed by their parent entity type names in the format `parentType::childType`. As such, the recursive entity parsing for the LUIS V3 provider has been updated to use this convention.

Fixes microsoft#335
  • Loading branch information
rozele committed Nov 19, 2020
1 parent 89f296e commit 811812b
Show file tree
Hide file tree
Showing 11 changed files with 255 additions and 83 deletions.
36 changes: 0 additions & 36 deletions docs/Analyze.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,42 +125,6 @@ thresholds:
threshold: 0.1
```
#### Example
While it's useful to set up the performance regression testing in a CI environment, you can also run the tools locally. Here's an end-to-end example for running a performance regression test.
The assumptions are that you have the following:
1. An existing NLU endpoint (in this case, for LUIS).
2. Environment variables or app settings pointing to the correct LUIS application to query and update.
3. A set of changes to the NLU training utterances to evaluate (`utterances.json`).
4. A test set that can be used to evaluate the endpoint (`tests.json`).

Here is the end-to-end:
```sh
# Get predictions from the current endpoint
dotnet nlu test -s luis -u tests.json -o baselineResults.json
# Generate the confusion matrix statistics for the results
dotnet nlu compare -e tests.json -a baselineResults.json -o baseline
# Train a new version of the model
dotnet nlu train -s luis -u utterances.json -a
# Get predictions from the new endpoint
dotnet nlu test -s luis -u tests.json -o latestResults.json
# Create a regression threshold for the overall intent F1 score
echo -e "thresholds:\n\
- type: intent\n\
  threshold: 0.1\n" > \
thresholds.yml
# Generate the confusion matrix statistics for the results and validate regression thresholds
dotnet nlu compare \
-e tests.json \
-a latestResults.json \
-o latest \
-b baseline/statistics.json \
-t thresholds.yml
```

If the F<sub>1</sub> score for overall intents has not dropped by more than 0.1, the exit code for the final command will be 0; otherwise it will be 1 (or, more generally, the number of regression threshold tests that failed).

### Unit Test Mode
Unit test mode can be enabled using the [`--unit-test`](#-u---unit-test) flag. This flag configures the command to return a non-zero exit code if any false positive or false negative results are detected. When in unit test mode, false positive results for entities are only generated for entity types included in the `strictEntities` configuration from `--test-settings` or the labeled test utterance. Similarly, false positive results will only be generated for intents when an explicit negative intent (e.g., "None") is included in the expected results. For example:
Expand Down
59 changes: 58 additions & 1 deletion src/NLU.DevOps.Core.Tests/JsonLabeledUtteranceConverterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
namespace NLU.DevOps.Core.Tests
{
using System;
using System.Collections.Generic;
using System.Linq;
using FluentAssertions;
using FluentAssertions.Json;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json.Serialization;
Expand Down Expand Up @@ -87,6 +87,63 @@ public static void ConvertsUtteranceWithStartPosAndEndPosEntity()
actual.Entities[0].MatchIndex.Should().Be(2);
}

[Test]
public static void ConvertsUtteranceWithNestedEntities()
{
    // Three-level hierarchy (foo -> bar -> baz) declared over the utterance below.
    const string utterance = "foo bar baz";

    // Innermost entity: located by character positions and carrying extra,
    // non-schema properties that should surface as additional properties.
    var grandchild = new JObject
    {
        { "entity", "baz" },
        { "startPos", 8 },
        { "endPos", 10 },
        { "foo", new JArray(42) },
        { "bar", null },
        { "baz", 42 },
        { "qux", JValue.CreateUndefined() },
    };

    // Middle entity: located by match text and carrying an entity value.
    var child = new JObject
    {
        { "entityType", "bar" },
        { "matchText", "bar baz" },
        { "children", new JArray { grandchild } },
        { "entityValue", new JObject { { "bar", "qux" } } },
    };

    // Outermost entity: spans the whole utterance.
    var root = new JObject
    {
        { "entity", "foo" },
        { "startPos", 0 },
        { "endPos", 10 },
        { "children", new JArray { child } },
    };

    var utteranceJson = new JObject
    {
        { "text", utterance },
        { "entities", new JArray { root } },
    };

    var actual = utteranceJson.ToObject<JsonLabeledUtterance>(CreateSerializer());

    // The hierarchy is flattened parent-first, with '::'-prefixed type names.
    actual.Text.Should().Be(utterance);
    var entities = actual.Entities;
    entities.Count.Should().Be(3);

    entities[0].EntityType.Should().Be("foo");
    entities[0].MatchText.Should().Be(utterance);

    entities[1].EntityType.Should().Be("foo::bar");
    entities[1].MatchText.Should().Be("bar baz");
    entities[1].EntityValue.Should().BeEquivalentTo(new JObject { { "bar", "qux" } });

    entities[2].EntityType.Should().Be("foo::bar::baz");
    entities[2].MatchText.Should().Be("baz");

    // Unknown properties on the innermost entity are kept as additional properties.
    var extras = entities[2].As<Entity>().AdditionalProperties;
    extras["foo"].As<JToken>().Should().BeEquivalentTo(new JArray(42));
    extras["bar"].Should().BeNull();
    extras["baz"].Should().Be(42);
    extras["qux"].Should().BeNull();
}

private static JsonSerializer CreateSerializer()
{
var serializer = JsonSerializer.CreateDefault();
Expand Down
3 changes: 2 additions & 1 deletion src/NLU.DevOps.Core.Tests/NLU.DevOps.Core.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<PackageReference Include="nunit" Version="3.12.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.13.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.2.0" />
<PackageReference Include="FluentAssertions" Version="5.7.0" />
<PackageReference Include="FluentAssertions" Version="5.5.3" />
<PackageReference Include="FluentAssertions.Json" Version="5.0.0" />
</ItemGroup>

<ItemGroup>
Expand Down
130 changes: 100 additions & 30 deletions src/NLU.DevOps.Core/EntityConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
namespace NLU.DevOps.Core
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;

Expand All @@ -16,42 +18,33 @@ public EntityConverter(string utterance)

private string Utterance { get; }

private string Prefix { get; set; } = string.Empty;

/// <summary>
/// Reads an entity from JSON, dispatching to the hierarchical reader when the
/// requested type is (or derives from) <see cref="HierarchicalEntity"/>.
/// </summary>
/// <param name="reader">JSON reader positioned at the entity object.</param>
/// <param name="objectType">Entity type requested by the serializer.</param>
/// <param name="existingValue">Existing value; not used.</param>
/// <param name="hasExistingValue">Whether an existing value was supplied; asserted to be false in debug builds.</param>
/// <param name="serializer">Calling serializer, forwarded to the readers.</param>
/// <returns>The entity read from the JSON object.</returns>
public override Entity ReadJson(JsonReader reader, Type objectType, Entity existingValue, bool hasExistingValue, JsonSerializer serializer)
{
    Debug.Assert(!hasExistingValue, "Entity instance can only be constructor initialized.");

    var entityJson = JObject.Load(reader);
    if (typeof(HierarchicalEntity).IsAssignableFrom(objectType))
    {
        return this.ReadHierarchicalEntity(entityJson, serializer);
    }

    return this.ReadEntity(entityJson, objectType, serializer);
}

/// <summary>
/// Writing entities is not implemented by this converter.
/// </summary>
/// <param name="writer">JSON writer; not used.</param>
/// <param name="value">Entity value; not used.</param>
/// <param name="serializer">Calling serializer; not used.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public override void WriteJson(JsonWriter writer, Entity value, JsonSerializer serializer) =>
    throw new NotImplementedException();

private Entity ReadEntity(JObject jsonObject, Type objectType, JsonSerializer serializer)
{
var matchText = jsonObject.Value<string>("matchText");
var matchIndex = jsonObject.Value<int>("matchIndex");
var startPosOrNull = jsonObject.Value<int?>("startPos");
var endPosOrNull = jsonObject.Value<int?>("endPos");
if (matchText == null && startPosOrNull != null && endPosOrNull != null)
if (matchText == null && startPosOrNull.HasValue && endPosOrNull.HasValue)
{
var startPos = startPosOrNull.Value;
var endPos = endPosOrNull.Value;
var length = endPos - startPos + 1;
if (!this.IsValid(startPos, endPos))
{
throw new InvalidOperationException(
$"Invalid start position '{startPos}' or end position '{endPos}' for utterance '{this.Utterance}'.");
}

matchText = this.Utterance.Substring(startPos, length);
(matchText, matchIndex) = this.GetMatchInfo(startPosOrNull.Value, endPosOrNull.Value);
jsonObject.Add("matchText", matchText);
var matchIndex = 0;
var currentPos = 0;
while (true)
{
currentPos = this.Utterance.IndexOf(matchText, currentPos, StringComparison.InvariantCulture);

// Because 'matchText' is derived from the utterance using 'startPos' and 'endPos',
// we are guaranteed to find a match at index 'startPos'.
if (currentPos == startPos)
{
break;
}

currentPos += length;
matchIndex++;
}

jsonObject.Add("matchIndex", matchIndex);
jsonObject.Remove("startPos");
jsonObject.Remove("endPos");
Expand All @@ -76,9 +69,86 @@ public override Entity ReadJson(JsonReader reader, Type objectType, Entity exist
}
}

/// <summary>
/// Reads a hierarchical entity from JSON, including any nested <c>children</c>.
/// </summary>
/// <remarks>
/// The entity type is prefixed with the current <see cref="Prefix"/>, and the
/// prefix is extended with <c>entityType::</c> while the children are read, so
/// nested entities get names of the form <c>parentType::childType</c>.
/// </remarks>
/// <param name="jsonObject">JSON object describing the entity.</param>
/// <param name="serializer">Serializer used to deserialize the <c>children</c> array.</param>
/// <returns>The hierarchical entity described by <paramref name="jsonObject"/>.</returns>
private HierarchicalEntity ReadHierarchicalEntity(JObject jsonObject, JsonSerializer serializer)
{
    var matchText = jsonObject.Value<string>("matchText");
    var matchIndex = jsonObject.Value<int>("matchIndex");
    var startPosOrNull = jsonObject.Value<int?>("startPos");
    var endPosOrNull = jsonObject.Value<int?>("endPos");

    // Explicit match text wins; character positions are only a fallback.
    if (matchText == null && startPosOrNull.HasValue && endPosOrNull.HasValue)
    {
        (matchText, matchIndex) = this.GetMatchInfo(startPosOrNull.Value, endPosOrNull.Value);
    }

    // Both 'entityType' and the shorter 'entity' property name are accepted.
    var entityType = jsonObject.Value<string>("entityType") ?? jsonObject.Value<string>("entity");
    var childrenJson = jsonObject["children"];
    var children = default(IEnumerable<HierarchicalEntity>);
    if (childrenJson != null)
    {
        // Extend the prefix only while the children are being read, then restore
        // the parent's prefix even if deserialization throws.
        // NOTE(review): presumably the serializer routes nested entities back
        // through this converter so they observe the extended prefix - confirm.
        var parentPrefix = this.Prefix;
        this.Prefix = $"{parentPrefix}{entityType}::";
        try
        {
            children = childrenJson.ToObject<IEnumerable<HierarchicalEntity>>(serializer);
        }
        finally
        {
            this.Prefix = parentPrefix;
        }
    }

    var entity = new HierarchicalEntity($"{this.Prefix}{entityType}", jsonObject["entityValue"], matchText, matchIndex, children);
    foreach (var property in jsonObject)
    {
        switch (property.Key)
        {
            // Properties consumed above are not repeated as additional properties.
            case "children":
            case "endPos":
            case "entity":
            case "entityType":
            case "entityValue":
            case "matchText":
            case "matchIndex":
            case "startPos":
                break;
            default:
                // Unwrap primitive JSON values to their underlying .NET value;
                // objects and arrays are kept as JSON tokens.
                var value = property.Value is JValue jsonValue ? jsonValue.Value : property.Value;
                entity.AdditionalProperties.Add(property.Key, value);
                break;
        }
    }

    return entity;
}

/// <summary>
/// Converts an inclusive character range in the utterance into the matched
/// text and the occurrence index of that text within the utterance.
/// </summary>
/// <param name="startPos">Index of the first character of the match.</param>
/// <param name="endPos">Index of the last character of the match (inclusive).</param>
/// <returns>The matched text and its occurrence index.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the positions are rejected by the range validation.
/// </exception>
private Tuple<string, int> GetMatchInfo(int startPos, int endPos)
{
    if (!this.IsValid(startPos, endPos))
    {
        throw new InvalidOperationException(
            $"Invalid start position '{startPos}' or end position '{endPos}' for utterance '{this.Utterance}'.");
    }

    var length = endPos - startPos + 1;
    var matchText = this.Utterance.Substring(startPos, length);

    // Count the occurrences found before the one that starts at 'startPos',
    // resuming each search just past the previous hit.
    // NOTE(review): a failed search (-1) is not special-cased; the scan relies on
    // the occurrence at 'startPos' being reached, which looks fragile when
    // occurrences of 'matchText' overlap - confirm against how matchIndex is consumed.
    var occurrence = 0;
    var searchFrom = 0;
    var foundAt = this.Utterance.IndexOf(matchText, searchFrom, StringComparison.InvariantCulture);
    while (foundAt != startPos)
    {
        searchFrom = foundAt + length;
        occurrence++;
        foundAt = this.Utterance.IndexOf(matchText, searchFrom, StringComparison.InvariantCulture);
    }

    return Tuple.Create(matchText, occurrence);
}

private bool IsValid(int startPos, int endPos)
Expand Down
31 changes: 31 additions & 0 deletions src/NLU.DevOps.Core/HierarchicalEntity.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace NLU.DevOps.Core
{
using System.Collections.Generic;
using Newtonsoft.Json.Linq;

/// <summary>
/// Entity appearing in an utterance that can carry nested child entities.
/// </summary>
public sealed class HierarchicalEntity : Entity, IHierarchicalEntity
{
/// <summary>
/// Initializes a new instance of the <see cref="HierarchicalEntity"/> class.
/// </summary>
/// <param name="entityType">Entity type name.</param>
/// <param name="entityValue">Entity value, generally a canonical form of the entity.</param>
/// <param name="matchText">Matching text in the utterance.</param>
/// <param name="matchIndex">Occurrence index of matching token in the utterance.</param>
/// <param name="children">Child entities nested under this entity; stored as given, without copying or null checking.</param>
public HierarchicalEntity(string entityType, JToken entityValue, string matchText, int matchIndex, IEnumerable<HierarchicalEntity> children)
: base(entityType, entityValue, matchText, matchIndex)
{
this.Children = children;
}

/// <inheritdoc />
public IEnumerable<IHierarchicalEntity> Children { get; }
}
}
19 changes: 19 additions & 0 deletions src/NLU.DevOps.Core/IHierarchicalEntity.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace NLU.DevOps.Core
{
using System.Collections.Generic;
using Models;

/// <summary>
/// Entity that exposes a collection of nested child entities.
/// </summary>
public interface IHierarchicalEntity : IEntity
{
/// <summary>
/// Gets the child entities nested under this entity.
/// </summary>
IEnumerable<IHierarchicalEntity> Children { get; }
}
}
33 changes: 30 additions & 3 deletions src/NLU.DevOps.Core/JsonEntities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

namespace NLU.DevOps.Core
{
using System;
using System.Collections.Generic;
using System.Linq;
using Models;
using Newtonsoft.Json;

/// <summary>
Expand All @@ -15,20 +18,44 @@ public class JsonEntities
/// Initializes a new instance of the <see cref="JsonEntities"/> class.
/// </summary>
/// <param name="entities">Entities referenced in the utterance.</param>
public JsonEntities(IReadOnlyList<Entity> entities)
public JsonEntities(IEnumerable<HierarchicalEntity> entities)
{
this.Entities = entities;
this.Entities = FlattenChildren(entities)?.ToArray();
}

/// <summary>
/// Gets the entities referenced in the utterance.
/// </summary>
public IReadOnlyList<Entity> Entities { get; }
public IReadOnlyList<IEntity> Entities { get; }

/// <summary>
/// Gets the additional properties.
/// </summary>
[JsonExtensionData]
public IDictionary<string, object> AdditionalProperties { get; } = new Dictionary<string, object>();

/// <summary>
/// Flattens a hierarchy of entities into a single sequence, yielding each
/// entity before its descendants (depth-first).
/// </summary>
/// <remarks>
/// Entities are yielded unchanged; their type names are not modified here.
/// The result is lazily evaluated.
/// </remarks>
/// <param name="entities">Top-level entities, or <c>null</c>.</param>
/// <returns>
/// The flattened sequence, or <c>null</c> when <paramref name="entities"/> is <c>null</c>.
/// </returns>
private static IEnumerable<IEntity> FlattenChildren(IEnumerable<IHierarchicalEntity> entities)
{
    if (entities == null)
    {
        return null;
    }

    // Yields the entity itself, then everything nested beneath it.
    IEnumerable<IEntity> getSelfAndDescendants(IHierarchicalEntity entity)
    {
        yield return entity;

        // A null child collection means the entity is a leaf.
        var descendants = FlattenChildren(entity.Children);
        if (descendants != null)
        {
            foreach (var descendant in descendants)
            {
                yield return descendant;
            }
        }
    }

    return entities.SelectMany(getSelfAndDescendants);
}
}
}
Loading

0 comments on commit 811812b

Please sign in to comment.