diff --git a/packages/compass-generative-ai/scripts/ai-accuracy-tests/ai-accuracy-tests.ts b/packages/compass-generative-ai/scripts/ai-accuracy-tests/ai-accuracy-tests.ts index 5418b81fb0a..8a8d4abe946 100644 --- a/packages/compass-generative-ai/scripts/ai-accuracy-tests/ai-accuracy-tests.ts +++ b/packages/compass-generative-ai/scripts/ai-accuracy-tests/ai-accuracy-tests.ts @@ -539,10 +539,24 @@ const tests: TestOptions[] = [ collectionName: 'listingsAndReviews', userInput: 'what is the bed count that occurs the most? return it in a field called bedCount', - assertResult: isDeepStrictEqualTo([ - { - bedCount: 1, - }, + assertResult: anyOf([ + isDeepStrictEqualTo([ + { + bedCount: 1, + }, + ]), + isDeepStrictEqualTo([ + { + _id: 1, + bedCount: 1, + }, + ]), + isDeepStrictEqualTo([ + { + _id: null, + bedCount: 1, + }, + ]), ]), }, { @@ -552,7 +566,7 @@ const tests: TestOptions[] = [ collectionName: 'listingsAndReviews', includeSampleDocuments: true, userInput: - 'whats the total number of reviews? return it in a field called totalReviewsOverall', + 'whats the total number of reviews across all listings? return it in a field called totalReviewsOverall', assertResult: anyOf([ isDeepStrictEqualTo([ { @@ -572,8 +586,11 @@ const tests: TestOptions[] = [ acceptAggregationResponse: true, databaseName: 'sample_airbnb', collectionName: 'listingsAndReviews', + // This currently fails with our method of formatting arrays with documents in our prompt, + // at least with gpt-3.5-turbo. So we set the min accuracy to 0. + minAccuracyForTest: 0, userInput: - 'which host id has the most reviews in total? return it in a field called hostId', + 'which host id has the most reviews across all listings? return it in a field called hostId', assertResult: isDeepStrictEqualTo([ { hostId: '16187044', @@ -731,16 +748,29 @@ const tests: TestOptions[] = [ }, { type: 'aggregation', - databaseName: 'listingsAndReviews', - collectionName: 'movies', + databaseName: 'sample_airbnb', + collectionName: 'listingsAndReviews', // Test $unwind with array of documents. - // This currently fails with our method of formatting arrays with documents in our prompt, - // at least with gpt-3.5-turbo. So we set the min accuracy to 0. + // This currently fails a good amount with gpt-3.5-turbo. So we set the min accuracy to 0. minAccuracyForTest: 0, userInput: - 'build an array of all of the review comments by reviewer id 72064521', - assertResult: isDeepStrictEqualTo([ - 'Our stay was fantastic. Mehmet was was excellent with communication and made us feel at home. His place is centrally located and the cafe downstairs as a nice welcoming vibe. Would recommend to stay here on a trip to Istanbul.', + 'build an array called reviewComments of all of the review comments by reviewer id 72064521.', + assertResult: anyOf([ + isDeepStrictEqualTo([ + { + reviewComments: [ + 'Our stay was fantastic. Mehmet was was excellent with communication and made us feel at home. His place is centrally located and the cafe downstairs as a nice welcoming vibe. Would recommend to stay here on a trip to Istanbul.', + ], + }, + ]), + isDeepStrictEqualTo([ + { + _id: null, + reviewComments: [ + 'Our stay was fantastic. Mehmet was was excellent with communication and made us feel at home. His place is centrally located and the cafe downstairs as a nice welcoming vibe. Would recommend to stay here on a trip to Istanbul.', + ], + }, + ]), ]), }, {