Skip to content

Commit

Permalink
fix behavior of score total in match queries to contain scored matches
Browse files Browse the repository at this point in the history
  • Loading branch information
pudo committed Sep 13, 2024
1 parent 19f94d1 commit 56fb184
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 17 deletions.
8 changes: 2 additions & 6 deletions contrib/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@

from yente.data.entity import Entity
from yente.data.common import EntityExample
from yente.search.base import close_es
from yente.search.queries import entity_query
from yente.search.search import search_entities, result_entities
from yente.data.entity import Entity
from yente.scoring import score_results
from yente.routers.util import get_dataset

Expand All @@ -34,7 +32,7 @@ async def test_example():
print(ent.id, ent.caption, ent.schema.name)

algorithm = get_algorithm("name-based")
scored = score_results(
total, scored = score_results(
algorithm,
entity,
ents,
Expand All @@ -43,11 +41,9 @@ async def test_example():
limit=LIMIT,
)

print("\n\nSCORED RESULTS:")
print("\n\nSCORED RESULTS [%d]:" % total)
for res in scored:
print(res.id, res.caption, res.schema_, res.score)

await close_es()


asyncio.run(test_example())
11 changes: 5 additions & 6 deletions yente/routers/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from yente.logs import get_logger
from yente.data.common import ErrorResponse
from yente.data.common import EntityMatchQuery, EntityMatchResponse, EntityExample
from yente.data.common import EntityMatches
from yente.data.common import EntityMatches, TotalSpec
from yente.provider import SearchProvider, get_provider
from yente.search.queries import entity_query, FilterDict
from yente.search.search import search_entities, result_entities, result_total
from yente.search.search import search_entities, result_entities
from yente.data.entity import Entity
from yente.util import limit_window
from yente.scoring import score_results
Expand Down Expand Up @@ -173,7 +173,7 @@ async def match(

for (name, entity), resp in zip(entities, results):
ents = result_entities(resp)
scored = score_results(
total, scored = score_results(
algorithm_type,
entity,
ents,
Expand All @@ -182,17 +182,16 @@ async def match(
limit=limit,
weights=match.weights,
)
total = result_total(resp)
log.info(
f"/match/{ds.name}",
action="match",
schema=entity.schema.name,
results=total.value,
results=total,
)
responses[name] = EntityMatches(
status=200,
results=scored,
total=total,
total=TotalSpec(value=total, relation="eq"),
query=EntityExample.model_validate(entity.to_dict()),
)
response.headers["x-batch-size"] = str(len(responses))
Expand Down
4 changes: 2 additions & 2 deletions yente/routers/reconcile.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,13 @@ async def reconcile_query(
resp = await search_entities(provider, query, limit=limit, offset=offset)
algorithm_ = get_algorithm_by_name(algorithm)
entities = result_entities(resp)
scoreds = [s for s in score_results(algorithm_, proxy, entities, limit=limit)]
total, scoreds = score_results(algorithm_, proxy, entities, limit=limit)
results = [FreebaseScoredEntity.from_scored(s) for s in scoreds]
log.info(
f"/reconcile/{dataset.name}",
action="reconcile",
schema=proxy.schema.name,
results=result_total(resp).value,
matches=total,
)
return name, FreebaseEntityResult(result=results)

Expand Down
6 changes: 3 additions & 3 deletions yente/scoring.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Iterable, List, Optional, Type, Dict
from typing import Iterable, List, Optional, Type, Dict, Tuple
from nomenklatura.matching.types import ScoringAlgorithm

from yente import settings
Expand All @@ -14,7 +14,7 @@ def score_results(
cutoff: float = 0.0,
limit: Optional[int] = None,
weights: Dict[str, float] = {},
) -> List[ScoredEntityResponse]:
) -> Tuple[int, List[ScoredEntityResponse]]:
scored: List[ScoredEntityResponse] = []
matches = 0
for proxy in results:
Expand All @@ -29,4 +29,4 @@ def score_results(
scored = sorted(scored, key=lambda r: r.score, reverse=True)
if limit is not None:
scored = scored[:limit]
return scored
return matches, scored

0 comments on commit 56fb184

Please sign in to comment.