Skip to content

Commit

Permalink
Add weighted search results with ts_rank.
Browse files Browse the repository at this point in the history
  • Loading branch information
cyrillkuettel committed Jul 6, 2024
1 parent b13f041 commit 057b27f
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 11 deletions.
38 changes: 38 additions & 0 deletions src/privatim/models/searchable.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from functools import wraps

from privatim.orm import Base


Expand Down Expand Up @@ -27,3 +29,39 @@ def searchable_models() -> tuple[type['HasSearchableFields'], ...]:
if issubclass(cls, SearchableMixin):
model_classes.add(cls)
return tuple(model_classes)


def prioritize_search_field(f):
    """
    Mark the object returned by `f` as a primary search field.

    The returned wrapper calls `f` with the arguments it receives, sets
    ``is_primary_search_field = True`` on the object `f` returned and hands
    that same object back. The search view checks this flag (via
    ``getattr(field, 'is_primary_search_field', False)``) when it weights
    columns, so matches in a flagged column (typically a model's title)
    rank higher than matches in the other searchable fields.

    Schematic usage, as sketched by the original author:

        class YourModel(Base):

            @prioritize_search_field
            title: Mapped[str] = mapped_column(nullable=False)

            description: Mapped[str]

            def searchable_fields(self):
                yield 'title'
                yield 'description'

    NOTE(review): Python does not allow a decorator on an annotated
    assignment, so the snippet above only illustrates the intent; confirm
    the real call site (the decorator has to wrap a callable).

    Note:
        `searchable_fields` should yield all searchable fields, including
        the primary one. The primary field is only weighted more heavily,
        it does not replace the others.

    The object returned by `f` must accept attribute assignment, otherwise
    the wrapper raises ``AttributeError``.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        column = f(*args, **kwargs)
        # The flag lives on the returned object itself, not on the wrapper.
        column.is_primary_search_field = True
        return column

    return wrapper
64 changes: 53 additions & 11 deletions src/privatim/views/search.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from markupsafe import Markup
from pyramid.httpexceptions import HTTPFound
from sqlalchemy import (func, select, cast, literal, String, Select)
from sqlalchemy import (func, select, literal, Select)
from privatim.forms.search_form import SearchForm
from privatim.layouts import Layout
from privatim.i18n import locales
from sqlalchemy import or_

from privatim.models import SearchableAssociatedFiles
from privatim.models.file import SearchableFile
Expand Down Expand Up @@ -169,8 +168,9 @@ def build_file_query(
)

def build_attribute_query(
self, model: type['HasSearchableFields']
self, model: type['HasSearchableFields']
) -> 'Select[SearchResultType]':

headline_expressions = [
func.ts_headline(
self.lang,
Expand All @@ -179,25 +179,67 @@ def build_attribute_query(
'StartSel=<mark>, StopSel=</mark>, MaxWords=35, MinWords=15, '
'ShortWord=3, HighlightAll=FALSE, MaxFragments=3, '
'FragmentDelimiter=" ... "',
).label(field.name)
).label(field.key)
for field in model.searchable_fields()
]

combined_vector = self._create_rank_expression(model)
rank_expression = func.ts_rank(combined_vector, self.ts_query)
select_fields = [
model.id,
*headline_expressions,
literal(model.__name__).label('type'), # noqa: MS001
rank_expression.label('rank')
]

return select(*select_fields).filter(
or_(
*[
func.to_tsvector(self.lang, field).op('@@')(self.ts_query)
for field in model.searchable_fields()
]
)
return (
select(*select_fields)
.filter(combined_vector.op('@@')(self.ts_query))
.order_by(rank_expression.desc())
)

def _create_rank_expression(self, model):
    """ Weight the search results based on the importance of the field.

    Every column yielded by ``model.searchable_fields()`` is turned into a
    tsvector with ``to_tsvector(self.lang, column)`` and labelled with
    ``setweight``. The labels correspond to PostgreSQL's default
    ``ts_rank`` weights:

    - 1.0 for primary fields (A)
    - 0.4 for high importance fields (B)
    - 0.2 for medium importance fields (C)
    - 0.1 for low importance fields (D)

    See also:
    https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING

    Not all search matches hold equal importance. Matches in something
    like ``model.title`` should rank higher in the search results. To
    achieve this, columns flagged with ``is_primary_search_field`` (set by
    the ``prioritize_search_field`` decorator) get weight 'A' and all
    other columns get 'B'. A flag is used instead of the column name
    because the field is not always literally called 'title'.

    Returns a single SQL expression: the weighted vectors concatenated
    with ``||``, or ``to_tsvector('')`` when the model has no searchable
    fields. It is never a tuple, so callers can pass it straight to
    ``func.ts_rank`` and call ``.op('@@')`` on it.
    """

    weights = {'primary': 'A', 'high': 'B', 'medium': 'C', 'low': 'D'}

    weighted_vectors = [
        func.setweight(
            func.to_tsvector(self.lang, field),
            (
                weights['primary']
                if getattr(field, 'is_primary_search_field', False)
                else weights['high']
            ),
        )
        for field in model.searchable_fields()
    ]

    if not weighted_vectors:
        # Nothing to search: return an empty vector so the caller's
        # `@@` filter and ts_rank() still receive a valid expression.
        return func.to_tsvector('')

    # Chain the vectors pairwise with the tsvector `||` operator.
    # A reviewer suggested `func.tsvector_concat(*weighted_vectors)`.
    # NOTE(review): PostgreSQL's tsvector_concat appears to accept exactly
    # two tsvectors; confirm before passing more than two.
    combined_vector = weighted_vectors[0]
    for vector in weighted_vectors[1:]:
        combined_vector = combined_vector.op('||')(vector)

    # No trailing comma here: that would wrap the expression in a 1-tuple.
    return combined_vector

def _add_comments_to_results(self) -> None:
"""Extends self.results with the complete query for Comment.
Expand Down

0 comments on commit 057b27f

Please sign in to comment.