Skip to content

Commit

Permalink
AMP FTS experiment logic
Browse files Browse the repository at this point in the history
  • Loading branch information
bendk committed Jan 14, 2025
1 parent fc22cdb commit f0276a2
Show file tree
Hide file tree
Showing 7 changed files with 254 additions and 25 deletions.
214 changes: 192 additions & 22 deletions components/suggest/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,35 +321,85 @@ impl<'a> SuggestDao<'a> {
&self,
query: &SuggestionQuery,
suggestion_type: AmpSuggestionType,
) -> Result<Vec<Suggestion>> {
// While we're running the AMP FTS experiment, there are several paths for this one.
// This is a bit messy, let's clean it up once the experiment completes.
let branch = match &query.provider_constraints {
Some(c) => c.amp_experiment_branch,
None => 0,
};
match branch {
1 => self.fetch_amp_suggestions_using_keywords(query, suggestion_type, false),
2 => self.fetch_amp_suggestions_using_fts(query, suggestion_type, "full_keywords"),
3 => self.fetch_amp_suggestions_using_fts(query, suggestion_type, "title"),
_ => self.fetch_amp_suggestions_using_keywords(query, suggestion_type, true),
}
}

pub fn fetch_amp_suggestions_using_keywords(
&self,
query: &SuggestionQuery,
suggestion_type: AmpSuggestionType,
allow_keyword_expansion: bool,
) -> Result<Vec<Suggestion>> {
let keyword_lowercased = &query.keyword.to_lowercase();
let provider = match suggestion_type {
AmpSuggestionType::Mobile => SuggestionProvider::AmpMobile,
AmpSuggestionType::Desktop => SuggestionProvider::Amp,
};
let suggestions = self.conn.query_rows_and_then_cached(
r#"
SELECT
s.id,
k.rank,
s.title,
s.url,
s.provider,
s.score,
fk.full_keyword
FROM
suggestions s
JOIN
keywords k
ON k.suggestion_id = s.id
LEFT JOIN
full_keywords fk
ON k.full_keyword_id = fk.id
WHERE
s.provider = :provider
AND k.keyword = :keyword
AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#,
if allow_keyword_expansion {
// Original SQL
r#"
SELECT
s.id,
k.rank,
s.title,
s.url,
s.provider,
s.score,
fk.full_keyword
FROM
suggestions s
JOIN
keywords k
ON k.suggestion_id = s.id
LEFT JOIN
full_keywords fk
ON k.full_keyword_id = fk.id
WHERE
s.provider = :provider
AND k.keyword = :keyword
AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#
} else {
// Use `INSTR` to ensure all matched keywords are substrings of the full keyword
// The `CONCAT` is to handle the extra space at the end that some keywords
// have
r#"
SELECT
s.id,
k.rank,
s.title,
s.url,
s.provider,
s.score,
fk.full_keyword
FROM
suggestions s
JOIN
keywords k
ON k.suggestion_id = s.id
LEFT JOIN
full_keywords fk
ON k.full_keyword_id = fk.id
WHERE
s.provider = :provider
AND k.keyword = :keyword
AND INSTR(CONCAT(fk.full_keyword, ' '), k.keyword) != 0
AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#
},
named_params! {
":keyword": keyword_lowercased,
":provider": provider
Expand Down Expand Up @@ -427,6 +477,96 @@ impl<'a> SuggestDao<'a> {
Ok(suggestions)
}

pub fn fetch_amp_suggestions_using_fts(
&self,
query: &SuggestionQuery,
suggestion_type: AmpSuggestionType,
fts_column: &str,
) -> Result<Vec<Suggestion>> {
let keyword_lowercased = &query.keyword.to_lowercase();
let provider = match suggestion_type {
AmpSuggestionType::Mobile => SuggestionProvider::AmpMobile,
AmpSuggestionType::Desktop => SuggestionProvider::Amp,
};
let suggestions = self.conn.query_rows_and_then_cached(
&format!(
r#"
SELECT
s.id,
s.title,
s.url,
s.provider,
s.score
FROM
suggestions s
JOIN
amp_fts fts
ON fts.rowid = s.id
WHERE
s.provider = :provider
AND amp_fts match '{fts_column}: {keyword_lowercased}'
AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#
),
named_params! {
":provider": provider
},
|row| -> Result<Suggestion> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
let raw_url: String = row.get("url")?;
let score: f64 = row.get("score")?;

// We don't have a good way to determine the full keyword, let's just leave that
// blank for now.

self.conn.query_row_and_then(
r#"
SELECT
amp.advertiser,
amp.block_id,
amp.iab_category,
amp.impression_url,
amp.click_url,
i.data AS icon,
i.mimetype AS icon_mimetype
FROM
amp_custom_details amp
LEFT JOIN
icons i ON amp.icon_id = i.id
WHERE
amp.suggestion_id = :suggestion_id
"#,
named_params! {
":suggestion_id": suggestion_id
},
|row| {
let cooked_url = cook_raw_suggestion_url(&raw_url);
let raw_click_url = row.get::<_, String>("click_url")?;
let cooked_click_url = cook_raw_suggestion_url(&raw_click_url);

Ok(Suggestion::Amp {
block_id: row.get("block_id")?,
advertiser: row.get("advertiser")?,
iab_category: row.get("iab_category")?,
title,
url: cooked_url,
raw_url,
full_keyword: "".into(),
icon: row.get("icon")?,
icon_mimetype: row.get("icon_mimetype")?,
impression_url: row.get("impression_url")?,
click_url: cooked_click_url,
raw_click_url,
score,
})
},
)
},
)?;
Ok(suggestions)
}

/// Fetches Suggestions of type Wikipedia provider that match the given query
pub fn fetch_wikipedia_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
let keyword_lowercased = &query.keyword.to_lowercase();
Expand Down Expand Up @@ -954,6 +1094,7 @@ impl<'a> SuggestDao<'a> {
let mut amp_insert = AmpInsertStatement::new(self.conn)?;
let mut wiki_insert = WikipediaInsertStatement::new(self.conn)?;
let mut keyword_insert = KeywordInsertStatement::new(self.conn)?;
let mut fts_insert = AmpFtsInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
let common_details = suggestion.common_details();
Expand All @@ -974,6 +1115,11 @@ impl<'a> SuggestDao<'a> {
wiki_insert.execute(suggestion_id, wikipedia)?;
}
}
fts_insert.execute(
suggestion_id,
&common_details.full_keywords_string(),
&common_details.title,
)?;
let mut full_keyword_inserter = FullKeywordInserter::new(self.conn, suggestion_id);
for keyword in common_details.keywords() {
let full_keyword_id = match (suggestion, keyword.full_keyword) {
Expand Down Expand Up @@ -1764,6 +1910,30 @@ impl<'conn> KeywordMetricsInsertStatement<'conn> {
}
}

pub(crate) struct AmpFtsInsertStatement<'conn>(rusqlite::Statement<'conn>);

impl<'conn> AmpFtsInsertStatement<'conn> {
pub(crate) fn new(conn: &'conn Connection) -> Result<Self> {
Ok(Self(conn.prepare(
"INSERT INTO amp_fts(rowid, full_keywords, title)
VALUES(?, ?, ?)
",
)?))
}

pub(crate) fn execute(
&mut self,
suggestion_id: i64,
full_keywords: &str,
title: &str,
) -> Result<()> {
self.0
.execute((suggestion_id, full_keywords, title))
.with_context("keyword metrics insert")?;
Ok(())
}
}

fn provider_config_meta_key(provider: SuggestionProvider) -> String {
format!("{}{}", PROVIDER_CONFIG_META_KEY_PREFIX, provider as u8)
}
5 changes: 5 additions & 0 deletions components/suggest/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,9 @@ pub struct SuggestionProviderConstraints {
/// settings record(s).
#[uniffi(default = None)]
pub exposure_suggestion_types: Option<Vec<String>>,
/// Which experiment branch should we use for the AMP provider while the FTS experiment is
/// ongoing?
/// `0` means the treatment branch.
#[uniffi(default = 0)]
pub amp_experiment_branch: u32,
}
1 change: 1 addition & 0 deletions components/suggest/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ impl SuggestionQuery {
exposure_suggestion_types: Some(
suggestion_types.iter().map(|s| s.to_string()).collect(),
),
..SuggestionProviderConstraints::default()
}),
..Self::default()
}
Expand Down
6 changes: 6 additions & 0 deletions components/suggest/src/rs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,12 @@ impl DownloadedSuggestionCommonDetails {
},
)
}

/// Get the full keywords as a single string
pub fn full_keywords_string(&self) -> String {
let parts: Vec<_> = self.full_keywords.iter().map(|(s, _)| s.as_str()).collect();
parts.join(" ")
}
}

#[derive(Debug, PartialEq, Eq)]
Expand Down
27 changes: 26 additions & 1 deletion components/suggest/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use sql_support::{
/// `clear_database()` by adding their names to `conditional_tables`, unless
/// they are cleared via a deletion trigger or there's some other good
/// reason not to do so.
pub const VERSION: u32 = 31;
pub const VERSION: u32 = 32;

/// The current Suggest database schema.
pub const SQL: &str = "
Expand Down Expand Up @@ -143,6 +143,14 @@ CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
WHERE id = new.suggestion_id;
END;
CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
full_keywords,
title,
content='',
contentless_delete=1,
tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
);
-- DELETE/UPDATE triggers are difficult to implement, since the FTS contents are split between the fakespot_custom_details and suggestions tables.
-- If you use an AFTER trigger, then the data from the other table has already been deleted.
-- BEFORE triggers are discouraged by the SQLite docs.
Expand Down Expand Up @@ -597,6 +605,23 @@ CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id);
)?;
Ok(())
}
31 => {
// Need to clear the database so that the FTS index will get filled.
clear_database(tx)?;
tx.execute_batch(
"
CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
full_keywords,
title,
content='',
contentless_delete=1,
tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
);
",
)?;
Ok(())
}
_ => Err(open_database::Error::IncompatibleVersion(version)),
}
}
Expand Down
7 changes: 7 additions & 0 deletions components/suggest/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2538,6 +2538,7 @@ pub(crate) mod tests {
providers: Some(vec![SuggestionProvider::Exposure]),
provider_constraints: Some(SuggestionProviderConstraints {
exposure_suggestion_types: Some(vec!["aaa".to_string(), "bbb".to_string()]),
..SuggestionProviderConstraints::default()
}),
..SuggestIngestionConstraints::all_providers()
});
Expand Down Expand Up @@ -2810,6 +2811,7 @@ pub(crate) mod tests {
providers: Some(vec![SuggestionProvider::Exposure]),
provider_constraints: Some(SuggestionProviderConstraints {
exposure_suggestion_types: Some(vec!["aaa".to_string()]),
..SuggestionProviderConstraints::default()
}),
..SuggestIngestionConstraints::all_providers()
});
Expand Down Expand Up @@ -2847,6 +2849,7 @@ pub(crate) mod tests {
providers: Some(vec![SuggestionProvider::Exposure]),
provider_constraints: Some(SuggestionProviderConstraints {
exposure_suggestion_types: Some(vec!["aaa".to_string()]),
..SuggestionProviderConstraints::default()
}),
..SuggestIngestionConstraints::all_providers()
});
Expand Down Expand Up @@ -2946,6 +2949,7 @@ pub(crate) mod tests {
providers: Some(vec![SuggestionProvider::Exposure]),
provider_constraints: Some(SuggestionProviderConstraints {
exposure_suggestion_types: Some(vec!["bbb".to_string()]),
..SuggestionProviderConstraints::default()
}),
..SuggestIngestionConstraints::all_providers()
});
Expand Down Expand Up @@ -2979,6 +2983,7 @@ pub(crate) mod tests {
providers: Some(vec![SuggestionProvider::Exposure]),
provider_constraints: Some(SuggestionProviderConstraints {
exposure_suggestion_types: Some(vec!["aaa".to_string()]),
..SuggestionProviderConstraints::default()
}),
..SuggestIngestionConstraints::all_providers()
});
Expand Down Expand Up @@ -3029,6 +3034,7 @@ pub(crate) mod tests {
providers: Some(vec![SuggestionProvider::Exposure]),
provider_constraints: Some(SuggestionProviderConstraints {
exposure_suggestion_types: Some(vec!["aaa".to_string()]),
..SuggestionProviderConstraints::default()
}),
..SuggestIngestionConstraints::all_providers()
});
Expand Down Expand Up @@ -3063,6 +3069,7 @@ pub(crate) mod tests {
providers: Some(vec![SuggestionProvider::Exposure]),
provider_constraints: Some(SuggestionProviderConstraints {
exposure_suggestion_types: Some(vec!["aaa".to_string()]),
..SuggestionProviderConstraints::default()
}),
..SuggestIngestionConstraints::all_providers()
});
Expand Down
Loading

0 comments on commit f0276a2

Please sign in to comment.