Fixed false positives in search results

This commit is contained in:
Antoine Gersant 2024-09-21 21:20:22 -07:00
parent 81403960b0
commit 5128796825

View file

@@ -49,7 +49,7 @@ impl Search {
fn eval(&self, strings: &RodeoReader, expr: &Expr) -> HashSet<SongKey> { fn eval(&self, strings: &RodeoReader, expr: &Expr) -> HashSet<SongKey> {
match expr { match expr {
Expr::Fuzzy(s) => self.eval_fuzzy(s), Expr::Fuzzy(s) => self.eval_fuzzy(strings, s),
Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, *field, *op, &s), Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, *field, *op, &s),
Expr::NumberCmp(field, op, n) => self.eval_number_operator(*field, *op, *n), Expr::NumberCmp(field, op, n) => self.eval_number_operator(*field, *op, *n),
Expr::Combined(e, op, f) => self.combine(strings, e, *op, f), Expr::Combined(e, op, f) => self.combine(strings, e, *op, f),
@@ -77,12 +77,12 @@ impl Search {
} }
} }
fn eval_fuzzy(&self, value: &Literal) -> HashSet<SongKey> { fn eval_fuzzy(&self, strings: &RodeoReader, value: &Literal) -> HashSet<SongKey> {
match value { match value {
Literal::Text(s) => { Literal::Text(s) => {
let mut songs = HashSet::new(); let mut songs = HashSet::new();
for field in self.text_fields.values() { for field in self.text_fields.values() {
songs.extend(field.find_like(s)); songs.extend(field.find_like(strings, s));
} }
songs songs
} }
@@ -92,7 +92,7 @@ impl Search {
songs.extend(field.find_equal(*n)); songs.extend(field.find_equal(*n));
} }
songs songs
.union(&self.eval_fuzzy(&Literal::Text(n.to_string()))) .union(&self.eval_fuzzy(strings, &Literal::Text(n.to_string())))
.copied() .copied()
.collect() .collect()
} }
@@ -112,7 +112,7 @@ impl Search {
match operator { match operator {
TextOp::Eq => field_index.find_exact(strings, value), TextOp::Eq => field_index.find_exact(strings, value),
TextOp::Like => field_index.find_like(value), TextOp::Like => field_index.find_like(strings, value),
} }
} }
@@ -148,11 +148,17 @@ impl TextFieldIndex {
self.exact.entry(value).or_default().insert(key); self.exact.entry(value).or_default().insert(key);
} }
pub fn find_like(&self, value: &str) -> HashSet<SongKey> { pub fn find_like(&self, strings: &RodeoReader, value: &str) -> HashSet<SongKey> {
let characters = value.chars().collect::<Vec<_>>(); let characters = value.chars().collect::<Vec<_>>();
let empty_set = HashSet::new();
let mut candidates = characters[..] let mut candidates = characters[..]
.windows(NGRAM_SIZE) .windows(NGRAM_SIZE)
.filter_map(|s| self.ngrams.get::<[char; NGRAM_SIZE]>(s.try_into().unwrap())) .map(|s| {
self.ngrams
.get::<[char; NGRAM_SIZE]>(s.try_into().unwrap())
.unwrap_or(&empty_set)
})
.collect::<Vec<_>>(); .collect::<Vec<_>>();
if candidates.is_empty() { if candidates.is_empty() {
@@ -164,6 +170,7 @@ impl TextFieldIndex {
candidates[0] candidates[0]
.iter() .iter()
.filter(move |c| candidates[1..].iter().all(|s| s.contains(c))) .filter(move |c| candidates[1..].iter().all(|s| s.contains(c)))
.filter(|s| strings.resolve(&s.virtual_path.0).contains(value))
.copied() .copied()
.collect() .collect()
} }