Small perf improvement to search index building: store SongKey sets in nohash-hasher's IntSet instead of std HashSet

This commit is contained in:
Antoine Gersant 2024-09-21 22:28:38 -07:00
parent cb33c96548
commit 390ee03020
4 changed files with 24 additions and 14 deletions

7
Cargo.lock generated
View file

@ -1368,6 +1368,12 @@ dependencies = [
"winapi-build", "winapi-build",
] ]
[[package]]
name = "nohash-hasher"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"
@ -1676,6 +1682,7 @@ dependencies = [
"mp4ameta", "mp4ameta",
"native-windows-derive", "native-windows-derive",
"native-windows-gui", "native-windows-gui",
"nohash-hasher",
"num_cpus", "num_cpus",
"opus_headers", "opus_headers",
"pbkdf2", "pbkdf2",

View file

@ -26,6 +26,7 @@ log = "0.4.22"
metaflac = "0.2.7" metaflac = "0.2.7"
mp3-duration = "0.1.10" mp3-duration = "0.1.10"
mp4ameta = "0.11.0" mp4ameta = "0.11.0"
nohash-hasher = "0.2.0"
num_cpus = "1.14.0" num_cpus = "1.14.0"
opus_headers = "0.1.2" opus_headers = "0.1.2"
pbkdf2 = "0.11" pbkdf2 = "0.11"

View file

@ -1,5 +1,6 @@
use chumsky::Parser; use chumsky::Parser;
use lasso2::{RodeoReader, Spur}; use lasso2::{RodeoReader, Spur};
use nohash_hasher::IntSet;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
@ -47,7 +48,7 @@ impl Search {
.collect::<Vec<_>>()) .collect::<Vec<_>>())
} }
fn eval(&self, strings: &RodeoReader, expr: &Expr) -> HashSet<SongKey> { fn eval(&self, strings: &RodeoReader, expr: &Expr) -> IntSet<SongKey> {
match expr { match expr {
Expr::Fuzzy(s) => self.eval_fuzzy(strings, s), Expr::Fuzzy(s) => self.eval_fuzzy(strings, s),
Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, *field, *op, &s), Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, *field, *op, &s),
@ -62,7 +63,7 @@ impl Search {
e: &Box<Expr>, e: &Box<Expr>,
op: BoolOp, op: BoolOp,
f: &Box<Expr>, f: &Box<Expr>,
) -> HashSet<SongKey> { ) -> IntSet<SongKey> {
match op { match op {
BoolOp::And => self BoolOp::And => self
.eval(strings, e) .eval(strings, e)
@ -77,17 +78,17 @@ impl Search {
} }
} }
fn eval_fuzzy(&self, strings: &RodeoReader, value: &Literal) -> HashSet<SongKey> { fn eval_fuzzy(&self, strings: &RodeoReader, value: &Literal) -> IntSet<SongKey> {
match value { match value {
Literal::Text(s) => { Literal::Text(s) => {
let mut songs = HashSet::new(); let mut songs = IntSet::default();
for field in self.text_fields.values() { for field in self.text_fields.values() {
songs.extend(field.find_like(strings, s)); songs.extend(field.find_like(strings, s));
} }
songs songs
} }
Literal::Number(n) => { Literal::Number(n) => {
let mut songs = HashSet::new(); let mut songs = IntSet::default();
for field in self.number_fields.values() { for field in self.number_fields.values() {
songs.extend(field.find_equal(*n)); songs.extend(field.find_equal(*n));
} }
@ -105,9 +106,9 @@ impl Search {
field: TextField, field: TextField,
operator: TextOp, operator: TextOp,
value: &str, value: &str,
) -> HashSet<SongKey> { ) -> IntSet<SongKey> {
let Some(field_index) = self.text_fields.get(&field) else { let Some(field_index) = self.text_fields.get(&field) else {
return HashSet::new(); return IntSet::default();
}; };
match operator { match operator {
@ -121,7 +122,7 @@ impl Search {
field: NumberField, field: NumberField,
operator: NumberOp, operator: NumberOp,
value: i32, value: i32,
) -> HashSet<SongKey> { ) -> IntSet<SongKey> {
todo!() todo!()
} }
} }
@ -130,8 +131,8 @@ const NGRAM_SIZE: usize = 2;
#[derive(Default, Deserialize, Serialize)] #[derive(Default, Deserialize, Serialize)]
struct TextFieldIndex { struct TextFieldIndex {
exact: HashMap<Spur, HashSet<SongKey>>, exact: HashMap<Spur, IntSet<SongKey>>,
ngrams: HashMap<[char; NGRAM_SIZE], HashSet<SongKey>>, ngrams: HashMap<[char; NGRAM_SIZE], IntSet<SongKey>>,
} }
impl TextFieldIndex { impl TextFieldIndex {
@ -149,9 +150,9 @@ impl TextFieldIndex {
self.exact.entry(value).or_default().insert(key); self.exact.entry(value).or_default().insert(key);
} }
pub fn find_like(&self, strings: &RodeoReader, value: &str) -> HashSet<SongKey> { pub fn find_like(&self, strings: &RodeoReader, value: &str) -> IntSet<SongKey> {
let characters = value.chars().collect::<Vec<_>>(); let characters = value.chars().collect::<Vec<_>>();
let empty_set = HashSet::new(); let empty_set = IntSet::default();
let mut candidates = characters[..] let mut candidates = characters[..]
.windows(NGRAM_SIZE) .windows(NGRAM_SIZE)
@ -163,7 +164,7 @@ impl TextFieldIndex {
.collect::<Vec<_>>(); .collect::<Vec<_>>();
if candidates.is_empty() { if candidates.is_empty() {
return HashSet::new(); return IntSet::default();
} }
candidates.sort_by_key(|h| h.len()); candidates.sort_by_key(|h| h.len());
@ -176,7 +177,7 @@ impl TextFieldIndex {
.collect() .collect()
} }
pub fn find_exact(&self, strings: &RodeoReader, value: &str) -> HashSet<SongKey> { pub fn find_exact(&self, strings: &RodeoReader, value: &str) -> IntSet<SongKey> {
strings strings
.get(value) .get(value)
.and_then(|k| self.exact.get(&k)) .and_then(|k| self.exact.get(&k))

View file

@ -78,6 +78,7 @@ pub struct AlbumKey {
pub struct SongKey { pub struct SongKey {
pub virtual_path: PathKey, pub virtual_path: PathKey,
} }
impl nohash_hasher::IsEnabled for SongKey {}
impl Song { impl Song {
pub fn album_key(&self) -> Option<AlbumKey> { pub fn album_key(&self) -> Option<AlbumKey> {