Small perf improvement to search index building
This commit is contained in:
parent
cb33c96548
commit
390ee03020
4 changed files with 24 additions and 14 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -1368,6 +1368,12 @@ dependencies = [
|
||||||
"winapi-build",
|
"winapi-build",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nohash-hasher"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nom"
|
name = "nom"
|
||||||
version = "7.1.3"
|
version = "7.1.3"
|
||||||
|
@ -1676,6 +1682,7 @@ dependencies = [
|
||||||
"mp4ameta",
|
"mp4ameta",
|
||||||
"native-windows-derive",
|
"native-windows-derive",
|
||||||
"native-windows-gui",
|
"native-windows-gui",
|
||||||
|
"nohash-hasher",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
"opus_headers",
|
"opus_headers",
|
||||||
"pbkdf2",
|
"pbkdf2",
|
||||||
|
|
|
@ -26,6 +26,7 @@ log = "0.4.22"
|
||||||
metaflac = "0.2.7"
|
metaflac = "0.2.7"
|
||||||
mp3-duration = "0.1.10"
|
mp3-duration = "0.1.10"
|
||||||
mp4ameta = "0.11.0"
|
mp4ameta = "0.11.0"
|
||||||
|
nohash-hasher = "0.2.0"
|
||||||
num_cpus = "1.14.0"
|
num_cpus = "1.14.0"
|
||||||
opus_headers = "0.1.2"
|
opus_headers = "0.1.2"
|
||||||
pbkdf2 = "0.11"
|
pbkdf2 = "0.11"
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
use chumsky::Parser;
|
use chumsky::Parser;
|
||||||
use lasso2::{RodeoReader, Spur};
|
use lasso2::{RodeoReader, Spur};
|
||||||
|
use nohash_hasher::IntSet;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::{
|
use std::{
|
||||||
collections::{HashMap, HashSet},
|
collections::{HashMap, HashSet},
|
||||||
|
@ -47,7 +48,7 @@ impl Search {
|
||||||
.collect::<Vec<_>>())
|
.collect::<Vec<_>>())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, strings: &RodeoReader, expr: &Expr) -> HashSet<SongKey> {
|
fn eval(&self, strings: &RodeoReader, expr: &Expr) -> IntSet<SongKey> {
|
||||||
match expr {
|
match expr {
|
||||||
Expr::Fuzzy(s) => self.eval_fuzzy(strings, s),
|
Expr::Fuzzy(s) => self.eval_fuzzy(strings, s),
|
||||||
Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, *field, *op, &s),
|
Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, *field, *op, &s),
|
||||||
|
@ -62,7 +63,7 @@ impl Search {
|
||||||
e: &Box<Expr>,
|
e: &Box<Expr>,
|
||||||
op: BoolOp,
|
op: BoolOp,
|
||||||
f: &Box<Expr>,
|
f: &Box<Expr>,
|
||||||
) -> HashSet<SongKey> {
|
) -> IntSet<SongKey> {
|
||||||
match op {
|
match op {
|
||||||
BoolOp::And => self
|
BoolOp::And => self
|
||||||
.eval(strings, e)
|
.eval(strings, e)
|
||||||
|
@ -77,17 +78,17 @@ impl Search {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval_fuzzy(&self, strings: &RodeoReader, value: &Literal) -> HashSet<SongKey> {
|
fn eval_fuzzy(&self, strings: &RodeoReader, value: &Literal) -> IntSet<SongKey> {
|
||||||
match value {
|
match value {
|
||||||
Literal::Text(s) => {
|
Literal::Text(s) => {
|
||||||
let mut songs = HashSet::new();
|
let mut songs = IntSet::default();
|
||||||
for field in self.text_fields.values() {
|
for field in self.text_fields.values() {
|
||||||
songs.extend(field.find_like(strings, s));
|
songs.extend(field.find_like(strings, s));
|
||||||
}
|
}
|
||||||
songs
|
songs
|
||||||
}
|
}
|
||||||
Literal::Number(n) => {
|
Literal::Number(n) => {
|
||||||
let mut songs = HashSet::new();
|
let mut songs = IntSet::default();
|
||||||
for field in self.number_fields.values() {
|
for field in self.number_fields.values() {
|
||||||
songs.extend(field.find_equal(*n));
|
songs.extend(field.find_equal(*n));
|
||||||
}
|
}
|
||||||
|
@ -105,9 +106,9 @@ impl Search {
|
||||||
field: TextField,
|
field: TextField,
|
||||||
operator: TextOp,
|
operator: TextOp,
|
||||||
value: &str,
|
value: &str,
|
||||||
) -> HashSet<SongKey> {
|
) -> IntSet<SongKey> {
|
||||||
let Some(field_index) = self.text_fields.get(&field) else {
|
let Some(field_index) = self.text_fields.get(&field) else {
|
||||||
return HashSet::new();
|
return IntSet::default();
|
||||||
};
|
};
|
||||||
|
|
||||||
match operator {
|
match operator {
|
||||||
|
@ -121,7 +122,7 @@ impl Search {
|
||||||
field: NumberField,
|
field: NumberField,
|
||||||
operator: NumberOp,
|
operator: NumberOp,
|
||||||
value: i32,
|
value: i32,
|
||||||
) -> HashSet<SongKey> {
|
) -> IntSet<SongKey> {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -130,8 +131,8 @@ const NGRAM_SIZE: usize = 2;
|
||||||
|
|
||||||
#[derive(Default, Deserialize, Serialize)]
|
#[derive(Default, Deserialize, Serialize)]
|
||||||
struct TextFieldIndex {
|
struct TextFieldIndex {
|
||||||
exact: HashMap<Spur, HashSet<SongKey>>,
|
exact: HashMap<Spur, IntSet<SongKey>>,
|
||||||
ngrams: HashMap<[char; NGRAM_SIZE], HashSet<SongKey>>,
|
ngrams: HashMap<[char; NGRAM_SIZE], IntSet<SongKey>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TextFieldIndex {
|
impl TextFieldIndex {
|
||||||
|
@ -149,9 +150,9 @@ impl TextFieldIndex {
|
||||||
self.exact.entry(value).or_default().insert(key);
|
self.exact.entry(value).or_default().insert(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_like(&self, strings: &RodeoReader, value: &str) -> HashSet<SongKey> {
|
pub fn find_like(&self, strings: &RodeoReader, value: &str) -> IntSet<SongKey> {
|
||||||
let characters = value.chars().collect::<Vec<_>>();
|
let characters = value.chars().collect::<Vec<_>>();
|
||||||
let empty_set = HashSet::new();
|
let empty_set = IntSet::default();
|
||||||
|
|
||||||
let mut candidates = characters[..]
|
let mut candidates = characters[..]
|
||||||
.windows(NGRAM_SIZE)
|
.windows(NGRAM_SIZE)
|
||||||
|
@ -163,7 +164,7 @@ impl TextFieldIndex {
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
if candidates.is_empty() {
|
if candidates.is_empty() {
|
||||||
return HashSet::new();
|
return IntSet::default();
|
||||||
}
|
}
|
||||||
|
|
||||||
candidates.sort_by_key(|h| h.len());
|
candidates.sort_by_key(|h| h.len());
|
||||||
|
@ -176,7 +177,7 @@ impl TextFieldIndex {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_exact(&self, strings: &RodeoReader, value: &str) -> HashSet<SongKey> {
|
pub fn find_exact(&self, strings: &RodeoReader, value: &str) -> IntSet<SongKey> {
|
||||||
strings
|
strings
|
||||||
.get(value)
|
.get(value)
|
||||||
.and_then(|k| self.exact.get(&k))
|
.and_then(|k| self.exact.get(&k))
|
||||||
|
|
|
@ -78,6 +78,7 @@ pub struct AlbumKey {
|
||||||
pub struct SongKey {
|
pub struct SongKey {
|
||||||
pub virtual_path: PathKey,
|
pub virtual_path: PathKey,
|
||||||
}
|
}
|
||||||
|
// Opts `SongKey` into `nohash_hasher`, so it can be the element type of the
// `IntSet<SongKey>` sets used by the search index.
// NOTE(review): nohash-hasher documents that an `IsEnabled` type's `Hash` impl
// must make exactly one integer `write_*` call on the hasher. The only field
// visible here is `virtual_path: PathKey`; neither `PathKey` nor the `Hash`
// impl of `SongKey` is visible in this view — confirm it hashes as a single
// integer before relying on this.
impl nohash_hasher::IsEnabled for SongKey {}
|
||||||
|
|
||||||
impl Song {
|
impl Song {
|
||||||
pub fn album_key(&self) -> Option<AlbumKey> {
|
pub fn album_key(&self) -> Option<AlbumKey> {
|
||||||
|
|
Loading…
Add table
Reference in a new issue