Case insensitive search
This commit is contained in:
parent
409d79d8a2
commit
bdc4f840a4
4 changed files with 100 additions and 49 deletions
|
@ -259,7 +259,9 @@ impl Manager {
|
||||||
let index_manager = self.clone();
|
let index_manager = self.clone();
|
||||||
move || {
|
move || {
|
||||||
let index = index_manager.index.read().unwrap();
|
let index = index_manager.index.read().unwrap();
|
||||||
index.search.find_songs(&index.strings, &query)
|
index
|
||||||
|
.search
|
||||||
|
.find_songs(&index.strings, &index.canon, &query)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
|
@ -270,6 +272,7 @@ impl Manager {
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
pub strings: RodeoReader,
|
pub strings: RodeoReader,
|
||||||
|
pub canon: HashMap<String, Spur>,
|
||||||
pub browser: browser::Browser,
|
pub browser: browser::Browser,
|
||||||
pub collection: collection::Collection,
|
pub collection: collection::Collection,
|
||||||
pub search: search::Search,
|
pub search: search::Search,
|
||||||
|
@ -279,6 +282,7 @@ impl Default for Index {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
strings: Rodeo::new().into_reader(),
|
strings: Rodeo::new().into_reader(),
|
||||||
|
canon: Default::default(),
|
||||||
browser: Default::default(),
|
browser: Default::default(),
|
||||||
collection: Default::default(),
|
collection: Default::default(),
|
||||||
search: Default::default(),
|
search: Default::default(),
|
||||||
|
@ -288,7 +292,7 @@ impl Default for Index {
|
||||||
|
|
||||||
pub struct Builder {
|
pub struct Builder {
|
||||||
strings: Rodeo,
|
strings: Rodeo,
|
||||||
minuscules: HashMap<String, Spur>,
|
canon: HashMap<String, Spur>,
|
||||||
browser_builder: browser::Builder,
|
browser_builder: browser::Builder,
|
||||||
collection_builder: collection::Builder,
|
collection_builder: collection::Builder,
|
||||||
search_builder: search::Builder,
|
search_builder: search::Builder,
|
||||||
|
@ -298,7 +302,7 @@ impl Builder {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
strings: Rodeo::new(),
|
strings: Rodeo::new(),
|
||||||
minuscules: HashMap::default(),
|
canon: HashMap::default(),
|
||||||
browser_builder: browser::Builder::default(),
|
browser_builder: browser::Builder::default(),
|
||||||
collection_builder: collection::Builder::default(),
|
collection_builder: collection::Builder::default(),
|
||||||
search_builder: search::Builder::default(),
|
search_builder: search::Builder::default(),
|
||||||
|
@ -311,9 +315,7 @@ impl Builder {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_song(&mut self, scanner_song: scanner::Song) {
|
pub fn add_song(&mut self, scanner_song: scanner::Song) {
|
||||||
if let Some(storage_song) =
|
if let Some(storage_song) = store_song(&mut self.strings, &mut self.canon, &scanner_song) {
|
||||||
store_song(&mut self.strings, &mut self.minuscules, &scanner_song)
|
|
||||||
{
|
|
||||||
self.browser_builder
|
self.browser_builder
|
||||||
.add_song(&mut self.strings, &scanner_song);
|
.add_song(&mut self.strings, &scanner_song);
|
||||||
self.collection_builder.add_song(&storage_song);
|
self.collection_builder.add_song(&storage_song);
|
||||||
|
@ -327,6 +329,7 @@ impl Builder {
|
||||||
collection: self.collection_builder.build(),
|
collection: self.collection_builder.build(),
|
||||||
search: self.search_builder.build(),
|
search: self.search_builder.build(),
|
||||||
strings: self.strings.into_reader(),
|
strings: self.strings.into_reader(),
|
||||||
|
canon: self.canon,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -374,11 +374,11 @@ mod test {
|
||||||
|
|
||||||
fn setup_test(songs: Vec<scanner::Song>) -> (Collection, RodeoReader) {
|
fn setup_test(songs: Vec<scanner::Song>) -> (Collection, RodeoReader) {
|
||||||
let mut strings = Rodeo::new();
|
let mut strings = Rodeo::new();
|
||||||
let mut minuscules = HashMap::new();
|
let mut canon = HashMap::new();
|
||||||
let mut builder = Builder::default();
|
let mut builder = Builder::default();
|
||||||
|
|
||||||
for song in songs {
|
for song in songs {
|
||||||
let song = store_song(&mut strings, &mut minuscules, &song).unwrap();
|
let song = store_song(&mut strings, &mut canon, &song).unwrap();
|
||||||
builder.add_song(&song);
|
builder.add_song(&song);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,10 @@ use crate::app::{
|
||||||
scanner, Error,
|
scanner, Error,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{query::make_parser, storage};
|
use super::{
|
||||||
|
query::make_parser,
|
||||||
|
storage::{self, sanitize},
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
pub struct Search {
|
pub struct Search {
|
||||||
|
@ -35,44 +38,55 @@ impl Default for Search {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Search {
|
impl Search {
|
||||||
pub fn find_songs(&self, strings: &RodeoReader, query: &str) -> Result<Vec<PathBuf>, Error> {
|
pub fn find_songs(
|
||||||
|
&self,
|
||||||
|
strings: &RodeoReader,
|
||||||
|
canon: &HashMap<String, Spur>,
|
||||||
|
query: &str,
|
||||||
|
) -> Result<Vec<PathBuf>, Error> {
|
||||||
let parser = make_parser();
|
let parser = make_parser();
|
||||||
let parsed_query = parser
|
let parsed_query = parser
|
||||||
.parse(query)
|
.parse(query)
|
||||||
.map_err(|_| Error::SearchQueryParseError)?;
|
.map_err(|_| Error::SearchQueryParseError)?;
|
||||||
|
|
||||||
let keys = self.eval(strings, &parsed_query);
|
let keys = self.eval(strings, canon, &parsed_query);
|
||||||
Ok(keys
|
Ok(keys
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|k| Path::new(OsStr::new(strings.resolve(&k.virtual_path.0))).to_owned())
|
.map(|k| Path::new(OsStr::new(strings.resolve(&k.virtual_path.0))).to_owned())
|
||||||
.collect::<Vec<_>>())
|
.collect::<Vec<_>>())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval(&self, strings: &RodeoReader, expr: &Expr) -> IntSet<SongKey> {
|
fn eval(
|
||||||
|
&self,
|
||||||
|
strings: &RodeoReader,
|
||||||
|
canon: &HashMap<String, Spur>,
|
||||||
|
expr: &Expr,
|
||||||
|
) -> IntSet<SongKey> {
|
||||||
match expr {
|
match expr {
|
||||||
Expr::Fuzzy(s) => self.eval_fuzzy(strings, s),
|
Expr::Fuzzy(s) => self.eval_fuzzy(strings, s),
|
||||||
Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, *field, *op, &s),
|
Expr::TextCmp(field, op, s) => self.eval_text_operator(canon, *field, *op, &s),
|
||||||
Expr::NumberCmp(field, op, n) => self.eval_number_operator(*field, *op, *n),
|
Expr::NumberCmp(field, op, n) => self.eval_number_operator(*field, *op, *n),
|
||||||
Expr::Combined(e, op, f) => self.combine(strings, e, *op, f),
|
Expr::Combined(e, op, f) => self.combine(strings, canon, e, *op, f),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn combine(
|
fn combine(
|
||||||
&self,
|
&self,
|
||||||
strings: &RodeoReader,
|
strings: &RodeoReader,
|
||||||
|
canon: &HashMap<String, Spur>,
|
||||||
e: &Box<Expr>,
|
e: &Box<Expr>,
|
||||||
op: BoolOp,
|
op: BoolOp,
|
||||||
f: &Box<Expr>,
|
f: &Box<Expr>,
|
||||||
) -> IntSet<SongKey> {
|
) -> IntSet<SongKey> {
|
||||||
match op {
|
match op {
|
||||||
BoolOp::And => self
|
BoolOp::And => self
|
||||||
.eval(strings, e)
|
.eval(strings, canon, e)
|
||||||
.intersection(&self.eval(strings, f))
|
.intersection(&self.eval(strings, canon, f))
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect(),
|
.collect(),
|
||||||
BoolOp::Or => self
|
BoolOp::Or => self
|
||||||
.eval(strings, e)
|
.eval(strings, canon, e)
|
||||||
.union(&self.eval(strings, f))
|
.union(&self.eval(strings, canon, f))
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect(),
|
.collect(),
|
||||||
}
|
}
|
||||||
|
@ -102,7 +116,7 @@ impl Search {
|
||||||
|
|
||||||
fn eval_text_operator(
|
fn eval_text_operator(
|
||||||
&self,
|
&self,
|
||||||
strings: &RodeoReader,
|
canon: &HashMap<String, Spur>,
|
||||||
field: TextField,
|
field: TextField,
|
||||||
operator: TextOp,
|
operator: TextOp,
|
||||||
value: &str,
|
value: &str,
|
||||||
|
@ -112,7 +126,7 @@ impl Search {
|
||||||
};
|
};
|
||||||
|
|
||||||
match operator {
|
match operator {
|
||||||
TextOp::Eq => field_index.find_exact(strings, value),
|
TextOp::Eq => field_index.find_exact(canon, value),
|
||||||
TextOp::Like => field_index.find_like(value),
|
TextOp::Like => field_index.find_like(value),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -139,7 +153,7 @@ impl TextFieldIndex {
|
||||||
pub fn insert(&mut self, raw_value: &str, value: Spur, key: SongKey) {
|
pub fn insert(&mut self, raw_value: &str, value: Spur, key: SongKey) {
|
||||||
// TODO sanitize ngrams to be case insensitive, free from diacritics and punctuation
|
// TODO sanitize ngrams to be case insensitive, free from diacritics and punctuation
|
||||||
// And do the same thing to query fragments!
|
// And do the same thing to query fragments!
|
||||||
let characters = raw_value.chars().collect::<TinyVec<[char; 32]>>();
|
let characters = sanitize(raw_value).chars().collect::<TinyVec<[char; 32]>>();
|
||||||
for substring in characters[..].windows(NGRAM_SIZE) {
|
for substring in characters[..].windows(NGRAM_SIZE) {
|
||||||
self.ngrams
|
self.ngrams
|
||||||
.entry(substring.try_into().unwrap())
|
.entry(substring.try_into().unwrap())
|
||||||
|
@ -151,7 +165,7 @@ impl TextFieldIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_like(&self, value: &str) -> IntSet<SongKey> {
|
pub fn find_like(&self, value: &str) -> IntSet<SongKey> {
|
||||||
let characters = value.chars().collect::<Vec<_>>();
|
let characters = sanitize(value).chars().collect::<Vec<_>>();
|
||||||
let empty_set = IntSet::default();
|
let empty_set = IntSet::default();
|
||||||
|
|
||||||
let mut candidates = characters[..]
|
let mut candidates = characters[..]
|
||||||
|
@ -179,10 +193,10 @@ impl TextFieldIndex {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_exact(&self, strings: &RodeoReader, value: &str) -> IntSet<SongKey> {
|
pub fn find_exact(&self, canon: &HashMap<String, Spur>, value: &str) -> IntSet<SongKey> {
|
||||||
strings
|
canon
|
||||||
.get(value)
|
.get(&sanitize(value))
|
||||||
.and_then(|k| self.exact.get(&k))
|
.and_then(|s| self.exact.get(&s))
|
||||||
.cloned()
|
.cloned()
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
@ -305,7 +319,7 @@ mod test {
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
fn setup_test(songs: Vec<scanner::Song>) -> (Search, RodeoReader) {
|
fn setup_test(songs: Vec<scanner::Song>) -> (Search, RodeoReader, HashMap<String, Spur>) {
|
||||||
let mut strings = Rodeo::new();
|
let mut strings = Rodeo::new();
|
||||||
let mut canon = HashMap::new();
|
let mut canon = HashMap::new();
|
||||||
|
|
||||||
|
@ -317,12 +331,12 @@ mod test {
|
||||||
|
|
||||||
let search = builder.build();
|
let search = builder.build();
|
||||||
let strings = strings.into_reader();
|
let strings = strings.into_reader();
|
||||||
(search, strings)
|
(search, strings, canon)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn can_find_fuzzy() {
|
fn can_find_fuzzy() {
|
||||||
let (search, strings) = setup_test(vec![
|
let (search, strings, canon) = setup_test(vec![
|
||||||
scanner::Song {
|
scanner::Song {
|
||||||
virtual_path: PathBuf::from("seasons.mp3"),
|
virtual_path: PathBuf::from("seasons.mp3"),
|
||||||
title: Some("Seasons".to_owned()),
|
title: Some("Seasons".to_owned()),
|
||||||
|
@ -343,7 +357,7 @@ mod test {
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let songs = search.find_songs(&strings, "agon").unwrap();
|
let songs = search.find_songs(&strings, &canon, "agon").unwrap();
|
||||||
|
|
||||||
assert_eq!(songs.len(), 2);
|
assert_eq!(songs.len(), 2);
|
||||||
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
||||||
|
@ -352,7 +366,7 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn can_find_field_like() {
|
fn can_find_field_like() {
|
||||||
let (search, strings) = setup_test(vec![
|
let (search, strings, canon) = setup_test(vec![
|
||||||
scanner::Song {
|
scanner::Song {
|
||||||
virtual_path: PathBuf::from("seasons.mp3"),
|
virtual_path: PathBuf::from("seasons.mp3"),
|
||||||
title: Some("Seasons".to_owned()),
|
title: Some("Seasons".to_owned()),
|
||||||
|
@ -367,15 +381,36 @@ mod test {
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let songs = search.find_songs(&strings, "artist % agon").unwrap();
|
let songs = search
|
||||||
|
.find_songs(&strings, &canon, "artist % agon")
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(songs.len(), 1);
|
assert_eq!(songs.len(), 1);
|
||||||
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_is_case_insensitive() {
|
||||||
|
let (search, strings, canon) = setup_test(vec![scanner::Song {
|
||||||
|
virtual_path: PathBuf::from("seasons.mp3"),
|
||||||
|
artists: vec!["Dragonforce".to_owned()],
|
||||||
|
..Default::default()
|
||||||
|
}]);
|
||||||
|
|
||||||
|
let songs = search.find_songs(&strings, &canon, "dragonforce").unwrap();
|
||||||
|
assert_eq!(songs.len(), 1);
|
||||||
|
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
||||||
|
|
||||||
|
let songs = search
|
||||||
|
.find_songs(&strings, &canon, "artist = dragonforce")
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(songs.len(), 1);
|
||||||
|
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn can_find_field_exact() {
|
fn can_find_field_exact() {
|
||||||
let (search, strings) = setup_test(vec![
|
let (search, strings, canon) = setup_test(vec![
|
||||||
scanner::Song {
|
scanner::Song {
|
||||||
virtual_path: PathBuf::from("seasons.mp3"),
|
virtual_path: PathBuf::from("seasons.mp3"),
|
||||||
title: Some("Seasons".to_owned()),
|
title: Some("Seasons".to_owned()),
|
||||||
|
@ -390,17 +425,21 @@ mod test {
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let songs = search.find_songs(&strings, "artist = Dragon").unwrap();
|
let songs = search
|
||||||
|
.find_songs(&strings, &canon, "artist = Dragon")
|
||||||
|
.unwrap();
|
||||||
assert!(songs.is_empty());
|
assert!(songs.is_empty());
|
||||||
|
|
||||||
let songs = search.find_songs(&strings, "artist = Dragonforce").unwrap();
|
let songs = search
|
||||||
|
.find_songs(&strings, &canon, "artist = Dragonforce")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(songs.len(), 1);
|
assert_eq!(songs.len(), 1);
|
||||||
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
assert!(songs.contains(&PathBuf::from("seasons.mp3")));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn can_use_and_operator() {
|
fn can_use_and_operator() {
|
||||||
let (search, strings) = setup_test(vec![
|
let (search, strings, canon) = setup_test(vec![
|
||||||
scanner::Song {
|
scanner::Song {
|
||||||
virtual_path: PathBuf::from("whale.mp3"),
|
virtual_path: PathBuf::from("whale.mp3"),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
@ -415,18 +454,20 @@ mod test {
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let songs = search.find_songs(&strings, "space && whale").unwrap();
|
let songs = search
|
||||||
|
.find_songs(&strings, &canon, "space && whale")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(songs.len(), 1);
|
assert_eq!(songs.len(), 1);
|
||||||
assert!(songs.contains(&PathBuf::from("whales in space.mp3")));
|
assert!(songs.contains(&PathBuf::from("whales in space.mp3")));
|
||||||
|
|
||||||
let songs = search.find_songs(&strings, "space whale").unwrap();
|
let songs = search.find_songs(&strings, &canon, "space whale").unwrap();
|
||||||
assert_eq!(songs.len(), 1);
|
assert_eq!(songs.len(), 1);
|
||||||
assert!(songs.contains(&PathBuf::from("whales in space.mp3")));
|
assert!(songs.contains(&PathBuf::from("whales in space.mp3")));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn can_use_or_operator() {
|
fn can_use_or_operator() {
|
||||||
let (search, strings) = setup_test(vec![
|
let (search, strings, canon) = setup_test(vec![
|
||||||
scanner::Song {
|
scanner::Song {
|
||||||
virtual_path: PathBuf::from("whale.mp3"),
|
virtual_path: PathBuf::from("whale.mp3"),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
@ -441,7 +482,9 @@ mod test {
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let songs = search.find_songs(&strings, "space || whale").unwrap();
|
let songs = search
|
||||||
|
.find_songs(&strings, &canon, "space || whale")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(songs.len(), 3);
|
assert_eq!(songs.len(), 3);
|
||||||
assert!(songs.contains(&PathBuf::from("whale.mp3")));
|
assert!(songs.contains(&PathBuf::from("whale.mp3")));
|
||||||
assert!(songs.contains(&PathBuf::from("space.mp3")));
|
assert!(songs.contains(&PathBuf::from("space.mp3")));
|
||||||
|
|
|
@ -101,9 +101,19 @@ impl Song {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn sanitize(s: &str) -> String {
|
||||||
|
// TODO merge inconsistent diacritic usage
|
||||||
|
let mut cleaned = s.to_owned();
|
||||||
|
cleaned.retain(|c| match c {
|
||||||
|
' ' | '_' | '-' | '\'' => false,
|
||||||
|
_ => true,
|
||||||
|
});
|
||||||
|
cleaned.to_lowercase()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn store_song(
|
pub fn store_song(
|
||||||
strings: &mut Rodeo,
|
strings: &mut Rodeo,
|
||||||
minuscules: &mut HashMap<String, Spur>,
|
canon: &mut HashMap<String, Spur>,
|
||||||
song: &scanner::Song,
|
song: &scanner::Song,
|
||||||
) -> Option<Song> {
|
) -> Option<Song> {
|
||||||
let Some(real_path) = (&song.real_path).get_or_intern(strings) else {
|
let Some(real_path) = (&song.real_path).get_or_intern(strings) else {
|
||||||
|
@ -123,17 +133,12 @@ pub fn store_song(
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut canonicalize = |s: &String| {
|
let mut canonicalize = |s: &String| {
|
||||||
let mut cleaned = s.clone();
|
let cleaned = sanitize(s);
|
||||||
cleaned.retain(|c| match c {
|
|
||||||
' ' | '_' | '-' | '\'' => false,
|
|
||||||
_ => true,
|
|
||||||
});
|
|
||||||
// TODO merge inconsistent diacritic usage
|
|
||||||
match cleaned.is_empty() {
|
match cleaned.is_empty() {
|
||||||
true => None,
|
true => None,
|
||||||
false => Some(
|
false => Some(
|
||||||
minuscules
|
canon
|
||||||
.entry(cleaned.to_lowercase())
|
.entry(cleaned)
|
||||||
.or_insert_with(|| strings.get_or_intern(s))
|
.or_insert_with(|| strings.get_or_intern(s))
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
),
|
),
|
||||||
|
|
Loading…
Add table
Reference in a new issue