Sort search results

This commit is contained in:
Antoine Gersant 2024-09-22 20:05:20 -07:00
parent 99263ddeca
commit b5762bd7bf
3 changed files with 168 additions and 57 deletions

View file

@ -254,14 +254,14 @@ impl Manager {
.unwrap() .unwrap()
} }
pub async fn search(&self, query: String) -> Result<Vec<PathBuf>, Error> { pub async fn search(&self, query: String) -> Result<Vec<Song>, Error> {
spawn_blocking({ spawn_blocking({
let index_manager = self.clone(); let index_manager = self.clone();
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
index index
.search .search
.find_songs(&index.strings, &index.canon, &query) .find_songs(&index.collection, &index.strings, &index.canon, &query)
} }
}) })
.await .await

View file

@ -3,9 +3,8 @@ use lasso2::{RodeoReader, Spur};
use nohash_hasher::{IntMap, IntSet}; use nohash_hasher::{IntMap, IntSet};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
cmp::Ordering,
collections::{BTreeMap, HashMap}, collections::{BTreeMap, HashMap},
ffi::OsStr,
path::{Path, PathBuf},
}; };
use tinyvec::TinyVec; use tinyvec::TinyVec;
@ -18,6 +17,7 @@ use crate::app::{
}; };
use super::{ use super::{
collection,
query::make_parser, query::make_parser,
storage::{self, sanitize}, storage::{self, sanitize},
}; };
@ -37,23 +37,50 @@ impl Default for Search {
} }
} }
/// Orders two songs for presentation in search results.
///
/// The comparison is lexicographic over, in order:
/// 1. the credited artists (the album artists when that list is non-empty,
///    otherwise the track artists),
/// 2. the year,
/// 3. the album,
/// 4. the track number.
///
/// NOTE(review): no disc number takes part in the key; confirm whether
/// multi-disc albums need one.
fn compare_songs(a: &collection::Song, b: &collection::Song) -> Ordering {
    // Prefer album-level credits so tracks of one album sort together.
    let lhs_artists = if a.album_artists.is_empty() {
        &a.artists
    } else {
        &a.album_artists
    };
    let rhs_artists = if b.album_artists.is_empty() {
        &b.artists
    } else {
        &b.album_artists
    };

    // Each later criterion only matters when all earlier ones tie.
    Ord::cmp(lhs_artists, rhs_artists)
        .then_with(|| Ord::cmp(&a.year, &b.year))
        .then_with(|| Ord::cmp(&a.album, &b.album))
        .then_with(|| Ord::cmp(&a.track_number, &b.track_number))
}
impl Search { impl Search {
pub fn find_songs( pub fn find_songs(
&self, &self,
collection: &collection::Collection,
strings: &RodeoReader, strings: &RodeoReader,
canon: &HashMap<String, Spur>, canon: &HashMap<String, Spur>,
query: &str, query: &str,
) -> Result<Vec<PathBuf>, Error> { ) -> Result<Vec<collection::Song>, Error> {
let parser = make_parser(); let parser = make_parser();
let parsed_query = parser let parsed_query = parser
.parse(query) .parse(query)
.map_err(|_| Error::SearchQueryParseError)?; .map_err(|_| Error::SearchQueryParseError)?;
let keys = self.eval(strings, canon, &parsed_query); let mut songs = self
Ok(keys .eval(strings, canon, &parsed_query)
.into_iter() .into_iter()
.map(|k| Path::new(OsStr::new(strings.resolve(&k.virtual_path.0))).to_owned()) .filter_map(|song_key| collection.get_song(strings, song_key))
.collect::<Vec<_>>()) .collect::<Vec<_>>();
songs.sort_by(compare_songs);
Ok(songs)
} }
fn eval( fn eval(
@ -362,25 +389,49 @@ mod test {
use storage::store_song; use storage::store_song;
use super::*; use super::*;
use collection::Collection;
fn setup_test(songs: Vec<scanner::Song>) -> (Search, RodeoReader, HashMap<String, Spur>) { struct Context {
canon: HashMap<String, Spur>,
collection: Collection,
search: Search,
strings: RodeoReader,
}
impl Context {
    /// Runs `query` through `find_songs` using this context's collection,
    /// string table and canon map, and returns the virtual path of every
    /// matching song in the order `find_songs` produced them.
    ///
    /// Panics if `find_songs` returns an error.
    pub fn search(&self, query: &str) -> Vec<PathBuf> {
        let Self {
            canon,
            collection,
            search,
            strings,
        } = self;

        let matches = search
            .find_songs(collection, strings, canon, query)
            .unwrap();

        matches.into_iter().map(|song| song.virtual_path).collect()
    }
}
fn setup_test(songs: Vec<scanner::Song>) -> Context {
let mut strings = Rodeo::new(); let mut strings = Rodeo::new();
let mut canon = HashMap::new(); let mut canon = HashMap::new();
let mut builder = Builder::default(); let mut collection_builder = collection::Builder::default();
let mut search_builder = Builder::default();
for song in songs { for song in songs {
let storage_song = store_song(&mut strings, &mut canon, &song).unwrap(); let storage_song = store_song(&mut strings, &mut canon, &song).unwrap();
builder.add_song(&song, &storage_song); collection_builder.add_song(&storage_song);
search_builder.add_song(&song, &storage_song);
} }
let search = builder.build(); Context {
let strings = strings.into_reader(); canon,
(search, strings, canon) collection: collection_builder.build(),
search: search_builder.build(),
strings: strings.into_reader(),
}
} }
#[test] #[test]
fn can_find_fuzzy() { fn can_find_fuzzy() {
let (search, strings, canon) = setup_test(vec![ let ctx = setup_test(vec![
scanner::Song { scanner::Song {
virtual_path: PathBuf::from("seasons.mp3"), virtual_path: PathBuf::from("seasons.mp3"),
title: Some("Seasons".to_owned()), title: Some("Seasons".to_owned()),
@ -401,8 +452,7 @@ mod test {
}, },
]); ]);
let songs = search.find_songs(&strings, &canon, "agon").unwrap(); let songs = ctx.search("agon");
assert_eq!(songs.len(), 2); assert_eq!(songs.len(), 2);
assert!(songs.contains(&PathBuf::from("seasons.mp3"))); assert!(songs.contains(&PathBuf::from("seasons.mp3")));
assert!(songs.contains(&PathBuf::from("potd.mp3"))); assert!(songs.contains(&PathBuf::from("potd.mp3")));
@ -410,7 +460,7 @@ mod test {
#[test] #[test]
fn can_find_field_like() { fn can_find_field_like() {
let (search, strings, canon) = setup_test(vec![ let ctx = setup_test(vec![
scanner::Song { scanner::Song {
virtual_path: PathBuf::from("seasons.mp3"), virtual_path: PathBuf::from("seasons.mp3"),
title: Some("Seasons".to_owned()), title: Some("Seasons".to_owned()),
@ -425,36 +475,31 @@ mod test {
}, },
]); ]);
let songs = search let songs = ctx.search("artist % agon");
.find_songs(&strings, &canon, "artist % agon")
.unwrap();
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("seasons.mp3"))); assert!(songs.contains(&PathBuf::from("seasons.mp3")));
} }
#[test] #[test]
fn text_is_case_insensitive() { fn text_is_case_insensitive() {
let (search, strings, canon) = setup_test(vec![scanner::Song { let ctx = setup_test(vec![scanner::Song {
virtual_path: PathBuf::from("seasons.mp3"), virtual_path: PathBuf::from("seasons.mp3"),
artists: vec!["Dragonforce".to_owned()], artists: vec!["Dragonforce".to_owned()],
..Default::default() ..Default::default()
}]); }]);
let songs = search.find_songs(&strings, &canon, "dragonforce").unwrap(); let songs = ctx.search("dragonforce");
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("seasons.mp3"))); assert!(songs.contains(&PathBuf::from("seasons.mp3")));
let songs = search let songs = ctx.search("artist = dragonforce");
.find_songs(&strings, &canon, "artist = dragonforce")
.unwrap();
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("seasons.mp3"))); assert!(songs.contains(&PathBuf::from("seasons.mp3")));
} }
#[test] #[test]
fn can_find_field_exact() { fn can_find_field_exact() {
let (search, strings, canon) = setup_test(vec![ let ctx = setup_test(vec![
scanner::Song { scanner::Song {
virtual_path: PathBuf::from("seasons.mp3"), virtual_path: PathBuf::from("seasons.mp3"),
title: Some("Seasons".to_owned()), title: Some("Seasons".to_owned()),
@ -469,21 +514,17 @@ mod test {
}, },
]); ]);
let songs = search let songs = ctx.search("artist = Dragon");
.find_songs(&strings, &canon, "artist = Dragon")
.unwrap();
assert!(songs.is_empty()); assert!(songs.is_empty());
let songs = search let songs = ctx.search("artist = Dragonforce");
.find_songs(&strings, &canon, "artist = Dragonforce")
.unwrap();
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("seasons.mp3"))); assert!(songs.contains(&PathBuf::from("seasons.mp3")));
} }
#[test] #[test]
fn can_query_number_fields() { fn can_query_number_fields() {
let (search, strings, canon) = setup_test(vec![ let ctx = setup_test(vec![
scanner::Song { scanner::Song {
virtual_path: PathBuf::from("1999.mp3"), virtual_path: PathBuf::from("1999.mp3"),
year: Some(1999), year: Some(1999),
@ -501,24 +542,24 @@ mod test {
}, },
]); ]);
let songs = search.find_songs(&strings, &canon, "year=2000").unwrap(); let songs = ctx.search("year=2000");
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("2000.mp3"))); assert!(songs.contains(&PathBuf::from("2000.mp3")));
let songs = search.find_songs(&strings, &canon, "year>2000").unwrap(); let songs = ctx.search("year>2000");
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("2001.mp3"))); assert!(songs.contains(&PathBuf::from("2001.mp3")));
let songs = search.find_songs(&strings, &canon, "year<2000").unwrap(); let songs = ctx.search("year<2000");
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("1999.mp3"))); assert!(songs.contains(&PathBuf::from("1999.mp3")));
let songs = search.find_songs(&strings, &canon, "year>=2000").unwrap(); let songs = ctx.search("year>=2000");
assert_eq!(songs.len(), 2); assert_eq!(songs.len(), 2);
assert!(songs.contains(&PathBuf::from("2000.mp3"))); assert!(songs.contains(&PathBuf::from("2000.mp3")));
assert!(songs.contains(&PathBuf::from("2001.mp3"))); assert!(songs.contains(&PathBuf::from("2001.mp3")));
let songs = search.find_songs(&strings, &canon, "year<=2000").unwrap(); let songs = ctx.search("year<=2000");
assert_eq!(songs.len(), 2); assert_eq!(songs.len(), 2);
assert!(songs.contains(&PathBuf::from("1999.mp3"))); assert!(songs.contains(&PathBuf::from("1999.mp3")));
assert!(songs.contains(&PathBuf::from("2000.mp3"))); assert!(songs.contains(&PathBuf::from("2000.mp3")));
@ -526,7 +567,7 @@ mod test {
#[test] #[test]
fn fuzzy_numbers_query_all_fields() { fn fuzzy_numbers_query_all_fields() {
let (search, strings, canon) = setup_test(vec![ let ctx = setup_test(vec![
scanner::Song { scanner::Song {
virtual_path: PathBuf::from("music.mp3"), virtual_path: PathBuf::from("music.mp3"),
year: Some(2000), year: Some(2000),
@ -542,7 +583,7 @@ mod test {
}, },
]); ]);
let songs = search.find_songs(&strings, &canon, "2000").unwrap(); let songs = ctx.search("2000");
assert_eq!(songs.len(), 2); assert_eq!(songs.len(), 2);
assert!(songs.contains(&PathBuf::from("music.mp3"))); assert!(songs.contains(&PathBuf::from("music.mp3")));
assert!(songs.contains(&PathBuf::from("fireworks 2000.mp3"))); assert!(songs.contains(&PathBuf::from("fireworks 2000.mp3")));
@ -550,7 +591,7 @@ mod test {
#[test] #[test]
fn can_use_and_operator() { fn can_use_and_operator() {
let (search, strings, canon) = setup_test(vec![ let ctx = setup_test(vec![
scanner::Song { scanner::Song {
virtual_path: PathBuf::from("whale.mp3"), virtual_path: PathBuf::from("whale.mp3"),
..Default::default() ..Default::default()
@ -565,20 +606,18 @@ mod test {
}, },
]); ]);
let songs = search let songs = ctx.search("space && whale");
.find_songs(&strings, &canon, "space && whale")
.unwrap();
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("whales in space.mp3"))); assert!(songs.contains(&PathBuf::from("whales in space.mp3")));
let songs = search.find_songs(&strings, &canon, "space whale").unwrap(); let songs = ctx.search("space whale");
assert_eq!(songs.len(), 1); assert_eq!(songs.len(), 1);
assert!(songs.contains(&PathBuf::from("whales in space.mp3"))); assert!(songs.contains(&PathBuf::from("whales in space.mp3")));
} }
#[test] #[test]
fn can_use_or_operator() { fn can_use_or_operator() {
let (search, strings, canon) = setup_test(vec![ let ctx = setup_test(vec![
scanner::Song { scanner::Song {
virtual_path: PathBuf::from("whale.mp3"), virtual_path: PathBuf::from("whale.mp3"),
..Default::default() ..Default::default()
@ -593,23 +632,78 @@ mod test {
}, },
]); ]);
let songs = search let songs = ctx.search("space || whale");
.find_songs(&strings, &canon, "space || whale")
.unwrap();
assert_eq!(songs.len(), 3); assert_eq!(songs.len(), 3);
assert!(songs.contains(&PathBuf::from("whale.mp3"))); assert!(songs.contains(&PathBuf::from("whale.mp3")));
assert!(songs.contains(&PathBuf::from("space.mp3"))); assert!(songs.contains(&PathBuf::from("space.mp3")));
assert!(songs.contains(&PathBuf::from("whales in space.mp3"))); assert!(songs.contains(&PathBuf::from("whales in space.mp3")));
} }
#[test]
fn results_are_sorted() {
    // Fixtures are listed in a different order than the one asserted below,
    // so the test only passes if the search results come back reordered.
    let fixtures = vec![
        scanner::Song {
            virtual_path: PathBuf::from("cry thunder.mp3"),
            artists: vec![String::from("Dragonforce")],
            album: Some(String::from("The Power Within")),
            year: Some(2012),
            genres: vec![String::from("Metal")],
            ..Default::default()
        },
        scanner::Song {
            virtual_path: PathBuf::from("revelations.mp3"),
            artists: vec![String::from("Dragonforce")],
            album: Some(String::from("Valley of the Damned")),
            year: Some(2003),
            track_number: Some(7),
            genres: vec![String::from("Metal")],
            ..Default::default()
        },
        scanner::Song {
            virtual_path: PathBuf::from("starfire.mp3"),
            artists: vec![String::from("Dragonforce")],
            album: Some(String::from("Valley of the Damned")),
            year: Some(2003),
            track_number: Some(5),
            genres: vec![String::from("Metal")],
            ..Default::default()
        },
        scanner::Song {
            virtual_path: PathBuf::from("eternal snow.mp3"),
            artists: vec![String::from("Rhapsody")],
            genres: vec![String::from("Metal")],
            ..Default::default()
        },
        scanner::Song {
            virtual_path: PathBuf::from("alchemy.mp3"),
            artists: vec![String::from("Avantasia")],
            genres: vec![String::from("Metal")],
            ..Default::default()
        },
    ];
    let ctx = setup_test(fixtures);

    // Expected order: Avantasia, then Dragonforce (2003 album by track
    // number, followed by the 2012 album), then Rhapsody.
    let expected: Vec<PathBuf> = [
        "alchemy.mp3",
        "starfire.mp3",
        "revelations.mp3",
        "cry thunder.mp3",
        "eternal snow.mp3",
    ]
    .iter()
    .map(|name| PathBuf::from(*name))
    .collect();

    let songs = ctx.search("metal");
    assert_eq!(songs.len(), 5);
    assert_eq!(songs, expected);
}
#[test] #[test]
fn avoids_bigram_false_positives() { fn avoids_bigram_false_positives() {
let (search, strings, canon) = setup_test(vec![scanner::Song { let ctx = setup_test(vec![scanner::Song {
virtual_path: PathBuf::from("lorry bovine vehicle.mp3"), virtual_path: PathBuf::from("lorry bovine vehicle.mp3"),
..Default::default() ..Default::default()
}]); }]);
let songs = search.find_songs(&strings, &canon, "love").unwrap(); let songs = ctx.search("love");
assert!(songs.is_empty()); assert!(songs.is_empty());
} }
} }

View file

@ -292,8 +292,10 @@ fn index_files_to_response(files: Vec<index::File>, api_version: APIMajorVersion
} }
} }
const SONG_LIST_CAPACITY: usize = 200;
async fn make_song_list(paths: Vec<PathBuf>, index_manager: &index::Manager) -> dto::SongList { async fn make_song_list(paths: Vec<PathBuf>, index_manager: &index::Manager) -> dto::SongList {
let first_paths = paths.iter().take(200).cloned().collect(); let first_paths = paths.iter().take(SONG_LIST_CAPACITY).cloned().collect();
let first_songs = index_manager let first_songs = index_manager
.get_songs(first_paths) .get_songs(first_paths)
.await .await
@ -512,11 +514,26 @@ async fn get_search(
State(index_manager): State<index::Manager>, State(index_manager): State<index::Manager>,
Path(query): Path<String>, Path(query): Path<String>,
) -> Response { ) -> Response {
let paths = match index_manager.search(query).await { let songs = match index_manager.search(query).await {
Ok(f) => f, Ok(f) => f,
Err(e) => return APIError::from(e).into_response(), Err(e) => return APIError::from(e).into_response(),
}; };
let song_list = make_song_list(paths, &index_manager).await;
let paths = songs
.iter()
.take(SONG_LIST_CAPACITY)
.map(|s| s.virtual_path.clone())
.collect();
let song_list = dto::SongList {
paths,
first_songs: songs
.into_iter()
.take(SONG_LIST_CAPACITY)
.map(|s| s.into())
.collect(),
};
match api_version { match api_version {
APIMajorVersion::V7 => Json( APIMajorVersion::V7 => Json(
song_list song_list