Sorting for accented characters

This commit is contained in:
Antoine Gersant 2025-01-01 13:31:11 -08:00
parent e8845c7ef9
commit 68b8041f97
9 changed files with 583 additions and 208 deletions

279
Cargo.lock generated
View file

@ -528,6 +528,17 @@ dependencies = [
"subtle", "subtle",
] ]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
]
[[package]] [[package]]
name = "either" name = "either"
version = "1.13.0" version = "1.13.0"
@ -977,6 +988,149 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "icu_collator"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d370371887d31d56f361c3eaa15743e54f13bc677059c9191c77e099ed6966b2"
dependencies = [
"displaydoc",
"icu_collator_data",
"icu_collections",
"icu_locid_transform",
"icu_normalizer",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"zerovec",
]
[[package]]
name = "icu_collator_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ee3f88741364b7d6269cce6827a3e6a8a2cf408a78f766c9224ab479d5e4ae5"
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
]
[[package]] [[package]]
name = "id3" name = "id3"
version = "1.14.0" version = "1.14.0"
@ -1120,6 +1274,12 @@ version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "litemap"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.12" version = "0.4.12"
@ -1567,6 +1727,7 @@ dependencies = [
"getopts", "getopts",
"headers", "headers",
"http 1.1.0", "http 1.1.0",
"icu_collator",
"id3", "id3",
"image", "image",
"lasso2", "lasso2",
@ -2101,6 +2262,12 @@ version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]] [[package]]
name = "stacker" name = "stacker"
version = "0.1.15" version = "0.1.15"
@ -2356,6 +2523,17 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
]
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.11.0" version = "3.11.0"
@ -2431,6 +2609,16 @@ dependencies = [
"time-core", "time-core",
] ]
[[package]]
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.8.0" version = "1.8.0"
@ -2720,6 +2908,18 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.5" version = "0.9.5"
@ -2999,12 +3199,48 @@ dependencies = [
"toml 0.5.11", "toml 0.5.11",
] ]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]] [[package]]
name = "yansi" name = "yansi"
version = "0.5.1" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]]
name = "yoke"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
"synstructure",
]
[[package]] [[package]]
name = "zerocopy" name = "zerocopy"
version = "0.7.35" version = "0.7.35"
@ -3026,12 +3262,55 @@ dependencies = [
"syn 2.0.72", "syn 2.0.72",
] ]
[[package]]
name = "zerofrom"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
"synstructure",
]
[[package]] [[package]]
name = "zeroize" name = "zeroize"
version = "1.8.1" version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
[[package]]
name = "zerovec"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
]
[[package]] [[package]]
name = "zune-core" name = "zune-core"
version = "0.4.12" version = "0.4.12"

View file

@ -22,6 +22,7 @@ enum-map = { version = "2.7.3", features = ["serde"] }
getopts = "0.2.21" getopts = "0.2.21"
headers = "0.4" headers = "0.4"
http = "1.1.0" http = "1.1.0"
icu_collator = "1.5.0"
id3 = "1.14.0" id3 = "1.14.0"
lasso2 = { version = "0.8.2", features = ["serialize"] } lasso2 = { version = "0.8.2", features = ["serialize"] }
lewton = "0.10.2" lewton = "0.10.2"

View file

@ -1,10 +1,8 @@
use std::{ use std::{
collections::HashMap,
path::{Path, PathBuf}, path::{Path, PathBuf},
sync::{Arc, RwLock}, sync::{Arc, RwLock},
}; };
use lasso2::{Rodeo, RodeoReader, Spur};
use log::{error, info}; use log::{error, info};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::task::spawn_blocking; use tokio::task::spawn_blocking;
@ -13,6 +11,7 @@ use crate::app::{scanner, Error};
mod browser; mod browser;
mod collection; mod collection;
mod dictionary;
mod query; mod query;
mod search; mod search;
mod storage; mod storage;
@ -108,7 +107,7 @@ impl Manager {
let index_manager = self.clone(); let index_manager = self.clone();
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
index.browser.browse(&index.strings, virtual_path) index.browser.browse(&index.dictionary, virtual_path)
} }
}) })
.await .await
@ -120,7 +119,7 @@ impl Manager {
let index_manager = self.clone(); let index_manager = self.clone();
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
index.browser.flatten(&index.strings, virtual_path) index.browser.flatten(&index.dictionary, virtual_path)
} }
}) })
.await .await
@ -132,7 +131,7 @@ impl Manager {
let index_manager = self.clone(); let index_manager = self.clone();
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
index.collection.get_genres(&index.strings) index.collection.get_genres(&index.dictionary)
} }
}) })
.await .await
@ -145,13 +144,13 @@ impl Manager {
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
let name = index let name = index
.strings .dictionary
.get(&name) .get(&name)
.ok_or_else(|| Error::GenreNotFound)?; .ok_or_else(|| Error::GenreNotFound)?;
let genre_key = GenreKey(name); let genre_key = GenreKey(name);
index index
.collection .collection
.get_genre(&index.strings, genre_key) .get_genre(&index.dictionary, genre_key)
.ok_or_else(|| Error::GenreNotFound) .ok_or_else(|| Error::GenreNotFound)
} }
}) })
@ -164,7 +163,7 @@ impl Manager {
let index_manager = self.clone(); let index_manager = self.clone();
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
index.collection.get_albums(&index.strings) index.collection.get_albums(&index.dictionary)
} }
}) })
.await .await
@ -176,7 +175,7 @@ impl Manager {
let index_manager = self.clone(); let index_manager = self.clone();
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
index.collection.get_artists(&index.strings) index.collection.get_artists(&index.dictionary)
} }
}) })
.await .await
@ -189,13 +188,13 @@ impl Manager {
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
let name = index let name = index
.strings .dictionary
.get(name) .get(name)
.ok_or_else(|| Error::ArtistNotFound)?; .ok_or_else(|| Error::ArtistNotFound)?;
let artist_key = ArtistKey(name); let artist_key = ArtistKey(name);
index index
.collection .collection
.get_artist(&index.strings, artist_key) .get_artist(&index.dictionary, artist_key)
.ok_or_else(|| Error::ArtistNotFound) .ok_or_else(|| Error::ArtistNotFound)
} }
}) })
@ -209,20 +208,20 @@ impl Manager {
move || { move || {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
let name = index let name = index
.strings .dictionary
.get(&name) .get(&name)
.ok_or_else(|| Error::AlbumNotFound)?; .ok_or_else(|| Error::AlbumNotFound)?;
let album_key = AlbumKey { let album_key = AlbumKey {
artists: artists artists: artists
.into_iter() .into_iter()
.filter_map(|a| index.strings.get(a)) .filter_map(|a| index.dictionary.get(a))
.map(|k| ArtistKey(k)) .map(|k| ArtistKey(k))
.collect(), .collect(),
name, name,
}; };
index index
.collection .collection
.get_album(&index.strings, album_key) .get_album(&index.dictionary, album_key)
.ok_or_else(|| Error::AlbumNotFound) .ok_or_else(|| Error::AlbumNotFound)
} }
}) })
@ -242,7 +241,7 @@ impl Manager {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
Ok(index Ok(index
.collection .collection
.get_random_albums(&index.strings, seed, offset, count)) .get_random_albums(&index.dictionary, seed, offset, count))
} }
}) })
.await .await
@ -260,7 +259,7 @@ impl Manager {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
Ok(index Ok(index
.collection .collection
.get_recent_albums(&index.strings, offset, count)) .get_recent_albums(&index.dictionary, offset, count))
} }
}) })
.await .await
@ -275,10 +274,10 @@ impl Manager {
virtual_paths virtual_paths
.into_iter() .into_iter()
.map(|p| { .map(|p| {
p.get(&index.strings) p.get(&index.dictionary)
.and_then(|virtual_path| { .and_then(|virtual_path| {
let key = SongKey { virtual_path }; let key = SongKey { virtual_path };
index.collection.get_song(&index.strings, key) index.collection.get_song(&index.dictionary, key)
}) })
.ok_or_else(|| Error::SongNotFound) .ok_or_else(|| Error::SongNotFound)
}) })
@ -296,7 +295,7 @@ impl Manager {
let index = index_manager.index.read().unwrap(); let index = index_manager.index.read().unwrap();
index index
.search .search
.find_songs(&index.collection, &index.strings, &index.canon, &query) .find_songs(&index.collection, &index.dictionary, &query)
} }
}) })
.await .await
@ -306,8 +305,7 @@ impl Manager {
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct Index { pub struct Index {
pub strings: RodeoReader, pub dictionary: dictionary::Dictionary,
pub canon: HashMap<String, Spur>,
pub browser: browser::Browser, pub browser: browser::Browser,
pub collection: collection::Collection, pub collection: collection::Collection,
pub search: search::Search, pub search: search::Search,
@ -316,8 +314,7 @@ pub struct Index {
impl Default for Index { impl Default for Index {
fn default() -> Self { fn default() -> Self {
Self { Self {
strings: Rodeo::new().into_reader(), dictionary: Default::default(),
canon: Default::default(),
browser: Default::default(), browser: Default::default(),
collection: Default::default(), collection: Default::default(),
search: Default::default(), search: Default::default(),
@ -327,8 +324,7 @@ impl Default for Index {
#[derive(Clone)] #[derive(Clone)]
pub struct Builder { pub struct Builder {
strings: Rodeo, dictionary_builder: dictionary::Builder,
canon: HashMap<String, Spur>,
browser_builder: browser::Builder, browser_builder: browser::Builder,
collection_builder: collection::Builder, collection_builder: collection::Builder,
search_builder: search::Builder, search_builder: search::Builder,
@ -337,8 +333,7 @@ pub struct Builder {
impl Builder { impl Builder {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
strings: Rodeo::new(), dictionary_builder: dictionary::Builder::default(),
canon: HashMap::default(),
browser_builder: browser::Builder::default(), browser_builder: browser::Builder::default(),
collection_builder: collection::Builder::default(), collection_builder: collection::Builder::default(),
search_builder: search::Builder::default(), search_builder: search::Builder::default(),
@ -347,13 +342,13 @@ impl Builder {
pub fn add_directory(&mut self, directory: scanner::Directory) { pub fn add_directory(&mut self, directory: scanner::Directory) {
self.browser_builder self.browser_builder
.add_directory(&mut self.strings, directory); .add_directory(&mut self.dictionary_builder, directory);
} }
pub fn add_song(&mut self, scanner_song: scanner::Song) { pub fn add_song(&mut self, scanner_song: scanner::Song) {
if let Some(storage_song) = store_song(&mut self.strings, &mut self.canon, &scanner_song) { if let Some(storage_song) = store_song(&mut self.dictionary_builder, &scanner_song) {
self.browser_builder self.browser_builder
.add_song(&mut self.strings, &scanner_song); .add_song(&mut self.dictionary_builder, &scanner_song);
self.collection_builder.add_song(&storage_song); self.collection_builder.add_song(&storage_song);
self.search_builder.add_song(&scanner_song, &storage_song); self.search_builder.add_song(&scanner_song, &storage_song);
} }
@ -361,11 +356,10 @@ impl Builder {
pub fn build(self) -> Index { pub fn build(self) -> Index {
Index { Index {
dictionary: self.dictionary_builder.build(),
browser: self.browser_builder.build(), browser: self.browser_builder.build(),
collection: self.collection_builder.build(), collection: self.collection_builder.build(),
search: self.search_builder.build(), search: self.search_builder.build(),
strings: self.strings.into_reader(),
canon: self.canon,
} }
} }
} }

View file

@ -6,14 +6,13 @@ use std::{
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use lasso2::{Rodeo, RodeoReader};
use rayon::prelude::*; use rayon::prelude::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tinyvec::TinyVec; use tinyvec::TinyVec;
use trie_rs::{Trie, TrieBuilder}; use trie_rs::{Trie, TrieBuilder};
use unicase::UniCase;
use crate::app::index::{ use crate::app::index::{
dictionary::{self, Dictionary},
storage::{self, PathKey}, storage::{self, PathKey},
InternPath, InternPath,
}; };
@ -43,12 +42,12 @@ impl Default for Browser {
impl Browser { impl Browser {
pub fn browse<P: AsRef<Path>>( pub fn browse<P: AsRef<Path>>(
&self, &self,
strings: &RodeoReader, dictionary: &Dictionary,
virtual_path: P, virtual_path: P,
) -> Result<Vec<File>, Error> { ) -> Result<Vec<File>, Error> {
let path = virtual_path let path = virtual_path
.as_ref() .as_ref()
.get(strings) .get(dictionary)
.ok_or_else(|| Error::DirectoryNotFound(virtual_path.as_ref().to_owned()))?; .ok_or_else(|| Error::DirectoryNotFound(virtual_path.as_ref().to_owned()))?;
let Some(files) = self.directories.get(&path) else { let Some(files) = self.directories.get(&path) else {
@ -62,7 +61,7 @@ impl Browser {
storage::File::Directory(p) => p, storage::File::Directory(p) => p,
storage::File::Song(p) => p, storage::File::Song(p) => p,
}; };
let path = Path::new(OsStr::new(strings.resolve(&path.0))).to_owned(); let path = Path::new(OsStr::new(dictionary.resolve(&path.0))).to_owned();
match f { match f {
storage::File::Directory(_) => File::Directory(path), storage::File::Directory(_) => File::Directory(path),
storage::File::Song(_) => File::Song(path), storage::File::Song(_) => File::Song(path),
@ -72,10 +71,11 @@ impl Browser {
if virtual_path.as_ref().parent().is_none() { if virtual_path.as_ref().parent().is_none() {
if let [File::Directory(ref p)] = files[..] { if let [File::Directory(ref p)] = files[..] {
return self.browse(strings, p); return self.browse(dictionary, p);
} }
} }
let collator = dictionary::make_collator();
files.sort_by(|a, b| { files.sort_by(|a, b| {
let (a, b) = match (a, b) { let (a, b) = match (a, b) {
(File::Directory(_), File::Song(_)) => return Ordering::Less, (File::Directory(_), File::Song(_)) => return Ordering::Less,
@ -83,10 +83,10 @@ impl Browser {
(File::Directory(a), File::Directory(b)) => (a, b), (File::Directory(a), File::Directory(b)) => (a, b),
(File::Song(a), File::Song(b)) => (a, b), (File::Song(a), File::Song(b)) => (a, b),
}; };
// TODO replace unicase with icu_collator to handle accented characters too collator.compare(
let a = UniCase::new(a.as_os_str().to_string_lossy()); a.as_os_str().to_string_lossy().as_ref(),
let b = UniCase::new(b.as_os_str().to_string_lossy()); b.as_os_str().to_string_lossy().as_ref(),
a.cmp(&b) )
}); });
Ok(files) Ok(files)
@ -94,38 +94,46 @@ impl Browser {
pub fn flatten<P: AsRef<Path>>( pub fn flatten<P: AsRef<Path>>(
&self, &self,
strings: &RodeoReader, dictionary: &Dictionary,
virtual_path: P, virtual_path: P,
) -> Result<Vec<PathBuf>, Error> { ) -> Result<Vec<PathBuf>, Error> {
let path_components = virtual_path let path_components = virtual_path
.as_ref() .as_ref()
.components() .components()
.map(|c| c.as_os_str().to_str().unwrap_or_default()) .map(|c| c.as_os_str().to_str().unwrap_or_default())
.filter_map(|c| strings.get(c)) .filter_map(|c| dictionary.get(c))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
if !self.flattened.is_prefix(&path_components) { if !self.flattened.is_prefix(&path_components) {
return Err(Error::DirectoryNotFound(virtual_path.as_ref().to_owned())); return Err(Error::DirectoryNotFound(virtual_path.as_ref().to_owned()));
} }
let mut files = self let mut results: Vec<TinyVec<[_; 8]>> = self
.flattened .flattened
.predictive_search(path_components) .predictive_search(path_components)
.collect::<Vec<_>>();
results.par_sort_unstable_by(|a, b| {
for (x, y) in a.iter().zip(b.iter()) {
match dictionary.cmp(x, y) {
Ordering::Equal => continue,
ordering @ _ => return ordering,
}
}
a.len().cmp(&b.len())
});
let files = results
.into_iter()
.map(|c: TinyVec<[_; 8]>| -> PathBuf { .map(|c: TinyVec<[_; 8]>| -> PathBuf {
c.into_iter() c.into_iter()
.map(|s| strings.resolve(&s)) .map(|s| dictionary.resolve(&s))
.collect::<TinyVec<[&str; 8]>>() .collect::<TinyVec<[&str; 8]>>()
.join(std::path::MAIN_SEPARATOR_STR) .join(std::path::MAIN_SEPARATOR_STR)
.into() .into()
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
files.par_sort_by(|a, b| {
let a = UniCase::new(a.as_os_str().to_string_lossy());
let b = UniCase::new(b.as_os_str().to_string_lossy());
a.cmp(&b)
});
Ok(files) Ok(files)
} }
} }
@ -137,15 +145,19 @@ pub struct Builder {
} }
impl Builder { impl Builder {
pub fn add_directory(&mut self, strings: &mut Rodeo, directory: scanner::Directory) { pub fn add_directory(
let Some(virtual_path) = (&directory.virtual_path).get_or_intern(strings) else { &mut self,
dictionary_builder: &mut dictionary::Builder,
directory: scanner::Directory,
) {
let Some(virtual_path) = (&directory.virtual_path).get_or_intern(dictionary_builder) else {
return; return;
}; };
let Some(virtual_parent) = directory let Some(virtual_parent) = directory
.virtual_path .virtual_path
.parent() .parent()
.and_then(|p| p.get_or_intern(strings)) .and_then(|p| p.get_or_intern(dictionary_builder))
else { else {
return; return;
}; };
@ -158,15 +170,15 @@ impl Builder {
.insert(storage::File::Directory(virtual_path)); .insert(storage::File::Directory(virtual_path));
} }
pub fn add_song(&mut self, strings: &mut Rodeo, song: &scanner::Song) { pub fn add_song(&mut self, dictionary_builder: &mut dictionary::Builder, song: &scanner::Song) {
let Some(virtual_path) = (&song.virtual_path).get_or_intern(strings) else { let Some(virtual_path) = (&song.virtual_path).get_or_intern(dictionary_builder) else {
return; return;
}; };
let Some(virtual_parent) = song let Some(virtual_parent) = song
.virtual_path .virtual_path
.parent() .parent()
.and_then(|p| p.get_or_intern(strings)) .and_then(|p| p.get_or_intern(dictionary_builder))
else { else {
return; return;
}; };
@ -179,7 +191,7 @@ impl Builder {
self.flattened.push( self.flattened.push(
song.virtual_path song.virtual_path
.components() .components()
.map(|c| strings.get_or_intern(c.as_os_str().to_str().unwrap())) .map(|c| dictionary_builder.get_or_intern(c.as_os_str().to_str().unwrap()))
.collect::<TinyVec<[lasso2::Spur; 8]>>(), .collect::<TinyVec<[lasso2::Spur; 8]>>(),
); );
} }
@ -199,8 +211,8 @@ mod test {
use super::*; use super::*;
fn setup_test(songs: HashSet<PathBuf>) -> (Browser, RodeoReader) { fn setup_test(songs: HashSet<PathBuf>) -> (Browser, Dictionary) {
let mut strings = Rodeo::new(); let mut dictionary_builder = dictionary::Builder::default();
let mut builder = Builder::default(); let mut builder = Builder::default();
let directories = songs let directories = songs
@ -210,7 +222,7 @@ mod test {
for directory in directories { for directory in directories {
builder.add_directory( builder.add_directory(
&mut strings, &mut dictionary_builder,
scanner::Directory { scanner::Directory {
virtual_path: directory.to_owned(), virtual_path: directory.to_owned(),
}, },
@ -220,13 +232,13 @@ mod test {
for path in songs { for path in songs {
let mut song = scanner::Song::default(); let mut song = scanner::Song::default();
song.virtual_path = path.clone(); song.virtual_path = path.clone();
builder.add_song(&mut strings, &song); builder.add_song(&mut dictionary_builder, &song);
} }
let browser = builder.build(); let browser = builder.build();
let strings = strings.into_reader(); let dictionary = dictionary_builder.build();
(browser, strings) (browser, dictionary)
} }
#[test] #[test]
@ -288,6 +300,7 @@ mod test {
PathBuf::from_iter(["Ott", "Mir.mp3"]), PathBuf::from_iter(["Ott", "Mir.mp3"]),
PathBuf::from("Helios.mp3"), PathBuf::from("Helios.mp3"),
PathBuf::from("asura.mp3"), PathBuf::from("asura.mp3"),
PathBuf::from("à la maison.mp3"),
])); ]));
let files = browser.browse(&strings, PathBuf::new()).unwrap(); let files = browser.browse(&strings, PathBuf::new()).unwrap();
@ -296,6 +309,7 @@ mod test {
files, files,
[ [
File::Directory(PathBuf::from("Ott")), File::Directory(PathBuf::from("Ott")),
File::Song(PathBuf::from("à la maison.mp3")),
File::Song(PathBuf::from("asura.mp3")), File::Song(PathBuf::from("asura.mp3")),
File::Song(PathBuf::from("Helios.mp3")), File::Song(PathBuf::from("Helios.mp3")),
] ]
@ -342,6 +356,7 @@ mod test {
let (browser, strings) = setup_test(HashSet::from([ let (browser, strings) = setup_test(HashSet::from([
PathBuf::from_iter(["Ott", "Mir.mp3"]), PathBuf::from_iter(["Ott", "Mir.mp3"]),
PathBuf::from("Helios.mp3"), PathBuf::from("Helios.mp3"),
PathBuf::from("à la maison.mp3.mp3"),
PathBuf::from("asura.mp3"), PathBuf::from("asura.mp3"),
])); ]));
@ -350,6 +365,7 @@ mod test {
assert_eq!( assert_eq!(
files, files,
[ [
PathBuf::from("à la maison.mp3.mp3"),
PathBuf::from("asura.mp3"), PathBuf::from("asura.mp3"),
PathBuf::from("Helios.mp3"), PathBuf::from("Helios.mp3"),
PathBuf::from_iter(["Ott", "Mir.mp3"]), PathBuf::from_iter(["Ott", "Mir.mp3"]),

View file

@ -4,12 +4,12 @@ use std::{
path::PathBuf, path::PathBuf,
}; };
use lasso2::RodeoReader;
use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tinyvec::TinyVec; use tinyvec::TinyVec;
use unicase::UniCase; use unicase::UniCase;
use crate::app::index::dictionary::Dictionary;
use crate::app::index::storage::{self, AlbumKey, ArtistKey, GenreKey, SongKey}; use crate::app::index::storage::{self, AlbumKey, ArtistKey, GenreKey, SongKey};
use super::storage::fetch_song; use super::storage::fetch_song;
@ -90,37 +90,39 @@ pub struct Collection {
} }
impl Collection { impl Collection {
pub fn get_albums(&self, strings: &RodeoReader) -> Vec<AlbumHeader> { pub fn get_albums(&self, dictionary: &Dictionary) -> Vec<AlbumHeader> {
let mut albums = self let mut albums = self
.albums .albums
.values() .values()
.map(|a| make_album_header(a, strings)) .map(|a| make_album_header(a, dictionary))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
albums.sort_by(|a, b| a.name.cmp(&b.name)); albums.sort_by(|a, b| a.name.cmp(&b.name));
albums albums
} }
pub fn get_artists(&self, strings: &RodeoReader) -> Vec<ArtistHeader> { pub fn get_artists(&self, dictionary: &Dictionary) -> Vec<ArtistHeader> {
let exceptions = vec![strings.get("Various Artists"), strings.get("VA")]; let exceptions = vec![dictionary.get("Various Artists"), dictionary.get("VA")];
let mut artists = self let mut artists = self
.artists .artists
.values() .values()
.filter(|a| !exceptions.contains(&Some(a.name))) .filter(|a| !exceptions.contains(&Some(a.name)))
.map(|a| make_artist_header(a, strings)) .map(|a| make_artist_header(a, dictionary))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
// TODO collator sort
artists.sort_by(|a, b| a.name.cmp(&b.name)); artists.sort_by(|a, b| a.name.cmp(&b.name));
artists artists
} }
pub fn get_artist(&self, strings: &RodeoReader, artist_key: ArtistKey) -> Option<Artist> { pub fn get_artist(&self, dictionary: &Dictionary, artist_key: ArtistKey) -> Option<Artist> {
self.artists.get(&artist_key).map(|artist| { self.artists.get(&artist_key).map(|artist| {
let header = make_artist_header(artist, strings); let header = make_artist_header(artist, dictionary);
let albums = { let albums = {
let mut albums = artist let mut albums = artist
.all_albums .all_albums
.iter() .iter()
.filter_map(|key| self.get_album(strings, key.clone())) .filter_map(|key| self.get_album(dictionary, key.clone()))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
// TODO collator sort
albums.sort_by(|a, b| { albums.sort_by(|a, b| {
(&a.header.year, &a.header.name).cmp(&(&b.header.year, &b.header.name)) (&a.header.year, &a.header.name).cmp(&(&b.header.year, &b.header.name))
}); });
@ -130,14 +132,14 @@ impl Collection {
}) })
} }
pub fn get_album(&self, strings: &RodeoReader, album_key: AlbumKey) -> Option<Album> { pub fn get_album(&self, dictionary: &Dictionary, album_key: AlbumKey) -> Option<Album> {
self.albums.get(&album_key).map(|a| { self.albums.get(&album_key).map(|a| {
let mut songs = a let mut songs = a
.songs .songs
.iter() .iter()
.filter_map(|s| { .filter_map(|s| {
self.get_song( self.get_song(
strings, dictionary,
SongKey { SongKey {
virtual_path: s.virtual_path, virtual_path: s.virtual_path,
}, },
@ -148,7 +150,7 @@ impl Collection {
songs.sort_by_key(|s| (s.disc_number.unwrap_or(-1), s.track_number.unwrap_or(-1))); songs.sort_by_key(|s| (s.disc_number.unwrap_or(-1), s.track_number.unwrap_or(-1)));
Album { Album {
header: make_album_header(a, strings), header: make_album_header(a, dictionary),
songs, songs,
} }
}) })
@ -156,7 +158,7 @@ impl Collection {
pub fn get_random_albums( pub fn get_random_albums(
&self, &self,
strings: &RodeoReader, dictionary: &Dictionary,
seed: Option<u64>, seed: Option<u64>,
offset: usize, offset: usize,
count: usize, count: usize,
@ -175,13 +177,13 @@ impl Collection {
.into_iter() .into_iter()
.skip(offset) .skip(offset)
.take(count) .take(count)
.filter_map(|k| self.get_album(strings, k.clone())) .filter_map(|k| self.get_album(dictionary, k.clone()))
.collect() .collect()
} }
pub fn get_recent_albums( pub fn get_recent_albums(
&self, &self,
strings: &RodeoReader, dictionary: &Dictionary,
offset: usize, offset: usize,
count: usize, count: usize,
) -> Vec<Album> { ) -> Vec<Album> {
@ -189,21 +191,21 @@ impl Collection {
.iter() .iter()
.skip(offset) .skip(offset)
.take(count) .take(count)
.filter_map(|k| self.get_album(strings, k.clone())) .filter_map(|k| self.get_album(dictionary, k.clone()))
.collect() .collect()
} }
pub fn get_genres(&self, strings: &RodeoReader) -> Vec<GenreHeader> { pub fn get_genres(&self, dictionary: &Dictionary) -> Vec<GenreHeader> {
let mut genres = self let mut genres = self
.genres .genres
.values() .values()
.map(|a| make_genre_header(a, strings)) .map(|a| make_genre_header(a, dictionary))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
genres.sort_by(|a, b| a.name.cmp(&b.name)); genres.sort_by(|a, b| a.name.cmp(&b.name));
genres genres
} }
pub fn get_genre(&self, strings: &RodeoReader, genre_key: GenreKey) -> Option<Genre> { pub fn get_genre(&self, dictionary: &Dictionary, genre_key: GenreKey) -> Option<Genre> {
self.genres.get(&genre_key).map(|genre| { self.genres.get(&genre_key).map(|genre| {
let mut albums = genre let mut albums = genre
.albums .albums
@ -211,7 +213,7 @@ impl Collection {
.filter_map(|album_key| { .filter_map(|album_key| {
self.albums self.albums
.get(album_key) .get(album_key)
.map(|a| make_album_header(a, strings)) .map(|a| make_album_header(a, dictionary))
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
albums.sort_by(|a, b| a.name.cmp(&b.name)); albums.sort_by(|a, b| a.name.cmp(&b.name));
@ -222,7 +224,7 @@ impl Collection {
.filter_map(|artist_key| { .filter_map(|artist_key| {
self.artists self.artists
.get(artist_key) .get(artist_key)
.map(|a| make_artist_header(a, strings)) .map(|a| make_artist_header(a, dictionary))
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
artists.sort_by(|a, b| a.name.cmp(&b.name)); artists.sort_by(|a, b| a.name.cmp(&b.name));
@ -230,7 +232,7 @@ impl Collection {
let songs = genre let songs = genre
.songs .songs
.iter() .iter()
.filter_map(|k| self.get_song(strings, *k)) .filter_map(|k| self.get_song(dictionary, *k))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
// TODO sort songs // TODO sort songs
@ -238,12 +240,12 @@ impl Collection {
.related_genres .related_genres
.iter() .iter()
.map(|(genre_key, song_count)| { .map(|(genre_key, song_count)| {
(strings.resolve(&genre_key.0).to_owned(), *song_count) (dictionary.resolve(&genre_key.0).to_owned(), *song_count)
}) })
.collect(); .collect();
Genre { Genre {
header: make_genre_header(genre, strings), header: make_genre_header(genre, dictionary),
albums, albums,
artists, artists,
related_genres, related_genres,
@ -256,32 +258,33 @@ impl Collection {
self.songs.len() self.songs.len()
} }
pub fn get_song(&self, strings: &RodeoReader, song_key: SongKey) -> Option<Song> { pub fn get_song(&self, dictionary: &Dictionary, song_key: SongKey) -> Option<Song> {
self.songs.get(&song_key).map(|s| fetch_song(strings, s)) self.songs.get(&song_key).map(|s| fetch_song(dictionary, s))
} }
} }
fn make_album_header(album: &storage::Album, strings: &RodeoReader) -> AlbumHeader { fn make_album_header(album: &storage::Album, dictionary: &Dictionary) -> AlbumHeader {
AlbumHeader { AlbumHeader {
name: strings.resolve(&album.name).to_string(), name: dictionary.resolve(&album.name).to_string(),
artwork: album artwork: album
.artwork .artwork
.as_ref() .as_ref()
.map(|a| strings.resolve(&a.0)) .map(|a| dictionary.resolve(&a.0))
.map(PathBuf::from), .map(PathBuf::from),
artists: album artists: album
.artists .artists
.iter() .iter()
.map(|a| strings.resolve(&a.0).to_string()) .map(|a| dictionary.resolve(&a.0).to_string())
.collect(), .collect(),
year: album.year, year: album.year,
date_added: album.date_added, date_added: album.date_added,
} }
} }
fn make_artist_header(artist: &storage::Artist, strings: &RodeoReader) -> ArtistHeader { fn make_artist_header(artist: &storage::Artist, dictionary: &Dictionary) -> ArtistHeader {
// TODO drop unicase
ArtistHeader { ArtistHeader {
name: UniCase::new(strings.resolve(&artist.name).to_owned()), name: UniCase::new(dictionary.resolve(&artist.name).to_owned()),
num_albums_as_performer: artist.albums_as_performer.len() as u32, num_albums_as_performer: artist.albums_as_performer.len() as u32,
num_albums_as_additional_performer: artist.albums_as_additional_performer.len() as u32, num_albums_as_additional_performer: artist.albums_as_additional_performer.len() as u32,
num_albums_as_composer: artist.albums_as_composer.len() as u32, num_albums_as_composer: artist.albums_as_composer.len() as u32,
@ -289,15 +292,15 @@ fn make_artist_header(artist: &storage::Artist, strings: &RodeoReader) -> Artist
num_songs_by_genre: artist num_songs_by_genre: artist
.num_songs_by_genre .num_songs_by_genre
.iter() .iter()
.map(|(genre, num)| (strings.resolve(genre).to_string(), *num)) .map(|(genre, num)| (dictionary.resolve(genre).to_string(), *num))
.collect(), .collect(),
num_songs: artist.num_songs, num_songs: artist.num_songs,
} }
} }
fn make_genre_header(genre: &storage::Genre, strings: &RodeoReader) -> GenreHeader { fn make_genre_header(genre: &storage::Genre, dictionary: &Dictionary) -> GenreHeader {
GenreHeader { GenreHeader {
name: strings.resolve(&genre.name).to_string(), name: dictionary.resolve(&genre.name).to_string(),
} }
} }
@ -504,28 +507,26 @@ impl Builder {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use lasso2::Rodeo;
use tinyvec::tiny_vec; use tinyvec::tiny_vec;
use crate::app::scanner; use crate::app::{index::dictionary, scanner};
use storage::{store_song, InternPath}; use storage::{store_song, InternPath};
use super::*; use super::*;
fn setup_test(songs: Vec<scanner::Song>) -> (Collection, RodeoReader) { fn setup_test(songs: Vec<scanner::Song>) -> (Collection, Dictionary) {
let mut strings = Rodeo::new(); let mut dictionary_builder = dictionary::Builder::default();
let mut canon = HashMap::new();
let mut builder = Builder::default(); let mut builder = Builder::default();
for song in songs { for song in songs {
let song = store_song(&mut strings, &mut canon, &song).unwrap(); let song = store_song(&mut dictionary_builder, &song).unwrap();
builder.add_song(&song); builder.add_song(&song);
} }
let browser = builder.build(); let browser = builder.build();
let strings = strings.into_reader(); let dictionary = dictionary_builder.build();
(browser, strings) (browser, dictionary)
} }
#[test] #[test]

110
src/app/index/dictionary.rs Normal file
View file

@ -0,0 +1,110 @@
use std::{cmp::Ordering, collections::HashMap};
use icu_collator::{Collator, CollatorOptions, Strength};
use lasso2::{Rodeo, RodeoReader, Spur};
use rayon::slice::ParallelSliceMut;
use serde::{Deserialize, Serialize};
/// Produces the normalized form of `s` used for loose string matching.
///
/// Spaces, underscores, hyphens and apostrophes are dropped, then the
/// remaining text is lowercased as a whole string.
pub fn sanitize(s: &str) -> String {
    // TODO merge inconsistent diacritic usage
    let kept: String = s
        .chars()
        .filter(|c| !matches!(c, ' ' | '_' | '-' | '\''))
        .collect();
    // Lowercase the assembled string (not char-by-char) so that
    // context-sensitive mappings behave exactly like `str::to_lowercase`.
    kept.to_lowercase()
}
/// Builds the collator used to order strings for display.
///
/// The collator is created for the default locale with its comparison
/// strength set to `Strength::Secondary`.
///
/// # Panics
///
/// Panics if `Collator::try_new` returns an error.
pub fn make_collator() -> Collator {
    let mut options = CollatorOptions::new();
    options.strength = Some(Strength::Secondary);
    Collator::try_new(&Default::default(), options).unwrap()
}
/// Read-only string table produced by `Builder::build`.
///
/// Bundles the interned strings, a lookup from sanitized text to the
/// spur chosen as its canonical spelling, and a precomputed rank per spur
/// that `Dictionary::cmp` uses to order two spurs.
#[derive(Serialize, Deserialize)]
pub struct Dictionary {
strings: RodeoReader, // Interned strings
// Keys are the output of `sanitize`; values are the spur interned for the
// first spelling that produced that key.
canon: HashMap<String, Spur>, // Canonical representation of similar strings
// Value is the spur's position after sorting every interned string with
// the collator from `make_collator`.
sort_keys: HashMap<Spur, u32>, // All spurs sorted against each other
}
impl Dictionary {
    /// Returns the spur of `string` if that exact string was interned.
    pub fn get<S: AsRef<str>>(&self, string: S) -> Option<Spur> {
        let interned = &self.strings;
        interned.get(string)
    }

    /// Returns the canonical spur registered for the sanitized form of
    /// `string`, or `None` when no string with that sanitized form was
    /// registered.
    pub fn get_canon<S: AsRef<str>>(&self, string: S) -> Option<Spur> {
        let canonical_form = sanitize(string.as_ref());
        self.canon.get(&canonical_form).copied()
    }

    /// Returns the text a spur stands for.
    // NOTE(review): presumably panics for a spur from another dictionary,
    // as the underlying reader's `resolve` does; confirm against lasso2.
    pub fn resolve(&self, spur: &Spur) -> &str {
        self.strings.resolve(spur)
    }

    /// Orders two spurs by their precomputed sort rank.
    ///
    /// A spur with no recorded rank is treated as rank `0`.
    pub fn cmp(&self, a: &Spur, b: &Spur) -> Ordering {
        let rank = |spur: &Spur| self.sort_keys.get(spur).copied().unwrap_or_default();
        let left = rank(a);
        let right = rank(b);
        left.cmp(&right)
    }
}
impl Default for Dictionary {
fn default() -> Self {
Self {
strings: Rodeo::default().into_reader(),
canon: Default::default(),
sort_keys: Default::default(),
}
}
}
/// Mutable accumulator that interns strings and is turned into a
/// `Dictionary` by `Builder::build`.
#[derive(Clone, Default)]
pub struct Builder {
// Interner receiving every string passed to `get_or_intern` and the first
// spelling seen for each canonical form.
strings: Rodeo,
// Maps the output of `sanitize` to the spur of the first spelling interned
// for it.
canon: HashMap<String, Spur>,
}
impl Builder {
pub fn build(self) -> Dictionary {
let mut sorted_spurs = self.strings.iter().collect::<Vec<_>>();
// TODO this is too slow!
sorted_spurs.par_sort_unstable_by(|(_, a), (_, b)| {
let collator = make_collator();
collator.compare(a, b)
});
let sort_keys = sorted_spurs
.into_iter()
.enumerate()
.map(|(i, (spur, _))| (spur, i as u32))
.collect();
Dictionary {
strings: self.strings.into_reader(),
canon: self.canon,
sort_keys,
}
}
pub fn get_or_intern<S: AsRef<str>>(&mut self, string: S) -> Spur {
self.strings.get_or_intern(string)
}
pub fn get_or_intern_canon<S: AsRef<str>>(&mut self, string: S) -> Option<Spur> {
let cleaned = sanitize(string.as_ref());
match cleaned.is_empty() {
true => None,
false => Some(
self.canon
.entry(cleaned)
.or_insert_with(|| self.strings.get_or_intern(string.as_ref()))
.to_owned(),
),
}
}
}

View file

@ -1,6 +1,6 @@
use chumsky::Parser; use chumsky::Parser;
use enum_map::EnumMap; use enum_map::EnumMap;
use lasso2::{RodeoReader, Spur}; use lasso2::Spur;
use nohash_hasher::{IntMap, IntSet}; use nohash_hasher::{IntMap, IntSet};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
@ -11,17 +11,14 @@ use tinyvec::TinyVec;
use crate::app::{ use crate::app::{
index::{ index::{
dictionary::Dictionary,
query::{BoolOp, Expr, Literal, NumberField, NumberOp, TextField, TextOp}, query::{BoolOp, Expr, Literal, NumberField, NumberOp, TextField, TextOp},
storage::SongKey, storage::SongKey,
}, },
scanner, Error, scanner, Error,
}; };
use super::{ use super::{collection, dictionary::sanitize, query::make_parser, storage};
collection,
query::make_parser,
storage::{self, sanitize},
};
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct Search { pub struct Search {
@ -64,8 +61,7 @@ impl Search {
pub fn find_songs( pub fn find_songs(
&self, &self,
collection: &collection::Collection, collection: &collection::Collection,
strings: &RodeoReader, dictionary: &Dictionary,
canon: &HashMap<String, Spur>,
query: &str, query: &str,
) -> Result<Vec<collection::Song>, Error> { ) -> Result<Vec<collection::Song>, Error> {
let parser = make_parser(); let parser = make_parser();
@ -74,9 +70,9 @@ impl Search {
.map_err(|_| Error::SearchQueryParseError)?; .map_err(|_| Error::SearchQueryParseError)?;
let mut songs = self let mut songs = self
.eval(strings, canon, &parsed_query) .eval(dictionary, &parsed_query)
.into_iter() .into_iter()
.filter_map(|song_key| collection.get_song(strings, song_key)) .filter_map(|song_key| collection.get_song(dictionary, song_key))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
songs.sort_by(compare_songs); songs.sort_by(compare_songs);
@ -84,24 +80,18 @@ impl Search {
Ok(songs) Ok(songs)
} }
fn eval( fn eval(&self, dictionary: &Dictionary, expr: &Expr) -> IntSet<SongKey> {
&self,
strings: &RodeoReader,
canon: &HashMap<String, Spur>,
expr: &Expr,
) -> IntSet<SongKey> {
match expr { match expr {
Expr::Fuzzy(s) => self.eval_fuzzy(strings, s), Expr::Fuzzy(s) => self.eval_fuzzy(dictionary, s),
Expr::TextCmp(field, op, s) => self.eval_text_operator(strings, canon, *field, *op, &s), Expr::TextCmp(field, op, s) => self.eval_text_operator(dictionary, *field, *op, &s),
Expr::NumberCmp(field, op, n) => self.eval_number_operator(*field, *op, *n), Expr::NumberCmp(field, op, n) => self.eval_number_operator(*field, *op, *n),
Expr::Combined(e, op, f) => self.combine(strings, canon, e, *op, f), Expr::Combined(e, op, f) => self.combine(dictionary, e, *op, f),
} }
} }
fn combine( fn combine(
&self, &self,
strings: &RodeoReader, dictionary: &Dictionary,
canon: &HashMap<String, Spur>,
e: &Box<Expr>, e: &Box<Expr>,
op: BoolOp, op: BoolOp,
f: &Box<Expr>, f: &Box<Expr>,
@ -113,8 +103,8 @@ impl Search {
_ => true, _ => true,
}; };
let left = is_operable(e).then(|| self.eval(strings, canon, e)); let left = is_operable(e).then(|| self.eval(dictionary, e));
let right = is_operable(f).then(|| self.eval(strings, canon, f)); let right = is_operable(f).then(|| self.eval(dictionary, f));
match (left, op, right) { match (left, op, right) {
(Some(l), BoolOp::And, Some(r)) => l.intersection(&r).cloned().collect(), (Some(l), BoolOp::And, Some(r)) => l.intersection(&r).cloned().collect(),
@ -127,12 +117,12 @@ impl Search {
} }
} }
fn eval_fuzzy(&self, strings: &RodeoReader, value: &Literal) -> IntSet<SongKey> { fn eval_fuzzy(&self, dictionary: &Dictionary, value: &Literal) -> IntSet<SongKey> {
match value { match value {
Literal::Text(s) => { Literal::Text(s) => {
let mut songs = IntSet::default(); let mut songs = IntSet::default();
for field in self.text_fields.values() { for field in self.text_fields.values() {
songs.extend(field.find_like(strings, s)); songs.extend(field.find_like(dictionary, s));
} }
songs songs
} }
@ -142,7 +132,7 @@ impl Search {
songs.extend(field.find(*n as i64, NumberOp::Eq)); songs.extend(field.find(*n as i64, NumberOp::Eq));
} }
songs songs
.union(&self.eval_fuzzy(strings, &Literal::Text(n.to_string()))) .union(&self.eval_fuzzy(dictionary, &Literal::Text(n.to_string())))
.copied() .copied()
.collect() .collect()
} }
@ -151,15 +141,14 @@ impl Search {
fn eval_text_operator( fn eval_text_operator(
&self, &self,
strings: &RodeoReader, dictionary: &Dictionary,
canon: &HashMap<String, Spur>,
field: TextField, field: TextField,
operator: TextOp, operator: TextOp,
value: &str, value: &str,
) -> IntSet<SongKey> { ) -> IntSet<SongKey> {
match operator { match operator {
TextOp::Eq => self.text_fields[field].find_exact(canon, value), TextOp::Eq => self.text_fields[field].find_exact(dictionary, value),
TextOp::Like => self.text_fields[field].find_like(strings, value), TextOp::Like => self.text_fields[field].find_like(dictionary, value),
} }
} }
@ -194,7 +183,7 @@ impl TextFieldIndex {
self.exact.entry(value).or_default().insert(key); self.exact.entry(value).or_default().insert(key);
} }
pub fn find_like(&self, strings: &RodeoReader, value: &str) -> IntSet<SongKey> { pub fn find_like(&self, dictionary: &Dictionary, value: &str) -> IntSet<SongKey> {
let sanitized = sanitize(value); let sanitized = sanitize(value);
let characters = sanitized.chars().collect::<Vec<_>>(); let characters = sanitized.chars().collect::<Vec<_>>();
let empty = IntMap::default(); let empty = IntMap::default();
@ -222,7 +211,7 @@ impl TextFieldIndex {
}) })
// [narrow phase] Only keep songs that actually contain the search term in full // [narrow phase] Only keep songs that actually contain the search term in full
.filter(|(_song_key, indexed_value)| { .filter(|(_song_key, indexed_value)| {
let resolved = strings.resolve(indexed_value); let resolved = dictionary.resolve(indexed_value);
sanitize(resolved).contains(&sanitized) sanitize(resolved).contains(&sanitized)
}) })
.map(|(k, _v)| k) .map(|(k, _v)| k)
@ -230,9 +219,9 @@ impl TextFieldIndex {
.collect() .collect()
} }
pub fn find_exact(&self, canon: &HashMap<String, Spur>, value: &str) -> IntSet<SongKey> { pub fn find_exact(&self, dictionary: &Dictionary, value: &str) -> IntSet<SongKey> {
canon dictionary
.get(&sanitize(value)) .get_canon(value)
.and_then(|s| self.exact.get(&s)) .and_then(|s| self.exact.get(&s))
.cloned() .cloned()
.unwrap_or_default() .unwrap_or_default()
@ -353,23 +342,21 @@ impl Builder {
mod test { mod test {
use std::path::PathBuf; use std::path::PathBuf;
use lasso2::Rodeo; use super::*;
use crate::app::index::dictionary;
use collection::Collection;
use storage::store_song; use storage::store_song;
use super::*;
use collection::Collection;
struct Context { struct Context {
canon: HashMap<String, Spur>, dictionary: Dictionary,
collection: Collection, collection: Collection,
search: Search, search: Search,
strings: RodeoReader,
} }
impl Context { impl Context {
pub fn search(&self, query: &str) -> Vec<PathBuf> { pub fn search(&self, query: &str) -> Vec<PathBuf> {
self.search self.search
.find_songs(&self.collection, &self.strings, &self.canon, query) .find_songs(&self.collection, &self.dictionary, query)
.unwrap() .unwrap()
.into_iter() .into_iter()
.map(|s| s.virtual_path) .map(|s| s.virtual_path)
@ -378,22 +365,19 @@ mod test {
} }
fn setup_test(songs: Vec<scanner::Song>) -> Context { fn setup_test(songs: Vec<scanner::Song>) -> Context {
let mut strings = Rodeo::new(); let mut dictionary_builder = dictionary::Builder::default();
let mut canon = HashMap::new();
let mut collection_builder = collection::Builder::default(); let mut collection_builder = collection::Builder::default();
let mut search_builder = Builder::default(); let mut search_builder = Builder::default();
for song in songs { for song in songs {
let storage_song = store_song(&mut strings, &mut canon, &song).unwrap(); let storage_song = store_song(&mut dictionary_builder, &song).unwrap();
collection_builder.add_song(&storage_song); collection_builder.add_song(&storage_song);
search_builder.add_song(&song, &storage_song); search_builder.add_song(&song, &storage_song);
} }
Context { Context {
canon,
collection: collection_builder.build(), collection: collection_builder.build(),
search: search_builder.build(), search: search_builder.build(),
strings: strings.into_reader(), dictionary: dictionary_builder.build(),
} }
} }

View file

@ -3,13 +3,15 @@ use std::{
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use lasso2::{Rodeo, RodeoReader, Spur}; use lasso2::Spur;
use log::error; use log::error;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tinyvec::TinyVec; use tinyvec::TinyVec;
use crate::app::scanner; use crate::app::scanner;
use crate::app::index::dictionary::{self, Dictionary};
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum File { pub enum File {
Directory(PathKey), Directory(PathKey),
@ -111,49 +113,27 @@ impl Song {
} }
} }
pub fn sanitize(s: &str) -> String {
// TODO merge inconsistent diacritic usage
let mut cleaned = s.to_owned();
cleaned.retain(|c| match c {
' ' | '_' | '-' | '\'' => false,
_ => true,
});
cleaned.to_lowercase()
}
pub fn store_song( pub fn store_song(
strings: &mut Rodeo, dictionary_builder: &mut dictionary::Builder,
canon: &mut HashMap<String, Spur>,
song: &scanner::Song, song: &scanner::Song,
) -> Option<Song> { ) -> Option<Song> {
let Some(real_path) = (&song.real_path).get_or_intern(strings) else { let Some(real_path) = (&song.real_path).get_or_intern(dictionary_builder) else {
return None; return None;
}; };
let Some(virtual_path) = (&song.virtual_path).get_or_intern(strings) else { let Some(virtual_path) = (&song.virtual_path).get_or_intern(dictionary_builder) else {
return None; return None;
}; };
let artwork = match &song.artwork { let artwork = match &song.artwork {
Some(a) => match a.get_or_intern(strings) { Some(a) => match a.get_or_intern(dictionary_builder) {
Some(a) => Some(a), Some(a) => Some(a),
None => return None, None => return None,
}, },
None => None, None => None,
}; };
let mut canonicalize = |s: &String| { let mut canonicalize = |s: &String| dictionary_builder.get_or_intern_canon(s);
let cleaned = sanitize(s);
match cleaned.is_empty() {
true => None,
false => Some(
canon
.entry(cleaned)
.or_insert_with(|| strings.get_or_intern(s))
.to_owned(),
),
}
};
Some(Song { Some(Song {
real_path, real_path,
@ -195,63 +175,65 @@ pub fn store_song(
}) })
} }
pub fn fetch_song(strings: &RodeoReader, song: &Song) -> super::Song { pub fn fetch_song(dictionary: &Dictionary, song: &Song) -> super::Song {
super::Song { super::Song {
real_path: PathBuf::from(strings.resolve(&song.real_path.0)), real_path: PathBuf::from(dictionary.resolve(&song.real_path.0)),
virtual_path: PathBuf::from(strings.resolve(&song.virtual_path.0)), virtual_path: PathBuf::from(dictionary.resolve(&song.virtual_path.0)),
track_number: song.track_number, track_number: song.track_number,
disc_number: song.disc_number, disc_number: song.disc_number,
title: song.title.map(|s| strings.resolve(&s).to_string()), title: song.title.map(|s| dictionary.resolve(&s).to_string()),
artists: song artists: song
.artists .artists
.iter() .iter()
.map(|k| strings.resolve(&k.0).to_string()) .map(|k| dictionary.resolve(&k.0).to_string())
.collect(), .collect(),
album_artists: song album_artists: song
.album_artists .album_artists
.iter() .iter()
.map(|k| strings.resolve(&k.0).to_string()) .map(|k| dictionary.resolve(&k.0).to_string())
.collect(), .collect(),
year: song.year, year: song.year,
album: song.album.map(|s| strings.resolve(&s).to_string()), album: song.album.map(|s| dictionary.resolve(&s).to_string()),
artwork: song.artwork.map(|a| PathBuf::from(strings.resolve(&a.0))), artwork: song
.artwork
.map(|a| PathBuf::from(dictionary.resolve(&a.0))),
duration: song.duration, duration: song.duration,
lyricists: song lyricists: song
.lyricists .lyricists
.iter() .iter()
.map(|k| strings.resolve(&k.0).to_string()) .map(|k| dictionary.resolve(&k.0).to_string())
.collect(), .collect(),
composers: song composers: song
.composers .composers
.iter() .iter()
.map(|k| strings.resolve(&k.0).to_string()) .map(|k| dictionary.resolve(&k.0).to_string())
.collect(), .collect(),
genres: song genres: song
.genres .genres
.iter() .iter()
.map(|s| strings.resolve(&s).to_string()) .map(|s| dictionary.resolve(&s).to_string())
.collect(), .collect(),
labels: song labels: song
.labels .labels
.iter() .iter()
.map(|s| strings.resolve(&s).to_string()) .map(|s| dictionary.resolve(&s).to_string())
.collect(), .collect(),
date_added: song.date_added, date_added: song.date_added,
} }
} }
pub trait InternPath { pub trait InternPath {
fn get_or_intern(self, strings: &mut Rodeo) -> Option<PathKey>; fn get_or_intern(self, dictionary: &mut dictionary::Builder) -> Option<PathKey>;
fn get(self, strings: &RodeoReader) -> Option<PathKey>; fn get(self, dictionary: &Dictionary) -> Option<PathKey>;
} }
impl<P: AsRef<Path>> InternPath for P { impl<P: AsRef<Path>> InternPath for P {
fn get_or_intern(self, strings: &mut Rodeo) -> Option<PathKey> { fn get_or_intern(self, dictionary: &mut dictionary::Builder) -> Option<PathKey> {
let id = self let id = self
.as_ref() .as_ref()
.as_os_str() .as_os_str()
.to_str() .to_str()
.map(|s| strings.get_or_intern(s)) .map(|s| dictionary.get_or_intern(s))
.map(PathKey); .map(PathKey);
if id.is_none() { if id.is_none() {
error!("Unsupported path: `{}`", self.as_ref().to_string_lossy()); error!("Unsupported path: `{}`", self.as_ref().to_string_lossy());
@ -259,12 +241,12 @@ impl<P: AsRef<Path>> InternPath for P {
id id
} }
fn get(self, strings: &RodeoReader) -> Option<PathKey> { fn get(self, dictionary: &Dictionary) -> Option<PathKey> {
let id = self let id = self
.as_ref() .as_ref()
.as_os_str() .as_os_str()
.to_str() .to_str()
.and_then(|s| strings.get(s)) .and_then(|s| dictionary.get(s))
.map(PathKey); .map(PathKey);
if id.is_none() { if id.is_none() {
error!("Unsupported path: `{}`", self.as_ref().to_string_lossy()); error!("Unsupported path: `{}`", self.as_ref().to_string_lossy());

View file

@ -3,11 +3,11 @@ use std::collections::HashMap;
use std::path::PathBuf; use std::path::PathBuf;
use std::time::Duration; use std::time::Duration;
use icu_collator::{Collator, CollatorOptions, Strength};
use native_db::*; use native_db::*;
use native_model::{native_model, Model}; use native_model::{native_model, Model};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::task::spawn_blocking; use tokio::task::spawn_blocking;
use unicase::UniCase;
use crate::app::{index, ndb, Error}; use crate::app::{index, ndb, Error};
@ -18,7 +18,7 @@ pub struct Manager {
#[derive(Debug)] #[derive(Debug)]
pub struct PlaylistHeader { pub struct PlaylistHeader {
pub name: UniCase<String>, pub name: String,
pub duration: Duration, pub duration: Duration,
pub num_songs_by_genre: HashMap<String, u32>, pub num_songs_by_genre: HashMap<String, u32>,
} }
@ -58,7 +58,7 @@ pub mod v1 {
impl From<PlaylistModel> for PlaylistHeader { impl From<PlaylistModel> for PlaylistHeader {
fn from(p: PlaylistModel) -> Self { fn from(p: PlaylistModel) -> Self {
Self { Self {
name: UniCase::new(p.name), name: p.name,
duration: p.duration, duration: p.duration,
num_songs_by_genre: p.num_songs_by_genre, num_songs_by_genre: p.num_songs_by_genre,
} }
@ -93,7 +93,15 @@ impl Manager {
.filter_map(|p| p.ok()) .filter_map(|p| p.ok())
.map(PlaylistHeader::from) .map(PlaylistHeader::from)
.collect::<Vec<_>>(); .collect::<Vec<_>>();
playlists.sort_by(|a, b| a.name.cmp(&b.name));
let collator_options = {
let mut o = CollatorOptions::new();
o.strength = Some(Strength::Secondary);
o
};
let collator = Collator::try_new(&Default::default(), collator_options).unwrap();
playlists.sort_by(|a, b| collator.compare(&a.name, &b.name));
Ok(playlists) Ok(playlists)
} }
}) })
@ -335,7 +343,7 @@ mod test {
.build() .build()
.await; .await;
for name in ["a", "b", "A", "B"] { for name in ["ax", "b", "Ay", "B", "àz"] {
ctx.playlist_manager ctx.playlist_manager
.save_playlist(name, TEST_USER, Vec::new()) .save_playlist(name, TEST_USER, Vec::new())
.await .await
@ -353,6 +361,6 @@ mod test {
.map(|p| p.name.to_string()) .map(|p| p.name.to_string())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
assert_eq!(names, vec!["A", "a", "B", "b"]); assert_eq!(names, vec!["ax", "Ay", "àz", "B", "b"]);
} }
} }