Merge pull request #59 from agersant/index

Index optimizations
This commit is contained in:
Antoine Gersant 2020-01-19 01:21:37 -08:00 committed by GitHub
commit 36260dcdce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 198 additions and 95 deletions

13
Cargo.lock generated
View file

@ -1004,7 +1004,7 @@ version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]
@ -1539,6 +1539,7 @@ dependencies = [
"pbkdf2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "pbkdf2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"rocket 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "rocket 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1799,17 +1800,17 @@ dependencies = [
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.2.1" version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon-core 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]
name = "rayon-core" name = "rayon-core"
version = "1.6.1" version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2926,8 +2927,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" "checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" "checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" "checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rayon 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43739f8831493b276363637423d3622d4bd6394ab6f0a9c4a552e208aeb7fddd" "checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
"checksum rayon-core 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f8bf17de6f23b05473c437eb958b9c850bfc8af0961fe17b4cc92d5a627b4791" "checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" "checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" "checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d" "checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d"

View file

@ -33,6 +33,7 @@ metaflac = "0.2"
mp3-duration = "0.1" mp3-duration = "0.1"
pbkdf2 = "0.3" pbkdf2 = "0.3"
rand = "0.7" rand = "0.7"
rayon = "1.3"
regex = "1.2" regex = "1.2"
reqwest = "0.9.2" reqwest = "0.9.2"
rocket = { version = "0.4.2", optional = true } rocket = { version = "0.4.2", optional = true }

View file

@ -2,6 +2,7 @@ use anyhow::*;
use ape; use ape;
use id3; use id3;
use lewton::inside_ogg::OggStreamReader; use lewton::inside_ogg::OggStreamReader;
use log::error;
use metaflac; use metaflac;
use mp3_duration; use mp3_duration;
use regex::Regex; use regex::Regex;
@ -24,22 +25,38 @@ pub struct SongTags {
} }
#[cfg_attr(feature = "profile-index", flame)] #[cfg_attr(feature = "profile-index", flame)]
pub fn read(path: &Path) -> Result<SongTags> { pub fn read(path: &Path) -> Option<SongTags> {
match utils::get_audio_format(path) { let data = match utils::get_audio_format(path) {
Some(AudioFormat::FLAC) => read_flac(path), Some(AudioFormat::FLAC) => Some(read_flac(path)),
Some(AudioFormat::MP3) => read_id3(path), Some(AudioFormat::MP3) => Some(read_id3(path)),
Some(AudioFormat::MPC) => read_ape(path), Some(AudioFormat::MPC) => Some(read_ape(path)),
Some(AudioFormat::OGG) => read_vorbis(path), Some(AudioFormat::OGG) => Some(read_vorbis(path)),
_ => bail!("Unsupported file format for reading metadata"), _ => None,
};
match data {
Some(Ok(d)) => Some(d),
Some(Err(e)) => {
error!("Error while reading file metadata for '{:?}': {}", path, e);
None
}
None => None,
} }
} }
#[cfg_attr(feature = "profile-index", flame)] #[cfg_attr(feature = "profile-index", flame)]
fn read_id3(path: &Path) -> Result<SongTags> { fn read_id3(path: &Path) -> Result<SongTags> {
let tag = id3::Tag::read_from_path(&path)?; let tag = {
let duration = mp3_duration::from_path(&path) #[cfg(feature = "profile-index")]
let _guard = flame::start_guard("id3_tag_read");
id3::Tag::read_from_path(&path)?
};
let duration = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("mp3_duration");
mp3_duration::from_path(&path)
.map(|d| d.as_secs() as u32) .map(|d| d.as_secs() as u32)
.ok(); .ok()
};
let artist = tag.artist().map(|s| s.to_string()); let artist = tag.artist().map(|s| s.to_string());
let album_artist = tag.album_artist().map(|s| s.to_string()); let album_artist = tag.album_artist().map(|s| s.to_string());

View file

@ -11,6 +11,7 @@ use crate::db::{misc_settings, DB};
use crate::config::MiscSettings; use crate::config::MiscSettings;
use crate::vfs::VFS; use crate::vfs::VFS;
mod metadata;
mod query; mod query;
#[cfg(test)] #[cfg(test)]
mod test; mod test;

View file

@ -4,14 +4,16 @@ use diesel::prelude::*;
#[cfg(feature = "profile-index")] #[cfg(feature = "profile-index")]
use flame; use flame;
use log::{error, info}; use log::{error, info};
use rayon::prelude::*;
use regex::Regex; use regex::Regex;
use std::fs; use std::fs;
use std::path::Path; use std::path::Path;
use std::time; use std::time;
use std::sync::mpsc::*;
use crate::config::MiscSettings; use crate::config::MiscSettings;
use crate::db::{directories, misc_settings, songs, DB}; use crate::db::{directories, misc_settings, songs, DB};
use crate::metadata; use crate::index::metadata;
use crate::vfs::VFSSource; use crate::vfs::VFSSource;
const INDEX_BUILDING_INSERT_BUFFER_SIZE: usize = 1000; // Insertions in each transaction const INDEX_BUILDING_INSERT_BUFFER_SIZE: usize = 1000; // Insertions in each transaction
@ -24,7 +26,7 @@ pub fn update(db: &DB) -> Result<()> {
populate(db)?; populate(db)?;
info!( info!(
"Library index update took {} seconds", "Library index update took {} seconds",
start.elapsed().as_secs() start.elapsed().as_millis() as f32 / 1000.0
); );
#[cfg(feature = "profile-index")] #[cfg(feature = "profile-index")]
flame::dump_html(&mut fs::File::create("index-flame-graph.html").unwrap()).unwrap(); flame::dump_html(&mut fs::File::create("index-flame-graph.html").unwrap()).unwrap();
@ -59,64 +61,30 @@ struct NewDirectory {
date_added: i32, date_added: i32,
} }
struct IndexBuilder { struct IndexUpdater {
new_songs: Vec<NewSong>, directory_sender: Sender<NewDirectory>,
new_directories: Vec<NewDirectory>, song_sender: Sender<NewSong>,
db: DB,
album_art_pattern: Regex, album_art_pattern: Regex,
} }
impl IndexBuilder { impl IndexUpdater {
#[cfg_attr(feature = "profile-index", flame)] #[cfg_attr(feature = "profile-index", flame)]
fn new(db: DB, album_art_pattern: Regex) -> Result<IndexBuilder> { fn new(album_art_pattern: Regex, directory_sender: Sender<NewDirectory>, song_sender: Sender<NewSong>) -> Result<IndexUpdater> {
let mut new_songs = Vec::new(); Ok(IndexUpdater {
let mut new_directories = Vec::new(); directory_sender,
new_songs.reserve_exact(INDEX_BUILDING_INSERT_BUFFER_SIZE); song_sender,
new_directories.reserve_exact(INDEX_BUILDING_INSERT_BUFFER_SIZE);
Ok(IndexBuilder {
new_songs,
new_directories,
db,
album_art_pattern, album_art_pattern,
}) })
} }
#[cfg_attr(feature = "profile-index", flame)]
fn flush_songs(&mut self) -> Result<()> {
let connection = self.db.connect()?;
diesel::insert_into(songs::table)
.values(&self.new_songs)
.execute(&*connection)?; // TODO https://github.com/diesel-rs/diesel/issues/1822
self.new_songs.clear();
Ok(())
}
#[cfg_attr(feature = "profile-index", flame)]
fn flush_directories(&mut self) -> Result<()> {
let connection = self.db.connect()?;
diesel::insert_into(directories::table)
.values(&self.new_directories)
.execute(&*connection)?; // TODO https://github.com/diesel-rs/diesel/issues/1822
self.new_directories.clear();
Ok(())
}
#[cfg_attr(feature = "profile-index", flame)] #[cfg_attr(feature = "profile-index", flame)]
fn push_song(&mut self, song: NewSong) -> Result<()> { fn push_song(&mut self, song: NewSong) -> Result<()> {
if self.new_songs.len() >= self.new_songs.capacity() { self.song_sender.send(song).map_err(Error::new)
self.flush_songs()?;
}
self.new_songs.push(song);
Ok(())
} }
#[cfg_attr(feature = "profile-index", flame)] #[cfg_attr(feature = "profile-index", flame)]
fn push_directory(&mut self, directory: NewDirectory) -> Result<()> { fn push_directory(&mut self, directory: NewDirectory) -> Result<()> {
if self.new_directories.len() >= self.new_directories.capacity() { self.directory_sender.send(directory).map_err(Error::new)
self.flush_directories()?;
}
self.new_directories.push(directory);
Ok(())
} }
fn get_artwork(&self, dir: &Path) -> Result<Option<String>> { fn get_artwork(&self, dir: &Path) -> Result<Option<String>> {
@ -134,19 +102,31 @@ impl IndexBuilder {
#[cfg_attr(feature = "profile-index", flame)] #[cfg_attr(feature = "profile-index", flame)]
fn populate_directory(&mut self, parent: Option<&Path>, path: &Path) -> Result<()> { fn populate_directory(&mut self, parent: Option<&Path>, path: &Path) -> Result<()> {
// Find artwork // Find artwork
let artwork = self.get_artwork(path).unwrap_or(None); let artwork = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("artwork");
self.get_artwork(path).unwrap_or(None)
};
// Extract path and parent path // Extract path and parent path
let parent_string = parent.and_then(|p| p.to_str()).map(|s| s.to_owned()); let parent_string = parent.and_then(|p| p.to_str()).map(|s| s.to_owned());
let path_string = path.to_str().ok_or(anyhow!("Invalid directory path"))?; let path_string = path.to_str().ok_or(anyhow!("Invalid directory path"))?;
// Find date added // Find date added
let metadata = fs::metadata(path_string)?; let metadata = {
let created = metadata #[cfg(feature = "profile-index")]
let _guard = flame::start_guard("metadata");
fs::metadata(path_string)?
};
let created = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("created_date");
metadata
.created() .created()
.or_else(|_| metadata.modified())? .or_else(|_| metadata.modified())?
.duration_since(time::UNIX_EPOCH)? .duration_since(time::UNIX_EPOCH)?
.as_secs() as i32; .as_secs() as i32
};
let mut directory_album = None; let mut directory_album = None;
let mut directory_year = None; let mut directory_year = None;
@ -176,7 +156,11 @@ impl IndexBuilder {
} }
if let Some(file_path_string) = file_path.to_str() { if let Some(file_path_string) = file_path.to_str() {
if let Ok(tags) = metadata::read(file_path.as_path()) { if let Some(tags) = metadata::read(file_path.as_path()) {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("process_song");
if tags.year.is_some() { if tags.year.is_some() {
inconsistent_directory_year |= inconsistent_directory_year |=
directory_year.is_some() && directory_year != tags.year; directory_year.is_some() && directory_year != tags.year;
@ -219,25 +203,31 @@ impl IndexBuilder {
} }
// Insert directory // Insert directory
if inconsistent_directory_year { let directory = {
directory_year = None; #[cfg(feature = "profile-index")]
} let _guard = flame::start_guard("create_directory");
if inconsistent_directory_album {
directory_album = None;
}
if inconsistent_directory_artist {
directory_artist = None;
}
let directory = NewDirectory { if inconsistent_directory_year {
path: path_string.to_owned(), directory_year = None;
parent: parent_string, }
artwork, if inconsistent_directory_album {
album: directory_album, directory_album = None;
artist: directory_artist, }
year: directory_year, if inconsistent_directory_artist {
date_added: created, directory_artist = None;
}
NewDirectory {
path: path_string.to_owned(),
parent: parent_string,
artwork,
album: directory_album,
artist: directory_artist,
year: directory_year,
date_added: created,
}
}; };
self.push_directory(directory)?; self.push_directory(directory)?;
// Populate subdirectories // Populate subdirectories
@ -261,7 +251,7 @@ pub fn clean(db: &DB) -> Result<()> {
} }
let missing_songs = all_songs let missing_songs = all_songs
.into_iter() .par_iter()
.filter(|ref song_path| { .filter(|ref song_path| {
let path = Path::new(&song_path); let path = Path::new(&song_path);
!path.exists() || vfs.real_to_virtual(path).is_err() !path.exists() || vfs.real_to_virtual(path).is_err()
@ -287,7 +277,7 @@ pub fn clean(db: &DB) -> Result<()> {
} }
let missing_directories = all_directories let missing_directories = all_directories
.into_iter() .par_iter()
.filter(|ref directory_path| { .filter(|ref directory_path| {
let path = Path::new(&directory_path); let path = Path::new(&directory_path);
!path.exists() || vfs.real_to_virtual(path).is_err() !path.exists() || vfs.real_to_virtual(path).is_err()
@ -311,18 +301,112 @@ pub fn populate(db: &DB) -> Result<()> {
let vfs = db.get_vfs()?; let vfs = db.get_vfs()?;
let mount_points = vfs.get_mount_points(); let mount_points = vfs.get_mount_points();
let album_art_pattern; let album_art_pattern = {
{
let connection = db.connect()?; let connection = db.connect()?;
let settings: MiscSettings = misc_settings::table.get_result(&connection)?; let settings: MiscSettings = misc_settings::table.get_result(&connection)?;
album_art_pattern = Regex::new(&settings.index_album_art_pattern)?; Regex::new(&settings.index_album_art_pattern)?
};
let (directory_sender, directory_receiver) = channel();
let (song_sender, song_receiver) = channel();
let songs_db = db.clone();
let directories_db = db.clone();
let directories_thread = std::thread::spawn(move || {
insert_directories(directory_receiver, directories_db);
});
let songs_thread = std::thread::spawn(move || {
insert_songs(song_receiver, songs_db);
});
{
let mut updater = IndexUpdater::new(album_art_pattern, directory_sender, song_sender)?;
for target in mount_points.values() {
updater.populate_directory(None, target.as_path())?;
}
} }
let mut builder = IndexBuilder::new(db.clone(), album_art_pattern)?; match directories_thread.join() {
for target in mount_points.values() { Err(e) => error!("Error while waiting for directory insertions to complete: {:?}", e),
builder.populate_directory(None, target.as_path())?; _ => (),
} }
builder.flush_songs()?;
builder.flush_directories()?; match songs_thread.join() {
Err(e) => error!("Error while waiting for song insertions to complete: {:?}", e),
_ => (),
}
Ok(()) Ok(())
} }
fn flush_directories(db: &DB, entries: &Vec<NewDirectory>) {
if db.connect()
.and_then(|connection|{
diesel::insert_into(directories::table)
.values(entries)
.execute(&*connection) // TODO https://github.com/diesel-rs/diesel/issues/1822
.map_err(Error::new)
})
.is_err() {
error!("Could not insert new directories in database");
}
}
fn flush_songs(db: &DB, entries: &Vec<NewSong>) {
if db.connect()
.and_then(|connection|{
diesel::insert_into(songs::table)
.values(entries)
.execute(&*connection) // TODO https://github.com/diesel-rs/diesel/issues/1822
.map_err(Error::new)
})
.is_err() {
error!("Could not insert new songs in database");
}
}
/// Drains `receiver` and writes the received directories to the database in
/// batches.
///
/// Entries are buffered until `INDEX_BUILDING_INSERT_BUFFER_SIZE` of them have
/// accumulated, then handed to `flush_directories` and the buffer is emptied.
/// Iteration stops once the channel reports that it is disconnected; anything
/// still buffered at that point is flushed as a final, smaller batch.
fn insert_directories(receiver: Receiver<NewDirectory>, db: DB) {
    let mut pending = Vec::with_capacity(INDEX_BUILDING_INSERT_BUFFER_SIZE);

    // `iter()` blocks on each receive and ends when `recv` would return an error.
    for directory in receiver.iter() {
        pending.push(directory);
        if pending.len() >= INDEX_BUILDING_INSERT_BUFFER_SIZE {
            flush_directories(&db, &pending);
            pending.clear();
        }
    }

    if !pending.is_empty() {
        flush_directories(&db, &pending);
    }
}
/// Drains `receiver` and writes the received songs to the database in batches.
///
/// Entries are buffered until `INDEX_BUILDING_INSERT_BUFFER_SIZE` of them have
/// accumulated, then handed to `flush_songs` and the buffer is emptied.
/// Iteration stops once the channel reports that it is disconnected; anything
/// still buffered at that point is flushed as a final, smaller batch.
fn insert_songs(receiver: Receiver<NewSong>, db: DB) {
    let mut pending = Vec::with_capacity(INDEX_BUILDING_INSERT_BUFFER_SIZE);

    // `iter()` blocks on each receive and ends when `recv` would return an error.
    for song in receiver.iter() {
        pending.push(song);
        if pending.len() >= INDEX_BUILDING_INSERT_BUFFER_SIZE {
            flush_songs(&db, &pending);
            pending.clear();
        }
    }

    if !pending.is_empty() {
        flush_songs(&db, &pending);
    }
}

View file

@ -31,7 +31,6 @@ mod db;
mod ddns; mod ddns;
mod index; mod index;
mod lastfm; mod lastfm;
mod metadata;
mod playlist; mod playlist;
mod service; mod service;