Merge pull request #59 from agersant/index

Index optimizations
This commit is contained in:
Antoine Gersant 2020-01-19 01:21:37 -08:00 committed by GitHub
commit 36260dcdce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 198 additions and 95 deletions

13
Cargo.lock generated
View file

@ -1004,7 +1004,7 @@ version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -1539,6 +1539,7 @@ dependencies = [
"pbkdf2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"rocket 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1799,17 +1800,17 @@ dependencies = [
[[package]]
name = "rayon"
version = "1.2.1"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon-core 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rayon-core"
version = "1.6.1"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2926,8 +2927,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rayon 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43739f8831493b276363637423d3622d4bd6394ab6f0a9c4a552e208aeb7fddd"
"checksum rayon-core 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f8bf17de6f23b05473c437eb958b9c850bfc8af0961fe17b4cc92d5a627b4791"
"checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
"checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
"checksum redox_users 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecedbca3bf205f8d8f5c2b44d83cd0690e39ee84b951ed649e9f1841132b66d"

View file

@ -33,6 +33,7 @@ metaflac = "0.2"
mp3-duration = "0.1"
pbkdf2 = "0.3"
rand = "0.7"
rayon = "1.3"
regex = "1.2"
reqwest = "0.9.2"
rocket = { version = "0.4.2", optional = true }

View file

@ -2,6 +2,7 @@ use anyhow::*;
use ape;
use id3;
use lewton::inside_ogg::OggStreamReader;
use log::error;
use metaflac;
use mp3_duration;
use regex::Regex;
@ -24,22 +25,38 @@ pub struct SongTags {
}
#[cfg_attr(feature = "profile-index", flame)]
pub fn read(path: &Path) -> Result<SongTags> {
match utils::get_audio_format(path) {
Some(AudioFormat::FLAC) => read_flac(path),
Some(AudioFormat::MP3) => read_id3(path),
Some(AudioFormat::MPC) => read_ape(path),
Some(AudioFormat::OGG) => read_vorbis(path),
_ => bail!("Unsupported file format for reading metadata"),
pub fn read(path: &Path) -> Option<SongTags> {
let data = match utils::get_audio_format(path) {
Some(AudioFormat::FLAC) => Some(read_flac(path)),
Some(AudioFormat::MP3) => Some(read_id3(path)),
Some(AudioFormat::MPC) => Some(read_ape(path)),
Some(AudioFormat::OGG) => Some(read_vorbis(path)),
_ => None,
};
match data {
Some(Ok(d)) => Some(d),
Some(Err(e)) => {
error!("Error while reading file metadata for '{:?}': {}", path, e);
None
}
None => None,
}
}
#[cfg_attr(feature = "profile-index", flame)]
fn read_id3(path: &Path) -> Result<SongTags> {
let tag = id3::Tag::read_from_path(&path)?;
let duration = mp3_duration::from_path(&path)
let tag = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("id3_tag_read");
id3::Tag::read_from_path(&path)?
};
let duration = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("mp3_duration");
mp3_duration::from_path(&path)
.map(|d| d.as_secs() as u32)
.ok();
.ok()
};
let artist = tag.artist().map(|s| s.to_string());
let album_artist = tag.album_artist().map(|s| s.to_string());

View file

@ -11,6 +11,7 @@ use crate::db::{misc_settings, DB};
use crate::config::MiscSettings;
use crate::vfs::VFS;
mod metadata;
mod query;
#[cfg(test)]
mod test;

View file

@ -4,14 +4,16 @@ use diesel::prelude::*;
#[cfg(feature = "profile-index")]
use flame;
use log::{error, info};
use rayon::prelude::*;
use regex::Regex;
use std::fs;
use std::path::Path;
use std::time;
use std::sync::mpsc::*;
use crate::config::MiscSettings;
use crate::db::{directories, misc_settings, songs, DB};
use crate::metadata;
use crate::index::metadata;
use crate::vfs::VFSSource;
const INDEX_BUILDING_INSERT_BUFFER_SIZE: usize = 1000; // Insertions in each transaction
@ -24,7 +26,7 @@ pub fn update(db: &DB) -> Result<()> {
populate(db)?;
info!(
"Library index update took {} seconds",
start.elapsed().as_secs()
start.elapsed().as_millis() as f32 / 1000.0
);
#[cfg(feature = "profile-index")]
flame::dump_html(&mut fs::File::create("index-flame-graph.html").unwrap()).unwrap();
@ -59,64 +61,30 @@ struct NewDirectory {
date_added: i32,
}
struct IndexBuilder {
new_songs: Vec<NewSong>,
new_directories: Vec<NewDirectory>,
db: DB,
struct IndexUpdater {
directory_sender: Sender<NewDirectory>,
song_sender: Sender<NewSong>,
album_art_pattern: Regex,
}
impl IndexBuilder {
impl IndexUpdater {
#[cfg_attr(feature = "profile-index", flame)]
fn new(db: DB, album_art_pattern: Regex) -> Result<IndexBuilder> {
let mut new_songs = Vec::new();
let mut new_directories = Vec::new();
new_songs.reserve_exact(INDEX_BUILDING_INSERT_BUFFER_SIZE);
new_directories.reserve_exact(INDEX_BUILDING_INSERT_BUFFER_SIZE);
Ok(IndexBuilder {
new_songs,
new_directories,
db,
fn new(album_art_pattern: Regex, directory_sender: Sender<NewDirectory>, song_sender: Sender<NewSong>) -> Result<IndexUpdater> {
Ok(IndexUpdater {
directory_sender,
song_sender,
album_art_pattern,
})
}
#[cfg_attr(feature = "profile-index", flame)]
fn flush_songs(&mut self) -> Result<()> {
let connection = self.db.connect()?;
diesel::insert_into(songs::table)
.values(&self.new_songs)
.execute(&*connection)?; // TODO https://github.com/diesel-rs/diesel/issues/1822
self.new_songs.clear();
Ok(())
}
#[cfg_attr(feature = "profile-index", flame)]
fn flush_directories(&mut self) -> Result<()> {
let connection = self.db.connect()?;
diesel::insert_into(directories::table)
.values(&self.new_directories)
.execute(&*connection)?; // TODO https://github.com/diesel-rs/diesel/issues/1822
self.new_directories.clear();
Ok(())
}
#[cfg_attr(feature = "profile-index", flame)]
fn push_song(&mut self, song: NewSong) -> Result<()> {
if self.new_songs.len() >= self.new_songs.capacity() {
self.flush_songs()?;
}
self.new_songs.push(song);
Ok(())
self.song_sender.send(song).map_err(Error::new)
}
#[cfg_attr(feature = "profile-index", flame)]
fn push_directory(&mut self, directory: NewDirectory) -> Result<()> {
if self.new_directories.len() >= self.new_directories.capacity() {
self.flush_directories()?;
}
self.new_directories.push(directory);
Ok(())
self.directory_sender.send(directory).map_err(Error::new)
}
fn get_artwork(&self, dir: &Path) -> Result<Option<String>> {
@ -134,19 +102,31 @@ impl IndexBuilder {
#[cfg_attr(feature = "profile-index", flame)]
fn populate_directory(&mut self, parent: Option<&Path>, path: &Path) -> Result<()> {
// Find artwork
let artwork = self.get_artwork(path).unwrap_or(None);
let artwork = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("artwork");
self.get_artwork(path).unwrap_or(None)
};
// Extract path and parent path
let parent_string = parent.and_then(|p| p.to_str()).map(|s| s.to_owned());
let path_string = path.to_str().ok_or(anyhow!("Invalid directory path"))?;
// Find date added
let metadata = fs::metadata(path_string)?;
let created = metadata
let metadata = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("metadata");
fs::metadata(path_string)?
};
let created = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("created_date");
metadata
.created()
.or_else(|_| metadata.modified())?
.duration_since(time::UNIX_EPOCH)?
.as_secs() as i32;
.as_secs() as i32
};
let mut directory_album = None;
let mut directory_year = None;
@ -176,7 +156,11 @@ impl IndexBuilder {
}
if let Some(file_path_string) = file_path.to_str() {
if let Ok(tags) = metadata::read(file_path.as_path()) {
if let Some(tags) = metadata::read(file_path.as_path()) {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("process_song");
if tags.year.is_some() {
inconsistent_directory_year |=
directory_year.is_some() && directory_year != tags.year;
@ -219,6 +203,10 @@ impl IndexBuilder {
}
// Insert directory
let directory = {
#[cfg(feature = "profile-index")]
let _guard = flame::start_guard("create_directory");
if inconsistent_directory_year {
directory_year = None;
}
@ -229,7 +217,7 @@ impl IndexBuilder {
directory_artist = None;
}
let directory = NewDirectory {
NewDirectory {
path: path_string.to_owned(),
parent: parent_string,
artwork,
@ -237,7 +225,9 @@ impl IndexBuilder {
artist: directory_artist,
year: directory_year,
date_added: created,
}
};
self.push_directory(directory)?;
// Populate subdirectories
@ -261,7 +251,7 @@ pub fn clean(db: &DB) -> Result<()> {
}
let missing_songs = all_songs
.into_iter()
.par_iter()
.filter(|ref song_path| {
let path = Path::new(&song_path);
!path.exists() || vfs.real_to_virtual(path).is_err()
@ -287,7 +277,7 @@ pub fn clean(db: &DB) -> Result<()> {
}
let missing_directories = all_directories
.into_iter()
.par_iter()
.filter(|ref directory_path| {
let path = Path::new(&directory_path);
!path.exists() || vfs.real_to_virtual(path).is_err()
@ -311,18 +301,112 @@ pub fn populate(db: &DB) -> Result<()> {
let vfs = db.get_vfs()?;
let mount_points = vfs.get_mount_points();
let album_art_pattern;
{
let album_art_pattern = {
let connection = db.connect()?;
let settings: MiscSettings = misc_settings::table.get_result(&connection)?;
album_art_pattern = Regex::new(&settings.index_album_art_pattern)?;
Regex::new(&settings.index_album_art_pattern)?
};
let (directory_sender, directory_receiver) = channel();
let (song_sender, song_receiver) = channel();
let songs_db = db.clone();
let directories_db = db.clone();
let directories_thread = std::thread::spawn(move || {
insert_directories(directory_receiver, directories_db);
});
let songs_thread = std::thread::spawn(move || {
insert_songs(song_receiver, songs_db);
});
{
let mut updater = IndexUpdater::new(album_art_pattern, directory_sender, song_sender)?;
for target in mount_points.values() {
updater.populate_directory(None, target.as_path())?;
}
}
let mut builder = IndexBuilder::new(db.clone(), album_art_pattern)?;
for target in mount_points.values() {
builder.populate_directory(None, target.as_path())?;
match directories_thread.join() {
Err(e) => error!("Error while waiting for directory insertions to complete: {:?}", e),
_ => (),
}
builder.flush_songs()?;
builder.flush_directories()?;
match songs_thread.join() {
Err(e) => error!("Error while waiting for song insertions to complete: {:?}", e),
_ => (),
}
Ok(())
}
fn flush_directories(db: &DB, entries: &Vec<NewDirectory>) {
if db.connect()
.and_then(|connection|{
diesel::insert_into(directories::table)
.values(entries)
.execute(&*connection) // TODO https://github.com/diesel-rs/diesel/issues/1822
.map_err(Error::new)
})
.is_err() {
error!("Could not insert new directories in database");
}
}
fn flush_songs(db: &DB, entries: &Vec<NewSong>) {
if db.connect()
.and_then(|connection|{
diesel::insert_into(songs::table)
.values(entries)
.execute(&*connection) // TODO https://github.com/diesel-rs/diesel/issues/1822
.map_err(Error::new)
})
.is_err() {
error!("Could not insert new songs in database");
}
}
/// Receives directories from `receiver` and writes them to the database in
/// batches of `INDEX_BUILDING_INSERT_BUFFER_SIZE`.
///
/// Runs until `recv` reports an error, then flushes whatever is still
/// buffered before returning.
fn insert_directories(receiver: Receiver<NewDirectory>, db: DB) {
    let mut pending = Vec::with_capacity(INDEX_BUILDING_INSERT_BUFFER_SIZE);

    while let Ok(directory) = receiver.recv() {
        pending.push(directory);
        let batch_is_full = pending.len() >= INDEX_BUILDING_INSERT_BUFFER_SIZE;
        if batch_is_full {
            flush_directories(&db, &pending);
            // The batch is dropped whether or not the flush succeeded.
            pending.clear();
        }
    }

    // Write out the final, partially filled batch.
    if !pending.is_empty() {
        flush_directories(&db, &pending);
    }
}
/// Receives songs from `receiver` and writes them to the database in batches
/// of `INDEX_BUILDING_INSERT_BUFFER_SIZE`.
///
/// Runs until `recv` reports an error, then flushes whatever is still
/// buffered before returning.
fn insert_songs(receiver: Receiver<NewSong>, db: DB) {
    let mut pending = Vec::with_capacity(INDEX_BUILDING_INSERT_BUFFER_SIZE);

    while let Ok(song) = receiver.recv() {
        pending.push(song);
        if pending.len() < INDEX_BUILDING_INSERT_BUFFER_SIZE {
            // Batch not full yet; keep accumulating.
            continue;
        }
        flush_songs(&db, &pending);
        // The batch is dropped whether or not the flush succeeded.
        pending.clear();
    }

    // Write out the final, partially filled batch.
    if !pending.is_empty() {
        flush_songs(&db, &pending);
    }
}

View file

@ -31,7 +31,6 @@ mod db;
mod ddns;
mod index;
mod lastfm;
mod metadata;
mod playlist;
mod service;