Search syntax first pass

This commit is contained in:
Antoine Gersant 2024-09-20 23:52:13 -07:00
parent bc17954db9
commit b96cd2d781
5 changed files with 336 additions and 0 deletions

33
Cargo.lock generated
View file

@ -377,6 +377,16 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
dependencies = [
"hashbrown",
"stacker",
]
[[package]]
name = "chunked_transfer"
version = "1.5.0"
@ -1650,6 +1660,7 @@ dependencies = [
"bitcode",
"branca",
"bytes",
"chumsky",
"daemonize",
"embed-resource",
"getopts",
@ -1751,6 +1762,15 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "psm"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205"
dependencies = [
"cc",
]
[[package]]
name = "publicsuffix"
version = "1.5.6"
@ -2498,6 +2518,19 @@ dependencies = [
"url",
]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "standback"
version = "0.2.17"

View file

@ -15,6 +15,7 @@ axum-range = "0.4.0"
base64 = "0.22.1"
bitcode = { version = "0.6.3", features = ["serde"] }
branca = "0.10.1"
chumsky = "0.9.3"
getopts = "0.2.21"
headers = "0.4"
http = "1.1.0"

View file

@ -15,6 +15,7 @@ use crate::db::DB;
mod browser;
mod collection;
mod query;
mod search;
mod storage;

252
src/app/index/query.rs Normal file
View file

@ -0,0 +1,252 @@
use chumsky::{
error::Simple,
prelude::{choice, filter, just, none_of},
text::{int, keyword, TextParser},
Parser,
};
/// Text-valued song fields that a query comparison can name.
///
/// Each variant is produced by the lowercase keyword of the same name in the
/// query syntax (for example `albumartist` yields [`TextField::AlbumArtist`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextField {
    /// Selected by the `album` keyword.
    Album,
    /// Selected by the `albumartist` keyword.
    AlbumArtist,
    /// Selected by the `artist` keyword.
    Artist,
    /// Selected by the `composer` keyword.
    Composer,
    /// Selected by the `genre` keyword.
    Genre,
    /// Selected by the `label` keyword.
    Label,
    /// Selected by the `lyricist` keyword.
    Lyricist,
    /// Selected by the `path` keyword.
    Path,
    /// Selected by the `title` keyword.
    Title,
}
/// Comparison operators that can be applied to a text field.
///
/// The matching semantics of each operator are decided by whoever evaluates
/// the expression; this type only records which operator was written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextOp {
    /// Written `=` in the query syntax.
    Eq,
    /// Written `!=` in the query syntax.
    NotEq,
    /// Written `%` in the query syntax.
    Like,
    /// Written `!%` in the query syntax.
    NotLike,
}
/// Numeric song fields that a query comparison can name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumberField {
    /// Selected by the `discnumber` keyword.
    DiscNumber,
    /// Selected by the `tracknumber` keyword.
    TrackNumber,
    /// Selected by the `year` keyword.
    Year,
}
/// Comparison operators that can be applied to a numeric field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumberOp {
    /// Written `=` in the query syntax.
    Eq,
    /// Written `!=` in the query syntax.
    NotEq,
    /// Written `>` in the query syntax.
    Greater,
    /// Written `>=` in the query syntax.
    GreaterOrEq,
    /// Written `<` in the query syntax.
    Less,
    /// Written `<=` in the query syntax.
    LessOrEq,
}
/// A bare value appearing in a query without a field or operator.
#[derive(Debug, PartialEq, Eq)]
pub enum Literal {
    /// A quoted or unquoted run of text.
    Text(String),
    /// An integer value.
    Number(i32),
}
/// A parsed search query, represented as an expression tree.
#[derive(Debug, PartialEq, Eq)]
pub enum Expr {
    /// A bare literal with no field or operator attached.
    Fuzzy(Literal),
    /// A text field compared against a string: `(field, operator, value)`.
    TextCmp(TextField, TextOp, String),
    /// A numeric field compared against an integer: `(field, operator, value)`.
    NumberCmp(NumberField, NumberOp, i32),
    /// Combination of two sub-expressions; both operands are boxed so the
    /// type can be recursive.
    And(Box<Expr>, Box<Expr>),
    /// Alternative between two sub-expressions; both operands are boxed so
    /// the type can be recursive.
    Or(Box<Expr>, Box<Expr>),
}
pub fn make_parser() -> impl Parser<char, Expr, Error = Simple<char>> {
let quoted_str = just('"')
.ignore_then(none_of('"').repeated().collect::<String>())
.then_ignore(just('"'));
let raw_str = filter(|c: &char| !c.is_whitespace() && *c != '"')
.repeated()
.at_least(1)
.collect::<String>();
let str_ = choice((quoted_str, raw_str)).padded();
let number = int(10).map(|n: String| n.parse::<i32>().unwrap()).padded();
let text_field = choice((
keyword("album").to(TextField::Album),
keyword("albumartist").to(TextField::AlbumArtist),
keyword("artist").to(TextField::Artist),
keyword("composer").to(TextField::Composer),
keyword("genre").to(TextField::Genre),
keyword("label").to(TextField::Label),
keyword("lyricist").to(TextField::Lyricist),
keyword("path").to(TextField::Path),
keyword("title").to(TextField::Title),
))
.padded();
let text_op = choice((
just("=").to(TextOp::Eq),
just("!=").to(TextOp::NotEq),
just("%").to(TextOp::Like),
just("!%").to(TextOp::NotLike),
))
.padded();
let text_cmp = text_field
.then(text_op)
.then(str_.clone())
.map(|((a, b), c)| Expr::TextCmp(a, b, c));
let number_field = choice((
keyword("discnumber").to(NumberField::DiscNumber),
keyword("tracknumber").to(NumberField::TrackNumber),
keyword("year").to(NumberField::Year),
))
.padded();
let number_op = choice((
just("=").to(NumberOp::Eq),
just("!=").to(NumberOp::NotEq),
just(">=").to(NumberOp::GreaterOrEq),
just(">").to(NumberOp::Greater),
just("<=").to(NumberOp::LessOrEq),
just("<").to(NumberOp::Less),
))
.padded();
let number_cmp = number_field
.then(number_op)
.then(number)
.map(|((a, b), c)| Expr::NumberCmp(a, b, c));
let literal = number.map(Literal::Number).or(str_.map(Literal::Text));
let fuzzy = literal.map(Expr::Fuzzy);
text_cmp.or(number_cmp).or(fuzzy)
}
/// Bare words and bare integers both parse as fuzzy literals.
#[test]
fn can_parse_fuzzy_query() {
    let parser = make_parser();

    let text = parser.parse("rhapsody").unwrap();
    assert_eq!(text, Expr::Fuzzy(Literal::Text(String::from("rhapsody"))));

    let number = parser.parse("2005").unwrap();
    assert_eq!(number, Expr::Fuzzy(Literal::Number(2005)));
}
/// Every text field keyword is recognised and maps to its `TextField` variant.
#[test]
fn can_parse_text_fields() {
    let parser = make_parser();

    // (query, expected field, expected value)
    let cases = [
        (
            r#"album = "legendary tales""#,
            TextField::Album,
            "legendary tales",
        ),
        (
            r#"albumartist = "rhapsody""#,
            TextField::AlbumArtist,
            "rhapsody",
        ),
        (r#"artist = "rhapsody""#, TextField::Artist, "rhapsody"),
        (
            r#"composer = "yoko kanno""#,
            TextField::Composer,
            "yoko kanno",
        ),
        (r#"genre = "jazz""#, TextField::Genre, "jazz"),
        (
            r#"label = "diverse system""#,
            TextField::Label,
            "diverse system",
        ),
        (r#"lyricist = "dalida""#, TextField::Lyricist, "dalida"),
        (
            r#"path = "electronic/big beat""#,
            TextField::Path,
            "electronic/big beat",
        ),
        (
            r#"title = "emerald sword""#,
            TextField::Title,
            "emerald sword",
        ),
    ];

    for (query, field, value) in cases {
        let parsed = parser.parse(query).unwrap();
        let expected = Expr::TextCmp(field, TextOp::Eq, value.to_owned());
        assert_eq!(parsed, expected, "query: {}", query);
    }
}
/// Every text operator is recognised, with both quoted and unquoted values.
#[test]
fn can_parse_text_operators() {
    let parser = make_parser();

    // (query, expected operator, expected value)
    let cases = [
        (
            r#"album = "legendary tales""#,
            TextOp::Eq,
            "legendary tales",
        ),
        (r#"album != legendary"#, TextOp::NotEq, "legendary"),
        (
            r#"album % "legendary tales""#,
            TextOp::Like,
            "legendary tales",
        ),
        (r#"album !% "legendary""#, TextOp::NotLike, "legendary"),
    ];

    for (query, op, value) in cases {
        let parsed = parser.parse(query).unwrap();
        let expected = Expr::TextCmp(TextField::Album, op, value.to_owned());
        assert_eq!(parsed, expected, "query: {}", query);
    }
}
/// Every numeric field keyword is recognised and maps to its `NumberField` variant.
#[test]
fn can_parse_number_fields() {
    let parser = make_parser();

    // (query, expected field, expected value)
    let cases = [
        ("discnumber = 6", NumberField::DiscNumber, 6),
        ("tracknumber = 12", NumberField::TrackNumber, 12),
        ("year = 1999", NumberField::Year, 1999),
    ];

    for (query, field, value) in cases {
        let parsed = parser.parse(query).unwrap();
        let expected = Expr::NumberCmp(field, NumberOp::Eq, value);
        assert_eq!(parsed, expected, "query: {}", query);
    }
}
/// Every numeric operator is recognised, including the two-character forms.
#[test]
fn can_parse_number_operators() {
    let parser = make_parser();

    // (query, expected operator)
    let cases = [
        ("discnumber = 6", NumberOp::Eq),
        ("discnumber != 6", NumberOp::NotEq),
        ("discnumber > 6", NumberOp::Greater),
        ("discnumber >= 6", NumberOp::GreaterOrEq),
        ("discnumber < 6", NumberOp::Less),
        ("discnumber <= 6", NumberOp::LessOrEq),
    ];

    for (query, op) in cases {
        let parsed = parser.parse(query).unwrap();
        let expected = Expr::NumberCmp(NumberField::DiscNumber, op, 6);
        assert_eq!(parsed, expected, "query: {}", query);
    }
}

View file

@ -1 +1,50 @@
use std::collections::HashSet;
use crate::app::index::{
query::{Expr, Literal, NumberField, NumberOp, TextField, TextOp},
storage::SongKey,
};
/// Evaluates parsed search expressions into sets of matching songs.
///
/// The struct currently holds no data, and every leaf evaluator returns an
/// empty set.
struct SearchIndex {}

impl SearchIndex {
    /// Recursively evaluates `expr` and returns the keys of the matching songs.
    ///
    /// Leaf expressions are delegated to the dedicated evaluators; `And` and
    /// `Or` combine the results of their two operands by set intersection and
    /// set union respectively.
    fn eval_expr(&self, expr: &Expr) -> HashSet<SongKey> {
        match expr {
            Expr::Fuzzy(literal) => self.eval_fuzzy(literal),
            Expr::TextCmp(field, op, text) => self.eval_text_operator(*field, *op, text),
            Expr::NumberCmp(field, op, number) => {
                self.eval_number_operator(*field, *op, *number)
            }
            Expr::And(lhs, rhs) => {
                let left = self.eval_expr(lhs);
                let right = self.eval_expr(rhs);
                &left & &right
            }
            Expr::Or(lhs, rhs) => {
                let left = self.eval_expr(lhs);
                let right = self.eval_expr(rhs);
                &left | &right
            }
        }
    }

    /// Evaluates a bare literal. Placeholder: always returns an empty set.
    fn eval_fuzzy(&self, value: &Literal) -> HashSet<SongKey> {
        HashSet::new()
    }

    /// Evaluates a text field comparison. Placeholder: always returns an
    /// empty set.
    fn eval_text_operator(
        &self,
        field: TextField,
        operator: TextOp,
        value: &str,
    ) -> HashSet<SongKey> {
        HashSet::new()
    }

    /// Evaluates a numeric field comparison. Placeholder: always returns an
    /// empty set.
    fn eval_number_operator(
        &self,
        field: NumberField,
        operator: NumberOp,
        value: i32,
    ) -> HashSet<SongKey> {
        HashSet::new()
    }
}