mirror of
https://github.com/navidrome/navidrome.git
synced 2026-04-28 03:19:38 +00:00
feat(matcher): add Matcher.PreferStarred option to bias fuzzy matcher toward starred/high-rated tracks (#5387)
* matcher: update godoc for matcher config scoring order
* conf: log deprecated SimilarSongsMatchThreshold option
* conf: enable matcher prefer-starred by default
This commit is contained in:
parent
28eba567a7
commit
3b7d3f4383
4 changed files with 88 additions and 26 deletions
|
|
@ -60,8 +60,8 @@ type configOptions struct {
|
|||
SmartPlaylistRefreshDelay time.Duration
|
||||
AutoTranscodeDownload bool
|
||||
DefaultDownsamplingFormat string
|
||||
Search searchOptions `json:",omitzero"`
|
||||
SimilarSongsMatchThreshold int
|
||||
Search searchOptions `json:",omitzero"`
|
||||
Matcher matcherOptions `json:",omitzero"`
|
||||
RecentlyAddedByModTime bool
|
||||
PreferSortTags bool
|
||||
IgnoredArticles string
|
||||
|
|
@ -261,6 +261,11 @@ type searchOptions struct {
|
|||
FullString bool
|
||||
}
|
||||
|
||||
// matcherOptions holds the settings exposed under the Matcher config key.
type matcherOptions struct {
|
||||
// PreferStarred, when true, lets the fuzzy matcher favor tracks that are starred or
// rated 4 and above. Defaults to true.
PreferStarred bool
|
||||
// FuzzyThreshold is the Jaro-Winkler similarity threshold for fuzzy matching, expressed
// as a percentage (the matcher divides it by 100). Defaults to 85. It replaces the
// deprecated SimilarSongsMatchThreshold option.
FuzzyThreshold int
|
||||
}
|
||||
|
||||
// logFatal prints a fatal error message to stderr and exits.
|
||||
// Overridden in tests to allow testing fatal paths.
|
||||
var logFatal = func(args ...any) {
|
||||
|
|
@ -291,6 +296,7 @@ func Load(noConfigDump bool) {
|
|||
mapDeprecatedOption("ReverseProxyUserHeader", "ExtAuth.UserHeader")
|
||||
mapDeprecatedOption("HTTPSecurityHeaders.CustomFrameOptionsValue", "HTTPHeaders.FrameOptions")
|
||||
mapDeprecatedOption("CoverJpegQuality", "CoverArtQuality")
|
||||
mapDeprecatedOption("SimilarSongsMatchThreshold", "Matcher.FuzzyThreshold")
|
||||
|
||||
err := viper.Unmarshal(&Server)
|
||||
if err != nil {
|
||||
|
|
@ -424,6 +430,7 @@ func Load(noConfigDump bool) {
|
|||
logDeprecatedOptions("ReverseProxyUserHeader", "ExtAuth.UserHeader")
|
||||
logDeprecatedOptions("HTTPSecurityHeaders.CustomFrameOptionsValue", "HTTPHeaders.FrameOptions")
|
||||
logDeprecatedOptions("CoverJpegQuality", "CoverArtQuality")
|
||||
logDeprecatedOptions("SimilarSongsMatchThreshold", "Matcher.FuzzyThreshold")
|
||||
|
||||
// Removed options
|
||||
logRemovedOptions("Spotify.ID", "Spotify.Secret")
|
||||
|
|
@ -716,7 +723,8 @@ func setViperDefaults() {
|
|||
viper.SetDefault("defaultdownsamplingformat", consts.DefaultDownsamplingFormat)
|
||||
viper.SetDefault("search.fullstring", false)
|
||||
viper.SetDefault("search.backend", "fts")
|
||||
viper.SetDefault("similarsongsmatchthreshold", 85)
|
||||
viper.SetDefault("matcher.preferstarred", true)
|
||||
viper.SetDefault("matcher.fuzzythreshold", 85)
|
||||
viper.SetDefault("recentlyaddedbymodtime", false)
|
||||
viper.SetDefault("prefersorttags", false)
|
||||
viper.SetDefault("ignoredarticles", "The El La Los Las Le Les Os As O A")
|
||||
|
|
|
|||
2
core/external/provider_topsongs_test.go
vendored
2
core/external/provider_topsongs_test.go
vendored
|
|
@ -30,7 +30,7 @@ var _ = Describe("Provider - TopSongs", func() {
|
|||
BeforeEach(func() {
|
||||
DeferCleanup(configtest.SetupConfig())
|
||||
// Disable fuzzy matching for these tests to avoid unexpected GetAll calls
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
|
||||
ctx = GinkgoT().Context()
|
||||
|
||||
|
|
|
|||
|
|
@ -46,18 +46,20 @@ func New(ds model.DataStore) *Matcher {
|
|||
// # Fuzzy Matching Details
|
||||
//
|
||||
// For title+artist matching, the algorithm uses Jaro-Winkler similarity (threshold configurable
|
||||
// via SimilarSongsMatchThreshold, default 85%). Matches are ranked by:
|
||||
// via Matcher.FuzzyThreshold, default 85%). Matches are ranked by:
|
||||
//
|
||||
// 1. Title similarity (Jaro-Winkler score, 0.0-1.0)
|
||||
// 2. Duration proximity (closer duration = higher score, 1.0 if unknown)
|
||||
// 3. Specificity level (0-5, based on metadata precision):
|
||||
// 3. Preferred track flag (enabled by Matcher.PreferStarred; prioritized when the track is
|
||||
// starred or has rating >= 4)
|
||||
// 4. Specificity level (0-5, based on metadata precision):
|
||||
// - Level 5: Title + Artist MBID + Album MBID (most specific)
|
||||
// - Level 4: Title + Artist MBID + Album name (fuzzy)
|
||||
// - Level 3: Title + Artist name + Album name (fuzzy)
|
||||
// - Level 2: Title + Artist MBID
|
||||
// - Level 1: Title + Artist name
|
||||
// - Level 0: Title only
|
||||
// 4. Album similarity (Jaro-Winkler, as final tiebreaker)
|
||||
// 5. Album similarity (Jaro-Winkler, as final tiebreaker)
|
||||
//
|
||||
// # Examples
|
||||
//
|
||||
|
|
@ -250,6 +252,7 @@ type songQuery struct {
|
|||
// matchScore collects the criteria used to rank one candidate track against a query.
// Two scores are compared with betterThan. Field order here is not the comparison order:
// betterThan checks preferredMatch before specificityLevel.
type matchScore struct {
|
||||
// titleSimilarity is the Jaro-Winkler title score (0.0-1.0).
titleSimilarity float64
|
||||
// durationProximity is higher the closer the candidate's duration is to the query's.
durationProximity float64
|
||||
// preferredMatch is true when Matcher.PreferStarred is enabled and the candidate is
// starred or rated 4 and above.
preferredMatch bool
|
||||
// albumSimilarity is the album-name similarity; per the matcher godoc it is the final
// tiebreaker.
albumSimilarity float64
|
||||
// specificityLevel says how precise the metadata match is (0-5 per the matcher godoc;
// higher is more specific).
specificityLevel int
|
||||
}
|
||||
|
|
@ -262,6 +265,9 @@ func (s matchScore) betterThan(other matchScore) bool {
|
|||
if s.durationProximity != other.durationProximity {
|
||||
return s.durationProximity > other.durationProximity
|
||||
}
|
||||
if s.preferredMatch != other.preferredMatch {
|
||||
return s.preferredMatch
|
||||
}
|
||||
if s.specificityLevel != other.specificityLevel {
|
||||
return s.specificityLevel > other.specificityLevel
|
||||
}
|
||||
|
|
@ -322,7 +328,7 @@ func (m *Matcher) loadTracksByTitleAndArtist(ctx context.Context, songs []agents
|
|||
return map[string]model.MediaFile{}, nil
|
||||
}
|
||||
|
||||
threshold := float64(conf.Server.SimilarSongsMatchThreshold) / 100.0
|
||||
threshold := float64(conf.Server.Matcher.FuzzyThreshold) / 100.0
|
||||
|
||||
byArtist := map[string][]songQuery{}
|
||||
for _, q := range queries {
|
||||
|
|
@ -393,6 +399,7 @@ func (m *Matcher) findBestMatch(q songQuery, sanitizedTracks []sanitizedTrack, t
|
|||
score := matchScore{
|
||||
titleSimilarity: titleSim,
|
||||
durationProximity: durationProximity(q.durationMs, t.mf.Duration),
|
||||
preferredMatch: conf.Server.Matcher.PreferStarred && isPreferredTrack(t.mf),
|
||||
albumSimilarity: albumSim,
|
||||
specificityLevel: computeSpecificityLevel(q, t, threshold),
|
||||
}
|
||||
|
|
@ -406,6 +413,10 @@ func (m *Matcher) findBestMatch(q songQuery, sanitizedTracks []sanitizedTrack, t
|
|||
return bestMatch, found
|
||||
}
|
||||
|
||||
// isPreferredTrack reports whether mf is starred or has a rating of 4 or higher.
// It does not consult Matcher.PreferStarred; the caller applies that switch.
// mf is dereferenced unconditionally, so it must be non-nil.
func isPreferredTrack(mf *model.MediaFile) bool {
|
||||
return mf.Starred || mf.Rating >= 4
|
||||
}
|
||||
|
||||
// buildTitleQueries converts agent songs into normalized songQuery structs for title+artist matching.
|
||||
func (m *Matcher) buildTitleQueries(songs []agents.Song, priorMatches ...map[string]model.MediaFile) []songQuery {
|
||||
var queries []songQuery
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ var _ = Describe("Matcher", func() {
|
|||
Describe("MatchSongsToLibrary", func() {
|
||||
Context("matching by direct ID", func() {
|
||||
It("matches songs with an ID field to MediaFiles by ID", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
songs := []agents.Song{
|
||||
{ID: "track-1", Name: "Some Song", Artist: "Some Artist"},
|
||||
}
|
||||
|
|
@ -96,7 +96,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("matching by MBID", func() {
|
||||
It("matches songs with MBID to tracks with matching mbz_recording_id", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
songs := []agents.Song{
|
||||
{Name: "Paranoid Android", MBID: "abc-123", Artist: "Radiohead"},
|
||||
}
|
||||
|
|
@ -115,7 +115,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("matching by ISRC", func() {
|
||||
It("matches songs with ISRC to tracks with matching ISRC tag", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
songs := []agents.Song{
|
||||
{Name: "Paranoid Android", ISRC: "GBAYE0000351", Artist: "Radiohead"},
|
||||
}
|
||||
|
|
@ -134,7 +134,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("fuzzy title+artist matching", func() {
|
||||
It("matches songs by title and artist name", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
songs := []agents.Song{
|
||||
{Name: "Enjoy the Silence", Artist: "Depeche Mode"},
|
||||
}
|
||||
|
|
@ -149,7 +149,7 @@ var _ = Describe("Matcher", func() {
|
|||
})
|
||||
|
||||
It("matches songs with fuzzy title similarity", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
songs := []agents.Song{
|
||||
{Name: "Bohemian Rhapsody", Artist: "Queen"},
|
||||
}
|
||||
|
|
@ -164,7 +164,7 @@ var _ = Describe("Matcher", func() {
|
|||
})
|
||||
|
||||
It("does not match completely different titles", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
songs := []agents.Song{
|
||||
{Name: "Yesterday", Artist: "The Beatles"},
|
||||
}
|
||||
|
|
@ -180,7 +180,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("deduplication", func() {
|
||||
It("removes duplicates when different input songs match the same library track", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
songs := []agents.Song{
|
||||
{Name: "Bohemian Rhapsody (Live)", Artist: "Queen"},
|
||||
{Name: "Bohemian Rhapsody (Original Mix)", Artist: "Queen"},
|
||||
|
|
@ -196,7 +196,7 @@ var _ = Describe("Matcher", func() {
|
|||
})
|
||||
|
||||
It("preserves duplicates when identical input songs match the same library track", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
songs := []agents.Song{
|
||||
{Name: "Bohemian Rhapsody", Artist: "Queen", Album: "A Night at the Opera"},
|
||||
{Name: "Bohemian Rhapsody", Artist: "Queen", Album: "A Night at the Opera"},
|
||||
|
|
@ -215,7 +215,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("priority ordering", func() {
|
||||
It("prefers ID match over MBID match", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
// Song has both ID and MBID set. The matcher should resolve via ID
|
||||
// and short-circuit the MBID phase entirely, so no MBID fetch should
|
||||
// occur even though an mbz_recording_id exists in the input.
|
||||
|
|
@ -236,7 +236,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("count limit", func() {
|
||||
It("returns at most 'count' results", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
songs := []agents.Song{
|
||||
{Name: "Song A", Artist: "Artist"},
|
||||
{Name: "Song B", Artist: "Artist"},
|
||||
|
|
@ -265,7 +265,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Describe("specificity level matching", func() {
|
||||
BeforeEach(func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
})
|
||||
|
||||
It("matches by title + artist MBID + album MBID (highest priority)", func() {
|
||||
|
|
@ -396,7 +396,7 @@ var _ = Describe("Matcher", func() {
|
|||
Describe("fuzzy matching thresholds", func() {
|
||||
Context("with default threshold (85%)", func() {
|
||||
It("matches songs with remastered suffix", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
|
||||
songs := []agents.Song{
|
||||
{Name: "Paranoid Android", Artist: "Radiohead"},
|
||||
|
|
@ -415,7 +415,7 @@ var _ = Describe("Matcher", func() {
|
|||
})
|
||||
|
||||
It("matches songs with live suffix", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
|
||||
songs := []agents.Song{
|
||||
{Name: "Bohemian Rhapsody", Artist: "Queen"},
|
||||
|
|
@ -436,7 +436,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("with threshold set to 100 (exact match only)", func() {
|
||||
It("only matches exact titles", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
|
||||
songs := []agents.Song{
|
||||
{Name: "Paranoid Android", Artist: "Radiohead"},
|
||||
|
|
@ -456,7 +456,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Context("with lower threshold (75%)", func() {
|
||||
It("matches more aggressively", func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 75
|
||||
conf.Server.Matcher.FuzzyThreshold = 75
|
||||
|
||||
songs := []agents.Song{
|
||||
{Name: "Song", Artist: "Artist"},
|
||||
|
|
@ -478,7 +478,8 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Describe("fuzzy album matching", func() {
|
||||
BeforeEach(func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
conf.Server.Matcher.PreferStarred = false
|
||||
})
|
||||
|
||||
It("matches album with (Remaster) suffix", func() {
|
||||
|
|
@ -540,11 +541,53 @@ var _ = Describe("Matcher", func() {
|
|||
Expect(result).To(HaveLen(1))
|
||||
Expect(result[0].ID).To(Equal("exact"))
|
||||
})
|
||||
|
||||
// With PreferStarred on, a starred track on a different album should win over an
// unstarred track whose album matches the query.
It("prefers starred songs over better album match when enabled", func() {
|
||||
// Opt in explicitly; presumably the enclosing BeforeEach turns PreferStarred off.
conf.Server.Matcher.PreferStarred = true
|
||||
songs := []agents.Song{
|
||||
{Name: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator"},
|
||||
}
|
||||
// Same title, artist and album as the query, but not starred or rated.
albumMatch := model.MediaFile{
|
||||
ID: "album-match", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator",
|
||||
}
|
||||
// Same title and artist on a different album, but starred.
starredTrack := model.MediaFile{
|
||||
ID: "starred", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Singles", Annotations: model.Annotations{Starred: true},
|
||||
}
|
||||
|
||||
// NOTE(review): setupTitleOnlyExpectations is defined outside this view; presumably it
// registers both tracks as the candidates the matcher will see.
setupTitleOnlyExpectations(model.MediaFiles{albumMatch, starredTrack})
|
||||
|
||||
result, err := m.MatchSongsToLibrary(ctx, songs, 5)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(result).To(HaveLen(1))
|
||||
// The starred track wins even though the other candidate's album matches.
Expect(result[0].ID).To(Equal("starred"))
|
||||
})
|
||||
|
||||
// With PreferStarred on, a track rated 4 (the lowest rating that counts as preferred)
// on a different album should win over an unrated track whose album matches the query.
It("prefers 4-star songs over better album match when enabled", func() {
|
||||
// Opt in explicitly; presumably the enclosing BeforeEach turns PreferStarred off.
conf.Server.Matcher.PreferStarred = true
|
||||
songs := []agents.Song{
|
||||
{Name: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator"},
|
||||
}
|
||||
// Same title, artist and album as the query, but not starred or rated.
albumMatch := model.MediaFile{
|
||||
ID: "album-match", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator",
|
||||
}
|
||||
// Same title and artist on a different album, not starred, with Rating exactly 4.
ratedTrack := model.MediaFile{
|
||||
ID: "rated", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Singles", Annotations: model.Annotations{Rating: 4},
|
||||
}
|
||||
|
||||
// NOTE(review): setupTitleOnlyExpectations is defined outside this view; presumably it
// registers both tracks as the candidates the matcher will see.
setupTitleOnlyExpectations(model.MediaFiles{albumMatch, ratedTrack})
|
||||
|
||||
result, err := m.MatchSongsToLibrary(ctx, songs, 5)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(result).To(HaveLen(1))
|
||||
// The rated track wins even though the other candidate's album matches.
Expect(result[0].ID).To(Equal("rated"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("duration matching", func() {
|
||||
BeforeEach(func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 100
|
||||
conf.Server.Matcher.FuzzyThreshold = 100
|
||||
})
|
||||
|
||||
It("prefers tracks with matching duration", func() {
|
||||
|
|
@ -678,7 +721,7 @@ var _ = Describe("Matcher", func() {
|
|||
|
||||
Describe("deduplication edge cases", func() {
|
||||
BeforeEach(func() {
|
||||
conf.Server.SimilarSongsMatchThreshold = 85
|
||||
conf.Server.Matcher.FuzzyThreshold = 85
|
||||
})
|
||||
|
||||
It("handles mixed scenario with both identical and different input songs", func() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue