feat(matcher): add Matcher.PreferStarred option to bias fuzzy matcher toward starred/high-rated tracks (#5387)

* matcher: update godoc for matcher config scoring order

* conf: log deprecated SimilarSongsMatchThreshold option

* conf: enable matcher prefer-starred by default
This commit is contained in:
Deluan Quintão 2026-04-19 12:54:41 -04:00 committed by GitHub
parent 28eba567a7
commit 3b7d3f4383
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 88 additions and 26 deletions

View file

@ -60,8 +60,8 @@ type configOptions struct {
SmartPlaylistRefreshDelay time.Duration
AutoTranscodeDownload bool
DefaultDownsamplingFormat string
Search searchOptions `json:",omitzero"`
SimilarSongsMatchThreshold int
Search searchOptions `json:",omitzero"`
Matcher matcherOptions `json:",omitzero"`
RecentlyAddedByModTime bool
PreferSortTags bool
IgnoredArticles string
@ -261,6 +261,11 @@ type searchOptions struct {
FullString bool
}
// matcherOptions holds the settings of the "Matcher" configuration section,
// which tune how songs are fuzzy-matched against tracks in the library.
type matcherOptions struct {
// PreferStarred, when enabled, makes the fuzzy matcher favour candidates that
// are starred or rated 4 or higher. Defaults to true.
PreferStarred bool
// FuzzyThreshold is the minimum similarity, expressed as a percentage, for a
// fuzzy match (the matcher divides it by 100 before use). Defaults to 85.
// The deprecated SimilarSongsMatchThreshold option is mapped onto this field.
FuzzyThreshold int
}
// logFatal prints a fatal error message to stderr and exits.
// Overridden in tests to allow testing fatal paths.
var logFatal = func(args ...any) {
@ -291,6 +296,7 @@ func Load(noConfigDump bool) {
mapDeprecatedOption("ReverseProxyUserHeader", "ExtAuth.UserHeader")
mapDeprecatedOption("HTTPSecurityHeaders.CustomFrameOptionsValue", "HTTPHeaders.FrameOptions")
mapDeprecatedOption("CoverJpegQuality", "CoverArtQuality")
mapDeprecatedOption("SimilarSongsMatchThreshold", "Matcher.FuzzyThreshold")
err := viper.Unmarshal(&Server)
if err != nil {
@ -424,6 +430,7 @@ func Load(noConfigDump bool) {
logDeprecatedOptions("ReverseProxyUserHeader", "ExtAuth.UserHeader")
logDeprecatedOptions("HTTPSecurityHeaders.CustomFrameOptionsValue", "HTTPHeaders.FrameOptions")
logDeprecatedOptions("CoverJpegQuality", "CoverArtQuality")
logDeprecatedOptions("SimilarSongsMatchThreshold", "Matcher.FuzzyThreshold")
// Removed options
logRemovedOptions("Spotify.ID", "Spotify.Secret")
@ -716,7 +723,8 @@ func setViperDefaults() {
viper.SetDefault("defaultdownsamplingformat", consts.DefaultDownsamplingFormat)
viper.SetDefault("search.fullstring", false)
viper.SetDefault("search.backend", "fts")
viper.SetDefault("similarsongsmatchthreshold", 85)
viper.SetDefault("matcher.preferstarred", true)
viper.SetDefault("matcher.fuzzythreshold", 85)
viper.SetDefault("recentlyaddedbymodtime", false)
viper.SetDefault("prefersorttags", false)
viper.SetDefault("ignoredarticles", "The El La Los Las Le Les Os As O A")

View file

@ -30,7 +30,7 @@ var _ = Describe("Provider - TopSongs", func() {
BeforeEach(func() {
DeferCleanup(configtest.SetupConfig())
// Disable fuzzy matching for these tests to avoid unexpected GetAll calls
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
ctx = GinkgoT().Context()

View file

@ -46,18 +46,20 @@ func New(ds model.DataStore) *Matcher {
// # Fuzzy Matching Details
//
// For title+artist matching, the algorithm uses Jaro-Winkler similarity (threshold configurable
// via SimilarSongsMatchThreshold, default 85%). Matches are ranked by:
// via Matcher.FuzzyThreshold, default 85%). Matches are ranked by:
//
// 1. Title similarity (Jaro-Winkler score, 0.0-1.0)
// 2. Duration proximity (closer duration = higher score, 1.0 if unknown)
// 3. Specificity level (0-5, based on metadata precision):
// 3. Preferred track flag (enabled by Matcher.PreferStarred; prioritized when the track is
// starred or has rating >= 4)
// 4. Specificity level (0-5, based on metadata precision):
// - Level 5: Title + Artist MBID + Album MBID (most specific)
// - Level 4: Title + Artist MBID + Album name (fuzzy)
// - Level 3: Title + Artist name + Album name (fuzzy)
// - Level 2: Title + Artist MBID
// - Level 1: Title + Artist name
// - Level 0: Title only
// 4. Album similarity (Jaro-Winkler, as final tiebreaker)
// 5. Album similarity (Jaro-Winkler, as final tiebreaker)
//
// # Examples
//
@ -250,6 +252,7 @@ type songQuery struct {
type matchScore struct {
// NOTE: the fields are not declared in comparison order. Per the matcher
// godoc, ranking goes: title similarity, duration proximity, preferred
// flag, specificity level, and finally album similarity.

// titleSimilarity is the Jaro-Winkler similarity of the titles (0.0-1.0).
titleSimilarity float64
// durationProximity scores how close the durations are; higher is closer.
durationProximity float64
// preferredMatch is set only when Matcher.PreferStarred is enabled and the
// candidate track is starred or has a rating of 4 or higher.
preferredMatch bool
// albumSimilarity is the Jaro-Winkler similarity of the album names, used
// as the final tiebreaker.
albumSimilarity float64
// specificityLevel ranges from 0 to 5 and reflects how precise the matching
// metadata was (title only = 0, title + artist MBID + album MBID = 5).
specificityLevel int
}
@ -262,6 +265,9 @@ func (s matchScore) betterThan(other matchScore) bool {
if s.durationProximity != other.durationProximity {
return s.durationProximity > other.durationProximity
}
if s.preferredMatch != other.preferredMatch {
return s.preferredMatch
}
if s.specificityLevel != other.specificityLevel {
return s.specificityLevel > other.specificityLevel
}
@ -322,7 +328,7 @@ func (m *Matcher) loadTracksByTitleAndArtist(ctx context.Context, songs []agents
return map[string]model.MediaFile{}, nil
}
threshold := float64(conf.Server.SimilarSongsMatchThreshold) / 100.0
threshold := float64(conf.Server.Matcher.FuzzyThreshold) / 100.0
byArtist := map[string][]songQuery{}
for _, q := range queries {
@ -393,6 +399,7 @@ func (m *Matcher) findBestMatch(q songQuery, sanitizedTracks []sanitizedTrack, t
score := matchScore{
titleSimilarity: titleSim,
durationProximity: durationProximity(q.durationMs, t.mf.Duration),
preferredMatch: conf.Server.Matcher.PreferStarred && isPreferredTrack(t.mf),
albumSimilarity: albumSim,
specificityLevel: computeSpecificityLevel(q, t, threshold),
}
@ -406,6 +413,10 @@ func (m *Matcher) findBestMatch(q songQuery, sanitizedTracks []sanitizedTrack, t
return bestMatch, found
}
// isPreferredTrack reports whether mf should be treated as a favourite when
// ranking fuzzy matches: it is either starred or rated 4 or higher.
func isPreferredTrack(mf *model.MediaFile) bool {
	if mf.Starred {
		return true
	}
	return mf.Rating >= 4
}
// buildTitleQueries converts agent songs into normalized songQuery structs for title+artist matching.
func (m *Matcher) buildTitleQueries(songs []agents.Song, priorMatches ...map[string]model.MediaFile) []songQuery {
var queries []songQuery

View file

@ -78,7 +78,7 @@ var _ = Describe("Matcher", func() {
Describe("MatchSongsToLibrary", func() {
Context("matching by direct ID", func() {
It("matches songs with an ID field to MediaFiles by ID", func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
songs := []agents.Song{
{ID: "track-1", Name: "Some Song", Artist: "Some Artist"},
}
@ -96,7 +96,7 @@ var _ = Describe("Matcher", func() {
Context("matching by MBID", func() {
It("matches songs with MBID to tracks with matching mbz_recording_id", func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
songs := []agents.Song{
{Name: "Paranoid Android", MBID: "abc-123", Artist: "Radiohead"},
}
@ -115,7 +115,7 @@ var _ = Describe("Matcher", func() {
Context("matching by ISRC", func() {
It("matches songs with ISRC to tracks with matching ISRC tag", func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
songs := []agents.Song{
{Name: "Paranoid Android", ISRC: "GBAYE0000351", Artist: "Radiohead"},
}
@ -134,7 +134,7 @@ var _ = Describe("Matcher", func() {
Context("fuzzy title+artist matching", func() {
It("matches songs by title and artist name", func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
songs := []agents.Song{
{Name: "Enjoy the Silence", Artist: "Depeche Mode"},
}
@ -149,7 +149,7 @@ var _ = Describe("Matcher", func() {
})
It("matches songs with fuzzy title similarity", func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
songs := []agents.Song{
{Name: "Bohemian Rhapsody", Artist: "Queen"},
}
@ -164,7 +164,7 @@ var _ = Describe("Matcher", func() {
})
It("does not match completely different titles", func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
songs := []agents.Song{
{Name: "Yesterday", Artist: "The Beatles"},
}
@ -180,7 +180,7 @@ var _ = Describe("Matcher", func() {
Context("deduplication", func() {
It("removes duplicates when different input songs match the same library track", func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
songs := []agents.Song{
{Name: "Bohemian Rhapsody (Live)", Artist: "Queen"},
{Name: "Bohemian Rhapsody (Original Mix)", Artist: "Queen"},
@ -196,7 +196,7 @@ var _ = Describe("Matcher", func() {
})
It("preserves duplicates when identical input songs match the same library track", func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
songs := []agents.Song{
{Name: "Bohemian Rhapsody", Artist: "Queen", Album: "A Night at the Opera"},
{Name: "Bohemian Rhapsody", Artist: "Queen", Album: "A Night at the Opera"},
@ -215,7 +215,7 @@ var _ = Describe("Matcher", func() {
Context("priority ordering", func() {
It("prefers ID match over MBID match", func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
// Song has both ID and MBID set. The matcher should resolve via ID
// and short-circuit the MBID phase entirely, so no MBID fetch should
// occur even though an mbz_recording_id exists in the input.
@ -236,7 +236,7 @@ var _ = Describe("Matcher", func() {
Context("count limit", func() {
It("returns at most 'count' results", func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
songs := []agents.Song{
{Name: "Song A", Artist: "Artist"},
{Name: "Song B", Artist: "Artist"},
@ -265,7 +265,7 @@ var _ = Describe("Matcher", func() {
Describe("specificity level matching", func() {
BeforeEach(func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
})
It("matches by title + artist MBID + album MBID (highest priority)", func() {
@ -396,7 +396,7 @@ var _ = Describe("Matcher", func() {
Describe("fuzzy matching thresholds", func() {
Context("with default threshold (85%)", func() {
It("matches songs with remastered suffix", func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
songs := []agents.Song{
{Name: "Paranoid Android", Artist: "Radiohead"},
@ -415,7 +415,7 @@ var _ = Describe("Matcher", func() {
})
It("matches songs with live suffix", func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
songs := []agents.Song{
{Name: "Bohemian Rhapsody", Artist: "Queen"},
@ -436,7 +436,7 @@ var _ = Describe("Matcher", func() {
Context("with threshold set to 100 (exact match only)", func() {
It("only matches exact titles", func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
songs := []agents.Song{
{Name: "Paranoid Android", Artist: "Radiohead"},
@ -456,7 +456,7 @@ var _ = Describe("Matcher", func() {
Context("with lower threshold (75%)", func() {
It("matches more aggressively", func() {
conf.Server.SimilarSongsMatchThreshold = 75
conf.Server.Matcher.FuzzyThreshold = 75
songs := []agents.Song{
{Name: "Song", Artist: "Artist"},
@ -478,7 +478,8 @@ var _ = Describe("Matcher", func() {
Describe("fuzzy album matching", func() {
BeforeEach(func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
conf.Server.Matcher.PreferStarred = false
})
It("matches album with (Remaster) suffix", func() {
@ -540,11 +541,53 @@ var _ = Describe("Matcher", func() {
Expect(result).To(HaveLen(1))
Expect(result[0].ID).To(Equal("exact"))
})
// Both candidates share the query's title and artist and no durations are set,
// so they differ only in album and starred state. With PreferStarred enabled,
// the starred track on a different album must beat the exact album match.
It("prefers starred songs over better album match when enabled", func() {
conf.Server.Matcher.PreferStarred = true
songs := []agents.Song{
{Name: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator"},
}
// Unstarred track whose album equals the album in the query.
albumMatch := model.MediaFile{
ID: "album-match", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator",
}
// Starred track from another album; expected to win.
starredTrack := model.MediaFile{
ID: "starred", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Singles", Annotations: model.Annotations{Starred: true},
}
setupTitleOnlyExpectations(model.MediaFiles{albumMatch, starredTrack})
result, err := m.MatchSongsToLibrary(ctx, songs, 5)
Expect(err).ToNot(HaveOccurred())
Expect(result).To(HaveLen(1))
Expect(result[0].ID).To(Equal("starred"))
})
// Same scenario as the starred case, but the favoured track is marked by a
// rating instead of a star. A rating of 4 is the lowest value that counts as
// preferred, so this also pins the lower bound of the rating check.
It("prefers 4-star songs over better album match when enabled", func() {
conf.Server.Matcher.PreferStarred = true
songs := []agents.Song{
{Name: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator"},
}
// Unrated track whose album equals the album in the query.
albumMatch := model.MediaFile{
ID: "album-match", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator",
}
// Track rated 4 from another album; expected to win.
ratedTrack := model.MediaFile{
ID: "rated", Title: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Singles", Annotations: model.Annotations{Rating: 4},
}
setupTitleOnlyExpectations(model.MediaFiles{albumMatch, ratedTrack})
result, err := m.MatchSongsToLibrary(ctx, songs, 5)
Expect(err).ToNot(HaveOccurred())
Expect(result).To(HaveLen(1))
Expect(result[0].ID).To(Equal("rated"))
})
})
Describe("duration matching", func() {
BeforeEach(func() {
conf.Server.SimilarSongsMatchThreshold = 100
conf.Server.Matcher.FuzzyThreshold = 100
})
It("prefers tracks with matching duration", func() {
@ -678,7 +721,7 @@ var _ = Describe("Matcher", func() {
Describe("deduplication edge cases", func() {
BeforeEach(func() {
conf.Server.SimilarSongsMatchThreshold = 85
conf.Server.Matcher.FuzzyThreshold = 85
})
It("handles mixed scenario with both identical and different input songs", func() {