fix(search): use explicit AND in FTS5 queries to fix apostrophe search
Some checks are pending
Pipeline: Test, Lint, Build / Get version info (push) Waiting to run
Pipeline: Test, Lint, Build / Lint Go code (push) Waiting to run
Pipeline: Test, Lint, Build / Test Go code (push) Waiting to run
Pipeline: Test, Lint, Build / Test JS code (push) Waiting to run
Pipeline: Test, Lint, Build / Lint i18n files (push) Waiting to run
Pipeline: Test, Lint, Build / Check Docker configuration (push) Waiting to run
Pipeline: Test, Lint, Build / Build (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-1 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-2 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-3 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-4 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-5 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-6 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-7 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-8 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-9 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build-10 (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Push to GHCR (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Push to Docker Hub (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Cleanup digest artifacts (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Build Windows installers (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Package/Release (push) Blocked by required conditions
Pipeline: Test, Lint, Build / Upload Linux PKG (push) Blocked by required conditions

FTS5's implicit AND (space-separated tokens) silently fails when combined
with parenthesized OR groups produced by processPunctuatedWords. For example,
searching "you've got" generated the query `("you ve" OR youve*) got*` which
returned no results. Using explicit AND (`("you ve" OR youve*) AND got*`)
resolves this FTS5 quirk. Between plain tokens and quoted phrases, implicit
and explicit AND match the same rows; they differ only next to parenthesized
groups, so this change is safe for all queries unconditionally.
This commit is contained in:
Deluan 2026-03-26 20:15:28 -04:00
parent 33e20d355e
commit ccee33f474
2 changed files with 18 additions and 15 deletions

View file

@ -178,7 +178,9 @@ func buildFTS5Query(userInput string) string {
tokens[i] = t + "*"
}
result = strings.Join(tokens, " ")
// Use explicit AND between tokens — FTS5's implicit AND (space-separated)
// doesn't work correctly with parenthesized OR groups from processPunctuatedWords.
result = strings.Join(tokens, " AND ")
for i, phrase := range phrases {
placeholder := fmt.Sprintf("\x00PHRASE%d\x00", i)

View file

@ -17,32 +17,33 @@ var _ = DescribeTable("buildFTS5Query",
Entry("returns empty string for empty input", "", ""),
Entry("returns empty string for whitespace-only input", " ", ""),
Entry("appends * to a single word for prefix matching", "beatles", "beatles*"),
Entry("appends * to each word for prefix matching", "abbey road", "abbey* road*"),
Entry("appends * to each word for prefix matching", "abbey road", "abbey* AND road*"),
Entry("preserves quoted phrases without appending *", `"the beatles"`, `"the beatles"`),
Entry("does not double-append * to existing prefix wildcard", "beat*", "beat*"),
Entry("strips FTS5 operators and appends * to lowercased words", "AND OR NOT NEAR", "and* or* not* near*"),
Entry("strips special FTS5 syntax characters and appends *", "test^col:val", "test* col* val*"),
Entry("handles mixed phrases and words", `"the beatles" abbey`, `"the beatles" abbey*`),
Entry("handles prefix with multiple words", "beat* abbey", "beat* abbey*"),
Entry("collapses multiple spaces", "abbey road", "abbey* road*"),
Entry("strips FTS5 operators and appends * to lowercased words", "AND OR NOT NEAR", "and* AND or* AND not* AND near*"),
Entry("strips special FTS5 syntax characters and appends *", "test^col:val", "test* AND col* AND val*"),
Entry("handles mixed phrases and words", `"the beatles" abbey`, `"the beatles" AND abbey*`),
Entry("handles prefix with multiple words", "beat* abbey", "beat* AND abbey*"),
Entry("collapses multiple spaces", "abbey road", "abbey* AND road*"),
Entry("strips leading * from tokens and appends trailing *", "*livia", "livia*"),
Entry("strips leading * and preserves existing trailing *", "*livia oliv*", "livia* oliv*"),
Entry("strips leading * and preserves existing trailing *", "*livia oliv*", "livia* AND oliv*"),
Entry("strips standalone *", "*", ""),
Entry("strips apostrophe from input", "Guns N' Roses", "Guns* N* Roses*"),
Entry("strips apostrophe from input", "Guns N' Roses", "Guns* AND N* AND Roses*"),
Entry("converts slashed word to phrase+concat OR", "AC/DC", `("AC DC" OR ACDC*)`),
Entry("converts hyphenated word to phrase+concat OR", "a-ha", `("a ha" OR aha*)`),
Entry("converts partial hyphenated word to phrase+concat OR", "a-h", `("a h" OR ah*)`),
Entry("converts hyphenated name to phrase+concat OR", "Jay-Z", `("Jay Z" OR JayZ*)`),
Entry("converts contraction to phrase+concat OR", "it's", `("it s" OR its*)`),
Entry("handles punctuated word mixed with plain words", "best of a-ha", `best* of* ("a ha" OR aha*)`),
Entry("strips miscellaneous punctuation", "rock & roll, vol. 2", "rock* roll* vol* 2*"),
Entry("preserves unicode characters with diacritics", "Björk début", "Björk* début*"),
Entry("handles punctuated word mixed with plain words", "best of a-ha", `best* AND of* AND ("a ha" OR aha*)`),
Entry("handles contraction followed by plain words", "you've got", `("you ve" OR youve*) AND got*`),
Entry("strips miscellaneous punctuation", "rock & roll, vol. 2", "rock* AND roll* AND vol* AND 2*"),
Entry("preserves unicode characters with diacritics", "Björk début", "Björk* AND début*"),
Entry("collapses dotted abbreviation into phrase", "R.E.M.", `"R E M"`),
Entry("collapses abbreviation without trailing dot", "R.E.M", `"R E M"`),
Entry("collapses abbreviation mixed with words", "best of R.E.M.", `best* of* "R E M"`),
Entry("collapses abbreviation mixed with words", "best of R.E.M.", `best* AND of* AND "R E M"`),
Entry("collapses two-letter abbreviation", "U.K.", `"U K"`),
Entry("does not collapse single letter surrounded by words", "I am fine", "I* am* fine*"),
Entry("does not collapse single standalone letter", "A test", "A* test*"),
Entry("does not collapse single letter surrounded by words", "I am fine", "I* AND am* AND fine*"),
Entry("does not collapse single standalone letter", "A test", "A* AND test*"),
Entry("preserves quoted phrase with punctuation verbatim", `"ac/dc"`, `"ac/dc"`),
Entry("preserves quoted abbreviation verbatim", `"R.E.M."`, `"R.E.M."`),
Entry("returns empty string for punctuation-only input", "!!!!!!!", ""),