mirror of
https://github.com/lfnovo/open-notebook.git
synced 2026-04-29 20:10:07 +00:00
fix: fail fast when source content extraction returns empty
Add empty-content validation in content_process() after extract_content() returns. Sources with no extractable text (e.g. YouTube videos without transcripts) now raise ValueError immediately instead of silently saving an empty source. ValueError is already configured as a permanent failure in the retry config, so no retries are wasted on unrecoverable situations. Closes #527
This commit is contained in:
parent
6226db2746
commit
12a3caf636
1 changed file with 15 additions and 0 deletions
|
|
@@ -76,6 +76,21 @@ async def content_process(state: SourceState) -> dict:
|
|||
# Continue without custom audio model (content-core will use its default)
|
||||
|
||||
processed_state = await extract_content(content_state)
|
||||
|
||||
if not processed_state.content or not processed_state.content.strip():
|
||||
url = processed_state.url or ""
|
||||
if url and ("youtube.com" in url or "youtu.be" in url):
|
||||
raise ValueError(
|
||||
"Could not extract content from this YouTube video. "
|
||||
"No transcript or subtitles are available. "
|
||||
"Try configuring a Speech-to-Text model in Settings "
|
||||
"to transcribe the audio instead."
|
||||
)
|
||||
raise ValueError(
|
||||
"Could not extract any text content from this source. "
|
||||
"The content may be empty, inaccessible, or in an unsupported format."
|
||||
)
|
||||
|
||||
return {"content_state": processed_state}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue