From 0619fa4d3be5b42ece2fa2cfc653fb81b022fc15 Mon Sep 17 00:00:00 2001 From: Luis Novo Date: Wed, 11 Mar 2026 17:00:00 -0500 Subject: [PATCH 1/2] fix: use UUID for podcast episode directory names Podcast episode names with spaces or special characters caused filesystem errors when used directly as directory names. Use UUID-based directory names instead, keeping the original episode name in the database for display purposes. Closes #663 --- commands/podcast_commands.py | 8 ++-- tests/test_podcast_path.py | 74 ++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 tests/test_podcast_path.py diff --git a/commands/podcast_commands.py b/commands/podcast_commands.py index e273c4a..f7b5804 100644 --- a/commands/podcast_commands.py +++ b/commands/podcast_commands.py @@ -1,4 +1,5 @@ import time +import uuid from pathlib import Path from typing import Optional @@ -220,8 +221,9 @@ async def generate_podcast_command( logger.info(f"Generated briefing (length: {len(briefing)} chars)") - # 7. Create output directory - output_dir = Path(f"{DATA_FOLDER}/podcasts/episodes/{input_data.episode_name}") + # 7. Create output directory using UUID for filesystem-safe paths + episode_dir_name = str(uuid.uuid4()) + output_dir = Path(f"{DATA_FOLDER}/podcasts/episodes/{episode_dir_name}") output_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Created output directory: {output_dir}") @@ -232,7 +234,7 @@ async def generate_podcast_command( result = await create_podcast( content=input_data.content, briefing=briefing, - episode_name=input_data.episode_name, + episode_name=episode_dir_name, output_dir=str(output_dir), speaker_config=speaker_profile.name, episode_profile=episode_profile.name, diff --git a/tests/test_podcast_path.py b/tests/test_podcast_path.py new file mode 100644 index 0000000..ce79c88 --- /dev/null +++ b/tests/test_podcast_path.py @@ -0,0 +1,74 @@ +""" +Tests for podcast episode directory path generation. + +Verifies that episode output directories use UUID-based names +instead of raw episode names, preventing filesystem issues with +spaces and special characters (GitHub issue #663). +""" + +import uuid +from pathlib import Path, PurePosixPath + + +def _build_episode_output_dir(data_folder: str) -> tuple[str, Path]: + """Replicate the directory naming logic from generate_podcast_command.""" + episode_dir_name = str(uuid.uuid4()) + output_dir = Path(f"{data_folder}/podcasts/episodes/{episode_dir_name}") + return episode_dir_name, output_dir + + +class TestPodcastEpisodeDirectory: + """Verify that episode directories are always filesystem-safe.""" + + def test_directory_uses_uuid_format(self): + dir_name, _ = _build_episode_output_dir("/data") + # Should be a valid UUID + parsed = uuid.UUID(dir_name) + assert str(parsed) == dir_name + + def test_directory_path_is_valid(self): + _, output_dir = _build_episode_output_dir("/data") + # Path should have exactly the expected structure + assert str(output_dir).startswith("/data/podcasts/episodes/") + # Directory name should be the last component + assert len(output_dir.name) == 36 # UUID string length + + def test_no_collision_between_calls(self): + dir1, _ = _build_episode_output_dir("/data") + dir2, _ = _build_episode_output_dir("/data") + assert dir1 != dir2 + + def test_path_has_no_spaces_or_special_chars(self): + """Regardless of episode name, path should be clean.""" + problematic_names = [ + "My Episode Name", + "Episode: Part 1", + 'test "quotes"', + "path/traversal", + "dots..and...more", + "café résumé", + " spaces ", + "", + "?*<>|", + ] + for name in problematic_names: + dir_name, output_dir = _build_episode_output_dir("/data") + # UUID path is independent of the episode name + path_str = str(output_dir) + assert " " not in path_str, f"Space found in path for name: {name}" + for char in ['<', '>', ':', '"', '|', '?', '*']: + assert char not in path_str, ( + f"Unsafe char '{char}' in path for name: {name}" + ) + + def test_path_works_on_posix(self): + _, output_dir = _build_episode_output_dir("/data") + posix = PurePosixPath(str(output_dir)) + assert posix.parts == ("/", "data", "podcasts", "episodes", output_dir.name) + + def test_uuid_directory_can_be_created(self, tmp_path): + """Actually create the directory to verify it works on the real filesystem.""" + dir_name, output_dir = _build_episode_output_dir(str(tmp_path)) + output_dir.mkdir(parents=True, exist_ok=True) + assert output_dir.exists() + assert output_dir.is_dir() From e9040ef97b5c12028093afe7339bdf01cd8c246a Mon Sep 17 00:00:00 2001 From: Luis Novo Date: Wed, 11 Mar 2026 17:05:42 -0500 Subject: [PATCH 2/2] fix: extract build_episode_output_dir helper and test production code Tests were reimplementing UUID logic locally instead of testing the actual production code path. Extract the path-building logic into a testable helper function and import it directly in tests. --- commands/podcast_commands.py | 17 +++++++-- tests/test_podcast_path.py | 68 ++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 39 deletions(-) diff --git a/commands/podcast_commands.py b/commands/podcast_commands.py index f7b5804..34ffae8 100644 --- a/commands/podcast_commands.py +++ b/commands/podcast_commands.py @@ -23,6 +23,20 @@ except ImportError as e: raise ValueError("podcast_creator library not available") +def build_episode_output_dir(data_folder: str) -> tuple[str, Path]: + """Build a filesystem-safe output directory path for a podcast episode. + + Uses a UUID as the directory name so the path is safe regardless of + what the user typed as episode name (spaces, special chars, etc.). + + Returns: + A tuple of (episode_dir_name, output_dir_path). + """ + episode_dir_name = str(uuid.uuid4()) + output_dir = Path(f"{data_folder}/podcasts/episodes/{episode_dir_name}") + return episode_dir_name, output_dir + + def full_model_dump(model): if isinstance(model, BaseModel): return model.model_dump() @@ -222,8 +236,7 @@ async def generate_podcast_command( logger.info(f"Generated briefing (length: {len(briefing)} chars)") # 7. Create output directory using UUID for filesystem-safe paths - episode_dir_name = str(uuid.uuid4()) - output_dir = Path(f"{DATA_FOLDER}/podcasts/episodes/{episode_dir_name}") + episode_dir_name, output_dir = build_episode_output_dir(DATA_FOLDER) output_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Created output directory: {output_dir}") diff --git a/tests/test_podcast_path.py b/tests/test_podcast_path.py index ce79c88..b48ac8d 100644 --- a/tests/test_podcast_path.py +++ b/tests/test_podcast_path.py @@ -7,68 +7,62 @@ spaces and special characters (GitHub issue #663). """ import uuid -from pathlib import Path, PurePosixPath +from pathlib import PurePosixPath + +from commands.podcast_commands import build_episode_output_dir -def _build_episode_output_dir(data_folder: str) -> tuple[str, Path]: - """Replicate the directory naming logic from generate_podcast_command.""" - episode_dir_name = str(uuid.uuid4()) - output_dir = Path(f"{data_folder}/podcasts/episodes/{episode_dir_name}") - return episode_dir_name, output_dir +class TestBuildEpisodeOutputDir: + """Test the actual production helper that builds episode output paths.""" - -class TestPodcastEpisodeDirectory: - """Verify that episode directories are always filesystem-safe.""" - - def test_directory_uses_uuid_format(self): - dir_name, _ = _build_episode_output_dir("/data") - # Should be a valid UUID + def test_directory_name_is_valid_uuid(self): + dir_name, _ = build_episode_output_dir("/data") parsed = uuid.UUID(dir_name) assert str(parsed) == dir_name - def test_directory_path_is_valid(self): - _, output_dir = _build_episode_output_dir("/data") - # Path should have exactly the expected structure - assert str(output_dir).startswith("/data/podcasts/episodes/") - # Directory name should be the last component - assert len(output_dir.name) == 36 # UUID string length + def test_path_structure(self): + dir_name, output_dir = build_episode_output_dir("/data") + assert str(output_dir) == f"/data/podcasts/episodes/{dir_name}" def test_no_collision_between_calls(self): - dir1, _ = _build_episode_output_dir("/data") - dir2, _ = _build_episode_output_dir("/data") + dir1, _ = build_episode_output_dir("/data") + dir2, _ = build_episode_output_dir("/data") assert dir1 != dir2 - def test_path_has_no_spaces_or_special_chars(self): - """Regardless of episode name, path should be clean.""" + def test_path_is_independent_of_episode_name(self): + """The returned path must never contain user-supplied episode names. + + Since build_episode_output_dir does not accept an episode name at all, + any name the user types is structurally excluded from the path. + """ problematic_names = [ "My Episode Name", "Episode: Part 1", 'test "quotes"', "path/traversal", - "dots..and...more", "café résumé", " spaces ", - "", "?*<>|", ] for name in problematic_names: - dir_name, output_dir = _build_episode_output_dir("/data") - # UUID path is independent of the episode name + _, output_dir = build_episode_output_dir("/data") path_str = str(output_dir) - assert " " not in path_str, f"Space found in path for name: {name}" - for char in ['<', '>', ':', '"', '|', '?', '*']: - assert char not in path_str, ( - f"Unsafe char '{char}' in path for name: {name}" - ) + # The episode name must not appear anywhere in the path + assert name not in path_str + # UUID paths contain only hex digits and hyphens after the base + dir_component = output_dir.name + assert all(c in "0123456789abcdef-" for c in dir_component), ( + f"Unexpected chars in directory name: {dir_component}" + ) def test_path_works_on_posix(self): - _, output_dir = _build_episode_output_dir("/data") + dir_name, output_dir = build_episode_output_dir("/data") posix = PurePosixPath(str(output_dir)) - assert posix.parts == ("/", "data", "podcasts", "episodes", output_dir.name) + assert posix.parts == ("/", "data", "podcasts", "episodes", dir_name) - def test_uuid_directory_can_be_created(self, tmp_path): - """Actually create the directory to verify it works on the real filesystem.""" - dir_name, output_dir = _build_episode_output_dir(str(tmp_path)) + def test_directory_can_be_created(self, tmp_path): + """Create the directory on the real filesystem.""" + _, output_dir = build_episode_output_dir(str(tmp_path)) output_dir.mkdir(parents=True, exist_ok=True) assert output_dir.exists() assert output_dir.is_dir()