done

2026-04-28 03:20:19 +00:00 · 2025-10-15 15:37:45 -04:00 · 2025-10-15 15:37:45 -04:00 · 3f1c7e8e27
commit 3f1c7e8e27
parent 4f5f71bdaf
5 changed files with 179 additions and 38 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,8 @@
 /BLOG.MD
 /__pycache__
 /archive
-/downloads
+/downloads/*
 /headers.json
 /renderer.js
-/ttf_character_mapping.json
+/ttf_character_mapping.json
+/decoded_book.epub
--- a/decoded_book.epub
+++ b/decoded_book.epub
--- a/download_full_book.py
+++ b/download_full_book.py
@@ -23,19 +23,63 @@ def main():
    asin = sys.argv[1]
    auto_confirm = '--yes' in sys.argv or '-y' in sys.argv
    output_base = Path(f'downloads/{asin}')
-    output_base.mkdir(parents=True, exist_ok=True)
+
+    # Ensure output directory exists
+    try:
+        output_base.mkdir(parents=True, exist_ok=True)
+        print(f"[✓] Output directory ready: {output_base}/")
+    except Exception as e:
+        print(f"[✗] ERROR: Cannot create output directory {output_base}: {e}")
+        sys.exit(1)

    # Load credentials
    headers_file = Path('headers.json')
    if not headers_file.exists():
-        print("[✗] headers.json not found!")
+        print("[✗] ERROR: headers.json not found!")
+        print("\nCreate headers.json in the current directory with:")
+        print('  {')
+        print('    "headers": {"x-adp-session-token": "..."},')
+        print('    "cookies": "session-id=...; ..."')
+        print('  }')
        sys.exit(1)

-    with open(headers_file) as f:
-        headers_data = json.load(f)
+    try:
+        with open(headers_file) as f:
+            headers_data = json.load(f)
+    except json.JSONDecodeError as e:
+        print(f"[✗] ERROR: Invalid JSON in headers.json: {e}")
+        print("\nEnsure headers.json is valid JSON format")
+        sys.exit(1)
+    except Exception as e:
+        print(f"[✗] ERROR: Cannot read headers.json: {e}")
+        sys.exit(1)
+
+    # Validate headers structure
+    if not isinstance(headers_data, dict):
+        print("[✗] ERROR: headers.json must contain a JSON object")
+        sys.exit(1)

    cookies = headers_data.get('cookies', '')
-    adp_token = headers_data['headers'].get('x-adp-session-token') if 'headers' in headers_data else None
+    if not cookies:
+        print("[✗] ERROR: No 'cookies' field found in headers.json!")
+        print("\nEnsure headers.json contains:")
+        print('  {')
+        print('    "cookies": "session-id=...; ..."')
+        print('  }')
+        sys.exit(1)
+
+    if not cookies.strip():
+        print("[✗] ERROR: 'cookies' field is empty in headers.json!")
+        sys.exit(1)
+
+    adp_token = None
+    if 'headers' in headers_data:
+        if not isinstance(headers_data['headers'], dict):
+            print("[⚠] WARNING: 'headers' field is not a JSON object, ignoring")
+        else:
+            adp_token = headers_data['headers'].get('x-adp-session-token')
+            if not adp_token:
+                print("[⚠] WARNING: No 'x-adp-session-token' found in headers")

    # Initialize downloader (single session for entire book)
    print(f"\n{'='*80}")
@@ -46,7 +90,15 @@ def main():

    # Get book metadata
    print("[*] Getting book metadata...")
-    metadata = downloader.start_reading(asin)
+    try:
+        metadata = downloader.start_reading(asin)
+    except Exception as e:
+        print(f"[✗] ERROR: Failed to get book metadata: {e}")
+        print("\nPossible issues:")
+        print("  - Invalid ASIN")
+        print("  - Invalid or expired credentials in headers.json")
+        print("  - Network connection problem")
+        sys.exit(1)

    title = metadata.get('deliveredAsin', asin)
    revision = metadata.get('contentVersion', '')
@@ -70,22 +122,48 @@ def main():

    # Download from position 0 to get the complete book including front matter
    print(f"\n[*] Batch 0: position 0...")
-    first_tar = downloader.render_pages(asin, revision, start_position=0, num_pages=5)
-    first_files = downloader.extract_tar(first_tar, output_base / 'batch_0')
+    try:
+        first_tar = downloader.render_pages(asin, revision, start_position=0, num_pages=5)
+        batch_0_dir = output_base / 'batch_0'
+        batch_0_dir.mkdir(parents=True, exist_ok=True)
+        first_files = downloader.extract_tar(first_tar, batch_0_dir)
+    except Exception as e:
+        print(f"[✗] ERROR: Failed to download batch 0: {e}")
+        sys.exit(1)

    # Get position range from batch 0
-    page_data_file = list((output_base / 'batch_0').glob('page_data_*.json'))[0]
-    with open(page_data_file) as f:
-        first_pages = json.load(f)
+    try:
+        page_data_files = list((output_base / 'batch_0').glob('page_data_*.json'))
+        if not page_data_files:
+            print("[✗] ERROR: No page_data_*.json found in batch 0")
+            sys.exit(1)
+        page_data_file = page_data_files[0]
+        with open(page_data_file) as f:
+            first_pages = json.load(f)

-    batch_0_start = first_pages[0]['startPositionId']
-    batch_0_end = first_pages[-1]['endPositionId']
-    print(f"[✓] Batch 0: {batch_0_start} to {batch_0_end} ({len(first_files)} files)")
+        if not first_pages:
+            print("[✗] ERROR: No pages found in batch 0")
+            sys.exit(1)
+
+        batch_0_start = first_pages[0]['startPositionId']
+        batch_0_end = first_pages[-1]['endPositionId']
+        print(f"[✓] Batch 0: {batch_0_start} to {batch_0_end} ({len(first_files)} files)")
+    except Exception as e:
+        print(f"[✗] ERROR: Failed to parse batch 0 data: {e}")
+        sys.exit(1)

    # Load TOC to estimate book length
    toc_file = output_base / 'batch_0' / 'toc.json'
-    with open(toc_file) as f:
-        toc = json.load(f)
+    if not toc_file.exists():
+        print("[✗] ERROR: toc.json not found in batch 0")
+        sys.exit(1)
+
+    try:
+        with open(toc_file) as f:
+            toc = json.load(f)
+    except Exception as e:
+        print(f"[✗] ERROR: Failed to parse toc.json: {e}")
+        sys.exit(1)

    last_toc_pos = max(entry['tocPositionId'] for entry in toc)
    print(f"[*] Book ends around position {last_toc_pos}")
@@ -116,10 +194,19 @@ def main():
        try:
            print(f"\n[*] Batch {batch_num}: position {current_pos}...")
            tar_data = downloader.render_pages(asin, revision, start_position=current_pos, num_pages=5)
-            files = downloader.extract_tar(tar_data, output_base / f'batch_{batch_num}')
+
+            # Ensure batch directory exists
+            batch_dir = output_base / f'batch_{batch_num}'
+            batch_dir.mkdir(parents=True, exist_ok=True)
+            files = downloader.extract_tar(tar_data, batch_dir)

            # Get end position from this batch
-            page_file = list((output_base / f'batch_{batch_num}').glob('page_data_*.json'))[0]
+            page_files = list(batch_dir.glob('page_data_*.json'))
+            if not page_files:
+                print(f"[!] Batch {batch_num}: No page data found, stopping")
+                break
+
+            page_file = page_files[0]
            with open(page_file) as f:
                pages = json.load(f)

@@ -135,6 +222,7 @@ def main():

        except Exception as e:
            print(f"[✗] Error downloading batch {batch_num}: {e}")
+            print(f"[!] Stopping at batch {batch_num}. Partial download may be available.")
            break

    print(f"\n{'='*80}")
@@ -153,8 +241,12 @@ def main():
        'estimated_positions': f'{start_pos} to {current_pos}'
    }

-    with open(output_base / 'download_info.json', 'w') as f:
-        json.dump(download_info, f, indent=2)
+    try:
+        with open(output_base / 'download_info.json', 'w') as f:
+            json.dump(download_info, f, indent=2)
+        print(f"[✓] Saved download metadata to {output_base / 'download_info.json'}")
+    except Exception as e:
+        print(f"[⚠] WARNING: Failed to save download metadata: {e}")

 if __name__ == '__main__':
    main()
--- a/downloader.py
+++ b/downloader.py
@@ -239,44 +239,86 @@ Examples:
    # Load credentials from headers.json
    headers_file = Path('headers.json')
    if not headers_file.exists():
-        print("[✗] headers.json not found!")
-        print("\nCreate headers.json with:")
+        print("[✗] ERROR: headers.json not found!")
+        print("\nCreate headers.json in the current directory with:")
        print('  {')
        print('    "headers": {"x-adp-session-token": "..."},')
        print('    "cookies": "session-id=...; ..."')
        print('  }')
        sys.exit(1)

-    with open(headers_file) as f:
-        headers_data = json.load(f)
+    try:
+        with open(headers_file) as f:
+            headers_data = json.load(f)
+    except json.JSONDecodeError as e:
+        print(f"[✗] ERROR: Invalid JSON in headers.json: {e}")
+        print("\nEnsure headers.json is valid JSON format")
+        sys.exit(1)
+    except Exception as e:
+        print(f"[✗] ERROR: Cannot read headers.json: {e}")
+        sys.exit(1)
+
+    # Validate headers structure
+    if not isinstance(headers_data, dict):
+        print("[✗] ERROR: headers.json must contain a JSON object")
+        sys.exit(1)

    cookies = headers_data.get('cookies', '')
    if not cookies:
-        print("[✗] No cookies found in headers.json!")
+        print("[✗] ERROR: No 'cookies' field found in headers.json!")
+        print("\nEnsure headers.json contains:")
+        print('  {')
+        print('    "cookies": "session-id=...; ..."')
+        print('  }')
+        sys.exit(1)
+
+    if not cookies.strip():
+        print("[✗] ERROR: 'cookies' field is empty in headers.json!")
        sys.exit(1)

    adp_token = None
    if 'headers' in headers_data:
-        adp_token = headers_data['headers'].get('x-adp-session-token')
+        if not isinstance(headers_data['headers'], dict):
+            print("[⚠] WARNING: 'headers' field is not a JSON object, ignoring")
+        else:
+            adp_token = headers_data['headers'].get('x-adp-session-token')
+            if not adp_token:
+                print("[⚠] WARNING: No 'x-adp-session-token' found in headers")

    # Download
    downloader = KindleDownloader(cookies, adp_token)

    # Override start position if specified
    if args.start_position is not None:
-        metadata = downloader.start_reading(args.asin)
-        revision = metadata.get('contentVersion', '')
+        try:
+            metadata = downloader.start_reading(args.asin)
+            revision = metadata.get('contentVersion', '')

-        # Download from custom position
-        tar_data = downloader.render_pages(args.asin, revision, start_position=args.start_position, num_pages=args.pages)
+            # Download from custom position
+            tar_data = downloader.render_pages(args.asin, revision, start_position=args.start_position, num_pages=args.pages)

-        # Extract
-        output_dir = args.output or f"downloads/{args.asin}"
-        print(f"[*] Extracting to {output_dir}/...")
-        extracted_files = downloader.extract_tar(tar_data, output_dir)
-        print(f"[✓] Extracted {len(extracted_files)} files")
+            # Extract
+            output_dir = args.output or f"downloads/{args.asin}"
+            print(f"[*] Extracting to {output_dir}/...")
+            # Ensure output directory exists
+            Path(output_dir).mkdir(parents=True, exist_ok=True)
+            extracted_files = downloader.extract_tar(tar_data, output_dir)
+            print(f"[✓] Extracted {len(extracted_files)} files")
+        except requests.exceptions.RequestException as e:
+            print(f"[✗] ERROR: Network request failed: {e}")
+            sys.exit(1)
+        except Exception as e:
+            print(f"[✗] ERROR: Download failed: {e}")
+            sys.exit(1)
    else:
-        downloader.download(args.asin, num_pages=args.pages, output_dir=args.output)
+        try:
+            downloader.download(args.asin, num_pages=args.pages, output_dir=args.output)
+        except requests.exceptions.RequestException as e:
+            print(f"[✗] ERROR: Network request failed: {e}")
+            sys.exit(1)
+        except Exception as e:
+            print(f"[✗] ERROR: Download failed: {e}")
+            sys.exit(1)

 if __name__ == '__main__':
    main()
--- a/headers.example.json
+++ b/headers.example.json
@@ -0,0 +1,6 @@
+{
+  "headers": {
+    
+},
+  "cookies": ""
+}