This commit is contained in:
PixelMelt 2025-10-15 15:37:45 -04:00
parent 4f5f71bdaf
commit 3f1c7e8e27
5 changed files with 179 additions and 38 deletions

5
.gitignore vendored
View file

@@ -1,7 +1,8 @@
/BLOG.MD
/__pycache__
/archive
/downloads
/downloads/*
/headers.json
/renderer.js
/ttf_character_mapping.json
/ttf_character_mapping.json
/decoded_book.epub

Binary file not shown.

View file

@@ -23,19 +23,63 @@ def main():
asin = sys.argv[1]
auto_confirm = '--yes' in sys.argv or '-y' in sys.argv
output_base = Path(f'downloads/{asin}')
output_base.mkdir(parents=True, exist_ok=True)
# Ensure output directory exists
try:
output_base.mkdir(parents=True, exist_ok=True)
print(f"[✓] Output directory ready: {output_base}/")
except Exception as e:
print(f"[✗] ERROR: Cannot create output directory {output_base}: {e}")
sys.exit(1)
# Load credentials
headers_file = Path('headers.json')
if not headers_file.exists():
print("[✗] headers.json not found!")
print("[✗] ERROR: headers.json not found!")
print("\nCreate headers.json in the current directory with:")
print(' {')
print(' "headers": {"x-adp-session-token": "..."},')
print(' "cookies": "session-id=...; ..."')
print(' }')
sys.exit(1)
with open(headers_file) as f:
headers_data = json.load(f)
try:
with open(headers_file) as f:
headers_data = json.load(f)
except json.JSONDecodeError as e:
print(f"[✗] ERROR: Invalid JSON in headers.json: {e}")
print("\nEnsure headers.json is valid JSON format")
sys.exit(1)
except Exception as e:
print(f"[✗] ERROR: Cannot read headers.json: {e}")
sys.exit(1)
# Validate headers structure
if not isinstance(headers_data, dict):
print("[✗] ERROR: headers.json must contain a JSON object")
sys.exit(1)
cookies = headers_data.get('cookies', '')
adp_token = headers_data['headers'].get('x-adp-session-token') if 'headers' in headers_data else None
if not cookies:
print("[✗] ERROR: No 'cookies' field found in headers.json!")
print("\nEnsure headers.json contains:")
print(' {')
print(' "cookies": "session-id=...; ..."')
print(' }')
sys.exit(1)
if not cookies.strip():
print("[✗] ERROR: 'cookies' field is empty in headers.json!")
sys.exit(1)
adp_token = None
if 'headers' in headers_data:
if not isinstance(headers_data['headers'], dict):
print("[⚠] WARNING: 'headers' field is not a JSON object, ignoring")
else:
adp_token = headers_data['headers'].get('x-adp-session-token')
if not adp_token:
print("[⚠] WARNING: No 'x-adp-session-token' found in headers")
# Initialize downloader (single session for entire book)
print(f"\n{'='*80}")
@@ -46,7 +90,15 @@ def main():
# Get book metadata
print("[*] Getting book metadata...")
metadata = downloader.start_reading(asin)
try:
metadata = downloader.start_reading(asin)
except Exception as e:
print(f"[✗] ERROR: Failed to get book metadata: {e}")
print("\nPossible issues:")
print(" - Invalid ASIN")
print(" - Invalid or expired credentials in headers.json")
print(" - Network connection problem")
sys.exit(1)
title = metadata.get('deliveredAsin', asin)
revision = metadata.get('contentVersion', '')
@@ -70,22 +122,48 @@ def main():
# Download from position 0 to get the complete book including front matter
print(f"\n[*] Batch 0: position 0...")
first_tar = downloader.render_pages(asin, revision, start_position=0, num_pages=5)
first_files = downloader.extract_tar(first_tar, output_base / 'batch_0')
try:
first_tar = downloader.render_pages(asin, revision, start_position=0, num_pages=5)
batch_0_dir = output_base / 'batch_0'
batch_0_dir.mkdir(parents=True, exist_ok=True)
first_files = downloader.extract_tar(first_tar, batch_0_dir)
except Exception as e:
print(f"[✗] ERROR: Failed to download batch 0: {e}")
sys.exit(1)
# Get position range from batch 0
page_data_file = list((output_base / 'batch_0').glob('page_data_*.json'))[0]
with open(page_data_file) as f:
first_pages = json.load(f)
try:
page_data_files = list((output_base / 'batch_0').glob('page_data_*.json'))
if not page_data_files:
print("[✗] ERROR: No page_data_*.json found in batch 0")
sys.exit(1)
page_data_file = page_data_files[0]
with open(page_data_file) as f:
first_pages = json.load(f)
batch_0_start = first_pages[0]['startPositionId']
batch_0_end = first_pages[-1]['endPositionId']
print(f"[✓] Batch 0: {batch_0_start} to {batch_0_end} ({len(first_files)} files)")
if not first_pages:
print("[✗] ERROR: No pages found in batch 0")
sys.exit(1)
batch_0_start = first_pages[0]['startPositionId']
batch_0_end = first_pages[-1]['endPositionId']
print(f"[✓] Batch 0: {batch_0_start} to {batch_0_end} ({len(first_files)} files)")
except Exception as e:
print(f"[✗] ERROR: Failed to parse batch 0 data: {e}")
sys.exit(1)
# Load TOC to estimate book length
toc_file = output_base / 'batch_0' / 'toc.json'
with open(toc_file) as f:
toc = json.load(f)
if not toc_file.exists():
print("[✗] ERROR: toc.json not found in batch 0")
sys.exit(1)
try:
with open(toc_file) as f:
toc = json.load(f)
except Exception as e:
print(f"[✗] ERROR: Failed to parse toc.json: {e}")
sys.exit(1)
last_toc_pos = max(entry['tocPositionId'] for entry in toc)
print(f"[*] Book ends around position {last_toc_pos}")
@@ -116,10 +194,19 @@ def main():
try:
print(f"\n[*] Batch {batch_num}: position {current_pos}...")
tar_data = downloader.render_pages(asin, revision, start_position=current_pos, num_pages=5)
files = downloader.extract_tar(tar_data, output_base / f'batch_{batch_num}')
# Ensure batch directory exists
batch_dir = output_base / f'batch_{batch_num}'
batch_dir.mkdir(parents=True, exist_ok=True)
files = downloader.extract_tar(tar_data, batch_dir)
# Get end position from this batch
page_file = list((output_base / f'batch_{batch_num}').glob('page_data_*.json'))[0]
page_files = list(batch_dir.glob('page_data_*.json'))
if not page_files:
print(f"[!] Batch {batch_num}: No page data found, stopping")
break
page_file = page_files[0]
with open(page_file) as f:
pages = json.load(f)
@@ -135,6 +222,7 @@ def main():
except Exception as e:
print(f"[✗] Error downloading batch {batch_num}: {e}")
print(f"[!] Stopping at batch {batch_num}. Partial download may be available.")
break
print(f"\n{'='*80}")
@@ -153,8 +241,12 @@ def main():
'estimated_positions': f'{start_pos} to {current_pos}'
}
with open(output_base / 'download_info.json', 'w') as f:
json.dump(download_info, f, indent=2)
try:
with open(output_base / 'download_info.json', 'w') as f:
json.dump(download_info, f, indent=2)
print(f"[✓] Saved download metadata to {output_base / 'download_info.json'}")
except Exception as e:
print(f"[⚠] WARNING: Failed to save download metadata: {e}")
if __name__ == '__main__':
main()

View file

@@ -239,44 +239,86 @@ Examples:
# Load credentials from headers.json
headers_file = Path('headers.json')
if not headers_file.exists():
print("[✗] headers.json not found!")
print("\nCreate headers.json with:")
print("[✗] ERROR: headers.json not found!")
print("\nCreate headers.json in the current directory with:")
print(' {')
print(' "headers": {"x-adp-session-token": "..."},')
print(' "cookies": "session-id=...; ..."')
print(' }')
sys.exit(1)
with open(headers_file) as f:
headers_data = json.load(f)
try:
with open(headers_file) as f:
headers_data = json.load(f)
except json.JSONDecodeError as e:
print(f"[✗] ERROR: Invalid JSON in headers.json: {e}")
print("\nEnsure headers.json is valid JSON format")
sys.exit(1)
except Exception as e:
print(f"[✗] ERROR: Cannot read headers.json: {e}")
sys.exit(1)
# Validate headers structure
if not isinstance(headers_data, dict):
print("[✗] ERROR: headers.json must contain a JSON object")
sys.exit(1)
cookies = headers_data.get('cookies', '')
if not cookies:
print("[✗] No cookies found in headers.json!")
print("[✗] ERROR: No 'cookies' field found in headers.json!")
print("\nEnsure headers.json contains:")
print(' {')
print(' "cookies": "session-id=...; ..."')
print(' }')
sys.exit(1)
if not cookies.strip():
print("[✗] ERROR: 'cookies' field is empty in headers.json!")
sys.exit(1)
adp_token = None
if 'headers' in headers_data:
adp_token = headers_data['headers'].get('x-adp-session-token')
if not isinstance(headers_data['headers'], dict):
print("[⚠] WARNING: 'headers' field is not a JSON object, ignoring")
else:
adp_token = headers_data['headers'].get('x-adp-session-token')
if not adp_token:
print("[⚠] WARNING: No 'x-adp-session-token' found in headers")
# Download
downloader = KindleDownloader(cookies, adp_token)
# Override start position if specified
if args.start_position is not None:
metadata = downloader.start_reading(args.asin)
revision = metadata.get('contentVersion', '')
try:
metadata = downloader.start_reading(args.asin)
revision = metadata.get('contentVersion', '')
# Download from custom position
tar_data = downloader.render_pages(args.asin, revision, start_position=args.start_position, num_pages=args.pages)
# Download from custom position
tar_data = downloader.render_pages(args.asin, revision, start_position=args.start_position, num_pages=args.pages)
# Extract
output_dir = args.output or f"downloads/{args.asin}"
print(f"[*] Extracting to {output_dir}/...")
extracted_files = downloader.extract_tar(tar_data, output_dir)
print(f"[✓] Extracted {len(extracted_files)} files")
# Extract
output_dir = args.output or f"downloads/{args.asin}"
print(f"[*] Extracting to {output_dir}/...")
# Ensure output directory exists
Path(output_dir).mkdir(parents=True, exist_ok=True)
extracted_files = downloader.extract_tar(tar_data, output_dir)
print(f"[✓] Extracted {len(extracted_files)} files")
except requests.exceptions.RequestException as e:
print(f"[✗] ERROR: Network request failed: {e}")
sys.exit(1)
except Exception as e:
print(f"[✗] ERROR: Download failed: {e}")
sys.exit(1)
else:
downloader.download(args.asin, num_pages=args.pages, output_dir=args.output)
try:
downloader.download(args.asin, num_pages=args.pages, output_dir=args.output)
except requests.exceptions.RequestException as e:
print(f"[✗] ERROR: Network request failed: {e}")
sys.exit(1)
except Exception as e:
print(f"[✗] ERROR: Download failed: {e}")
sys.exit(1)
if __name__ == '__main__':
main()

6
headers.example.json Normal file
View file

@@ -0,0 +1,6 @@
{
"headers": {
},
"cookies": ""
}