From dfe04db49306495b6e8a5de2c7d205a2a27e08db Mon Sep 17 00:00:00 2001
From: brentperteet <brent.perteet@gmail.com>
Date: Mon, 23 Mar 2026 20:25:30 -0500
Subject: [PATCH] initial commit

---
 .gitignore       |  28 ++++++++++++++++++++++++++
 CLAUDE.md        |  39 ++++++++++++++++++++++++++++++++++++
 combine_pdfs.py  |  31 +++++++++++++++++++++++++++++
 invoices.py      |  50 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt | Bin 0 -> 650 bytes
 5 files changed, 148 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 CLAUDE.md
 create mode 100644 combine_pdfs.py
 create mode 100644 invoices.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..500d1b6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+# Virtual Environment
+venv/
+env/
+ENV/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Project specific
+orders.txt
+*.pdf
+downloads/
+invoices/
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..c1a63d2
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,39 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+This is a Python automation project that downloads Amazon order invoices in bulk using Selenium WebDriver. `invoices.py` reads order IDs from [orders.txt](orders.txt), navigates to each order's invoice print page, and saves it as a PDF in `invoices/`; `combine_pdfs.py` then merges the PDFs in `invoices/` into a single `receipts.pdf`.
+
+## Dependencies
+
+- Python 3.x with the `selenium` package (and `PyPDF2` for `combine_pdfs.py`)
+- Chrome browser with ChromeDriver
+- Virtual environment in `venv/` directory
+
+## Running the Script
+
+1. Activate the virtual environment:
+   - Windows: `venv\Scripts\activate`
+   - Unix/macOS: `source venv/bin/activate`
+
+2. Run the script:
+   ```
+   python invoices.py
+   ```
+
+3. When prompted, manually log into Amazon in the browser window, then press Enter in the terminal to continue
+
+4. The script will iterate through each order ID in [orders.txt](orders.txt) and save the invoice as `invoices/<order ID>.pdf`
+
+## Input Format
+
+[orders.txt](orders.txt) should contain one Amazon order ID per line (format: `XXX-XXXXXXX-XXXXXXX`)
+
+## Architecture Notes
+
+- The script uses direct URL construction to access invoice print pages: `https://www.amazon.com/gp/css/summary/print.html?orderID={order}`
+- Manual authentication is required due to Amazon's login protections
+- 3-second delay between orders to avoid rate limiting
+- Each invoice page is rendered to PDF via the Chrome DevTools Protocol (`Page.printToPDF`) and written to `invoices/<order ID>.pdf` by the script - no print dialog or manual save is involved
diff --git a/combine_pdfs.py b/combine_pdfs.py
new file mode 100644
index 0000000..8ecafdc
--- /dev/null
+++ b/combine_pdfs.py
@@ -0,0 +1,31 @@
+from PyPDF2 import PdfMerger, PdfReader  # NOTE(review): PdfReader is not used in this script
+import os
+
+# Directory containing the invoice PDFs, relative to the current working directory
+invoices_dir = "invoices"
+
+# Collect the names ending in '.pdf' (case-sensitive) and sort them so the merge order is deterministic
+pdf_files = [f for f in os.listdir(invoices_dir) if f.endswith('.pdf')]
+pdf_files.sort()
+
+# Create a PDF merger object
+merger = PdfMerger()
+
+# Add each PDF to the merger, in sorted file-name order
+for pdf_file in pdf_files:
+    pdf_path = os.path.join(invoices_dir, pdf_file)
+
+    # For files with "Audible" in the name, only include the first page
+    if "Audible" in pdf_file:
+        merger.append(pdf_path, pages=(0, 1))
+        print(f"Added (first page only): {pdf_file}")
+    else:
+        merger.append(pdf_path)
+        print(f"Added: {pdf_file}")
+
+# Write the combined PDF to the working directory, i.e. outside invoices_dir, so it is never picked up as an input
+output_path = "receipts.pdf"
+merger.write(output_path)
+merger.close()
+
+print(f"\nCombined {len(pdf_files)} PDFs into {output_path}")
diff --git a/invoices.py b/invoices.py
new file mode 100644
index 0000000..b2bbc98
--- /dev/null
+++ b/invoices.py
@@ -0,0 +1,50 @@
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+import time
+import os
+import base64
+
+# Save each order listed in orders.txt as invoices/<order>.pdf; Chrome prefs default printing/downloads to that folder
+chrome_options = Options()
+download_dir = os.path.join(os.getcwd(), "invoices")
+os.makedirs(download_dir, exist_ok=True)
+
+prefs = {
+    "printing.print_preview_sticky_settings.appState": '{"recentDestinations":[{"id":"Save as PDF","origin":"local","account":""}],"selectedDestinationId":"Save as PDF","version":2}',
+    "savefile.default_directory": download_dir,
+    "download.default_directory": download_dir,
+    "download.prompt_for_download": False,
+    "plugins.always_open_pdf_externally": True
+}
+chrome_options.add_experimental_option("prefs", prefs)
+
+driver = webdriver.Chrome(options=chrome_options)
+driver.get("https://www.amazon.com/ap/signin?openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_signin&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.assoc_handle=usflex&openid.mode=checkid_setup&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0")
+
+# Login is done by hand in the browser window; the script blocks here until Enter is pressed
+input("Log in and press Enter...")
+
+# One order ID per line; skip blank lines so no request is made with an empty orderID (and no ".pdf" file is written)
+with open("orders.txt") as f:
+    orders = [line.strip() for line in f if line.strip()]
+
+for order in orders:
+    url = f"https://www.amazon.com/gp/css/summary/print.html?orderID={order}&ref_=ppx_hzod_invoiceConns_dt_b_invoice"
+    driver.get(url)
+    time.sleep(3)  # fixed pause after navigation before the page is printed
+
+    # Render the current page to PDF with the DevTools protocol command Page.printToPDF
+    print_options = {
+        'landscape': False,
+        'displayHeaderFooter': False,
+        'printBackground': True,
+        'preferCSSPageSize': True,
+    }
+    result = driver.execute_cdp_cmd("Page.printToPDF", print_options)
+
+    # The response's 'data' field is base64 text; decode it and name the file after the order ID
+    pdf_path = os.path.join(download_dir, f"{order}.pdf")
+    with open(pdf_path, 'wb') as f:
+        f.write(base64.b64decode(result['data']))
+
+    print(f"Saved: {order}.pdf")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5ecbf754b7355bd80685a02629287db8e7826ef0
GIT binary patch
literal 650
zcmZWn+e*Vg6r5+FpVH8Ewc<nH#OHz^P-0ALuuVcYLHqOS%sJT-D9cjVnKPG@{{5C1
zc+N=J@Z94O1s-@;Sl|v7YGMN#?0KEBnplCHf_&%gXADheg--+?P3kRReMK64pham|
zG%T1_hmnpII-KaP$v{*LDfE0##64e+3z<Dt)IeE1W5JBk)V5@7@q$;r2+KO|CG?Go
z6~3ko$m@yMacLfTTecsWM+(|bob^bCdjHNCId9hJY=<@bs#VMMCG}d%oA*B7y1qw4
zrrgvy+6KIF#+LZz{5w;7`kJblAJ|nJ(3ct5us62@cAWU(i~1yU&~NS(?AyD`$OK28
mh7QpfQq+IL@mudDyNNGtPogb6&n6OcoSAl*nhBSxEB*rlY+aTB

literal 0
HcmV?d00001