From dfe04db49306495b6e8a5de2c7d205a2a27e08db Mon Sep 17 00:00:00 2001 From: brentperteet Date: Mon, 23 Mar 2026 20:25:30 -0500 Subject: [PATCH] initial commit --- .gitignore | 28 ++++++++++++++++++++++++++ CLAUDE.md | 39 ++++++++++++++++++++++++++++++++++++ combine_pdfs.py | 31 +++++++++++++++++++++++++++++ invoices.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | Bin 0 -> 650 bytes 5 files changed, 148 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 combine_pdfs.py create mode 100644 invoices.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..500d1b6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python + +# Virtual Environment +venv/ +env/ +ENV/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Project specific +orders.txt +*.pdf +downloads/ +invoices/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..c1a63d2 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,39 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is a Python automation script that downloads Amazon order invoices in bulk using Selenium WebDriver. It reads order IDs from [orders.txt](orders.txt) and automates the process of navigating to invoice pages and triggering the print dialog for each order. + +## Dependencies + +- Python 3.x with `selenium` package +- Chrome browser with ChromeDriver +- Virtual environment in `venv/` directory + +## Running the Script + +1. Activate the virtual environment: + - Windows: `venv\Scripts\activate` + - Unix/macOS: `source venv/bin/activate` + +2. Run the script: + ``` + python invoices.py + ``` + +3. When prompted, manually log into Amazon in the browser window, then press Enter in the terminal to continue + +4. 
"""Combine the invoice PDFs in ``invoices/`` into a single ``receipts.pdf``."""
import os

# Directory containing the invoice PDFs
INVOICES_DIR = "invoices"
# Combined output file, written relative to the current working directory
OUTPUT_PATH = "receipts.pdf"


def list_pdf_files(directory):
    """Return the sorted names of the PDF files directly inside *directory*.

    The extension check is case-insensitive, so ``scan.PDF`` is included.
    Sub-directories are ignored even when their name ends in ``.pdf``.
    A missing directory yields an empty list instead of raising.
    """
    try:
        names = os.listdir(directory)
    except FileNotFoundError:
        return []
    return sorted(
        name
        for name in names
        if name.lower().endswith(".pdf")
        and os.path.isfile(os.path.join(directory, name))
    )


def page_selection(pdf_file):
    """Return the ``pages`` range to merge for *pdf_file*, or ``None`` for all.

    For Audible PDFs, only the first page is included.
    """
    if "Audible" in pdf_file:
        return (0, 1)
    return None


def combine_pdfs(invoices_dir=INVOICES_DIR, output_path=OUTPUT_PATH):
    """Merge every PDF in *invoices_dir* into *output_path*.

    Files are appended in sorted filename order. Returns the number of files
    merged; when there are none, nothing is written and 0 is returned.
    """
    pdf_files = list_pdf_files(invoices_dir)
    if not pdf_files:
        # Avoid producing an empty receipts.pdf when there is nothing to merge.
        print(f"No PDFs found in {invoices_dir}; nothing to combine")
        return 0

    # Imported here so the helpers above stay usable without PyPDF2 installed.
    from PyPDF2 import PdfMerger

    merger = PdfMerger()
    try:
        for pdf_file in pdf_files:
            pdf_path = os.path.join(invoices_dir, pdf_file)
            pages = page_selection(pdf_file)
            if pages is None:
                merger.append(pdf_path)
                print(f"Added: {pdf_file}")
            else:
                merger.append(pdf_path, pages=pages)
                print(f"Added (first page only): {pdf_file}")

        # Write the combined PDF
        merger.write(output_path)
    finally:
        # Close the merger even if one of the inputs fails to append.
        merger.close()

    print(f"\nCombined {len(pdf_files)} PDFs into {output_path}")
    return len(pdf_files)


if __name__ == "__main__":
    combine_pdfs()
# Amazon sign-in page opened first so the user can authenticate manually.
SIGN_IN_URL = "https://www.amazon.com/ap/signin?openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_signin&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.assoc_handle=usflex&openid.mode=checkid_setup&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0"

# Printable invoice page for a single order; ``{order}`` is the order ID.
INVOICE_URL = "https://www.amazon.com/gp/css/summary/print.html?orderID={order}&ref_=ppx_hzod_invoiceConns_dt_b_invoice"

# Parameters passed to the DevTools ``Page.printToPDF`` command for every order.
PRINT_OPTIONS = {
    'landscape': False,
    'displayHeaderFooter': False,
    'printBackground': True,
    'preferCSSPageSize': True,
}


def read_order_ids(path="orders.txt"):
    """Return the order IDs listed in *path*, one per line.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    spacer or trailing empty line does not turn into a request for an empty
    order ID and an output file named ``.pdf``. ``utf-8-sig`` is used so a
    leading byte-order mark does not end up inside the first ID.
    """
    with open(path, encoding="utf-8-sig") as orders_file:
        stripped = (line.strip() for line in orders_file)
        return [order for order in stripped if order]


def save_invoice_pdf(driver, order, output_dir):
    """Render the invoice page for *order* to ``<output_dir>/<order>.pdf``.

    Navigates *driver* to the invoice URL, pauses, asks the browser for a PDF
    through ``Page.printToPDF`` and writes the decoded bytes to disk.
    Returns the path of the written file.
    """
    driver.get(INVOICE_URL.format(order=order))
    # Fixed pause after each navigation, kept from the original flow.
    time.sleep(3)

    result = driver.execute_cdp_cmd("Page.printToPDF", PRINT_OPTIONS)

    # Save the PDF with the order ID as filename; the payload is base64 text.
    pdf_path = os.path.join(output_dir, f"{order}.pdf")
    with open(pdf_path, 'wb') as pdf_file:
        pdf_file.write(base64.b64decode(result['data']))
    return pdf_path


def main():
    """Log in manually, then save one PDF per order ID in ``orders.txt``.

    Relies on ``chrome_options`` and ``download_dir`` set up earlier in this
    script.
    """
    os.makedirs(download_dir, exist_ok=True)

    # Read the order list before launching Chrome so a missing or empty file
    # is reported without opening a browser and asking for a login.
    orders = read_order_ids("orders.txt")
    if not orders:
        print("No order IDs found in orders.txt")
        return

    prefs = {
        "printing.print_preview_sticky_settings.appState": '{"recentDestinations":[{"id":"Save as PDF","origin":"local","account":""}],"selectedDestinationId":"Save as PDF","version":2}',
        "savefile.default_directory": download_dir,
        "download.default_directory": download_dir,
        "download.prompt_for_download": False,
        "plugins.always_open_pdf_externally": True
    }
    chrome_options.add_experimental_option("prefs", prefs)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(SIGN_IN_URL)

        # Wait for manual login
        input("Log in and press Enter...")

        for order in orders:
            save_invoice_pdf(driver, order, download_dir)
            print(f"Saved: {order}.pdf")
    finally:
        # Always end the browser session, including when an order fails.
        driver.quit()


if __name__ == "__main__":
    main()