initial commit

2026-03-23 20:25:30 -05:00
commit dfe04db493
5 changed files with 148 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,28 @@
 # Python
 __pycache__/
 *.py[cod]
 *$py.class
 *.so
 .Python
 # Virtual Environment
 venv/
 env/
 ENV/
 # IDE
 .vscode/
 .idea/
 *.swp
 *.swo
 *~
 # OS
 .DS_Store
 Thumbs.db
 # Project specific
 orders.txt
 *.pdf
 downloads/
 invoices/
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,39 @@
 # CLAUDE.md
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 ## Project Overview
 This is a Python automation script that downloads Amazon order invoices in bulk using Selenium WebDriver. It reads order IDs from [orders.txt](orders.txt), navigates to each order's invoice page, and saves the page as a PDF in the `invoices/` directory.
 ## Dependencies
 - Python 3.x with the `selenium` package (plus `PyPDF2` for `combine_pdfs.py`)
 - Chrome browser with ChromeDriver
 - Virtual environment in `venv/` directory
 ## Running the Script
 1. Activate the virtual environment:
   - Windows: `venv\Scripts\activate`
   - Unix/macOS: `source venv/bin/activate`
 2. Run the script:
   ```
   python invoices.py
   ```
 3. When prompted, manually log into Amazon in the browser window, then press Enter in the terminal to continue
 4. The script will iterate through each order ID in [orders.txt](orders.txt) and save the invoice as `invoices/<order ID>.pdf`
 ## Input Format
 [orders.txt](orders.txt) should contain one Amazon order ID per line (format: `XXX-XXXXXXX-XXXXXXX`)
 ## Architecture Notes
 - The script uses direct URL construction to access invoice print pages: `https://www.amazon.com/gp/css/summary/print.html?orderID={order}`
 - Manual authentication is required due to Amazon's login protections
 - 3-second delay between orders to avoid rate limiting
 - PDFs are generated with the Chrome DevTools `Page.printToPDF` command and written automatically to `invoices/<order ID>.pdf` - no print dialog or manual saving is involved
--- a/combine_pdfs.py
+++ b/combine_pdfs.py
@@ -0,0 +1,31 @@
 from PyPDF2 import PdfMerger, PdfReader
 import os
 # Merge every invoice PDF found in `invoices/` into a single `receipts.pdf`.
 # Folder that holds the individual invoice PDFs
 invoices_dir = "invoices"
 # File names ending in ".pdf", in sorted order
 pdf_files = sorted(name for name in os.listdir(invoices_dir) if name.endswith('.pdf'))
 # Merger object that accumulates the pages of each input file
 merger = PdfMerger()
 for pdf_file in pdf_files:
    pdf_path = os.path.join(invoices_dir, pdf_file)
    if "Audible" not in pdf_file:
        # Regular invoice: take the whole document
        merger.append(pdf_path)
        print(f"Added: {pdf_file}")
        continue
    # File names containing "Audible" contribute their first page only
    merger.append(pdf_path, pages=(0, 1))
    print(f"Added (first page only): {pdf_file}")
 # Emit the combined document into the current working directory
 output_path = "receipts.pdf"
 merger.write(output_path)
 merger.close()
 print(f"\nCombined {len(pdf_files)} PDFs into {output_path}")
--- a/invoices.py
+++ b/invoices.py
@@ -0,0 +1,50 @@
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 import time
 import os
 import base64
 # Download one invoice PDF per Amazon order ID listed in orders.txt.
 #
 # Flow: configure Chrome, read the order IDs, open the Amazon sign-in page,
 # wait for the user to log in manually, then render each order's invoice page
 # to PDF through the DevTools protocol and write it to invoices/<order>.pdf.

 # Configure Chrome to save PDFs automatically
 chrome_options = Options()
 download_dir = os.path.join(os.getcwd(), "invoices")
 os.makedirs(download_dir, exist_ok=True)
 prefs = {
    "printing.print_preview_sticky_settings.appState": '{"recentDestinations":[{"id":"Save as PDF","origin":"local","account":""}],"selectedDestinationId":"Save as PDF","version":2}',
    "savefile.default_directory": download_dir,
    "download.default_directory": download_dir,
    "download.prompt_for_download": False,
    "plugins.always_open_pdf_externally": True
 }
 chrome_options.add_experimental_option("prefs", prefs)
 # Read the order IDs before launching the browser so that a missing
 # orders.txt fails immediately instead of after the manual login.
 # Blank lines are skipped: an empty ID would request a bogus invoice URL
 # and write a file literally named ".pdf".
 with open("orders.txt") as orders_file:
    orders = [line.strip() for line in orders_file if line.strip()]
 driver = webdriver.Chrome(options=chrome_options)
 try:
    driver.get("https://www.amazon.com/ap/signin?openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_signin&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.assoc_handle=usflex&openid.mode=checkid_setup&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0")
    # Wait for manual login
    input("Log in and press Enter...")
    for order in orders:
        url = f"https://www.amazon.com/gp/css/summary/print.html?orderID={order}&ref_=ppx_hzod_invoiceConns_dt_b_invoice"
        driver.get(url)
        # Fixed pause after each navigation before printing
        time.sleep(3)
        # Save as PDF with order ID as filename
        print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }
        result = driver.execute_cdp_cmd("Page.printToPDF", print_options)
        # The command result carries the PDF as base64 text under 'data'
        pdf_path = os.path.join(download_dir, f"{order}.pdf")
        with open(pdf_path, 'wb') as pdf_file:
            pdf_file.write(base64.b64decode(result['data']))
        print(f"Saved: {order}.pdf")
 finally:
    # Always end the browser session, even if an order fails midway
    driver.quit()
--- a/requirements.txt
+++ b/requirements.txt