initial commit
This commit is contained in:
28
.gitignore
vendored
Normal file
28
.gitignore
vendored
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
|
||||||
|
# Virtual Environment
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
ENV/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Project specific
|
||||||
|
orders.txt
|
||||||
|
*.pdf
|
||||||
|
downloads/
|
||||||
|
invoices/
|
||||||
39
CLAUDE.md
Normal file
39
CLAUDE.md
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
This is a Python automation script that downloads Amazon order invoices in bulk using Selenium WebDriver. It reads order IDs from [orders.txt](orders.txt), navigates to each order's invoice page, and saves the page as a PDF in the `invoices/` directory.
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- Python 3.x with the `selenium` package (and `PyPDF2` for `combine_pdfs.py`)
|
||||||
|
- Chrome browser with ChromeDriver
|
||||||
|
- Virtual environment in `venv/` directory
|
||||||
|
|
||||||
|
## Running the Script
|
||||||
|
|
||||||
|
1. Activate the virtual environment:
|
||||||
|
- Windows: `venv\Scripts\activate`
|
||||||
|
- Unix/macOS: `source venv/bin/activate`
|
||||||
|
|
||||||
|
2. Run the script:
|
||||||
|
```
|
||||||
|
python invoices.py
|
||||||
|
```
|
||||||
|
|
||||||
|
3. When prompted, manually log into Amazon in the browser window, then press Enter in the terminal to continue
|
||||||
|
|
||||||
|
4. The script will iterate through each order ID in [orders.txt](orders.txt) and save the invoice as `invoices/<order ID>.pdf`
|
||||||
|
|
||||||
|
## Input Format
|
||||||
|
|
||||||
|
[orders.txt](orders.txt) should contain one Amazon order ID per line (format: `XXX-XXXXXXX-XXXXXXX`)
|
||||||
|
|
||||||
|
## Architecture Notes
|
||||||
|
|
||||||
|
- The script uses direct URL construction to access invoice print pages: `https://www.amazon.com/gp/css/summary/print.html?orderID={order}`
|
||||||
|
- Manual authentication is required due to Amazon's login protections
|
||||||
|
- 3-second delay between orders to avoid rate limiting
|
||||||
|
- PDFs are generated through the Chrome DevTools Protocol (`Page.printToPDF`) and written to `invoices/` automatically - no print dialog or manual saving is involved
|
||||||
31
combine_pdfs.py
Normal file
31
combine_pdfs.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from PyPDF2 import PdfMerger, PdfReader
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Directory containing the invoice PDFs
invoices_dir = "invoices"

# Path of the combined PDF written by this script
output_path = "receipts.pdf"


def list_invoice_pdfs(directory):
    """Return the sorted names of the PDF files located in *directory*.

    The extension check is case-insensitive so that files saved as ``.PDF``
    are not silently skipped, and sub-directories whose name happens to end
    in ``.pdf`` are ignored.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(".pdf")
        and os.path.isfile(os.path.join(directory, name))
    )


def main():
    """Merge every invoice PDF in ``invoices_dir`` into ``output_path``.

    Files whose name contains "Audible" contribute only their first page;
    every other file is appended in full.
    """
    if not os.path.isdir(invoices_dir):
        raise SystemExit(f"Invoice directory not found: {invoices_dir}")

    pdf_files = list_invoice_pdfs(invoices_dir)
    if not pdf_files:
        # Avoid writing an empty, useless output file.
        print(f"No PDFs found in {invoices_dir}; nothing to combine")
        return

    merger = PdfMerger()
    try:
        for pdf_file in pdf_files:
            pdf_path = os.path.join(invoices_dir, pdf_file)

            # For Audible PDFs, only include the first page
            if "Audible" in pdf_file:
                merger.append(pdf_path, pages=(0, 1))
                print(f"Added (first page only): {pdf_file}")
            else:
                merger.append(pdf_path)
                print(f"Added: {pdf_file}")

        # Write the combined PDF
        merger.write(output_path)
    finally:
        # Release the merger's file handles even if an append/write failed.
        merger.close()

    print(f"\nCombined {len(pdf_files)} PDFs into {output_path}")


if __name__ == "__main__":
    main()
|
||||||
50
invoices.py
Normal file
50
invoices.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
import base64
|
||||||
|
|
||||||
|
# File holding one Amazon order ID per line
ORDERS_FILE = "orders.txt"

# Seconds to wait after navigating to an invoice page before printing it
PAGE_LOAD_DELAY_SECONDS = 3

SIGN_IN_URL = "https://www.amazon.com/ap/signin?openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_signin&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.assoc_handle=usflex&openid.mode=checkid_setup&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0"


def read_order_ids(path=ORDERS_FILE):
    """Return the order IDs listed in *path*, one per line.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing newline or spacer line never turns into an empty order ID.
    """
    # utf-8-sig accepts plain ASCII/UTF-8 and also drops a leading BOM.
    with open(path, encoding="utf-8-sig") as orders_file:
        stripped = (line.strip() for line in orders_file)
        return [order for order in stripped if order]


def invoice_url(order):
    """Return the printable invoice page URL for *order*."""
    return f"https://www.amazon.com/gp/css/summary/print.html?orderID={order}&ref_=ppx_hzod_invoiceConns_dt_b_invoice"


def build_driver(download_dir):
    """Start Chrome with preferences that direct saved files to *download_dir*."""
    # Configure Chrome to save PDFs automatically
    chrome_options = Options()
    prefs = {
        "printing.print_preview_sticky_settings.appState": '{"recentDestinations":[{"id":"Save as PDF","origin":"local","account":""}],"selectedDestinationId":"Save as PDF","version":2}',
        "savefile.default_directory": download_dir,
        "download.default_directory": download_dir,
        "download.prompt_for_download": False,
        "plugins.always_open_pdf_externally": True
    }
    chrome_options.add_experimental_option("prefs", prefs)
    return webdriver.Chrome(options=chrome_options)


def save_invoice_pdf(driver, order, download_dir):
    """Load the invoice page for *order* and write it to ``<order>.pdf``."""
    driver.get(invoice_url(order))
    time.sleep(PAGE_LOAD_DELAY_SECONDS)

    # Save as PDF with order ID as filename
    print_options = {
        'landscape': False,
        'displayHeaderFooter': False,
        'printBackground': True,
        'preferCSSPageSize': True,
    }
    result = driver.execute_cdp_cmd("Page.printToPDF", print_options)

    # The command result carries the PDF as base64 text under 'data'.
    pdf_path = os.path.join(download_dir, f"{order}.pdf")
    with open(pdf_path, 'wb') as pdf_file:
        pdf_file.write(base64.b64decode(result['data']))

    print(f"Saved: {order}.pdf")


def main():
    """Log in manually, then save every order's invoice as a PDF."""
    # Read the orders first so a missing/empty file fails before the
    # browser is launched and the user has gone through the login.
    orders = read_order_ids()
    if not orders:
        print(f"No order IDs found in {ORDERS_FILE}")
        return

    download_dir = os.path.join(os.getcwd(), "invoices")
    os.makedirs(download_dir, exist_ok=True)

    driver = build_driver(download_dir)
    try:
        driver.get(SIGN_IN_URL)

        # Wait for manual login
        input("Log in and press Enter...")

        for order in orders:
            save_invoice_pdf(driver, order, download_dir)
    finally:
        # Always shut the browser down so no Chrome/driver process is leaked.
        driver.quit()


if __name__ == "__main__":
    main()
|
||||||
BIN
requirements.txt
Normal file
BIN
requirements.txt
Normal file
Binary file not shown.
Reference in New Issue
Block a user