Attaching PDFs stored locally to Zotero items using PyZotero/API

I'm working on a project that involves a JSON file containing the metadata for a large list of papers (the list is updated every week) and a folder called "pdfs" that holds the PDFs for most of those papers. Each PDF is named after the paper's DOI, with any '/' replaced by '_'. I want to associate each PDF with its paper in the JSON, and also allow either of them to be entered manually, so that users can modify this database by hand if necessary.

I'm having a lot of trouble linking each PDF to its item in Zotero. I suspect this is because all the PDFs are stored locally on my hard drive, and I access them by their local paths when running the Python program that stores everything in Zotero. The Zotero storage code is below for reference. I give it the PDF path, but the Zotero application then shows the attachment as an empty file, I assume because it looks for the file in a directory under the Zotero storage folder (e.g. /home/USER/Zotero/storage/9CIJS9P5/pdfs/10.18043_001c.120572.pdf) instead of /home/USER/Documents/ORG/ALR ORG/pdfs/10.18043_001c.120572.pdf.

import json
import os
from dotenv import load_dotenv
from pyzotero import zotero

# Load environment variables
load_dotenv(".env")
API_KEY = os.getenv("ZOTERO_API_KEY")
USER_ID = os.getenv("ZOTERO_USER_ID")

# Initialize Zotero client.
# Missing credentials are reported through the same error path as a
# constructor failure, and the script stops with a non-zero status so that a
# calling shell or scheduler can tell the run did not start.
try:
    if not API_KEY or not USER_ID:
        raise ValueError(
            "ZOTERO_API_KEY and ZOTERO_USER_ID must both be set in .env"
        )
    # NOTE(review): the ID is read from ZOTERO_USER_ID but the library type
    # passed here is 'group'. If this is a personal library the type should
    # presumably be 'user' -- confirm which library is intended.
    zot = zotero.Zotero(USER_ID, 'group', API_KEY)
except Exception as e:
    print(f"Error connecting to Zotero: {str(e)}")
    print("Please check your library ID, user type, and API key.")
    raise SystemExit(1) from None

# Load your JSON data.
# The encoding is given explicitly because JSON files are normally UTF-8,
# while open() would otherwise fall back to the platform's default encoding.
with open('relevant_papers_fulltext.json', 'r', encoding='utf-8') as f:
    papers = json.load(f)

def _creator_from_name(author):
    """Build a Zotero creator entry from a free-form author name.

    Returns None for names that are empty or not strings, a single-field
    ``name`` creator for one-word names, and otherwise a two-field creator
    whose last word is the last name and everything before it the first name.
    """
    parts = author.split() if isinstance(author, str) else []
    if not parts:
        return None
    if len(parts) == 1:
        return {'creatorType': 'author', 'name': parts[0]}
    return {
        'creatorType': 'author',
        'firstName': ' '.join(parts[:-1]),
        'lastName': parts[-1],
    }


def _doi_in_library(client, doi):
    """Return True if a journal article with exactly this DOI already exists.

    The quick-search parameter ``q`` is a plain phrase search, not a
    ``field:value`` query, so the bare DOI is searched for and the hits are
    then confirmed by comparing their DOI field.
    NOTE(review): relies on ``qmode='everything'`` covering the DOI field --
    confirm against the Zotero Web API documentation.
    """
    wanted = doi.strip().lower()
    candidates = client.everything(
        client.items(q=doi, qmode='everything', itemType='journalArticle')
    )
    return any(
        (item.get('data', {}).get('DOI') or '').strip().lower() == wanted
        for item in candidates
    )


def _attach_pdf(client, pdf_path, item_key):
    """Upload the PDF at *pdf_path* as a child attachment of *item_key*.

    The file is passed by bare file name from inside its own directory.
    NOTE(review): the reported symptom (Zotero looking under
    storage/<key>/pdfs/<file>) suggests the path handed to attachment_simple
    is stored verbatim as the attachment's filename -- verify against the
    installed pyzotero version.
    """
    pdf_dir, pdf_name = os.path.split(os.path.abspath(pdf_path))
    previous_dir = os.getcwd()
    os.chdir(pdf_dir)
    try:
        client.attachment_simple([pdf_name], item_key)
    finally:
        # Always restore the working directory so later relative paths
        # (e.g. the next os.path.exists check) keep resolving as before.
        os.chdir(previous_dir)


for paper in papers:
    doi = paper.get('doi')
    # Read the title once with a default so log messages cannot raise
    # KeyError for records that have no title.
    title = paper.get('title', '')

    if not doi:
        print(f"No DOI found for paper: {title}")
        continue

    # Check if the paper already exists in the Zotero library
    if _doi_in_library(zot, doi):
        print(f"Paper with DOI {doi} already exists in the library. Skipping...")
        continue

    # Create item metadata; unusable author entries are skipped.
    creators = [
        creator
        for creator in map(_creator_from_name, paper.get('authors', []))
        if creator is not None
    ]
    item_data = {
        'itemType': 'journalArticle',
        'DOI': doi,
        'title': title,
        'abstractNote': paper.get('abstract', ''),
        'publicationTitle': paper.get('journal', ''),
        'date': paper.get('publication_date', ''),
        'creators': creators,
    }

    # Create the item in Zotero
    try:
        items = zot.create_items([item_data])
    except Exception as e:
        print(f"Error adding paper with DOI {doi}: {str(e)}")
        continue

    # Only one item is submitted, so its result is looked up under index '0'.
    successful = items.get('successful') if isinstance(items, dict) else None
    created = successful.get('0') if successful else None
    item_key = created.get('key') if created else None
    if not item_key:
        print(f"Failed to add paper with DOI: {doi}")
        continue

    # Attach the PDF if it exists
    pdf_filename = f"{doi.replace('/', '_')}.pdf"
    pdf_path = os.path.join("pdfs", pdf_filename)

    if not os.path.exists(pdf_path):
        print(f"PDF file for {title} not found at {pdf_path}.")
        continue

    try:
        # Attach the PDF using the Zotero file attachment API
        _attach_pdf(zot, pdf_path, item_key)
        print(f"Added paper and attached PDF: {title}")
    except Exception as e:
        if "quota" in str(e).lower():
            print(f"Error attaching PDF for {title}: File would exceed quota.")
        else:
            print(f"Error attaching PDF for {title}: {str(e)}")

Image of the resulting Zotero library:
https://s3.amazonaws.com/zotero.org/images/forums/u9780326/dij98xpy414b1cy37dva.png
Sign In or Register to comment.