uspto.gov patent applications

aaronkaplan · December 4, 2009

The USPTO translator works for granted patents, but not for patent applications. Is this an oversight, or is there something about applications that makes them harder to import?

Here's an example of a patent application page.

willsmithorg · December 6, 2009

Try replacing the Patents - USPTO.js file in your Zotero translators folder with this, which I've edited to also handle applications. I'm not sure whether it will be approved for inclusion in Zotero itself, because there is currently no category for Patent Applications, only Patents.

I've also tidied up the abstracts, fixed all the ugly newlines.

William Smith
www.willsmith.org/contactme/

Unfortunately pasting it here kills all the indenting which was present in the original. I'll post to the google group to try to get it submitted.

---


{
	"translatorID":"232e24fe-2f68-44fc-9366-ecd45720ee9e",
	"translatorType":4,
	"label":"Patents - USPTO",
	"creator":"Bill McKinney",
	"target":"^http://(pat|app)ft\\.uspto\\.gov/netacgi/nph-Parser.+",
	"minVersion":"1.0.0b4.r1",
	"maxVersion":"",
	"priority":100,
	"inRepository":true,
	"lastUpdated":"2009-12-07 02:50:00"
}

// Modified by Will Smith (see www.willsmith.org/contactme/)
// to handle patent applications as well as granted ones.

/**
 * Reports which kind of page the translator is looking at.
 *
 * @param doc  the loaded page; only doc.location.href is read
 * @param url  unused; kept for the translator entry-point signature
 * @return "book" when the page URL is a USPTO nph-Parser page on the patft
 *         or appft host, "multiple" otherwise
 */
function detectWeb(doc, url) {
	// A regex literal keeps "\." as an escaped dot. The previous
	// new RegExp("...\.uspto\.gov...") lost the backslashes during
	// string-literal parsing, so the dots matched any character.
	var re = /^http:\/\/(pat|app)ft\.uspto\.gov\/netacgi\/nph-Parser/;
	if(re.test(doc.location.href)) {
		// NOTE(review): scrape() creates "patent" items, yet "book" is
		// returned here; left unchanged -- confirm whether that is intended.
		return "book";
	} else {
		return "multiple";
	}
}

/**
 * Returns the first following sibling of n whose nodeType is 1 (an element
 * node), skipping any other siblings in between.
 *
 * @param n  the node to start from; only n.nextSibling is read
 * @return the next element sibling, or null when there is none
 */
function get_nextsibling(n)
{
	var x = n.nextSibling;
	// Stop when the sibling chain runs out instead of dereferencing null.
	while (x && x.nodeType != 1) {
		x = x.nextSibling;
	}
	return x || null;
}

/**
 * Scrapes one USPTO full-text page (granted patent or patent application)
 * into a Zotero "patent" item and saves it with complete().
 *
 * Reads from the page:
 *   - <font size="+1"> elements, appended to doc.title to form the title
 *   - <td> cells, for the number/date header, assignee, inventors and
 *     reference links
 *   - the element following a <center> that contains "Abstract"
 *
 * @param doc  the loaded page; doc.location.href becomes the item URL
 */
function scrape(doc) {

	var newItem = new Zotero.Item("patent");
	newItem.url = doc.location.href;
	var tmpStr = "";
	var tmpRefs = "";
	var tmpTitle = doc.title;
	
	// Title: the document title followed by every <font size="+1"> heading.
	var fontTags = doc.getElementsByTagName("font");
	for(var i=0; i<fontTags.length; i++) {
		if (fontTags[i].getAttribute("size") == "+1") {
			tmpTitle = tmpTitle + " - " + fontTags[i].innerHTML;
		}
	}
	tmpTitle = Zotero.Utilities.cleanString(tmpTitle);
	tmpTitle = tmpTitle.replace(/<[^>]+>/g, "");
	newItem.title = tmpTitle;
	
	var cellTags = doc.getElementsByTagName("td");
	for(var i=0; i<cellTags.length; i++) {

		var s = String(cellTags[i].innerHTML);

		// Test for applications first: "United States Patent" is a substring
		// of "United States Patent Application".
		if (s.indexOf("United States Patent Application") > -1) {
			
			// The number is read from the next cell, the date from the
			// third cell after the label.
			tmpStr = cellTags[i+1].childNodes[0].innerHTML;
			tmpStr = tmpStr.replace(/<[^>]+>/gi, "");
			tmpStr = tmpStr.replace(/,/gi, "");
			newItem.applicationNumber = tmpStr;
			
			tmpStr = cellTags[i+3].innerHTML;
			tmpStr = tmpStr.replace(/<[^>]+>/gi, "");
			// NOTE(review): "applicationDate" is kept from the original;
			// confirm it is a field name Zotero accepts for patent items.
			newItem.applicationDate = tmpStr;
			newItem.issueDate = "";
			continue;
		} else if (s.indexOf("United States Patent") > -1) {
			
			tmpStr = cellTags[i+1].childNodes[0].innerHTML;
			tmpStr = tmpStr.replace(/<[^>]+>/gi, "");
			tmpStr = tmpStr.replace(/,/gi, "");
			newItem.patentNumber = tmpStr;
			
			tmpStr = cellTags[i+3].innerHTML;
			tmpStr = tmpStr.replace(/<[^>]+>/gi, "");
			newItem.issueDate = tmpStr;
			continue;
		}
		if (s.indexOf("Assignee") > -1) {
			tmpStr = cellTags[i+1].innerHTML;
			tmpStr = tmpStr.replace(/<\/?\w+>/gi, "");
			newItem.assignee = tmpStr;
			continue;
		}
		if (s.indexOf("Inventors") > -1) {
			tmpStr = cellTags[i+1].innerHTML;
			
			// Inventors are separated by "<b>,"; each entry is expected to
			// look like "Last; First (City, ST)".
			var inventors = tmpStr.split(/<b>,/ig);
			for (var j=0; j<inventors.length; j++) {
				var tmpInventor = inventors[j];
				tmpInventor = tmpInventor.replace(/<\/?\w+>/gi, "");
				tmpInventor = tmpInventor.replace(/\([^\)]+\)/gi, "");
				tmpInventor = tmpInventor.replace(/^\s+/, "");
				
				var names = tmpInventor.split(";");
				// split() always yields at least one element, but the
				// first-name part is absent when the entry has no ";".
				var lname = names[0];
				var fname = names.length > 1 ? names[1] : "";
				lname = lname.replace(/^\s+/, "").replace(/\s+$/, "");
				fname = fname.replace(/^\s+/, "").replace(/\s+$/, "");
				// Skip fragments that carry no name at all.
				if (lname || fname) {
					newItem.creators.push({lastName:lname, firstName:fname, creatorType:"inventor"});
				}
			}
			continue;
		}
		
		// references: link text of cells that link back into the parser
		if (s.indexOf("<a href=\"/netacgi/nph-Parser?Sect2") > -1) {
			tmpRefs = tmpRefs + cellTags[i].childNodes[0].innerHTML + " ";
		}
		if (s.indexOf("<a href=\"http://appft1.uspto.gov/netacgi/nph-Parser?TERM1") > -1) {
			tmpRefs = tmpRefs + cellTags[i].childNodes[0].innerHTML + " ";
		}
	}
	
	// Abstract: the element that follows the <center> heading, with
	// newlines flattened to spaces.
	var centerTags = doc.getElementsByTagName("center");
	for(var i=0; i<centerTags.length; i++) {
		var s = String(centerTags[i].innerHTML);
		if (s.indexOf("Abstract") > -1) {
			var el = get_nextsibling(centerTags[i]);
			// Nothing to record if no element follows the heading.
			if (el) {
				newItem.abstractNote = el.innerHTML.replace(/\n/g, " ");
			}
		}
	}

	newItem.references = tmpRefs;
	newItem.complete();
}

/**
 * Translator entry point. Scrapes the current page when its URL is a USPTO
 * nph-Parser page; otherwise collects matching links from the page, lets
 * the user pick some via Zotero.selectItems, and scrapes each chosen one.
 *
 * @param doc  the loaded page
 * @param url  unused; doc.location.href is read instead
 * @return true when the user selects nothing; otherwise no value
 */
function doWeb(doc, url) {
	// Backslashes are doubled so that "\." survives string-literal parsing
	// and reaches the regex engine as an escaped dot. The previous single
	// backslashes were dropped, leaving dots that matched any character.
	var pagePattern = "^http://(pat|app)ft\\.uspto\\.gov/netacgi/nph-Parser.+";
	var re = new RegExp(pagePattern);
	if(re.test(doc.location.href)) {
		scrape(doc);
	} else {
		var items = Zotero.Utilities.getItemArray(doc, doc, pagePattern);
		items = Zotero.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		// The keys of the selection are what get passed on as URIs.
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Zotero.done(); }, null);
		
		// presumably keeps the translator alive until Zotero.done() runs
		// -- confirm against the Zotero translator API
		Zotero.wait();
	}
}

fbennett · December 6, 2009

Always best to put code on the zotero-dev site. But to preserve indenting you can use a <code> tag:

// Passes to processDocuments: the uris list, a per-document callback that
// calls scrape(doc), a second callback that calls Zotero.done(), and null.
Zotero.Utilities.processDocuments(
   uris,
   function(doc) {
      scrape(doc)
   },
   function() { 
      Zotero.done(); 
   }, 
   null
);

cavallad · March 17, 2010

I have updated Will Smith's code to cater for later applications (where the URL begins http://appft1 rather than http://appft.)

See the zotero-dev Google group for the newly uploaded file.

David