uspto.gov patent applications
The USPTO translator works for granted patents, but not for patent applications. Is this an oversight, or is there something about applications that makes them harder to import?
Here's an
example of a patent application page.
Here's an
example of a patent application page.
I've also tidied up the abstracts, fixed all the ugly newlines.
William Smith
www.willsmith.org/contactme/
Unfortunately, pasting it here strips all the indentation that was present in the original. I'll post to the Google group to try to get it submitted.
---
{
"translatorID":"232e24fe-2f68-44fc-9366-ecd45720ee9e",
"translatorType":4,
"label":"Patents - USPTO",
"creator":"Bill McKinney",
"target":"^http://(pat|app)ft\\.uspto\\.gov/netacgi/nph-Parser.+",
"minVersion":"1.0.0b4.r1",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2009-12-07 02:50:00"
}
// Modified by Will Smith (see www.willsmith.org/contactme/)
// to handle patent applications as well as granted ones.
/**
 * Classifies a page by its address.
 *
 * @param {Object} doc - Page document; only doc.location.href is read.
 * @param {string} url - Not used by this function.
 * @returns {string} "book" when the address starts with the patft/appft
 *   nph-Parser URL on uspto.gov, otherwise "multiple".
 */
function detectWeb(doc, url) {
  // A regex literal keeps "\." as a literal dot. When the same text is written
  // inside a string passed to RegExp, the backslash is dropped and "." matches
  // any character.
  var re = /^http:\/\/(pat|app)ft\.uspto\.gov\/netacgi\/nph-Parser/;
  if (re.test(doc.location.href)) {
    // NOTE(review): scrape() creates "patent" items while this reports "book";
    // confirm whether the mismatch is intentional.
    return "book";
  }
  return "multiple";
}
/**
 * Finds the nearest following sibling whose nodeType is 1.
 *
 * @param {Object} n - Node whose following siblings are searched.
 * @returns {Object|null} The first following sibling with nodeType 1, or null
 *   when the sibling list ends before one is found.
 */
function get_nextsibling(n) {
  var x = n.nextSibling;
  // Stop at the end of the sibling list instead of dereferencing a missing node.
  while (x && x.nodeType !== 1) {
    x = x.nextSibling;
  }
  return x || null;
}
/**
 * Builds a "patent" item from a single page and completes it.
 *
 * Reads doc.title, doc.location.href and the page's <font>, <td> and <center>
 * elements, fills in title, application/patent number and date, assignee,
 * inventors, references and abstract, then calls complete() on the item.
 *
 * @param {Object} doc - Page document to read.
 * @returns {undefined}
 */
function scrape(doc) {
  // Removes anything shaped like an HTML tag, including tags with attributes.
  function stripTags(html) {
    return String(html).replace(/<[^>]+>/g, "");
  }

  // Removes only attribute-less tags such as <b> or </b>.
  function stripSimpleTags(html) {
    return String(html).replace(/<\/?\w+>/g, "");
  }

  // Removes leading and trailing whitespace.
  function trim(text) {
    return text.replace(/^\s+|\s+$/g, "");
  }

  // innerHTML of a node, or "" when the node is missing or has no markup.
  function htmlOf(node) {
    return node && typeof node.innerHTML === "string" ? node.innerHTML : "";
  }

  // innerHTML of a node's first child, or "" when there is none (for example
  // when the first child is a text node).
  function firstChildHtmlOf(node) {
    var first = node && node.childNodes ? node.childNodes[0] : null;
    return htmlOf(first);
  }

  // Document number taken from a cell's first child, without tags or commas.
  function numberFrom(cell) {
    return stripTags(firstChildHtmlOf(cell)).replace(/,/g, "");
  }

  var newItem = new Zotero.Item("patent");
  newItem.url = doc.location.href;

  // Title: the page title followed by the text of every <font size="+1">.
  var tmpTitle = doc.title;
  var fontTags = doc.getElementsByTagName("font");
  for (var f = 0; f < fontTags.length; f++) {
    if (fontTags[f].getAttribute("size") == "+1") {
      tmpTitle = tmpTitle + " - " + fontTags[f].innerHTML;
    }
  }
  newItem.title = stripTags(Zotero.Utilities.cleanString(tmpTitle));

  var tmpRefs = "";
  var cellTags = doc.getElementsByTagName("td");
  for (var i = 0; i < cellTags.length; i++) {
    var s = htmlOf(cellTags[i]);

    // Applications must be tested first: their heading also contains the
    // shorter "United States Patent" text used for granted patents.
    if (s.indexOf("United States Patent Application") > -1) {
      newItem.applicationNumber = numberFrom(cellTags[i + 1]);
      newItem.applicationDate = stripTags(htmlOf(cellTags[i + 3]));
      newItem.issueDate = "";
      continue;
    }
    if (s.indexOf("United States Patent") > -1) {
      newItem.patentNumber = numberFrom(cellTags[i + 1]);
      newItem.issueDate = stripTags(htmlOf(cellTags[i + 3]));
      continue;
    }

    if (s.indexOf("Assignee") > -1) {
      newItem.assignee = stripSimpleTags(htmlOf(cellTags[i + 1]));
      continue;
    }

    if (s.indexOf("Inventors") > -1) {
      // Inventors are separated by "<b>," and written as "Last; First (place)".
      var inventors = htmlOf(cellTags[i + 1]).split(/<b>,/ig);
      for (var j = 0; j < inventors.length; j++) {
        var inventorText = stripSimpleTags(inventors[j]).replace(/\([^\)]+\)/g, "");
        var names = inventorText.split(";");
        var lname = trim(names[0]);
        // An entry without ";" has no first-name part; use "" rather than
        // failing on an undefined value.
        var fname = names.length > 1 ? trim(names[1]) : "";
        if (!lname && !fname) {
          continue;
        }
        newItem.creators.push({lastName:lname, firstName:fname, creatorType:"inventor"});
      }
      continue;
    }

    // references
    if (s.indexOf("<a href=\"/netacgi/nph-Parser?Sect2") > -1) {
      var patentRef = firstChildHtmlOf(cellTags[i]);
      if (patentRef) {
        tmpRefs = tmpRefs + patentRef + " ";
      }
    }
    if (s.indexOf("<a href=\"http://appft1.uspto.gov/netacgi/nph-Parser?TERM1") > -1) {
      var applicationRef = firstChildHtmlOf(cellTags[i]);
      if (applicationRef) {
        tmpRefs = tmpRefs + applicationRef + " ";
      }
    }
  }

  // Abstract: the element following a <center> that contains "Abstract",
  // with newlines replaced by spaces.
  var centerTags = doc.getElementsByTagName("center");
  for (var k = 0; k < centerTags.length; k++) {
    if (htmlOf(centerTags[k]).indexOf("Abstract") > -1) {
      var el = get_nextsibling(centerTags[k]);
      if (el && typeof el.innerHTML === "string") {
        newItem.abstractNote = el.innerHTML.replace(/\n/g, " ");
      }
    }
  }

  newItem.references = tmpRefs;
  newItem.complete();
}
/**
 * Entry point: scrapes the current page directly when its address matches the
 * patft/appft nph-Parser URL, otherwise collects matching links from the page,
 * lets the user pick among them, and scrapes each chosen document.
 *
 * @param {Object} doc - Page document; doc.location.href selects the branch.
 * @param {string} url - Not used by this function.
 * @returns {boolean|undefined} true when the selection step yields nothing,
 *   otherwise undefined.
 */
function doWeb(doc, url) {
  // The backslashes are doubled so the regex engine receives "\." (a literal
  // dot). With a single backslash in a string literal the escape is dropped
  // and "." matches any character.
  var pattern = "^http://(pat|app)ft\\.uspto\\.gov/netacgi/nph-Parser.+";
  var re = new RegExp(pattern);
  if (re.test(doc.location.href)) {
    scrape(doc);
  } else {
    var items = Zotero.Utilities.getItemArray(doc, doc, pattern);
    items = Zotero.selectItems(items);
    if (!items) {
      return true;
    }
    // The keys of the selected items are the addresses to load.
    var uris = [];
    for (var uri in items) {
      uris.push(uri);
    }
    Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc); },
      function() { Zotero.done(); }, null);
    Zotero.wait();
  }
}
// NOTE(review): this statement sits outside any function and repeats the
// processDocuments call made inside doWeb. In the visible source `uris` is
// declared only inside doWeb, so it is presumably unresolved at this scope.
// Confirm whether this is a paste artifact that should be removed.
Zotero.Utilities.processDocuments(
uris,
function(doc) {
scrape(doc)
},
function() {
Zotero.done();
},
null
);
See the zotero-dev Google group for the newly uploaded file.
David