uspto.gov patent applications
The USPTO translator works for granted patents, but not for patent applications. Is this an oversight, or is there something about applications that makes them harder to import?
Here's an example of a patent application page.
This is an old discussion that has not been active in a long time. Instead of commenting here, you should start a new discussion. If you think the content of this discussion is still relevant, you can link to it from your new discussion.
I've also tidied up the abstracts, fixed all the ugly newlines.
William Smith
www.willsmith.org/contactme/
Unfortunately, pasting it here kills all the indentation that was present in the original. I'll post to the Google group to try to get it submitted.
---
{
"translatorID":"232e24fe-2f68-44fc-9366-ecd45720ee9e",
"translatorType":4,
"label":"Patents - USPTO",
"creator":"Bill McKinney",
"target":"^http://(pat|app)ft\\.uspto\\.gov/netacgi/nph-Parser.+",
"minVersion":"1.0.0b4.r1",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2009-12-07 02:50:00"
}
// Modified by Will Smith (see www.willsmith.org/contactme/)
// to handle patent applications as well as granted ones.
/**
 * Reports what kind of page the translator is looking at.
 *
 * @param {Document} doc - Page being inspected; only doc.location.href is read.
 * @param {string} url - Unused; the address is taken from doc instead.
 * @returns {string} "book" when the address is an nph-Parser page on the
 *     patft or appft USPTO host, otherwise "multiple".
 */
function detectWeb(doc, url) {
    // A regex literal keeps its backslashes, so "\." matches only a literal dot.
    // Inside a quoted string "\." collapses to "." and would match any character.
    var re = /^http:\/\/(pat|app)ft\.uspto\.gov\/netacgi\/nph-Parser/;
    // NOTE(review): scrape() creates "patent" items while "book" is reported
    // here - confirm whether the mismatch is intentional.
    if (re.test(doc.location.href)) {
        return "book";
    }
    return "multiple";
}
/**
 * Finds the first element node that follows n among its siblings.
 *
 * @param {Node} n - Node whose following siblings are walked.
 * @returns {Node|null} The next sibling whose nodeType is 1, or null when the
 *     sibling list ends before one is found.
 */
function get_nextsibling(n)
{
    var x = n.nextSibling;
    // Stop at the end of the sibling list instead of dereferencing null.
    while (x && x.nodeType != 1)
    {
        x = x.nextSibling;
    }
    return x || null;
}
/**
 * Builds one Zotero "patent" item from a USPTO full-text page and completes it.
 *
 * The title is doc.title followed by the content of every <font size="+1">.
 * Every <td> is then inspected for the number/date header, the assignee, the
 * inventors and reference links, and the abstract is read from the element
 * that follows a <center> containing "Abstract".
 *
 * @param {Document} doc - Parsed patent or patent-application page.
 * @returns {undefined} The item is handed off through newItem.complete().
 */
function scrape(doc) {
    // Removes anything shaped like a tag, attributes included.
    function stripTags(html) {
        return html.replace(/<[^>]+>/gi, "");
    }

    // Removes only attribute-less tags such as <b> or </b>.
    function stripSimpleTags(html) {
        return html.replace(/<\/?\w+>/gi, "");
    }

    // Drops leading and trailing whitespace.
    function trim(text) {
        return text.replace(/^\s+/, "").replace(/\s+$/, "");
    }

    // innerHTML of a node, or "" when the node is missing or has no markup
    // (for example a text node or an index past the end of the list).
    function htmlOf(node) {
        return (node && typeof node.innerHTML == "string") ? node.innerHTML : "";
    }

    // innerHTML of a node's first child, or "" when there is none.
    function firstChildHtml(node) {
        return (node && node.childNodes) ? htmlOf(node.childNodes[0]) : "";
    }

    var newItem = new Zotero.Item("patent");
    newItem.url = doc.location.href;

    var tmpRefs = "";

    // Title: page title plus each enlarged <font> run, cleaned and de-tagged.
    var tmpTitle = doc.title;
    var fontTags = doc.getElementsByTagName("font");
    for (var f = 0; f < fontTags.length; f++) {
        if (fontTags[f].getAttribute("size") == "+1") {
            tmpTitle = tmpTitle + " - " + fontTags[f].innerHTML;
        }
    }
    tmpTitle = Zotero.Utilities.cleanString(tmpTitle);
    newItem.title = stripTags(tmpTitle);

    var cellTags = doc.getElementsByTagName("td");
    for (var i = 0; i < cellTags.length; i++) {
        var s = htmlOf(cellTags[i]);

        // Applications must be tested first: their header text also contains
        // the shorter "United States Patent" marker.
        if (s.indexOf("United States Patent Application") > -1) {
            newItem.applicationNumber = stripTags(firstChildHtml(cellTags[i + 1])).replace(/,/g, "");
            newItem.applicationDate = stripTags(htmlOf(cellTags[i + 3]));
            newItem.issueDate = "";
            continue;
        }
        if (s.indexOf("United States Patent") > -1) {
            newItem.patentNumber = stripTags(firstChildHtml(cellTags[i + 1])).replace(/,/g, "");
            newItem.issueDate = stripTags(htmlOf(cellTags[i + 3]));
            continue;
        }

        if (s.indexOf("Assignee") > -1) {
            newItem.assignee = stripSimpleTags(htmlOf(cellTags[i + 1]));
            continue;
        }

        if (s.indexOf("Inventors") > -1) {
            // Entries are separated by "<b>," and written as "Last; First (place)".
            var inventors = htmlOf(cellTags[i + 1]).split(/<b>,/ig);
            for (var j = 0; j < inventors.length; j++) {
                var tmpInventor = stripSimpleTags(inventors[j]);
                tmpInventor = tmpInventor.replace(/\([^\)]+\)/gi, "");
                var names = tmpInventor.split(";");
                var lname = trim(names[0]);
                // An entry without ";" has no second part; use "" rather than
                // calling replace() on undefined, which aborted the whole item.
                var fname = trim(names.length > 1 ? names[1] : "");
                if (lname == "" && fname == "") {
                    continue;
                }
                newItem.creators.push({lastName:lname, firstName:fname, creatorType:"inventor"});
            }
            continue;
        }

        // references
        if (s.indexOf("<a href=\"/netacgi/nph-Parser?Sect2") > -1) {
            tmpRefs = tmpRefs + firstChildHtml(cellTags[i]) + " ";
        }
        if (s.indexOf("<a href=\"http://appft1.uspto.gov/netacgi/nph-Parser?TERM1") > -1) {
            tmpRefs = tmpRefs + firstChildHtml(cellTags[i]) + " ";
        }
    }

    var centerTags = doc.getElementsByTagName("center");
    for (var k = 0; k < centerTags.length; k++) {
        if (htmlOf(centerTags[k]).indexOf("Abstract") > -1) {
            var el = get_nextsibling(centerTags[k]);
            if (el) {
                // Newlines become spaces so the abstract reads as one paragraph.
                newItem.abstractNote = htmlOf(el).replace(/\n/g, " ");
            }
        }
    }

    newItem.references = tmpRefs;
    newItem.complete();
}
/**
 * Translator entry point: scrapes the current page when its address is a
 * USPTO nph-Parser page, otherwise offers the matching links found in the
 * page for selection and scrapes each chosen one.
 *
 * @param {Document} doc - Page the translator was started on.
 * @param {string} url - Unused; the address is taken from doc.location.href.
 * @returns {boolean|undefined} true when the selection yields no items,
 *     otherwise nothing.
 */
function doWeb(doc, url) {
    // Doubled backslashes survive string parsing, so the dots in the host name
    // stay literal both here and in the pattern handed to getItemArray.
    var pagePattern = "^http://(pat|app)ft\\.uspto\\.gov/netacgi/nph-Parser.+";
    var re = new RegExp(pagePattern);

    // NOTE(review): the translator header's "target" uses this same pattern,
    // so the link-list branch below may never be reached in practice - confirm.
    if (re.test(doc.location.href)) {
        scrape(doc);
    } else {
        var items = Zotero.Utilities.getItemArray(doc, doc, pagePattern);
        items = Zotero.selectItems(items);
        if (!items) {
            return true;
        }

        // The keys of the selection are the addresses to fetch.
        var uris = [];
        for (var i in items) {
            uris.push(i);
        }

        Zotero.Utilities.processDocuments(
            uris,
            function(doc) { scrape(doc); },
            function() { Zotero.done(); },
            null
        );
        Zotero.wait();
    }
}
// NOTE(review): this looks like a re-wrapped copy of the processDocuments call
// made inside doWeb. `uris` is only declared within doWeb, so confirm whether
// this fragment is meant to run at top level at all.
Zotero.Utilities.processDocuments(
uris,
function(doc) {
scrape(doc)
},
function() {
Zotero.done();
},
null
);
See the zotero-dev Google group for the newly uploaded file.
David