Missing Publisher field in Google Books site translator
Hi All,
I am new to zotero, and I am glad it is saving me a lot of typing...
I have just noticed that when "scraping" data from books.google.com, Zotero seems to omit the publisher field (this field is present when scraping from Amazon). Has anybody else noticed the same problem? Does it work for you?
I am running Zotero 1.0.9 on Firefox 3.0.5
Thank you in advance,
Andrea
I am new to zotero, and I am glad it is saving me a lot of typing...
I have just noticed that when "scraping" data from books.google.com, Zotero seems to omit the publisher field (this field is present when scraping from Amazon). Has anybody else noticed the same problem? Does it work for you?
I am running Zotero 1.0.9 on Firefox 3.0.5
Thank you in advance,
Andrea
Try testing on both of these pages:
http://books.google.com/books?id=MKtr_svfY1kC
http://books.google.com/books?id=MKtr_svfY1kC&pg=PA268&dq=tiziana+terranova
Can you confirm that your Zotero installations properly retrieve the publisher field when importing from books.google.com? (Mine does not.)
Please, let me know...
Although I did not test it extensively (so no warranties), I add here my reworked version of the code. I guess that the most convenient way to update the script is to install the nice Scaffold add-on, edit the google books translator, and replace the javascript in the code tab with the following one:
(find the original script here)
/**
 * Zotero web translator entry point for Google Books.
 *
 * Decides which book pages to scrape: the current page when its URL carries
 * a book id, otherwise whichever result links the user picks through
 * Zotero.selectItems. Each page is loaded with
 * Zotero.Utilities.processDocuments and turned into a "book" item using the
 * title, author, publisher/date, ISBN, page-count and contributor fields
 * found in the page markup.
 *
 * @param {Document} doc  Page the translator was invoked on; only
 *                        doc.location.href is read directly, the document is
 *                        also handed to Zotero.Utilities.getItemArray.
 * @param {String}   url  URL of that page, used to extract the local Google
 *                        domain suffix (whatever follows "books.google.").
 * @returns {Boolean|undefined} true when there is nothing to scrape (the URL
 *          is not a Google Books URL, or the user cancelled the selection
 *          dialog); otherwise nothing, after Zotero.wait() has been called.
 */
function doWeb(doc, url) {
	// get local domain suffix
	// (backslashes are doubled so the dots stay literal inside the string)
	var suffixRe = new RegExp("https?://books\\.google\\.([^/]+)/");
	var suffixMatch = suffixRe.exec(url);
	if(!suffixMatch) {
		// Without a suffix no book URL can be built; bail out the same way
		// the cancelled-selection path does instead of throwing a TypeError.
		Zotero.debug("Not a Google Books URL: " + url);
		return true;
	}
	var suffix = suffixMatch[1];
	var bookUriPrefix = 'http://books.google.' + suffix + '/books?id=';

	var uri = doc.location.href;
	var newUris = [];

	// Group 2 is the book id. Accepts the same schemes as suffixRe.
	var re = new RegExp('^https?://books\\.google\\.[a-z]+(\\.[a-z]+)?/books\\?id=([^&]+)', 'i');
	var m = re.exec(uri);
	if(m) {
		// Single book page: scrape its canonical (parameter-free) URL.
		newUris.push(bookUriPrefix + m[2]);
	} else {
		// Listing page: collect book links and let the user choose.
		var items = Zotero.Utilities.getItemArray(doc, doc, 'http://books\\.google\\.' + suffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)');

		// Drop " - Page" thing
		for(var i in items) {
			items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
		}

		items = Zotero.selectItems(items);
		if(!items) {
			return true;
		}

		// Keys of the selection are the link URLs.
		for(var link in items) {
			var linkMatch = re.exec(link);
			// Skip links that carry no recognisable book id rather than
			// dereferencing a null match.
			if(linkMatch) {
				newUris.push(bookUriPrefix + linkMatch[2]);
			}
		}
	}
	Zotero.debug(newUris);

	Zotero.Utilities.processDocuments(newUris, function(newDoc) {
		var newItem = new Zotero.Item("book");
		newItem.extra = "";

		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == 'x') return namespace; else return null;
		} : null;

		var elmt;

		// Title
		elmt = newDoc.evaluate('//h2[@class="title"]', newDoc, nsResolver,
			XPathResult.ANY_TYPE, null).iterateNext();
		if(elmt) {
			var title = Zotero.Utilities.superCleanString(elmt.textContent);
			newItem.title = title;
			Zotero.debug("title: " + title);
		}

		// Authors: "By First Author, Second Author"
		elmt = newDoc.evaluate('//div[@class="titlewrap"]/span[@class="addmd"]', newDoc, nsResolver,
			XPathResult.ANY_TYPE, null).iterateNext();
		if(elmt) {
			var authors = Zotero.Utilities.superCleanString(elmt.textContent);
			if(authors.substring(0, 3) == "By ") {
				authors = authors.substring(3);
			}
			authors = authors.split(", ");
			// Indexed loop with a declared counter: no leaked global and no
			// stray enumerable properties picked up from the array.
			for(var a = 0; a < authors.length; a++) {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[a], "author"));
			}
		}

		// Remaining metadata: one <div> per field in the book-info section.
		var elmts = newDoc.evaluate('//td[2][@id="bookinfo"]/div[@class="bookinfo_sectionwrap"]/div', newDoc, nsResolver,
			XPathResult.ANY_TYPE, null);
		while((elmt = elmts.iterateNext())) {
			var fieldelmt = newDoc.evaluate('.//text()', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(!fieldelmt) {
				continue;
			}

			var field = Zotero.Utilities.superCleanString(fieldelmt.nodeValue);
			Zotero.debug("output: " + field);

			if(field.substring(0, 10) == "Published ") {
				Zotero.debug("Published*****."+field);
				// Only treat the tail as a date when it really is a
				// four-digit year.
				var yearMatch = /([0-9]{4})$/.exec(field);
				if(yearMatch) {
					newItem.date = yearMatch[1];
				}
				// The publisher name is only present in the
				// "Published by NAME, YEAR" form; a bare "Published YEAR"
				// must not end up in the publisher field.
				if(field.substring(0, 13) == "Published by ") {
					var publisher = field.substring(13);
					if(yearMatch) {
						publisher = publisher.replace(new RegExp(",?\\s*" + yearMatch[1] + "$"), "");
					}
					if(publisher) {
						newItem.publisher = publisher;
					}
				}
			} else if(field.substring(0, 5) == "ISBN ") {
				newItem.ISBN = field.substring(5);
			} else if(field.substring(field.length - 6) == " pages") {
				newItem.pages = field.substring(0, field.length - 6);
			} else if(field.substring(0, 12) == "Contributor ") {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(field.substring(12), "contributor"));
			}
		}

		newItem.complete();
	}, function() { Zotero.done(); }, null);
	Zotero.wait();
}
https://www.zotero.org/trac/ticket/1299