Missing Publisher field in Google Books site translator

Hi All,
I am new to Zotero, and I am glad it is saving me a lot of typing...
I have just noticed that when "scraping" data from books.google.com, Zotero seems to omit the publisher field (this field is present when scraping from Amazon). Has anybody else noticed the same problem? Does it work for you?

I am running Zotero 1.0.9 on Firefox 3.0.5

Thank you in advance,
Andrea
  • and
    edited January 18, 2009
    Has really nobody noticed this problem?
    Try testing on both of these pages:
    http://books.google.com/books?id=MKtr_svfY1kC
    http://books.google.com/books?id=MKtr_svfY1kC&pg=PA268&dq=tiziana+terranova

    Can you confirm that your Zotero installations properly retrieve the publisher field when importing from books.google.com? (Mine does not.)

    Please, let me know...
  • Same thing here, with the 1.5 development branch. It misses out the publisher field.
  • and
    edited January 19, 2009
    I have managed to make a quick and dirty update to fix this.
    Although I have not tested it extensively (so no warranties), here is my reworked version of the code. I guess the most convenient way to update the script is to install the nice Scaffold add-on, edit the Google Books translator, and replace the JavaScript in the Code tab with the following:

    (find the original script here)



    /**
     * Zotero web-translator entry point for Google Books pages.
     *
     * If the current page URL identifies a single book (`/books?id=...`), that
     * book is imported. Otherwise the book links found on the page are offered
     * to the user through Zotero.selectItems and every chosen book is imported.
     * For each book the page at `http://books.google.<suffix>/books?id=<id>` is
     * loaded and title, authors, publisher, date, ISBN, page count and
     * contributors are read from it into a new "book" item.
     *
     * @param doc  DOM document the translator was invoked on; its
     *             `location.href` is inspected for a book id.
     * @param url  URL of that page; used to extract the local Google domain
     *             suffix (the part after `books.google.`).
     * @return true when there is nothing to import (the URL is not a
     *         books.google.* URL, or the selection dialog returned nothing);
     *         otherwise undefined.
     */
    function doWeb(doc, url) {
        // Get the local domain suffix. A regex literal is used so that the
        // dots are really escaped (inside a string literal "\." is just ".").
        var suffixMatch = /https?:\/\/books\.google\.([^\/]+)\//.exec(url);
        if (!suffixMatch) {
            // Bail out instead of throwing on suffixMatch[1].
            Zotero.debug("Not a Google Books URL: " + url);
            return true;
        }
        var suffix = suffixMatch[1];
        var uri = doc.location.href;
        var newUris = [];

        // Group 2 captures the book id; accepts http and https like the
        // suffix pattern above.
        var re = /^https?:\/\/books\.google\.[a-z]+(\.[a-z]+)?\/books\?id=([^&]+)/i;
        var m = re.exec(uri);
        if (m) {
            newUris.push('http://books.google.' + suffix + '/books?id=' + m[2]);
        } else {
            // The suffix may contain dots (e.g. "co.uk"); escape it before
            // embedding it in the link-matching pattern.
            var escapedSuffix = suffix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            var items = Zotero.Utilities.getItemArray(doc, doc, 'https?://books\\.google\\.' + escapedSuffix + '/books\\?id=([^&]+)', '^(?:All matching pages|About this Book|Table of Contents|Index)');
            // Drop " - Page" thing
            for (var i in items) {
                items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
            }
            items = Zotero.selectItems(items);

            if (!items) {
                return true;
            }

            // Keys of the selection are the link URLs.
            for (var link in items) {
                var linkMatch = re.exec(link);
                if (linkMatch) {
                    newUris.push('http://books.google.' + suffix + '/books?id=' + linkMatch[2]);
                }
            }
        }
        Zotero.debug(newUris);

        Zotero.Utilities.processDocuments(newUris, function(newDoc) {
            var newItem = new Zotero.Item("book");
            newItem.extra = "";

            var namespace = newDoc.documentElement.namespaceURI;
            var nsResolver = namespace ? function(prefix) {
                if (prefix == 'x') return namespace; else return null;
            } : null;

            // Returns the first node matching expr relative to context, or a
            // falsy value when nothing matches.
            var firstNode = function(expr, context) {
                return newDoc.evaluate(expr, context, nsResolver,
                    XPathResult.ANY_TYPE, null).iterateNext();
            };

            var titleElmt = firstNode('//h2[@class="title"]', newDoc);
            if (titleElmt) {
                var title = Zotero.Utilities.superCleanString(titleElmt.textContent);
                newItem.title = title;
                Zotero.debug("title: " + title);
            }

            var authorElmt = firstNode('//div[@class="titlewrap"]/span[@class="addmd"]', newDoc);
            if (authorElmt) {
                var authors = Zotero.Utilities.superCleanString(authorElmt.textContent);
                if (authors.substring(0, 3) == "By ") {
                    authors = authors.substring(3);
                }
                authors = authors.split(", ");
                // Indexed loop with a declared counter: the original leaked a
                // global `j` and used for-in on an array.
                for (var j = 0; j < authors.length; j++) {
                    newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[j], "author"));
                }
            }

            var elmts = newDoc.evaluate('//td[2][@id="bookinfo"]/div[@class="bookinfo_sectionwrap"]/div', newDoc, nsResolver,
                XPathResult.ANY_TYPE, null);
            var elmt;
            while ((elmt = elmts.iterateNext())) {
                var fieldelmt = firstNode('.//text()', elmt);
                if (!fieldelmt) {
                    continue;
                }
                var field = Zotero.Utilities.superCleanString(fieldelmt.nodeValue);
                Zotero.debug("output: " + field);

                if (field.substring(0, 10) == "Published ") {
                    // Handles "Published by <publisher>, <year>" as well as
                    // lines missing the publisher or the year; only a real
                    // trailing 4-digit year is stored as the date.
                    var yearMatch = /(\d{4})\s*$/.exec(field);
                    var publisher = field.replace(/^Published (?:by )?/, "");
                    if (yearMatch) {
                        newItem.date = yearMatch[1];
                        publisher = publisher.replace(/,?\s*\d{4}\s*$/, "");
                    }
                    if (publisher) {
                        newItem.publisher = publisher;
                    }
                } else if (field.substring(0, 5) == "ISBN ") {
                    newItem.ISBN = field.substring(5);
                } else if (field.substring(field.length - 6) == " pages") {
                    newItem.pages = field.substring(0, field.length - 6);
                } else if (field.substring(0, 12) == "Contributor ") {
                    newItem.creators.push(Zotero.Utilities.cleanAuthor(field.substring(12), "contributor"));
                }
            }
            newItem.complete();
        }, function() { Zotero.done(); }, null);

        Zotero.wait();
    }
Sign In or Register to comment.