D648753522 : cairn.info translator does not get the PDF attachment (with a proposed FIX)
I'm a complete beginner with Zotero. It's great, so thanks to all the contributors! I also have only limited programming skills.
But I propose a FIX ;-)
The issue:
1/ with Firefox : http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/revue-savoirs-2004-5-page-59.htm
Note: I'm using a proxy: bibliotheque-nomade2.univ-lyon2.fr
2/ enregistrer dans Zotero avec "Cairn.info" (Save in Zotero with "Cairn.info")
3/ A red X appears next to "Full Text PDF"; the reference is saved with a snapshot.
So the PDF file is not attached.
If I download the PDF and choose "download with Zotero", there is also an error.
(http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/load_pdf.php?ID_ARTICLE=SAVO_HS01_0059)
If I download the PDF then I drop it in Zotero, it's ok.
After dozens of rounds of this workaround I got tired of it and looked for an existing bug report: nothing.
I got lucky once: the import worked when I pasted the PDF URL into the address bar:
http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/load_pdf.php?ID_ARTICLE=SAVO_HS01_0059 and I noticed "pdf_do_not_index", which can also be found in the page source of Cairn.info.
So I looked at the source code of the Cairn.info web page and at Cairn.info.js.
The link that works with Cairn.info is:
http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/load_pdf_do_not_index.php?ID_ARTICLE=SAVO_HS01_0059
I've edited my Cairn.info.js as shown below. Please note that it is for debugging purposes only and there is no guarantee against regressions. Also, since the proxy is hard-coded in my fix, it will not work for most users.
Hope that helps!
Daniel Conil
{
"translatorID": "f46cc903-c447-47d6-a2cf-c75ed22dc96b",
"translatorType": 4,
"label": "Cairn.infodcl",
"creator": "Sebastian Karcher, Sylvain Machefert and Nicolas Chachereau",
"target": "^https?://www\\.cairn\\.info/",
"minVersion": "3.0",
"maxVersion": null,
"priority": 100,
"inRepository": true,
"browserSupport": "gcsibv",
"lastUpdated": "2016-02-16 02:40:00"
}
/*
Translator
Copyright (C) 2013 Sebastian Karcher
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Classify the current Cairn.info page for Zotero.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {string} url - address of the page (not consulted here)
 * @returns {string|boolean} "journalArticle" when a citation_journal_title
 *   meta tag is present, "multiple" when the article-list markup yields
 *   text, and false otherwise
 */
function detectWeb(doc, url) {
	// A journal-title meta tag marks a single article page.
	var journalMeta = ZU.xpath(doc, '//meta[@name="citation_journal_title"]');
	if (journalMeta.length > 0) {
		return "journalArticle";
	}

	// Otherwise look for a list of articles (issue contents, search results).
	var listingText = ZU.xpathText(doc, '//div[contains(@class, "list_articles")]//div[contains(@class, "article") or contains(@class, "articleBookList")]');
	return listingText ? "multiple" : false;
}
/**
 * Entry point for saving from a Cairn.info page.
 *
 * On a single-article page the document is handed straight to scrape().
 * On a listing page, every result row that has both a title and a link is
 * offered to the user; the chosen links are then loaded and scraped.
 *
 * @param {Document} doc - DOM of the current page
 * @param {string} url - address of the current page
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc);
		return;
	}

	var hits = {};
	var rows = ZU.xpath(doc, '//div[contains(@class, "list_articles")]/div[contains(@class, "article")]');
	for (var i = 0; i < rows.length; i++) {
		var title = ZU.xpathText(rows[i], './/div[@class="meta"]//div[@class="title"]');
		if (!title) {
			// Alternative listing layout
			title = ZU.xpathText(rows[i], './/div[@class="wrapper_title"]/h2/text()');
		}
		var link = ZU.xpathText(rows[i], './/div[@class="state"]/a[1]/@href');

		// A row without the expected markup yields no title or no link.
		// Skip it instead of throwing on title.replace() or registering
		// a bogus "null" entry.
		if (!title || !link) {
			continue;
		}

		// Drop leading whitespace/commas left over from the markup
		hits[link] = title.replace(/^[\s\,]+/, "").trim();
	}

	Z.selectItems(hits, function (items) {
		// Nothing selected / dialog cancelled
		if (!items) return true;

		var urls = [];
		for (var j in items) {
			urls.push(j);
		}
		ZU.processDocuments(urls, scrape);
	});
}
/**
 * Save the article shown in `doc`.
 *
 * The Embedded Metadata translator does the actual extraction; its result
 * is then adjusted for Cairn.info: keywords are read from the site's own
 * meta tag, the PDF attachment is pointed at the direct download script,
 * and volume/issue strings are normalised.
 *
 * @param {Document} doc - DOM of a single-article page
 */
function scrape(doc) {
	// Read Cairn.info's keyword meta tag and return trimmed, non-empty tags.
	function readKeywords() {
		var raw = ZU.xpathText(doc, '//meta[@name="article-mot_cle"]/@content');
		var tags = [];
		if (!raw) return tags;
		var parts = raw.split(/\s*[,;]\s*/);
		for (var i = 0; i < parts.length; i++) {
			var tag = parts[i].trim();
			if (tag) tags.push(tag);
		}
		return tags;
	}

	// Build the direct-download PDF URL on the host the page was loaded from
	// (so it also works behind any institutional proxy). Returns null when
	// the article ID or the page location is not available.
	function buildPdfUrl() {
		var articleId = ZU.xpathText(doc, '//meta[@name="WT.pn_sku"]/@content');
		var loc = doc.location;
		if (!articleId || !loc || !loc.host) return null;
		return loc.protocol + "//" + loc.host
			+ "/load_pdf_do_not_index.php?ID_ARTICLE="
			+ encodeURIComponent(articleId.trim());
	}

	// Split strings such as "n° 5", "Vol. 12" or "12-3" into volume/issue.
	function fixVolumeAndIssue(item) {
		// Not every item carries a volume; nothing to correct then.
		if (!item.volume || typeof item.volume != "string") return;

		if (item.volume.search(/^n°/i) != -1) {
			item.issue = item.volume.split(/n°/i)[1].trim();
			item.volume = '';
		} else if (item.volume.search(/^Vol./i) != -1) {
			item.volume = item.volume.split(/Vol./i)[1].trim();
		}
		if (item.volume.search(/^\d+-\d+$/) != -1) {
			var parts = item.volume.split('-');
			item.volume = parts[0];
			item.issue = parts[1];
		}
	}

	// We call the Embedded Metadata translator to do the actual work
	var translator = Zotero.loadTranslator("web");
	translator.setTranslator("951c027d-74ac-47d4-a107-9c3069ab7b48");
	translator.setDocument(doc);
	translator.setHandler("itemDone", function (obj, item) {
		// Cairn.info uses non-standard keywords that the Embedded Metadata
		// translator cannot catch, so replace whatever tags it produced.
		item.tags = readKeywords();

		// The default PDF URL is an HTML page that calls the actual
		// download, so point PDF attachments at the download script itself.
		// The article ID stays in a local value rather than on the item, so
		// no non-standard field is added to what gets saved.
		var pdfUrl = buildPdfUrl();
		if (pdfUrl && item.attachments) {
			for (var i = 0; i < item.attachments.length; i++) {
				if (item.attachments[i].mimeType == 'application/pdf') {
					item.attachments[i].url = pdfUrl;
				}
			}
		}

		// Correct volume and issue information
		fixVolumeAndIssue(item);

		// Other fixes
		delete item.libraryCatalog;
		item.complete();
	});
	translator.translate();
}
But I propose a FIX ;-)
The issue:
1/ with Firefox : http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/revue-savoirs-2004-5-page-59.htm
Note: I'm using a proxy: bibliotheque-nomade2.univ-lyon2.fr
2/ enregistrer dans Zotero avec "Cairn.info" (Save in Zotero with "Cairn.info")
3/ A red X appears next to "Full Text PDF"; the reference is saved with a snapshot.
So the PDF file is not attached.
If I download the PDF and choose "download with Zotero", there is also an error.
(http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/load_pdf.php?ID_ARTICLE=SAVO_HS01_0059)
If I download the PDF then I drop it in Zotero, it's ok.
After dozens of rounds of this workaround I got tired of it and looked for an existing bug report: nothing.
I got lucky once: the import worked when I pasted the PDF URL into the address bar:
http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/load_pdf.php?ID_ARTICLE=SAVO_HS01_0059 and I noticed "pdf_do_not_index", which can also be found in the page source of Cairn.info.
So I looked at the source code of the Cairn.info web page and at Cairn.info.js.
The link that works with Cairn.info is:
http://www.cairn.info.bibliotheque-nomade2.univ-lyon2.fr/load_pdf_do_not_index.php?ID_ARTICLE=SAVO_HS01_0059
I've edited my Cairn.info.js as shown below. Please note that it is for debugging purposes only and there is no guarantee against regressions. Also, since the proxy is hard-coded in my fix, it will not work for most users.
Hope that helps!
Daniel Conil
{
"translatorID": "f46cc903-c447-47d6-a2cf-c75ed22dc96b",
"translatorType": 4,
"label": "Cairn.infodcl",
"creator": "Sebastian Karcher, Sylvain Machefert and Nicolas Chachereau",
"target": "^https?://www\\.cairn\\.info/",
"minVersion": "3.0",
"maxVersion": null,
"priority": 100,
"inRepository": true,
"browserSupport": "gcsibv",
"lastUpdated": "2016-02-16 02:40:00"
}
/*
Translator
Copyright (C) 2013 Sebastian Karcher
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Classify the current Cairn.info page for Zotero.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {string} url - address of the page (not consulted here)
 * @returns {string|boolean} "journalArticle" when a citation_journal_title
 *   meta tag is present, "multiple" when the article-list markup yields
 *   text, and false otherwise
 */
function detectWeb(doc, url) {
	// A journal-title meta tag marks a single article page.
	var journalMeta = ZU.xpath(doc, '//meta[@name="citation_journal_title"]');
	if (journalMeta.length > 0) {
		return "journalArticle";
	}

	// Otherwise look for a list of articles (issue contents, search results).
	var listingText = ZU.xpathText(doc, '//div[contains(@class, "list_articles")]//div[contains(@class, "article") or contains(@class, "articleBookList")]');
	return listingText ? "multiple" : false;
}
/**
 * Entry point for saving from a Cairn.info page.
 *
 * On a single-article page the document is handed straight to scrape().
 * On a listing page, every result row that has both a title and a link is
 * offered to the user; the chosen links are then loaded and scraped.
 *
 * @param {Document} doc - DOM of the current page
 * @param {string} url - address of the current page
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc);
		return;
	}

	var hits = {};
	var rows = ZU.xpath(doc, '//div[contains(@class, "list_articles")]/div[contains(@class, "article")]');
	for (var i = 0; i < rows.length; i++) {
		var title = ZU.xpathText(rows[i], './/div[@class="meta"]//div[@class="title"]');
		if (!title) {
			// Alternative listing layout
			title = ZU.xpathText(rows[i], './/div[@class="wrapper_title"]/h2/text()');
		}
		var link = ZU.xpathText(rows[i], './/div[@class="state"]/a[1]/@href');

		// A row without the expected markup yields no title or no link.
		// Skip it instead of throwing on title.replace() or registering
		// a bogus "null" entry.
		if (!title || !link) {
			continue;
		}

		// Drop leading whitespace/commas left over from the markup
		hits[link] = title.replace(/^[\s\,]+/, "").trim();
	}

	Z.selectItems(hits, function (items) {
		// Nothing selected / dialog cancelled
		if (!items) return true;

		var urls = [];
		for (var j in items) {
			urls.push(j);
		}
		ZU.processDocuments(urls, scrape);
	});
}
/**
 * Save the article shown in `doc`.
 *
 * The Embedded Metadata translator does the actual extraction; its result
 * is then adjusted for Cairn.info: keywords are read from the site's own
 * meta tag, the PDF attachment is pointed at the direct download script,
 * and volume/issue strings are normalised.
 *
 * @param {Document} doc - DOM of a single-article page
 */
function scrape(doc) {
	// Read Cairn.info's keyword meta tag and return trimmed, non-empty tags.
	function readKeywords() {
		var raw = ZU.xpathText(doc, '//meta[@name="article-mot_cle"]/@content');
		var tags = [];
		if (!raw) return tags;
		var parts = raw.split(/\s*[,;]\s*/);
		for (var i = 0; i < parts.length; i++) {
			var tag = parts[i].trim();
			if (tag) tags.push(tag);
		}
		return tags;
	}

	// Build the direct-download PDF URL on the host the page was loaded from
	// (so it also works behind any institutional proxy). Returns null when
	// the article ID or the page location is not available.
	function buildPdfUrl() {
		var articleId = ZU.xpathText(doc, '//meta[@name="WT.pn_sku"]/@content');
		var loc = doc.location;
		if (!articleId || !loc || !loc.host) return null;
		return loc.protocol + "//" + loc.host
			+ "/load_pdf_do_not_index.php?ID_ARTICLE="
			+ encodeURIComponent(articleId.trim());
	}

	// Split strings such as "n° 5", "Vol. 12" or "12-3" into volume/issue.
	function fixVolumeAndIssue(item) {
		// Not every item carries a volume; nothing to correct then.
		if (!item.volume || typeof item.volume != "string") return;

		if (item.volume.search(/^n°/i) != -1) {
			item.issue = item.volume.split(/n°/i)[1].trim();
			item.volume = '';
		} else if (item.volume.search(/^Vol./i) != -1) {
			item.volume = item.volume.split(/Vol./i)[1].trim();
		}
		if (item.volume.search(/^\d+-\d+$/) != -1) {
			var parts = item.volume.split('-');
			item.volume = parts[0];
			item.issue = parts[1];
		}
	}

	// We call the Embedded Metadata translator to do the actual work
	var translator = Zotero.loadTranslator("web");
	translator.setTranslator("951c027d-74ac-47d4-a107-9c3069ab7b48");
	translator.setDocument(doc);
	translator.setHandler("itemDone", function (obj, item) {
		// Cairn.info uses non-standard keywords that the Embedded Metadata
		// translator cannot catch, so replace whatever tags it produced.
		item.tags = readKeywords();

		// The default PDF URL is an HTML page that calls the actual
		// download, so point PDF attachments at the download script itself.
		// The article ID stays in a local value rather than on the item, so
		// no non-standard field is added to what gets saved.
		var pdfUrl = buildPdfUrl();
		if (pdfUrl && item.attachments) {
			for (var i = 0; i < item.attachments.length; i++) {
				if (item.attachments[i].mimeType == 'application/pdf') {
					item.attachments[i].url = pdfUrl;
				}
			}
		}

		// Correct volume and issue information
		fixVolumeAndIssue(item);

		// Other fixes
		delete item.libraryCatalog;
		item.complete();
	});
	translator.translate();
}