Aggressive highlighting in snapshot
I'm getting some erratic behaviour with highlighting of page snapshots from Lexis. The pages have a few characteristics that might be contributing to the problem. They are in some sort of CSS viewport, with a fixed "navigation bar" at the top and another at the bottom. There are also linked endnotes in the pages. When I highlight text to the end of a paragraph, I get a highlight to the end of the document, which has to be removed manually.
If you are able to take a look, I can bundle up the snapshot files and send them along.
If you are able to take a look, I can bundle up the snapshot files and send them along.
The downloaded page is simpler, but the problem persists. Highlighting sometimes triggers a blanket sea of yellow from the cursor position to the bottom of the document. Removing the unwanted highlighting destroys any others that were overlaid by it.
Not sure of the cause, and can't find a clear pattern in the failures, other than that it sometimes seems to be caused by crossing paragraph boundaries.
I'm thinking that the most likely cause may be the extensive Javascript embedded in the pages (this downloads even for the printable version). If we could strip this during the download, that would be great. Otherwise, is there a chance someone in the team could take a look at their function names and let me know if anything raises a red flag?
<html>
<head><title>Document to break highlighting</title></head>
<body>
<div>
One small paragraph.<br/>
Another small paragraph.
</div>
</body>
</html>
If the cursor is placed in the first paragraph, the mouse is clicked, held and moved into the space separating it from the next, releasing the cursor will create a highlight to the end of the DIV. In the documents that will see the heaviest use of highlighting among our users, the BR-formatted text within the DIV is typically between fifty and a hundred printed pages in length, so this is an issue for us.
The faulty code is at line 204 to 252 of annotate.js, in r4003 of the trunk. It recognizes that it's in a non-text node, but it grabs the last child of the surrounding environment (the DIV), which is clearly wrong. I'm now fishing around with alerts to find a way to identify the BR tag, and hope to have a fix done soon.
diff -u -r zotero-trunk.orig/chrome/content/zotero/xpcom/annotate.js zotero-trunk/chrome/content/zotero/xpcom/annotate.js
Frank B--- zotero-trunk.orig/chrome/content/zotero/xpcom/annotate.js 2009-01-24 15:44:37.000000000 +0900
+++ zotero-trunk/chrome/content/zotero/xpcom/annotate.js 2009-01-25 15:24:56.000000000 +0900
@@ -212,8 +212,8 @@
* @return {Array} The node and offset
*/
function _getTextNode(container, offset, isStart) {
- var childAttribute = isStart ? "firstChild" : "lastChild";
- var siblingAttribute = isStart ? "nextSibling" : "lastSibling";
+ var firstPriorityTarget = isStart ? "firstChild" : "previousSibling";
+ var secondPriorityTarget = isStart ? "nextSibling" : "parentNode";
container = Zotero.Annotate.dereferenceNodeOffset(container, offset);
if(container.nodeType == TEXT_TYPE) return [container, 0];
@@ -224,10 +224,10 @@
container = node;
break;
} else {
- if(node[childAttribute]) {
- node = node[childAttribute];
- } else if(node[siblingAttribute]) {
- node = node[siblingAttribute];
+ if(node[firstPriorityTarget]) {
+ node = node[firstPriorityTarget];
+ } else if(node[secondPriorityTarget]) {
+ node = node[secondPriorityTarget];
} else {
node = node.parentNode;
if(node.isSameNode(container)) {
diff -u -r zotero-trunk.orig/chrome/content/zotero/xpcom/annotate.js zotero-trunk/chrome/content/zotero/xpcom/annotate.js
--- zotero-trunk.orig/chrome/content/zotero/xpcom/annotate.js 2009-01-25 23:06:39.000000000 +0900
+++ zotero-trunk/chrome/content/zotero/xpcom/annotate.js 2009-01-25 23:15:56.000000000 +0900
@@ -212,32 +212,51 @@
* @return {Array} The node and offset
*/
function _getTextNode(container, offset, isStart) {
- var childAttribute = isStart ? "firstChild" : "lastChild";
- var siblingAttribute = isStart ? "nextSibling" : "lastSibling";
+ var firstTarget = isStart ? "firstChild" : "lastChild";
+ var secondTarget = isStart ? "nextSibling" : "previousSibling";
container = Zotero.Annotate.dereferenceNodeOffset(container, offset);
if(container.nodeType == TEXT_TYPE) return [container, 0];
-
+
+ var seenArray = new Array();
var node = container;
while(node) {
- if(node.nodeType == TEXT_TYPE) {
+ if ( !node ) {
+ // uh-oh
+ break;
+ }
+ if(node.nodeType == TEXT_TYPE ) {
container = node;
break;
+ }
+ if( node[firstTarget] && ! _seen(node[firstTarget],seenArray)) {
+ node = node[firstTarget];
+ } else if( node[secondTarget] && ! _seen(node[secondTarget],seenArray)) {
+ node = node[secondTarget];
} else {
- if(node[childAttribute]) {
- node = node[childAttribute];
- } else if(node[siblingAttribute]) {
- node = node[siblingAttribute];
- } else {
- node = node.parentNode;
- if(node.isSameNode(container)) {
- break;
- }
- }
+ node = node.parentNode;
}
}
return [container, (!isStart && container.nodeType == TEXT_TYPE ? container.nodeValue.length : 0)];
}
+
+ /**
+ * look for a node object in an array. return true if the node
+ * is found in the array. otherwise push the node onto the array
+ * and return false. used by _getTextNode.
+ */
+ function _seen(node,array) {
+ var seen = false;
+ for (n in array) {
+ if (node.isSameNode(array[n])) {
+ seen = true;
+ }
+ }
+ if ( !seen ) {
+ array.push(node);
+ }
+ return seen;
+ }
}
/**
@@ -1449,7 +1468,7 @@
}
var span = this._highlightTextNode(highlightNode);
} else {
- var span = this._highlightSpaceBetween(this.range.startContainer, this.range.startContainer);
+ var span = this._highlightSpaceBetween(this.range.startContainer, this.range.endContainer);
}
this.range.setStart(span.firstChild, 0);
Frank B
Note to folks getting into annotating and highlighting snapshots: The current annotation/highlighting system will be overhauled in a future version of Zotero. Existing annotations and highlights do not, and will not, sync in Zotero 1.5.
The warning from Tjowens above still holds, however.