linking citation to bibliography

edited December 10, 2020
Following a previous macro written in Basic that does this in LibreOffice, I rewrote the program in Python and improved it. I would appreciate any improvements, especially to the regex searches.


# coding: utf-8
import uno
import unohelper
import sys
import re

# Handle to the document the macro works on; every function below reads this global.
# NOTE(review): this is evaluated once, when the module is loaded - confirm it still
# refers to the intended document if the macro is run against several documents.
document = XSCRIPTCONTEXT.getDocument()

def modifyStyle(cStyleName, cStyleFamily, oFont, oSize, oColor):
    """Set font, size and colour of one style and switch off its underline.

    Args:
        cStyleName: Name of the style to change, e.g. "Internet Link".
        cStyleFamily: Name of the style family that holds the style,
            e.g. "CharacterStyles".
        oFont: Font name written to the "CharFontName" property.
        oSize: Font height written to the "CharHeight" property.
        oColor: Colour value written to the "CharColor" property.
    """
    oStyleFamily = document.getStyleFamilies().getByName(cStyleFamily)
    oStyle = oStyleFamily.getByName(cStyleName)
    oStyle.setPropertyValue("CharFontName", oFont)
    oStyle.setPropertyValue("CharColor", oColor)
    oStyle.setPropertyValue("CharHeight", oSize)
    # 0 is com.sun.star.awt.FontUnderline.NONE, i.e. no underline.
    oStyle.setPropertyValue("CharUnderline", 0)
    oStyle.setPropertyValue("CharNoHyphenation", True)

def deleteRefBookmarks():
    """Remove every bookmark whose name starts with "Ref_".

    Only the "Ref_" prefix is matched, so bookmarks that merely contain
    that text somewhere in their name (e.g. "MyRef_1") are left alone.
    """
    bookmarks = document.getBookmarks()
    # Collect the names first so the container is not changed while it is
    # being enumerated.
    bmList = [bookmark.Name for bookmark in bookmarks
              if bookmark.Name.startswith("Ref_")]
    for bm in bmList:
        bookmarks.getByName(bm).dispose()

def findFirstWord(oString):
    """Return the first whitespace-delimited word of ``oString``.

    Punctuation attached to the word ("Smith," or "(Smith") is stripped so
    that a bibliography entry and an in-text citation of the same author
    produce the same key. Returns "" when ``oString`` is empty or blank.
    """
    words = oString.split(maxsplit=1)
    if not words:
        return ""
    return words[0].strip(".,;:()[]")

def literatureDate(oString):
    """Return the first year found in ``oString``, or "" if there is none.

    A year is a standalone number from 1900 to 2999 with an optional
    lower-case disambiguation letter (e.g. "2010a"). The word boundaries
    keep the pattern from picking a "year" out of a longer run of digits.
    """
    match = re.search(r'\b((?:19|2[0-9])\d{2}[a-z]?)\b', oString)
    return match.group(1) if match else ""


def insertZoteroBookmarks(bNumbered):
    """Bookmark every paragraph of each text section whose name contains "ZOTERO".

    Existing "Ref_" bookmarks are removed first. Each paragraph then gets a
    bookmark spanning the whole paragraph, unless a bookmark with that name
    already exists.

    Args:
        bNumbered: If true, bookmarks are named "Ref_<n>", where n is the
            1-based position of the paragraph inside its section. Otherwise
            they are named "Ref_<first word>_<year>"; paragraphs that are
            blank or contain no year are skipped instead of aborting the run.
    """
    deleteRefBookmarks()
    vSections = document.getTextSections()
    for sSectionName in vSections.getElementNames():
        if "ZOTERO" not in sSectionName:
            continue
        oPE = vSections.getByName(sSectionName).getAnchor().createEnumeration()
        iCount = 0
        while oPE.hasMoreElements():
            oPar = oPE.nextElement()
            if not oPar.supportsService("com.sun.star.text.Paragraph"):
                continue
            iCount += 1
            oText = oPar.getText()
            oCurs = oText.createTextCursorByRange(oPar)
            if bNumbered:
                sName = "Ref_" + str(iCount)
            else:
                sParagraph = oCurs.getString()
                if not sParagraph.strip():
                    continue
                # The year helper reports "no year" either by returning an
                # empty string or by failing on the missing match.
                try:
                    sYear = literatureDate(sParagraph)
                except AttributeError:
                    sYear = ""
                if not sYear:
                    continue
                sName = "Ref_" + findFirstWord(sParagraph) + "_" + sYear
            if not document.getBookmarks().hasByName(sName):
                bm = document.createInstance("com.sun.star.text.Bookmark")
                bm.Name = sName
                # Insert into the text that owns the cursor, not blindly
                # into the document body.
                oText.insertTextContent(oCurs, bm, True)

def modStyles(oFont="Times New Roman", oSize=12, oColor=0x0000FF):
    """Apply one look to the "Internet Link" and "Visited Internet Link" styles.

    Args:
        oFont: Font name for both styles (default "Times New Roman").
        oSize: Font height for both styles (default 12).
        oColor: Colour value for both styles (default 0x0000FF).
    """
    for cStyleName in ("Internet Link", "Visited Internet Link"):
        modifyStyle(cStyleName, "CharacterStyles", oFont, oSize, oColor)

def linkNumberedReferences():
    """Link numeric citations to "Ref_<n>" bookmarks in the bibliography."""
    modStyles()
    insertZoteroBookmarks(True)
    # One to three digits without a leading zero: the bookmarks are numbered
    # from 1, so a lone "0" can never have a target.
    refSearch(r"[1-9][0-9]{0,2}", True)

def linkAuthorDateReferences():
    """Link author-date citations to "Ref_<author>_<year>" bookmarks.

    Three searches are run in turn: "Author et al. 2010", then
    "Author 2010" / "Author and Author 2010", then
    "Author, Author, and Author 2010". Each accepts an optional comma
    before the year and an optional year letter (e.g. "2010a").
    """
    modStyles()
    insertZoteroBookmarks(False)
    # Raw strings: "\p" is not a Python escape, and the pattern has to reach
    # the document search unchanged.
    sName = r"\p{Lu}[\p{Lu}\p{Ll}'-]*"
    sYear = r"(,?) [0-9]{4}([a-z]?)"
    # The full stop of "et al." is escaped so it no longer matches any character.
    refSearch(sName + r"( et al\.)" + sYear, False)
    refSearch("(" + sName + " and )?(" + sName + ")" + sYear, False)
    refSearch("(" + sName + ", " + sName + ", and " + sName + ")" + sYear, False)

def refSearch(sSearchString, bNumbered):
    """Hyperlink every regex match that starts inside a reference mark.

    Args:
        sSearchString: Regular expression handed to the document search.
        bNumbered: If true, the matched text itself is the bookmark number
            ("Ref_<match>"). Otherwise the target is
            "Ref_<first word>_<year>" built from the matched text.

    A match is linked only when its target bookmark exists, so text that
    merely looks like a citation is not turned into a dead link.
    """
    oSearch = document.createSearchDescriptor()
    oSearch.SearchRegularExpression = True
    oSearch.SearchString = sSearchString
    oFound = document.findFirst(oSearch)
    while oFound:
        oCursor = oFound.Text.createTextCursorByRange(oFound)
        if oCursor.Start.ReferenceMark:
            sText = oCursor.getString()
            if bNumbered:
                sTarget = "Ref_" + sText
            else:
                # The search accepts any four digits, but the year helper only
                # recognises 1900-2999; treat "no year" as "nothing to link".
                try:
                    sYear = literatureDate(sText)
                except AttributeError:
                    sYear = ""
                if sYear:
                    sTarget = "Ref_" + findFirstWord(sText) + "_" + sYear
                else:
                    sTarget = ""
            if sTarget and document.getBookmarks().hasByName(sTarget):
                oCursor.HyperLinkURL = "#" + sTarget
        oFound = document.findNext(oFound, oSearch)


# Tuple of the two entry-point functions of this module.
# NOTE(review): LibreOffice is expected to list only the functions named here
# as runnable macros - confirm against the scripting documentation.
g_exportedScripts = linkNumberedReferences, linkAuthorDateReferences,


  • Sorry, but pasting the script removed all of the necessary python white spaces. Please insert these yourself (you may need to look up how to format python).

    I also have a similar version in vba for word.
  • You can wrap the above in html <code></code> tags for proper formatting
  • edited December 10, 2020
    Cheers. looks much better!
    By the way, the code works for numeric references and for author-date references with year-letter disambiguation (e.g. Smith 2010a, Smith 2010b).
  • Here is the word version. Again works for numeric citations and author date with year-letter disambiguation.


    ' Returns the first space-delimited word of oStr.
    ' Leading and trailing spaces are ignored; a blank string yields "".
    Function firstWord(oStr)
        Dim sTrimmed As String
        sTrimmed = Trim(oStr)
        If Len(sTrimmed) = 0 Then
            ' Split on an empty string returns an empty array, so index 0 would fail.
            firstWord = ""
        Else
            firstWord = Split(sTrimmed, " ")(0)
        End If
    End Function

    ' Returns the first match in oStr of four or more digits, an optional
    ' lower-case letter, and then the extra pattern given in oExtra.
    ' Returns "" when oStr contains no such match.
    Function getYear(oStr, oExtra)
        Dim re As Object
        Dim oMatches As Object
        ' Late binding: no project reference to the VBScript regular
        ' expression library is needed.
        Set re = CreateObject("VBScript.RegExp")
        re.Global = True
        re.IgnoreCase = False
        re.Pattern = "\b[0-9]{4,}[a-z]?" & oExtra
        Set oMatches = re.Execute(oStr)
        If oMatches.Count > 0 Then
            getYear = oMatches(0).Value
        Else
            getYear = ""
        End If
    End Function

    ' Adds one bookmark per paragraph of the first field whose code contains
    ' "ADDIN ZOTERO_BIBL". Bookmarks already inside that field are removed first.
    ' bmNumbered: True names the bookmarks "Ref_<n>" by paragraph position;
    ' otherwise they are named "Ref_<first word>_<year>" with punctuation removed.
    ' Does nothing when the document has no such field.
    Sub insertZoteroLiteratureBookmarks(bmNumbered)
        Dim oRange As Range
        Dim oPara As Paragraph
        Dim oRangePara As Range
        Dim bmRange As Range
        Dim bmtext As String
        Dim oYear As String
        Dim i As Long
        Dim iCount As Long

        For i = 1 To ActiveDocument.Fields.Count
            If InStr(ActiveDocument.Fields(i).Code, "ADDIN ZOTERO_BIBL") Then
                Set oRange = ActiveDocument.Fields(i).Result
                Exit For
            End If
        Next
        ' No bibliography field: nothing to bookmark.
        If oRange Is Nothing Then Exit Sub

        ' Delete from the end so removing an item does not shift the ones
        ' that are still to be visited.
        For i = oRange.Bookmarks.Count To 1 Step -1
            oRange.Bookmarks(i).Delete
        Next

        iCount = 0
        For Each oPara In oRange.Paragraphs
            Set oRangePara = oPara.Range
            Set bmRange = oRangePara
            iCount = iCount + 1
            If bmNumbered Then
                bmtext = "Ref_" + CStr(iCount)
            Else
                oYear = getYear(oRangePara.Text, "[;:.,\)]")
                bmtext = "Ref_" + oRangePara.Words(1) + "_" + oYear
                ' Strip punctuation, spaces, apostrophes and the paragraph mark
                ' from the bookmark name.
                bmtext = multipleReplace(bmtext, ":;.,) " & Chr(39) & ChrW(8217) & Chr(13))
            End If
            ' Leave the last character of the paragraph out of the bookmark.
            bmRange.MoveEnd unit:=wdCharacter, Count:=-1
            ActiveDocument.Bookmarks.Add Name:=bmtext, Range:=bmRange
            bmRange.Collapse wdCollapseEnd
        Next
    End Sub
    ' Adds hyperlinks inside every field whose code contains "ADDIN ZOTERO_ITEM".
    ' Each link points to the bookmark named by its SubAddress ("Ref_...").
    ' bmNumbered: True links every run of digits to "Ref_<digits>"; otherwise
    ' author-date text is linked to "Ref_<first word>_<year>".
    Sub insertCitationToZoteroLiteratue(bmNumbered)
    Dim i As Long
    Dim oRng As Range
    startingFieldCount = ActiveDocument.Fields.Count
    ' The fields are visited from the last one to the first.
    ' NOTE(review): presumably because Hyperlinks.Add creates new fields behind
    ' the current index - confirm.
    For i = startingFieldCount To 1 Step -1
    If InStr(ActiveDocument.Fields(i).Code, "ADDIN ZOTERO_ITEM") Then
    Set oRange = ActiveDocument.Fields(i).Result
    With oRange.Find
    .MatchWildcards = True
    If bmNumbered Then
    oRange.Collapse wdCollapseStart
    ' Each run of one or more digits is a citation number; the loop stops as
    ' soon as a match lies outside the field result.
    Do While .Execute("[0-9]{1,}") And oRange.InRange(ActiveDocument.Fields(i).Result)
    bmtext = "Ref_" & oRange.Text
    Set oRng = oRange
    ActiveDocument.Hyperlinks.Add Anchor:=oRng, Address:="", _
    SubAddress:=bmtext, ScreenTip:="", TextToDisplay:=""
    ' Continue searching after the text that was just linked.
    oRange.Collapse wdCollapseEnd
    Loop
    Else
    ' The pattern ends with four digits followed by one character from
    ' [a-z,,.;/)].
    ' NOTE(review): "<" and "*" are taken to be Word's word-start and any-text
    ' wildcards - confirm.
    Do While .Execute("<*([0-9]{4}[a-z,,.;/)])") And oRange.InRange(ActiveDocument.Fields(i).Result)
    ' The bookmark name is built from the full match, before the range is shortened.
    bmtext = "Ref_" & firstWord(oRange.Text) & "_" & getYear(oRange.Text, "")
    ' Drop the last matched character so it is not part of the link.
    oRange.MoveEnd unit:=wdCharacter, Count:=-1
    ActiveDocument.Hyperlinks.Add Anchor:=oRange, Address:="", _
    SubAddress:=bmtext, ScreenTip:="", TextToDisplay:=""
    oRange.Collapse wdCollapseEnd
    Loop
    End If
    End With
    End If
    Next i
    End Sub

    ' Entry point for numeric styles: creates the bibliography bookmarks,
    ' then links the citations to them.
    Sub ZoteroLinkNumberedCitations()
        Call insertZoteroLiteratureBookmarks(True)
        Call insertCitationToZoteroLiteratue(True)
    End Sub

    ' Entry point for author-date styles: creates the bibliography bookmarks,
    ' then links the citations to them.
    Sub ZoteroLinkAuthorDateCitations()
        Call insertZoteroLiteratureBookmarks(False)
        Call insertCitationToZoteroLiteratue(False)
    End Sub

  • Some numeric styles such as Nature have non-clickable links within word but when converted to pdf the links work fine. Any ideas?
  • I have updated the libreoffice python macro for author-date cites to cope with some borderline cases. It is not perfect, but works for most citations.

    import uno
    import re
    from ast import literal_eval as to_dict
    # Document the macro operates on, taken from the XSCRIPTCONTEXT object.
    document = XSCRIPTCONTEXT.getDocument()

    # Each pattern matches a digit, an optional word character, and then the
    # separator. external_reference_links uses them to choose the character on
    # which the plain citation text is split.
    semicolon = re.compile(r"\d\w?;")
    comma = re.compile(r"\d\w?,")
    colon = re.compile(r"\d\w?:")

    def _reference_target_url(item_data):
        """Return the external URL for one citation item, or "" if none can be built.

        A DOI wins; otherwise an item whose source is "PubMed" is linked through
        the PMID found in its note.
        """
        doi = item_data.get("DOI", "")
        if doi:
            return "https://doi.org/" + doi
        if item_data.get("source", "") == "PubMed":
            pmid = re.search(r"PMID:\s*(\d+)", item_data.get("note", ""))
            if pmid:
                return "https://pubmed.ncbi.nlm.nih.gov/" + pmid.group(1)
        return ""


    def external_reference_links():
        """Link in-text citations to their DOI or PubMed page.

        The name of each reference mark is expected to be two leading tokens,
        the citation data, and one trailing token. Marks whose data cannot be
        parsed or lacks the expected keys are skipped. For every citation item
        that carries "itemData", the piece of the plain citation naming the
        first author's family name and the year is searched for from the start
        of the mark; the first hit that has no hyperlink yet receives the URL.
        """
        # Local import: the script's import block does not provide json.
        import json

        search_descriptor = document.createSearchDescriptor()

        for reference_mark in document.ReferenceMarks:
            name_parts = reference_mark.Name.split(" ", 2)
            if len(name_parts) < 3:
                continue
            reference_mark_data = name_parts[2].rpartition(" ")[0]

            # The data is JSON; json.loads also accepts true/false/null, which
            # a Python literal parser rejects.
            try:
                data_dictionary = json.loads(reference_mark_data)
                plain_cite = data_dictionary["properties"]["plainCitation"]
                citation_items_list = data_dictionary["citationItems"]
            except (ValueError, KeyError, TypeError):
                continue

            plain_cite = plain_cite.replace("(", "").replace(")", "")

            if semicolon.search(plain_cite):
                names = plain_cite.split(";")
            elif comma.search(plain_cite):
                names = plain_cite.split(",")
            elif colon.search(plain_cite):
                names = plain_cite.split(":")
            else:
                names = [plain_cite]

            for citation_items in citation_items_list:
                if "itemData" not in citation_items:
                    continue
                item_data = citation_items["itemData"]

                try:
                    author = item_data["author"][0]["family"]
                    # The year may be stored as a number; compare it as text.
                    date_year = str(item_data["issued"]["date-parts"][0][0])
                except (KeyError, IndexError, TypeError):
                    continue

                candidates = [c.strip() for c in names
                              if author in c and date_year in c]

                search_start = reference_mark.getAnchor().getStart()
                found_range = None
                for cite in candidates:
                    if not cite:
                        continue
                    search_descriptor.SearchString = cite
                    hit = document.findNext(search_start, search_descriptor)
                    if hit is None:
                        continue
                    if hit.HyperLinkURL == "":
                        found_range = hit
                        break
                    search_start = hit.getEnd()

                if found_range is None:
                    # NOTE(review): this abandons the remaining items of the
                    # same citation, as the original did - confirm whether
                    # `continue` was intended.
                    break

                url = _reference_target_url(item_data)
                if url:
                    link_cursor = found_range.Text.createTextCursorByRange(found_range)
                    link_cursor.HyperLinkURL = url
  • Your VBA code returns an error at multipleReplace.
    There is no such function in VBA and you haven't defined one.
  • Unfortunately, I no longer use word. However, I think the line was supposed to remove any extraneous symbols. Please comment it out and see if it works.
  • I do not like giving negative answers, so I dug around in my old files (loaded the word macros in Libreoffice!) and found this function:

    ' Returns sBmtext with every character that occurs in sList removed.
    ' The caller's sBmtext variable is left unchanged.
    Function multipleReplace(sBmtext, sList)
        Dim sResult As String
        Dim iCounter As Long
        ' Work on a copy: the parameter is passed by reference by default.
        sResult = sBmtext
        For iCounter = 1 To Len(sList)
            sResult = Replace(sResult, Mid(sList, iCounter, 1), "")
        Next
        multipleReplace = sResult
    End Function

    Hope this solves it.
Sign In or Register to comment.