#!/usr/bin/env perl

=pod

=head1 NAME

bibdoiadd.pl - add DOI numbers to papers in a given bib file

=head1 SYNOPSIS

bibdoiadd [B<-c> I<config>] [B<-f>] [B<-o> I<output>] I<file>

=head1 OPTIONS

=over 4

=item B<-c> I<config>

Configuration file.  If this file is absent, some defaults are used.
See below for its format.

=item B<-f>

Force checking the DOI number even if one is already present.

=item B<-o> I<output>

Output file.  If this option is not used, the name for the
output file is formed by adding C<_doi> to the name of the input file.

=back

=head1 DESCRIPTION

The script reads a BibTeX file.  It checks whether the entries have
DOIs.  If not, it tries to contact http://www.crossref.org to get the
corresponding DOI.  The result is a BibTeX file with the fields
C<doi> added.

The name of the output file is either set by the B<-o> option or
is derived by adding the suffix C<_doi> to the name of the input file.

There are two options for making queries with Crossref: free account
and paid membership.  In the first case you still must register with
Crossref and are limited to a small number of queries, see the
agreement on the Crossref site, C<http://www.crossref.org>.  In the
second case you have a username and password, and can use them for
automatic queries.  I am not sure whether the use of this script is
allowed for the free account holders.  Anyway, if you try to add DOIs
to a large number of entries, you should register as a paid member.

=head1 CONFIGURATION FILE

The configuration file is mostly self-explanatory: it has comments
(starting with C<#>) and assignments in the form

   $field = value ;

The important parameters are C<$mode> (C<'free'> or C<'paid'>),
C<$email> (for free users) and C<$username> & C<$password> for paid
members.

=head1 EXAMPLES

   bibdoiadd -c bibdoiadd.cfg citations.bib
   bibdoiadd -c bibdoiadd.cfg -o result.bib citations.bib

The first command writes its result to F<citations_doi.bib>, the second
one to F<result.bib>.  Options must precede the name of the input file.

=head1 AUTHOR

Boris Veytsman

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2014-2016  Boris Veytsman

This is free software.  You may redistribute copies of it under the
terms of the GNU General Public License
L<http://www.gnu.org/licenses/gpl.html>.

=pod

There is NO WARRANTY, to the extent permitted by law.

=cut

use strict;
use warnings;

BEGIN {
    # find files relative to our installed location within TeX Live
    my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`; # TL root
    # The backticks yield undef when kpsewhich cannot be run at all.
    $TLMaster = '' unless defined $TLMaster;
    chomp $TLMaster;
    if (length($TLMaster)) {
        unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
    }
}

use IO::File;
use File::Spec;
use BibTeX::Parser;
use LaTeX::ToUnicode qw (convert);
use Getopt::Std;
use URI::Escape;
use LWP::Simple;

my $USAGE="USAGE: $0 [-c config] [-f] [-o output] file\n";

# NOTE(review): in the reviewed copy everything between "my $VERSION = <"
# and "new($inputfile)" was missing.  The code from here down to the
# opening of the input file is a reconstruction based on the POD, on
# $USAGE and on the way the variables are used further down.  Please
# confirm it against the released script; in particular restore the
# release number in the banner and confirm the -h/-V options.
my $VERSION = <<"END";
bibdoiadd
This is free software.  You may redistribute copies of it under the
terms of the GNU General Public License
http://www.gnu.org/licenses/gpl.html.  There is NO WARRANTY, to the
extent permitted by law.
$USAGE
END

my %opts;
getopts('fc:o:hV', \%opts) or die $USAGE;

if ($opts{h} || $opts{V}) {
    print $VERSION;
    exit 0;
}

################################################################
# Defaults and parameters
################################################################

my $inputfile = shift;
if (!defined($inputfile) || !length($inputfile)) {
    die $USAGE;
}

# Default output name: input name with _doi added in front of the
# extension (citations.bib -> citations_doi.bib).  When the input has no
# extension the suffix is simply appended, so that the default never
# coincides with the input file.
my $outputfile = $opts{o};
if (!defined($outputfile) || !length($outputfile)) {
    $outputfile = $inputfile;
    if ($outputfile !~ s/\.([^.\/]*)$/_doi.$1/) {
        $outputfile .= '_doi';
    }
}

# Opening the output truncates it, and the parser reads the input lazily,
# so writing onto the input would destroy it before it is read.
if ($outputfile eq $inputfile) {
    die "Output file must differ from the input file $inputfile\n$USAGE\n";
}

my $forceSearch = $opts{f};

# These are package variables (our, not my) because the configuration
# file is Perl code executed with do(): it can assign to $mode, $email,
# $username and $password only if they live in the package.
our $mode     = 'free';
our $email    = '';
our $username = '';
our $password = '';

if (defined($opts{c})) {
    if (!-r $opts{c}) {
        die "Cannot read configuration file $opts{c}\n$USAGE\n";
    }
    # do() searches @INC for relative names; an absolute path makes it
    # load exactly the file the user named.
    my $config = File::Spec->rel2abs($opts{c});
    do $config;
    if ($@) {
        die "Cannot parse configuration file $opts{c}: $@\n";
    }
}

# Check the consistency.  As in the URL construction below, every mode
# other than 'free' is treated as the paid one.
if ($mode eq 'free') {
    if (!defined($email) || !length($email)) {
        die "Crossref requires a registered e-mail for the free mode queries\n";
    }
}
else {
    if (!defined($username) || !length($username) ||
        !defined($password) || !length($password)) {
        die "Crossref requires a username and password " .
            "for the paid mode queries\n";
    }
}

# Explicit modes keep characters like '>' or '|' in a file name from
# being interpreted as part of an open() mode.
my $input = IO::File->new($inputfile, '<')
    or die "Cannot find BibTeX file $inputfile\n$USAGE\n";
my $output = IO::File->new($outputfile, '>')
    or die "Cannot write to $outputfile\n$USAGE\n";

my $parser = BibTeX::Parser->new($input);

# NOTE(review): the credentials travel in the query string of a plain
# http:// URL.  Consider switching to https if Crossref accepts it.
my $prefix = "http://www.crossref.org/openurl?redirect=false";
if ($mode eq 'free') {
    $prefix .= '&pid='.uri_escape($email);
}
else {
    $prefix .= '&pid='.uri_escape($username).":".
        uri_escape($password);
}

# Processing the input
while (my $entry = $parser->next) {
    if (!$entry->parse_ok()) {
        print STDERR "Cannot understand entry: ";
        $entry->print(*STDERR);
        print STDERR "Skipping this entry\n";
        next;
    }

    # Only articles are looked up; everything else is copied verbatim.
    if ($entry->type() ne 'ARTICLE') {
        print {$output} $entry->raw_bibtex(), "\n\n";
        next;
    }

    # An existing DOI is kept unless -f asks for a fresh lookup.
    if ($entry->has('doi') && !$forceSearch) {
        print {$output} $entry->raw_bibtex(), "\n\n";
        next;
    }

    my $doi = GetDoi($prefix, $entry);
    if (length($doi)) {
        $entry->field('doi', $doi);
    }
    print {$output} $entry->to_string(), "\n\n";
}

$input->close();
# Buffered write errors (e.g. a full disk) only show up at close time.
$output->close()
    or die "Cannot finish writing $outputfile: $!\n";
exit 0;

###############################################################
#  Getting one doi
###############################################################

# GetDoi($url, $entry)
#   $url   - query prefix (endpoint plus the pid parameter)
#   $entry - BibTeX entry object of the article
# Adds the bibliographic fields present in the entry to the query, sends
# it and returns the text of the first <doi> element of the answer.
# Returns the empty string when the request fails or when the answer
# contains no <doi> element.
sub GetDoi {
    my ($url,$entry) = @_;

    if ($entry->has('issn')) {
        $url .= "&issn=".uri_escape_utf8(SanitizeText($entry->field('issn')));
    }
    if ($entry->has('journal')) {
        $url .= "&title=".uri_escape_utf8(SanitizeText($entry->field('journal')));
    }

    # Only the last name of the first author goes into the query.
    my @names = $entry->author();
    if (scalar(@names)) {
        my $lastname = $names[0]->last();
        if (defined($lastname)) {
            $url .= "&aulast=".uri_escape_utf8(SanitizeText($lastname));
        }
    }

    if ($entry->has('volume')) {
        $url .= "&volume=".uri_escape_utf8($entry->field('volume'));
    }
    if ($entry->has('number')) {
        $url .= "&issue=".uri_escape_utf8($entry->field('number'));
    }
    if ($entry->has('pages')) {
        # Keep only the first page of a range like 12--34.
        my $pages = $entry->field('pages');
        $pages =~ s/-.*$//;
        $url .= "&spage=".uri_escape_utf8($pages);
    }
    if ($entry->has('year')) {
        $url .= "&date=".uri_escape_utf8($entry->field('year'));
    }

    # LWP::Simple::get returns undef when the request fails.
    my $result = get($url);
    if (!defined($result)) {
        return "";
    }

    # [^<]* instead of .* keeps the capture inside one element even if
    # the answer holds several <doi> elements on a single line.
    if ($result =~ m{<doi\b[^>]*>([^<]*)</doi>}) {
        my $doi = $1;
        $doi =~ s/^\s+//;
        $doi =~ s/\s+$//;
        # The answer is XML, so characters like < and & arrive escaped;
        # &amp; is decoded last to avoid double decoding.
        $doi =~ s/&lt;/</g;
        $doi =~ s/&gt;/>/g;
        $doi =~ s/&quot;/"/g;
        $doi =~ s/&apos;/'/g;
        $doi =~ s/&amp;/&/g;
        return $doi;
    }
    return "";
}

###############################################################
#  Sanitization of a text string
###############################################################

# SanitizeText($string)
# Converts the LaTeX markup of a field to Unicode with
# LaTeX::ToUnicode::convert and then strips or replaces the macros and
# special characters that the conversion may leave behind.  Returns the
# cleaned string; an undefined argument gives the empty string.
sub SanitizeText {
    my $string = shift;
    if (!defined($string)) {
        return '';
    }
    $string = convert($string);

    # Macros that are dropped altogether.  \urlprefix has to go before
    # the replacement of \url below.
    foreach my $macro (qw(newblock bgroup egroup scshape urlprefix
                          emph textbf enquote)) {
        $string =~ s/\\$macro//g;
    }

    $string =~ s/\\url/URL: /g;
    $string =~ s/\\doi/DOI: /g;
    $string =~ s/\\\\/ /g;          # line break
    $string =~ s/\$//g;             # math mode delimiters
    $string =~ s/\\checkcomma/,/g;
    $string =~ s/~/ /g;             # tie
    $string =~ s/[\{\}]//g;         # grouping braces
    return $string;
}