-
Notifications
You must be signed in to change notification settings - Fork 0
/
authorindex
executable file
·510 lines (448 loc) · 17.3 KB
/
authorindex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
#!/usr/bin/perl -w
# This file is part of the authorindex package for LaTeX + BibTeX.
use strict;
# configuration: apart from the command to invoke perl above you might want to
# change:
my $bstenv="BSTINPUTS"; # Environment Variable holding .bst search path
my $bibenv="BIBINPUTS"; # Env Variable holding search path for BibTeX databases
my $tmp="_autidx_"; # Name base for temporary files
# This script takes LaTeX .aux files as input. It extracts all citations made
# with page number information. These, together with the bibliography data base
# extracted from the input and a .bst file are processed by bibtex to get a
# file that associates each citation label with corresponding authors. Together
# with the knowledge of which work is cited on which page, this is used to
# compute which author is cited on which page. This information is written to
# the output in form suitable to be included in a LaTeX document.
# Alternatively, the script can also extract the label that appears in the
# references for each work instead of the pages. Also, output for further
# processing by makeindex can be generated instead of a 'ready' LaTeX file.
# examine command line for options
my %opt; # hash to contain options found
use Getopt::Std;
getopts('dhikpr',\%opt);
# handle -h option: a short description of the script.
exists $opt{'h'} && die <<END;
Usage: $0 [-d] [-h] [-i] [-k] [-p] [-r] [filename ...]
-d generate additional statistical information as comments
-i generate file (with extension .ain) suitable as input for makeindex
-k keep auxiliary bibliography style file $tmp.bst after script finishes
-h print this help and exit
-r suppresses automatic inclusion of .aux generated by \\include-ed files
-p send result to standard output
Any number of file names can be given. If none is given, input is read from
standard input. Otherwise, all files specified are read, after the extension
.aux has been appended to their name where necessary.
END
# Make sure that all input files get .aux extension.
grep(s/$/\.aux/,grep(!/\.aux$/,@ARGV));
my @SAVEARGV=@ARGV;
my $usenum=0; # default: put page numbers into index
my $PageTypeOrder="rRnAa"; # default order of number types
my $altedit="'skip\$"; # no editors if no authors present
my $addedit="'skip\$"; # no editors if authors present
my $citationcount=0; # used for statistics
my $workscount=0;
my $explicits=0;
my $authorcount=0;
my $miniindex=0; # flag: is 1 if mini-index is requested
my $nocompress=0; # flag: is 1 if page range compression is prohibited
my %bib; # collect .bib-file names as defined keys of hash
my %pnlist; # collect occuring pages/bib.numbers as keys of hash
my $editors=0; # number: 0 no editors; 1 editors if no authors
# 2: editors whenever present
my $nameformat=""; # string to format/sort names
my $maxnames=999; # max. number of author names per entry
my $truncnames=999; # how many authors to take if max number is exceeded
my $labeltype="labels"; # wether bibliography labels or pages are indexed
my %Lab2Pag; # for each label gives array of pages where work cited
my %Lab2Num; # for each label give its bibliography number
my %Lab2Bib; # ditto, but for occurrence in bibliography
my %Aut2Pag; # for each author gives array of pages cited/in bilio.
my %Aut2Bib; # ditto, but only occurrence in bibliography
my %Aut2Lab; # list of BibTeX entry labels an author occurs in
my %LeadAutPag; # author -> hash marking pages with work author is 1st
my %printname; # sort-key -> printed representation of author
my %plainname; # sort-key -> author name
my %PageTypeOrder; # Page type code -> number giving relative order
my %PageOrder; # page string -> array used to sort pages
my $see=""; # string to separate other and first author, or undef.
my $bst=""; # name of BibTeX program to extract author names
my $output=""; # name of file to which author index is written
my $twoabbrev=""; # string to append to page for 2 subsequent pages
# scan input files and
# - build the file later to be processed by BibTeX,
# - generate a temporary bibtex database of the explicit author names given,
# - assemble for each citation the page where it was referenced and
# - look for data base specification, output file name, and so on.
open(AUXFILE,">$tmp.aux") || die "Can't open temporary file $tmp.aux\n";
open(BIBFILE,">$tmp.bib") || die "Can't open temporary file $tmp.bib\n";
while(<>){
if(/^\\citationpage\{([^{]+)\}\{(.+)\}$/){
$citationcount++; # used for statistics only.
$workscount++ unless ($1 eq '*') or (exists $Lab2Pag{$1})
or (exists $Lab2Bib{$1}) or (exists $Lab2Num{$1});
$pnlist{$2}="";
push @{$Lab2Pag{$1}},$2;
print AUXFILE "\\citation{$1}\n";
}elsif(/^\\bibcite\{([^{]+)\}\{(.+)\}$/){
$workscount++ unless ($1 eq '*') or (exists $Lab2Pag{$1})
or (exists $Lab2Bib{$1}) or (exists $Lab2Num{$1});
$pnlist{$2}="";
push @{$Lab2Num{$1}},$2;
print AUXFILE "\\citation{$1}\n";
}elsif(/^\\bibpage\{([^{]+)\}\{(.+)\}$/){
$workscount++ unless (exists $Lab2Pag{$1})
or (exists $Lab2Bib{$1}) or (exists $Lab2Num{$1});
$pnlist{$2}="";
push @{$Lab2Bib{$1}},$2;
print AUXFILE "\\citation{$1}\n";
}elsif(/^\\aiexplicit\{(.+)\}\{(.+)\}$/ and !$usenum){
# above: page number might not contain '}{'
print BIBFILE "\@MISC{$tmp$explicits,author=\"$1\"}\n";
$bib{$tmp}="";
$pnlist{$2}="";
push @{$Lab2Pag{"$tmp$explicits"}},$2;
print AUXFILE "\\citation{$tmp$explicits}\n";
$explicits++;
}elsif(/^\\bibdata\{(.+)\}$/){
@bib{split(",",$1)}="";
}elsif(/^\\aistyle\{(.+)\}$/){
warn "Multiple \\authorindexstyle\n" if $bst && ($bst ne $1);
$bst=$1;
}elsif(/^\\aioptions\{(.*)\}$/){
($editors,$nameformat,$maxnames,$truncnames,$labeltype)=split /\|/,$1;
$usenum|=($labeltype eq "labels");
if($editors>0){
$altedit="{ editor format }";
$addedit=$altedit if $editors==2;
}
}elsif(/^\\aifilename\{(.+)\}$/){
warn "Warning: Multiple authorindices\n" if $output;
$output=$1;
}elsif(/^\\\@input\{(.+)\}$/){
push(@ARGV,$1) unless exists $opt{'r'};
}elsif(/^\\pagetypeorder\{([rRaAn]+)\}$/){
$PageTypeOrder=$1;
}elsif(/^\\aiseestring\{(.+)\}$/){
$see=$1;
}elsif(/^\\aitwostring\{(.+)\}$/){
$twoabbrev=$1;
}elsif(/^\\aiinbibflag$/){
$miniindex=1;
}elsif(/^\\ainocompressflag$/){
$nocompress=1;
}
}
close BIBFILE;
# output can go to stdout or a filename found in the input files.
$output || die "You have to include .aux file produced by .tex file containing \\begin{document}\nin the argument list and you have to \\usepackage{authorindex}!\n";
$output="-" if exists $opt{'p'};
# We need at least one BibTeX database
my $bib=join(",",keys %bib)
|| die "You must specify at least one BibTeX database\n";
print AUXFILE "\\bibdata{$bib}\n";
# if the user hasn't explicitly given a .bst style for formatting author names,
# we generate our own based on the style options found in the input files.
unless($bst){
my @nameformat=split /;/,$nameformat;
my ($printkey,$namefmtcmd)=("cite\$ write\$ newline\$\n","");
for (@nameformat){
my ($namerep,$sortrep)=split /:/;
$sortrep=$namerep unless $sortrep;
$namefmtcmd.=
"duplicate\$ names swap\$ \"$namerep\" format.name\$ " .
"write\$ newline\$\n" .
"duplicate\$ names swap\$ \"$sortrep\" format.name\$ " .
"purify\$ \"u\" change.case\$ write\$ newline\$\n$printkey";
$printkey="newline\$\n";
}
$ENV{$bstenv}=".:" . (exists $ENV{$bstenv} ? $ENV{$bstenv} : "");
$bst=$tmp;
open(BSTFILE,">$bst.bst") || die "Can't open $bst.bst\n";
print BSTFILE <<END; # Now comes the BibTeX programm inlined...
% Temporary file generated by $0
entry{author editor}{}{} strings{names} integers{numnames}
function{format}
{ duplicate\$ empty\$ % field present?
{ pop\$ } % no: do nothing but cleanup
{ duplicate\$ 'names := % memorise namelist in variable "names"
num.names\$ duplicate\$
#$maxnames > % too many names in list?
{ pop\$ #$truncnames } % yes, truncate.
'skip\$ % no, keep them all
if\$
'numnames := % save number of names
#0 % start index
{ duplicate\$ numnames < } % test for "while\$"
{ #1 + % next name
duplicate\$ names swap\$ % get name list and index
"{ll}" format.name\$ % format curr. name
"others" = % et al part?
'skip\$ % yes, do not output
{ $namefmtcmd } % no: format all
if\$
} while\$ pop\$ % loop until index is 0
}
if\$
}
function{default.type}{author format author empty\$ $altedit $addedit if\$ }
function{article}{default.type} function{book}{default.type}
function{booklet}{default.type} function{inbook}{default.type}
function{incollection}{default.type} function{inproceedings}{default.type}
function{conference}{default.type} function{manual}{default.type}
function{mastersthesis}{default.type} function{misc}{default.type}
function{phdthesis}{default.type} function{proceedings}{default.type}
function{techreport}{default.type} function{unpublished}{default.type}
read iterate{call.type\$}
END
# ... and here comes perl again.
close BSTFILE;
}
# Now we have decided on our .bst file and can finish the temporary .aux file
# we prepared for BibTeX.
print AUXFILE "\\bibstyle{$bst}\n";
close AUXFILE;
# if we have written to the temporary database, make sure BibTeX can find it.
$ENV{$bibenv}=".:" . (exists $ENV{$bibenv} ? $ENV{$bibenv} : "") if $explicits;
# We now give BibTeX all the citation labels. In return we get a file whose
# lines in turn contain an author name and a label of a work of that author.
# The format the author names are given are determined by the BibTeX style file
# $bst.bst.
print STDERR `bibtex $tmp`;
die "BibTeX error. Aborting leaving all temporary files $tmp.*\n" if $?;
# if things went well, we can delete all these temporary files made for BibTeX.
# The generated .bst file is kept if the user wishes so (-k option).
unlink "$bst.bst" if ($bst eq $tmp && !(exists $opt{'k'}));
unlink "$tmp.aux","$tmp.bib";
# Decide wether pages or citation labels go to the index
my %Lab2Ent=%Lab2Pag;
if($usenum){
%Lab2Ent=%Lab2Num;
%Lab2Bib=();
}else{
%Lab2Num=();
}
# We have now labels associated with page numbers and labels associated with
# author names (in the file generated by the BibTeX run). Now we can bring
# together the previous two main steps and compute for each author the pages
# where she is cited. In draft mode, we also remember for each author the
# labels of her works and the pages where these works are cited.
my ($firstauthor,$firstsortname,$Lab,$PrevLab)=("","","","");
open(BIBFILE,"$tmp.bbl") || die "Can't open $tmp.bbl\n";
while(<BIBFILE>){
chop; s/[\[\]]//g;
my $author=$_;
my ($sortname,$LabOrEmpty);
chop($sortname=<BIBFILE>);
chop($LabOrEmpty=<BIBFILE>);
if($LabOrEmpty){
$PrevLab=$Lab;
$Lab=$LabOrEmpty;
}
my $printname=$author;
if($Lab ne $PrevLab){
@{$LeadAutPag{$author}}{@{$Lab2Ent{$Lab}}}="" if exists $Lab2Ent{$Lab};
@{$LeadAutPag{$author}}{@{$Lab2Ent{'*'}}}="" if exists $Lab2Ent{'*'};
@{$LeadAutPag{$author}}{@{$Lab2Bib{$Lab}}}="" if exists $Lab2Bib{$Lab};
$firstauthor=$author;
$firstsortname=$sortname;
}elsif($see){
$printname="{$author}$see\\aifirst{$firstauthor}";
$sortname="$sortname$see$firstsortname";
}
@{$Aut2Pag{$printname}}{@{$Lab2Ent{$Lab}}}="" if exists $Lab2Ent{$Lab};
@{$Aut2Pag{$printname}}{@{$Lab2Ent{'*'}}}="" if exists $Lab2Ent{'*'};
@{$Aut2Pag{$printname}}{@{$Lab2Bib{$Lab}}}="" if exists $Lab2Bib{$Lab};
@{$Aut2Bib{$printname}}{@{$Lab2Bib{$Lab}}}="" if exists $Lab2Bib{$Lab};
push @{$Aut2Lab{$printname}},$Lab if (exists $opt{'d'}) and $LabOrEmpty;
$printname{$sortname}=$printname;
$plainname{$sortname}=$author;
}
close BIBFILE;
unlink "$tmp.blg","$tmp.bbl";
# Last not least, output the results, properly sorted if needed.
open(AIFILE,">$output") || die "Can't create author index file $output\n";
# convert page type order into numerical values
my ($i,$page);
$PageTypeOrder{$i}=length($PageTypeOrder) while($i=chop $PageTypeOrder);
# create table that relates page to page order info
for $page (keys %pnlist){
$PageOrder{$page}=&parse_pagenumber($page);
}
if(exists $opt{'i'}){
# generate file for makeindex: leave the work for makeindex.
my $name;
for $name (keys %printname){
my $author=$printname{$name};
my $page;
for $page (keys %{$Aut2Pag{$author}}){
print AIFILE "\\indexentry{$name\@$author}{$page}\n";
}
}
}else{
# sort result, throw away duplicate page numbers and generate LaTeX file.
print AIFILE "\\begin{theauthorindex}\n";
my ($prevfirstchar,$prevplain,$name)=("","","");
for $name (sort keys %printname){
my $thisfirstchar=substr($name,0,1);
if($thisfirstchar ne $prevfirstchar){
print AIFILE "\\indexspace\n" if $prevfirstchar;
$prevfirstchar=$thisfirstchar;
}
my $author=$printname{$name};
my $plain=$plainname{$name};
my $rep=$author;
print AIFILE "% @{$Aut2Lab{$author}}\n" if (exists $opt{'d'});
$rep=($plain eq $prevplain) ? "\\airep$author" : "\\aitop$author"
if $plain ne $author;
print AIFILE "\\item[$rep]";
$Aut2Bib{$author}={} unless exists $Aut2Bib{$author};
$LeadAutPag{$author}={} unless exists $LeadAutPag{$author};
my %b2p=%{$Aut2Bib{$author}};
my %lp=%{$LeadAutPag{$author}};
my %pagerep;
my $page;
for $page (keys %{$Aut2Pag{$author}}){
my $prep=$page;
$prep="\\aifirstpage{$prep}" if exists $lp{$page};
$prep="\\aibibpage{$prep}" if exists $b2p{$page};
$pagerep{$page}=$prep;
}
my $res=&compressed_pages($Aut2Pag{$author},\%pagerep);
print AIFILE " \\aipages{$res}\n";
$prevplain=$plain;
$authorcount++;
}
print AIFILE "\\end{theauthorindex}\n";
if(exists $opt{'d'}){ # in draft mode, include some statistics
print AIFILE "%\n% $citationcount citations ";
print AIFILE "of $workscount distinct works\n";
print AIFILE "% $explicits times \\aimention\n" if $explicits;
print AIFILE "% $authorcount different authors\n";
}
}
close AIFILE;
# merge mini indices into the .bbl-Files if it was requested.
if($miniindex){
map s/aux$/bbl/,@SAVEARGV;
my $file;
for $file (@SAVEARGV){
open(BBLINPUT,$file) || next;
open(BBLHELP,">$tmp.bbl") || die "Can't create temp file $tmp.bbl\n";
my $currlabel="";
while(<BBLINPUT>){
if(/\\bibitem(\[.*\])*\{(.*)\}|\\end\{thebibliography\}/){
if($currlabel){
my $pagelist=&pages_for_label($currlabel);
print BBLHELP "\\bibindex{$pagelist}\n";
}
$currlabel=$2;
print BBLHELP "$_";
}elsif(/\\bibindex\{(.*)\}/){
if($currlabel){
my $pagelist=&pages_for_label($currlabel);
print BBLHELP "$`\\bibindex{$pagelist}$'";
}
$currlabel="";
}else{
print BBLHELP "$_" if "$_" ne "\n";
}
}
close BBLHELP;
close BBLINPUT;
rename "$tmp.bbl","$file" || die "Can't replace old $file\n";
}
}
# auxiliary functions
# convert roman numeral string to integer
sub romanvalue
{
local($_)=shift; tr/IVXLCDM/ivxlcdm/;
my %romandigits = ("i", 1, "v", 5, "x", 10, "l", 50,
"c", 100, "d", 500, "m", 1000);
my ($i,$sum,$prev)=("",0,1);
while($i=chop){
my $this=$romandigits{$i};
$sum=$sum+(($this<$prev) ? -$this : $this);
$prev=$this;
}
return $sum;
}
# convert letter to numeric value
sub alphavalue
{
local($_)=@_; tr/A-Z/a-z/;
return ord($_)-ord("a");
}
# split page number in components and replace each component by a number for
# the page type and the page number as an integer.
sub parse_pagenumber
{
local($_)=@_;
my $res="";
while($_){
s/^[^\\A-Za-z0-9]*//;
if(exists $PageTypeOrder{'n'} && s/(^\d+)//){
$res.="$PageTypeOrder{'n'}".sprintf "%0.6d",$1;
}elsif(exists $PageTypeOrder{'R'} &&
s/^\\uppercase\s*\{([ivxlcdm]+)\}//){
$res.="$PageTypeOrder{'R'}".sprintf "%0.4d",&romanvalue($1);
}elsif(exists $PageTypeOrder{'R'} && s/(^[IVXLCDM]+)//){
$res.="$PageTypeOrder{'R'}".sprintf "%0.4d",&romanvalue($1);
}elsif(exists $PageTypeOrder{'A'} && s/(^[A-Z])//){
$res.="$PageTypeOrder{'A'}".sprintf "%0.2d",&alphavalue($1);
}elsif(exists $PageTypeOrder{'r'} && s/(^[ivxlcdm]+)//){
$res.="$PageTypeOrder{'r'}".sprintf "%0.4d",&romanvalue($1);
}elsif(exists $PageTypeOrder{'a'} && s/(^[a-z])//){
$res.="$PageTypeOrder{'a'}".sprintf "%0.2d",&alphavalue($1);
}else{
s/^.//;
}
}
return $res;
}
# test wether 2 pages are subsequent
sub a_follows_b
{
my ($i,$j)=@PageOrder{@_};
$i++;
return($i eq $j);
}
# make a sorted, maybe compressed, list of pages
sub compressed_pages
{
my ($A,$B)=@_;
my %pages=%{$A};
my %pagerep=%{$B};
my ($prevpage,$pendrep,$res,$pagepending,$page)=("","","","","");
for $page (sort { $PageOrder{$a} cmp $PageOrder{$b} } keys %pages){
# handle compression of page ranges. At the moment, we
# also compress ranges that might be displayed in different faces
my $pagerep=(exists $pagerep{$page}) ? $pagerep{$page} : $page;
if($prevpage){
if(!$nocompress and &a_follows_b($prevpage,$page)){
$pendrep=$pagepending ? "--$pagerep"
: ($twoabbrev ? "$twoabbrev" : ", $pagerep");
$pagepending=1;
}else{
$res.=($pagepending ? "$pendrep" : "").", $pagerep";
$pagepending=0;
}
}else{
$res.="$pagerep";
}
$prevpage=$page;
}
$res.="$pendrep" if $pagepending;
return $res;
}
sub pages_for_label
{
my ($label)=@_;
if(exists $Lab2Pag{$label}){
my (%pages,%empty);
@pages{@{$Lab2Pag{$label}}}="";
return &compressed_pages(\%pages,\%empty);
}
return "";
}