#! /bin/sh
#
# This program reads one of James Kiefer's BIO messages and formats it
# into a web page.
#
# Brian Reid, in odds and ends of leftover time, January 1998
#
# Notes: there's still a conflict between the capitalization corrector
# and the pragmat parsers. They are getting capital-repaired before they
# are parsed. Still to do: add accented members of ISO LATIN 1 to gwords
# generator, so we can use umlauts in headings. Also, the NT/OT capitalizer
# is not working. Look for brain damage.
#
#######################################################################
# "documentation", such as it is.
#
# LineMode[n] is "quote", "verse", "preformatted", "deleted", or undefined.
#
# Algorithm is 2.5 pass. First pass builds array "LText". Then we mess with
# it. Then we write it out as html.
# Fixed command search path for the rest of this script.  The value begins
# and ends with a colon; those empty entries make the shell search the
# current working directory as well.
PATH=":/usr/local/bin:/usr/local/Hughes/bin:/usr/X11R6/bin"
PATH="${PATH}:/usr/sbin:/bin:/usr/bin:"
export PATH
awk -v TOPHEAD=3 '
# Concatenate the words in "Array" into a single string.
#   Array - array of words with subscripts 1, 2, 3, ...
# Returns the words joined by single spaces, or "" for an empty array.
# The names after the gap in the parameter list are local scratch
# variables (the usual awk idiom), so same-named globals are not disturbed.
function wordcat(Array,    top, idx, out) {
    # Subscripts delivered by "for (idx in Array)" are strings.  Force a
    # numeric comparison; a string comparison ranks "10" below "9" and
    # would mishandle arrays of ten or more words.
    top = 0;
    for (idx in Array) {
        if ((idx + 0) > top) top = idx + 0;
    }
    out = "";
    for (idx = 1; idx <= top; idx++) {
        if (idx > 1) out = out " ";
        out = out Array[idx];
    }
    return(out);
}
# Count how many times the one-character string "chr" occurs in "word".
# Returns the count (0 when it does not occur).
# The globals "nk" and "m" are used as scratch space.
function NumInst(word,chr) {
    nk = 0;
    m = 1;
    while (m <= length(word)) {
        if (substr(word, m, 1) == chr) {
            nk = nk + 1;
        }
        m++;
    }
    return(nk);
}
# Classify the capitalization of "word".
# Returns:
#   0 - the word changes under toupper(), i.e. it has a lowercase letter
#   1 - no lowercase letters and no capitals (numbers and punctuation only)
#   2 - no lowercase letters and at least one capital letter (per "uc")
# The function works on its own argument rather than on the caller globals
# w[k]; "pos" is a local scratch variable.
function isAllcap(word,    pos) {
    if (word != toupper(word)) return(0);
    for (pos = 1; pos <= length(word); pos++) {
        if (uc[substr(word, pos, 1)]) return(2);
    }
    return(1);
}
# Return a copy of "word" with every character listed in the "puncs"
# table removed.  The globals "tw", "gg" and "chr" are used as scratch
# space; "tw" holds the result on return.
function losePunc(word) {
    tw = "";
    gg = 1;
    while (gg <= length(word)) {
        chr = substr(word, gg, 1);
        # keep only the characters that are not punctuation
        if (!puncs[chr]) {
            tw = tw chr;
        }
        gg++;
    }
    return(tw)
}
# Return 1 if "word" is probably a Roman numeral, 0 otherwise.
#   word - the string to test
# A word qualifies when every alphabetic character in it is one of the
# uppercase letters I V X L C D M (the "rom" table built in BEGIN).
# Characters that are not in the "alpha" table are ignored, so a word with
# no letters at all (including the empty string) also returns 1.
# Side effects: jw, LW, let and the array A are globals.  A is emptied on
# entry and then counts each Roman letter seen; after an early return of 0
# it holds only the letters counted before the offending character.
function isroman(word) {
# start from an empty tally
for (jw in A) delete A[jw];
LW = length(word);
for (jw=1; jw<=LW; jw++) {
let=substr(word,jw,1);
# skip digits, punctuation, and anything else not in the alpha table
if (!alpha[let]) continue;
# a letter outside IVXLCDM (any lowercase letter included) rules it out
if (!rom[let]) return(0);
A[let]++;
}
return(1);
}
# Choose the capitalization for one word of a title.
#   - A word that (with punctuation stripped) passes isroman() or is a
#     KeepCaps entry is returned exactly as given.
#   - A word whose lowercase form is listed in lcwords is returned in
#     lowercase.
#   - Any other word is lowercased and passed through initCap().
# The globals "tw" and "nword" are used as scratch space.
function titleCap(word) {
    tw = losePunc(word);
    if (isroman(tw) || KeepCaps[tw]) {
        return(word);
    }
    nword = tolower(word)
    if (lcwords[nword] == 0) {
        return(initCap(nword));
    }
    return(nword);
}
# Re-capitalize a word that appeared in the body text in all capitals.
#   - Roman numerals and KeepCaps entries (judged with punctuation
#     stripped) are returned untouched.
#   - A word containing two or more periods is returned fully uppercased.
#   - A word whose lowercase form is listed in lcwords is returned in
#     lowercase.
#   - Anything else gets a single leading capital from initCap().
# The globals "tw", "nword" and "icword" are used as scratch space.
function wordCap(word) {
    tw = losePunc(word);
    if (isroman(tw) || KeepCaps[tw]) {
        return(word);
    }
    if (NumInst(word, ".") >= 2) {
        return(toupper(word));
    }
    nword = tolower(word);
    if (lcwords[nword] != 0) {
        return(nword);
    }
    icword = initCap(nword);
    return(icword);
}
# Force a word to have one initial capital.
#   word - the word to recapitalize; it may begin with non-letters.
# Returns word unchanged if it is a KeepCaps entry or passes isroman(),
# the fully uppercased word if it contains more than one ".", and
# otherwise the word with everything up to and including its first letter
# uppercased and the remainder lowercased.
# The globals nword, ji, let and cword are used as scratch space.
function initCap(word) {
if (KeepCaps[word]) return(word);
if (isroman(word)) return(word);
# more than one period: uppercase the whole word (presumably an
# abbreviation such as dotted initials)
if (NumInst(word,".") > 1) return(toupper(word));
nword = tolower(word)
# ji is the position of the character being examined; cword collects the
# uppercased prefix
ji=1;
let=toupper(substr(word,ji,1));
cword = let;
# step past leading non-letters, copying each character into the prefix,
# until a letter has been copied or the end of the word is reached
while(!alpha[let]) {
let=toupper(substr(word,++ji,1));
cword = cword let;
if (ji >= length(word)) break;
}
# append everything after position ji, taken from the lowercased copy
cword = cword substr(nword,++ji);
return(cword)
}
# Like index(), only right-to-left.
#   str - the string to search
#   chr - the one-character string to look for
# Returns the 1-based position of the last occurrence of chr in str, or 0
# if chr does not occur.
# "pos" is declared as an extra parameter so that it is local; the global
# "i" serves as a loop counter elsewhere in this program and must not be
# overwritten here.
function rindex(str,chr,    pos) {
    for (pos = length(str); pos > 0; pos--) {
        if (substr(str, pos, 1) == chr) return(pos);
    }
    return(0);
}
# Convert the characters in a word to be shell-safe.
#   w - the word to convert
# Every character that has a non-empty entry in the SUB table (built in
# BEGIN) is replaced by that entry; all other characters are copied
# unchanged.  Returns the converted word.
# "out", "pos" and "ch" are declared as extra parameters so that they are
# local; in particular the global "nw", which the sweep over the text uses
# as a word count, is left alone.
function convertWord(w,    out, pos, ch) {
    out = "";
    for (pos = 1; pos <= length(w); pos++) {
        ch = substr(w, pos, 1);
        if (SUB[ch] != "") {
            out = out SUB[ch];
        } else {
            out = out ch;
        }
    }
    return(out);
}
# function to spot things that should be hotlinked, and hotlink them.
# One-time initialization: capitalization tables, month-name tables,
# character-class lookup tables, the convertWord() substitution table, and
# the starting values of the parser state variables.
BEGIN {
    # capitalization rules: words that titleCap() and wordCap() return in
    # lowercase
    lcwords["of"]=1; lcwords["the"]=1; lcwords["in"]=1; lcwords["a"]=1;
    lcwords["an"]=1; lcwords["and"]=1;
    # month abbreviation -> full month name and month number
    MonthAbb["Jan"]="January"; MonthNum["Jan"]=1;
    MonthAbb["Feb"]="February"; MonthNum["Feb"]=2;
    MonthAbb["Mar"]="March"; MonthNum["Mar"]=3;
    MonthAbb["Apr"]="April"; MonthNum["Apr"]=4;
    MonthAbb["May"]="May"; MonthNum["May"]=5;
    MonthAbb["Jun"]="June"; MonthNum["Jun"]=6;
    MonthAbb["Jul"]="July"; MonthNum["Jul"]=7;
    MonthAbb["Aug"]="August"; MonthNum["Aug"]=8;
    MonthAbb["Sep"]="September"; MonthNum["Sep"]=9;
    MonthAbb["Oct"]="October"; MonthNum["Oct"]=10;
    MonthAbb["Nov"]="November"; MonthNum["Nov"]=11;
    MonthAbb["Dec"]="December"; MonthNum["Dec"]=12;
    # Reverse tables keyed by full month name.  They are built before the
    # "Sept" alias is added, so MonthCode["September"] is always "Sep".
    for (j in MonthAbb) {
        MonthNumber[MonthAbb[j]] = MonthNum[j];
        MonthCode[MonthAbb[j]] = j;
    }
    MonthAbb["Sept"]="September"; MonthNum["Sept"]=9;
    # abbreviations whose capitals are left alone by the capitalizers
    KeepCaps["ELCA"]=1; KeepCaps["ECUSA"]=1;
    KeepCaps["NT"]=1; KeepCaps["OT"]=1; KeepCaps["UTO"]=1;
    KeepCaps["AD"]=1; KeepCaps["ISBN"]=1;
    CapWords["Christian"]=1; CapWords["Christ"]=1; CapWords["Jesus"]=1;
    # character-class rules
    # lc: lowercase letters, uc: uppercase letters, alpha: both
    alph="abcdefghijklmnopqrstuvwxyz"
    for (i=1; i<=26;i++) {lc[substr(alph,i,1)]=1; alpha[substr(alph,i,1)]=1;}
    uca=toupper(alph);
    for (i=1; i<=26;i++) {uc[substr(uca,i,1)]=1; alpha[substr(uca,i,1)]=1;}
    # rom: the letters that can appear in a Roman numeral
    roman="IVXLCDM";
    for (i=1; i<=7;i++) {rom[substr(roman,i,1)]=1;}
    # ipuncs: punctuation that can begin a word
    ipunc="(;[{";
    for (i=1; i<=4;i++) {ipuncs[substr(ipunc,i,1)]=1;}
    # puncs: every character that losePunc() strips
    allpunc="!@#$%^&*()_+`~-={}[]:\";'\''<>,./?;|\\";
    for (i=1; i<=length(allpunc);i++) {puncs[substr(allpunc,i,1)]=1;}
    # character translation rules, applied by convertWord()
    SUB["?"]="@Q"; SUB["-"]="@-";
    SUB["!"]="@E"; SUB["'\''"]="@P";
    SUB["/"]="@S"; SUB["\""]="@D";
    SUB["$"]="@M"; SUB["%"]="@C"; SUB["*"]="@A";
    SUB["<"]="@1"; SUB[">"]="@2"; SUB["|"]="@V";
    SUB["{"]="@3"; SUB["}"]="@4"; SUB["~"]="@T";
    SUB["["]="@5"; SUB["]"]="@6"; SUB["`"]="@F";
    SUB["("]="@7"; SUB[")"]="@8"; SUB["#"]="@Z";
    SUB[":"]="@K"; SUB[";"]="@L"; SUB[","]="@,";
    SUB["\\"]="@B";SUB["@"]="@@"; SUB["&"]="@N";
    # ordinary initialization
    Nln=0;              # number of lines stored in LText so far
    InHeader = 1;       # cleared at the first blank input line
    BlankLine[0] = 1;   # treat the position before line 1 as blank
    FirstText = 0;
    SeenHeading = 0;
    Itemizing = 0;
    BLANKS=" ";
    LinkCount=0;
}
################# beginning of main body of AWK program #####
# Cross-reference definitions.  A line starting with @link is split on "|";
# only lines with exactly three fields are used.  Field 2 is the phrase to
# look for in the text and field 3 is its target.
# Tables filled in here:
#   Lcode[phrase]     - placeholder code of the form ${n}
#   Link[code]        - the text that finally replaces the placeholder
#   LinkOrder[n]      - the phrase registered under link number n
# LinkCount is incremented before the line is validated, so rejected lines
# leave unused numbers in LinkOrder.
# NOTE(review): an accepted line is not ended with "next", so it also
# reaches the rules that follow - confirm this is intended.
/^@link/ {
LinkCount++;
NLF=split($0,linkfields,"|");
if (NLF != 3) next;
# do not link a page to itself; THISFILE comes from the environment
if (linkfields[3]==ENVIRON["THISFILE"]) next;
LinkCode=sprintf("${%d}",LinkCount);
# NOTE(review): the format has one %s but two arguments, so the second
# argument is ignored; the format string looks as if it has lost text -
# confirm against a pristine copy.
LinkResult=sprintf("%s", linkfields[3], linkfields[2]);
Lcode[linkfields[2]]=LinkCode;
Link[LinkCode]=LinkResult;
LinkOrder[LinkCount] = linkfields[2];
}
# General rule, run for every input record that an earlier rule did not end
# with "next".  It builds the line buffer LText[1..Nln] together with the
# per-line tables BlankLine, Itemized and LeftMargin.
{
# Input read from a named file (anything other than standard input):
# ignore blank lines and lines whose first character is #.
if (FILENAME != "-") {
if (NF == 0) next;
# Allow # as comment character except in bios themselves
if (substr($0,1,1)=="#") next;
}
if (NF == 0) {
# Blank line
# The first blank line ends the header; runs of blank lines are collapsed
# into a single blank entry.
InHeader = 0;
if (BlankLine[Nln]) next;
BlankLine[++Nln]=1;
LText[Nln]="";
} else {
# Nonblank line.
# A line consisting only of a ">" quote marker is stored as a blank line.
# NOTE(review): the four literals below contain duplicates; they look as
# if runs of spaces were collapsed - confirm against a pristine copy.
if (($0 == " > ") || ($0 == " >") ||
($0 == " >") || ($0 == " > ")) {
BlankLine[++Nln]=1;
LText[Nln]="";
} else {
# Itemized is recorded under the index of the previously stored line,
# because Nln has not been incremented yet.
Itemized[Nln]=Itemizing;
# everything before the first blank line is header and is discarded
if (InHeader) next;
LText[++Nln] = $0;
# Replace a leading ">" quote marker by a space.
# NOTE(review): as written, the 4- and 5-character prefixes are compared
# with a 3-character literal and can never match; the literals appear to
# have lost spaces - confirm against a pristine copy.
if (substr($0,1,3)==" > ") LText[Nln] = " " substr($0,4);
if (substr($0,1,4)==" > ") LText[Nln] = " " substr($0,5);
if (substr($0,1,5)==" > ") LText[Nln] = " " substr($0,6);
# Keep track of the running left margin of the text
# After the loop i is the 1-based position of the first non-space
# character (length+1 if the line is all spaces).
for (i=1; i<=length(LText[Nln]); i++) {
if (substr(LText[Nln],i,1) != " ") break;
}
LeftMargin[Nln] = i;
# strip the leading spaces now that the margin has been recorded
if (i > 0) LText[Nln]=substr(LText[Nln],i);
# Remember the first line after a heading whose margin is 3 or less
if ((i <= 3) && (SeenHeading!= 0) && (FirstText == 0)) {
FirstText = Nln;
}
}
}
}
##### begin processing of lines beginning with =
# Lines beginning with an = are major title
# Processing of major title: break up into words, repair capitalization,
# reassemble into string, remove date from end of string, re-break into words,
# make calls to gif-word generator using those words.
# Results left behind: PageTitle (first title only), Day/Month/Year (first
# dated title only), BioTitles[title]=sort key, NeedImage[code]=word, and
# the replacement text in LText[Nln].
# NOTE(review): several string literals in this rule are split across two
# lines and look as if they have lost text; they are left untouched here -
# confirm against a pristine copy.
/^=/ { ln=substr(LText[Nln],2)
SeenHeading = 1;
# break it up into words
nf=split(ln,f,FS);
# create Title as an array, then empty it
Title[0]="";
for (j in Title) {delete Title[j]};
# the first word always gets an initial capital; the rest follow the
# title rules in titleCap()
Title[1]=initCap(f[1]);
for (i=2; i<=nf; i++) {
# fix capitalization
Title[i]=titleCap(f[i]);
}
# reassemble into a string
TitleString=wordcat(Title);
# the last parenthesized item in the title is its date
d1=rindex(TitleString, "(");
if (d1) {
d2=rindex(TitleString,")");
# no closing parenthesis: take everything up to the last character
if (d2 == 0) {d2 = length(TitleString);}
PT = substr(TitleString,1,d1-1);
PDate = substr(TitleString,d1+1,d2-d1-1);
TitleString = PT;
# remove the date from the end of the title string
while (substr(TitleString,length(TitleString),1)==" ") {
TitleString=substr(TitleString,1,length(TitleString)-1);
}
# split the date into day, month, and year. Use "NT" if year is missing.
# Only the first dated title sets Day, Month and Year.
if (Day == "") {
# NOTE(review): NF is the built-in field count; assigning to it makes awk
# rebuild $0 for the current record, which later rules then see.  A
# private variable name would avoid that - confirm nothing relies on it.
NF=split(PDate,DC," ");
Day=DC[1];
Month=initCap(DC[2]);
# expand a month abbreviation to the full month name
if (MonthAbb[Month] != "") Month = MonthAbb[Month];
if (NF > 2) { Year=DC[3];} else {Year="NT"};
# undo the capitalization repair applied to NT and OT
if (Year=="Nt") Year="NT";
if (Year=="Ot") Year="OT";
if (Day != "") NeedImage[convertWord(Day)]=Day;
if (Month != "") NeedImage[convertWord(Month)]=Month;
if (Year != "") NeedImage[convertWord(Year)]=Year;
}
}
if (PageTitle == "") PageTitle = TitleString;
# Redivide the dateless title back into words
NTW=split(TitleString,Words,FS);
# Find the sort key for the title
# It is the first word that contains a comma, minus its last character;
# if no word contains a comma it is the first word.
if (SortKey == "") {
SortKey = Words[1];
for (g=1; g<=NTW; g++) {
if (index(Words[g],",")) {
SortKey=substr(Words[g],1,length(Words[g])-1);
break;
}
}
}
BioTitles[TitleString] = SortKey;
# reset so that the next title computes its own key
SortKey = "";
# Make a title display out of generated gif images of each word
LText[Nln]="\n";
for (k=1; k<=NTW; k++) {
TitleWord=convertWord(Title[k]);
NeedImage[TitleWord]=Title[k];
Wtemp=sprintf("
\n",Title[k],TitleWord);
LText[Nln]=LText[Nln] Wtemp;
}
LText[Nln]=LText[Nln] "
\n";
# Leave a record that we have been here.
LineProc[Nln] = 1;
Break[Nln] = 1;
MarkupBefore[Nln] = MarkupBefore[Nln] "
";
}
##### end processing of lines beginning with =
/^PRAYER \(/ {
LText[Nln]=sprintf("
"; } } else { if (inQuote) { EnvEnd[i-1] = "blockquote"; Break[i] = 1; BlockEnd[blk] = i-1; } inQuote = 0 CRbreak = 0; } } if (inQuote) { EnvEnd[Nln] = "blockquote"; BlockEnd[blk] = Nln-1; } # # Sweep 1a: look over the indented blocks to see if any of them is # preformatted. # for (i=1; i<=blk; i++) { if (BlockIdentified[i]==0) { i1 = BlockStart[i]; i2 = BlockEnd[i]; # Look at each line in the indented block to see if it contains any # manual formatting, spacebar stuff. for (j=i1; j<=i2; j++) { line=LText[j]; for (k=LeftMargin[j];k<=length(line); k++) { if (substr(line,k,1)==" ") { nsp++; if (nsp > 2 || (((nsp > 1) && (LeftMargin[j]>0))) ) { BlockIdentified[i] = 2; EnvBegin[i1]="pre"; EnvEnd[i2]="pre"; break; } } else {nsp=0}; } if (BlockIdentified[i] != 0) break; } # If this is a
block, set the "line has been processed" flag on every
# line in the block, to make sure that we dont mess with it further.
if (BlockIdentified[i] == 2) {
for (j=i1; j<=i2; j++) {
LineProc[j]=1;
LineMode[j]="preformatted";
}
}
}
}
#
# Sweep 1b: look over the indented blocks to see if any of them is verse. If
# the block is verse, were going to need to keep track of the left margin
# so we can put in non-breaking spaces when the line is emitted.
#
for (i=1; i<=blk; i++) {
if (BlockIdentified[i]) continue;
Leftmost[i] = 999;
IsVerse = 0;
ShortLines = 0;
VeryShortLines = 0;
Jaglines = 0;
if (BlockIdentified[i]==0) {
i1 = BlockStart[i];
i2 = BlockEnd[i];
nlines = i2-i1+1;
# Look at each line in the indented block to see if it has the margins that
# make it look like poetry. And find the mimimum left margin.
for (j=i1; j<=i2; j++) {
if (LeftMargin[j] != "" && LeftMargin[j] < Leftmost[i]) {
Leftmost[i] = LeftMargin[j];
}
line=LText[j];
llen=length(line);
if ((llen > 0) && (llen < 50)) ShortLines++;
if ((llen > 0) && (llen < 30)) VeryShortLines++;
if ((llen != 0) && (length(LText[j-1]) != 0)) {
if ((j > i1) && (LeftMargin[j] != LeftMargin[j-1]))
Jaglines++;
}
}
if ((ShortLines > (nlines/4)) || (Jaglines > nlines/5)) {
IsVerse = 1;
}
if (VeryShortLines > 1) IsVerse = 1;
}
if (IsVerse == 1) {
BlockIdentified[i] = 3;
for (j=i1; j<=i2; j++) {
LineMode[j]="verse";
MinMargin[j]=Leftmost[i];
}
}
}
# for (i=1; i<=Nln; i++) print i,LeftMargin[i],LineMode[i],substr(LText[i],1,25)
#
# Sweep 2: look for lines that need transformations applied
#
for (i=1; i<=Nln; i++) {
if (LineProc[i]) continue;
# Look for horizontal rules
Naster = 0;
for (j=1; j<=length(LText[i]); j++) {
chr = substr(LText[i],j,1);
if ((chr == "=") || (chr == "*") ) {Naster++;}
}
if (Naster > 2 && (100*Naster/length(LText[i])) > 50) {
# Its a rule
if (!BlankLine[i-1] || Break[i-1]) {
MarkupBefore[i] = MarkupBefore[i] "
";
}
LText[i] = "
";
Break[i]=1;
continue; # skip the rest of Sweep 2
}
# Deal with capitalization. Find each word in the line and call the
# "scrutinize capitalization" procedure on it. Then reassemble the line.
nw=split(LText[i],w,FS);
if (nw > 0) {
# find sequences of capitalized words. Probably turn them into sequences of
# italicized words. This is deeply twisted logic, because it is all rather
# empirical. We want things like JUSTIN MARTYR, HIS LIFE AND THOUGHT
# to be italicized, but we dont want J C MARTYR to be italicized, and
# we dont want AD 308 to be italicized.
for (k=1; k<=nw; k++) iscap[k] = isAllcap(w[k]);
capping = 0;
for (k=1; k<=nw; k++) {
if (iscap[k]) {
if (capping) w[k]=wordCap(w[k]);
else w[k]=initCap(w[k]);
# Do not italicize initials, which means 2 or more single-letters in a row
if (!capping && (length(w[k])==1 && length(w[k+1])==1)) continue;
# do not italicize short capitalized sequences that are mostly numbers
if (!capping && (iscap[k]==1) &&
( (k>=nw-1) || (iscap[k+2] != 2))) continue;
if ((k < nw) && iscap[k+1] && !iscap[k-1]) {
w[k]="" w[k];
capping = 1;
}
# at the end of an italicized sequence, put in the exit-italics code.
if (capping && !iscap[k+1]) {
w[k]=w[k] "";
capping = 0
}
# a word that begins with one of the initial punctuations characters
# will end italicization. This only works some of the time.
if (capping && ipuncs[substr(w[k+1],1,1)]) {
w[k]=w[k] "";
capping = 0
}
}
}
# turn it off at the end of each line
if (capping) {capping=0; w[nw]=w[nw] ""};
################ end of twisted italicization logic
# Look here for things like URLs and email addresses
for (k=1; k<=nw; k++) {
wordflag[k] = 0;
if (tolower(substr(w[k],1,7))=="http://") {
w[k]="" w[k] ""
wordflag[k] = 1;
}
if (index(w[k],"@")) {
ati=index(w[k],".");
right=substr(w[k],ati+1);
dot=index(right,".");
if (dot > 0) {
w[k] = "" w[k] ""
wordflag[k] = 2;
}
}
}
# Do init-cap processing of the line unless the line contains markup.
if (((wordflag[1]==0 && (Break[i-1]) ||
(Break[i] || BlankLine[i-1])) &&
(substr(w[1],1,1) != "<"))) {
w[1]=initCap(w[1]);
}
LText[i]=w[1];
for (k=2; k<=nw; k++) {
LText[i] = LText[i] " " w[k]
}
}
# Look out for poem titles. A poem title is a line that has a break before
# and after it, and for which the next substantive line is marked "verse".
if ( (Break[i] || BlankLine[i-1]) && (!BlankLine[i]) &&
(LeftMargin[i] <= 1) &&
((LineMode[i+1]=="verse") || (LineMode[i+1]=="quote") ||
(BlankLine[i+1] &&
((LineMode[i+2]=="verse") || LineMode[i+2]=="quote")))) {
S1=sprintf("",TOPHEAD+1);
S2=sprintf(" ",TOPHEAD+1);
MarkupBefore[i] = MarkupBefore[i] S1;
MarkupAfter[i] = S2 MarkupAfter[i];
LineProc[i] = 4;
}
}
#
# Now take all "text" lines and remove the line breaks, so that we
# can look for cross-linked phrases line by line.
#
for (i=1; i<=Nln; i++) {
if (LineProc[i]) continue;
if (LineMode[i]!= "") continue;
if (BlankLine[i]) continue;
for (j=1; j<=Nln-1; j++) {
if (LineMode[i+j]=="" && (!BlankLine[i+j]) && (LText[i+j]!="")) {
LText[i] = LText[i] " " LText[i+j];
LineMode[i+j]="deleted";
BlankLine[i]=0;
LText[i+j]= "";
} else {
break;
}
}
}
#
# Process the cross-reference links
#
for (i=1; i<=Nln; i++) {
if (LineProc[i]) continue;
if (LineMode[i] == "deleted") continue;
if (LineMode[i] == "verbatim") continue;
for (j=1; j<=LinkCount; j++) {
target=LinkOrder[j];
LinkMatch[j]=gsub(target, Lcode[target], LText[i]);
}
for (j=1; j<=LinkCount; j++) {
target=LinkOrder[j];
if (LinkMatch[j] > 0) {
gsub(Lcode[target], Link[Lcode[target]], LText[i]);
}
}
}
print ""
print ""
print ""
print " "
print " "
print " "
printf " %s \n",PageTitle;
# Dump the table of needed images as comments in the header section.
for (j in NeedImage) {
printf " \n",NeedImage[j],j;
}
if ((Day != "") && (Month != "")) {
MC = MonthCode[Month];
SortKey = sprintf("%02d%02d",MonthNumber[Month],Day);
if (Year == "") {
printf " \n",SortKey,Day,MC
} else {
printf " \n",Year,SortKey,Day,MC,Year
}
}
for (i in BioTitles) {
printf " \n",BioTitles[i],i
}
print ""
print ""
print ""
# Now dump the text buffers.
for (i=1; i<=Nln; i++) {
if (MarkupBefore[i] != "") printf "%s",MarkupBefore[i];
if (EnvBegin[i] != "") printf "<%s>\n",EnvBegin[i];
if (LineMode[i]=="deleted") continue;
if (BlankLine[i] && !Itemized[i]) {
if (!Break[i-1] && !Break[i+1]) print ""
} else {
if (i == FirstText && (Day != "")) {
print "";
printf("
\n",Day,convertWord(Day));
printf("
\n",Month,convertWord(Month));
printf("
\n",Year,convertWord(Year));
print "
";
}
if (Itemized[i] && (Break[i-1] || BlankLine[i-1])) printf "