emailrelay/bin/txt2html.sh_
Graeme Walker 6b2298628a v0.9.3
2001-10-21 12:00:00 +00:00

452 lines
8.4 KiB
Bash

#!/bin/sh
#
# Copyright (C) 2001 Graeme Walker <graeme_walker@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# ===
#
# txt2html.sh
#
# Converts plain-text to html. The plain-text has to use special
# formatting conventions (see "function process()", Anchorise_1() etc).
#
# Embeds comments in the html output which can be used by "index.sh"
# to create a document index.
#
# Definition lists require a bullet graphic, "graphics/bullet.gif".
#
# usage: txt2html.sh [-a <awk-binary>] [-x] <input-file> [<title>]
#
# The -x switch excludes header and footer stuff.
#
# Default awk interpreter; "-a <awk-binary>" selects a different one.
awk="gawk"
if test "${1}" = "-a"
then
  shift
  if test "${1}" != ""
  then
    awk="${1}"
  fi
  # Guarded because shifting with no arguments left is an error that
  # makes some shells abort before the usage message can be shown.
  if test "$#" -gt 0
  then
    shift
  fi
fi

# "-x" leaves out the html header and footer.
full="1"
if test "${1}" = "-x"
then
  shift
  full="0"
fi

# The input file is mandatory and has to be a regular file.
file="${1}"
if test "${file}" = ""
then
  printf 'usage: %s <txt-file>\n' "$(basename "$0")" >&2
  exit 2
fi
if test \! -f "${file}"
then
  printf '%s: no such file: %s\n' "$(basename "$0")" "${file}" >&2
  exit 1
fi

# The title defaults to the first non-blank line of the input file and
# is overridden by the optional second argument. The file is fed on
# stdin so that its name is never subject to word splitting, globbing
# or option parsing.
title="$(grep -v '^[[:space:]]*$' < "${file}" | head -n 1)"
if test "${2}" != ""
then
  title="${2}"
fi
# ===
# Include()
#
# Expands #include# directives: a line that starts with
# "#include#<path>#" is replaced by the contents of <path>, and
# anything following the closing "#" on that line is dropped.
# The included text then flows through the rest of the pipeline as
# plain text, just like the top-level file, but it is not itself
# scanned for further directives.
#
# A directive that is not at the start of a line is passed through
# unchanged; inline substitution is not implemented here.
#
# The path is handed to a shell command line as-is.
#
Include()
{
  ${awk} -v cat="${awk} '{print}'" '
    # Directive line: let the "cat" command write the named file.
    match( $0 , "^#include#[^#]*#" ) {
      # "#include#" is 9 characters long and RLENGTH also counts
      # the closing "#", hence the path is RLENGTH-10 characters.
      system( cat " " substr( $0 , 10 , RLENGTH-10 ) )
      next
    }
    # Every other line is copied through.
    { print }
  '
}
# ===
# Main()
#
# Does the bulk of the conversion: reads plain text on stdin and
# writes html on stdout, one output line per input line.
#
# usage: Main <title> <full> <colour>
#
#   <title>   text for <title> and for the level-0 index comment
#   <full>    "1" to emit the html header and footer, "0" to omit them
#   <colour>  value of the <body bgcolor> attribute
#
# Each line is classified by how it starts (the first match wins):
#
#   blank line              <br><br>
#   leading tab             <pre>  (code)
#   "# term"                <dt>
#   " text" (one space)     <dd>
#   "* item"                <li>
#   "+ item"                <Li>
#   " - item"               <lI>
#   "(N) item"              <LI>
#   line above "====="      <h1> followed by an "index:1" comment
#   line above "-----"      <h2> followed by an "index:2" comment
#
# The underline rows themselves are dropped and everything else is
# written out as ordinary text. The four spellings of "li" only differ
# in case so that the list flavours can be told apart afterwards.
#
# Body text is html-escaped, "quoted" phrases become bold-italic and
# words ending in "()" become italic. Title and heading text is
# html-escaped inside the html elements; the index comments carry the
# raw text.
#
Main()
{
  ${awk} -v title="${1}" -v full="${2}" -v colour="${3}" '
  BEGIN {
    if( full )
    {
      dtd = "-//W3C//DTD HTML 4.01 Transitional//EN"
      printf( "<!DOCTYPE HTML PUBLIC \"%s\">\n" , dtd )
      printf( "<html>\n" )
      printf( "<head>\n" )
      printf( "<title>%s</title>\n" , escape(title) )
      printf( "</head>\n" )
      printf( "<body bgcolor=\"%s\">\n" , colour )
      printf( "<!-- index:0::::%s -->\n" , title )
    }
  }

  # Replaces the html metacharacters with entities. The ampersand goes
  # first so that the entities added afterwards are not escaped again.
  function escape( line )
  {
    gsub( "&" , "\\&amp;" , line )
    gsub( "<" , "\\&lt;" , line )
    gsub( ">" , "\\&gt;" , line )
    return line
  }

  # Turns "quoted text" into bold-italic text without the quotes.
  function dequote( line )
  {
    quote = "\""
    not_quote = "[^" quote "]"
    gsub( quote not_quote "*" quote , "<b><em>&</em></b>" , line )
    gsub( "<em>" quote , "<em>" , line )
    gsub( quote "</em>" , "</em>" , line )
    return line
  }

  # Puts words that end in "()" into italics.
  function fn( line )
  {
    gsub( "[^[:space:]][^[:space:]]*\\(\\)" , "<i>&</i>" , line )
    return line
  }

  function output( line )
  {
    printf( "%s\n" , fn(dequote(escape(line))) )
  }

  function tagOutput( line , tag )
  {
    printf( "<%s>%s</%s>\n" , tag , fn(dequote(escape(line))) , tag )
  }

  # Classifies "line" (using "next_" to spot headings, which are
  # recognised by the underline row that follows them) and writes
  # the corresponding html.
  function process( line , next_ )
  {
    # Written as an escape so that the tab survives editors and
    # tools that convert whitespace. With a space here every
    # space-indented line would be taken as code and the <dd>
    # and <lI> branches below could never be reached.
    tab = "\t"

    is_blank = match( line , "^ *$" )
    is_heading = match( next_ , "^==* *$" )
    is_sub_heading = match( next_ , "^--* *$" )
    is_list_item = match( line , "^\\* " )
    is_definition_term = match( line , "^\\# " )
    is_definition_text = match( line , "^ [^- ]" )
    # The plus sign sits in a bracket expression because a leading
    # "+" has no defined meaning in an extended regular expression.
    is_outer_list_item = match( line , "^[+] " )
    is_inner_list_item = match( line , "^ - " )
    is_numbered_item = match( line , "^\\([[:digit:]][[:digit:]]*\\)" )
    is_heading_line = match( line , "^==* *$" )
    is_sub_heading_line = match( line , "^--* *$" )
    is_code = match( line , "^" tab )

    if( is_blank )
    {
      printf( "<br><br>\n" )
    }
    else if( is_code )
    {
      tagOutput( line , "pre" )
    }
    else if( is_definition_term )
    {
      gsub( "^# " , "" , line )
      tagOutput( line , "dt" )
    }
    else if( is_definition_text )
    {
      tagOutput( line , "dd" )
    }
    else if( is_list_item )
    {
      gsub( "^\\* " , "" , line )
      tagOutput( line , "li" )
    }
    else if( is_outer_list_item )
    {
      gsub( "^[+] " , "" , line )
      tagOutput( line , "Li" )
    }
    else if( is_inner_list_item )
    {
      gsub( "^ - " , "" , line )
      tagOutput( line , "lI" )
    }
    else if( is_numbered_item )
    {
      gsub( "^\\([[:digit:]][[:digit:]]*\\) " , "" , line )
      tagOutput( line , "LI" )
    }
    else if( is_heading )
    {
      # A new major heading restarts the sub-heading numbering.
      major += 1
      minor = 0
      # No newline after the heading: the index comment shares its line.
      printf( "<h1><a name=\"H_%d\">%s</a></h1>" , major , escape(line) )
      printf( "<!-- index:1:H:%d::%s -->\n" , major , line )
    }
    else if( is_sub_heading )
    {
      minor += 1
      printf( "<h2><a name=\"SH_%d_%d\">%s</a></h2>" , major , minor , escape(line) )
      printf( "<!-- index:2:SH:%d:%d:%s -->\n" , major , minor , line )
    }
    else if( !is_heading_line && !is_sub_heading_line )
    {
      output( line )
    }
  }

  # Lines are processed one step behind the input so that the line
  # that follows is available for heading detection.
  {
    if( NR != 1 )
      process( previous , $0 )
    previous = $0
  }

  END {
    process( previous , "" )
    if( full )
    {
      printf( "</body>\n" )
      printf( "</html>\n" )
    }
  } '
}
# ===
# AugmentLists()
#
# Adds list begin/end tags around a set of list items.
#
# usage: AugmentLists <item-tag> <list-tag> [<ignore-re-1> [<ignore-re-2>]]
#
# A list item is a whole line of the form <item-tag>...</item-tag>.
# "<list-tag>" is written on a line of its own before the first item of
# a run and "</list-tag>" after the last one, including a run that
# extends to the end of the input.
#
# Lines matching either of the optional regular expressions are copied
# through without starting or ending a run, so they can sit inside a
# list.
#
AugmentLists()
{
  ${awk} -v item_tag="${1}" -v list_tag="${2}" -v ignore_1_re="${3}" -v ignore_2_re="${4}" '
  {
    line = $0
    # An empty expression means "nothing to ignore"; without the
    # length test it would match every line.
    ignore_1 = length(ignore_1_re) && match( line , ignore_1_re )
    ignore_2 = length(ignore_2_re) && match( line , ignore_2_re )
    ignore = ignore_1 || ignore_2
    if( ignore )
    {
      print
    }
    else
    {
      is_list_item = match( line , "^<" item_tag ">.*</" item_tag ">$" )
      if( is_list_item && !in_list )
        printf( "<%s>\n" , list_tag )
      else if( in_list && !is_list_item )
        printf( "</%s>\n" , list_tag )
      print
      in_list = is_list_item
    }
  }
  END {
    # Close a list that is still open when the input runs out.
    if( in_list )
      printf( "</%s>\n" , list_tag )
  } '
}
# ===
# Elide()
#
# Converts repeated lines of <foo>lineN</foo> into
# <foo>line1
# line2</foo>
#
# usage: Elide <tag>
#
# Only whole lines of the form <tag>...</tag> take part. The closing
# tag is written when the run ends, either at the first line that is
# not of that form or at the end of the input.
#
# Useful for <pre> and <sub>.
#
Elide()
{
  ${awk} -v tag="${1}" '
  {
    line = $0
    is_tag_line = match( line , "^<" tag ">.*</" tag ">$" )
    # The text between the tags: "<tag>" takes length(tag)+2
    # characters and "</tag>" takes length(tag)+3.
    core = substr( line , length(tag)+3 , length(line)-length(tag)-length(tag)-5 )
    if( is_tag_line && !in_tag )
      printf( "<%s>%s" , tag , core )
    else if( is_tag_line && in_tag )
      printf( "\n%s" , core )
    else if( !is_tag_line && in_tag )
      printf( "</%s>\n%s\n" , tag , line )
    else
      print line
    in_tag = is_tag_line
  }
  END {
    # Finish a run that is still open when the input runs out;
    # otherwise the closing tag and the final newline would be lost.
    if( in_tag )
      printf( "</%s>\n" , tag )
  } '
}
# ===
# Decorate()
#
# Inserts extra text directly after an opening tag that sits at
# the very start of a line. Lines that do not start with the tag
# are copied through unchanged.
#
# usage: Decorate <tag> <decoration>
#
# The decoration goes through awk escape processing and is then used
# as the replacement text of sub(), so a literal ampersand has to
# reach awk as backslash-ampersand.
#
Decorate()
{
  ${awk} -v tag="${1}" -v decoration="${2}" '
  BEGIN {
    opening = "<" tag ">"
    pattern = "^" opening
    replacement = opening decoration
  }
  {
    sub( pattern , replacement )
    print
  } '
}
# ===
# Compress()
#
# Removes blank lines near to headings (etc).
#
# A "<br><br>" line is dropped when the line before or after it
# starts with a heading tag or with "<dd>", or when the line after
# it starts with "<pre>". All other lines are copied through.
#
# After the last input line one extra empty line is always written
# (the final call in END processes an empty line). It is kept here
# because later filters may depend on a terminating line.
#
Compress()
{
  ${awk} '
  # True for a line that starts with a heading tag or with <dd>.
  function is_special( text )
  {
    return match( text , "^<[Hh][[:digit:]]>" ) || match( text , "^<dd>" )
  }

  # Prints "current" unless it is a blank that has to be dropped.
  function process( before , current , after )
  {
    if( !match( current , "^<br><br>$" ) )
      print current
    else if( !is_special(after) && !is_special(before) && !match( after , "^<pre>" ) )
      print current
  }

  # Run one line behind the input so that both neighbours are known.
  NR >= 2 { process( older , newer , $0 ) }
  { older = newer ; newer = $0 }

  END {
    process( older , newer , "" )
    process( newer , "" , "" )
  }
  '
}
# ===
# Anchorise_1()
#
# Converts [[-foo-bar-]] to <a href="foo">bar</a>.
#
# Filters stdin to stdout and rewrites every occurrence on a line.
# Neither the target ("foo") nor the link text ("bar") may contain
# a "-", since that character is the separator.
#
Anchorise_1()
{
  sed -e 's/\[\[-\([^-]*\)-\([^-]*\)-\]\]/<a href="\1">\2<\/a>/g'
}
# ===
# Anchorise_2()
#
# Converts [[foo]] to <a href="../foo">foo</a>.
#
# Filters stdin to stdout and rewrites every occurrence on a line.
# The link text stops at the first "]", so that several links on one
# line are converted separately.
#
Anchorise_2()
{
  sed -e 's/\[\[\([^]]*\)\]\]/<a href="..\/\1">\1<\/a>/g'
}
# ===
# Cat()
#
# An awk version of "cat" (cygwin cat is
# broken).
#
# usage: Cat [<file> ...]
#
# Writes the named files (or stdin when none are given) to stdout
# with all carriage-return characters removed.
#
Cat()
{
  # "$@" is quoted so that file names containing whitespace or
  # glob characters reach awk as single, unmodified arguments.
  ${awk} '{print}' "$@" | tr -d '\015'
}
# ===
# MoveIndex()
#
# Gives every index comment a line of its own, rather than leaving it
# at the end of the header line: a line containing "<!-- index:" is
# split in two, first the text in front of the comment and then the
# comment itself. Lines without an index comment are copied through.
#
# NOTE(review): the comment ends up on the line after the heading
# text, not before it -- confirm that index.sh does not depend on
# the order.
#
MoveIndex()
{
  ${awk} '
  {
    at = index( $0 , "<!-- index:" )
    if( at == 0 )
    {
      print
      next
    }
    # The first part is empty when the comment starts the line.
    print substr( $0 , 1 , at-1 )
    print substr( $0 , at )
  }
  '
}
# ==
# The conversion itself: each stage filters the output of the one
# before it and the finished html is written to stdout.
colour="#FFFFFF"
Cat "${file}" |
  Include |
  Main "${title}" "${full}" "${colour}" |
  Compress |
  AugmentLists li ul |
  AugmentLists Li ul "^<lI>" "^<br><br>" |
  AugmentLists lI ul |
  AugmentLists LI ol |
  AugmentLists dt dl "^<dd>" "^<br><br>" |
  Elide "sub" |
  Elide "dd" |
  Elide "pre" |
  Decorate dt "<img src=\"graphics/bullet.gif\">\\\&nbsp;" |
  Decorate dd "<p>" |
  Anchorise_1 |
  Anchorise_2 |
  MoveIndex