#!/bin/sh
#
# Copyright (C) 2001 Graeme Walker <graeme_walker@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# ===
#
# txt2html.sh
#
# Converts plain-text to html. The plain-text has to use special
# formatting conventions (see "function process()", Anchorise_1() etc).
#
# Embeds comments in the html output which can be used by "index.sh"
# to create a document index.
#
# Definition lists require a bullet graphic, "graphics/bullet.gif".
#
# usage: txt2html.sh [-a <awk-binary>] [-x] <input-file> [<title>]
#
# The -x switch excludes header and footer stuff.
#

# Command line: [-a <awk-binary>] [-x] <input-file> [<title>]
#
# Sets the globals used by the rest of the script:
#   awk   - awk binary run by every filter stage (default "gawk")
#   full  - "1" to emit the html header and footer, "0" with -x
#   file  - path of the plain-text input file
#   title - document title: the second argument if given, otherwise the
#           first non-blank line of the input file
prog="${0##*/}"

awk="gawk"
if test "${1}" = "-a"
then
  shift
  # Only consume an argument when one is present, so that a bare "-a"
  # falls through to the usage message instead of a failing "shift".
  if test "$#" -gt 0
  then
    if test "${1}" != ""
    then
      awk="${1}"
    fi
    shift
  fi
fi

full="1"
if test "${1}" = "-x"
then
  shift
  full="0"
fi

file="${1}"
if test "${file}" = ""
then
  printf 'usage: %s [-a <awk-binary>] [-x] <input-file> [<title>]\n' "${prog}" >&2
  exit 2
fi

if test \! -f "${file}"
then
  printf '%s: no such file: %s\n' "${prog}" "${file}" >&2
  exit 1
fi

# Default title is the first line that is not empty or all whitespace.
title="$(grep -v '^[[:space:]]*$' -- "${file}" | head -n 1)"
if test "${2}" != ""
then
  title="${2}"
fi

# ===
# Include()
#
# Expands #include# directives. Reads standard input and writes standard
# output. A line that starts with "#include#<path>#" is replaced by the
# contents of the file <path>; anything following the closing "#" on that
# line is discarded. Every other line is passed through unchanged, so the
# included text is processed by the later stages just like the top-level
# file. Included files are not themselves scanned for directives.
#
# The file is read directly by awk rather than through a shell command,
# so paths containing spaces or shell metacharacters are taken literally
# (no "~", variable or glob expansion is performed on <path>). A file
# that cannot be read produces a warning on standard error and
# contributes no output.
#
# Uses the global ${awk}.
#
Include()
{
  ${awk} '
  {
    line = $0
    if( match(line,"^#include#[^#]*#") )
    {
      # "#include#" is nine characters, so the path starts at
      # column ten and excludes the terminating "#".
      path = substr(line,10,RLENGTH-10)
      while( ( status = ( getline text < path ) ) > 0 )
      {
        print text
      }
      if( status < 0 )
      {
        printf( "txt2html: cannot read include file: %s\n" , path ) > "/dev/stderr"
      }
      close( path )
    }
    else
    {
      print line
    }
  }
  '
}

# ===
# Main()
#
# Does the bulk of the conversion. Reads the marked-up plain text on
# standard input and writes html on standard output, one output line
# per input line.
#
#   $1 - document title, used for <title> and the level-0 index comment
#   $2 - "1" to emit the html header and footer, "0" to omit them
#   $3 - background colour for <body bgcolor=...>
#
# Each line is classified as follows (first match wins):
#   empty or spaces only        -> <br><br>
#   starts with a tab           -> <pre>   (code)
#   starts with "# "            -> <dt>    (definition term)
#   starts with " " then text   -> <dd>    (definition text)
#   starts with "* "            -> <li>    (list item)
#   starts with "+ "            -> <Li>    (outer list item)
#   starts with " - "           -> <lI>    (inner list item)
#   starts with "(<digits>) "   -> <LI>    (numbered item)
#   next line is "====="        -> <h1> plus a level-1 index comment
#   next line is "-----"        -> <h2> plus a level-2 index comment
#   an underline line itself    -> dropped
#   anything else               -> plain text
# The mixed-case item tags keep the different list kinds apart for the
# later AugmentLists stages. Text within double quotes is emboldened and
# words ending in "()" are italicised.
#
# Uses the global ${awk}.
#
Main()
{
  ${awk} -v title="${1}" -v full="${2}" -v colour="${3}" '
  BEGIN {
    if( full )
    {
      dtd = "-//W3C//DTD HTML 4.01 Transitional//EN"
      printf( "<!DOCTYPE HTML PUBLIC \"%s\">\n" , dtd )
      printf( "<html>\n" )
      printf( "<head>\n" )
      printf( "<title>%s</title>\n" , title )
      printf( "</head>\n" )
      printf( "<body bgcolor=\"%s\">\n" , colour )
      printf( "<!-- index:0::::%s -->\n" , title )
    }
  }

  # Replaces the html-special characters with entities. The ampersand
  # goes first so that the entities added for "<" and ">" are not
  # themselves re-escaped. The "\\&" yields a literal ampersand in the
  # gsub() replacement.
  function escape( line )
  {
    gsub( "&" , "\\&amp;" , line )
    gsub( "<" , "\\&lt;" , line )
    gsub( ">" , "\\&gt;" , line )
    return line
  }

  # Emboldens double-quoted phrases and drops the quote characters.
  function dequote( line ,    quote , not_quote )
  {
    quote = "\""
    not_quote = "[^" quote "]"
    gsub( quote not_quote "*" quote , "<b><em>&</em></b>" , line )
    gsub( "<em>" quote , "<em>" , line )
    gsub( quote "</em>" , "</em>" , line )
    return line
  }

  # Italicises function names, ie. words ending in "()".
  function fn( line )
  {
    gsub( "[^[:space:]][^[:space:]]*\\(\\)" , "<i>&</i>" , line )
    return line
  }

  function output( line )
  {
    printf( "%s\n" , fn(dequote(escape(line))) )
  }

  function tagOutput( line , tag )
  {
    printf( "<%s>%s</%s>\n" , tag , fn(dequote(escape(line))) , tag )
  }

  # Converts one line, using the following line to spot underlined
  # headings. The is_* flags are plain globals that are re-assigned on
  # every call; "major" and "minor" are the running heading counters.
  function process( line , next_ )
  {
    tab = "\t"
    is_blank = match( line , "^ *$" )
    is_heading = match( next_ , "^==* *$" )
    is_sub_heading = match( next_ , "^--* *$" )
    is_list_item = match( line , "^\\* " )
    is_definition_term = match( line , "^\\# " )
    is_definition_text = match( line , "^ [^- ]" )
    is_outer_list_item = match( line , "^\\+ " )
    is_inner_list_item = match( line , "^ - " )
    is_numbered_item = match( line , "^\\([[:digit:]][[:digit:]]*\\)" )
    is_heading_line = match( line , "^==* *$" )
    is_sub_heading_line = match( line , "^--* *$" )
    is_code = match( line , "^" tab )

    if( is_blank )
    {
      printf( "<br><br>\n" )
    }
    else if( is_code )
    {
      tagOutput( line , "pre" )
    }
    else if( is_definition_term )
    {
      gsub( "^# " , "" , line )
      tagOutput( line , "dt" )
    }
    else if( is_definition_text )
    {
      tagOutput( line , "dd" )
    }
    else if( is_list_item )
    {
      gsub( "^\\* " , "" , line )
      tagOutput( line , "li" )
    }
    else if( is_outer_list_item )
    {
      gsub( "^\\+ " , "" , line )
      tagOutput( line , "Li" )
    }
    else if( is_inner_list_item )
    {
      gsub( "^ - " , "" , line )
      tagOutput( line , "lI" )
    }
    else if( is_numbered_item )
    {
      gsub( "^\\([[:digit:]][[:digit:]]*\\) " , "" , line )
      tagOutput( line , "LI" )
    }
    else if( is_heading )
    {
      major += 1
      minor = 0
      # The index comment stays on the heading line here and carries
      # the raw heading text; only the visible heading is escaped.
      printf( "<h1><a name=\"H_%d\">%s</a></h1>" , major , escape(line) )
      printf( "<!-- index:1:H:%d::%s -->\n" , major , line )
    }
    else if( is_sub_heading )
    {
      minor += 1
      printf( "<h2><a name=\"SH_%d_%d\">%s</a></h2>" , major , minor , escape(line) )
      printf( "<!-- index:2:SH:%d:%d:%s -->\n" , major , minor , line )
    }
    else if( !is_heading_line && !is_sub_heading_line )
    {
      output( line )
    }
  }

  # Processing runs one line behind the input so that each line can be
  # classified with knowledge of the line that follows it.
  {
    if( NR != 1 )
      process( previous , $0 )
    previous = $0
  }

  END {
    # Flush the held-back last line, if there was any input at all.
    if( NR > 0 )
      process( previous , "" )
    if( full )
    {
      printf( "</body>\n" )
      printf( "</html>\n" )
    }
  } '
}

# ===
# AugmentLists()
#
# Adds list begin/end tags around a set of list items. Reads standard
# input and writes standard output.
#
#   $1 - item tag: a list item is a whole line of the form
#        <tag>...</tag> (the match is case-sensitive)
#   $2 - list tag emitted as <tag> before the first item of a run and
#        as </tag> after the last
#   $3 - optional regular expression; matching lines are copied through
#        without starting or ending a list
#   $4 - optional second regular expression, treated like $3
#
# A list that is still open when the input ends is closed as well.
#
# Uses the global ${awk}.
#
AugmentLists()
{
  ${awk} -v item_tag="${1}" -v list_tag="${2}" -v ignore_1_re="${3}" -v ignore_2_re="${4}" '
  {
    line = $0
    ignore_1 = length(ignore_1_re) && match( line , ignore_1_re )
    ignore_2 = length(ignore_2_re) && match( line , ignore_2_re )
    ignore = ignore_1 || ignore_2
    if( ignore )
    {
      # Transparent line: leaves the in_list state untouched.
      print
    }
    else
    {
      is_list_item = match( line , "^<" item_tag ">.*</" item_tag ">$" )

      if( is_list_item && !in_list )
        printf( "<%s>\n" , list_tag )
      else if( in_list && !is_list_item )
        printf( "</%s>\n" , list_tag )

      print
      in_list = is_list_item
    }
  }

  END {
    # Close a list that runs up to the end of the input.
    if( in_list )
      printf( "</%s>\n" , list_tag )
  } '
}

# ===
# Elide()
#
# Converts repeated lines of <foo>lineN</foo> into
#     <foo>line1
#     line2</foo>
# so that a run of adjacent tagged lines shares one pair of tags.
# Reads standard input and writes standard output.
#
#   $1 - the tag name ("foo" above)
#
# A run that is still open when the input ends is closed as well.
#
# Useful for <pre> and <sub>.
#
# Uses the global ${awk}.
#
Elide()
{
  ${awk} -v tag="${1}" '
  {
    line = $0
    is_tag_line = match( line , "^<" tag ">.*</" tag ">$" )

    if( is_tag_line )
    {
      # Strip "<tag>" (length+2) from the front and "</tag>"
      # (length+3) from the back.
      core = substr( line , length(tag)+3 , length(line)-length(tag)-length(tag)-5 )
      if( in_tag )
        printf( "\n%s" , core )
      else
        printf( "<%s>%s" , tag , core )
    }
    else if( in_tag )
    {
      printf( "</%s>\n%s\n" , tag , line )
    }
    else
    {
      print line
    }

    in_tag = is_tag_line
  }

  END {
    # Terminate a run that extends to the end of the input.
    if( in_tag )
      printf( "</%s>\n" , tag )
  } '
}

# ===
# Decorate()
#
# Adds additional stuff after a given opening tag.
# The tag is expected to be at the start of the line; lines that do
# not start with it are passed through unchanged. Reads standard input
# and writes standard output.
#
#   $1 - tag name, without the angle brackets
#   $2 - text to insert immediately after the opening tag
#
# The decoration is used as an awk sub() replacement string after
# passing through "-v" escape processing, so a literal ampersand has
# to reach this function as backslash-backslash-ampersand.
#
# Uses the global ${awk}.
#
Decorate()
{
  ${awk} -v tag="${1}" -v decoration="${2}" '
  {
    line = $0
    sub( "^<" tag ">" , "<" tag ">" decoration , line )
    print line
  } '
}

# ===
# Compress()
#
# Removes blank lines near to headings (etc). Reads standard input and
# writes standard output.
#
# A "<br><br>" line is dropped when the line before it or the line after
# it starts with a heading tag or "<dd>", or when the line after it
# starts with "<pre>". Everything else is copied through.
#
# The output always ends with one extra empty line, because the END
# block also pushes an empty line through process(). This is kept
# deliberately: the later AugmentLists and Elide stages close a list or
# tag run when they see a following line that is not part of it.
#
# Uses the global ${awk}.
#
Compress()
{
  ${awk} '
  # Decides whether to print "line", given its two neighbours.
  function process( previous , line , next_ )
  {
    re_blank = "^<br><br>$"
    re_heading = "^<[Hh][[:digit:]]>"
    re_dd = "^<dd>"
    re_pre_start = "^<pre>"

    this_is_blank = match(line,re_blank)
    next_is_special = match(next_,re_heading) || match(next_,re_dd)
    previous_is_special = match(previous,re_heading) || match(previous,re_dd)
    next_is_pre_start = match(next_,re_pre_start)

    if( this_is_blank && ( next_is_special || previous_is_special || next_is_pre_start ) )
    {
      # Redundant spacing: drop it.
    }
    else
    {
      print line
    }
  }

  # Runs one line behind the input; l1 is the line waiting to be
  # processed and l2 is the line before that.
  {
    if( NR >= 2 )
      process( l2 , l1 , $0 )
    l2 = l1
    l1 = $0
  }

  END {
    process( l2 , l1 , "" )
    # Trailing empty line, see the function header.
    process( l1 , "" , "" )
  }
  '
}

# ===
# Anchorise_1()
#
# Converts [[-foo-bar-]] to <a href="foo">bar</a>. Reads standard input
# and writes standard output. Neither "foo" nor "bar" may contain a
# hyphen, since the hyphen is the field separator.
#
Anchorise_1()
{
  sed 's/\[\[-\([^-]*\)-\([^-]*\)-\]\]/<a href="\1">\2<\/a>/g'
}

# ===
# Anchorise_2()
#
# Converts [[foo]] to <a href="../foo">foo</a>. Reads standard input
# and writes standard output. "foo" may not contain a closing square
# bracket, so several links on one line are converted separately.
#
Anchorise_2()
{
  # In a bracket expression "]" has to come first (after the "^") to
  # be taken literally.
  sed 's/\[\[\([^]]*\)\]\]/<a href="..\/\1">\1<\/a>/g'
}

# ===
# Cat()
#
# An awk version of "cat" (cygwin cat is
# broken). Copies the named files, or standard input if no file is
# given, to standard output with all carriage-return characters
# removed.
#
# Uses the global ${awk}, which is left unquoted as in every other
# stage of the script.
#
Cat()
{
  ${awk} '{print}' "$@" | tr -d '\015'
}

# ===
# MoveIndex()
#
# Moves each index comment ("<!-- index:...") onto a line of its own,
# rather than leaving it at the end of the heading line. The heading
# text is written first and the index comment on the line after it.
# A line that already starts with the index comment, and any line
# without one, is passed through unchanged. Reads standard input and
# writes standard output.
#
# Uses the global ${awk}.
#
MoveIndex()
{
  ${awk} '
  BEGIN {
    marker = "<!-- index:"
  }
  {
    pos = index( $0 , marker )
    if( pos > 1 )
    {
      print substr( $0 , 1 , pos-1 )
      print substr( $0 , pos )
    }
    else
    {
      print $0
    }
  }
  '
}

# ==
# The conversion pipeline. The input file is read, has its #include#
# directives expanded and is converted line by line; the remaining
# stages then tidy the html: redundant blank lines are removed, runs of
# list items are wrapped in list tags, adjacent tagged lines are merged,
# definition terms and text are decorated, link markup is turned into
# anchors and index comments are put onto their own lines.

colour="#FFFFFF"
# The <dt> decoration is a sub() replacement string that also goes
# through awk "-v" escape processing, hence the doubled backslash in
# front of the ampersand of the non-breaking space.
Cat "${file}" | \
  Include | \
  Main "${title}" "${full}" "${colour}" | \
  Compress | \
  AugmentLists li ul | \
  AugmentLists Li ul "^<lI>" "^<br><br>" | \
  AugmentLists lI ul | \
  AugmentLists LI ol | \
  AugmentLists dt dl "^<dd>" "^<br><br>" | \
  Elide "sub" | \
  Elide "dd" | \
  Elide "pre" | \
  Decorate dt '<img src="graphics/bullet.gif">\\&nbsp;' | \
  Decorate dd "<p>" | \
  Anchorise_1 | \
  Anchorise_2 | \
  MoveIndex