emailrelay/bin/txt2mu.sh_
Graeme Walker aa8ca77702 v1.6
2007-08-27 12:00:00 +00:00

343 lines
7.2 KiB
Bash

#!/bin/sh
#
# Copyright (C) 2001-2007 Graeme Walker <graeme_walker@users.sourceforge.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ===
#
# txt2mu.sh
#
# Converts specially-formatted plain-text to marked-up text.
# The mark-up process works on complete lines; inline
# markup is handled by later processing (eg. "mu2html.sh").
#
# The "-t" (text-mode) switch modifies the set
# of available styles to suit non-technical texts.
#
# usage: txt2mu.sh [-a <awk-binary>] [-t] [<input-file>]
#
# Parse the command line: an optional "-a <awk-binary>", then an
# optional "-t", then an optional input file name.
#
# Sets:
#   awk        the awk binary to run ("gawk" unless overridden by "-a")
#   text_mode  "1" if "-t" was given, otherwise "0"
#   file       the input file name, or empty if none was given
#
awk="gawk"
if test "${1}" = "-a"
then
	shift
	if test "${1}" != "" ; then awk="${1}" ; fi
	# Only consume the operand if there is one: a shift with no
	# positional parameters left is an error, and a non-interactive
	# POSIX shell may exit on it.
	if test "${#}" -gt 0 ; then shift ; fi
fi
text_mode="0"
if test "${1}" = "-t"
then
	shift
	text_mode="1"
fi
file="${1}"
# ===
# Main()
#
# Does most of the processing.
#
# usage: Main <text-mode>
#
# Reads plain text on standard input and writes "<tag>:<text>" lines
# on standard output. <text-mode> is passed to awk as the "text_mode"
# variable: when it is true the citation, author and html styles are
# enabled and the code, item-outer and item-inner styles are disabled;
# when it is false it is the other way round.
#
# Each input line is classified together with the line that follows
# it (the last line is paired with an empty string). The rules are
# tried in this order and the first one that matches wins:
#
#   line of three or more "_"       no output; sets the footer flag
#   starts with the "tab" string    "code", with that prefix removed
#                                   (non-text-mode only)
#   <<...>>                         "image", with the << and >> removed
#   <...>                           "html" (text-mode only)
#   whitespace only                 "blank", with empty text
#   starts with "# "                "item-name", prefix removed
#   matches the item-detail pattern "item-detail", leading space removed
#   starts with "* "                "item", prefix removed
#   starts with "+ "                "item-outer" (non-text-mode only)
#   starts with " - "               "item-inner" (non-text-mode only)
#   starts with "(<digits>)"        "item-numbered", "(<digits>) " removed
#   one leading "tab" string        "citation" (text-mode only)
#   two leading "tab" strings       "author" (text-mode only)
#   next line is all "="            "h1,<major>,<minor>"; increments the
#                                   major count and zeroes the minor count
#   next line is all "-"            "h2,<major>,<minor>"; increments the
#                                   minor count
#   anything else                   "text", or "footer" once the footer
#                                   flag has been set
#
# A line made up only of "=" or only of "-" that matches none of the
# earlier rules produces no output.
#
# The END rule always processes one final line, so empty input still
# produces a single "blank:" line.
#
Main()
{
${awk} -v text_mode="${1}" '
BEGIN { in_footer = 0 }
function output( line )
{
printf( "%s\n" , line )
}
function tagOutput( line , tag )
{
printf( "%s:%s\n" , tag , line )
}
function tagOutputRaw( line , tag )
{
printf( "%s:%s\n" , tag , line )
}
function process( line , next_ )
{
tab = " "
is_blank = match( line , "^[[:space:]]*$" )
is_heading = match( next_ , "^==*[[:space:]]*$" )
is_footer = match( line , "^____*[[:space:]]*$" )
is_sub_heading = match( next_ , "^--*[[:space:]]*$" )
is_item = match( line , "^\\* " )
is_item_name = match( line , "^\\# " )
is_item_detail = match( line , "^ [^- ]" )
is_item_numbered = match( line , "^\\([[:digit:]][[:digit:]]*\\)" )
is_heading_line = match( line , "^==*[[:space:]]*$" )
is_sub_heading_line = match( line , "^--*[[:space:]]*$" )
is_image = match( line , "^[[:space:]]*<<.*>>[[:space:]]*$" )
if( text_mode )
{
is_citation = match( line , "^" tab "[^" tab "]" )
is_author = match( line , "^" tab tab )
is_html = match( line , "^<.*>[[:space:]]*$" )
is_code = 0
is_item_outer = 0
is_item_inner = 0
}
else
{
is_citation = 0
is_author = 0
is_html = 0
is_code = match( line , "^" tab )
is_item_outer = match( line , "^\\+ " )
is_item_inner = match( line , "^ - " )
}
if( is_footer )
{
in_footer = 1
}
else if( is_code )
{
sub( "^" tab , "" , line )
tagOutputRaw( line , "code" )
}
else if( is_image )
{
sub( "^[[:space:]]*<<" , "" , line )
sub( ">>[[:space:]]*$" , "" , line )
tagOutputRaw( line , "image" )
}
else if( is_html )
{
tagOutputRaw( line , "html" )
}
else if( is_blank )
{
tagOutput( "" , "blank" )
}
else if( is_item_name )
{
sub( "^# " , "" , line )
tagOutput( line , "item-name" )
}
else if( is_item_detail )
{
sub( "^ " , "" , line )
tagOutput( line , "item-detail" )
}
else if( is_item )
{
sub( "^\\* " , "" , line )
tagOutput( line , "item" )
}
else if( is_item_outer )
{
sub( "^\\+ " , "" , line )
tagOutput( line , "item-outer" )
}
else if( is_item_inner )
{
sub( "^ - " , "" , line )
tagOutput( line , "item-inner" )
}
else if( is_item_numbered )
{
gsub( "^\\([[:digit:]][[:digit:]]*\\) " , "" , line )
tagOutput( line , "item-numbered" )
}
else if( is_citation )
{
sub( "^" tab , "" , line )
tagOutput( line , "citation" )
}
else if( is_author )
{
sub( "^" tab tab , "" , line )
tagOutput( line , "author" )
}
else if( is_heading )
{
major += 1
minor = 0
h1_tag = "h1" "," major "," minor
tagOutput( line , h1_tag )
}
else if( is_sub_heading )
{
minor += 1
h2_tag = "h2" "," major "," minor
tagOutput( line , h2_tag )
}
else if( !is_heading_line && !is_sub_heading_line )
{
tagOutput( line , in_footer ? "footer" : "text" )
}
}
{
if( NR != 1 )
process( previous , $0 )
previous = $0
}
END {
process( previous , "" )
} '
}
# ===
# Number()
#
# Numbers a set of commonly-tagged lines, and inserts an end
# marker line at the end of the sequence.
#
# usage: Number <item-tag> [<ignore-tag> [<ignore-tag> [<ignore-tag> [<ignore-tag>]]]]
#
# Reads tagged lines on standard input and writes them to standard
# output. Every line tagged with <item-tag> gets a running count
# appended to its tag, starting at one. The first line that is neither
# an item line nor an ignorable line ends the sequence: if a sequence
# is open an "<item-tag>-end:" marker is written just before that line,
# and the count restarts at one.
#
# A line is ignorable, ie. it does not break a sequence, if it starts
# with "ignore" or if its tag is one of the (up to four) <ignore-tag>
# parameters. Ignorable lines are passed through unchanged, so item
# lines separated only by ignorable lines are treated as contiguous.
#
# The tags are interpolated into regular expressions, so they should
# not contain regular-expression metacharacters.
#
# Eg: Number foo bar
#   foo,1: first line
#   ignore,bar:
#   foo,2: second line
#   foo-end:
#
Number()
{
	${awk} -v item_tag="${1}" -v ignore_1="${2}" -v ignore_2="${3}" -v ignore_3="${4}" -v ignore_4="${5}" '
		# Returns true if the line must not break a run of item lines.
		function ignore_line( line )
		{
			i_0 = match( line , "^ignore" )
			# Each optional tag is only tested if it was supplied, because
			# an empty tag would reduce its pattern to "^[:,]". Every test
			# is guarded by the length of its own tag.
			i_1 = length(ignore_1) && match( line , "^" ignore_1 "[:,]" )
			i_2 = length(ignore_2) && match( line , "^" ignore_2 "[:,]" )
			i_3 = length(ignore_3) && match( line , "^" ignore_3 "[:,]" )
			i_4 = length(ignore_4) && match( line , "^" ignore_4 "[:,]" )
			return i_0 || i_1 || i_2 || i_3 || i_4
		}
		BEGIN {
			n = 1
		}
		{
			if( match( $0 , "^" item_tag "[:,]" ) )
			{
				# strip the tag and write it back with the count appended
				sub( "^" item_tag , "" )
				printf( "%s,%d%s\n" , item_tag , n++ , $0 )
			}
			else
			{
				if( !ignore_line($0) )
				{
					# n > 1 means that a sequence is open: close it
					if( n > 1 )
						printf( "%s-end:\n" , item_tag )
					n = 1
				}
				print
			}
		} '
}
# ===
# Compress()
#
# Retags "blank" lines according to their immediate neighbours and
# copies every other line from standard input to standard output
# unchanged:
#
#   - a blank next to a heading ("h<digit>" tag) becomes "ignore,blank"
#   - a blank with an "item-detail" line on exactly one side becomes
#     "ignore,blank"
#   - a blank with an "item-detail" line on both sides becomes
#     "item-detail-blank"
#   - a blank immediately before a "code,1" line becomes "ignore,blank"
#
# One extra empty line is written after the last input line.
#
Compress()
{
	${awk} '
		# Returns the prefix to put in front of the current line, which
		# is empty unless the current line is a blank to be retagged.
		function prefix_for( before , current , after ,    detail_before , detail_after )
		{
			if( current !~ /^blank:/ )
				return ""
			if( after ~ /^h[[:digit:]][:,]/ || before ~ /^h[[:digit:]][:,]/ )
				return "ignore,"
			detail_before = ( before ~ /^item-detail:/ )
			detail_after = ( after ~ /^item-detail:/ )
			if( detail_before && detail_after )
				return "item-detail-"
			if( detail_before || detail_after )
				return "ignore,"
			if( after ~ /^code,1[:,]/ )
				return "ignore,"
			return ""
		}
		function emit( before , current , after )
		{
			print prefix_for( before , current , after ) current
		}
		# Output runs one line behind the input so that the following
		# line is known when each line is written.
		NR > 1 {
			emit( two_back , one_back , $0 )
		}
		{
			two_back = one_back
			one_back = $0
		}
		END {
			emit( two_back , one_back , "" )
			emit( one_back , "" , "" )
		} '
}
# ===
# Cat()
#
# An awk version of "cat" that also removes carriage returns.
#
# usage: Cat [<file> ...]
#
# Copies the named files, or standard input if no file is named, to
# standard output with every CR character (octal 015) deleted.
#
Cat()
{
	# The quoted "$@" passes each file name on as a single word, so that
	# names containing whitespace or glob characters reach awk intact.
	${awk} '{print}' "$@" | tr -d '\015'
}
# ==
# Run the conversion as a single pipeline: read the input, tag every
# line, retag blank lines, and then number each kind of tagged line in
# turn. The arguments after the first on each Number stage are the tags
# that may appear inside a sequence without ending it.
#
# ${file:+"${file}"} expands to nothing at all when no input file was
# given, and otherwise to the file name as exactly one word that is not
# subject to word splitting or pathname expansion.
Cat ${file:+"${file}"} | \
Main "${text_mode}" | \
Compress | \
Number "item" | \
Number "item-outer" "item-inner" "blank" | \
Number "item-inner" "blank" | \
Number "item-numbered" | \
Number "item-name" "item-detail" "blank" "item-detail-blank" "code" | \
Number "item-detail" "item-detail-blank" "code" | \
Number "code" "blank" "image" | \
Number "footer" "blank" "image" | \
Number "citation" "blank" | \
Number "author" | \
Number "text" "image"