emailrelay/bin/txt2mu.sh_

#!/bin/sh
#
# Copyright (C) 2001-2007 Graeme Walker <graeme_walker@users.sourceforge.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# ===
#
# txt2mu.sh
#
# Converts specially-formatted plain-text to marked-up text.
# The mark-up process works on complete lines; inline
# markup is handled by later processing (eg. "mu2html.sh").
#
# The "-t" (text-mode) switch modifies the set
# of available styles to suit non-technical texts.
#
# usage: txt2mu.sh [-a <awk-binary>] [-t] [<input-file>]
#

# Command-line handling.  The switches are positional: an optional
# "-a <awk-binary>" must come first, then an optional "-t", then an
# optional input file name.

# awk implementation used by every stage below; "-a" overrides it.
awk="gawk"
if test "${1}" = "-a"
then
	shift
	if test "${1}" != "" ; then awk="${1}" ; fi
	# Only consume the binary name if there is one: "shift" with no
	# positional parameters left is an error, and a non-interactive
	# POSIX shell is allowed to exit on it (dash does).
	if test "$#" -gt 0 ; then shift ; fi
fi

# "1" selects the text-mode style set in Main(), "0" the default set.
text_mode="0"
if test "${1}" = "-t"
then
	shift
	text_mode="1"
fi

# Input file; left empty when no file was named.
file="${1}"

# ===
# Main()
#
# Does most of the processing.
#
# Reads plain text on standard input and writes tagged lines of the
# form "<tag>:<content>" on standard output. Each input line is
# classified using the line itself and the line that follows it (the
# look-ahead is what detects underlined headings).
#
# Parameter 1 is the text-mode flag ("1" or "0"), passed to awk as
# the "text_mode" variable. Uses the global ${awk} as the awk binary.
#
# Tags written: code, image, html, blank, item-name, item-detail,
# item, item-outer, item-inner, item-numbered, citation, author,
# h1,<major>,<minor>, h2,<major>,<minor>, footer and text.
#
Main()
{
	${awk} -v text_mode="${1}" '

	BEGIN {
		in_footer = 0

		# Heading counters. They are initialised explicitly so that a
		# sub-heading that comes before any heading is tagged "h2,0,1"
		# rather than with an empty major field ("h2,,1").
		major = 0
		minor = 0
	}

	# Writes one tagged line.
	function tagOutput( line , tag )
	{
		printf( "%s:%s\n" , tag , line )
	}

	# Same output format as tagOutput(); used for the code, image
	# and html tags.
	function tagOutputRaw( line , tag )
	{
		printf( "%s:%s\n" , tag , line )
	}

	# Classifies "line" and writes at most one tagged line for it.
	# "next_" is the following input line, or "" at end of input.
	function process( line , next_ )
	{
		tab = "\t"
		is_blank = match( line , "^[[:space:]]*$" )
		is_heading = match( next_ , "^==*[[:space:]]*$" )
		is_footer = match( line , "^____*[[:space:]]*$" )
		is_sub_heading = match( next_ , "^--*[[:space:]]*$" )
		is_item = match( line , "^\\* " )
		# A plain "#" needs no escape in a regular expression; this is
		# the same pattern that the sub() for item-name uses below.
		is_item_name = match( line , "^# " )
		is_item_detail = match( line , "^  [^- ]" )
		# NOTE(review): this matches "(1)" with or without a following
		# space, but the gsub() below only strips the prefix when the
		# space is present -- confirm which is intended.
		is_item_numbered = match( line , "^\\([[:digit:]][[:digit:]]*\\)" )
		is_heading_line = match( line , "^==*[[:space:]]*$" )
		is_sub_heading_line = match( line , "^--*[[:space:]]*$" )
		is_image = match( line , "^[[:space:]]*<<.*>>[[:space:]]*$" )

		if( text_mode )
		{
			# Text mode: a leading tab marks a citation and two leading
			# tabs an author line; code and nested items are disabled.
			is_citation = match( line , "^" tab "[^" tab "]" )
			is_author = match( line , "^" tab tab )
			is_html = match( line , "^<.*>[[:space:]]*$" )
			is_code = 0
			is_item_outer = 0
			is_item_inner = 0
		}
		else
		{
			# Default mode: a leading tab marks code; citation, author
			# and html lines are disabled.
			is_citation = 0
			is_author = 0
			is_html = 0
			is_code = match( line , "^" tab )
			is_item_outer = match( line , "^\\+ " )
			is_item_inner = match( line , "^  - " )
		}

		# The order of the tests below sets the precedence when a line
		# matches more than one pattern.
		if( is_footer )
		{
			# The rule line itself produces no output; plain lines that
			# follow it are tagged "footer" rather than "text".
			in_footer = 1
		}
		else if( is_code )
		{
			sub( "^" tab , "" , line )
			tagOutputRaw( line , "code" )
		}
		else if( is_image )
		{
			sub( "^[[:space:]]*<<" , "" , line )
			sub( ">>[[:space:]]*$" , "" , line )
			tagOutputRaw( line , "image" )
		}
		else if( is_html )
		{
			tagOutputRaw( line , "html" )
		}
		else if( is_blank )
		{
			tagOutput( "" , "blank" )
		}
		else if( is_item_name )
		{
			sub( "^# " , "" , line )
			tagOutput( line , "item-name" )
		}
		else if( is_item_detail )
		{
			sub( "^  " , "" , line )
			tagOutput( line , "item-detail" )
		}
		else if( is_item )
		{
			sub( "^\\* " , "" , line )
			tagOutput( line , "item" )
		}
		else if( is_item_outer )
		{
			sub( "^\\+ " , "" , line )
			tagOutput( line , "item-outer" )
		}
		else if( is_item_inner )
		{
			sub( "^  - " , "" , line )
			tagOutput( line , "item-inner" )
		}
		else if( is_item_numbered )
		{
			gsub( "^\\([[:digit:]][[:digit:]]*\\) " , "" , line )
			tagOutput( line , "item-numbered" )
		}
		else if( is_citation )
		{
			sub( "^" tab , "" , line )
			tagOutput( line , "citation" )
		}
		else if( is_author )
		{
			sub( "^" tab tab , "" , line )
			tagOutput( line , "author" )
		}
		else if( is_heading )
		{
			# Underlined with "=": a new major section, which restarts
			# the minor count.
			major += 1
			minor = 0
			h1_tag = "h1" "," major "," minor
			tagOutput( line , h1_tag )
		}
		else if( is_sub_heading )
		{
			# Underlined with "-": a new minor section.
			minor += 1
			h2_tag = "h2" "," major "," minor
			tagOutput( line , h2_tag )
		}
		else if( !is_heading_line && !is_sub_heading_line )
		{
			# Plain text. Underline lines that get this far are dropped.
			tagOutput( line , in_footer ? "footer" : "text" )
		}
	}

	{
		# Run one line behind the input so that process() can see the
		# following line.
		if( NR != 1 )
			process( previous , $0 )
		previous = $0
	}

	END {
		# Flush the last line; there is nothing after it.
		process( previous , "" )
	} '
}

# ===
# Number()
#
# Numbers a set of commonly-tagged lines, and inserts an end
# marker line at the end of the sequence.
#
# Parameter 1 is the tag to number; parameters 2 to 5 are optional
# 'ignore' tags. Reads tagged lines on standard input, writes them
# on standard output, and uses the global ${awk} as the awk binary.
#
# The 'ignore' parameters can be used to make sure that
# item lines separated with 'ignore' patterns are
# treated as being contiguous. Lines whose tag starts with
# "ignore" are always treated that way.
#
# The end marker is written just before the first line that is
# neither an item line nor an ignored line, so a sequence that is
# still open at end of input gets no marker from this function. In
# this script the Compress stage runs earlier in the pipeline and
# finishes its output with an empty line, which closes any open
# sequence.
#
# Eg: Number foo bar
#   foo,1: first line
#   ignore,bar:
#   foo,2: second line
#   foo-end:
#
Number()
{
	${awk} -v item_tag="${1}" -v ignore_1="${2}" -v ignore_2="${3}" -v ignore_3="${4}" -v ignore_4="${5}" '
	# Returns true if "line" should not break a numbered sequence.
	function ignore_line( line )
	{
		i_0 = match( line , "^ignore" )
		# Each optional tag is only tested when it is non-empty; an
		# empty tag would turn the pattern into "^[:,]".
		i_1 = length(ignore_1) && match( line , "^" ignore_1 "[:,]" )
		i_2 = length(ignore_2) && match( line , "^" ignore_2 "[:,]" )
		i_3 = length(ignore_3) && match( line , "^" ignore_3 "[:,]" )
		i_4 = length(ignore_4) && match( line , "^" ignore_4 "[:,]" )
		return i_0 || i_1 || i_2 || i_3 || i_4
	}
	BEGIN {
		# Number to give to the next item line; a value above one
		# means that a sequence is open.
		n = 1
	}
	{
		if( match( $0 , "^" item_tag "[:,]" ) )
		{
			# Re-write "tag<rest>" as "tag,<n><rest>".
			sub( "^" item_tag , "" )
			printf( "%s,%d%s\n" , item_tag , n++ , $0 )
		}
		else
		{
			if( !ignore_line($0) )
			{
				# This line ends any open sequence.
				if( n > 1 )
					printf( "%s-end:\n" , item_tag )
				n = 1
			}
			print
		}
	} '
}

# ===
# Compress()
#
# Removes blank lines near to headings (etc) by changing
# the "blank" tag to "ignore,blank".
#
# A "blank" line is re-tagged as follows, first match wins:
#   - next to a heading line (before or after): "ignore,blank"
#   - between two "item-detail" lines: "item-detail-blank"
#   - with an "item-detail" line on one side only: "ignore,blank"
#   - just before a "code,1" line: "ignore,blank"
# All other lines are copied unchanged.
#
# Reads tagged lines on standard input, writes to standard output,
# and uses the global ${awk} as the awk binary. One empty line is
# always written after the last input line.
#
# NOTE(review): in this script Compress runs before any Number
# stage, so no "code,1" tag exists yet in its input and the last
# rule cannot match here -- confirm whether that is intended.
#
Compress()
{
	${awk} '
	# Writes "current", re-tagged if it is a blank line in one of the
	# special positions. "before" and "after" are its neighbours.
	function classify( before , current , after ,    detail_before , detail_after )
	{
		if( !match( current , "^blank:" ) )
		{
			print current
			return
		}

		if( match( after , "^h[[:digit:]][:,]" ) || match( before , "^h[[:digit:]][:,]" ) )
		{
			print "ignore," current
			return
		}

		detail_before = match( before , "^item-detail:" )
		detail_after = match( after , "^item-detail:" )

		if( detail_before && detail_after )
			print "item-detail-" current
		else if( detail_before || detail_after )
			print "ignore," current
		else if( match( after , "^code,1[:,]" ) )
			print "ignore," current
		else
			print current
	}

	# Run one line behind the input so that each line is classified
	# with both of its neighbours known.
	NR > 1 { classify( older , newer , $0 ) }

	{
		older = newer
		newer = $0
	}

	END {
		# Flush the last input line, then write one empty line.
		classify( older , newer , "" )
		classify( newer , "" , "" )
	} '
}

# ===
# Cat()
#
# An awk version of "cat" that also deletes carriage-return
# characters (octal 015) from the output.
#
# The parameters are the files to read; with no parameters standard
# input is read. Uses the global ${awk} as the awk binary.
#
Cat()
{
	# "$@" is quoted so that each file name reaches awk as one word,
	# without being split on white space or expanded as a glob.
	${awk} '{print}' "$@" | tr -d '\015'
}

# ==

# Tag each input line, tidy up the blank lines, then number each
# family of tags in turn.
#
# ${file:+"${file}"} hands the file name to Cat as a single word,
# and expands to nothing at all when no file was named so that Cat
# is called without parameters and reads standard input.
Cat ${file:+"${file}"} | \
	Main "${text_mode}" | \
	Compress | \
	Number "item" | \
	Number "item-outer" "item-inner" "blank" | \
	Number "item-inner" "blank" | \
	Number "item-numbered" | \
	Number "item-name" "item-detail" "blank" "item-detail-blank" "code" | \
	Number "item-detail" "item-detail-blank" "code" | \
	Number "code" "blank" "image" | \
	Number "footer" "blank" "image" | \
	Number "citation" "blank" | \
	Number "author" | \
	Number "text" "image"