#!/bin/sh
#
# Copyright (C) 2001-2007 Graeme Walker <graeme_walker@users.sourceforge.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ===
#
# txt2mu.sh
#
# Converts specially-formatted plain-text to marked-up text.
# The mark-up process works on complete lines; inline
# markup is handled by later processing (eg. "mu2html.sh").
#
# The "-t" (text-mode) switch modifies the set
# of available styles to suit non-technical texts.
#
# usage: txt2mu.sh [-a <awk-binary>] [-t] [<input-file>]
#

awk="gawk"
|
|
if test "${1}" = "-a"
|
|
then
|
|
shift
|
|
if test "${1}" != "" ; then awk="${1}" ; fi
|
|
shift
|
|
fi
|
|
|
|
text_mode="0"
|
|
if test "${1}" = "-t"
|
|
then
|
|
shift
|
|
text_mode="1"
|
|
fi
|
|
|
|
file="${1}"
|
|
|
|
# ===
|
|
# Main()
|
|
#
|
|
# Does most of the processing.
|
|
#
|
|
Main()
|
|
{
|
|
${awk} -v text_mode="${1}" '
|
|
|
|
BEGIN { in_footer = 0 }
|
|
|
|
function output( line )
|
|
{
|
|
printf( "%s\n" , line )
|
|
}
|
|
|
|
function tagOutput( line , tag )
|
|
{
|
|
printf( "%s:%s\n" , tag , line )
|
|
}
|
|
|
|
function tagOutputRaw( line , tag )
|
|
{
|
|
printf( "%s:%s\n" , tag , line )
|
|
}
|
|
|
|
function process( line , next_ )
|
|
{
|
|
tab = " "
|
|
is_blank = match( line , "^[[:space:]]*$" )
|
|
is_heading = match( next_ , "^==*[[:space:]]*$" )
|
|
is_footer = match( line , "^____*[[:space:]]*$" )
|
|
is_sub_heading = match( next_ , "^--*[[:space:]]*$" )
|
|
is_item = match( line , "^\\* " )
|
|
is_item_name = match( line , "^\\# " )
|
|
is_item_detail = match( line , "^ [^- ]" )
|
|
is_item_numbered = match( line , "^\\([[:digit:]][[:digit:]]*\\)" )
|
|
is_heading_line = match( line , "^==*[[:space:]]*$" )
|
|
is_sub_heading_line = match( line , "^--*[[:space:]]*$" )
|
|
is_image = match( line , "^[[:space:]]*<<.*>>[[:space:]]*$" )
|
|
|
|
if( text_mode )
|
|
{
|
|
is_citation = match( line , "^" tab "[^" tab "]" )
|
|
is_author = match( line , "^" tab tab )
|
|
is_html = match( line , "^<.*>[[:space:]]*$" )
|
|
is_code = 0
|
|
is_item_outer = 0
|
|
is_item_inner = 0
|
|
}
|
|
else
|
|
{
|
|
is_citation = 0
|
|
is_author = 0
|
|
is_html = 0
|
|
is_code = match( line , "^" tab )
|
|
is_item_outer = match( line , "^\\+ " )
|
|
is_item_inner = match( line , "^ - " )
|
|
}
|
|
|
|
if( is_footer )
|
|
{
|
|
in_footer = 1
|
|
}
|
|
else if( is_code )
|
|
{
|
|
sub( "^" tab , "" , line )
|
|
tagOutputRaw( line , "code" )
|
|
}
|
|
else if( is_image )
|
|
{
|
|
sub( "^[[:space:]]*<<" , "" , line )
|
|
sub( ">>[[:space:]]*$" , "" , line )
|
|
tagOutputRaw( line , "image" )
|
|
}
|
|
else if( is_html )
|
|
{
|
|
tagOutputRaw( line , "html" )
|
|
}
|
|
else if( is_blank )
|
|
{
|
|
tagOutput( "" , "blank" )
|
|
}
|
|
else if( is_item_name )
|
|
{
|
|
sub( "^# " , "" , line )
|
|
tagOutput( line , "item-name" )
|
|
}
|
|
else if( is_item_detail )
|
|
{
|
|
sub( "^ " , "" , line )
|
|
tagOutput( line , "item-detail" )
|
|
}
|
|
else if( is_item )
|
|
{
|
|
sub( "^\\* " , "" , line )
|
|
tagOutput( line , "item" )
|
|
}
|
|
else if( is_item_outer )
|
|
{
|
|
sub( "^\\+ " , "" , line )
|
|
tagOutput( line , "item-outer" )
|
|
}
|
|
else if( is_item_inner )
|
|
{
|
|
sub( "^ - " , "" , line )
|
|
tagOutput( line , "item-inner" )
|
|
}
|
|
else if( is_item_numbered )
|
|
{
|
|
gsub( "^\\([[:digit:]][[:digit:]]*\\) " , "" , line )
|
|
tagOutput( line , "item-numbered" )
|
|
}
|
|
else if( is_citation )
|
|
{
|
|
sub( "^" tab , "" , line )
|
|
tagOutput( line , "citation" )
|
|
}
|
|
else if( is_author )
|
|
{
|
|
sub( "^" tab tab , "" , line )
|
|
tagOutput( line , "author" )
|
|
}
|
|
else if( is_heading )
|
|
{
|
|
major += 1
|
|
minor = 0
|
|
h1_tag = "h1" "," major "," minor
|
|
tagOutput( line , h1_tag )
|
|
}
|
|
else if( is_sub_heading )
|
|
{
|
|
minor += 1
|
|
h2_tag = "h2" "," major "," minor
|
|
tagOutput( line , h2_tag )
|
|
}
|
|
else if( !is_heading_line && !is_sub_heading_line )
|
|
{
|
|
tagOutput( line , in_footer ? "footer" : "text" )
|
|
}
|
|
}
|
|
|
|
{
|
|
if( NR != 1 )
|
|
process( previous , $0 )
|
|
previous = $0
|
|
}
|
|
|
|
END {
|
|
process( previous , "" )
|
|
} '
|
|
}
|
|
|
|
# ===
|
|
# Number()
|
|
#
|
|
# Numbers a set of commonly-tagged lines, and inserts an end
|
|
# marker line at the end of the sequence.
|
|
#
|
|
# The 'ignore' parameters can be used to make sure that
|
|
# item lines separated with 'ignore' patterns are
|
|
# treated as being contiguous.
|
|
#
|
|
# Eg: Number foo bar
|
|
# foo,1: first line
|
|
# ignore,bar:
|
|
# foo,2: second line
|
|
# foo-end:
|
|
#
|
|
Number()
|
|
{
|
|
${awk} -v item_tag="${1}" -v ignore_1="${2}" -v ignore_2="${3}" -v ignore_3="${4}" -v ignore_4="${5}" '
|
|
function ignore_line( line )
|
|
{
|
|
i_0 = match( line , "^ignore" )
|
|
i_1 = length(ignore_1) && match( line , "^" ignore_1 "[:,]" )
|
|
i_2 = length(ignore_2) && match( line , "^" ignore_2 "[:,]" )
|
|
i_3 = length(ignore_3) && match( line , "^" ignore_3 "[:,]" )
|
|
i_4 = length(ignore_3) && match( line , "^" ignore_4 "[:,]" )
|
|
return i_0 || i_1 || i_2 || i_3 || i_4
|
|
}
|
|
BEGIN {
|
|
n = 1
|
|
}
|
|
{
|
|
if( match( $0 , "^" item_tag "[:,]" ) )
|
|
{
|
|
sub( "^" item_tag , "" )
|
|
printf( "%s,%d%s\n" , item_tag , n++ , $0 )
|
|
}
|
|
else
|
|
{
|
|
if( !ignore_line($0) )
|
|
{
|
|
if( n > 1 )
|
|
printf( "%s-end:\n" , item_tag )
|
|
n = 1
|
|
}
|
|
print
|
|
}
|
|
} '
|
|
}
|
|
|
|
# ===
|
|
# Compress()
|
|
#
|
|
# Removes blank lines near to headings (etc) by changing
|
|
# the "blank" tag to "ignore,blank".
|
|
#
|
|
# As a special case, converts single blank lines within an
|
|
# "item-detail" block to have a tag of "item-detail-blank"
|
|
# rather than "blank".
|
|
#
|
|
Compress()
|
|
{
|
|
${awk} '
|
|
function process( previous , line , next_ )
|
|
{
|
|
re_blank = "^blank:"
|
|
re_heading = "^h[[:digit:]][:,]"
|
|
re_detail = "^item-detail:"
|
|
re_pre_start = "^code,1[:,]"
|
|
|
|
this_is_blank = match(line,re_blank)
|
|
next_is_heading = match(next_,re_heading)
|
|
previous_is_heading = match(previous,re_heading)
|
|
next_is_detail = match(next_,re_detail)
|
|
previous_is_detail = match(previous,re_detail)
|
|
next_is_pre_start = match(next_,re_pre_start)
|
|
|
|
if( this_is_blank && ( next_is_heading || previous_is_heading ) )
|
|
{
|
|
print "ignore," line
|
|
}
|
|
else if( this_is_blank && !previous_is_detail && next_is_detail )
|
|
{
|
|
print "ignore," line
|
|
}
|
|
else if( this_is_blank && previous_is_detail && !next_is_detail )
|
|
{
|
|
print "ignore," line
|
|
}
|
|
else if( this_is_blank && next_is_detail && previous_is_detail )
|
|
{
|
|
print "item-detail-" line
|
|
}
|
|
else if( this_is_blank && next_is_pre_start )
|
|
{
|
|
print "ignore," line
|
|
}
|
|
else
|
|
{
|
|
print line
|
|
}
|
|
}
|
|
{
|
|
if( NR >= 2 )
|
|
process( l2 , l1 , $0 )
|
|
l2 = l1
|
|
l1 = $0
|
|
}
|
|
END {
|
|
process( l2 , l1 , "" )
|
|
process( l1 , "" , "" )
|
|
} '
|
|
}
|
|
|
|
# ===
|
|
# Cat()
|
|
#
|
|
# An awk version of "cat".
|
|
#
|
|
Cat()
|
|
{
|
|
${awk} '{print}' $@ | tr -d '\015'
|
|
}
|
|
|
|
# ==
|
|
|
|
Cat ${file} | \
|
|
Main "${text_mode}" | \
|
|
Compress | \
|
|
Number "item" | \
|
|
Number "item-outer" "item-inner" "blank" | \
|
|
Number "item-inner" "blank" | \
|
|
Number "item-numbered" | \
|
|
Number "item-name" "item-detail" "blank" "item-detail-blank" "code" | \
|
|
Number "item-detail" "item-detail-blank" "code" | \
|
|
Number "code" "blank" "image" | \
|
|
Number "footer" "blank" "image" | \
|
|
Number "citation" "blank" | \
|
|
Number "author" | \
|
|
Number "text" "image"
|
|
|
|
|