#!/bin/sh
#
# Copyright (C) 2001-2004 Graeme Walker
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# ===
#
# txt2mu.sh
#
# Converts specially-formatted plain-text to marked-up text.
# The mark-up process works on complete lines; inline
# markup is handled by later processing (eg. "mu2html.sh").
#
# The "-t" (text-mode) switch modifies the set
# of available styles to suit non-technical texts.
#
# usage: txt2mu.sh [-a <awk>] [-t] [<file>]
#
#   -a <awk>   awk program used to run the filters (default "gawk")
#   -t         text mode
#   <file>     input file; standard input is read if omitted
#

awk="gawk"
text_mode="0"
file=""

# ===
# ParseArgs()
#
# Parses the command line into the "awk", "text_mode" and "file"
# variables. The switches are only recognised in the order shown
# in the usage line above.
#
ParseArgs()
{
	if test "${1}" = "-a"
	then
		shift
		if test "${1}" != "" ; then awk="${1}" ; fi
		# Only consume the switch argument if there is one: an
		# unconditional shift with nothing left is an error that
		# makes some shells abort the script.
		if test "$#" -gt 0 ; then shift ; fi
	fi

	if test "${1}" = "-t"
	then
		shift
		text_mode="1"
	fi

	file="${1}"
}

ParseArgs "$@"

# ===
# Main()
#
# Does most of the processing.
#
# Reads the text on standard input and writes tagged lines to
# standard output in the form "<tag>:<text>". Headings are
# recognised by looking ahead one line for an underline made
# of "=" or "-" characters; the underline itself and the
# "____" footer rule produce no output.
#
# Parameter 1 is the text-mode flag ("0" or "1").
# Uses the awk program named by the "awk" shell variable.
#
Main()
{
	${awk} -v text_mode="${1}" '
		BEGIN {
			in_footer = 0
		}

		# Writes a line with no tag (not used at present).
		function output( line )
		{
			printf( "%s\n" , line )
		}

		# Writes a line prefixed with its tag.
		function tagOutput( line , tag )
		{
			printf( "%s:%s\n" , tag , line )
		}

		# Same output format as tagOutput(); used for lines whose
		# text is passed on verbatim (code, images, html).
		function tagOutputRaw( line , tag )
		{
			printf( "%s:%s\n" , tag , line )
		}

		# Classifies one input line, given the line that follows
		# it, and writes the tagged result.
		function process( line , next_ )
		{
			# A real tab character. With a space here every line
			# starting with a space is classified as code before
			# the item-detail and item-inner tests are reached.
			tab = "\t"

			is_blank = match( line , "^[[:space:]]*$" )
			is_heading = match( next_ , "^==*[[:space:]]*$" )
			is_footer = match( line , "^____*[[:space:]]*$" )
			is_sub_heading = match( next_ , "^--*[[:space:]]*$" )
			is_item = match( line , "^\\* " )
			# "#" needs no escape in a regular expression; this
			# now matches the form used by the sub() call below
			is_item_name = match( line , "^# " )
			is_item_detail = match( line , "^ [^- ]" )
			is_item_numbered = match( line , "^\\([[:digit:]][[:digit:]]*\\)" )
			is_heading_line = match( line , "^==*[[:space:]]*$" )
			is_sub_heading_line = match( line , "^--*[[:space:]]*$" )
			is_image = match( line , "^[[:space:]]*<<.*>>[[:space:]]*$" )

			if( text_mode )
			{
				# text mode: tab-indented lines are citations
				# (one tab) or authors (two tabs), never code
				is_citation = match( line , "^" tab "[^" tab "]" )
				is_author = match( line , "^" tab tab )
				is_html = match( line , "^<.*>[[:space:]]*$" )
				is_code = 0
				is_item_outer = 0
				is_item_inner = 0
			}
			else
			{
				is_citation = 0
				is_author = 0
				is_html = 0
				is_code = match( line , "^" tab )
				is_item_outer = match( line , "^\\+ " )
				is_item_inner = match( line , "^ - " )
			}

			# The order of the tests below sets the precedence
			# when a line matches more than one pattern.
			if( is_footer )
			{
				# the rule is dropped; following plain lines are
				# tagged as footer text
				in_footer = 1
			}
			else if( is_code )
			{
				sub( "^" tab , "" , line )
				tagOutputRaw( line , "code" )
			}
			else if( is_image )
			{
				sub( "^[[:space:]]*<<" , "" , line )
				sub( ">>[[:space:]]*$" , "" , line )
				tagOutputRaw( line , "image" )
			}
			else if( is_html )
			{
				tagOutputRaw( line , "html" )
			}
			else if( is_blank )
			{
				tagOutput( "" , "blank" )
			}
			else if( is_item_name )
			{
				sub( "^# " , "" , line )
				tagOutput( line , "item-name" )
			}
			else if( is_item_detail )
			{
				sub( "^ " , "" , line )
				tagOutput( line , "item-detail" )
			}
			else if( is_item )
			{
				sub( "^\\* " , "" , line )
				tagOutput( line , "item" )
			}
			else if( is_item_outer )
			{
				sub( "^\\+ " , "" , line )
				tagOutput( line , "item-outer" )
			}
			else if( is_item_inner )
			{
				sub( "^ - " , "" , line )
				tagOutput( line , "item-inner" )
			}
			else if( is_item_numbered )
			{
				gsub( "^\\([[:digit:]][[:digit:]]*\\) " , "" , line )
				tagOutput( line , "item-numbered" )
			}
			else if( is_citation )
			{
				sub( "^" tab , "" , line )
				tagOutput( line , "citation" )
			}
			else if( is_author )
			{
				sub( "^" tab tab , "" , line )
				tagOutput( line , "author" )
			}
			else if( is_heading )
			{
				# the tag carries the major and minor numbers
				major += 1
				minor = 0
				h1_tag = "h1" "," major "," minor
				tagOutput( line , h1_tag )
			}
			else if( is_sub_heading )
			{
				minor += 1
				h2_tag = "h2" "," major "," minor
				tagOutput( line , h2_tag )
			}
			else if( !is_heading_line && !is_sub_heading_line )
			{
				tagOutput( line , in_footer ? "footer" : "text" )
			}
		}

		# Each line is processed one step late so that the line
		# after it is available as the look-ahead.
		{
			if( NR != 1 )
				process( previous , $0 )
			previous = $0
		}

		END {
			process( previous , "" )
		}
	'
}

# ===
# Number()
#
# Numbers a set of commonly-tagged lines, and inserts an end
# marker line at the end of the sequence.
#
# The 'ignore' parameters can be used to make sure that
# item lines separated with 'ignore' patterns are
# treated as being contiguous.
#
# Parameter 1 is the tag to number; parameters 2 to 5 are
# optional tags to ignore. A line "<tag>:<text>" becomes
# "<tag>,<n>:<text>" and the end marker is "<tag>-end:".
# A sequence is only closed by a following line that is neither
# an item nor ignored; there is no marker at end of input.
#
Number()
{
	${awk} -v item_tag="${1}" -v ignore_1="${2}" -v ignore_2="${3}" \
		-v ignore_3="${4}" -v ignore_4="${5}" '
		# Returns true if the line does not break a sequence: its
		# tag starts with "ignore" or is one of the ignore tags.
		function ignore_line( line )
		{
			i_0 = match( line , "^ignore" )
			i_1 = length(ignore_1) && match( line , "^" ignore_1 "[:,]" )
			i_2 = length(ignore_2) && match( line , "^" ignore_2 "[:,]" )
			i_3 = length(ignore_3) && match( line , "^" ignore_3 "[:,]" )
			# guarded by its own length so that an unset fourth
			# tag cannot turn the pattern into "^[:,]"
			i_4 = length(ignore_4) && match( line , "^" ignore_4 "[:,]" )
			return i_0 || i_1 || i_2 || i_3 || i_4
		}

		BEGIN {
			n = 1
		}

		{
			if( match( $0 , "^" item_tag "[:,]" ) )
			{
				# put the sequence number between the tag and
				# whatever followed it
				sub( "^" item_tag , "" )
				printf( "%s,%d%s\n" , item_tag , n++ , $0 )
			}
			else
			{
				if( !ignore_line($0) )
				{
					# n above one means a sequence is open
					if( n > 1 )
						printf( "%s-end:\n" , item_tag )
					n = 1
				}
				print
			}
		}
	'
}

# ===
# Compress()
#
# Removes blank lines near to headings (etc) by changing
# the "blank" tag to "ignore,blank".
#
# As a special case, converts single blank lines within an
# "item-detail" block to have a tag of "item-detail-blank"
# rather than "blank".
#
# Reads tagged lines on standard input and writes them to
# standard output, followed by one extra empty line.
# Uses the awk program named by the "awk" shell variable.
#
Compress()
{
	${awk} '
		# Writes "line", retagged if it is a blank line that sits
		# next to a heading or an item-detail line.
		function process( previous , line , next_ )
		{
			re_blank = "^blank:"
			re_heading = "^h[[:digit:]][:,]"
			re_detail = "^item-detail:"
			# NOTE(review): in the pipeline at the end of this file
			# Compress runs before every Number stage, so no line
			# has a "code,1" tag yet -- confirm this rule can fire.
			re_pre_start = "^code,1[:,]"

			this_is_blank = match(line,re_blank)
			next_is_heading = match(next_,re_heading)
			previous_is_heading = match(previous,re_heading)
			next_is_detail = match(next_,re_detail)
			previous_is_detail = match(previous,re_detail)
			next_is_pre_start = match(next_,re_pre_start)

			if( this_is_blank && ( next_is_heading || previous_is_heading ) )
			{
				print "ignore," line
			}
			else if( this_is_blank && !previous_is_detail && next_is_detail )
			{
				# blank line leading into an item-detail block
				print "ignore," line
			}
			else if( this_is_blank && previous_is_detail && !next_is_detail )
			{
				# blank line trailing an item-detail block
				print "ignore," line
			}
			else if( this_is_blank && next_is_detail && previous_is_detail )
			{
				# blank line inside an item-detail block
				print "item-detail-" line
			}
			else if( this_is_blank && next_is_pre_start )
			{
				print "ignore," line
			}
			else
			{
				print line
			}
		}

		# A sliding window of three lines: l2 is the line before
		# l1, and $0 is the line after it.
		{
			if( NR >= 2 )
				process( l2 , l1 , $0 )
			l2 = l1
			l1 = $0
		}

		END {
			process( l2 , l1 , "" )
			# The second call writes an empty line after the last
			# input line. Number() has no END rule, so it only
			# emits an end marker when a further line arrives.
			process( l1 , "" , "" )
		}
	'
}

# ===
# Cat()
#
# An awk version of "cat".
#
# Copies the named files, or standard input if there are none,
# to standard output with carriage-return characters removed.
#
Cat()
{
	# quoted so that filenames containing spaces stay intact
	${awk} '{print}' "$@" | tr -d '\015'
}

# ==

# The filename is only passed if one was given, so that Cat
# reads standard input otherwise; quoting keeps it as one word.
Cat ${file:+"${file}"} | \
Main "${text_mode}" | \
Compress | \
Number "item" | \
Number "item-outer" "item-inner" "blank" | \
Number "item-inner" "blank" | \
Number "item-numbered" | \
Number "item-name" "item-detail" "blank" "item-detail-blank" "code" | \
Number "item-detail" "item-detail-blank" "code" | \
Number "code" "blank" "image" | \
Number "footer" "blank" "image" | \
Number "citation" "blank" | \
Number "author" | \
Number "text" "image"