This tool contains two files:
* t2h.php - does the filtering
* t2h - shell wrapper; passes arguments to t2h.php
Copyright 2002 Gregory Keraunen. All Rights Reserved.
Translates text read from standard input into HTML.
What sets this filter apart from the many other text-to-html filters is:
* text 'markup' syntax is not obviously identifiable as such: quality of text formatting is not compromised
* includes command line options for line wrapping and indentation
* markup configuration settings are read from a separate config file
The basic syntax rule for text file conversion is:
+ process the file one line at a time
+ look for a 'signal' after any leading white space on each line
+ signals are defined by Perl-Compatible Regular Expressions (PCRE)
+ ordinarily signals apply to only the line on which they occur
+ TODO: support for more involved processing such as html tables may require start/end signals
+ if wrapping is enabled, wrapped lines include any signal that preceded the original line
+ there are two common actions taken in response to signals: WRAP line, or REPLACE line
+ ordinary tag signals cause the line to be wrapped by tags
+ some signals require unique logic so must use reserved keywords
* Long anchor/fragment lines get split before anchor tags are written causing dramatic change in formatting after line is wrapped.
1) identify fragments during initial parse, prior to wrapping, prepend formatting tags to wrapped lines
2) optionally specify formatting of fragments and/or their named anchors.
moved files from /htdocs/proliberty/tools/txt2html to /htdocs/include/
renamed script from 'txt2html' to 't2h'
// Import the settings that the shell wrapper exports into the environment.
// The list mirrors the wrapper's T2H_PUBLIC_VARS array; it is repeated here
// because bash cannot pass an array variable through the environment.
$T2H_PUBLIC_VARS = array(
    'T2H_WRAP_WIDTH',
    'T2H_BREAK_STR',
    'T2H_WRAP_STR',
    'T2H_TAB_SPACES',
    'T2H_INDENT',
    'T2H_ANCHORS',
    'T2H_DEBUG',
    'T2H_FRAGMENT_SIGNAL',
    'T2H_FRAGMENT_TAG',
    'T2H_COMMENT_SIGNAL',
    'T2H_COMMENT_TAG',
    'T2H_REMOVE_COMMENT_SIGNAL',
    'T2H_HEADING_SIGNAL',
    'T2H_HEADING_TAG',
    'T2H_STRONG_SIGNAL',
    'T2H_STRONG_TAG',
    'T2H_STRONG_SIGNAL_SUB',
    'T2H_SMALL_SIGNAL',
    'T2H_SMALL_TAG',
    'T2H_HR_TAG',
    'T2H_HR_SIGNAL',
);
// Each setting becomes a global of the same name. A variable that is not set
// in the environment becomes '' so the empty() checks used throughout the
// script treat it as "not configured".
foreach ($T2H_PUBLIC_VARS as $t2h_var_name) {
    $t2h_var_value = getenv($t2h_var_name);
    $$t2h_var_name = ($t2h_var_value === false) ? '' : $t2h_var_value;
}
unset($t2h_var_name, $t2h_var_value);
// T2H_INDENT is an on/off flag; keep it as an integer.
$T2H_INDENT = (int)$T2H_INDENT;
// $T2H_ANCHORS signals to create links to text fragments; target fragment must also begin with $T2H_FRAGMENT_SIGNAL
// $T2H_REMOVE_COMMENT_SIGNAL signals a line to ignore; don't print
//trigger_error("T2H_STRONG_SIGNAL_SUB=".$T2H_STRONG_SIGNAL_SUB,E_USER_ERROR); exit (1);
/* This should be obsoleted by config file settings...
// $T2H_REMOVE_COMMENT_SIGNAL signals a line to ignore; don't print
$T2H_COMMENT_TAG = '!--';
$T2H_HEADING_TAG = 'h2';
// $T2H_STRONG_SIGNAL signals <b>
$T2H_STRONG_TAG = 'b';
// $T2H_ANCHORS signals to create links to text fragments; target fragment must also begin with $T2H_FRAGMENT_SIGNAL
*/
/**
 * htmlFormatLines($lines)
 *
 * Convert one logical input line -- already split into its word-wrapped
 * pieces -- to HTML.
 *
 * $lines - array of lines, without trailing "\n"
 *
 * Formatting signals (horizontal rule, removed comment, comment, heading,
 * small, strong) are looked for on the first element only; the closing tag
 * that belongs to the matched signal is appended to the last element.
 * Link and mailto: markup is applied to every element.
 *
 * All settings are read from the $T2H_* globals.
 *
 * Returns:
 *   - the formatted lines (re-indexed from 0);
 *   - the input unchanged when $T2H_ANCHORS is empty (formatting disabled)
 *     or when the array has no elements;
 *   - NULL when the line must not be printed (removed-comment signal, or a
 *     horizontal rule while $T2H_HR_TAG is empty) or $lines is not an array.
 */
function htmlFormatLines($lines){
    global $T2H_ANCHORS;
    global $T2H_DEBUG;
    global $T2H_HR_SIGNAL;
    global $T2H_HR_TAG;
    // The signal settings must be declared global too: without these
    // declarations they are always empty inside the function.
    global $T2H_REMOVE_COMMENT_SIGNAL;
    global $T2H_COMMENT_SIGNAL;
    global $T2H_COMMENT_TAG;
    global $T2H_HEADING_SIGNAL;
    global $T2H_HEADING_TAG;
    global $T2H_SMALL_SIGNAL;
    global $T2H_SMALL_TAG;
    global $T2H_STRONG_SIGNAL;
    global $T2H_STRONG_SIGNAL_SUB;
    global $T2H_STRONG_TAG;

    if (!is_array($lines)){
        trigger_error( 'htmlFormatLines(): $lines must be an array',E_USER_ERROR);
        // only reached when an error handler lets execution continue
        return NULL;
    }
    $numlines = count($lines);
    $T2H_DEBUG && trigger_error( "htmlFormatLines(): processsing $numlines lines\n", E_USER_NOTICE);
    if (empty($T2H_ANCHORS) || $numlines == 0){
        // don't do any formatting
        return $lines;
    }
    // the code below addresses the first element as [0] and the last as [$numlines - 1]
    $lines = array_values($lines);
    // tag name to close on the last line; stays '' when no wrapping tag was opened
    $closing_tag = '';

    // must apply htmlspecialchars() before adding any markup
    foreach ($lines as $i => $line){
        $lines[$i] = htmlspecialchars($line);
    }

    // handle horizontal rule signals:
    if (!empty($T2H_HR_SIGNAL)){
        $pcre = '/^\s*('.preg_quote($T2H_HR_SIGNAL, '/').')\s*$/';
    } else {
        $pcre = '/^\s*(---+|===+)\s*$/';
    }
    if ( preg_match ( $pcre, $lines[0]) ){
        if (!empty($T2H_HR_TAG)){
            $lines[0] = preg_replace( $pcre, '<'.$T2H_HR_TAG.' class="t2h-HR"/>', $lines[0] );
            // we don't set $closing_tag for <hr/>
            return $lines;
        } else {
            return NULL;
        }
    }

    // removed comments: the line is dropped entirely.
    // The empty() guard matters: an empty signal would build a pattern that
    // matches every line and so drop the whole document.
    if (!empty($T2H_REMOVE_COMMENT_SIGNAL)){
        $pcre = '/^(\s*)'.preg_quote($T2H_REMOVE_COMMENT_SIGNAL, '/').'(.*)$/';
        if ( preg_match ( $pcre, $lines[0]) ){
            $T2H_DEBUG && trigger_error( "htmlFormatLines(): removed comment line\n", E_USER_NOTICE);
            return NULL;
        }
    }

    if (!empty($T2H_COMMENT_SIGNAL)){
        $pcre = '/^('.preg_quote($T2H_COMMENT_SIGNAL, '/').'.*)$/';
        if ( preg_match ( $pcre, $lines[0]) ){
            $lines[0] = preg_replace( $pcre, '<'.$T2H_COMMENT_TAG.' class="t2h-comment">$1', $lines[0] );
            $closing_tag = $T2H_COMMENT_TAG;
        }
    }

    if (!empty($T2H_HEADING_SIGNAL)){
        // add <$T2H_HEADING_TAG>...</$T2H_HEADING_TAG> when line starts with $T2H_HEADING_SIGNAL
        $pcre = '/^(\s*)'.preg_quote($T2H_HEADING_SIGNAL, '/').'(.*)$/';
        if ( preg_match ( $pcre, $lines[0]) ){
            $lines[0] = preg_replace( $pcre, '$1<'.$T2H_HEADING_TAG.' class="t2h-heading">$2', $lines[0] );
            $closing_tag = $T2H_HEADING_TAG;
        }
    }

    if (!empty($T2H_SMALL_SIGNAL)){
        // add <$T2H_SMALL_TAG>...</$T2H_SMALL_TAG> when line starts with $T2H_SMALL_SIGNAL
        $pcre = '/^(\s*)'.preg_quote($T2H_SMALL_SIGNAL, '/').'(.*)$/';
        if ( preg_match ( $pcre, $lines[0]) ){
            $lines[0] = preg_replace( $pcre, '$1<'.$T2H_SMALL_TAG.' class="t2h-small">$2', $lines[0] );
            $closing_tag = $T2H_SMALL_TAG;
        }
    }

    if (!empty($T2H_STRONG_SIGNAL)){
        // add <$T2H_STRONG_TAG>...</$T2H_STRONG_TAG> when line starts with $T2H_STRONG_SIGNAL
        $pcre = '/^(\s*)'.preg_quote($T2H_STRONG_SIGNAL, '/').'(.*)$/';
        if ( preg_match ( $pcre, $lines[0]) ){
            if (!empty($T2H_STRONG_SIGNAL_SUB)){
                // the signal is replaced by the configured substitute text
                $lines[0] = preg_replace( $pcre, '$1<'.$T2H_STRONG_TAG.' class="t2h-strong">'.$T2H_STRONG_SIGNAL_SUB.'$2', $lines[0] );
            } else {
                $lines[0] = preg_replace( $pcre, '$1<'.$T2H_STRONG_TAG.'>$2', $lines[0] );
            }
            $closing_tag = $T2H_STRONG_TAG;
        }
    }

    // add anchors before closing formatting tag:
    foreach ($lines as $i => $line){
        // "link text" http://...
        // The quotes are matched as &quot; because htmlspecialchars() has
        // already run; a raw '"' at this point can only belong to markup
        // added above and must not start a match.
        // this is not great: if lines have been wrapped, we won't match "hi" http://...
        $pcre = '%&quot;((?:(?!&quot;).)+)&quot;[\s]+((http|https|ftp|file)://[^\s]+?)(.*)%sU';
        if ( preg_match( $pcre,$lines[$i] ) ){
            $lines[$i] = preg_replace( $pcre, '<a href="$2" target="_top">$1</a>$4', $lines[$i] );
        } else {
            $pcre = '%\b((http|https|ftp|file)://[^\s]+?)(.*)%U';
            $lines[$i] = preg_replace( $pcre, '<a href="$1" target="_top">$1</a>$3', $lines[$i] );
        }
        // relative links: file:path
        // (?!//) keeps this from re-matching the file:// URLs that were just
        // turned into anchors above.
        $pcre = '%\b(file:(?!//)([^\s]+?))(.*)%U';
        $lines[$i] = preg_replace( $pcre, '<a href="$2">$2</a>$3', $lines[$i] );
        // add mailto: links
        $pcre = '/\b(\w+@\w+\.[\w.]+)(.*)/';
        $lines[$i] = preg_replace( $pcre, '<a href="mailto:$1">$1</a>$2', $lines[$i] );
    }

    // close the wrapping tag on the last (possibly wrapped) line
    if (!empty($closing_tag)){
        $lines[$numlines -1] = $lines[$numlines -1]."</$closing_tag>" ;
    }
    return $lines;
} // htmlFormatLines()
// Dump the effective settings when debugging is enabled.
if ($T2H_DEBUG) {
    trigger_error( "width=$T2H_WRAP_WIDTH\n", E_USER_NOTICE);
    trigger_error( "T2H_BREAK_STR=$T2H_BREAK_STR\n", E_USER_NOTICE);
    trigger_error( "T2H_INDENT=$T2H_INDENT\n", E_USER_NOTICE);
    trigger_error( "T2H_ANCHORS=$T2H_ANCHORS\n", E_USER_NOTICE);
    trigger_error( "T2H_TAB_SPACES=$T2H_TAB_SPACES\n", E_USER_NOTICE);
}

// Prefix of $T2H_TAB_SPACES blanks, used to indent the continuation lines of
// a wrapped line. str_repeat() replaces the former count-down loop, which
// never terminated for a negative or fractional setting.
$tabs = str_repeat(' ', max(0, (int)$T2H_TAB_SPACES));

// Read all of standard input, one array element per line (newlines kept).
$t2h_input = 'php://stdin';
$fcontents = file($t2h_input);
if (empty($fcontents)) {
    trigger_error( "empty input: $t2h_input\n", E_USER_NOTICE);
    echo '';
    // file() returns FALSE on failure; continue with "no lines" instead
    $fcontents = array();
}

// Output lines are collected here by the passes that follow.
$lines = array();
// Word-wrap pass: walk the raw input lines in $fcontents, wrap those longer
// than $T2H_WRAP_WIDTH, run every line through htmlFormatLines() and append
// the results (each terminated by "\n") to $lines.
// NOTE(review): this section appears to have lost lines -- the closing
// braces, the second operand of the `||` further down, and any
// initialisation/increment of $i are absent from the visible text.
// TODO: restore them from the original before relying on this code.
// NOTE(review): each() was removed in PHP 8; foreach is the replacement.
// we must do the wordwrapping prior to any html formatting
while (list ($line_num, $line) = each ($fcontents)) {
// length of the raw line; presumably still includes the trailing newline,
// since the unwrapped branch below has to rtrim() it -- TODO confirm
$lineLen = strlen( $line );
if( $T2H_WRAP_WIDTH && ( $T2H_WRAP_WIDTH < $lineLen ) ){
// we will wrap lines
// preserve any formatting signal characters so wrapped lines will have same formatting as unwrapped line
// The pattern captures the leading white space as group 1 and an optional
// strong / heading / comment / removed-comment signal as group 2.
// NOTE(review): $T2H_SMALL_SIGNAL is not part of the alternation -- confirm
// whether that is intended.
$pcre = '/^(\s*)((';
$pcre .= preg_quote($T2H_STRONG_SIGNAL, '/');
$pcre .='|'.preg_quote($T2H_HEADING_SIGNAL, '/');
$pcre .='|'.preg_quote($T2H_COMMENT_SIGNAL, '/');
$pcre .='|'.preg_quote($T2H_REMOVE_COMMENT_SIGNAL, '/');
$pcre .= ')?)/';
preg_match( $pcre, $line, $matches );
$leading_space = $matches[1];
$formattingSignal = $matches[2];
// $conditionalTab is the extra indentation put in front of continuation
// lines; it is cleared for comment lines.
// NOTE(review): with the closing braces missing it is not clear which `if`
// the `} else {` below belongs to.
// NOTE(review): the condition below ends in a dangling `||`; its second
// operand is missing.
// NOTE(review): $wordT2H_WRAP_STR is the break string handed to wordwrap();
// the only visible assignment is in the comment branch, so its value for
// other lines cannot be determined from here.
if (!empty($formattingSignal)){
$conditionalTab = $tabs ;
// comment lines, preserve formatting at start of line
if ( ($formattingSignal==$T2H_COMMENT_SIGNAL) ||
) {
// every continuation line starts with the signal followed by one space
$wordT2H_WRAP_STR="\n".$formattingSignal.' ';
// added indentation looks bad so we don't do it
$conditionalTab = "" ;
} else {
$conditionalTab = "" ;
// wordwrap does the line wrapping; explode removes "\n"
$wrappedLines = explode( "\n", wordwrap( $line, $T2H_WRAP_WIDTH, $wordT2H_WRAP_STR ) );
// htmlFormatLines() may return NULL, in which case nothing is emitted
$wrappedLines = htmlFormatLines($wrappedLines);
if (!empty($wrappedLines)){
// the first piece is emitted as is; the remaining pieces are prefixed with
// the original leading white space plus $conditionalTab
$lines[] = $wrappedLines[0]."\n";
// NOTE(review): $i is neither initialised nor incremented in the visible
// text; as written this loop cannot terminate.
while ( $i < count($wrappedLines)){
$lines[] = $leading_space.$conditionalTab.$wrappedLines[$i]."\n";
} else {
$T2H_DEBUG && trigger_error( "formatted line is empty\n", E_USER_WARNING);
} else {
// line is short enough (or wrapping is disabled): format it as a single line
// remove trailing "\n", for consistency of input to htmlFormatLines()
$formattedLine = htmlFormatLines(array(rtrim($line)));
if (!empty($formattedLine)){
$lines[] = implode('',$formattedLine)."\n";
} else {
$T2H_DEBUG && trigger_error( "formatted line is empty\n", E_USER_WARNING);
/**
 * t2hLinkFragments($lines, $fragment_signal, $fragment_tag, $debug)
 *
 * Create links and named anchors for text fragments.
 *
 * A fragment is the text that follows $fragment_signal on a line that starts
 * (after optional white space) with the signal. When the same fragment
 * occurs again on a later line, the first occurrence becomes a link
 * (<a href="#fragmentN">) and the later one a named anchor wrapped in
 * <$fragment_tag class="t2h-fragment">. A fragment with no later occurrence
 * only becomes a named anchor.
 *
 * $lines           - array of output lines, indexed from 0, each ending in "\n"
 * $fragment_signal - the signal string (non-empty)
 * $fragment_tag    - tag name used to wrap named anchors
 * $debug           - when true, echo every search pattern
 *
 * Returns the updated array of lines.
 */
function t2hLinkFragments($lines, $fragment_signal, $fragment_tag, $debug = false){
    $quoted_signal = preg_quote( $fragment_signal, '/' );

    // keep track of patterns for creating links and named anchors to text fragments
    $fragment_patterns = array();
    $pcre = '/^\s*?'.$quoted_signal.'(.+)(\s*)$/U';
    foreach ($lines as $line_num => $line){
        if ( preg_match( $pcre, $line, $matches ) ){
            $fragment_patterns[$line_num] = $matches[1];
        }
    }

    $fragment_num = 1;
    // line numbers that were already turned into the named anchor of an
    // earlier link and must not be processed a second time
    $handled = array();
    foreach ($fragment_patterns as $line_num => $pattern){
        if ( isset($handled[$line_num]) ) continue;
        $fragment_name = 'fragment'.$fragment_num++;
        $pcre = '/(.*)('.$quoted_signal.')('.preg_quote( $pattern, '/' ).')(\s*)$/U';
        if ( $debug ) echo "pcre=$pcre";
        $anchor = '$1<'.$fragment_tag.' class="t2h-fragment"><a name="'.$fragment_name.'"/>$3</'.$fragment_tag.'>$4';

        // look for a matching pattern later in the file...
        $target = NULL;
        for ( $i = $line_num + 1; isset($lines[$i]); $i++ ){
            if ( preg_match( $pcre, $lines[$i] ) ){
                $target = $i;
                break;
            }
        }

        if ( $target === NULL ){
            // no match was found: only create a named anchor, no link
            $lines[$line_num] = preg_replace( $pcre, $anchor, $lines[$line_num] );
            continue;
        }
        // create the link with the first line with the pattern
        $lines[$line_num] = preg_replace( $pcre, '$1<a href="#'.$fragment_name.'">$3</a>$4', $lines[$line_num] );
        // create the named anchor for the subsequent line
        $lines[$target] = preg_replace( $pcre, $anchor, $lines[$target] );
        $handled[$target] = true;
    }
    return $lines;
} // t2hLinkFragments()

if ( !isset($lines) ){
    $lines = array();
}
if ( !empty($T2H_ANCHORS) && !empty($T2H_FRAGMENT_SIGNAL) ){
    $lines = t2hLinkFragments($lines, $T2H_FRAGMENT_SIGNAL, $T2H_FRAGMENT_TAG, $T2H_DEBUG);
}
// Nothing was collected (e.g. empty input): print nothing rather than
// handing an undefined value to join().
if ( !isset($lines) ){
    $lines = array();
}
// When a break string is configured, insert it in front of every newline.
if ( !empty($T2H_BREAK_STR) ){
    foreach ($lines as $i => $line){
        $lines[$i] = str_replace("\n", $T2H_BREAK_STR."\n", $line );
    }
}
// Write the finished document to standard output.
echo join( '', $lines );
#!/bin/bash
# The script uses arrays, [[ ]] and ${!var}, so it needs bash, not plain sh.
# "#!/bin/bash -x" prints each line as it is executed; good for debugging.
# Copyright 2002 Gregory Keranen. All Rights Reserved.
# Created 5/21/2002
# PURPOSE
#   convert text to html entities
#   optionally wrap input
# USAGE:
#   [OPTIONS] [INPUT_FILE]
# QUESTIONS/ TODO:
# BUGS:
#   sourcing the -f config_file will clobber environmental vars; is this right?
#   (it also overrides options given on the command line, because the config
#   file is sourced after the options have been parsed)
#==============================================================================
PHP=/usr/local/bin/php

#### DEFAULT SETTINGS:
INPUT_FILE="/dev/stdin"
OUT_FILE="/dev/stdout"
BASENAME=$(basename "$0" .sh)

# this needs work:
# If the command found on PATH is a symlink, use the directory of its target;
# otherwise fall back to the directory this script was started from.
SCRIPT_LINK_TARGET=$(ls -al "$(which "$BASENAME" 2>/dev/null)" 2>/dev/null | sed -n 's/.*-> \(.*\)$/\1/p')
if [ -n "$SCRIPT_LINK_TARGET" ]
then
    SCRIPT_DIR=$(dirname "$SCRIPT_LINK_TARGET")
else
    SCRIPT_DIR=$(dirname "$0")
fi
# NOTE(review): the file header names the filter "t2h.php"; confirm that the
# ".phpo" extension here is intended.
TARGET_SCRIPT=${SCRIPT_DIR}/${BASENAME}.phpo

# a default config file in this script's directory
T2H_CONFIG_FILE_DEFAULT=${SCRIPT_DIR}/${BASENAME}.conf.default
# a config file in the current working directory
#T2H_CONFIG_FILE_LOCAL="$PWD/${BASENAME}.conf"
# a config file in the user's HOME directory
#T2H_CUSTOM_CONFIG_FILE=${HOME}/.t2h

VERSION="0.1a"
USAGE="Usage: $BASENAME [OPTIONS] [INPUT_FILE]"
#CONFIG_READ=0

T2H_PRIVATE_VARS=(\
    T2H_CONFIG_FILE_DEFAULT \
    T2H_CONFIG_FILE \
)
T2H_PUBLIC_VARS=(\
    T2H_WRAP_WIDTH \
    T2H_BREAK_STR \
    T2H_WRAP_STR \
    T2H_TAB_SPACES \
    T2H_INDENT \
    T2H_ANCHORS \
    T2H_DEBUG \
    T2H_FRAGMENT_SIGNAL \
    T2H_FRAGMENT_TAG \
    T2H_COMMENT_SIGNAL \
    T2H_COMMENT_TAG \
    T2H_REMOVE_COMMENT_SIGNAL \
    T2H_HEADING_SIGNAL \
    T2H_HEADING_TAG \
    T2H_STRONG_SIGNAL \
    T2H_STRONG_TAG \
    T2H_STRONG_SIGNAL_SUB \
    T2H_SMALL_SIGNAL \
    T2H_SMALL_TAG \
    T2H_HR_TAG \
    T2H_HR_SIGNAL \
)
export T2H_PUBLIC_VARS
#### END OF DEFAULT SETTINGS

# Print the usage text.
help()
{
cat << eof
$USAGE
Options:
  -f config_file  specify a config file to load
  -o output_file  specify an output file name
  -w wrap_width   wrap text to specified column width
  -i              if wrapping, maintain leading-space indentation
  -t tab_spaces   if wrapping, indent wrapped lines by tab_spaces spaces
  -b break_str    replace \n with break_str\n
  -a              disable HTML conversion: no entities or tags
  -v              print version
  -d              debug
  -h              print this help text
eof
}

# syntax of getopts is:
# : indicates option expects argument string after; E.g., -o output_file
# without :, option doesn't expect anything else; E.g., -h
# The leading ":" selects silent error reporting: an unknown option arrives
# as "?" and a missing argument as ":", with the option letter in OPTARG.
while getopts ":df:vihao:w:b:t:" Option
do
    # ;; required after each case
    case $Option in
        d ) T2H_DEBUG=1; echo "$BASENAME" version: "$VERSION" >&2;;
        v ) echo "$BASENAME" version: "$VERSION"; exit 0;;
        h ) help; exit 0;;
        f ) T2H_CONFIG_FILE="$OPTARG";;
        a ) T2H_ANCHORS=0;;
        # OUT_FILE is what the final command writes to; T2H_OUT_FILE is kept
        # for anything that still reads the old name.
        o ) OUT_FILE="$OPTARG"; T2H_OUT_FILE="$OPTARG";;
        w ) T2H_WRAP_WIDTH="$OPTARG";;
        i ) T2H_INDENT=1;;
        b ) T2H_BREAK_STR="$OPTARG";;
        t ) T2H_TAB_SPACES="$OPTARG";;
        : ) echo "Option -$OPTARG requires an argument" >&2; exit 1;;
        * ) echo "Unimplemented option chosen: -$OPTARG" >&2; exit 1;;
    esac
done
# this moves the argument pointer so it points to the next argument after the options.
shift $(($OPTIND - 1))

if [ -n "$1" ]    # If input file is specified
then
    if [ "$(dirname "$1")" = "." ]
    then
        INPUT_FILE="$(pwd)/$1"
    else
        INPUT_FILE="$1"
    fi
fi

if [ ! -e "$INPUT_FILE" ]    # If input file doesn't exist
then
    echo "error: input file not found: \"$INPUT_FILE\"" >&2; exit 1;
fi

# source the specified config file, if it exists
if [ -n "$T2H_CONFIG_FILE" ]
then
    if [ -f "$T2H_CONFIG_FILE" ]
    then
        if [[ $T2H_DEBUG = 1 ]]
        then
            echo "Using config file:" >&2
            echo "$T2H_CONFIG_FILE" >&2
        fi
        # test the status of "." directly; "[ ! $? ]" is never true
        if ! . "$T2H_CONFIG_FILE"
        then
            echo "Failed reading config file:" >&2; echo "$T2H_CONFIG_FILE" >&2; exit 1
        fi
    else
        echo "Config file not found:" "$T2H_CONFIG_FILE" >&2; exit 1
    fi
elif [ -e "$T2H_CONFIG_FILE_DEFAULT" ]    # source the default config file, if it exists
then
    if [[ $T2H_DEBUG = 1 ]]
    then
        echo "Using config file:" >&2
        echo "$T2H_CONFIG_FILE_DEFAULT" >&2
    fi
    if ! . "$T2H_CONFIG_FILE_DEFAULT"
    then
        echo "Failed reading default config file" >&2; echo "$T2H_CONFIG_FILE_DEFAULT" >&2; exit 1
    fi
else
    echo "Default config file not found:" >&2; echo "$T2H_CONFIG_FILE_DEFAULT" >&2; exit 1
fi

# Mark every public setting for export so the PHP filter can read it from
# its environment.
exportVars()
{
    export "${T2H_PUBLIC_VARS[@]}"
}

# Print NAME="value" for every public and private setting.
echoVars()
{
    local var
    for var in "${T2H_PUBLIC_VARS[@]}" "${T2H_PRIVATE_VARS[@]}"
    do
        echo "$var=\"${!var}\""
    done
}

if [[ $T2H_DEBUG = 1 ]]
then
    echoVars >&2
fi
exportVars

# feed the input file to the filter; output goes to "$OUT_FILE"
"${PHP}" "$TARGET_SCRIPT" < "$INPUT_FILE" > "$OUT_FILE"
exit 0