# jtagconvert.tcl - procedures for converting multi-font text
# 
# Copyright 1994-1995 by Jay Sekora.  All rights reserved, except 
# that this file may be freely redistributed in whole or in part 
# for non-profit, noncommercial use.
######################################################################

# TO DO:
#   html conversion needs to handle single newline (?<br>?)
#   bold italic for html
#   ISO entities for html
#   some mechanism for lists
#   extend to underlining and colours
#   generic "get tag information for chunk" routine, eg
#     "j:tc:get_status var" might set var(font) to bolditalic,
#     var(anchor) to "Table of Contents" and var(link) to
#     "foo/bar#intro".
#   rewrite conversion to use above routine, and switch on its
#     results.
#   rewrite TeX to use {...} sensibly.
#   handle returns better, esp if they're tagged.

if { ! [info exists jstools_library] } {
  upvar #0 {set jstools_library /usr/local/lib/jstools}
}

######################################################################
######################################################################
### FOLLOWING SHOULD HAVE A PREFERENCES PANEL:
######################################################################
######################################################################

global J_TAGCONVERT
set J_TAGCONVERT(lib) $jstools_library/jtagconvert
set J_TAGCONVERT(tex,prologue) prologue.tex
set J_TAGCONVERT(tex,epilogue) epilogue.tex
set J_TAGCONVERT(ps,prologue) prologue.ps
set J_TAGCONVERT(ps,epilogue) epilogue.ps
set J_TAGCONVERT(html,prologue) prologue.html
set J_TAGCONVERT(html,epilogue) epilogue.html
set J_TAGCONVERT(tclrt,prologue) prologue.tclrt
set J_TAGCONVERT(tclrt,epilogue) epilogue.tclrt

######################################################################
# j:tc:saveas t -
#   let the user choose a file format and a file to save the
#   richtext in $t into
######################################################################

proc j:tc:saveas { t } {
  set filename [j:fs \
    -prompt "Save as:" \
    -types {PostScript HTML TeX {Tcl Rich Text}} \
    -typevariable user_type \
    -typeprompt "Format:"]
  
  if {"x$filename" == "x"} {
    return
  }
  
  switch -exact -- $user_type {
    {PostScript} {set type ps}
    {Tcl Rich Text} {set type tclrt}
    {HTML} {set type html}
    default {set type tex}
  }
  
  j:fileio:write $filename [j:tc:${type}:convert_text $t]
}

######################################################################
# clear all convert:place:* tags (done when you start)
######################################################################

proc j:tc:clear_marks { t } {
  foreach mark [$t mark names] {
    if [string match convert:place:* $mark] {
      $t mark unset $mark
    }
  }
}

######################################################################
# mark all places in the text where the font changes
######################################################################

proc j:tc:mark_transitions { t } {
  # mark every spot where there's a font tag transition.  because each
  # mark is named after its position, a given location will only end up
  # with one mark, even if it has multiple font transitions  
  foreach tag [$t tag names] {
    # we only care about font tags (at this point; might do underlining
    # and colours at some point in the future)
    if [string match richtext:font:* $tag] {
      foreach place [$t tag ranges $tag] {
        $t mark set convert:place:$place $place
      }
    }
  }
  $t mark set convert:place:1.0 1.0
  set end [$t index end]
  $t mark set convert:place:$end $end
}

######################################################################
# add marks to break the text up into pieces of a manageable size
######################################################################

proc j:tc:make_small_chunks { t } {
  # add a mark about every sixty characters.  because each
  # mark is named after its position, a given location will only end up
  # with one mark, even if it has multiple font transitions  
  $t mark set chunk_pointer 1.0
  
  while 1 {
    $t mark set chunk_pointer {chunk_pointer + 50 chars wordend}
    while {[string match "\[ \n\t\f\]" [$t get chunk_pointer]] &&
           [$t compare chunk_pointer < end]} {
      $t mark set chunk_pointer {chunk_pointer + 1 char}
    }
    if [$t compare chunk_pointer >= end] {
      return 0
    }
    set index [$t index chunk_pointer]
    $t mark set convert:place:$index $index
  }
}

######################################################################
# compare marks by index
######################################################################

proc j:tc:compare_marks { t one two } {
  if [$t compare $one < $two] {
    return -1
  }
  if [$t compare $one == $two] {
    return 0
  }
  return 1
}

######################################################################
# get all (relevant) marks, in sorted order
######################################################################

proc j:tc:get_marks { t } {
  set marks {}
  
  foreach mark [$t mark names] {
    if [string match convert:place:* $mark] {
      lappend marks $mark
    }
  }
  return [lsort -command "j:tc:compare_marks $t" $marks]
}

######################################################################
# convert text to mode, processing each chunk in order
#   this is a "generic" procedure that many output formats should
#   be able to share.  it prepends a mode-specific prologue.  then
#   it breaks the text up into chunks, where each chunk is the same
#   font and a reasonable length, and calls a mode-specific procedure
#   to convert each chunk.  it appends a mode-specific epilogue
#   and returns the result.
# if "-break 1" is given, call j:tc:make_small_chunks before
#   processing each chunk.  (a mode can also break chunks in 
#   j:tc:_mode_:convert_chunk
######################################################################

proc j:tc:generic_convert_text { t {mode tclrt} args } {
  j:parse_args {
    {break 0}
  }
  
  set result {}
  
  catch {
    set result [j:tc:$mode:prologue $t]
  }
  
  j:tc:clear_marks $t
  if $break {
    j:tc:make_small_chunks $t
  }
  j:tc:mark_transitions $t
  
  set marks [j:tc:get_marks $t]
  if {[llength $marks] == 1} {
    # text is empty
    return -1
  }
  
  set lastmark [expr [llength $marks] - 1]
  
  for {set i 0} {$i < $lastmark} {incr i} {
    set start [lindex $marks $i]
    set end [lindex $marks [expr $i + 1]]
    set text [$t get $start $end]
    set tags [$t tag names $start]
    append result [j:tc:$mode:convert_chunk $text $tags]
  }
  catch {
    append result [j:tc:$mode:epilogue $t]
  }
  return $result
}

######################################################################
# j:tc:find_font_tag tags -
#   given a list of tags, return the (hopefully only) font tag
#   defaults to "typewriter" font.
######################################################################

proc j:tc:find_font_tag { tags } {
  foreach tag $tags {
    if [string match richtext:font:* $tag] {
      return $tag
    }
  }
  return "richtext:font:typewriter"
}

######################################################################
# j:tc:break_string string -
#   return string with spaces replaced with returns every sixty characters
#   or so
#   HORRIBLY HORRIBLY KLUDGY!
######################################################################

proc j:tc:break_string { string } {
  if {[string length $string] < 60} {
    return $string
  }
  
  set old_lines [split $string "\n"]
  set new_lines ""
  
  foreach old_line $old_lines {
    if {[string length $old_line] < 60} {
      lappend new_lines $old_line
    } else {
      set new_words ""
      foreach word [split $old_line " "] {
        lappend new_words $word
        if { [string length $new_words] > 60} {
          lappend new_lines [join $new_words " "]
          set new_words ""
        } ;# if >60
      } ;# foreach word
      if [llength $new_words] {
        lappend new_lines [join $new_words " "]
      }
    } ;# else (wasn't <60)
  } ;# foreach line
  
  return [join $new_lines "\n"]
}

######################################################################
# j:tc:get_file_contents filename -
#   get contents of file if it exists, else empty string
######################################################################

proc j:tc:get_file_contents { filename } {
  if [file readable $filename] {
    set file [open $filename]
    set contents [read -nonewline $file]
    close $file
    
    return $contents
  } else {
    return {}
  }
}

######################################################################
######################################################################
### tclrt mode
######################################################################
######################################################################

proc j:tc:tclrt:convert_text { t } {
  return [j:tc:generic_convert_text $t tclrt -break 1]
}

proc j:tc:tclrt:prologue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(tclrt,prologue)]
}

proc j:tc:tclrt:epilogue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(tclrt,epilogue)]
}

proc j:tc:tclrt:convert_string { text } {
  # prepend a backslash to some characters
  return [list $text]
}

proc j:tc:tclrt:tag_to_command { tag } {
  switch -glob $tag {
    *:roman		{return j:rt:rm}
    *:italic		{return j:rt:it}
    *:bold		{return j:rt:bf}
    *:bolditalic	{return j:rt:it}
    *:typewriter	{return j:rt:tt}
    *:heading0		{return j:rt:h0}
    *:heading1		{return j:rt:h1}
    *:heading2		{return j:rt:h2}
    *:heading3		{return j:rt:h3}
    *:heading4		{return j:rt:h4}
    *:heading5		{return j:rt:h5}
    *			{return j:rt:tt}
  }
}

proc j:tc:tclrt:convert_chunk { text tags } {
  set text [j:tc:tclrt:convert_string $text]
  set tag [j:tc:find_font_tag $tags]
  set command [j:tc:tclrt:tag_to_command $tag]
  
  return "$command $text\n"
}

######################################################################
######################################################################
### tex mode
######################################################################
######################################################################

proc j:tc:tex:convert_text { t } {
  return [j:tc:generic_convert_text $t tex -break 1]
}

proc j:tc:tex:prologue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(tex,prologue)]
}

proc j:tc:tex:epilogue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(tex,epilogue)]
}

### This really should be split - rules are different in \tt mode from
### in normal fonts.
### note that it requires a lot of help from the TeX prologue.

proc j:tc:tex:convert_string { text } {
  # prepend a backslash to some characters
  regsub -all -- {[${\\|}<>\\\\&#%_]} $text {\\\0} text
  regsub -all -- {[~^]} $text {\\\0{}} text
  # carriage returns become paragraph markers or line breaks:
  regsub -all -- "\n\n\n*" $text {\par } text
  regsub -all -- "\n" $text {\jtcnewline } text
  # tabs become whitespace (best we can do easily):
  regsub -all -- "\t" $text {\jtctab } text
  
  return $text
}

proc j:tc:tex:tag_to_command { tag } {
  switch -glob $tag {
    *:roman		{return \\jtcroman}
    *:italic		{return \\jtcitalic}
    *:bold		{return \\jtcbold}
    *:bolditalic	{return \\jtcbolditalic}
    *:typewriter	{return \\jtctypewriter}
    *:heading0		{return \\jtcheadingzero}
    *:heading1		{return \\jtcheadingone}
    *:heading2		{return \\jtcheadingtwo}
    *:heading3		{return \\jtcheadingthree}
    *:heading4		{return \\jtcheadingfour}
    *:heading5		{return \\jtcheadingfive}
    *			{return \\jtctypewriter}
  }
}

proc j:tc:tex:convert_chunk { text tags } {
  set text [j:tc:tex:convert_string $text]
  set tag [j:tc:find_font_tag $tags]
  set command [j:tc:tex:tag_to_command $tag]
  
  if [string match " *" $text] {
    return "$command \\$text%\n"
  } else {
    return "$command $text%\n"
  }
}

######################################################################
######################################################################
### html mode
######################################################################
######################################################################

proc j:tc:html:convert_text { t } {
  return [j:tc:generic_convert_text $t html -break 0]
}


proc j:tc:html:prologue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(html,prologue)]
}

proc j:tc:html:epilogue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(html,epilogue)]
}

proc j:tc:html:convert_string { text } {
  # need to translate <>& into "&entity;" sequences
  if [string match {*[<>&]*} $text] {
    set chars [split $text ""]
    set text ""
    foreach char $chars {
      if { ! [string match {[<>&]} $char] } {
        append text $char
      } else {
        switch -exact $char {
          {<} {
            append text "&lt;"
          }
          {>} {
            append text "&gt;"
          }
          {&} {
            append text "&amp;"
          }
        } ;# switch
      } ;# if char is <>&
    } ;# foreach
  } ;# if any <>&
  
  regsub -all -- "\n\n*" $text "\n<p>\n" text
  return [j:tc:break_string $text]
}

proc j:tc:html:tag_to_font { tag } {
  switch -glob $tag {
    *:roman		{return {}}
    *:italic		{return i}
    *:bold		{return b}
    *:bolditalic	{return em}
    *:typewriter	{return samp}
    *:heading0		{return h1}
    *:heading1		{return h2}
    *:heading2		{return h3}
    *:heading3		{return h4}
    *:heading4		{return h5}
    *:heading5		{return h5}
    *			{return samp}
  }
}

proc j:tc:html:convert_chunk { text tags } {
  set text [j:tc:html:convert_string $text]
  set before ""
  set after ""
  set tag [j:tc:find_font_tag $tags]
  set font [j:tc:html:tag_to_font $tag]
  if [string length $font] {
    set before "<$font>"
    set after "</$font>"
  }
  
  if [string match "* " $text] {
    return "$before$text$after\n"
  } else {
    return "$before$text$after"
  }
}

######################################################################
######################################################################
### postscript mode
######################################################################
######################################################################

proc j:tc:ps:convert_text { t } {
  return [j:tc:generic_convert_text $t ps -break 0]
}

proc j:tc:ps:prologue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(ps,prologue)]
}

proc j:tc:ps:epilogue { t } {
  global J_TAGCONVERT
  
  return [j:tc:get_file_contents \
    $J_TAGCONVERT(lib)/$J_TAGCONVERT(ps,epilogue)]
}

proc j:tc:ps:convert_string { text } {
  # prepend a backslash to some characters
  regsub -all -- {[()\\\\]} $text {\\\0} text
  # tabs become whitespace (best we can do easily):
  regsub -all -- "\t" $text {        } text
  
  return "($text)"
}

proc j:tc:ps:tag_to_command { tag } {
  switch -glob $tag {
    *:roman		{return roman}
    *:italic		{return italic}
    *:bold		{return bold}
    *:bolditalic	{return bolditalic}
    *:typewriter	{return typewriter}
    *:heading0		{return heading0}
    *:heading1		{return heading1}
    *:heading2		{return heading2}
    *:heading3		{return heading3}
    *:heading4		{return heading4}
    *:heading5		{return heading5}
    *			{return typewriter}
  }
}

proc j:tc:ps:convert_chunk { text tags } {
  set text [j:tc:ps:convert_string $text]
  set tag [j:tc:find_font_tag $tags]
  set command [j:tc:ps:tag_to_command $tag]
  
  return "$command $text breakshow\n"
}
