#!/usr/bin/tclsh # $Id: fickle.tcl,v 1.26 2002/07/12 16:01:36 Administrator Exp $ # fickle 1.00 by Jason Tang (tang@jtang.org). Please see the README # for usage instructions. Also see http://mini.net/tcl/fickle set IO_ERROR 1 set SYNTAX_ERROR 2 set PARAM_ERROR 3 set LOGIC_ERROR -1 set BUFFER_SIZE 1024 set HEADERS 1 set p "yy" set P "YY" ;# two types of start states allowed: set INCLUSIVE 0 set EXCLUSIVE 1 ;# copy everything between ^%\{$ to ^%\}$ to the destination file proc handle_literal_block {} { global src dest line_count set end_defs 0 while {$end_defs == 0} { if {[gets $src line] < 0} { incr line_count puts stderr "No terminator to verbatim section found." global SYNTAX_ERROR exit $SYNTAX_ERROR } elseif {[string equal [string trim $line] "%\}"]} { set end_defs 1 } else { puts $dest $line } incr line_count } } ;# processes the definitions section proc handle_defs {line} { global line_count BUFFER_SIZE SYNTAX_ERROR PARAM_ERROR INCLUSIVE EXCLUSIVE set line [string trim $line] ;# ignore blank lines if {[string equal $line ""]} { continue } elseif {[string equal $line "%\{"]} { handle_literal_block } else { ;# two main types are allowed: %xxxxx rules, and textual ;# substitutions ;# for both of them, extract the keyword to the left of the first ;# space, and the arguments (if any) to the right if {[regexp -line {^(\S+)\s+(.*)} $line foo keyword args] == 0} { set keyword $line set args "" } global sub_table state_table switch -- $keyword { "%s" { foreach state_name [split $args] { if {! [string equal $state_name ""]} { set state_table($state_name) $INCLUSIVE } } } "%x" { foreach state_name [split $args] { if {! [string equal $state_name ""]} { set state_table($state_name) $EXCLUSIVE } } } "%BUFSIZE" { if {[string equal $args ""]} { puts stderr "%BUFSIZE must have an integer parameter on line $line_count." exit $PARAM_ERROR } elseif {[expr [string is digit $args] && {$args > 0}]} { set BUFFER_SIZE $args } else { puts stderr "%BUFSIZE parameter must be positive integer on line $line_count." exit $PARAM_ERROR } } "%GLOBAL" { global global_args append global_args " $args" } "%NOCASE" { global scan_args append scan_args "-nocase" } "%NOHEADERS" { global HEADERS set HEADERS 0 } "%PREFIX" { global p P if {[string equal $args ""]} { puts stderr "%PREFIX must have a parameter on line $line_count." exit $PARAM_ERROR } elseif {[llength $args] > 1} { puts stderr "%PREFIX may have only one parameter on line $line_count." exit $PARAM_ERROR } else { set p [string tolower $args] set P [string toupper $args] } } "%SUPPRESS" { global suppress set suppress 1 } default { if {[string equal -len 1 $keyword "%"]} { puts stderr "Unknown directive $keyword on line $line_count." exit $SYNTAX_ERROR } set sub_table($keyword) $args } } } } ;# process the rules line proc handle_rule {line} { global rule_table src line_count ;# check for blank lines if {[string length [string trim $line]] == 0} { return } ;# first extract the regular expression part from the line set pattern "" set space_found 0 set i 0 set in_quotes 0 set brace_count 0 set bracket_count 0 set paren_count 0 while {$space_found == 0} { if {$i >= [string length $line]} { puts stderr "Improperly formed pattern: line $line_count." global SYNTAX_ERROR exit $SYNTAX_ERROR } set c [string index $line $i] ;# this ugly switch statement is to allow for whitespaces ;# within regexs; it also cleans up special characters by ;# adding backslashes where needed, and fixes how Tcl handles ;# quotation marks switch -regexp -- $c { {\\} { ;# skip over the next character as well append pattern [string range $line $i [expr $i + 1]] incr i 2 } {\{} { if {[expr {$brace_count == 0} && {$bracket_count == 0} &&\ {$in_quotes == 0}]} { incr brace_count } else { append pattern "\\" } append pattern $c incr i } {\}} { if {[expr {$brace_count == 1} && {$bracket_count == 0} &&\ {$in_quotes == 0}]} { incr brace_count -1 } else { append pattern "\\" } append pattern $c incr i } {\[} { if {[expr {$in_quotes == 0} && {$bracket_count == 0}]} { incr bracket_count } else { append pattern "\\" } append pattern $c incr i } {\]} { if {[expr {$in_quotes == 0} && {$bracket_count == 1}]} { incr bracket_count -1 } else { append pattern "\\" } append pattern $c incr i } {\(} { if {[expr {$in_quotes == 0} && {$bracket_count == 0}]} { incr paren_count } else { append pattern "\\" } append pattern $c incr i } {\)} { if {[expr {$in_quotes == 0} && {$bracket_count == 0}]} { incr paren_count -1 } else { append pattern "\\" } append pattern $c incr i } {\"} { if {$bracket_count == 0} { set in_quotes [expr ! $in_quotes] } else { append pattern "\\$c" } incr i } {[ \t]} { if {[expr {$brace_count == 0} && {$bracket_count == 0} && \ {$paren_count == 0} && {$in_quotes == 0}]} { set space_found 1 } else { append pattern $c incr i } } default { if {$in_quotes == 0} { append pattern $c } else { if {[regexp {[.*\[\]^$\{\}+?|/\(\)]} $c foo] > 0} { append pattern "\\" } append pattern $c } incr i } } } set orig_pattern [string range $line 0 $i] ;# check the pattern to see if it has a start state is indicated set state_name "" if {[regexp {^<([^>]+)>} $pattern foo state_name] > 0} { ;# a state was found; remove the tag from the pattern regsub {^<[^>]+>} $pattern "" pattern } ;# now that a pattern has been found, see if any textual ;# substitutions are needed global sub_table foreach sub_rule [array names sub_table] { ;# the quotes around the regexp below is necessary, to allow ;# for substitution of the sub_rule regsub -all "\{$sub_rule\}" $pattern \ "\($sub_table($sub_rule)\)" pattern } set line [string trimleft [string range $line $i end]] ;# now that a pattern has been found, determine the action ;# if the action does not start with a curly brace, then scan then ;# the rest of the line; otherwise, scan for the matching closing ;# brace, which may be several lines later set action "" if {[string equal -len 1 $line "\{"] == 0} { set action $line } else { set brace_count 1 set i 1 while {$brace_count > 0} { if {$i >= [string length $line]} { if {[gets $src line] < 0} { incr line_count puts stderr "Improperly formed action: line $line_count." global SYNTAX_ERROR exit $SYNTAX_ERROR } incr line_count append action "\n" set i 0 } set c [string index $line $i] if {[string equal $c "\\"]} { append action [string range $line $i [expr $i + 1]] incr i 2 } elseif {[string equal $c "\{"]} { incr brace_count append action "\{" incr i } elseif {[string equal $c "\}"]} { incr brace_count -1 if {$brace_count > 0} { append action "\}" } incr i } else { append action $c incr i } } } ;# special condition: if the action is merely a bar, then use the ;# next pattern's action if {[string equal [string trim $action] "|"]} { set action "" } lappend rule_table [list $orig_pattern $state_name $pattern $action] ;# puts stdout "DEBUG state: |$state_name| pattern: |$pattern| action: |$action|" } ;# upon reaching the subroutines section, copy everything thereafter ;# to the destination file proc handle_subroutines {} { global src dest line_count while {[gets $src line] >= 0} { incr line_count puts $dest $line } } ;# writes the utility functions to the destination; the programmer may ;# call these from actions, and choose to override these as necessary proc write_header {} { global dest BUFFER_SIZE p P HEADERS puts $dest " ;###### ;# autogenerated utility functions used by fickle; override as needed ;###### " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# If ${p}wrap() returns false (zero), then it is assumed that the ;# function has gone ahead and set up ${p}in to point to another input ;# file, and scanning continues. If it returns true (non-zero), then ;# the scanner terminates, returning 0 to its caller. Note that in ;# either case, the start condition remains unchanged; it does not ;# revert to INITIAL." } puts $dest "proc ${p}wrap \{\} \{ return 1 \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# ECHO copies ${p}text to the scanner's output \[if no arguments are ;# given\]. . . .The scanner writes its ECHO output to the ${p}out global ;# (default, stdout), which may be redefined by the user simply by ;# assigning it to some other FILE pointer." } puts $dest "proc ECHO \{\{s \"\"\}\} \{ if \{\[string equal \$s \"\"\]\} \{ upvar ${p}text local_${p}text set s \$local_${p}text \} global ${p}out puts -nonewline \$${p}out \$s \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# ${P}_FLUSH_BUFFER flushes the scanner's internal buffer so that the ;# next time the scanner attempts to match a token, it will first ;# refill the buffer using ${P}_INPUT." } puts $dest "proc ${P}_FLUSH_BUFFER \{\} \{ global ${p}_buffer ${p}_index ${p}_done set ${p}_buffer \"\" set ${p}_index 0 set ${p}_done 0 \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# The nature of how it gets its input can be controlled by defining ;# the ${P}_INPUT macro. ${P}_INPUT's calling sequence is ;# \"${P}_INPUT(buf,result,max_size)\". Its action is to place up to ;# max_size characters in the character array buf and return in the ;# integer variable result either the number of characters read or the ;# constant ${P}_NULL (0 on Unix systems) to indicate EOF. The default ;# ${P}_INPUT reads from the global file-pointer \"${p}in\"." } puts $dest "set ${P}_NULL 0 proc ${P}_INPUT \{buf result max_size\} \{ global ${p}in upvar \$result ret_val upvar \$buf new_data set new_data \[read \$${p}in \$max_size\] set ret_val \[string length \$new_data\] \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# unput(c) puts the character c back onto the input stream. It will ;# be the next character scanned. The following action will take the ;# current token and cause it to be rescanned enclosed in parentheses." } puts $dest "proc unput \{c\} \{ global ${p}_buffer ${p}_index set ${p}_buffer \[string replace \$${p}_buffer \$${p}_index \$${p}_index \\ \"\$c\[string index \$${p}_buffer \$${p}_index\]\"\] \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# input() reads the next character from the input stream. ;# ;# As with flex, please do NOT override this function." } puts $dest "proc input \{\} \{ global ${p}_buffer ${p}_index ${P}_NULL ${p}_done if \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] < $BUFFER_SIZE\} \{ set ${p}_buffer_size \$${P}_NULL if \{\$${p}_done == 0\} \{ ${P}_INPUT new_buffer ${p}_buffer_size $BUFFER_SIZE append ${p}_buffer \$new_buffer if \{\$${p}_buffer_size == \$${P}_NULL\} \{ set ${p}_done 1 \} \} if \{\$${p}_done == 1\} \{ if \{\[${p}wrap\] == 0\} \{ return \[input\] \} elseif \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] == 0\} \{ return \"\" \} \} \} set c \[string index \$${p}_buffer \$${p}_index\] incr ${p}_index return \$c \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# Pushes the current start condition onto the top of the start ;# condition stack and switches to new_state as though you had used ;# BEGIN new_state. ;# ;# Please do NOT override this function." } puts $dest "proc ${p}_push_state \{new_state\} \{ global ${p}_state_stack lappend ${p}_state_stack \$new_state \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# Pops off the top of the state stack; if the stack is now empty, then ;# pushes the state \"INITIAL\". ;# ;# Please do NOT override this function." } puts $dest "proc ${p}_pop_state \{\} \{ global ${p}_state_stack set ${p}_state_stack \[lrange \$${p}_state_stack 0 end-1\] if \{\[string equal \$${p}_state_stack \"\"\]\} \{ ${p}_push_state INITIAL \} \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# Returns the top of the stack without altering the stack's contents. ;# ;# Please do NOT override this function." } puts $dest "proc ${p}_top_state \{\} \{ global ${p}_state_stack return \[lindex \$${p}_state_stack end\] \} " if {$HEADERS} { puts $dest ";# \[from the flex(1) man page\]: ;# ;# BEGIN followed by the name of a start condition places the scanner ;# in the corresponding start condition. . . .Until the next BEGIN ;# action is executed, rules with the given start condition will be ;# active and rules with other start conditions will be inactive. If ;# the start condition is inclusive, then rules with no start ;# conditions at all will also be active. If it is exclusive, then ;# only rules qualified with the start condi tion will be active. ;# ;# Please do NOT override this function." } puts $dest "proc BEGIN \{new_state\ \{prefix \"yy\"\}\} \{ eval global \${prefix}_state_stack eval set \${prefix}_state_stack \[lrange \$\${prefix}_state_stack 0 end-1\] eval lappend \${prefix}_state_stack \$new_state \} ;###### ;# end autogenerated utility functions ;###### " } ;# starts building the yy_lex() function; called when the rules section ;# is about to begin proc start_rules {} { global dest global_args state_table BUFFER_SIZE p P puts $dest " ;###### ;# autogenerated ${p}lex function by fickle -- modify at your own peril ;###### proc ${p}lex \{\} \{ global ${p}_first_time ${p}_buffer ${p}_index ${p}_state_stack global ${p}_state_table ${p}in ${p}out ${P}_NULL ${p}_done" if {! [string equal $global_args ""]} { puts $dest " global $global_args" } puts -nonewline $dest " if \{\[info exists ${p}_first_time\] == 0\} \{ set ${p}_first_time \"\" set ${p}_buffer \"\" set ${p}_buffer_size \$${P}_NULL set ${p}_index 0 set ${p}_state_stack \"\" set ${p}_done 0 BEGIN INITIAL ${p} array set ${p}_state_table \[list " ;# write the state table to the file puts -nonewline $dest [array get state_table] puts $dest "\] if \{\[info exists ${p}in\] == 0\} \{ set ${p}in \"stdin\" \} if \{\[info exists ${p}out\] == 0\} \{ set ${p}out \"stdout\" \} \} while \{1\} \{ set ${p}_current_state \[${p}_top_state\] if \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] < $BUFFER_SIZE\} \{ if \{\$${p}_done == 0\} \{ set new_buffer \"\" ${P}_INPUT new_buffer ${p}_buffer_size $BUFFER_SIZE append ${p}_buffer \$new_buffer if \{\$${p}_buffer_size == \$${P}_NULL\ && \\ \[expr \[string length \$${p}_buffer\] - \$${p}_index\] == 0\} \{ set ${p}_done 1 \} \} if \{\$${p}_done == 1\} \{ if \{\[${p}wrap\] == 0\} \{ set ${p}_done 0 continue \} elseif \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] == 0\} \{ break \} \} \} set ${p}text \"\" set ${p}_matched_rule -1" } ;# stop yy_lex() function; called when exiting the rules section proc end_rules {} { global dest rule_table scan_args suppress EXCLUSIVE p P ;# build up the if statements to determine which rule to execute; ;# lex is greedy, and will use the rule that matches the most ;# strings if {[llength $rule_table] > 0} { set rule_num 0 foreach rule $rule_table { set orig_pattern [lindex $rule 0] set state_name [lindex $rule 1] set pattern [lindex $rule 2] ;# puts stdout "DEBUG: ($rule_num). $rule" ;# if the state is "*", then this state will *always* match puts $dest " ;# rule $rule_num: $orig_pattern" if {[string equal $state_name "*"]} { puts $dest " if \{\[expr \\" } elseif {[string equal $state_name ""]} { puts $dest " if \{\[expr \\ \{\$${p}_state_table(\$${p}_current_state) != $EXCLUSIVE\} && \\" } else { ;# do logic to figure out states puts $dest " if \{\[expr \\ \{\[string equal \$${p}_current_state $state_name\]\} && \\" } puts $dest " \{\[regexp -start \$${p}_index -indices -line $scan_args -- \{$pattern\} \$${p}_buffer ${p}_match\] > 0\} && \\ \{\[lindex \$${p}_match 0\] == \$${p}_index\}\]\} \{ if \{\[expr \[lindex \$${p}_match 1\] - \$${p}_index + 1\] > \[string length \$${p}text\]\} \{ set ${p}text \[string range \$${p}_buffer \$${p}_index \[lindex \$${p}_match 1\]\] set ${p}_matched_rule $rule_num \} \}" incr rule_num } ;# now add the default case puts $dest " if \{\$${p}_matched_rule == -1\} \{ set ${p}text \[string index \$${p}_buffer \$${p}_index\] \}" } else { ;# no rules were defined at all, so need to slightly adjust output puts $dest " set ${p}text \[string index \$${p}_buffer \$${p}_index\]" } puts $dest " set ${p}leng \[string length \$${p}text\] incr ${p}_index \$${p}leng ;# workaround for stupid circumflex behavior if \{\[string equal \[string index \$${p}text end\] \"\\n\"\]\} \{ set ${p}_buffer \[string range \$${p}_buffer \$${p}_index end\] set ${p}_index 0 \} switch -- \$${p}_matched_rule \{" set rule_num 0 foreach rule $rule_table { puts $dest " $rule_num" if {[string length [lindex $rule 3]] == 0} { ;# action is empty, so use next pattern's action puts $dest " -" } else { puts $dest " \{" ;# output the action associated with the rule foreach action_line [split [lindex $rule 3] "\n"] { puts $dest " $action_line" } puts $dest " \}" } incr rule_num } puts $dest " default" if {$suppress == 0} { puts $dest " \{ ECHO \}" } else { puts $dest " \{ puts stderr \"unmatched token: \$${p}text in state \$${p}_current_state\" ; exit -1 \}" } puts $dest " \} \} return 0 \} ;###### ;# end autogenerated data ;###### " } ;############################################################# ;# start of actual script if {[llength $argv] >= 1} { set force_overwrite 0 if {[string equal [lindex $argv 0] "--force"]} { set force_overwrite 1 } set in_filename [lindex $argv end] set out_filename [file rootname $in_filename] append out_filename ".tcl" if {[expr [file exists $out_filename] && {$force_overwrite != 1}]} { puts stderr "Destination already exists. (Override with --force.)" exit $IO_ERROR } if {[catch {open $in_filename r} src]} { puts stderr "Could not open source file." exit $IO_ERROR } if {[catch {open $out_filename w} dest]} { puts stderr "Could not open destination file." exit $IO_ERROR } } else { set src "stdin" set dest "stdout" } ;# indicates the state of the file being scanned ;# valid states are 'defs', 'rules', and 'sub'[routines] set file_state "defs" set rule_table "" set line_count 0 set scan_args "" set suppress 0 set global_args "" ;# set up the INITIAL start state to be a normal inclusionary state set state_table(INITIAL) $INCLUSIVE while {[gets $src line] >= 0} { global line_count incr line_count set line [string trim $line] ;# ignore blank lines if {[string equal $line ""]} { continue } elseif {[string equal $line "%%"]} { ;# figure out what state to switch to if {[string equal $file_state "rules"]} { end_rules set file_state "sub" handle_subroutines } elseif {[string equal $file_state "defs"]} { write_header ;# puts stdout "DEBUG:" ;# foreach {state val} [array get state_table] { ;# puts stdout " $state: $val" ;# } start_rules set file_state "rules" } } else { ;# no special symbol found, so handle the line if {[string equal $file_state "defs"]} { handle_defs $line } elseif {[string equal $file_state "rules"]} { handle_rule $line } else { puts stderr "logic error: in state $file_state on line $line" exit $LOGIC_ERROR } } } ;# EOF reached -- make sure to close off the yy_lex() function, in case ;# there was no explicit subroutines section if {[string equal $file_state "rules"]} { end_rules } close $src close $dest