(*
 * footnotes.ml
 *
 * copyright 2008 by Andreas Romeyke (fa.romeyke@web.de)
 *
 * Competition of Linux Magazine 2008-10 to write a footnote sorter
 * in different languages.
 *
 * compile it with:
 *   ocamlopt.opt -I /usr/lib/ocaml/3.10.1/extlib/ -o footnotes -unsafe \
 *     unix.cmxa extLib.cmxa str.cmxa footnotes.ml
 *
 * (please replace path to your extlib to compile)
 *
 * execute it with:
 *   cat input_text | ./footnotes -u > used_footnotes.txt
 * or with
 *   cat input_text | ./footnotes -o > ordered_footnotes.txt
 *
 * The program works correctly only if the keyword "@footnote:" is on a
 * separate line. The text should be formatted in unix style, otherwise the
 * carriage return is lost.
 *
 * In used mode the program only detects unreferenced indices in the text
 * part and aborts. In ordered mode it only looks at the footnotes part and
 * detects lines without index markers. With a little bit more overhead the
 * program could detect all errors in every mode.
 *
 * The program is tested successfully under Debian GNU/Linux with OCaml 3.10
 *
 * Have fun... :)
 *)

open Str;;
open ExtLib;;

exception ParseError of string;;
exception WrongArgument of string;;
exception MissedIdx of string;;

(* Writes msg together with the processor time consumed so far to stderr
 * and flushes, so progress is visible even if the program aborts later. *)
let debug msg =
  Printf.eprintf "%s time:%f\n" msg (Sys.time ());
  flush stderr
;;

(* Tokenizes every input line on index markers of the form [digits].
 * Returns one flat list of (Delim d) for each marker and (Text t) for the
 * text between markers, in input order. *)
let lexer (input: string list) =
  (* compiled once here instead of once per input line *)
  let index_marker = Str.regexp "\\[[0-9]+\\]" in
  List.flatten
    (List.map (fun line -> Str.full_split index_marker line) input)
;;

(* Splits the token stream into (text, footnotes).
 * Tokens up to and including the first Text token that contains a line
 * consisting only of "@footnote:" go to text; tokens after it go to
 * footnotes. Any further token matching the keyword is put into text
 * again. Both lists keep the original token order. *)
let split_on_footnotes tokens =
  (* compiled once here instead of once per token *)
  let footnote_keyword = Str.regexp "^@footnote:$" in
  let match_footnotes = function
    | Delim _ -> false
    | Text w ->
      (try
         ignore (Str.search_forward footnote_keyword w 0);
         true
       with Not_found -> false)
  in
  let rec splitter text footnotes has_footnotes = function
    | [] -> (List.rev text, List.rev footnotes)
    | hd :: tl when match_footnotes hd ->
      splitter (hd :: text) footnotes true tl
    | hd :: tl when has_footnotes ->
      splitter text (hd :: footnotes) has_footnotes tl
    | hd :: tl ->
      splitter (hd :: text) footnotes has_footnotes tl
  in
  splitter [] [] false tokens
;;

(* Helper to keep the hash unique: binds key to value only if key has no
 * binding yet, so the first value assigned to a key wins. *)
let add_key_if_new hash (key:string) (value:int) =
  if not (Hashtbl.mem hash key) then Hashtbl.add hash key value
;;

(* Maps every distinct index marker found in the given tokens to its rank
 * of first appearance (1, 2, 3, ...). Returns the hash marker -> rank. *)
let build_used_order text =
  let ordered = Hashtbl.create 0 in
  List.iter
    (function
      | Delim d -> add_key_if_new ordered d (Hashtbl.length ordered + 1)
      | Text _ -> ())
    text;
  ordered
;;

(* Prints the text with every index marker replaced by its new number from
 * hash, followed by the footnotes sorted by their new number, to stdout.
 * The footnotes are validated and sorted before anything is printed.
 * Raises ParseError if the footnotes section is not a sequence of
 * (index, text) pairs, if a footnote index is missing in hash, or if an
 * index used in the text is missing in hash. *)
let print text footnotes hash =
  let rec comb_idx_with_txt accu = function
    | [] -> List.rev accu
    | Delim i :: Text t :: tl ->
      (* only the lookup is guarded, so the recursive call below stays a
       * tail call and no handler is stacked per footnote *)
      let d =
        try Hashtbl.find hash i
        with Not_found ->
          raise (ParseError ("an (idx,txt) pair in footnotes section is not referenced i='" ^ i ^ "'"))
      in
      comb_idx_with_txt ((d, t) :: accu) tl
    (* text starting with a newline (e.g. the rest of the keyword line or
     * an empty line) is skipped; the emptiness check keeps the index
     * access safe when compiled with -unsafe *)
    | Text t :: tl when t <> "" && t.[0] = '\n' ->
      comb_idx_with_txt accu tl
    | Delim i :: _ ->
      raise (ParseError ("not an (idx,txt) pair in footnotes section i='" ^ i ^ "'"))
    | Text t :: _ ->
      raise (ParseError ("not an (idx,txt) pair in footnotes section t='" ^ t ^ "'"))
  in
  let sort_footnotes =
    (* TODO: check if Space changes the sort order, test, if sort is
     * necessary.. *)
    let cmp ((a : int), _) ((b : int), _) = compare a b in
    List.sort ~cmp:cmp (comb_idx_with_txt [] footnotes)
  in
  List.iter
    (function
      | Text t -> print_string t
      | Delim d ->
        (try Printf.printf "[%i]" (Hashtbl.find hash d)
         with Not_found ->
           raise (ParseError ("an index in text has no reference in footnotes section i='" ^ d ^ "'"))))
    text;
  List.iter (fun (idx, txt) -> Printf.printf "[%i]%s" idx txt) sort_footnotes
;;

(* Reads all of STDIN and returns the list of lines in input order, each
 * one terminated by a newline (also the last one). Uses a loop with an
 * accumulator so the stack does not grow with the number of lines. *)
let readin () =
  debug ">> readin";
  let lines = ref [] in
  (try
     while true do
       lines := (input_line stdin ^ "\n") :: !lines
     done
   with End_of_file -> ());
  List.rev !lines
;;

(* MAIN
 * Expects exactly one argument:
 *   -o  numbers the indices by first appearance in the text part
 *   -u  numbers the indices by first appearance in the footnotes part
 * Raises WrongArgument on a wrong argument count or an unknown option. *)
let () =
  if Array.length Sys.argv <> 2 then
    raise (WrongArgument "wrong count of Argv");
  let tokens = lexer (readin ()) in
  debug ">> lexed";
  let (text, footnotes) = split_on_footnotes tokens in
  debug ">> splitted";
  let ordered_hash =
    match Sys.argv.(1) with
    | "-o" -> build_used_order text
    | "-u" -> build_used_order footnotes
    | other -> raise (WrongArgument other)
  in
  debug ">> built";
  print text footnotes ordered_hash;
  debug ">>printed"
;;