/[mojave]/metaprl/mllib/comment_parse.mll
ViewVC logotype

Annotation of /metaprl/mllib/comment_parse.mll

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3591 - (hide annotations) (download)
Sun Apr 28 19:51:58 2002 UTC (19 years, 3 months ago) by nogin
File size: 19332 byte(s)
- Added an option to be have "THEORIES=all" in mk/config
(instead of THEORIES="long list of theories").
I will change all my nightly scripts to use THEORIES=all.

- Added mc to the default list of TEXTHEORIES. Added a file all-bodies.tex
that would automatically contain \inputs corresponding to TEXTHEORIES.
(Before this change we had to update all-theories.tex by hands whenever TEXTHEORIES
was changed).

- Minor fixes in some display forms.

- Removed theories/caml that never had anything useful.

- Removed a few files from theories/ocaml_doc that seemed to be there by accident
(Jason, can you confirm?).

1 jyh 3057 (*
2     * This is a simple lexer to extract terms from a comment string.
3     * Grammar:
4     *
5     * Text is a sequence of tokens containing strings, terms, and quotations
6     * Literal-text is any sequence of chars not containing the trailing delimiter.
7     *
8     * For text:
9     * 1. Whitespace is ignored
10     * 2. Strings are any sequence on non-special chars
11     * 3. Quoted strings are surrounded by double-quotes
12     * 4. There are three types of terms:
13     * a. @opname[params]{args}
14     * Opname is an alphnumeric sequence, or a quoted string
15     * [params] are optional; a param is a string or quoted string
16     * {args} are optional; a sequence of text separated by ;
17     *
18     * b. @begin[name]
19     * text
20     * @end[name]
21     *
22     * This builds a term @name{text}
23     *
24     * c. $literal-str$
25     * This builds the term @math[str]
26     *
27     * 5. Quotations have the form
28     * <<literal-str>> and <:tag<literal-str>>
29     * are also allowed.
30     *
31     * Special forms:
32     *
33     * Quotations can be nested:
34     * TokQuote ("", text): << text >>
35     * TokQuote (tag, text): <:tag< text >>
36     *
37     * TokName: @opname
38     * TokString s: any sequence of non-whitespace, non-special chars
39     * TokQString s: any text between double-quotes
40     * TokMath s: any literal text between $
41     *
42     * TokWhite: whitespace
43     * TokLeftBrace: {
44     * TokRightBrace: }
45     * TokLeftBrack: [
46     * TokRightBrack: ]
47     * TokComma: ,
48     * TokSemi: ;
49     *
50     * The lexer removes the first leading * on each line of the
51     * input string.
52     *
53     * ----------------------------------------------------------------
54     *
55     * Copyright (C) 2000 Jason Hickey, Caltech
56     *
57     * This program is free software; you can redistribute it and/or
58     * modify it under the terms of the GNU General Public License
59     * as published by the Free Software Foundation; either version 2
60     * of the License, or (at your option) any later version.
61     *
62     * This program is distributed in the hope that it will be useful,
63     * but WITHOUT ANY WARRANTY; without even the implied warranty of
64     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
65     * GNU General Public License for more details.
66     *
67     * You should have received a copy of the GNU General Public License
68     * along with this program; if not, write to the Free Software
69     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
70     *
71     * Author: Jason Hickey
72     * jyh@cs.caltech.edu
73     *)
74    
75     {
76     (*
77     * A program is a sequence of strings and terms.
78     *)
79     type t = item list
80    
81     and item =
82     White
83     | String of string
84     | Term of string list * string list * t list
85     | Quote of string * string
86     | Block of t
87    
88     (*
89     * Tokens.
90     *)
91     type token =
92     TokWhite of bool
93     | TokQString of string
94     | TokMath of bool
95     | TokString of bool * string
96     | TokQuote of string * string
97     | TokName of string
98     | TokLeftBrace
99     | TokRightBrace
100     | TokSpecial of char
101     | TokEof
102    
103     type code_token =
104     CodeString of string
105     | CodeEnd
106    
107     (*
108     * Items returned by the item parser.
109     *)
110     type item_item =
111     ItemItem of item
112     | ItemMath of bool
113     | ItemEnd of string list
114     | ItemSpecial of char
115     | ItemBrace
116     | ItemEof
117    
118     (*
119     * Modes.
120     * The parser can be looking for arguments (so the ';' and ','
121     * chars are special),
122     * and it can be in math mode (so the '_' and '^' chars are special)
123     *)
124     type mode =
125     ModeNormal
126     | ModeArg
127     | ModeMath
128     | ModeArgMath
129    
130     (*
131     * Test for special chars.
132     *)
133     let is_special mode c =
134     let special =
135     match mode with
136     ModeNormal -> []
137     | ModeArg -> [';']
138     | ModeMath -> ['_'; '^']
139     | ModeArgMath -> ['_'; '^'; ';']
140     in
141     List.mem c special
142    
143     (*
144     * Test for math mode.
145     *)
146     let is_math_mode = function
147     ModeMath
148     | ModeArgMath ->
149     true
150     | ModeNormal
151     | ModeArg ->
152     false
153    
154     (*
155     * Move to arg mode.
156     *)
157     let arg_mode = function
158     ModeNormal
159     | ModeArg ->
160     ModeArg
161     | ModeMath
162     | ModeArgMath ->
163     ModeArgMath
164    
165     let math_mode = function
166     ModeNormal
167     | ModeMath ->
168     ModeMath
169     | ModeArg
170     | ModeArgMath ->
171     ModeArgMath
172    
173     let non_arg_mode = function
174     ModeNormal
175     | ModeArg ->
176     ModeNormal
177     | ModeMath
178     | ModeArgMath ->
179     ModeMath
180    
181     (*
182     * Termination items.
183     *)
184     type terminator =
185     TermEof
186     | TermBrace
187     | TermSemi
188     | TermMath of bool
189     | TermEnd of string list
190    
191     (*
192     * State for parsing quotations.
193     *)
194     let level = ref 0
195     let tag = ref ""
196     let buffer = Buffer.create 19
197    
198     let set_tag tag' =
199     tag := tag';
200     Buffer.clear buffer
201    
202     let add_string s =
203     Buffer.add_string buffer s
204    
205     let flush_buffer () =
206     let s = Buffer.contents buffer in
207     Buffer.clear buffer;
208     TokQuote (!tag, s)
209    
210     let flush_string () =
211     let s = Buffer.contents buffer in
212     Buffer.clear buffer;
213     s
214    
215     (*
216     * Pushback buffer.
217     *)
218     type token_buffer =
219     { lexbuf : Lexing.lexbuf;
220     mutable tokens : token list
221     }
222    
223     (*
224     * Errors.
225     *)
226     exception Parse_error of string * int * int
227    
228     let parse_error_buf s lexbuf =
229     let loc1 = Lexing.lexeme_start lexbuf in
230     let loc2 = Lexing.lexeme_start lexbuf in
231     raise (Parse_error (s, loc1, loc2))
232    
233     let parse_error s buf =
234     parse_error_buf s buf.lexbuf
235     }
236    
237     let white = [' ' '\t']+
238     let optwhite = [' ' '\t']*
239 nogin 3591 let newline = ['\r' '\n']+
240 jyh 3057 let name = ['a'-'z''A'-'Z']+
241     let number = ['0'-'9']+
242     let special = ['[' ']' ';' ',' '_' '^' '!']
243    
244     rule main = parse
245     (* White space *)
246 nogin 3591 newline optwhite '*'
247     | newline
248 jyh 3057 { TokWhite true }
249     | white
250     { TokWhite false }
251    
252 jyh 3062 (* Nested comments *)
253     | "(*"
254     { comment lexbuf; main lexbuf }
255    
256 jyh 3057 (* Quotations *)
257     | "<<"
258     { set_tag "";
259     quotation lexbuf
260     }
261     | "<:" name '<'
262     { let buf = Lexing.lexeme lexbuf in
263     set_tag (String.sub buf 2 (pred (String.length buf)));
264     quotation lexbuf
265     }
266    
267     (* This is temporary *)
268     | '\\'
269     { parse_error_buf "fix this backslash" lexbuf }
270    
271     (* Strings *)
272     | '"'
273     { TokQString (string lexbuf) }
274    
275     (* Special tokens *)
276     | "$$"
277     { TokMath true }
278     | '$'
279     { TokMath false }
280     | '@'
281     { opname lexbuf }
282     | '{'
283     { TokLeftBrace }
284     | '}'
285     { TokRightBrace }
286     | special
287     { TokSpecial (Lexing.lexeme_char lexbuf 0) }
288    
289     (* Alphanumeric names *)
290 nogin 3444 | name | number
291 jyh 3057 { TokString (true, Lexing.lexeme lexbuf) }
292     | _
293     { TokString (false, Lexing.lexeme lexbuf) }
294    
295     | eof
296     { TokEof }
297    
298     (*
299 jyh 3062 * Comments.
300     *)
301     and comment = parse
302     "(*"
303     { comment lexbuf; comment lexbuf }
304     | "*)"
305     { () }
306     | _
307     { comment lexbuf }
308     | eof
309     { parse_error_buf "comment is not terminated" lexbuf }
310    
311     (*
312 jyh 3057 * Read the first string in the opname.
313     *)
314     and opname = parse
315     name
316     { TokName (Lexing.lexeme lexbuf) }
317     | '"'
318     { TokName (string lexbuf) }
319     | _
320     { TokString (false, Lexing.lexeme lexbuf) }
321     | eof
322     { parse_error_buf "opname is not terminated" lexbuf }
323    
324     (*
325     * Quotations.
326     * Watch for nested quotations.
327     *)
328     and quotation = parse
329     "<<"
330     { add_string "<<";
331     incr level;
332     quotation lexbuf
333     }
334     | "<:" name '<'
335     { add_string (Lexing.lexeme lexbuf);
336     incr level;
337     quotation lexbuf
338     }
339     | ">>"
340     { if !level = 0 then
341     flush_buffer ()
342     else
343     begin
344     add_string ">>";
345     decr level;
346     quotation lexbuf
347     end
348     }
349     | _
350     { add_string (Lexing.lexeme lexbuf);
351     quotation lexbuf
352     }
353     | eof
354     { parse_error_buf "quotation is not terminated" lexbuf }
355    
356     (*
357     * Strings.
358     * Remove escaped eol.
359     *)
360     and string = parse
361     '"' (* '"' *)
362     { flush_string () }
363     | '\\'
364     { escape lexbuf }
365     | _
366     { add_string (Lexing.lexeme lexbuf);
367     string lexbuf
368     }
369     | eof
370     { parse_error_buf "string is not terminated" lexbuf }
371    
372     and escape = parse
373 nogin 3591 newline
374 jyh 3057 { string lexbuf }
375     | _
376     { add_string (Lexing.lexeme lexbuf);
377     string lexbuf
378     }
379     | eof
380     { parse_error_buf "escape sequence is not terminated" lexbuf }
381    
382     (*
383     * Literal forms.
384     *)
385     and code_string_brace = parse
386 nogin 3591 newline optwhite '*'
387 jyh 3057 { CodeString "\n" }
388     | '}'
389     { CodeEnd }
390     | _
391     { CodeString (Lexing.lexeme lexbuf) }
392     | eof
393     { parse_error_buf "code string is not terminated" lexbuf }
394    
395     and code_string_end = parse
396 nogin 3591 newline optwhite '*'
397 jyh 3057 { CodeString "\n" }
398     | "@end[verbatim]"
399     | "@end[literal]"
400     { CodeEnd }
401     | _
402     { CodeString (Lexing.lexeme lexbuf) }
403     | eof
404     { parse_error_buf "code block is not terminated" lexbuf }
405    
406     {
407     (*
408     * In math mode, add the "math_" prefix to the opname.
409     *)
410     let rec mk_math_opname = function
411     [name] ->
412     ["math_" ^ name]
413     | h :: t ->
414     h :: mk_math_opname t
415     | [] ->
416     raise (Invalid_argument "Comment_parse.mk_math_opname")
417    
418     let mk_opname mode opname =
419     if is_math_mode mode then
420     mk_math_opname opname
421     else
422     opname
423    
424     (************************************************************************
425     * Pushback buffer.
426     *)
427     let parse_token buf =
428     match buf with
429     { tokens = token :: t } ->
430     buf.tokens <- t;
431     token
432     | { lexbuf = lexbuf } ->
433     main lexbuf
434    
435     let push_back token buf =
436     buf.tokens <- token :: buf.tokens
437    
438     (*
439     * Special forms.
440     *)
441     let parse_code_string_brace buf =
442     assert (buf.tokens = []);
443     code_string_brace buf.lexbuf
444    
445     let parse_code_string_end buf =
446     assert (buf.tokens = []);
447     code_string_end buf.lexbuf
448    
449     (************************************************************************
450     * Toplevel recursive-descent parser.
451     * term: the expected terminator for this block
452     * mode: the current parsing mode
453     * items: the list of items collected so far (in reverse order)
454     * buf: the token buffer
455     *)
456     let rec parse_block term mode items buf =
457     let item = parse_item mode buf in
458     match item with
459     ItemItem item ->
460     parse_block term mode (item :: items) buf
461     | ItemSpecial '_' ->
462     parse_math_script term mode "math_subscript" items buf
463     | ItemSpecial '^' ->
464     parse_math_script term mode "math_superscript" items buf
465     | ItemMath flag ->
466     finish_block term (TermMath flag) items buf
467     | ItemEnd tag ->
468     finish_block term (TermEnd tag) items buf
469     | ItemSpecial ';' ->
470     finish_block term TermSemi items buf
471     | ItemBrace ->
472     finish_block term TermBrace items buf
473     | ItemEof ->
474     finish_block term TermEof items buf
475     | ItemSpecial _ ->
476     parse_error "illegal special character" buf
477    
478     (*
479     * Found a terminator.
480     *)
481     and finish_block term term' items buf =
482     if not (List.mem term' term) then
483     parse_error "terminator mismatch" buf;
484     term', List.rev items
485    
486     (*
487     * Math mode sub/superscripts.
488     *)
489     and parse_math_script term mode opname items buf =
490     match items, parse_item (math_mode mode) buf with
491     item :: items, ItemItem item' ->
492     let items = Term ([opname], [], [[item]; [item']]) :: items in
493     parse_block term mode items buf
494     | _ ->
495     parse_error "illegal sub/superscript operation" buf
496    
497     (*
498     * Parse the entire next item.
499     *)
500     and parse_item mode buf =
501     let token = parse_token buf in
502     match token with
503     TokWhite is_nl_flag ->
504     ItemItem (parse_white is_nl_flag buf)
505     | TokMath flag ->
506     if is_math_mode mode then
507     ItemMath flag
508     else
509     let opname =
510     if flag then
511     "centermath"
512     else
513     "math"
514     in
515     let _, items = parse_block [TermMath flag] ModeMath [] buf in
516     ItemItem (Term ([opname], [], [items]))
517     | TokName s ->
518     parse_term mode s buf
519     | TokQuote (tag, next) ->
520     ItemItem (Quote (tag, next))
521     | TokQString s ->
522     ItemItem (String ("\"" ^ s ^ "\""))
523     | TokString (_, s) ->
524     ItemItem (String s)
525     | TokLeftBrace ->
526     let _, items = parse_block [TermBrace] (non_arg_mode mode) [] buf in
527     ItemItem (Block items)
528     | TokRightBrace ->
529     ItemBrace
530     | TokSpecial c ->
531     if is_special mode c then
532     ItemSpecial c
533     else
534     ItemItem (String (String.make 1 c))
535     | TokEof ->
536     ItemEof
537    
538     (*
539     * Adjacent non-nl whitespace is concatenated.
540     *)
541     and parse_white is_nl_flag buf =
542     let token = parse_token buf in
543     match token with
544     TokWhite is_nl_flag' ->
545     if is_nl_flag && is_nl_flag' then
546     begin
547     push_back token buf;
548     White
549     end
550     else
551     parse_white (is_nl_flag || is_nl_flag') buf
552     | _ ->
553     push_back token buf;
554     White
555    
556     (*
557     * Adjacent strings are concatenated.
558     *)
559     and parse_strings mode s buf =
560     let buffer = Buffer.create 19 in
561     Buffer.add_string buffer s;
562     parse_strings' mode buffer buf
563    
564     and parse_strings' mode buffer buf =
565     let token = parse_token buf in
566     match token with
567     TokString (_, s) ->
568     add_string mode s buffer buf
569     | TokSpecial c ->
570     if is_special mode c then
571     flush_string token buffer buf
572     else
573     add_char mode c buffer buf
574     | TokWhite _
575     | TokMath _
576     | TokQString _
577     | TokQuote _
578     | TokName _
579     | TokLeftBrace
580     | TokRightBrace
581     | TokEof ->
582     flush_string token buffer buf
583    
584     and add_string mode s buffer buf =
585     Buffer.add_string buffer s;
586     parse_strings' mode buffer buf
587    
588     and add_char mode c buffer buf =
589     Buffer.add_char buffer c;
590     parse_strings' mode buffer buf
591    
592     and flush_string token buffer buf =
593     push_back token buf;
594     String (Buffer.contents buffer)
595    
596     (*
597     * Parse a term.
598     * There are several mode cases to consider.
599     *)
600     and parse_term mode s buf =
601     let opname = parse_opname mode s buf in
602     let params = parse_params mode false buf in
603     if opname = ["code"] || opname = ["email"] then
604     let s = parse_code_arg buf in
605     ItemItem (Term (opname, [s], []))
606     else
607     let args = parse_args mode false buf in
608     let args =
609     if args = [[]] then
610     []
611     else
612     args
613     in
614     (* Mode cases *)
615     match opname, params, args with
616     ["begin"], [[tag]], [] when tag = "verbatim" || tag = "literal" ->
617     let s = parse_code_block buf in
618     ItemItem (Term ([tag], [s], []))
619     | ["begin"], tag :: params, [] ->
620     let _, args = parse_block [TermEnd tag] mode [] buf in
621     let opname = mk_opname mode tag in
622     ItemItem (Term (opname, flatten_params params, [args]))
623     | ["end"], [tag], [] ->
624     ItemEnd tag
625     | _ ->
626     let opname = mk_opname mode opname in
627     ItemItem (Term (opname, flatten_params params, args))
628    
629     (*
630     * Code blocks.
631     *)
632     and parse_code_arg buf =
633     let buffer = Buffer.create 19 in
634     match parse_token buf with
635     TokLeftBrace ->
636     parse_code_arg' buffer buf
637     | token ->
638     push_back token buf;
639     ""
640    
641     and parse_code_arg' buffer buf =
642     match parse_code_string_brace buf with
643     CodeString s ->
644     Buffer.add_string buffer s;
645     parse_code_arg' buffer buf
646     | CodeEnd ->
647     Buffer.contents buffer
648    
649     and parse_code_block buf =
650     let buffer = Buffer.create 19 in
651     buf.tokens <- [];
652     parse_code_block' buffer buf
653    
654     and parse_code_block' buffer buf =
655     match parse_code_string_end buf with
656     CodeString s ->
657     Buffer.add_string buffer s;
658     parse_code_block' buffer buf
659     | CodeEnd ->
660     Buffer.contents buffer
661    
662     (*
663     * Opname.
664     *)
665     and parse_opname mode s buf =
666     let buffer = Buffer.create 19 in
667     Buffer.add_string buffer s;
668     parse_opname_name mode buffer [] buf
669    
670     and parse_opname_name mode buffer opname buf =
671     let token = parse_token buf in
672     match token with
673 nogin 3444 TokString (true, s) | TokQString s ->
674 jyh 3057 add_opname_string mode buffer opname s buf
675     | TokSpecial '_' ->
676     if is_math_mode mode then
677     flush_opname token buffer opname buf
678     else
679     add_opname_string mode buffer opname "_" buf
680     | TokSpecial '!' ->
681     push_opname mode buffer opname buf
682     | _ ->
683     flush_opname token buffer opname buf
684    
685     and add_opname_string mode buffer opname s buf =
686     Buffer.add_string buffer s;
687     parse_opname_name mode buffer opname buf
688    
689     and flush_opname token buffer opname buf =
690     let s = Buffer.contents buffer in
691     let opname =
692     if s = "" then
693     opname
694     else
695     s :: opname
696     in
697     Buffer.clear buffer;
698     push_back token buf;
699     List.rev opname
700    
701     and push_opname mode buffer opname buf =
702     let s = Buffer.contents buffer in
703     let opname =
704     if s = "" then
705     opname
706     else
707     s :: opname
708     in
709     Buffer.clear buffer;
710     parse_opname_name mode buffer opname buf
711    
712     (*
713     * Param list is a list of opnames.
714     *)
715     and parse_params mode found_white buf =
716     let token = parse_token buf in
717     match token with
718     TokWhite false ->
719     parse_params mode true buf
720     | TokSpecial '[' ->
721     let buffer = Buffer.create 19 in
722     parse_inner_params mode buffer [] buf
723     | _ ->
724     push_back token buf;
725     if found_white then
726     push_back (TokWhite false) buf;
727     []
728    
729     and parse_inner_params mode buffer items buf =
730     let param = parse_opname_name ModeNormal buffer [] buf in
731     let items =
732     if param <> [] then
733     param :: items
734     else
735     items
736     in
737     let token = parse_token buf in
738     match token with
739     TokWhite _
740     | TokSpecial ','
741     | TokSpecial ';' ->
742     parse_inner_params mode buffer items buf
743     | TokSpecial ']' ->
744     List.rev items
745     | TokName _
746     | TokString _
747     | TokQString _
748     | TokSpecial _
749     | TokMath _
750     | TokLeftBrace
751     | TokRightBrace
752     | TokQuote _
753     | TokEof ->
754     parse_error "illegal parameter" buf
755    
756     and flatten_params params =
757     List.map (fun l ->
758     List.fold_left (fun buf s -> buf ^ s) "" l) params
759    
760     (*
761     * Arguments.
762     *)
763     and parse_args mode found_white buf =
764     let token = parse_token buf in
765     match token with
766     TokWhite false ->
767     parse_args mode true buf
768     | TokLeftBrace ->
769     parse_inner_args (arg_mode mode) [] buf
770     | _ ->
771     push_back token buf;
772     if found_white then
773     push_back (TokWhite false) buf;
774     []
775    
776     and parse_inner_args mode items buf =
777     let term, t = parse_block [TermSemi; TermBrace] mode [] buf in
778     let items = t :: items in
779     match term with
780     TermSemi ->
781     parse_inner_args mode items buf
782     | TermBrace ->
783     List.rev items
784     | TermEof
785     | TermEnd _
786     | TermMath _ ->
787     parse_error "illegal terminator" buf
788    
789     (*
790     * Main function.
791     *)
792     let parse s =
793     let lexbuf = Lexing.from_string s in
794     let buf = { lexbuf = lexbuf; tokens = [] } in
795     let _, items = parse_block [TermEof] ModeNormal [] buf in
796     items
797     }
798    
799     (*
800     * -*-
801     * Local Variables:
802     * Caml-master: "set"
803     * End:
804     * -*-
805     *)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26