/[mojave]/metaprl/mllib/comment_parse.mll
ViewVC logotype

Annotation of /metaprl/mllib/comment_parse.mll

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3062 - (hide annotations) (download)
Tue Sep 19 03:03:23 2000 UTC (20 years, 10 months ago) by jyh
File size: 19423 byte(s)
Sorry, the problem with the last commit was just because of the
theories/base/base_theory.mlz, which used @begin[tex] instead
of @begin[doc].  Everything should compile now.

Once it compiles, try using make in the doc/latex/theories
directory.  It should generate the file all-theories.pdf, which
is docs for all the modules, including tactics, rules, rewrites,
etc.  There is an index that you can use to jump to the documentation
for any particular tactic, etc.

1 jyh 3057 (*
2     * This is a simple lexer to extract terms from a comment string.
3     * Grammar:
4     *
5     * Text is a sequence of tokens containing strings, terms, and quotations
6     * Literal-text is any sequence of chars not containing the trailing delimiter.
7     *
8     * For text:
9     * 1. Whitespace is ignored
10     * 2. Strings are any sequence on non-special chars
11     * 3. Quoted strings are surrounded by double-quotes
12     * 4. There are three types of terms:
13     * a. @opname[params]{args}
14     * Opname is an alphnumeric sequence, or a quoted string
15     * [params] are optional; a param is a string or quoted string
16     * {args} are optional; a sequence of text separated by ;
17     *
18     * b. @begin[name]
19     * text
20     * @end[name]
21     *
22     * This builds a term @name{text}
23     *
24     * c. $literal-str$
25     * This builds the term @math[str]
26     *
27     * 5. Quotations have the form
28     * <<literal-str>> and <:tag<literal-str>>
29     * are also allowed.
30     *
31     * Special forms:
32     *
33     * Quotations can be nested:
34     * TokQuote ("", text): << text >>
35     * TokQuote (tag, text): <:tag< text >>
36     *
37     * TokName: @opname
38     * TokString s: any sequence of non-whitespace, non-special chars
39     * TokQString s: any text between double-quotes
40     * TokMath s: any literal text between $
41     *
42     * TokWhite: whitespace
43     * TokLeftBrace: {
44     * TokRightBrace: }
45     * TokLeftBrack: [
46     * TokRightBrack: ]
47     * TokComma: ,
48     * TokSemi: ;
49     *
50     * The lexer removes the first leading * on each line of the
51     * input string.
52     *
53     * ----------------------------------------------------------------
54     *
55     * Copyright (C) 2000 Jason Hickey, Caltech
56     *
57     * This program is free software; you can redistribute it and/or
58     * modify it under the terms of the GNU General Public License
59     * as published by the Free Software Foundation; either version 2
60     * of the License, or (at your option) any later version.
61     *
62     * This program is distributed in the hope that it will be useful,
63     * but WITHOUT ANY WARRANTY; without even the implied warranty of
64     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
65     * GNU General Public License for more details.
66     *
67     * You should have received a copy of the GNU General Public License
68     * along with this program; if not, write to the Free Software
69     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
70     *
71     * Author: Jason Hickey
72     * jyh@cs.caltech.edu
73     *)
74    
75     {
76     (*
77     * A program is a sequence of strings and terms.
78     *)
79     type t = item list
80    
81     and item =
82     White
83     | String of string
84     | Term of string list * string list * t list
85     | Quote of string * string
86     | Block of t
87    
88     (*
89     * Tokens.
90     *)
91     type token =
92     TokWhite of bool
93     | TokQString of string
94     | TokMath of bool
95     | TokString of bool * string
96     | TokQuote of string * string
97     | TokName of string
98     | TokLeftBrace
99     | TokRightBrace
100     | TokSpecial of char
101     | TokEof
102    
103     type code_token =
104     CodeString of string
105     | CodeEnd
106    
107     (*
108     * Items returned by the item parser.
109     *)
110     type item_item =
111     ItemItem of item
112     | ItemMath of bool
113     | ItemEnd of string list
114     | ItemSpecial of char
115     | ItemBrace
116     | ItemEof
117    
118     (*
119     * Modes.
120     * The parser can be looking for arguments (so the ';' and ','
121     * chars are special),
122     * and it can be in math mode (so the '_' and '^' chars are special)
123     *)
124     type mode =
125     ModeNormal
126     | ModeArg
127     | ModeMath
128     | ModeArgMath
129    
130     (*
131     * Test for special chars.
132     *)
133     let is_special mode c =
134     let special =
135     match mode with
136     ModeNormal -> []
137     | ModeArg -> [';']
138     | ModeMath -> ['_'; '^']
139     | ModeArgMath -> ['_'; '^'; ';']
140     in
141     List.mem c special
142    
143     (*
144     * Test for math mode.
145     *)
146     let is_math_mode = function
147     ModeMath
148     | ModeArgMath ->
149     true
150     | ModeNormal
151     | ModeArg ->
152     false
153    
154     (*
155     * Move to arg mode.
156     *)
157     let arg_mode = function
158     ModeNormal
159     | ModeArg ->
160     ModeArg
161     | ModeMath
162     | ModeArgMath ->
163     ModeArgMath
164    
165     let math_mode = function
166     ModeNormal
167     | ModeMath ->
168     ModeMath
169     | ModeArg
170     | ModeArgMath ->
171     ModeArgMath
172    
173     let non_arg_mode = function
174     ModeNormal
175     | ModeArg ->
176     ModeNormal
177     | ModeMath
178     | ModeArgMath ->
179     ModeMath
180    
181     (*
182     * Termination items.
183     *)
184     type terminator =
185     TermEof
186     | TermBrace
187     | TermSemi
188     | TermMath of bool
189     | TermEnd of string list
190    
191     (*
192     * State for parsing quotations.
193     *)
194     let level = ref 0
195     let tag = ref ""
196     let buffer = Buffer.create 19
197    
198     let set_tag tag' =
199     tag := tag';
200     Buffer.clear buffer
201    
202     let add_string s =
203     Buffer.add_string buffer s
204    
205     let flush_buffer () =
206     let s = Buffer.contents buffer in
207     Buffer.clear buffer;
208     TokQuote (!tag, s)
209    
210     let flush_string () =
211     let s = Buffer.contents buffer in
212     Buffer.clear buffer;
213     s
214    
215     (*
216     * Pushback buffer.
217     *)
218     type token_buffer =
219     { lexbuf : Lexing.lexbuf;
220     mutable tokens : token list
221     }
222    
223     (*
224     * Errors.
225     *)
226     exception Parse_error of string * int * int
227    
228     let parse_error_buf s lexbuf =
229     let loc1 = Lexing.lexeme_start lexbuf in
230     let loc2 = Lexing.lexeme_start lexbuf in
231     raise (Parse_error (s, loc1, loc2))
232    
233     let parse_error s buf =
234     parse_error_buf s buf.lexbuf
235     }
236    
237     let white = [' ' '\t']+
238     let optwhite = [' ' '\t']*
239     let name = ['a'-'z''A'-'Z']+
240     let number = ['0'-'9']+
241     let special = ['[' ']' ';' ',' '_' '^' '!']
242    
243     rule main = parse
244     (* White space *)
245     '\n' optwhite '*'
246     { TokWhite true }
247     | '\n'
248     { TokWhite true }
249     | white
250     { TokWhite false }
251    
252 jyh 3062 (* Nested comments *)
253     | "(*"
254     { comment lexbuf; main lexbuf }
255    
256 jyh 3057 (* Quotations *)
257     | "<<"
258     { set_tag "";
259     quotation lexbuf
260     }
261     | "<:" name '<'
262     { let buf = Lexing.lexeme lexbuf in
263     set_tag (String.sub buf 2 (pred (String.length buf)));
264     quotation lexbuf
265     }
266    
267     (* This is temporary *)
268     | '\\'
269     { parse_error_buf "fix this backslash" lexbuf }
270    
271     (* Strings *)
272     | '"'
273     { TokQString (string lexbuf) }
274    
275     (* Special tokens *)
276     | "$$"
277     { TokMath true }
278     | '$'
279     { TokMath false }
280     | '@'
281     { opname lexbuf }
282     | '{'
283     { TokLeftBrace }
284     | '}'
285     { TokRightBrace }
286     | special
287     { TokSpecial (Lexing.lexeme_char lexbuf 0) }
288    
289     (* Alphanumeric names *)
290     | name
291     { TokString (true, Lexing.lexeme lexbuf) }
292     | number
293     { TokString (true, Lexing.lexeme lexbuf) }
294     | _
295     { TokString (false, Lexing.lexeme lexbuf) }
296    
297     | eof
298     { TokEof }
299    
300     (*
301 jyh 3062 * Comments.
302     *)
303     and comment = parse
304     "(*"
305     { comment lexbuf; comment lexbuf }
306     | "*)"
307     { () }
308     | _
309     { comment lexbuf }
310     | eof
311     { parse_error_buf "comment is not terminated" lexbuf }
312    
313     (*
314 jyh 3057 * Read the first string in the opname.
315     *)
316     and opname = parse
317     name
318     { TokName (Lexing.lexeme lexbuf) }
319     | '"'
320     { TokName (string lexbuf) }
321     | _
322     { TokString (false, Lexing.lexeme lexbuf) }
323     | eof
324     { parse_error_buf "opname is not terminated" lexbuf }
325    
326     (*
327     * Quotations.
328     * Watch for nested quotations.
329     *)
330     and quotation = parse
331     "<<"
332     { add_string "<<";
333     incr level;
334     quotation lexbuf
335     }
336     | "<:" name '<'
337     { add_string (Lexing.lexeme lexbuf);
338     incr level;
339     quotation lexbuf
340     }
341     | ">>"
342     { if !level = 0 then
343     flush_buffer ()
344     else
345     begin
346     add_string ">>";
347     decr level;
348     quotation lexbuf
349     end
350     }
351     | _
352     { add_string (Lexing.lexeme lexbuf);
353     quotation lexbuf
354     }
355     | eof
356     { parse_error_buf "quotation is not terminated" lexbuf }
357    
358     (*
359     * Strings.
360     * Remove escaped eol.
361     *)
362     and string = parse
363     '"' (* '"' *)
364     { flush_string () }
365     | '\\'
366     { escape lexbuf }
367     | _
368     { add_string (Lexing.lexeme lexbuf);
369     string lexbuf
370     }
371     | eof
372     { parse_error_buf "string is not terminated" lexbuf }
373    
374     and escape = parse
375     '\n'
376     { string lexbuf }
377     | _
378     { add_string (Lexing.lexeme lexbuf);
379     string lexbuf
380     }
381     | eof
382     { parse_error_buf "escape sequence is not terminated" lexbuf }
383    
384     (*
385     * Literal forms.
386     *)
387     and code_string_brace = parse
388     '\n' optwhite '*'
389     { CodeString "\n" }
390     | '}'
391     { CodeEnd }
392     | _
393     { CodeString (Lexing.lexeme lexbuf) }
394     | eof
395     { parse_error_buf "code string is not terminated" lexbuf }
396    
397     and code_string_end = parse
398     '\n' optwhite '*'
399     { CodeString "\n" }
400     | "@end[verbatim]"
401     | "@end[literal]"
402     { CodeEnd }
403     | _
404     { CodeString (Lexing.lexeme lexbuf) }
405     | eof
406     { parse_error_buf "code block is not terminated" lexbuf }
407    
408     {
409     (*
410     * In math mode, add the "math_" prefix to the opname.
411     *)
412     let rec mk_math_opname = function
413     [name] ->
414     ["math_" ^ name]
415     | h :: t ->
416     h :: mk_math_opname t
417     | [] ->
418     raise (Invalid_argument "Comment_parse.mk_math_opname")
419    
420     let mk_opname mode opname =
421     if is_math_mode mode then
422     mk_math_opname opname
423     else
424     opname
425    
426     (************************************************************************
427     * Pushback buffer.
428     *)
429     let parse_token buf =
430     match buf with
431     { tokens = token :: t } ->
432     buf.tokens <- t;
433     token
434     | { lexbuf = lexbuf } ->
435     main lexbuf
436    
437     let push_back token buf =
438     buf.tokens <- token :: buf.tokens
439    
440     (*
441     * Special forms.
442     *)
443     let parse_code_string_brace buf =
444     assert (buf.tokens = []);
445     code_string_brace buf.lexbuf
446    
447     let parse_code_string_end buf =
448     assert (buf.tokens = []);
449     code_string_end buf.lexbuf
450    
451     (************************************************************************
452     * Toplevel recursive-descent parser.
453     * term: the expected terminator for this block
454     * mode: the current parsing mode
455     * items: the list of items collected so far (in reverse order)
456     * buf: the token buffer
457     *)
458     let rec parse_block term mode items buf =
459     let item = parse_item mode buf in
460     match item with
461     ItemItem item ->
462     parse_block term mode (item :: items) buf
463     | ItemSpecial '_' ->
464     parse_math_script term mode "math_subscript" items buf
465     | ItemSpecial '^' ->
466     parse_math_script term mode "math_superscript" items buf
467     | ItemMath flag ->
468     finish_block term (TermMath flag) items buf
469     | ItemEnd tag ->
470     finish_block term (TermEnd tag) items buf
471     | ItemSpecial ';' ->
472     finish_block term TermSemi items buf
473     | ItemBrace ->
474     finish_block term TermBrace items buf
475     | ItemEof ->
476     finish_block term TermEof items buf
477     | ItemSpecial _ ->
478     parse_error "illegal special character" buf
479    
480     (*
481     * Found a terminator.
482     *)
483     and finish_block term term' items buf =
484     if not (List.mem term' term) then
485     parse_error "terminator mismatch" buf;
486     term', List.rev items
487    
488     (*
489     * Math mode sub/superscripts.
490     *)
491     and parse_math_script term mode opname items buf =
492     match items, parse_item (math_mode mode) buf with
493     item :: items, ItemItem item' ->
494     let items = Term ([opname], [], [[item]; [item']]) :: items in
495     parse_block term mode items buf
496     | _ ->
497     parse_error "illegal sub/superscript operation" buf
498    
499     (*
500     * Parse the entire next item.
501     *)
502     and parse_item mode buf =
503     let token = parse_token buf in
504     match token with
505     TokWhite is_nl_flag ->
506     ItemItem (parse_white is_nl_flag buf)
507     | TokMath flag ->
508     if is_math_mode mode then
509     ItemMath flag
510     else
511     let opname =
512     if flag then
513     "centermath"
514     else
515     "math"
516     in
517     let _, items = parse_block [TermMath flag] ModeMath [] buf in
518     ItemItem (Term ([opname], [], [items]))
519     | TokName s ->
520     parse_term mode s buf
521     | TokQuote (tag, next) ->
522     ItemItem (Quote (tag, next))
523     | TokQString s ->
524     ItemItem (String ("\"" ^ s ^ "\""))
525     | TokString (_, s) ->
526     ItemItem (String s)
527     | TokLeftBrace ->
528     let _, items = parse_block [TermBrace] (non_arg_mode mode) [] buf in
529     ItemItem (Block items)
530     | TokRightBrace ->
531     ItemBrace
532     | TokSpecial c ->
533     if is_special mode c then
534     ItemSpecial c
535     else
536     ItemItem (String (String.make 1 c))
537     | TokEof ->
538     ItemEof
539    
540     (*
541     * Adjacent non-nl whitespace is concatenated.
542     *)
543     and parse_white is_nl_flag buf =
544     let token = parse_token buf in
545     match token with
546     TokWhite is_nl_flag' ->
547     if is_nl_flag && is_nl_flag' then
548     begin
549     push_back token buf;
550     White
551     end
552     else
553     parse_white (is_nl_flag || is_nl_flag') buf
554     | _ ->
555     push_back token buf;
556     White
557    
558     (*
559     * Adjacent strings are concatenated.
560     *)
561     and parse_strings mode s buf =
562     let buffer = Buffer.create 19 in
563     Buffer.add_string buffer s;
564     parse_strings' mode buffer buf
565    
566     and parse_strings' mode buffer buf =
567     let token = parse_token buf in
568     match token with
569     TokString (_, s) ->
570     add_string mode s buffer buf
571     | TokSpecial c ->
572     if is_special mode c then
573     flush_string token buffer buf
574     else
575     add_char mode c buffer buf
576     | TokWhite _
577     | TokMath _
578     | TokQString _
579     | TokQuote _
580     | TokName _
581     | TokLeftBrace
582     | TokRightBrace
583     | TokEof ->
584     flush_string token buffer buf
585    
586     and add_string mode s buffer buf =
587     Buffer.add_string buffer s;
588     parse_strings' mode buffer buf
589    
590     and add_char mode c buffer buf =
591     Buffer.add_char buffer c;
592     parse_strings' mode buffer buf
593    
594     and flush_string token buffer buf =
595     push_back token buf;
596     String (Buffer.contents buffer)
597    
598     (*
599     * Parse a term.
600     * There are several mode cases to consider.
601     *)
602     and parse_term mode s buf =
603     let opname = parse_opname mode s buf in
604     let params = parse_params mode false buf in
605     if opname = ["code"] || opname = ["email"] then
606     let s = parse_code_arg buf in
607     ItemItem (Term (opname, [s], []))
608     else
609     let args = parse_args mode false buf in
610     let args =
611     if args = [[]] then
612     []
613     else
614     args
615     in
616     (* Mode cases *)
617     match opname, params, args with
618     ["begin"], [[tag]], [] when tag = "verbatim" || tag = "literal" ->
619     let s = parse_code_block buf in
620     ItemItem (Term ([tag], [s], []))
621     | ["begin"], tag :: params, [] ->
622     let _, args = parse_block [TermEnd tag] mode [] buf in
623     let opname = mk_opname mode tag in
624     ItemItem (Term (opname, flatten_params params, [args]))
625     | ["end"], [tag], [] ->
626     ItemEnd tag
627     | _ ->
628     let opname = mk_opname mode opname in
629     ItemItem (Term (opname, flatten_params params, args))
630    
631     (*
632     * Code blocks.
633     *)
634     and parse_code_arg buf =
635     let buffer = Buffer.create 19 in
636     match parse_token buf with
637     TokLeftBrace ->
638     parse_code_arg' buffer buf
639     | token ->
640     push_back token buf;
641     ""
642    
643     and parse_code_arg' buffer buf =
644     match parse_code_string_brace buf with
645     CodeString s ->
646     Buffer.add_string buffer s;
647     parse_code_arg' buffer buf
648     | CodeEnd ->
649     Buffer.contents buffer
650    
651     and parse_code_block buf =
652     let buffer = Buffer.create 19 in
653     buf.tokens <- [];
654     parse_code_block' buffer buf
655    
656     and parse_code_block' buffer buf =
657     match parse_code_string_end buf with
658     CodeString s ->
659     Buffer.add_string buffer s;
660     parse_code_block' buffer buf
661     | CodeEnd ->
662     Buffer.contents buffer
663    
664     (*
665     * Opname.
666     *)
667     and parse_opname mode s buf =
668     let buffer = Buffer.create 19 in
669     Buffer.add_string buffer s;
670     parse_opname_name mode buffer [] buf
671    
672     and parse_opname_name mode buffer opname buf =
673     let token = parse_token buf in
674     match token with
675     TokString (true, s) ->
676     add_opname_string mode buffer opname s buf
677     | TokQString s ->
678     add_opname_string mode buffer opname s buf
679     | TokSpecial '_' ->
680     if is_math_mode mode then
681     flush_opname token buffer opname buf
682     else
683     add_opname_string mode buffer opname "_" buf
684     | TokSpecial '!' ->
685     push_opname mode buffer opname buf
686     | _ ->
687     flush_opname token buffer opname buf
688    
689     and add_opname_string mode buffer opname s buf =
690     Buffer.add_string buffer s;
691     parse_opname_name mode buffer opname buf
692    
693     and flush_opname token buffer opname buf =
694     let s = Buffer.contents buffer in
695     let opname =
696     if s = "" then
697     opname
698     else
699     s :: opname
700     in
701     Buffer.clear buffer;
702     push_back token buf;
703     List.rev opname
704    
705     and push_opname mode buffer opname buf =
706     let s = Buffer.contents buffer in
707     let opname =
708     if s = "" then
709     opname
710     else
711     s :: opname
712     in
713     Buffer.clear buffer;
714     parse_opname_name mode buffer opname buf
715    
716     (*
717     * Param list is a list of opnames.
718     *)
719     and parse_params mode found_white buf =
720     let token = parse_token buf in
721     match token with
722     TokWhite false ->
723     parse_params mode true buf
724     | TokSpecial '[' ->
725     let buffer = Buffer.create 19 in
726     parse_inner_params mode buffer [] buf
727     | _ ->
728     push_back token buf;
729     if found_white then
730     push_back (TokWhite false) buf;
731     []
732    
733     and parse_inner_params mode buffer items buf =
734     let param = parse_opname_name ModeNormal buffer [] buf in
735     let items =
736     if param <> [] then
737     param :: items
738     else
739     items
740     in
741     let token = parse_token buf in
742     match token with
743     TokWhite _
744     | TokSpecial ','
745     | TokSpecial ';' ->
746     parse_inner_params mode buffer items buf
747     | TokSpecial ']' ->
748     List.rev items
749     | TokName _
750     | TokString _
751     | TokQString _
752     | TokSpecial _
753     | TokMath _
754     | TokLeftBrace
755     | TokRightBrace
756     | TokQuote _
757     | TokEof ->
758     parse_error "illegal parameter" buf
759    
760     and flatten_params params =
761     List.map (fun l ->
762     List.fold_left (fun buf s -> buf ^ s) "" l) params
763    
764     (*
765     * Arguments.
766     *)
767     and parse_args mode found_white buf =
768     let token = parse_token buf in
769     match token with
770     TokWhite false ->
771     parse_args mode true buf
772     | TokLeftBrace ->
773     parse_inner_args (arg_mode mode) [] buf
774     | _ ->
775     push_back token buf;
776     if found_white then
777     push_back (TokWhite false) buf;
778     []
779    
780     and parse_inner_args mode items buf =
781     let term, t = parse_block [TermSemi; TermBrace] mode [] buf in
782     let items = t :: items in
783     match term with
784     TermSemi ->
785     parse_inner_args mode items buf
786     | TermBrace ->
787     List.rev items
788     | TermEof
789     | TermEnd _
790     | TermMath _ ->
791     parse_error "illegal terminator" buf
792    
793     (*
794     * Main function.
795     *)
796     let parse s =
797     let lexbuf = Lexing.from_string s in
798     let buf = { lexbuf = lexbuf; tokens = [] } in
799     let _, items = parse_block [TermEof] ModeNormal [] buf in
800     items
801     }
802    
803     (*
804     * -*-
805     * Local Variables:
806     * Caml-master: "set"
807     * End:
808     * -*-
809     *)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26