/[mojave]/metaprl/mllib/comment_parse.mll
ViewVC logotype

Contents of /metaprl/mllib/comment_parse.mll

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3062 - (show annotations) (download)
Tue Sep 19 03:03:23 2000 UTC (20 years, 10 months ago) by jyh
File size: 19423 byte(s)
Sorry, the problem with the last commit was just because of the
theories/base/base_theory.mlz, which used @begin[tex] instead
of @begin[doc].  Everything should compile now.

Once it compiles, try using make in the doc/latex/theories
directory.  It should generate the file all-theories.pdf, which
is docs for all the modules, including tactics, rules, rewrites,
etc.  There is an index that you can use to jump to the documentation
for any particular tactic, etc.

1 (*
2 * This is a simple lexer to extract terms from a comment string.
3 * Grammar:
4 *
5 * Text is a sequence of tokens containing strings, terms, and quotations
6 * Literal-text is any sequence of chars not containing the trailing delimiter.
7 *
8 * For text:
9 * 1. Whitespace is ignored
10 * 2. Strings are any sequence on non-special chars
11 * 3. Quoted strings are surrounded by double-quotes
12 * 4. There are three types of terms:
13 * a. @opname[params]{args}
14 * Opname is an alphnumeric sequence, or a quoted string
15 * [params] are optional; a param is a string or quoted string
16 * {args} are optional; a sequence of text separated by ;
17 *
18 * b. @begin[name]
19 * text
20 * @end[name]
21 *
22 * This builds a term @name{text}
23 *
24 * c. $literal-str$
25 * This builds the term @math[str]
26 *
27 * 5. Quotations have the form
28 * <<literal-str>> and <:tag<literal-str>>
29 * are also allowed.
30 *
31 * Special forms:
32 *
33 * Quotations can be nested:
34 * TokQuote ("", text): << text >>
35 * TokQuote (tag, text): <:tag< text >>
36 *
37 * TokName: @opname
38 * TokString s: any sequence of non-whitespace, non-special chars
39 * TokQString s: any text between double-quotes
40 * TokMath s: any literal text between $
41 *
42 * TokWhite: whitespace
43 * TokLeftBrace: {
44 * TokRightBrace: }
45 * TokLeftBrack: [
46 * TokRightBrack: ]
47 * TokComma: ,
48 * TokSemi: ;
49 *
50 * The lexer removes the first leading * on each line of the
51 * input string.
52 *
53 * ----------------------------------------------------------------
54 *
55 * Copyright (C) 2000 Jason Hickey, Caltech
56 *
57 * This program is free software; you can redistribute it and/or
58 * modify it under the terms of the GNU General Public License
59 * as published by the Free Software Foundation; either version 2
60 * of the License, or (at your option) any later version.
61 *
62 * This program is distributed in the hope that it will be useful,
63 * but WITHOUT ANY WARRANTY; without even the implied warranty of
64 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
65 * GNU General Public License for more details.
66 *
67 * You should have received a copy of the GNU General Public License
68 * along with this program; if not, write to the Free Software
69 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
70 *
71 * Author: Jason Hickey
72 * jyh@cs.caltech.edu
73 *)
74
75 {
76 (*
77 * A program is a sequence of strings and terms.
78 *)
79 type t = item list
80
81 and item =
82 White
83 | String of string
84 | Term of string list * string list * t list
85 | Quote of string * string
86 | Block of t
87
88 (*
89 * Tokens.
90 *)
91 type token =
92 TokWhite of bool
93 | TokQString of string
94 | TokMath of bool
95 | TokString of bool * string
96 | TokQuote of string * string
97 | TokName of string
98 | TokLeftBrace
99 | TokRightBrace
100 | TokSpecial of char
101 | TokEof
102
103 type code_token =
104 CodeString of string
105 | CodeEnd
106
107 (*
108 * Items returned by the item parser.
109 *)
110 type item_item =
111 ItemItem of item
112 | ItemMath of bool
113 | ItemEnd of string list
114 | ItemSpecial of char
115 | ItemBrace
116 | ItemEof
117
118 (*
119 * Modes.
120 * The parser can be looking for arguments (so the ';' and ','
121 * chars are special),
122 * and it can be in math mode (so the '_' and '^' chars are special)
123 *)
124 type mode =
125 ModeNormal
126 | ModeArg
127 | ModeMath
128 | ModeArgMath
129
130 (*
131 * Test for special chars.
132 *)
133 let is_special mode c =
134 let special =
135 match mode with
136 ModeNormal -> []
137 | ModeArg -> [';']
138 | ModeMath -> ['_'; '^']
139 | ModeArgMath -> ['_'; '^'; ';']
140 in
141 List.mem c special
142
143 (*
144 * Test for math mode.
145 *)
146 let is_math_mode = function
147 ModeMath
148 | ModeArgMath ->
149 true
150 | ModeNormal
151 | ModeArg ->
152 false
153
154 (*
155 * Move to arg mode.
156 *)
157 let arg_mode = function
158 ModeNormal
159 | ModeArg ->
160 ModeArg
161 | ModeMath
162 | ModeArgMath ->
163 ModeArgMath
164
165 let math_mode = function
166 ModeNormal
167 | ModeMath ->
168 ModeMath
169 | ModeArg
170 | ModeArgMath ->
171 ModeArgMath
172
173 let non_arg_mode = function
174 ModeNormal
175 | ModeArg ->
176 ModeNormal
177 | ModeMath
178 | ModeArgMath ->
179 ModeMath
180
181 (*
182 * Termination items.
183 *)
184 type terminator =
185 TermEof
186 | TermBrace
187 | TermSemi
188 | TermMath of bool
189 | TermEnd of string list
190
191 (*
192 * State for parsing quotations.
193 *)
194 let level = ref 0
195 let tag = ref ""
196 let buffer = Buffer.create 19
197
198 let set_tag tag' =
199 tag := tag';
200 Buffer.clear buffer
201
202 let add_string s =
203 Buffer.add_string buffer s
204
205 let flush_buffer () =
206 let s = Buffer.contents buffer in
207 Buffer.clear buffer;
208 TokQuote (!tag, s)
209
210 let flush_string () =
211 let s = Buffer.contents buffer in
212 Buffer.clear buffer;
213 s
214
215 (*
216 * Pushback buffer.
217 *)
218 type token_buffer =
219 { lexbuf : Lexing.lexbuf;
220 mutable tokens : token list
221 }
222
223 (*
224 * Errors.
225 *)
226 exception Parse_error of string * int * int
227
228 let parse_error_buf s lexbuf =
229 let loc1 = Lexing.lexeme_start lexbuf in
230 let loc2 = Lexing.lexeme_start lexbuf in
231 raise (Parse_error (s, loc1, loc2))
232
233 let parse_error s buf =
234 parse_error_buf s buf.lexbuf
235 }
236
237 let white = [' ' '\t']+
238 let optwhite = [' ' '\t']*
239 let name = ['a'-'z''A'-'Z']+
240 let number = ['0'-'9']+
241 let special = ['[' ']' ';' ',' '_' '^' '!']
242
243 rule main = parse
244 (* White space *)
245 '\n' optwhite '*'
246 { TokWhite true }
247 | '\n'
248 { TokWhite true }
249 | white
250 { TokWhite false }
251
252 (* Nested comments *)
253 | "(*"
254 { comment lexbuf; main lexbuf }
255
256 (* Quotations *)
257 | "<<"
258 { set_tag "";
259 quotation lexbuf
260 }
261 | "<:" name '<'
262 { let buf = Lexing.lexeme lexbuf in
263 set_tag (String.sub buf 2 (pred (String.length buf)));
264 quotation lexbuf
265 }
266
267 (* This is temporary *)
268 | '\\'
269 { parse_error_buf "fix this backslash" lexbuf }
270
271 (* Strings *)
272 | '"'
273 { TokQString (string lexbuf) }
274
275 (* Special tokens *)
276 | "$$"
277 { TokMath true }
278 | '$'
279 { TokMath false }
280 | '@'
281 { opname lexbuf }
282 | '{'
283 { TokLeftBrace }
284 | '}'
285 { TokRightBrace }
286 | special
287 { TokSpecial (Lexing.lexeme_char lexbuf 0) }
288
289 (* Alphanumeric names *)
290 | name
291 { TokString (true, Lexing.lexeme lexbuf) }
292 | number
293 { TokString (true, Lexing.lexeme lexbuf) }
294 | _
295 { TokString (false, Lexing.lexeme lexbuf) }
296
297 | eof
298 { TokEof }
299
300 (*
301 * Comments.
302 *)
303 and comment = parse
304 "(*"
305 { comment lexbuf; comment lexbuf }
306 | "*)"
307 { () }
308 | _
309 { comment lexbuf }
310 | eof
311 { parse_error_buf "comment is not terminated" lexbuf }
312
313 (*
314 * Read the first string in the opname.
315 *)
316 and opname = parse
317 name
318 { TokName (Lexing.lexeme lexbuf) }
319 | '"'
320 { TokName (string lexbuf) }
321 | _
322 { TokString (false, Lexing.lexeme lexbuf) }
323 | eof
324 { parse_error_buf "opname is not terminated" lexbuf }
325
326 (*
327 * Quotations.
328 * Watch for nested quotations.
329 *)
330 and quotation = parse
331 "<<"
332 { add_string "<<";
333 incr level;
334 quotation lexbuf
335 }
336 | "<:" name '<'
337 { add_string (Lexing.lexeme lexbuf);
338 incr level;
339 quotation lexbuf
340 }
341 | ">>"
342 { if !level = 0 then
343 flush_buffer ()
344 else
345 begin
346 add_string ">>";
347 decr level;
348 quotation lexbuf
349 end
350 }
351 | _
352 { add_string (Lexing.lexeme lexbuf);
353 quotation lexbuf
354 }
355 | eof
356 { parse_error_buf "quotation is not terminated" lexbuf }
357
358 (*
359 * Strings.
360 * Remove escaped eol.
361 *)
362 and string = parse
363 '"' (* '"' *)
364 { flush_string () }
365 | '\\'
366 { escape lexbuf }
367 | _
368 { add_string (Lexing.lexeme lexbuf);
369 string lexbuf
370 }
371 | eof
372 { parse_error_buf "string is not terminated" lexbuf }
373
374 and escape = parse
375 '\n'
376 { string lexbuf }
377 | _
378 { add_string (Lexing.lexeme lexbuf);
379 string lexbuf
380 }
381 | eof
382 { parse_error_buf "escape sequence is not terminated" lexbuf }
383
384 (*
385 * Literal forms.
386 *)
387 and code_string_brace = parse
388 '\n' optwhite '*'
389 { CodeString "\n" }
390 | '}'
391 { CodeEnd }
392 | _
393 { CodeString (Lexing.lexeme lexbuf) }
394 | eof
395 { parse_error_buf "code string is not terminated" lexbuf }
396
397 and code_string_end = parse
398 '\n' optwhite '*'
399 { CodeString "\n" }
400 | "@end[verbatim]"
401 | "@end[literal]"
402 { CodeEnd }
403 | _
404 { CodeString (Lexing.lexeme lexbuf) }
405 | eof
406 { parse_error_buf "code block is not terminated" lexbuf }
407
408 {
409 (*
410 * In math mode, add the "math_" prefix to the opname.
411 *)
412 let rec mk_math_opname = function
413 [name] ->
414 ["math_" ^ name]
415 | h :: t ->
416 h :: mk_math_opname t
417 | [] ->
418 raise (Invalid_argument "Comment_parse.mk_math_opname")
419
420 let mk_opname mode opname =
421 if is_math_mode mode then
422 mk_math_opname opname
423 else
424 opname
425
426 (************************************************************************
427 * Pushback buffer.
428 *)
429 let parse_token buf =
430 match buf with
431 { tokens = token :: t } ->
432 buf.tokens <- t;
433 token
434 | { lexbuf = lexbuf } ->
435 main lexbuf
436
437 let push_back token buf =
438 buf.tokens <- token :: buf.tokens
439
440 (*
441 * Special forms.
442 *)
443 let parse_code_string_brace buf =
444 assert (buf.tokens = []);
445 code_string_brace buf.lexbuf
446
447 let parse_code_string_end buf =
448 assert (buf.tokens = []);
449 code_string_end buf.lexbuf
450
451 (************************************************************************
452 * Toplevel recursive-descent parser.
453 * term: the expected terminator for this block
454 * mode: the current parsing mode
455 * items: the list of items collected so far (in reverse order)
456 * buf: the token buffer
457 *)
458 let rec parse_block term mode items buf =
459 let item = parse_item mode buf in
460 match item with
461 ItemItem item ->
462 parse_block term mode (item :: items) buf
463 | ItemSpecial '_' ->
464 parse_math_script term mode "math_subscript" items buf
465 | ItemSpecial '^' ->
466 parse_math_script term mode "math_superscript" items buf
467 | ItemMath flag ->
468 finish_block term (TermMath flag) items buf
469 | ItemEnd tag ->
470 finish_block term (TermEnd tag) items buf
471 | ItemSpecial ';' ->
472 finish_block term TermSemi items buf
473 | ItemBrace ->
474 finish_block term TermBrace items buf
475 | ItemEof ->
476 finish_block term TermEof items buf
477 | ItemSpecial _ ->
478 parse_error "illegal special character" buf
479
480 (*
481 * Found a terminator.
482 *)
483 and finish_block term term' items buf =
484 if not (List.mem term' term) then
485 parse_error "terminator mismatch" buf;
486 term', List.rev items
487
488 (*
489 * Math mode sub/superscripts.
490 *)
491 and parse_math_script term mode opname items buf =
492 match items, parse_item (math_mode mode) buf with
493 item :: items, ItemItem item' ->
494 let items = Term ([opname], [], [[item]; [item']]) :: items in
495 parse_block term mode items buf
496 | _ ->
497 parse_error "illegal sub/superscript operation" buf
498
499 (*
500 * Parse the entire next item.
501 *)
502 and parse_item mode buf =
503 let token = parse_token buf in
504 match token with
505 TokWhite is_nl_flag ->
506 ItemItem (parse_white is_nl_flag buf)
507 | TokMath flag ->
508 if is_math_mode mode then
509 ItemMath flag
510 else
511 let opname =
512 if flag then
513 "centermath"
514 else
515 "math"
516 in
517 let _, items = parse_block [TermMath flag] ModeMath [] buf in
518 ItemItem (Term ([opname], [], [items]))
519 | TokName s ->
520 parse_term mode s buf
521 | TokQuote (tag, next) ->
522 ItemItem (Quote (tag, next))
523 | TokQString s ->
524 ItemItem (String ("\"" ^ s ^ "\""))
525 | TokString (_, s) ->
526 ItemItem (String s)
527 | TokLeftBrace ->
528 let _, items = parse_block [TermBrace] (non_arg_mode mode) [] buf in
529 ItemItem (Block items)
530 | TokRightBrace ->
531 ItemBrace
532 | TokSpecial c ->
533 if is_special mode c then
534 ItemSpecial c
535 else
536 ItemItem (String (String.make 1 c))
537 | TokEof ->
538 ItemEof
539
540 (*
541 * Adjacent non-nl whitespace is concatenated.
542 *)
543 and parse_white is_nl_flag buf =
544 let token = parse_token buf in
545 match token with
546 TokWhite is_nl_flag' ->
547 if is_nl_flag && is_nl_flag' then
548 begin
549 push_back token buf;
550 White
551 end
552 else
553 parse_white (is_nl_flag || is_nl_flag') buf
554 | _ ->
555 push_back token buf;
556 White
557
558 (*
559 * Adjacent strings are concatenated.
560 *)
561 and parse_strings mode s buf =
562 let buffer = Buffer.create 19 in
563 Buffer.add_string buffer s;
564 parse_strings' mode buffer buf
565
566 and parse_strings' mode buffer buf =
567 let token = parse_token buf in
568 match token with
569 TokString (_, s) ->
570 add_string mode s buffer buf
571 | TokSpecial c ->
572 if is_special mode c then
573 flush_string token buffer buf
574 else
575 add_char mode c buffer buf
576 | TokWhite _
577 | TokMath _
578 | TokQString _
579 | TokQuote _
580 | TokName _
581 | TokLeftBrace
582 | TokRightBrace
583 | TokEof ->
584 flush_string token buffer buf
585
586 and add_string mode s buffer buf =
587 Buffer.add_string buffer s;
588 parse_strings' mode buffer buf
589
590 and add_char mode c buffer buf =
591 Buffer.add_char buffer c;
592 parse_strings' mode buffer buf
593
594 and flush_string token buffer buf =
595 push_back token buf;
596 String (Buffer.contents buffer)
597
598 (*
599 * Parse a term.
600 * There are several mode cases to consider.
601 *)
602 and parse_term mode s buf =
603 let opname = parse_opname mode s buf in
604 let params = parse_params mode false buf in
605 if opname = ["code"] || opname = ["email"] then
606 let s = parse_code_arg buf in
607 ItemItem (Term (opname, [s], []))
608 else
609 let args = parse_args mode false buf in
610 let args =
611 if args = [[]] then
612 []
613 else
614 args
615 in
616 (* Mode cases *)
617 match opname, params, args with
618 ["begin"], [[tag]], [] when tag = "verbatim" || tag = "literal" ->
619 let s = parse_code_block buf in
620 ItemItem (Term ([tag], [s], []))
621 | ["begin"], tag :: params, [] ->
622 let _, args = parse_block [TermEnd tag] mode [] buf in
623 let opname = mk_opname mode tag in
624 ItemItem (Term (opname, flatten_params params, [args]))
625 | ["end"], [tag], [] ->
626 ItemEnd tag
627 | _ ->
628 let opname = mk_opname mode opname in
629 ItemItem (Term (opname, flatten_params params, args))
630
631 (*
632 * Code blocks.
633 *)
634 and parse_code_arg buf =
635 let buffer = Buffer.create 19 in
636 match parse_token buf with
637 TokLeftBrace ->
638 parse_code_arg' buffer buf
639 | token ->
640 push_back token buf;
641 ""
642
643 and parse_code_arg' buffer buf =
644 match parse_code_string_brace buf with
645 CodeString s ->
646 Buffer.add_string buffer s;
647 parse_code_arg' buffer buf
648 | CodeEnd ->
649 Buffer.contents buffer
650
651 and parse_code_block buf =
652 let buffer = Buffer.create 19 in
653 buf.tokens <- [];
654 parse_code_block' buffer buf
655
656 and parse_code_block' buffer buf =
657 match parse_code_string_end buf with
658 CodeString s ->
659 Buffer.add_string buffer s;
660 parse_code_block' buffer buf
661 | CodeEnd ->
662 Buffer.contents buffer
663
664 (*
665 * Opname.
666 *)
667 and parse_opname mode s buf =
668 let buffer = Buffer.create 19 in
669 Buffer.add_string buffer s;
670 parse_opname_name mode buffer [] buf
671
672 and parse_opname_name mode buffer opname buf =
673 let token = parse_token buf in
674 match token with
675 TokString (true, s) ->
676 add_opname_string mode buffer opname s buf
677 | TokQString s ->
678 add_opname_string mode buffer opname s buf
679 | TokSpecial '_' ->
680 if is_math_mode mode then
681 flush_opname token buffer opname buf
682 else
683 add_opname_string mode buffer opname "_" buf
684 | TokSpecial '!' ->
685 push_opname mode buffer opname buf
686 | _ ->
687 flush_opname token buffer opname buf
688
689 and add_opname_string mode buffer opname s buf =
690 Buffer.add_string buffer s;
691 parse_opname_name mode buffer opname buf
692
693 and flush_opname token buffer opname buf =
694 let s = Buffer.contents buffer in
695 let opname =
696 if s = "" then
697 opname
698 else
699 s :: opname
700 in
701 Buffer.clear buffer;
702 push_back token buf;
703 List.rev opname
704
705 and push_opname mode buffer opname buf =
706 let s = Buffer.contents buffer in
707 let opname =
708 if s = "" then
709 opname
710 else
711 s :: opname
712 in
713 Buffer.clear buffer;
714 parse_opname_name mode buffer opname buf
715
716 (*
717 * Param list is a list of opnames.
718 *)
719 and parse_params mode found_white buf =
720 let token = parse_token buf in
721 match token with
722 TokWhite false ->
723 parse_params mode true buf
724 | TokSpecial '[' ->
725 let buffer = Buffer.create 19 in
726 parse_inner_params mode buffer [] buf
727 | _ ->
728 push_back token buf;
729 if found_white then
730 push_back (TokWhite false) buf;
731 []
732
733 and parse_inner_params mode buffer items buf =
734 let param = parse_opname_name ModeNormal buffer [] buf in
735 let items =
736 if param <> [] then
737 param :: items
738 else
739 items
740 in
741 let token = parse_token buf in
742 match token with
743 TokWhite _
744 | TokSpecial ','
745 | TokSpecial ';' ->
746 parse_inner_params mode buffer items buf
747 | TokSpecial ']' ->
748 List.rev items
749 | TokName _
750 | TokString _
751 | TokQString _
752 | TokSpecial _
753 | TokMath _
754 | TokLeftBrace
755 | TokRightBrace
756 | TokQuote _
757 | TokEof ->
758 parse_error "illegal parameter" buf
759
760 and flatten_params params =
761 List.map (fun l ->
762 List.fold_left (fun buf s -> buf ^ s) "" l) params
763
764 (*
765 * Arguments.
766 *)
767 and parse_args mode found_white buf =
768 let token = parse_token buf in
769 match token with
770 TokWhite false ->
771 parse_args mode true buf
772 | TokLeftBrace ->
773 parse_inner_args (arg_mode mode) [] buf
774 | _ ->
775 push_back token buf;
776 if found_white then
777 push_back (TokWhite false) buf;
778 []
779
780 and parse_inner_args mode items buf =
781 let term, t = parse_block [TermSemi; TermBrace] mode [] buf in
782 let items = t :: items in
783 match term with
784 TermSemi ->
785 parse_inner_args mode items buf
786 | TermBrace ->
787 List.rev items
788 | TermEof
789 | TermEnd _
790 | TermMath _ ->
791 parse_error "illegal terminator" buf
792
793 (*
794 * Main function.
795 *)
796 let parse s =
797 let lexbuf = Lexing.from_string s in
798 let buf = { lexbuf = lexbuf; tokens = [] } in
799 let _, items = parse_block [TermEof] ModeNormal [] buf in
800 items
801 }
802
803 (*
804 * -*-
805 * Local Variables:
806 * Caml-master: "set"
807 * End:
808 * -*-
809 *)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26