/[mojave]/metaprl/mllib/comment_parse.mll
ViewVC logotype

Contents of /metaprl/mllib/comment_parse.mll

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3057 - (show annotations) (download)
Sun Sep 10 18:26:37 2000 UTC (20 years, 10 months ago) by jyh
File size: 19157 byte(s)
This jumbo update is a documentation update.  There are no
logical changes.  I did two things:

1. First, comments that begin with the sequence "(*!" are
   interpreted as structured comments.  The contents are parsed
   as terms, and the comments will display in the editor window.
   The comments may contain arbitrary text, as usual, and they
   can also contain explicit terms.  The syntax for terms is:

   @opname[p1, ..., pn]{t1; ...; tm}

   or there is an alternate syntax:

   @begin[opname, p1, ..., pn]
   arg
   @end[opname]

   Text sequences count as a single term.  So, for instance, the
   term @bf{Hello world} display the "Hello world" text in a bold
   font.

   The "doc" term is used for text that is considered to be
   documentation.  There is a set of LaTeX-like terms that you can
   use to produce good-looking documentation.  These are all
   documented in the theories/tactic/comment.ml module.  The exact
   syntax of structured comments is defined in the
   mllib/comment_parse.mll file.  You can also look at any of the
   module in the theories/itt directory if you want to see examples.

   You can generate a pritable version of documentation with the
   print_theory command in the editor.  For example, the command

   # print_theory "itt_equal";;

   produces a file called output.tex that can be formatted with
   your favorite version of latex2e.  You can also generate a
   manule of all the theories by running "make tex" in the
   editor/ml directory.  Or you can run "make" in the
   doc/latex/theories directory.

   The files are hyperlinked, and they contain a table of contents
   and an index.  The preferred reader is acrobat, and the preferred
   formatter is pdflatex.

2. Second, I documented the tactic, base, itt, and czf theories.

3. Oh, and I also added a spelling checker:)  You can turn on the
   spelling checker with the -debug spell option or MP_DEBUG=spell.
   If prlc thinks that your files contain spelling mistakes, it will
   print them out and abort.  You can fix this problem by 1) fixing
   the spelling error, 2) adding the words withing a @spelling{words}
   term in a comment, or adding the words to you .ispell_english
   directory.

   For example, we could either do this:

   @begin[spelling]
   Aleksey Nogin
   @end[spelling]

   Or we could just add Alexey to either $cwd/.ispell_english, or
   ~/.ispell_english.

   The spell checker uses /usr/dict/words as a database, which it
   compiles to a hashtable in /tmp/metaprl-spell-$USER.dat.  Don't
   worry about the tmp file, it is generated automatically.

1 (*
2 * This is a simple lexer to extract terms from a comment string.
3 * Grammar:
4 *
5 * Text is a sequence of tokens containing strings, terms, and quotations
6 * Literal-text is any sequence of chars not containing the trailing delimiter.
7 *
8 * For text:
9 * 1. Whitespace is ignored
10 * 2. Strings are any sequence on non-special chars
11 * 3. Quoted strings are surrounded by double-quotes
12 * 4. There are three types of terms:
13 * a. @opname[params]{args}
14 * Opname is an alphnumeric sequence, or a quoted string
15 * [params] are optional; a param is a string or quoted string
16 * {args} are optional; a sequence of text separated by ;
17 *
18 * b. @begin[name]
19 * text
20 * @end[name]
21 *
22 * This builds a term @name{text}
23 *
24 * c. $literal-str$
25 * This builds the term @math[str]
26 *
27 * 5. Quotations have the form
28 * <<literal-str>> and <:tag<literal-str>>
29 * are also allowed.
30 *
31 * Special forms:
32 *
33 * Quotations can be nested:
34 * TokQuote ("", text): << text >>
35 * TokQuote (tag, text): <:tag< text >>
36 *
37 * TokName: @opname
38 * TokString s: any sequence of non-whitespace, non-special chars
39 * TokQString s: any text between double-quotes
40 * TokMath s: any literal text between $
41 *
42 * TokWhite: whitespace
43 * TokLeftBrace: {
44 * TokRightBrace: }
45 * TokLeftBrack: [
46 * TokRightBrack: ]
47 * TokComma: ,
48 * TokSemi: ;
49 *
50 * The lexer removes the first leading * on each line of the
51 * input string.
52 *
53 * ----------------------------------------------------------------
54 *
55 * Copyright (C) 2000 Jason Hickey, Caltech
56 *
57 * This program is free software; you can redistribute it and/or
58 * modify it under the terms of the GNU General Public License
59 * as published by the Free Software Foundation; either version 2
60 * of the License, or (at your option) any later version.
61 *
62 * This program is distributed in the hope that it will be useful,
63 * but WITHOUT ANY WARRANTY; without even the implied warranty of
64 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
65 * GNU General Public License for more details.
66 *
67 * You should have received a copy of the GNU General Public License
68 * along with this program; if not, write to the Free Software
69 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
70 *
71 * Author: Jason Hickey
72 * jyh@cs.caltech.edu
73 *)
74
75 {
76 (*
77 * A program is a sequence of strings and terms.
78 *)
79 type t = item list
80
81 and item =
82 White
83 | String of string
84 | Term of string list * string list * t list
85 | Quote of string * string
86 | Block of t
87
88 (*
89 * Tokens.
90 *)
91 type token =
92 TokWhite of bool
93 | TokQString of string
94 | TokMath of bool
95 | TokString of bool * string
96 | TokQuote of string * string
97 | TokName of string
98 | TokLeftBrace
99 | TokRightBrace
100 | TokSpecial of char
101 | TokEof
102
103 type code_token =
104 CodeString of string
105 | CodeEnd
106
107 (*
108 * Items returned by the item parser.
109 *)
110 type item_item =
111 ItemItem of item
112 | ItemMath of bool
113 | ItemEnd of string list
114 | ItemSpecial of char
115 | ItemBrace
116 | ItemEof
117
118 (*
119 * Modes.
120 * The parser can be looking for arguments (so the ';' and ','
121 * chars are special),
122 * and it can be in math mode (so the '_' and '^' chars are special)
123 *)
124 type mode =
125 ModeNormal
126 | ModeArg
127 | ModeMath
128 | ModeArgMath
129
130 (*
131 * Test for special chars.
132 *)
133 let is_special mode c =
134 let special =
135 match mode with
136 ModeNormal -> []
137 | ModeArg -> [';']
138 | ModeMath -> ['_'; '^']
139 | ModeArgMath -> ['_'; '^'; ';']
140 in
141 List.mem c special
142
143 (*
144 * Test for math mode.
145 *)
146 let is_math_mode = function
147 ModeMath
148 | ModeArgMath ->
149 true
150 | ModeNormal
151 | ModeArg ->
152 false
153
154 (*
155 * Move to arg mode.
156 *)
157 let arg_mode = function
158 ModeNormal
159 | ModeArg ->
160 ModeArg
161 | ModeMath
162 | ModeArgMath ->
163 ModeArgMath
164
165 let math_mode = function
166 ModeNormal
167 | ModeMath ->
168 ModeMath
169 | ModeArg
170 | ModeArgMath ->
171 ModeArgMath
172
173 let non_arg_mode = function
174 ModeNormal
175 | ModeArg ->
176 ModeNormal
177 | ModeMath
178 | ModeArgMath ->
179 ModeMath
180
181 (*
182 * Termination items.
183 *)
184 type terminator =
185 TermEof
186 | TermBrace
187 | TermSemi
188 | TermMath of bool
189 | TermEnd of string list
190
191 (*
192 * State for parsing quotations.
193 *)
194 let level = ref 0
195 let tag = ref ""
196 let buffer = Buffer.create 19
197
198 let set_tag tag' =
199 tag := tag';
200 Buffer.clear buffer
201
202 let add_string s =
203 Buffer.add_string buffer s
204
205 let flush_buffer () =
206 let s = Buffer.contents buffer in
207 Buffer.clear buffer;
208 TokQuote (!tag, s)
209
210 let flush_string () =
211 let s = Buffer.contents buffer in
212 Buffer.clear buffer;
213 s
214
215 (*
216 * Pushback buffer.
217 *)
218 type token_buffer =
219 { lexbuf : Lexing.lexbuf;
220 mutable tokens : token list
221 }
222
223 (*
224 * Errors.
225 *)
226 exception Parse_error of string * int * int
227
228 let parse_error_buf s lexbuf =
229 let loc1 = Lexing.lexeme_start lexbuf in
230 let loc2 = Lexing.lexeme_start lexbuf in
231 raise (Parse_error (s, loc1, loc2))
232
233 let parse_error s buf =
234 parse_error_buf s buf.lexbuf
235 }
236
237 let white = [' ' '\t']+
238 let optwhite = [' ' '\t']*
239 let name = ['a'-'z''A'-'Z']+
240 let number = ['0'-'9']+
241 let special = ['[' ']' ';' ',' '_' '^' '!']
242
243 rule main = parse
244 (* White space *)
245 '\n' optwhite '*'
246 { TokWhite true }
247 | '\n'
248 { TokWhite true }
249 | white
250 { TokWhite false }
251
252 (* Quotations *)
253 | "<<"
254 { set_tag "";
255 quotation lexbuf
256 }
257 | "<:" name '<'
258 { let buf = Lexing.lexeme lexbuf in
259 set_tag (String.sub buf 2 (pred (String.length buf)));
260 quotation lexbuf
261 }
262
263 (* This is temporary *)
264 | '\\'
265 { parse_error_buf "fix this backslash" lexbuf }
266
267 (* Strings *)
268 | '"'
269 { TokQString (string lexbuf) }
270
271 (* Special tokens *)
272 | "$$"
273 { TokMath true }
274 | '$'
275 { TokMath false }
276 | '@'
277 { opname lexbuf }
278 | '{'
279 { TokLeftBrace }
280 | '}'
281 { TokRightBrace }
282 | special
283 { TokSpecial (Lexing.lexeme_char lexbuf 0) }
284
285 (* Alphanumeric names *)
286 | name
287 { TokString (true, Lexing.lexeme lexbuf) }
288 | number
289 { TokString (true, Lexing.lexeme lexbuf) }
290 | _
291 { TokString (false, Lexing.lexeme lexbuf) }
292
293 | eof
294 { TokEof }
295
296 (*
297 * Read the first string in the opname.
298 *)
299 and opname = parse
300 name
301 { TokName (Lexing.lexeme lexbuf) }
302 | '"'
303 { TokName (string lexbuf) }
304 | _
305 { TokString (false, Lexing.lexeme lexbuf) }
306 | eof
307 { parse_error_buf "opname is not terminated" lexbuf }
308
309 (*
310 * Quotations.
311 * Watch for nested quotations.
312 *)
313 and quotation = parse
314 "<<"
315 { add_string "<<";
316 incr level;
317 quotation lexbuf
318 }
319 | "<:" name '<'
320 { add_string (Lexing.lexeme lexbuf);
321 incr level;
322 quotation lexbuf
323 }
324 | ">>"
325 { if !level = 0 then
326 flush_buffer ()
327 else
328 begin
329 add_string ">>";
330 decr level;
331 quotation lexbuf
332 end
333 }
334 | _
335 { add_string (Lexing.lexeme lexbuf);
336 quotation lexbuf
337 }
338 | eof
339 { parse_error_buf "quotation is not terminated" lexbuf }
340
341 (*
342 * Strings.
343 * Remove escaped eol.
344 *)
345 and string = parse
346 '"' (* '"' *)
347 { flush_string () }
348 | '\\'
349 { escape lexbuf }
350 | _
351 { add_string (Lexing.lexeme lexbuf);
352 string lexbuf
353 }
354 | eof
355 { parse_error_buf "string is not terminated" lexbuf }
356
357 and escape = parse
358 '\n'
359 { string lexbuf }
360 | _
361 { add_string (Lexing.lexeme lexbuf);
362 string lexbuf
363 }
364 | eof
365 { parse_error_buf "escape sequence is not terminated" lexbuf }
366
367 (*
368 * Literal forms.
369 *)
370 and code_string_brace = parse
371 '\n' optwhite '*'
372 { CodeString "\n" }
373 | '}'
374 { CodeEnd }
375 | _
376 { CodeString (Lexing.lexeme lexbuf) }
377 | eof
378 { parse_error_buf "code string is not terminated" lexbuf }
379
380 and code_string_end = parse
381 '\n' optwhite '*'
382 { CodeString "\n" }
383 | "@end[verbatim]"
384 | "@end[literal]"
385 { CodeEnd }
386 | _
387 { CodeString (Lexing.lexeme lexbuf) }
388 | eof
389 { parse_error_buf "code block is not terminated" lexbuf }
390
391 {
392 (*
393 * In math mode, add the "math_" prefix to the opname.
394 *)
395 let rec mk_math_opname = function
396 [name] ->
397 ["math_" ^ name]
398 | h :: t ->
399 h :: mk_math_opname t
400 | [] ->
401 raise (Invalid_argument "Comment_parse.mk_math_opname")
402
403 let mk_opname mode opname =
404 if is_math_mode mode then
405 mk_math_opname opname
406 else
407 opname
408
409 (************************************************************************
410 * Pushback buffer.
411 *)
412 let parse_token buf =
413 match buf with
414 { tokens = token :: t } ->
415 buf.tokens <- t;
416 token
417 | { lexbuf = lexbuf } ->
418 main lexbuf
419
420 let push_back token buf =
421 buf.tokens <- token :: buf.tokens
422
423 (*
424 * Special forms.
425 *)
426 let parse_code_string_brace buf =
427 assert (buf.tokens = []);
428 code_string_brace buf.lexbuf
429
430 let parse_code_string_end buf =
431 assert (buf.tokens = []);
432 code_string_end buf.lexbuf
433
434 (************************************************************************
435 * Toplevel recursive-descent parser.
436 * term: the expected terminator for this block
437 * mode: the current parsing mode
438 * items: the list of items collected so far (in reverse order)
439 * buf: the token buffer
440 *)
441 let rec parse_block term mode items buf =
442 let item = parse_item mode buf in
443 match item with
444 ItemItem item ->
445 parse_block term mode (item :: items) buf
446 | ItemSpecial '_' ->
447 parse_math_script term mode "math_subscript" items buf
448 | ItemSpecial '^' ->
449 parse_math_script term mode "math_superscript" items buf
450 | ItemMath flag ->
451 finish_block term (TermMath flag) items buf
452 | ItemEnd tag ->
453 finish_block term (TermEnd tag) items buf
454 | ItemSpecial ';' ->
455 finish_block term TermSemi items buf
456 | ItemBrace ->
457 finish_block term TermBrace items buf
458 | ItemEof ->
459 finish_block term TermEof items buf
460 | ItemSpecial _ ->
461 parse_error "illegal special character" buf
462
463 (*
464 * Found a terminator.
465 *)
466 and finish_block term term' items buf =
467 if not (List.mem term' term) then
468 parse_error "terminator mismatch" buf;
469 term', List.rev items
470
471 (*
472 * Math mode sub/superscripts.
473 *)
474 and parse_math_script term mode opname items buf =
475 match items, parse_item (math_mode mode) buf with
476 item :: items, ItemItem item' ->
477 let items = Term ([opname], [], [[item]; [item']]) :: items in
478 parse_block term mode items buf
479 | _ ->
480 parse_error "illegal sub/superscript operation" buf
481
482 (*
483 * Parse the entire next item.
484 *)
485 and parse_item mode buf =
486 let token = parse_token buf in
487 match token with
488 TokWhite is_nl_flag ->
489 ItemItem (parse_white is_nl_flag buf)
490 | TokMath flag ->
491 if is_math_mode mode then
492 ItemMath flag
493 else
494 let opname =
495 if flag then
496 "centermath"
497 else
498 "math"
499 in
500 let _, items = parse_block [TermMath flag] ModeMath [] buf in
501 ItemItem (Term ([opname], [], [items]))
502 | TokName s ->
503 parse_term mode s buf
504 | TokQuote (tag, next) ->
505 ItemItem (Quote (tag, next))
506 | TokQString s ->
507 ItemItem (String ("\"" ^ s ^ "\""))
508 | TokString (_, s) ->
509 ItemItem (String s)
510 | TokLeftBrace ->
511 let _, items = parse_block [TermBrace] (non_arg_mode mode) [] buf in
512 ItemItem (Block items)
513 | TokRightBrace ->
514 ItemBrace
515 | TokSpecial c ->
516 if is_special mode c then
517 ItemSpecial c
518 else
519 ItemItem (String (String.make 1 c))
520 | TokEof ->
521 ItemEof
522
523 (*
524 * Adjacent non-nl whitespace is concatenated.
525 *)
526 and parse_white is_nl_flag buf =
527 let token = parse_token buf in
528 match token with
529 TokWhite is_nl_flag' ->
530 if is_nl_flag && is_nl_flag' then
531 begin
532 push_back token buf;
533 White
534 end
535 else
536 parse_white (is_nl_flag || is_nl_flag') buf
537 | _ ->
538 push_back token buf;
539 White
540
541 (*
542 * Adjacent strings are concatenated.
543 *)
544 and parse_strings mode s buf =
545 let buffer = Buffer.create 19 in
546 Buffer.add_string buffer s;
547 parse_strings' mode buffer buf
548
549 and parse_strings' mode buffer buf =
550 let token = parse_token buf in
551 match token with
552 TokString (_, s) ->
553 add_string mode s buffer buf
554 | TokSpecial c ->
555 if is_special mode c then
556 flush_string token buffer buf
557 else
558 add_char mode c buffer buf
559 | TokWhite _
560 | TokMath _
561 | TokQString _
562 | TokQuote _
563 | TokName _
564 | TokLeftBrace
565 | TokRightBrace
566 | TokEof ->
567 flush_string token buffer buf
568
569 and add_string mode s buffer buf =
570 Buffer.add_string buffer s;
571 parse_strings' mode buffer buf
572
573 and add_char mode c buffer buf =
574 Buffer.add_char buffer c;
575 parse_strings' mode buffer buf
576
577 and flush_string token buffer buf =
578 push_back token buf;
579 String (Buffer.contents buffer)
580
581 (*
582 * Parse a term.
583 * There are several mode cases to consider.
584 *)
585 and parse_term mode s buf =
586 let opname = parse_opname mode s buf in
587 let params = parse_params mode false buf in
588 if opname = ["code"] || opname = ["email"] then
589 let s = parse_code_arg buf in
590 ItemItem (Term (opname, [s], []))
591 else
592 let args = parse_args mode false buf in
593 let args =
594 if args = [[]] then
595 []
596 else
597 args
598 in
599 (* Mode cases *)
600 match opname, params, args with
601 ["begin"], [[tag]], [] when tag = "verbatim" || tag = "literal" ->
602 let s = parse_code_block buf in
603 ItemItem (Term ([tag], [s], []))
604 | ["begin"], tag :: params, [] ->
605 let _, args = parse_block [TermEnd tag] mode [] buf in
606 let opname = mk_opname mode tag in
607 ItemItem (Term (opname, flatten_params params, [args]))
608 | ["end"], [tag], [] ->
609 ItemEnd tag
610 | _ ->
611 let opname = mk_opname mode opname in
612 ItemItem (Term (opname, flatten_params params, args))
613
614 (*
615 * Code blocks.
616 *)
617 and parse_code_arg buf =
618 let buffer = Buffer.create 19 in
619 match parse_token buf with
620 TokLeftBrace ->
621 parse_code_arg' buffer buf
622 | token ->
623 push_back token buf;
624 ""
625
626 and parse_code_arg' buffer buf =
627 match parse_code_string_brace buf with
628 CodeString s ->
629 Buffer.add_string buffer s;
630 parse_code_arg' buffer buf
631 | CodeEnd ->
632 Buffer.contents buffer
633
634 and parse_code_block buf =
635 let buffer = Buffer.create 19 in
636 buf.tokens <- [];
637 parse_code_block' buffer buf
638
639 and parse_code_block' buffer buf =
640 match parse_code_string_end buf with
641 CodeString s ->
642 Buffer.add_string buffer s;
643 parse_code_block' buffer buf
644 | CodeEnd ->
645 Buffer.contents buffer
646
647 (*
648 * Opname.
649 *)
650 and parse_opname mode s buf =
651 let buffer = Buffer.create 19 in
652 Buffer.add_string buffer s;
653 parse_opname_name mode buffer [] buf
654
655 and parse_opname_name mode buffer opname buf =
656 let token = parse_token buf in
657 match token with
658 TokString (true, s) ->
659 add_opname_string mode buffer opname s buf
660 | TokQString s ->
661 add_opname_string mode buffer opname s buf
662 | TokSpecial '_' ->
663 if is_math_mode mode then
664 flush_opname token buffer opname buf
665 else
666 add_opname_string mode buffer opname "_" buf
667 | TokSpecial '!' ->
668 push_opname mode buffer opname buf
669 | _ ->
670 flush_opname token buffer opname buf
671
672 and add_opname_string mode buffer opname s buf =
673 Buffer.add_string buffer s;
674 parse_opname_name mode buffer opname buf
675
676 and flush_opname token buffer opname buf =
677 let s = Buffer.contents buffer in
678 let opname =
679 if s = "" then
680 opname
681 else
682 s :: opname
683 in
684 Buffer.clear buffer;
685 push_back token buf;
686 List.rev opname
687
688 and push_opname mode buffer opname buf =
689 let s = Buffer.contents buffer in
690 let opname =
691 if s = "" then
692 opname
693 else
694 s :: opname
695 in
696 Buffer.clear buffer;
697 parse_opname_name mode buffer opname buf
698
699 (*
700 * Param list is a list of opnames.
701 *)
702 and parse_params mode found_white buf =
703 let token = parse_token buf in
704 match token with
705 TokWhite false ->
706 parse_params mode true buf
707 | TokSpecial '[' ->
708 let buffer = Buffer.create 19 in
709 parse_inner_params mode buffer [] buf
710 | _ ->
711 push_back token buf;
712 if found_white then
713 push_back (TokWhite false) buf;
714 []
715
716 and parse_inner_params mode buffer items buf =
717 let param = parse_opname_name ModeNormal buffer [] buf in
718 let items =
719 if param <> [] then
720 param :: items
721 else
722 items
723 in
724 let token = parse_token buf in
725 match token with
726 TokWhite _
727 | TokSpecial ','
728 | TokSpecial ';' ->
729 parse_inner_params mode buffer items buf
730 | TokSpecial ']' ->
731 List.rev items
732 | TokName _
733 | TokString _
734 | TokQString _
735 | TokSpecial _
736 | TokMath _
737 | TokLeftBrace
738 | TokRightBrace
739 | TokQuote _
740 | TokEof ->
741 parse_error "illegal parameter" buf
742
743 and flatten_params params =
744 List.map (fun l ->
745 List.fold_left (fun buf s -> buf ^ s) "" l) params
746
747 (*
748 * Arguments.
749 *)
750 and parse_args mode found_white buf =
751 let token = parse_token buf in
752 match token with
753 TokWhite false ->
754 parse_args mode true buf
755 | TokLeftBrace ->
756 parse_inner_args (arg_mode mode) [] buf
757 | _ ->
758 push_back token buf;
759 if found_white then
760 push_back (TokWhite false) buf;
761 []
762
763 and parse_inner_args mode items buf =
764 let term, t = parse_block [TermSemi; TermBrace] mode [] buf in
765 let items = t :: items in
766 match term with
767 TermSemi ->
768 parse_inner_args mode items buf
769 | TermBrace ->
770 List.rev items
771 | TermEof
772 | TermEnd _
773 | TermMath _ ->
774 parse_error "illegal terminator" buf
775
776 (*
777 * Main function.
778 *)
779 let parse s =
780 let lexbuf = Lexing.from_string s in
781 let buf = { lexbuf = lexbuf; tokens = [] } in
782 let _, items = parse_block [TermEof] ModeNormal [] buf in
783 items
784 }
785
786 (*
787 * -*-
788 * Local Variables:
789 * Caml-master: "set"
790 * End:
791 * -*-
792 *)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26