(* Substantially based on a file from SML/NJ 0.93: Copyright 1989 by AT&T Bell Laboratories Also see the copyright notice in SMLNJ-LICENSE. *) structure TokTable = SdmlTokenTableFn(Tokens); type svalue = Tokens.svalue type pos = int type lexresult = (svalue,pos) Tokens.token type lexarg = {comLevel : int ref, lineNum : int ref, linePos : int list ref, (* offsets of lines in file *) charlist : string list ref, stringtype : bool ref, stringstart : int ref, (* start of current string or comment*) argument : (int * int -> string -> unit) * (int -> int -> Location.location ) * (int -> int -> Sdml.exp -> Sdml.locexp) } type arg = lexarg type ('a,'b) token = ('a,'b) Tokens.token val eof = fn ({comLevel,argument=(err,_,_),linePos,stringstart,stringtype, lineNum,charlist}:lexarg) => let val pos = Int.max(!stringstart+2, hd(!linePos)) in if !comLevel>0 then err (!stringstart,pos) "unclosed comment" else (); Tokens.EOF(pos,pos) end fun addString (charlist,s:string) = charlist := s :: (!charlist) fun makeString charlist = (concat(rev(!charlist)) before charlist := nil) fun inc x = (x := (!x) + 1; !x) fun dec x = (x := (!x) - 1; !x) fun ordof (s,n) = ord (String.sub (s,n)) (* Note: "{" => (Tokens.LBRACE(yypos,yypos+1)); "}" => (Tokens.RBRACE(yypos,yypos+1)); "&" => (Tokens.INTERSECTION(yypos,yypos+1)); "\\/" => (Tokens.UNION(yypos,yypos+2)); Symbols that could be identifiers MUST NOT APPEAR HERE! Edit TokenTable.sml instead! *) %% %reject %s A S F; %header (functor SdmlLexFn(structure Tokens : Sdml_TOKENS)); %arg ({comLevel,lineNum,argument,linePos,charlist,stringstart,stringtype}); idchars=[A-Za-z'_0-9\128-\255%]; symbolidchars=[-!%&$#+/:<=>?@\\~`|*\128-\255^]; symbol={symbolidchars}+; id=[A-Za-z\128-\255]{idchars}*|[A-Za-z]{idchars}*"."{idchars}+|[A-Za-z]{idchars}*"."{symbol}; ws=("\012"|[\t\ ])*; sym=[!%&$/:<=>@~|`]|\\|\^; num=[0-9]+; frac="."{num}; exp=[Ee](~?){num}; real=(~?)(({num}{frac}?{exp})|({num}{frac}{exp}?)); hexnum=[0-9a-fA-F]+; %% {ws} => (continue()); \n => (inc lineNum; linePos := yypos :: !linePos; continue()); ";" => (Tokens.SEMICOLON(yypos,yypos+1)); "_" => (Tokens.WILD(yypos,yypos+1)); "," => (Tokens.COMMA(yypos,yypos+1)); "." => (Tokens.PERIOD(yypos,yypos+1)); ",," => (Tokens.DBLCOMMA(yypos,yypos+1)); "(" => (Tokens.LPAREN(yypos,yypos+1)); ")" => (Tokens.RPAREN(yypos,yypos+1)); "{" => (Tokens.LBRACE(yypos,yypos+1)); "}" => (Tokens.RBRACE(yypos,yypos+1)); "-all" => (Tokens.DASHALL(yypos,yypos+4)); "-exists" => (Tokens.DASHEXISTS(yypos,yypos+7)); "[" => (Tokens.LBRACK(yypos,yypos+1)); "]" => (Tokens.RBRACK(yypos,yypos+1)); "#" => (Tokens.HASH(yypos,yypos+1)); "*" => (Tokens.ASTERISK(yypos,yypos+1)); ":!" => (Tokens.ANTICOLON(yypos,yypos+2)); "+" => (Tokens.PLUS(yypos,yypos+1)); "-" => (Tokens.MINUS(yypos,yypos+1)); "|-" => (Tokens.TURNSTILE(yypos,yypos+2)); "->" => (Tokens.ARROW(yypos,yypos+2)); "=>" => (Tokens.DARROW(yypos,yypos+2)); "??" => (Tokens.DQUESTION(yypos,yypos+2)); ">:>" => (Tokens.LEFTANNO(yypos,yypos+3)); ":" => (Tokens.COLON(yypos,yypos+1)); "::" => (Tokens.DBLCOLON(yypos,yypos+2)); "'"("'"?)("_"|{num})?{id} => (Tokens.TYVAR(yytext,yypos,yypos+size yytext)); {id} => (TokTable.checkToken(yytext,yypos)); {sym}+ => (TokTable.checkToken(yytext,yypos)); {real} => (Tokens.REAL(yytext,yypos,yypos+size yytext)); {num} => (Tokens.INT(yytext,yypos,yypos+size yytext)); ~{num} => (Tokens.INT(yytext,yypos,yypos+size yytext)); \" => (charlist := [""]; stringstart := yypos; stringtype := true; YYBEGIN S; continue()); \#\" => (charlist := [""]; stringstart := yypos; stringtype := false; YYBEGIN S; continue()); "(*" => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue()); "*)" => ((#1(argument)) (yypos,yypos+1) "unmatched close comment"; continue()); \h => ((#1(argument)) (yypos,yypos) "non-Ascii character"; continue()); . => ((#1(argument)) (yypos,yypos) "illegal token"; continue()); "(*" => (inc comLevel; continue()); \n => (inc lineNum; linePos := yypos :: !linePos; continue()); "*)" => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue()); . => (continue()); \" => (let val s = makeString charlist val s = if size s <> 1 andalso not(!stringtype) then ((#1(argument)) (!stringstart,yypos) "character constant not length 1" ;substring(s^"x",0,1)) else s val t = (s,!stringstart,yypos+1) in YYBEGIN INITIAL; if !stringtype then Tokens.STRING t else Tokens.CHAR t end); \n => ((#1(argument)) (!stringstart,yypos) "unclosed string"; inc lineNum; linePos := yypos :: !linePos; YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos)); [^"\\\n]* => (addString(charlist,yytext); continue()); \\\n => (inc lineNum; linePos := yypos :: !linePos; YYBEGIN F; continue()); \\[\ \t] => (YYBEGIN F; continue()); \n => (inc lineNum; linePos := yypos :: !linePos; continue()); {ws} => (continue()); \\ => (YYBEGIN S; stringstart := yypos; continue()); . => ((#1(argument)) (!stringstart,yypos) "unclosed string"; YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1)); \\t => (addString(charlist,"\t"); continue()); \\n => (addString(charlist,"\n"); continue()); \\\\ => (addString(charlist,"\\"); continue()); \\\" => (addString(charlist,"\""); continue()); \\\^[@-_] => (addString(charlist,str (chr(ordof(yytext,2)-ord(#"@")))); continue()); \\[0-9]{3} => (let val x = ordof(yytext,1)*100 +ordof(yytext,2)*10 +ordof(yytext,3) -(ord(#"0")*111) in (if x>255 then (#1(argument)) (yypos,yypos+4) "illegal ascii escape" else addString(charlist,str (chr x)); continue()) end); \\ => ((#1(argument)) (yypos,yypos+1) "illegal string escape"; continue());