| basix_doc 0.1 | 
00001 00002 /****************************************************************************** 00003 * MODULE : mmx_lexer.cpp 00004 * DESCRIPTION: Lexical analysis of mathemagix files 00005 * COPYRIGHT : (C) 2000 Joris van der Hoeven 00006 ******************************************************************************* 00007 * NOTE: This file is included from mmx_parser.ypp 00008 * It is distributed apart together with the sources 00009 ******************************************************************************* 00010 * This software falls under the GNU general public license and comes WITHOUT 00011 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00012 * If you don't have this file, write to the Free Software Foundation, Inc., 00013 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00014 ******************************************************************************/ 00015 00016 #include "basix/mmx_syntax.hpp" 00017 #include "basix/parse_tools.hpp" 00018 namespace mmx { 00019 00020 #define s (obj->lex_string) 00021 #define n (obj->lex_length) 00022 00023 #define start (obj->lex_start) 00024 #define prev (obj->lex_prev) 00025 #define pos (obj->lex_pos) 00026 00027 #define start_line (obj->lex_start_line) 00028 #define prev_line (obj->lex_prev_line) 00029 #define line (obj->lex_line) 00030 00031 #define start_begin_line (obj->lex_start_begin_line) 00032 #define prev_begin_line (obj->lex_prev_begin_line) 00033 #define begin_line (obj->lex_begin_line) 00034 00035 #define file (obj->lex_file_name) 00036 #define input (obj->lex_input_number) 00037 00038 #define INC_POS { pos++; } 00039 #define ADD_POS(z){ pos += z; } 00040 #define INC_LINE { pos++; line++; begin_line = pos; } 00041 #define SET_PREV { prev = pos; prev_line = line; prev_begin_line = begin_line; } 00042 #define SAVE_START { start = pos; start_line = line; start_begin_line = begin_line; } 00043 #define RESTORE_START { pos = start; line = start_line; begin_line = start_begin_line; } 00044 00045 #define produce(code) { \ 00046 *lval = lit(string (s + start, pos - start)); \ 00047 source_insert (*lval, source_location (*lval, file, input, \ 00048 source_position(start, start_line, start - start_begin_line),\ 00049 source_position(pos, line, pos - begin_line))); \ 00050 return code; } 00051 00052 #define test(c,code) \ 00053 if ((pos<n) && (s[pos]==c)) { INC_POS; produce (code); } 00054 00055 #define keyword(c,k,code) \ 00056 if ((s[pos]==c) && test_keyword(k,obj)) produce (code); 00057 00058 #define is_alpha(c) \ 00059 ((((c)>='0') && ((c)<='9')) || \ 00060 (((c)>='A') && ((c)<='Z')) || \ 00061 (((c)>='a') && ((c)<='z')) || \ 00062 (((c)=='_') || ((c)=='?') || ((c)=='$'))) 00063 00064 static bool 00065 test_keyword (const char* k, parse_instance* obj) { 00066 RESTORE_START; 00067 while ((*k)!='\0') { 00068 if (pos>=n) return false; 00069 if (s[pos]!=(*k)) return false; 00070 INC_POS; 00071 k++; 00072 } 00073 if (pos>=n) return true; 00074 if (is_alpha (s[pos])) return false; 00075 return true; 00076 } 00077 00078 int 00079 mmx_lex (generic *lval, parse_instance* obj) { 00080 if (pos == n) { 00081 s= NULL; 00082 *lval= generic (); 00083 return 0; 00084 } 00085 00086 SET_PREV; 00087 char c= s[pos]; 00088 while ((c<=' ') || (c>'~')) { 00089 if ((c == '\n') || (c == '\r')) 00090 INC_LINE 00091 else 00092 INC_POS; 00093 if (pos == n) { 00094 s= NULL; 00095 *lval= generic (); 00096 return 0; 00097 } 00098 c= s[pos]; 00099 } 00100 00101 SAVE_START; 00102 INC_POS; 00103 switch (c) { 00104 case '!': 00105 test ('=', NOT_EQUAL); 00106 if ((pos<n) && (s[pos]=='<')) { 00107 INC_POS; 00108 test ('=', NOT_LEQ); 00109 produce (NOT_LESS); 00110 } 00111 if ((pos<n) && (s[pos]=='>')) { 00112 INC_POS; 00113 test ('=', NOT_GEQ); 00114 produce (NOT_GREATER); 00115 } 00116 produce (NOT); 00117 case '\042': 00118 while (pos<n) { 00119 if ((s[pos]=='\\') && ((pos+2)<n)) { ADD_POS(2); continue; } 00120 if (s[pos]=='\n' || (s[pos]=='\r')) { INC_LINE; continue; } 00121 if (s[pos]=='\042') { INC_POS; break; } 00122 INC_POS; 00123 } 00124 produce (STRING); 00125 case '#': 00126 produce (SIZE); 00127 case '$': 00128 goto identifier; 00129 case '%': 00130 produce (PERCENT); 00131 case '&': 00132 produce (AMPERSAND); 00133 case '\047': 00134 produce (QUOTE); 00135 case '(': 00136 case ')': 00137 produce (c); 00138 case '*': 00139 test ('=', TIMES_ASSIGN); 00140 produce (TIMES); 00141 case '+': 00142 test ('=', PLUS_ASSIGN); 00143 test ('+', INC); 00144 produce (PLUS); 00145 case ',': 00146 produce (c); 00147 case '-': 00148 test ('=', MINUS_ASSIGN); 00149 test ('-', DEC); 00150 test ('>', INTO); 00151 produce (MINUS); 00152 case '.': 00153 test ('.', RANGE); 00154 produce (ACCESS); 00155 case '/': 00156 test ('\\', AND); 00157 if (s[pos]=='/') { 00158 INC_POS; 00159 while ((pos<n) && (s[pos]!='\n')) { 00160 INC_POS; 00161 } 00162 if (pos<n) { INC_LINE; } 00163 return mmx_lex (lval, obj); 00164 } 00165 if (s[pos]=='{') { 00166 nat level= 1; 00167 INC_POS; 00168 while ((pos+1<n) && (level>0)) { 00169 if (s[pos]=='\n' || (s[pos]=='\r')) { 00170 INC_LINE; continue; } 00171 if ((s[pos]=='/') && (s[pos+1]=='{')) { 00172 level++; ADD_POS(2); continue; } 00173 if ((s[pos]=='}') && (s[pos+1]=='/')) { 00174 level--; ADD_POS(2); continue; } 00175 INC_POS; 00176 } 00177 if (level>0) pos=n; 00178 return mmx_lex (lval, obj); 00179 } 00180 if (s[pos] == '\"') { 00181 INC_POS; 00182 while ((pos+1<n) && ((s[pos]!='\"') || (s[pos+1]!='/'))) { 00183 if (s[pos]=='\n' || (s[pos]=='\r')) { 00184 INC_LINE; } 00185 else { 00186 INC_POS; } 00187 } 00188 if (pos+1 < n) { 00189 ADD_POS(2); 00190 } 00191 produce (STRING); 00192 } 00193 test ('=', OVER_ASSIGN); 00194 produce (OVER); 00195 case '0': 00196 case '1': 00197 case '2': 00198 case '3': 00199 case '4': 00200 case '5': 00201 case '6': 00202 case '7': 00203 case '8': 00204 case '9': 00205 while ((pos<n) && (s[pos]>='0') && (s[pos]<='9')) INC_POS; 00206 if ((pos<n+1) && (s[pos]=='.') && (s[pos+1]>='0') && (s[pos+1]<='9')) { 00207 ADD_POS(2); 00208 while ((pos<n) && (s[pos]>='0') && (s[pos]<='9')) INC_POS; 00209 if ((pos<n+1) && ((s[pos]=='e') || (s[pos]=='E'))) { 00210 nat p= ((pos<n+2) && (s[pos+1]=='-') ? 2: 1); 00211 if ((s[pos+p]>='0') && (s[pos+p]<='9')) { 00212 ADD_POS(p); 00213 while ((pos<n) && (s[pos]>='0') && (s[pos]<='9')) INC_POS; 00214 } 00215 } 00216 produce (FLOATING); 00217 } 00218 else produce (INTEGER); 00219 case ':': 00220 if ((pos<n) && (s[pos]=='=')) { 00221 if ((pos+1<n) && (s[pos+1]=='>')) { 00222 ADD_POS(2); produce (ASSIGN_MACRO); } 00223 INC_POS; produce (ASSIGN); 00224 } 00225 test ('>', TRANSTYPE); 00226 test (':', SCOPE); 00227 if ((pos+1<n) && (s[pos]=='-') && (s[pos+1]=='>')) { 00228 ADD_POS(2); produce (MAPSTO); 00229 } 00230 produce (TYPE); 00231 case ';': 00232 produce (c); 00233 case '<': 00234 if ((pos<n) && (s[pos]=='<')) { 00235 if ((pos+1<n) && (s[pos+1]=='<')) { 00236 ADD_POS(2); produce (LEFT_FLUX_BIN); } 00237 if ((pos+1<n) && (s[pos+1]=='*')) { 00238 ADD_POS(2); produce (LEFT_FLUX_VAR); } 00239 if ((pos+1<n) && (s[pos+1]=='%')) { 00240 ADD_POS(2); produce (LEFT_FLUX_STR); } 00241 if ((pos+1<n) && (s[pos+1]=='=')) { 00242 ADD_POS(2); produce (LL_ASSIGN); } 00243 INC_POS; produce (LEFT_FLUX); 00244 } 00245 if ((pos+1<n) && (s[pos]=='=') && (s[pos+1]=='>')) { 00246 ADD_POS(2); produce (EQUIVALENT); 00247 } 00248 test ('=', LEQ); 00249 produce (LESS); 00250 case '=': 00251 if ((pos<n) && (s[pos]=='=')) { 00252 if ((pos+1<n) && (s[pos+1]=='>')) { 00253 ADD_POS(2); produce (DEFINE_MACRO); } 00254 INC_POS; produce (DEFINE); 00255 } 00256 test ('>', IMPLIES); 00257 produce (EQUAL); 00258 case '>': 00259 test ('<', APPEND); 00260 if ((pos<n) && (s[pos]=='>')) { 00261 if ((pos+1<n) && (s[pos+1]=='>')) { 00262 ADD_POS(2); produce (RIGHT_FLUX_BIN); } 00263 if ((pos+1<n) && (s[pos+1]=='=')) { 00264 ADD_POS(2); produce (GG_ASSIGN); } 00265 INC_POS; produce (RIGHT_FLUX); 00266 } 00267 test ('=', GEQ); 00268 produce (GREATER); 00269 case '?': 00270 goto identifier; 00271 case '@': 00272 test ('+', OPLUS); 00273 test ('-', OMINUS); 00274 test ('*', OTIMES); 00275 test ('/', OOVER); 00276 produce (COMPOSE); 00277 case 'A': 00278 case 'B': 00279 case 'C': 00280 case 'D': 00281 case 'E': 00282 case 'F': 00283 case 'G': 00284 case 'H': 00285 case 'I': 00286 case 'J': 00287 case 'K': 00288 case 'L': 00289 case 'M': 00290 case 'N': 00291 case 'O': 00292 case 'P': 00293 case 'Q': 00294 case 'R': 00295 case 'S': 00296 case 'T': 00297 case 'U': 00298 case 'V': 00299 case 'W': 00300 case 'X': 00301 case 'Y': 00302 case 'Z': 00303 goto identifier; 00304 case '[': 00305 produce (c); 00306 case '\\': 00307 test ('/', OR); 00308 produce (c); 00309 case ']': 00310 produce (c); 00311 case '^': 00312 test ('^', FILL); 00313 produce (POWER); 00314 case '_': 00315 goto identifier; 00316 case '`': 00317 produce (BACKQUOTE); 00318 case 'a': 00319 if (pos<n) { 00320 keyword ('b', "abstract", ABSTRACT); 00321 keyword ('d', "add", ADD); 00322 keyword ('n', "and", SEQAND); 00323 keyword ('s', "assume", ASSUME); 00324 } 00325 goto identifier; 00326 case 'b': 00327 if (pos<n) keyword ('r', "break", BREAK); 00328 goto identifier; 00329 case 'c': 00330 if ((pos+3)<n) { 00331 if (s[pos]=='a') { 00332 INC_POS; 00333 keyword ('s', "case", CASE); 00334 if (s[pos] == 't') { 00335 INC_POS; 00336 keyword ('c', "catch", CATCH); 00337 keyword ('e', "category", CATEGORY); 00338 } 00339 } 00340 keyword ('l', "class", CLASS); 00341 if ((s[pos]=='o') && (s[pos+1]=='n')) { 00342 ADD_POS(2); 00343 keyword ('c', "concrete", CONCRETE); 00344 if (((pos+1)<n) && s[pos]=='s' && s[pos+1]=='t') { 00345 ADD_POS(2); 00346 keyword ('a', "constant", CONSTANT); 00347 keyword ('r', "constructor", CONSTRUCTOR); 00348 } 00349 keyword ('t', "continue", CONTINUE); 00350 } 00351 } 00352 goto identifier; 00353 case 'd': 00354 if (pos<n) { 00355 keyword ('e', "destructor", DESTRUCTOR); 00356 if (s[pos]=='i') { 00357 INC_POS; 00358 keyword ('r', "direct", DIRECT); 00359 keyword ('v', "div", DIV); 00360 } 00361 if (s[pos]=='o') { 00362 INC_POS; 00363 if ((pos >= n) || (!is_alpha (s[pos]))) produce (DO); 00364 keyword ('w', "downto", DOWNTO); 00365 } 00366 } 00367 goto identifier; 00368 case 'e': 00369 if ((pos+1)<n) { 00370 keyword ('l', "else", ELSE); 00371 keyword ('v', "evolutive", EVOLUTIVE); 00372 if (s[pos]=='x') { 00373 INC_POS; 00374 keyword ('i', "exists", EXISTS); 00375 keyword ('p', "export", EXPORT); 00376 if (((pos+2)<n) && (s[pos]=='t') && (s[pos+1]=='e')) { 00377 ADD_POS(2); 00378 keyword ('n', "extend", EXTEND); 00379 keyword ('r', "extern", EXTERN); 00380 } 00381 } 00382 } 00383 goto identifier; 00384 case 'f': 00385 if (pos<n) { 00386 if ((pos+1<n) && (s[pos]=='o') && (s[pos+1]=='r')) { 00387 if ((pos+2<n) && (s[pos+2]=='a')) { 00388 keyword ('o', "forall", FORALL); } 00389 else if ((pos+2<n) && (s[pos+2]=='e')) { 00390 keyword ('o', "foreign", FOREIGN); } 00391 else { 00392 keyword ('o', "for", FOR); } 00393 } 00394 keyword ('r', "from", FROM); 00395 } 00396 goto identifier; 00397 case 'g': 00398 if (pos<n) keyword ('e', "generate", GENERATE); 00399 goto identifier; 00400 case 'h': 00401 if (pos<n) { 00402 keyword ('a', "has", HAS); 00403 keyword ('i', "hidden", HIDDEN); 00404 keyword ('o', "holds", HOLDS); 00405 } 00406 goto identifier; 00407 case 'i': 00408 if (pos<n) { 00409 keyword ('f', "if", IF); 00410 keyword ('m', "import", IMPORT); 00411 if (s[pos]=='n') { 00412 INC_POS; 00413 if ((pos >= n) || (!is_alpha (s[pos]))) produce (IN); 00414 keyword ('d', "indirect", INDIRECT); 00415 keyword ('f', "infix", INFIX); 00416 keyword ('h', "inherit", INHERIT); 00417 keyword ('l', "inline", INLINE); 00418 keyword ('p', "inplace", INPLACE); 00419 if ((pos+3<n) && (s[pos]=='t') && (s[pos+1]=='e') && (s[pos+2]=='r')) { 00420 ADD_POS(3); 00421 keyword ('a', "interactive", INTERACTIVE); 00422 keyword ('n', "intern", INTERN); 00423 } 00424 } 00425 } 00426 goto identifier; 00427 case 'j': 00428 if (pos<n) keyword ('o', "join", JOIN); 00429 goto identifier; 00430 case 'k': 00431 if (pos<n) keyword ('e', "keyword", KEYWORD); 00432 goto identifier; 00433 case 'l': 00434 if (pos<n) { 00435 keyword ('a', "lambda", LAMBDA); 00436 keyword ('i', "literal", LITERAL); 00437 if (((pos+1)<n) && (s[pos]=='o')) { 00438 INC_POS; 00439 keyword ('c', "locked", LOCKED); 00440 keyword ('o', "loop", LOOP); 00441 } 00442 } 00443 goto identifier; 00444 case 'm': 00445 if (pos<n) { 00446 keyword ('a', "macro", MACRO); 00447 keyword ('e', "method", METHOD); 00448 if (((pos+1)<n) && (s[pos]=='o') && (s[pos+1]=='d')) { 00449 if ((pos+2<n) && (s[pos+2]=='u')) { 00450 keyword ('o', "module", MODULE); } 00451 else { 00452 keyword ('o', "mod", MOD); } 00453 } 00454 keyword ('u', "mutable", MUTABLE); 00455 } 00456 goto identifier; 00457 case 'n': 00458 goto identifier; 00459 case 'o': 00460 if (pos<n) { 00461 keyword ('p', "operator", OPERATOR); 00462 keyword ('r', "or", SEQOR); 00463 keyword ('u', "outline", OUTLINE); 00464 } 00465 goto identifier; 00466 case 'p': 00467 if ((pos+1)<n) { 00468 keyword ('a', "packed", PACKED); 00469 keyword ('e', "penalty", PENALTY); 00470 keyword ('o', "postfix", POSTFIX); 00471 if (s[pos]=='r') { 00472 INC_POS; 00473 keyword ('e', "prefix", PREFIX); 00474 keyword ('i', "private", PRIVATE); 00475 keyword ('o', "protected", PROTECTED); 00476 } 00477 keyword ('u', "public", PUBLIC); 00478 } 00479 goto identifier; 00480 case 'q': 00481 if (pos<n) keyword ('u', "quo", QUO); 00482 goto identifier; 00483 case 'r': 00484 if ((pos+1)<n) { 00485 keyword ('a', "raise", RAISE); 00486 if (s[pos]=='e') { 00487 INC_POS; 00488 keyword ('m', "rem", REM); 00489 keyword ('t', "return", RETURN); 00490 } 00491 } 00492 goto identifier; 00493 case 's': 00494 if (pos<n) { 00495 keyword ('e', "sequel", SEQUEL); 00496 keyword ('p', "split", SPLIT); 00497 keyword ('t', "step", STEP); 00498 } 00499 goto identifier; 00500 case 't': 00501 if ((pos+1)<n) { 00502 if (s[pos]=='h') { 00503 INC_POS; 00504 keyword ('e', "then", THEN); 00505 keyword ('i', "this", THIS); 00506 } 00507 keyword ('r', "try", TRY); 00508 keyword ('o', "to", TO); 00509 } 00510 goto identifier; 00511 case 'u': 00512 if (((pos+1)<n) && (s[pos]=='n')) { 00513 INC_POS; 00514 keyword ('p', "unpacked", UNPACKED); 00515 keyword ('t', "until", UNTIL); 00516 } 00517 goto identifier; 00518 case 'v': 00519 if (((pos+1)<n) && (s[pos]=='a')) { 00520 INC_POS; 00521 keyword ('l', "value", VALUE); 00522 } 00523 goto identifier; 00524 case 'w': 00525 if (pos<n) { 00526 keyword ('h', "while", WHILE); 00527 keyword ('i', "with", WITH); 00528 } 00529 goto identifier; 00530 case 'x': 00531 if (pos<n) keyword ('o', "xor", XOR); 00532 goto identifier; 00533 case 'y': 00534 if (pos<n) keyword ('i', "yield", YIELD); 00535 goto identifier; 00536 case 'z': 00537 goto identifier; 00538 case '{': 00539 produce (c); 00540 case '|': 00541 test ('|', VWHERE); 00542 produce (WHERE); 00543 case '}': 00544 produce (c); 00545 case '~': 00546 test ('>', CONVERTS); 00547 produce (TILDA); 00548 default: 00549 return mmx_lex (lval, obj); 00550 00551 identifier: 00552 while ((pos<n) && is_alpha (s[pos])) INC_POS; 00553 produce (IDENTIFIER); 00554 } 00555 } 00556 00557 } // namespace mmx