basix_doc 0.1
/Users/mourrain/Devel/mmx/basix/src/mmx_lexer.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : mmx_lexer.cpp
00004 * DESCRIPTION: Lexical analysis of mathemagix files
00005 * COPYRIGHT  : (C) 2000  Joris van der Hoeven
00006 *******************************************************************************
00007 * NOTE: This file is included from mmx_parser.ypp
00008 *       It is distributed apart together with the sources
00009 *******************************************************************************
00010 * This software falls under the GNU general public license and comes WITHOUT
00011 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00012 * If you don't have this file, write to the Free Software Foundation, Inc.,
00013 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00014 ******************************************************************************/
00015 
00016 #include "basix/mmx_syntax.hpp"
00017 #include "basix/parse_tools.hpp"
00018 namespace mmx {
00019 
/* ---------------------------------------------------------------------------
 * Shorthand for the lexer state kept in the parse_instance.
 * Every macro in this section expects a variable named `obj` (pointer to the
 * parse_instance) to be in scope; `produce` and `test` additionally expect
 * `lval` and must be expanded inside a function returning the token code.
 * ------------------------------------------------------------------------ */

/* Text being scanned and its length. */
#define s     (obj->lex_string)
#define n     (obj->lex_length)

/* Offsets into the text: start of the current token, the position saved by
   SET_PREV, and the read cursor. */
#define start (obj->lex_start)
#define prev  (obj->lex_prev)
#define pos   (obj->lex_pos)

/* Line counters matching start / prev / pos. */
#define start_line (obj->lex_start_line)
#define prev_line  (obj->lex_prev_line)
#define line       (obj->lex_line)

/* Offset of the first character of the line, matching start / prev / pos. */
#define start_begin_line (obj->lex_start_begin_line)
#define prev_begin_line  (obj->lex_prev_begin_line)
#define begin_line       (obj->lex_begin_line)

/* Identification of the source, passed through to source_location. */
#define file  (obj->lex_file_name)
#define input (obj->lex_input_number)

/* Cursor movement.  These expand to brace blocks (not do/while), so they may
   be written with or without a trailing semicolon. */
#define INC_POS        { pos++; }
#define ADD_POS(count) { pos += (count); }
#define INC_LINE       { pos++; line++; begin_line = pos; }

/* Save / restore the complete cursor (offset, line, start of line). */
#define SET_PREV \
  { prev = pos; prev_line = line; prev_begin_line = begin_line; }
#define SAVE_START \
  { start = pos; start_line = line; start_begin_line = begin_line; }
#define RESTORE_START \
  { pos = start; line = start_line; begin_line = start_begin_line; }

/* Store the text [start, pos) as a literal in *lval, attach its source
   location, and return `token` from the enclosing function. */
#define produce(token) { \
  *lval = lit(string (s + start, pos - start));                         \
  source_insert (*lval, source_location (*lval, file, input,            \
                source_position(start, start_line, start - start_begin_line),\
                source_position(pos, line, pos - begin_line)));         \
  return token; }

/* If the next character is `ch`, consume it and produce `token`. */
#define test(ch,token) \
  if ((pos<n) && (s[pos]==(ch))) { INC_POS; produce (token); }

/* If the next character is `ch` and the current token spells the reserved
   word `word`, produce `token`.  No bounds check is done on s[pos] here. */
#define keyword(ch,word,token) \
  if ((s[pos]==(ch)) && test_keyword(word,obj)) produce (token);

/* True for characters that may continue an identifier:
   digits, ASCII letters, '_', '?' and '$'. */
#define is_alpha(ch) \
  ((((ch)>='0') && ((ch)<='9')) || \
   (((ch)>='A') && ((ch)<='Z')) || \
   (((ch)>='a') && ((ch)<='z')) || \
   (((ch)=='_') || ((ch)=='?') || ((ch)=='$')))
00063 
00064 static bool
00065 test_keyword (const char* k, parse_instance* obj) {
00066   RESTORE_START;
00067   while ((*k)!='\0') {
00068     if (pos>=n) return false;
00069     if (s[pos]!=(*k)) return false;
00070     INC_POS;
00071     k++;
00072   }
00073   if (pos>=n) return true;
00074   if (is_alpha (s[pos])) return false;
00075   return true;
00076 }
00077 
00078 int
00079 mmx_lex (generic *lval, parse_instance* obj) {
00080   if (pos == n) {
00081     s= NULL;
00082     *lval= generic ();
00083     return 0;
00084   }
00085 
00086   SET_PREV;
00087   char c= s[pos];
00088   while ((c<=' ') || (c>'~')) {
00089     if ((c == '\n') || (c == '\r')) 
00090       INC_LINE
00091     else
00092       INC_POS;
00093     if (pos == n) {
00094       s= NULL;
00095       *lval= generic ();
00096       return 0;
00097     }
00098     c= s[pos];
00099   }
00100 
00101   SAVE_START;
00102   INC_POS;
00103   switch (c) {
00104   case '!':
00105     test ('=', NOT_EQUAL);
00106     if ((pos<n) && (s[pos]=='<')) {
00107       INC_POS;
00108       test ('=', NOT_LEQ);
00109       produce (NOT_LESS);
00110     }
00111     if ((pos<n) && (s[pos]=='>')) {
00112       INC_POS;
00113       test ('=', NOT_GEQ);
00114       produce (NOT_GREATER);
00115     }
00116     produce (NOT);
00117   case '\042':
00118     while (pos<n) {
00119       if ((s[pos]=='\\') && ((pos+2)<n)) { ADD_POS(2); continue; }
00120       if (s[pos]=='\n' || (s[pos]=='\r')) { INC_LINE; continue; }
00121       if (s[pos]=='\042') { INC_POS; break; }
00122       INC_POS;
00123     }
00124     produce (STRING);
00125   case '#':
00126     produce (SIZE);
00127   case '$':
00128     goto identifier;
00129   case '%':
00130     produce (PERCENT);
00131   case '&':
00132     produce (AMPERSAND);
00133   case '\047':
00134     produce (QUOTE);
00135   case '(':
00136   case ')':
00137     produce (c);
00138   case '*':
00139     test ('=', TIMES_ASSIGN);
00140     produce (TIMES);
00141   case '+':
00142     test ('=', PLUS_ASSIGN);
00143     test ('+', INC);
00144     produce (PLUS);
00145   case ',':
00146     produce (c);
00147   case '-':
00148     test ('=', MINUS_ASSIGN);
00149     test ('-', DEC);
00150     test ('>', INTO);
00151     produce (MINUS);
00152   case '.':
00153     test ('.', RANGE);
00154     produce (ACCESS);
00155   case '/':
00156     test ('\\', AND);
00157     if (s[pos]=='/') {
00158       INC_POS;
00159       while ((pos<n) && (s[pos]!='\n')) {
00160         INC_POS;
00161       }
00162       if (pos<n) { INC_LINE; }
00163       return mmx_lex (lval, obj);
00164     }
00165     if (s[pos]=='{') {
00166       nat level= 1;
00167       INC_POS;
00168       while ((pos+1<n) && (level>0)) {
00169         if (s[pos]=='\n' || (s[pos]=='\r')) {
00170           INC_LINE; continue; }
00171         if ((s[pos]=='/') && (s[pos+1]=='{')) {
00172           level++; ADD_POS(2); continue; }
00173         if ((s[pos]=='}') && (s[pos+1]=='/')) {
00174           level--; ADD_POS(2); continue; }
00175         INC_POS;
00176       }
00177       if (level>0) pos=n;
00178       return mmx_lex (lval, obj);
00179     }
00180     if (s[pos] == '\"') {
00181       INC_POS;
00182       while ((pos+1<n) && ((s[pos]!='\"') || (s[pos+1]!='/'))) {
00183         if (s[pos]=='\n' || (s[pos]=='\r')) {
00184           INC_LINE; }
00185         else {
00186           INC_POS; }
00187       }
00188       if (pos+1 < n) {
00189         ADD_POS(2);
00190       }
00191       produce (STRING);
00192     }
00193     test ('=', OVER_ASSIGN);
00194     produce (OVER);
00195   case '0':
00196   case '1':
00197   case '2':
00198   case '3':
00199   case '4':
00200   case '5':
00201   case '6':
00202   case '7':
00203   case '8':
00204   case '9':
00205     while ((pos<n) && (s[pos]>='0') && (s[pos]<='9')) INC_POS;
00206     if ((pos<n+1) && (s[pos]=='.') && (s[pos+1]>='0') && (s[pos+1]<='9')) {
00207       ADD_POS(2);
00208       while ((pos<n) && (s[pos]>='0') && (s[pos]<='9')) INC_POS;
00209       if ((pos<n+1) && ((s[pos]=='e') || (s[pos]=='E'))) {
00210         nat p= ((pos<n+2) && (s[pos+1]=='-') ? 2: 1);
00211         if ((s[pos+p]>='0') && (s[pos+p]<='9')) {
00212           ADD_POS(p);
00213           while ((pos<n) && (s[pos]>='0') && (s[pos]<='9')) INC_POS;
00214         }
00215       }
00216       produce (FLOATING);
00217     }
00218     else produce (INTEGER);
00219   case ':':
00220     if ((pos<n) && (s[pos]=='=')) {
00221       if ((pos+1<n) && (s[pos+1]=='>')) {
00222         ADD_POS(2); produce (ASSIGN_MACRO); }
00223       INC_POS; produce (ASSIGN);
00224     }
00225     test ('>', TRANSTYPE);
00226     test (':', SCOPE);
00227     if ((pos+1<n) && (s[pos]=='-') && (s[pos+1]=='>')) {
00228       ADD_POS(2); produce (MAPSTO);
00229     }
00230     produce (TYPE);
00231   case ';':
00232     produce (c);
00233   case '<':
00234     if ((pos<n) && (s[pos]=='<')) {
00235       if ((pos+1<n) && (s[pos+1]=='<')) {
00236         ADD_POS(2); produce (LEFT_FLUX_BIN); }
00237       if ((pos+1<n) && (s[pos+1]=='*')) {
00238         ADD_POS(2); produce (LEFT_FLUX_VAR); }
00239       if ((pos+1<n) && (s[pos+1]=='%')) {
00240         ADD_POS(2); produce (LEFT_FLUX_STR); }
00241       if ((pos+1<n) && (s[pos+1]=='=')) {
00242         ADD_POS(2); produce (LL_ASSIGN); }
00243       INC_POS; produce (LEFT_FLUX);
00244     }
00245     if ((pos+1<n) && (s[pos]=='=') && (s[pos+1]=='>')) {
00246       ADD_POS(2); produce (EQUIVALENT);
00247     }
00248     test ('=', LEQ);
00249     produce (LESS);
00250   case '=':
00251     if ((pos<n) && (s[pos]=='=')) {
00252       if ((pos+1<n) && (s[pos+1]=='>')) {
00253         ADD_POS(2); produce (DEFINE_MACRO); }
00254       INC_POS; produce (DEFINE);
00255     }
00256     test ('>', IMPLIES);
00257     produce (EQUAL);
00258   case '>':
00259     test ('<', APPEND);
00260     if ((pos<n) && (s[pos]=='>')) {
00261       if ((pos+1<n) && (s[pos+1]=='>')) {
00262         ADD_POS(2); produce (RIGHT_FLUX_BIN); }
00263       if ((pos+1<n) && (s[pos+1]=='=')) {
00264         ADD_POS(2); produce (GG_ASSIGN); }
00265       INC_POS; produce (RIGHT_FLUX);
00266     }
00267     test ('=', GEQ);
00268     produce (GREATER);
00269   case '?':
00270     goto identifier;
00271   case '@':
00272     test ('+', OPLUS);
00273     test ('-', OMINUS);
00274     test ('*', OTIMES);
00275     test ('/', OOVER);
00276     produce (COMPOSE);
00277   case 'A':
00278   case 'B':
00279   case 'C':
00280   case 'D':
00281   case 'E':
00282   case 'F':
00283   case 'G':
00284   case 'H':
00285   case 'I':
00286   case 'J':
00287   case 'K':
00288   case 'L':
00289   case 'M':
00290   case 'N':
00291   case 'O':
00292   case 'P':
00293   case 'Q':
00294   case 'R':
00295   case 'S':
00296   case 'T':
00297   case 'U':
00298   case 'V':
00299   case 'W':
00300   case 'X':
00301   case 'Y':
00302   case 'Z':
00303     goto identifier;
00304   case '[':
00305     produce (c);
00306   case '\\':
00307     test ('/', OR);
00308     produce (c);
00309   case ']':
00310     produce (c);
00311   case '^':
00312     test ('^', FILL);
00313     produce (POWER);
00314   case '_':
00315     goto identifier;
00316   case '`':
00317     produce (BACKQUOTE);
00318   case 'a':
00319     if (pos<n) {
00320       keyword ('b', "abstract", ABSTRACT);
00321       keyword ('d', "add", ADD);
00322       keyword ('n', "and", SEQAND);
00323       keyword ('s', "assume", ASSUME);
00324     }
00325     goto identifier;
00326   case 'b':
00327     if (pos<n) keyword ('r', "break", BREAK);
00328     goto identifier;
00329   case 'c':
00330     if ((pos+3)<n) {
00331       if (s[pos]=='a') {
00332         INC_POS;
00333         keyword ('s', "case", CASE);
00334         if (s[pos] == 't') {
00335           INC_POS;
00336           keyword ('c', "catch", CATCH);
00337           keyword ('e', "category", CATEGORY);
00338         }
00339       }
00340       keyword ('l', "class", CLASS);
00341       if ((s[pos]=='o') && (s[pos+1]=='n')) {
00342         ADD_POS(2);
00343         keyword ('c', "concrete", CONCRETE);
00344         if (((pos+1)<n) && s[pos]=='s' && s[pos+1]=='t') {
00345           ADD_POS(2);
00346           keyword ('a', "constant", CONSTANT);
00347           keyword ('r', "constructor", CONSTRUCTOR);
00348         }
00349         keyword ('t', "continue", CONTINUE);
00350       }
00351     }
00352     goto identifier;
00353   case 'd':
00354     if (pos<n) {
00355       keyword ('e', "destructor", DESTRUCTOR);
00356       if (s[pos]=='i') {
00357         INC_POS;
00358         keyword ('r', "direct", DIRECT);
00359         keyword ('v', "div", DIV);
00360       }
00361       if (s[pos]=='o') {
00362         INC_POS;
00363         if ((pos >= n) || (!is_alpha (s[pos]))) produce (DO);
00364         keyword ('w', "downto", DOWNTO);
00365       }
00366     }
00367     goto identifier;
00368   case 'e':
00369     if ((pos+1)<n) {
00370       keyword ('l', "else", ELSE);
00371       keyword ('v', "evolutive", EVOLUTIVE);
00372       if (s[pos]=='x') {
00373         INC_POS;
00374         keyword ('i', "exists", EXISTS);
00375         keyword ('p', "export", EXPORT);
00376         if (((pos+2)<n) && (s[pos]=='t') && (s[pos+1]=='e')) {
00377           ADD_POS(2);
00378           keyword ('n', "extend", EXTEND);
00379           keyword ('r', "extern", EXTERN);
00380         }
00381       }
00382     }
00383     goto identifier;
00384   case 'f':
00385     if (pos<n) {
00386       if ((pos+1<n) && (s[pos]=='o') && (s[pos+1]=='r')) {
00387         if ((pos+2<n) && (s[pos+2]=='a')) {
00388           keyword ('o', "forall", FORALL); }
00389         else if ((pos+2<n) && (s[pos+2]=='e')) {
00390           keyword ('o', "foreign", FOREIGN); }
00391         else {
00392           keyword ('o', "for", FOR); }
00393       }
00394       keyword ('r', "from", FROM);
00395     }
00396     goto identifier;
00397   case 'g':
00398     if (pos<n) keyword ('e', "generate", GENERATE);
00399     goto identifier;
00400   case 'h':
00401     if (pos<n) {
00402       keyword ('a', "has", HAS);
00403       keyword ('i', "hidden", HIDDEN);
00404       keyword ('o', "holds", HOLDS);
00405     }
00406     goto identifier;
00407   case 'i':
00408     if (pos<n) {
00409       keyword ('f', "if", IF);
00410       keyword ('m', "import", IMPORT);
00411       if (s[pos]=='n') {
00412         INC_POS;
00413         if ((pos >= n) || (!is_alpha (s[pos]))) produce (IN);
00414         keyword ('d', "indirect", INDIRECT);
00415         keyword ('f', "infix", INFIX);
00416         keyword ('h', "inherit", INHERIT);
00417         keyword ('l', "inline", INLINE);
00418         keyword ('p', "inplace", INPLACE);
00419         if ((pos+3<n) && (s[pos]=='t') && (s[pos+1]=='e') && (s[pos+2]=='r')) {
00420           ADD_POS(3);
00421           keyword ('a', "interactive", INTERACTIVE);
00422           keyword ('n', "intern", INTERN);
00423         }
00424       }
00425     }
00426     goto identifier;
00427   case 'j':
00428     if (pos<n) keyword ('o', "join", JOIN);
00429     goto identifier;
00430   case 'k':
00431     if (pos<n) keyword ('e', "keyword", KEYWORD);
00432     goto identifier;
00433   case 'l':
00434     if (pos<n) {
00435       keyword ('a', "lambda", LAMBDA);
00436       keyword ('i', "literal", LITERAL);
00437       if (((pos+1)<n) && (s[pos]=='o')) {
00438         INC_POS;
00439         keyword ('c', "locked", LOCKED);
00440         keyword ('o', "loop", LOOP);
00441       }
00442     }
00443     goto identifier;
00444   case 'm':
00445     if (pos<n) {
00446       keyword ('a', "macro", MACRO);
00447       keyword ('e', "method", METHOD);
00448       if (((pos+1)<n) && (s[pos]=='o') && (s[pos+1]=='d')) {
00449         if ((pos+2<n) && (s[pos+2]=='u')) {
00450           keyword ('o', "module", MODULE); }
00451         else {
00452           keyword ('o', "mod", MOD); }
00453       }
00454       keyword ('u', "mutable", MUTABLE);
00455     }
00456     goto identifier;
00457   case 'n':
00458     goto identifier;
00459   case 'o':
00460     if (pos<n) {
00461       keyword ('p', "operator", OPERATOR);
00462       keyword ('r', "or", SEQOR);
00463       keyword ('u', "outline", OUTLINE);
00464     }
00465     goto identifier;
00466   case 'p':
00467     if ((pos+1)<n) {
00468       keyword ('a', "packed", PACKED);
00469       keyword ('e', "penalty", PENALTY);
00470       keyword ('o', "postfix", POSTFIX);
00471       if (s[pos]=='r') {
00472         INC_POS;
00473         keyword ('e', "prefix", PREFIX);
00474         keyword ('i', "private", PRIVATE);
00475         keyword ('o', "protected", PROTECTED);
00476       }
00477       keyword ('u', "public", PUBLIC);
00478     }
00479     goto identifier;
00480   case 'q':
00481     if (pos<n) keyword ('u', "quo", QUO);
00482     goto identifier;
00483   case 'r':
00484     if ((pos+1)<n) {
00485       keyword ('a', "raise", RAISE);
00486       if (s[pos]=='e') {
00487         INC_POS;
00488         keyword ('m', "rem", REM);
00489         keyword ('t', "return", RETURN);
00490       }
00491     }
00492     goto identifier;
00493   case 's':
00494     if (pos<n) {
00495       keyword ('e', "sequel", SEQUEL);
00496       keyword ('p', "split", SPLIT);
00497       keyword ('t', "step", STEP);
00498     }
00499     goto identifier;
00500   case 't':
00501     if ((pos+1)<n) {
00502       if (s[pos]=='h') {
00503         INC_POS;
00504         keyword ('e', "then", THEN);
00505         keyword ('i', "this", THIS);
00506       }
00507       keyword ('r', "try", TRY);
00508       keyword ('o', "to", TO);
00509     }
00510     goto identifier;
00511   case 'u':
00512     if (((pos+1)<n) && (s[pos]=='n')) {
00513       INC_POS;
00514       keyword ('p', "unpacked", UNPACKED);
00515       keyword ('t', "until", UNTIL);
00516     }
00517     goto identifier;
00518   case 'v':
00519     if (((pos+1)<n) && (s[pos]=='a')) {
00520       INC_POS;
00521       keyword ('l', "value", VALUE);
00522     }
00523     goto identifier;
00524   case 'w':
00525     if (pos<n) {
00526       keyword ('h', "while", WHILE);
00527       keyword ('i', "with", WITH);
00528     }
00529     goto identifier;
00530   case 'x':
00531     if (pos<n) keyword ('o', "xor", XOR);
00532     goto identifier;
00533   case 'y':
00534     if (pos<n) keyword ('i', "yield", YIELD);
00535     goto identifier;
00536   case 'z':
00537     goto identifier;
00538   case '{':
00539     produce (c);
00540   case '|':
00541     test ('|', VWHERE);
00542     produce (WHERE);
00543   case '}':
00544     produce (c);
00545   case '~':
00546     test ('>', CONVERTS);
00547     produce (TILDA);
00548   default:
00549     return mmx_lex (lval, obj);
00550 
00551   identifier:
00552     while ((pos<n) && is_alpha (s[pos])) INC_POS;
00553     produce (IDENTIFIER);
00554   }
00555 }
00556 
00557 } // namespace mmx
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines