basix_doc 0.1
/Users/mourrain/Devel/mmx/basix/src/string.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : string.cpp
00004 * DESCRIPTION: Strings with possible zero characters
00005 * COPYRIGHT  : (C) 2000  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012 
00013 #include <basix/string.hpp>
00014 #include <basix/vector.hpp>
00015 #include <string.h>
00016 namespace mmx {
00017 
00018 /******************************************************************************
00019 * Constructors
00020 ******************************************************************************/
00021 
00022 string::string (char c) {
00023   rep= new string_rep(1);
00024   rep->a[0]=c;
00025 }
00026 
00027 string::string (const char *a) {
00028   register nat i, n=strlen(a);
00029   rep= new string_rep(n);
00030   for (i=0; i<n; i++)
00031     rep->a[i]=a[i];
00032 }
00033 
00034 string::string (const char* a, nat n) {
00035   register nat i;
00036   rep= new string_rep(n);
00037   for (i=0; i<n; i++)
00038     rep->a[i]=a[i];
00039 }
00040 
00041 void
00042 string_rep::resize (nat n2) {
00043   nat l2;
00044   if (n2 > l) l2= max (n2, l << 1);
00045   else if (n2 < (l >> 1)) l2= n2;
00046   else { n= n2; return; }
00047   nat m= min (n, n2);
00048   char* b= (char*) mmx_malloc (l2);
00049   for (nat i=0; i<m; i++) b[i]= a[i];
00050   mmx_free ((void*) a, l);
00051   a= b;
00052   n= n2;
00053   l= l2;
00054 }
00055 
00056 /******************************************************************************
00057 * Hashing and string output
00058 ******************************************************************************/
00059 
00060 nat
00061 hash (const string& s) {
00062   register const char* a= S(s);
00063   register nat i, h=0, n= N(s);
00064   for (i=0; i<n; i++)
00065     h= (h<<1) ^ (h<<9) ^ (h>>23) ^ ((nat) a[i]);
00066   return h;
00067 }
00068 
00069 /******************************************************************************
00070 * Conversions
00071 ******************************************************************************/
00072 
00073 char*
00074 as_charp (const string& s) {
00075   nat i, n= N(s);
00076   char* r= (char*) mmx_malloc (n+1);
00077   for (i=0; i<n; i++) r[i]= s[i];
00078   r[i]= '\0';
00079   return r;
00080 }
00081 
00082 void
00083 free_charp (char* s) {
00084   mmx_free (s, strlen (s) + 1);
00085 }
00086 
00087 string
00088 as_string (void* ptr) {
00089   char buffer[100];
00090   sprintf (buffer, "0x%lx", (unsigned long) ptr);
00091   return string (buffer);
00092 }
00093 
00094 string
00095 as_string (int i) {
00096   char buffer[100];
00097   sprintf (buffer, "%d", i);
00098   return string (buffer);
00099 }
00100 
00101 string
00102 as_string (unsigned int i) {
00103   char buffer[100];
00104   sprintf (buffer, "%u", i);
00105   return string (buffer);
00106 }
00107 
00108 string
00109 as_string (long int i) {
00110   char buffer[100];
00111   sprintf (buffer, "%ld", i);
00112   return string (buffer);
00113 }
00114 
00115 string
00116 as_string (long unsigned int i) {
00117   char buffer[100];
00118   sprintf (buffer, "%lu", i);
00119   return string (buffer);
00120 }
00121 
00122 string
00123 as_string (long long int i) {
00124   char buffer[100];
00125   sprintf (buffer, "%lld", i);
00126   return string (buffer);
00127 }
00128 
00129 string
00130 as_string (long long unsigned int i) {
00131   char buffer[100];
00132   sprintf (buffer, "%llu", i);
00133   return string (buffer);
00134 }
00135 
00136 int
00137 as_int (const string& s) {
00138   int i=0, n=N(s), val=0;
00139   if (n==0) return 0;
00140   if (s[0]=='-') i++;
00141   while (i<n) {
00142     if (s[i]<'0') break;
00143     if (s[i]>'9') break;
00144     val *= 10;
00145     val += (int) (s[i]-'0');
00146     i++;
00147   }
00148   if (s[0]=='-') val=-val;
00149   return val;
00150 }
00151 
00152 string
00153 as_string (float x) {
00154   if (x == 0.0) return "0";
00155   char buffer[32];
00156   sprintf (buffer, "%1.3e", x);
00157   return string (buffer);
00158 }
00159 
00160 string (*as_string_hook) (double x)= NULL;
00161 
00162 string
00163 as_string (double x) {
00164   if (as_string_hook != NULL)
00165     return as_string_hook (x);
00166   if (x == 0.0) return "0";
00167   char buffer[32];
00168   sprintf (buffer, "%1.12g", x);
00169   return string (buffer);
00170 }
00171 
00172 string
00173 as_string (long double x) {
00174   char buffer[64];
00175   sprintf (buffer, "%Lf", x);
00176   return string (buffer);
00177 }
00178 
00179 double
00180 as_double (const string& s) {
00181   double x;
00182   char buffer[100];
00183   strncpy (buffer, s->a, 99);
00184   buffer[max(N(s),(nat) 99)]= '\0';
00185   sscanf (buffer, "%lf", &x);
00186   return x;
00187 }
00188 
00189 /******************************************************************************
00190 * Further routines for exact numeric input and output
00191 ******************************************************************************/
00192 
00193 template<typename C> inline void
00194 floating_to_string (const C& val, string& s, const char* fm) {
00195   char buffer[40];
00196   sprintf (buffer, fm, val);
00197   s << string (buffer);
00198 }
00199 
00200 template<typename C> inline void
00201 string_to_floating (const string& s, C& val, const char* fm) {
00202   nat n= max (N(s), (nat) 39);
00203   char buffer[40];
00204   strncpy (buffer, inside (s, 0), n);
00205   buffer [n]= '\0';
00206   sscanf (buffer, fm, &val);
00207 }
00208 
00209 STMPL void numeric_to_string (const float& x, string& s) {
00210   floating_to_string (x, s, "%1.8e"); }
00211 STMPL void numeric_to_string (const double& x, string& s) {
00212   floating_to_string (x, s, "%1.17le"); }
00213 STMPL void numeric_to_string (const long double& x, string& s) {
00214   floating_to_string (x, s, "%1.21Le"); }
00215 STMPL void string_to_numeric (const string& s, float& x) {
00216   string_to_floating (s, x, "%f"); }
00217 STMPL void string_to_numeric (const string& s, double& x) {
00218   string_to_floating (s, x, "%lf"); }
00219 STMPL void string_to_numeric (const string& s, long double& x) {
00220   string_to_floating (s, x, "%Lf"); }
00221 
00222 /******************************************************************************
00223 * Predicates
00224 ******************************************************************************/
00225 
00226 bool
00227 string::operator == (const char* s) const {
00228   register nat i, n= rep->n;
00229   register char* S= rep->a;
00230   for (i=0; i<n; i++) {
00231     if (s[i]!=S[i]) return false;
00232     if (s[i]=='\0') return false;
00233   }
00234   return (s[i]=='\0');
00235 }
00236 
00237 bool
00238 string::operator == (const string& s) const {
00239   register nat i;
00240   if (rep->n != s.rep->n) return false;
00241   for (i=0; i<rep->n; i++)
00242     if (rep->a[i] != s.rep->a[i]) return false;
00243   return true;
00244 }
00245 
00246 bool
00247 string::operator != (const char* s) const {
00248   register nat i, n= rep->n;
00249   register char* S= rep->a;
00250   for (i=0; i<n; i++) {
00251     if (s[i]!=S[i]) return true;
00252     if (s[i]=='\0') return true;
00253   }
00254   return (s[i]!='\0');
00255 }
00256 
00257 bool
00258 string::operator != (const string& s) const {
00259   register nat i;
00260   if (rep->n != s.rep->n) return true;
00261   for (i=0; i<rep->n; i++)
00262     if (rep->a[i] != s.rep->a[i]) return true;
00263   return false;
00264 }
00265 
00266 bool
00267 operator < (const string& s1, const string& s2) {
00268   register nat i;
00269   for (i=0; i<N(s1); i++) {
00270     if (i >= N(s2)) return false;
00271     if (s1[i] < s2[i]) return true;
00272     if (s2[i] < s1[i]) return false;
00273   }
00274   return false;
00275 }
00276 
00277 bool
00278 operator <= (const string& s1, const string& s2) {
00279   register nat i;
00280   for (i=0; i<N(s1); i++) {
00281     if (i >= N(s2)) return false;
00282     if (s1[i] < s2[i]) return true;
00283     if (s2[i] < s1[i]) return false;
00284   }
00285   return true;
00286 }
00287 
00288 bool
00289 operator > (const string& s1, const string& s2) {
00290   register nat i;
00291   for (i=0; i<N(s1); i++) {
00292     if (i >= N(s2)) return true;
00293     if (s1[i] > s2[i]) return true;
00294     if (s2[i] > s1[i]) return false;
00295   }
00296   return false;
00297 }
00298 
00299 bool
00300 operator >= (const string& s1, const string& s2) {
00301   register nat i;
00302   for (i=0; i<N(s1); i++) {
00303     if (i >= N(s2)) return true;
00304     if (s1[i] > s2[i]) return true;
00305     if (s2[i] > s1[i]) return false;
00306   }
00307   return true;
00308 }
00309 
00310 /******************************************************************************
00311 * Substrings and merging
00312 ******************************************************************************/
00313 
00314 bool
00315 starts (const string& s, const string& what) {
00316   return N(s) >= N(what) && s (0, N(what)) == what;
00317 }
00318 
00319 bool
00320 ends (const string& s, const string& what) {
00321   return N(s) >= N(what) && s (N(s) - N(what), N(s)) == what;
00322 }
00323 
00324 string
00325 string::operator () (nat i1, nat i2) const {
00326   register nat i;
00327   string r (i2-i1);
00328   for (i=i1; i<i2; i++) r.rep->a[i-i1]= rep->a[i];
00329   return r;
00330 }
00331 
00332 string
00333 copy (const string& s) {
00334   register nat i, n= N(s);
00335   string r (n);
00336   for (i=0; i<n; i++) r.rep->a[i]= s.rep->a[i];
00337   return r;
00338 }
00339 
00340 void
00341 string_rep::extend (nat d) {
00342   if (n+d > l) {
00343     register nat old_l= l;
00344     l= max (l << 1, n+d);
00345     char* b= (char*) mmx_malloc (l);
00346     for (nat i=0; i<n; i++)
00347       b[i]= a[i];
00348     mmx_free ((void*) a, old_l);
00349     a= b;
00350   }
00351   n += d;
00352 }
00353 
00354 string&
00355 string::operator << (char x) {
00356   secure ();
00357   rep->extend (1);
00358   rep->a[rep->n-1]= x;
00359   return *this;
00360 }
00361 
00362 string&
00363 string::operator << (const string& s) {
00364   register nat i, k1= rep->n, k2=N(s);
00365   secure ();
00366   rep->extend (k2);
00367   for (i=0; i<k2; i++)
00368     rep->a[i+k1]= s.rep->a[i];
00369   return *this;
00370 }
00371 
00372 string&
00373 string::operator >> (char& x) {
00374   ASSERT (rep->n != 0, "non empty string expected");
00375   secure ();
00376   x= rep->a[rep->n-1];
00377   rep->resize (rep->n-1);
00378   return *this;
00379 }
00380 
00381 string
00382 operator * (const string& s1, const string& s2) {
00383   register nat i, n1=N(s1), n2=N(s2);
00384   string r (n1 + n2);
00385   for (i=0; i<n1; i++) r.rep->a[i   ]= s1.rep->a[i];
00386   for (i=0; i<n2; i++) r.rep->a[i+n1]= s2.rep->a[i];
00387   return r;
00388 }
00389 
00390 /******************************************************************************
00391 * Rewriting routines
00392 ******************************************************************************/
00393 
// Is c treated as a lower case character? True for 'a'..'z' and for the
// unsigned character codes 160..188 and 224..255.
// (The `register` specifier was dropped from the parameter: it is
// ill-formed since C++17 and never had any effect on the result.)
static bool
is_locase (char c) {
  int code= (int) ((unsigned char) c);
  return
    ((c>='a') && (c<='z')) ||
    ((code >= 160) && (code < 189)) ||
    (code >= 224);
}
00402 
// Is c treated as an upper case character? True for 'A'..'Z' and for the
// unsigned character codes 128..158 and 192..223.
// (The `register` specifier was dropped from the parameter: it is
// ill-formed since C++17 and never had any effect on the result.)
static bool
is_upcase (char c) {
  int code= (int) ((unsigned char) c);
  return
    ((c>='A') && (c<='Z')) ||
    ((code >= 128) && (code < 159)) ||
    ((code >= 192) && (code < 224));
}
00411 
00412 string
00413 upcase (const string& s) {
00414   nat i, n= N(s);
00415   string r (n);
00416   for (i=0; i<n; i++)
00417     if (!is_locase (s[i])) r[i]= s[i];
00418     else r[i]= (char) (((int) ((unsigned char) s[i]))-32);
00419   return r;
00420 }
00421 
00422 string
00423 locase (const string& s) {
00424   nat i, n= N(s);
00425   string r (n);
00426   for (i=0; i<n; i++)
00427     if (!is_upcase (s[i])) r[i]= s[i];
00428     else r[i]= (char) (((int) ((unsigned char) s[i]))+32);
00429   return r;
00430 }
00431 
00432 string
00433 upcase_first (const string& s) {
00434   string r= copy (s);
00435   if (N(r) != 0 && is_locase (s[0]))
00436     r[0]= (char) (((int) ((unsigned char) s[0]))-32);
00437   return r;
00438 }
00439 
00440 string
00441 locase_first (const string& s) {
00442   string r= copy (s);
00443   if (N(r) != 0 && is_upcase (s[0]))
00444     r[0]= (char) (((int) ((unsigned char) s[0]))+32);
00445   return r;
00446 }
00447 
00448 string
00449 escape (const string& s) {
00450   int i, n= N(s);
00451   string r;
00452   for (i=0; i<n; i++)
00453     if ((s[i] == '\\') || (s[i] == '\"')) r << '\\' << s[i];
00454     else if (s[i] == '\b') r << "\\b";
00455     else if (s[i] == '\t') r << "\\t";
00456     else if (s[i] == '\n') r << "\\n";
00457     else if (s[i] == '\r') r << "\\r";
00458     else r << s[i];
00459   return r;
00460 }
00461 
00462 string
00463 unescape (const string& s) {
00464   int i, n= N(s);
00465   string r;
00466   for (i=0; i<n; i++)
00467     if ((i+1<n) && (s[i] == '\\')) {
00468       i++;
00469       if (s[i] == 't') r << '\t';
00470       else if (s[i] == 'n') r << '\n';
00471       else if (s[i] == 'r') r << '\r';
00472       else if (s[i] == 'b') {
00473         if (N(r)>0)
00474           r.rep->resize (N(r)-1);
00475       }
00476       else r << s[i];
00477     }
00478     else r << s[i];
00479   return r;
00480 }
00481 
00482 string
00483 quote (const string& s) {
00484   return "\"" * escape (s) * "\"";
00485 }
00486 
00487 string
00488 unquote (const string& s) {
00489   if ((N(s) >= 2) && (s[0] == '\"') && (s[N(s)-1] == '\"'))
00490     return unescape (s (1, N(s)-1));
00491   else if (starts (s, "/\"") && ends (s, "\"/"))
00492     return s (2, N(s)-2);
00493   else return unescape (s);
00494 }
00495 
00496 /******************************************************************************
00497 * String replacement and searching
00498 ******************************************************************************/
00499 
00500 static bool
00501 reads (const string& s, nat pos, const string& what) {
00502   nat i, n= N(s), l= N(what);
00503   for (i=0; pos+i<n && i<l; i++)
00504     if (s[pos+i] != what[i]) return false;
00505   return i == l;
00506 }
00507 
00508 string
00509 replace (const string& s, const string& what, const string& by) {
00510   nat i, n= N(s);
00511   string r;
00512   for (i=0; i<n; )
00513     if (reads (s, i, what)) { r << by; i += N(what); }
00514     else r << s[i++];
00515   return r;
00516 }
00517 
00518 int
00519 search_forwards (const string& s, const string& what, const int& pos) {
00520   for (nat i= pos; i + N(what) <= N(s); i++)
00521     if (reads (s, i, what)) return i;
00522   return -1;
00523 }
00524 
00525 int
00526 search_backwards (const string& s, const string& what, const int& pos) {
00527   for (int i= pos - N(what); i >= 0; i--)
00528     if (reads (s, i, what)) return i;
00529   return -1;
00530 }
00531 
00532 string
00533 reverse (const string& s) {
00534   nat i, n= N(s);
00535   string r (n);
00536   for (i=0; i<n; i++) r[i]= s[n-1-i];
00537   return r;
00538 }
00539 
00540 /******************************************************************************
00541 * Tokenize and recompose
00542 ******************************************************************************/
00543 
00544 vector<string>
00545 tokenize (const string& s, const string& sep, bool keep) {
00546   ASSERT (N(sep) != 0, "separator should be non-empty");
00547   vector<string> v;
00548   nat start= 0;
00549   while (start < N(s)) {
00550     nat end= start;
00551     while (end < N(s) &&
00552            (s[end] != sep[0] || s (end, min (N(s), end + N(sep))) != sep))
00553       end++;
00554     if (keep) v << (s (start, end) * sep);
00555     else v << s (start, end);
00556     start= end + N(sep);
00557   }
00558   return v;
00559 }
00560 
00561 string
00562 recompose (const vector<string>& v, const string& sep, bool last) {
00563   string s;
00564   for (nat i=0; i<N(v); i++) {
00565     s << v[i];
00566     if (i+1 < N(v) || last) s << sep;
00567   }
00568   return s;
00569 }
00570 
00571 /******************************************************************************
00572 * Indentation related routines
00573 ******************************************************************************/
00574 
00575 string
00576 repeated (const string& s, nat n) {
00577   string r;
00578   for (nat i=0; i<n; i++) r << s;
00579   return r;
00580 }
00581 
00582 nat
00583 get_indentation (const string& s) {
00584   if (N(s) == 0) return 0;
00585   vector<string> v= tokenize (s, "\n");
00586   nat r= ((nat) -1);
00587   for (nat y=0; y<N(v); y++)
00588     for (nat x=0; x<N(v[y]); x++)
00589       if (v[y][x] != ' ') {
00590         r= min (r, x);
00591         break;
00592       }
00593   return r;
00594 }
00595 
00596 string
00597 add_indentation (const string& s, int delta) {
00598   if (delta == 0) return s;
00599   vector<string> v= tokenize (s, "\n");
00600   for (nat i=0; i<N(v); i++)
00601     if (delta > 0) v[i]= repeated (" ", delta) * v[i];
00602     else {
00603       int l= min (-delta, (int) N(v[i]));
00604       ASSERT (v[i] (0, l) == repeated (" ", l), "insufficient indentation");
00605       v[i]= v[i] (l, N(v[i]));
00606     }
00607   return recompose (v, "\n", N(s) != 0 && s[N(s) - 1] == '\n');
00608 }
00609 
00610 } // namespace mmx
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines