basix_doc 0.1
|
00001 00002 /****************************************************************************** 00003 * MODULE : string.cpp 00004 * DESCRIPTION: Strings with possible zero characters 00005 * COPYRIGHT : (C) 2000 Joris van der Hoeven 00006 ******************************************************************************* 00007 * This software falls under the GNU general public license and comes WITHOUT 00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details. 00009 * If you don't have this file, write to the Free Software Foundation, Inc., 00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00011 ******************************************************************************/ 00012 00013 #include <basix/string.hpp> 00014 #include <basix/vector.hpp> 00015 #include <string.h> 00016 namespace mmx { 00017 00018 /****************************************************************************** 00019 * Constructors 00020 ******************************************************************************/ 00021 00022 string::string (char c) { 00023 rep= new string_rep(1); 00024 rep->a[0]=c; 00025 } 00026 00027 string::string (const char *a) { 00028 register nat i, n=strlen(a); 00029 rep= new string_rep(n); 00030 for (i=0; i<n; i++) 00031 rep->a[i]=a[i]; 00032 } 00033 00034 string::string (const char* a, nat n) { 00035 register nat i; 00036 rep= new string_rep(n); 00037 for (i=0; i<n; i++) 00038 rep->a[i]=a[i]; 00039 } 00040 00041 void 00042 string_rep::resize (nat n2) { 00043 nat l2; 00044 if (n2 > l) l2= max (n2, l << 1); 00045 else if (n2 < (l >> 1)) l2= n2; 00046 else { n= n2; return; } 00047 nat m= min (n, n2); 00048 char* b= (char*) mmx_malloc (l2); 00049 for (nat i=0; i<m; i++) b[i]= a[i]; 00050 mmx_free ((void*) a, l); 00051 a= b; 00052 n= n2; 00053 l= l2; 00054 } 00055 00056 /****************************************************************************** 00057 * Hashing and string output 00058 ******************************************************************************/ 00059 00060 nat 00061 hash (const string& s) { 00062 register const char* a= S(s); 00063 register nat i, h=0, n= N(s); 00064 for (i=0; i<n; i++) 00065 h= (h<<1) ^ (h<<9) ^ (h>>23) ^ ((nat) a[i]); 00066 return h; 00067 } 00068 00069 /****************************************************************************** 00070 * Conversions 00071 ******************************************************************************/ 00072 00073 char* 00074 as_charp (const string& s) { 00075 nat i, n= N(s); 00076 char* r= (char*) mmx_malloc (n+1); 00077 for (i=0; i<n; i++) r[i]= s[i]; 00078 r[i]= '\0'; 00079 return r; 00080 } 00081 00082 void 00083 free_charp (char* s) { 00084 mmx_free (s, strlen (s) + 1); 00085 } 00086 00087 string 00088 as_string (void* ptr) { 00089 char buffer[100]; 00090 sprintf (buffer, "0x%lx", (unsigned long) ptr); 00091 return string (buffer); 00092 } 00093 00094 string 00095 as_string (int i) { 00096 char buffer[100]; 00097 sprintf (buffer, "%d", i); 00098 return string (buffer); 00099 } 00100 00101 string 00102 as_string (unsigned int i) { 00103 char buffer[100]; 00104 sprintf (buffer, "%u", i); 00105 return string (buffer); 00106 } 00107 00108 string 00109 as_string (long int i) { 00110 char buffer[100]; 00111 sprintf (buffer, "%ld", i); 00112 return string (buffer); 00113 } 00114 00115 string 00116 as_string (long unsigned int i) { 00117 char buffer[100]; 00118 sprintf (buffer, "%lu", i); 00119 return string (buffer); 00120 } 00121 00122 string 00123 as_string (long long int i) { 00124 char buffer[100]; 00125 sprintf (buffer, "%lld", i); 00126 return string (buffer); 00127 } 00128 00129 string 00130 as_string (long long unsigned int i) { 00131 char buffer[100]; 00132 sprintf (buffer, "%llu", i); 00133 return string (buffer); 00134 } 00135 00136 int 00137 as_int (const string& s) { 00138 int i=0, n=N(s), val=0; 00139 if (n==0) return 0; 00140 if (s[0]=='-') i++; 00141 while (i<n) { 00142 if (s[i]<'0') break; 00143 if (s[i]>'9') break; 00144 val *= 10; 00145 val += (int) (s[i]-'0'); 00146 i++; 00147 } 00148 if (s[0]=='-') val=-val; 00149 return val; 00150 } 00151 00152 string 00153 as_string (float x) { 00154 if (x == 0.0) return "0"; 00155 char buffer[32]; 00156 sprintf (buffer, "%1.3e", x); 00157 return string (buffer); 00158 } 00159 00160 string (*as_string_hook) (double x)= NULL; 00161 00162 string 00163 as_string (double x) { 00164 if (as_string_hook != NULL) 00165 return as_string_hook (x); 00166 if (x == 0.0) return "0"; 00167 char buffer[32]; 00168 sprintf (buffer, "%1.12g", x); 00169 return string (buffer); 00170 } 00171 00172 string 00173 as_string (long double x) { 00174 char buffer[64]; 00175 sprintf (buffer, "%Lf", x); 00176 return string (buffer); 00177 } 00178 00179 double 00180 as_double (const string& s) { 00181 double x; 00182 char buffer[100]; 00183 strncpy (buffer, s->a, 99); 00184 buffer[max(N(s),(nat) 99)]= '\0'; 00185 sscanf (buffer, "%lf", &x); 00186 return x; 00187 } 00188 00189 /****************************************************************************** 00190 * Further routines for exact numeric input and output 00191 ******************************************************************************/ 00192 00193 template<typename C> inline void 00194 floating_to_string (const C& val, string& s, const char* fm) { 00195 char buffer[40]; 00196 sprintf (buffer, fm, val); 00197 s << string (buffer); 00198 } 00199 00200 template<typename C> inline void 00201 string_to_floating (const string& s, C& val, const char* fm) { 00202 nat n= max (N(s), (nat) 39); 00203 char buffer[40]; 00204 strncpy (buffer, inside (s, 0), n); 00205 buffer [n]= '\0'; 00206 sscanf (buffer, fm, &val); 00207 } 00208 00209 STMPL void numeric_to_string (const float& x, string& s) { 00210 floating_to_string (x, s, "%1.8e"); } 00211 STMPL void numeric_to_string (const double& x, string& s) { 00212 floating_to_string (x, s, "%1.17le"); } 00213 STMPL void numeric_to_string (const long double& x, string& s) { 00214 floating_to_string (x, s, "%1.21Le"); } 00215 STMPL void string_to_numeric (const string& s, float& x) { 00216 string_to_floating (s, x, "%f"); } 00217 STMPL void string_to_numeric (const string& s, double& x) { 00218 string_to_floating (s, x, "%lf"); } 00219 STMPL void string_to_numeric (const string& s, long double& x) { 00220 string_to_floating (s, x, "%Lf"); } 00221 00222 /****************************************************************************** 00223 * Predicates 00224 ******************************************************************************/ 00225 00226 bool 00227 string::operator == (const char* s) const { 00228 register nat i, n= rep->n; 00229 register char* S= rep->a; 00230 for (i=0; i<n; i++) { 00231 if (s[i]!=S[i]) return false; 00232 if (s[i]=='\0') return false; 00233 } 00234 return (s[i]=='\0'); 00235 } 00236 00237 bool 00238 string::operator == (const string& s) const { 00239 register nat i; 00240 if (rep->n != s.rep->n) return false; 00241 for (i=0; i<rep->n; i++) 00242 if (rep->a[i] != s.rep->a[i]) return false; 00243 return true; 00244 } 00245 00246 bool 00247 string::operator != (const char* s) const { 00248 register nat i, n= rep->n; 00249 register char* S= rep->a; 00250 for (i=0; i<n; i++) { 00251 if (s[i]!=S[i]) return true; 00252 if (s[i]=='\0') return true; 00253 } 00254 return (s[i]!='\0'); 00255 } 00256 00257 bool 00258 string::operator != (const string& s) const { 00259 register nat i; 00260 if (rep->n != s.rep->n) return true; 00261 for (i=0; i<rep->n; i++) 00262 if (rep->a[i] != s.rep->a[i]) return true; 00263 return false; 00264 } 00265 00266 bool 00267 operator < (const string& s1, const string& s2) { 00268 register nat i; 00269 for (i=0; i<N(s1); i++) { 00270 if (i >= N(s2)) return false; 00271 if (s1[i] < s2[i]) return true; 00272 if (s2[i] < s1[i]) return false; 00273 } 00274 return false; 00275 } 00276 00277 bool 00278 operator <= (const string& s1, const string& s2) { 00279 register nat i; 00280 for (i=0; i<N(s1); i++) { 00281 if (i >= N(s2)) return false; 00282 if (s1[i] < s2[i]) return true; 00283 if (s2[i] < s1[i]) return false; 00284 } 00285 return true; 00286 } 00287 00288 bool 00289 operator > (const string& s1, const string& s2) { 00290 register nat i; 00291 for (i=0; i<N(s1); i++) { 00292 if (i >= N(s2)) return true; 00293 if (s1[i] > s2[i]) return true; 00294 if (s2[i] > s1[i]) return false; 00295 } 00296 return false; 00297 } 00298 00299 bool 00300 operator >= (const string& s1, const string& s2) { 00301 register nat i; 00302 for (i=0; i<N(s1); i++) { 00303 if (i >= N(s2)) return true; 00304 if (s1[i] > s2[i]) return true; 00305 if (s2[i] > s1[i]) return false; 00306 } 00307 return true; 00308 } 00309 00310 /****************************************************************************** 00311 * Substrings and merging 00312 ******************************************************************************/ 00313 00314 bool 00315 starts (const string& s, const string& what) { 00316 return N(s) >= N(what) && s (0, N(what)) == what; 00317 } 00318 00319 bool 00320 ends (const string& s, const string& what) { 00321 return N(s) >= N(what) && s (N(s) - N(what), N(s)) == what; 00322 } 00323 00324 string 00325 string::operator () (nat i1, nat i2) const { 00326 register nat i; 00327 string r (i2-i1); 00328 for (i=i1; i<i2; i++) r.rep->a[i-i1]= rep->a[i]; 00329 return r; 00330 } 00331 00332 string 00333 copy (const string& s) { 00334 register nat i, n= N(s); 00335 string r (n); 00336 for (i=0; i<n; i++) r.rep->a[i]= s.rep->a[i]; 00337 return r; 00338 } 00339 00340 void 00341 string_rep::extend (nat d) { 00342 if (n+d > l) { 00343 register nat old_l= l; 00344 l= max (l << 1, n+d); 00345 char* b= (char*) mmx_malloc (l); 00346 for (nat i=0; i<n; i++) 00347 b[i]= a[i]; 00348 mmx_free ((void*) a, old_l); 00349 a= b; 00350 } 00351 n += d; 00352 } 00353 00354 string& 00355 string::operator << (char x) { 00356 secure (); 00357 rep->extend (1); 00358 rep->a[rep->n-1]= x; 00359 return *this; 00360 } 00361 00362 string& 00363 string::operator << (const string& s) { 00364 register nat i, k1= rep->n, k2=N(s); 00365 secure (); 00366 rep->extend (k2); 00367 for (i=0; i<k2; i++) 00368 rep->a[i+k1]= s.rep->a[i]; 00369 return *this; 00370 } 00371 00372 string& 00373 string::operator >> (char& x) { 00374 ASSERT (rep->n != 0, "non empty string expected"); 00375 secure (); 00376 x= rep->a[rep->n-1]; 00377 rep->resize (rep->n-1); 00378 return *this; 00379 } 00380 00381 string 00382 operator * (const string& s1, const string& s2) { 00383 register nat i, n1=N(s1), n2=N(s2); 00384 string r (n1 + n2); 00385 for (i=0; i<n1; i++) r.rep->a[i ]= s1.rep->a[i]; 00386 for (i=0; i<n2; i++) r.rep->a[i+n1]= s2.rep->a[i]; 00387 return r; 00388 } 00389 00390 /****************************************************************************** 00391 * Rewriting routines 00392 ******************************************************************************/ 00393 00394 static bool 00395 is_locase (register char c) { 00396 int code= (int) ((unsigned char) c); 00397 return 00398 ((c>='a') && (c<='z')) || 00399 ((code >= 160) && (code < 189)) || 00400 (code >= 224); 00401 } 00402 00403 static bool 00404 is_upcase (register char c) { 00405 int code= (int) ((unsigned char) c); 00406 return 00407 ((c>='A') && (c<='Z')) || 00408 ((code >= 128) && (code < 159)) || 00409 ((code >= 192) && (code < 224)); 00410 } 00411 00412 string 00413 upcase (const string& s) { 00414 nat i, n= N(s); 00415 string r (n); 00416 for (i=0; i<n; i++) 00417 if (!is_locase (s[i])) r[i]= s[i]; 00418 else r[i]= (char) (((int) ((unsigned char) s[i]))-32); 00419 return r; 00420 } 00421 00422 string 00423 locase (const string& s) { 00424 nat i, n= N(s); 00425 string r (n); 00426 for (i=0; i<n; i++) 00427 if (!is_upcase (s[i])) r[i]= s[i]; 00428 else r[i]= (char) (((int) ((unsigned char) s[i]))+32); 00429 return r; 00430 } 00431 00432 string 00433 upcase_first (const string& s) { 00434 string r= copy (s); 00435 if (N(r) != 0 && is_locase (s[0])) 00436 r[0]= (char) (((int) ((unsigned char) s[0]))-32); 00437 return r; 00438 } 00439 00440 string 00441 locase_first (const string& s) { 00442 string r= copy (s); 00443 if (N(r) != 0 && is_upcase (s[0])) 00444 r[0]= (char) (((int) ((unsigned char) s[0]))+32); 00445 return r; 00446 } 00447 00448 string 00449 escape (const string& s) { 00450 int i, n= N(s); 00451 string r; 00452 for (i=0; i<n; i++) 00453 if ((s[i] == '\\') || (s[i] == '\"')) r << '\\' << s[i]; 00454 else if (s[i] == '\b') r << "\\b"; 00455 else if (s[i] == '\t') r << "\\t"; 00456 else if (s[i] == '\n') r << "\\n"; 00457 else if (s[i] == '\r') r << "\\r"; 00458 else r << s[i]; 00459 return r; 00460 } 00461 00462 string 00463 unescape (const string& s) { 00464 int i, n= N(s); 00465 string r; 00466 for (i=0; i<n; i++) 00467 if ((i+1<n) && (s[i] == '\\')) { 00468 i++; 00469 if (s[i] == 't') r << '\t'; 00470 else if (s[i] == 'n') r << '\n'; 00471 else if (s[i] == 'r') r << '\r'; 00472 else if (s[i] == 'b') { 00473 if (N(r)>0) 00474 r.rep->resize (N(r)-1); 00475 } 00476 else r << s[i]; 00477 } 00478 else r << s[i]; 00479 return r; 00480 } 00481 00482 string 00483 quote (const string& s) { 00484 return "\"" * escape (s) * "\""; 00485 } 00486 00487 string 00488 unquote (const string& s) { 00489 if ((N(s) >= 2) && (s[0] == '\"') && (s[N(s)-1] == '\"')) 00490 return unescape (s (1, N(s)-1)); 00491 else if (starts (s, "/\"") && ends (s, "\"/")) 00492 return s (2, N(s)-2); 00493 else return unescape (s); 00494 } 00495 00496 /****************************************************************************** 00497 * String replacement and searching 00498 ******************************************************************************/ 00499 00500 static bool 00501 reads (const string& s, nat pos, const string& what) { 00502 nat i, n= N(s), l= N(what); 00503 for (i=0; pos+i<n && i<l; i++) 00504 if (s[pos+i] != what[i]) return false; 00505 return i == l; 00506 } 00507 00508 string 00509 replace (const string& s, const string& what, const string& by) { 00510 nat i, n= N(s); 00511 string r; 00512 for (i=0; i<n; ) 00513 if (reads (s, i, what)) { r << by; i += N(what); } 00514 else r << s[i++]; 00515 return r; 00516 } 00517 00518 int 00519 search_forwards (const string& s, const string& what, const int& pos) { 00520 for (nat i= pos; i + N(what) <= N(s); i++) 00521 if (reads (s, i, what)) return i; 00522 return -1; 00523 } 00524 00525 int 00526 search_backwards (const string& s, const string& what, const int& pos) { 00527 for (int i= pos - N(what); i >= 0; i--) 00528 if (reads (s, i, what)) return i; 00529 return -1; 00530 } 00531 00532 string 00533 reverse (const string& s) { 00534 nat i, n= N(s); 00535 string r (n); 00536 for (i=0; i<n; i++) r[i]= s[n-1-i]; 00537 return r; 00538 } 00539 00540 /****************************************************************************** 00541 * Tokenize and recompose 00542 ******************************************************************************/ 00543 00544 vector<string> 00545 tokenize (const string& s, const string& sep, bool keep) { 00546 ASSERT (N(sep) != 0, "separator should be non-empty"); 00547 vector<string> v; 00548 nat start= 0; 00549 while (start < N(s)) { 00550 nat end= start; 00551 while (end < N(s) && 00552 (s[end] != sep[0] || s (end, min (N(s), end + N(sep))) != sep)) 00553 end++; 00554 if (keep) v << (s (start, end) * sep); 00555 else v << s (start, end); 00556 start= end + N(sep); 00557 } 00558 return v; 00559 } 00560 00561 string 00562 recompose (const vector<string>& v, const string& sep, bool last) { 00563 string s; 00564 for (nat i=0; i<N(v); i++) { 00565 s << v[i]; 00566 if (i+1 < N(v) || last) s << sep; 00567 } 00568 return s; 00569 } 00570 00571 /****************************************************************************** 00572 * Indentation related routines 00573 ******************************************************************************/ 00574 00575 string 00576 repeated (const string& s, nat n) { 00577 string r; 00578 for (nat i=0; i<n; i++) r << s; 00579 return r; 00580 } 00581 00582 nat 00583 get_indentation (const string& s) { 00584 if (N(s) == 0) return 0; 00585 vector<string> v= tokenize (s, "\n"); 00586 nat r= ((nat) -1); 00587 for (nat y=0; y<N(v); y++) 00588 for (nat x=0; x<N(v[y]); x++) 00589 if (v[y][x] != ' ') { 00590 r= min (r, x); 00591 break; 00592 } 00593 return r; 00594 } 00595 00596 string 00597 add_indentation (const string& s, int delta) { 00598 if (delta == 0) return s; 00599 vector<string> v= tokenize (s, "\n"); 00600 for (nat i=0; i<N(v); i++) 00601 if (delta > 0) v[i]= repeated (" ", delta) * v[i]; 00602 else { 00603 int l= min (-delta, (int) N(v[i])); 00604 ASSERT (v[i] (0, l) == repeated (" ", l), "insufficient indentation"); 00605 v[i]= v[i] (l, N(v[i])); 00606 } 00607 return recompose (v, "\n", N(s) != 0 && s[N(s) - 1] == '\n'); 00608 } 00609 00610 } // namespace mmx