coan 4.2.4
|
00001 /*************************************************************************** 00002 * Copyright (C) 2007-2011 Mike Kinghan, imk@strudl.org * 00003 * All rights reserved. * 00004 * * 00005 * Contributed originally by Mike Kinghan, imk@strudl.org * 00006 * * 00007 * Redistribution and use in source and binary forms, with or without * 00008 * modification, are permitted provided that the following conditions * 00009 * are met: * 00010 * * 00011 * Redistributions of source code must retain the above copyright * 00012 * notice, this list of conditions and the following disclaimer. * 00013 * * 00014 * Redistributions in binary form must reproduce the above copyright * 00015 * notice, this list of conditions and the following disclaimer in the * 00016 * documentation and/or other materials provided with the distribution. * 00017 * * 00018 * Neither the name of Symbian Software Ltd. nor the names of its * 00019 * contributors may be used to endorse or promote products derived from * 00020 * this software without specific prior written permission. * 00021 * * 00022 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * 00023 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * 00024 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * 00025 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * 00026 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * 00027 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * 00028 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * 00029 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * 00030 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,* 00031 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * 00032 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * 00033 * DAMAGE. * 00034 * * 00035 **************************************************************************/ 00036 #include "canonical_string.h" 00037 #include "chew.h" 00038 #include "swiss_army.h" 00039 #include "io.h" 00040 #include "report.h" 00041 00050 00052 struct canonical_string_impl { 00053 heap_str text; 00054 }; 00055 00108 static void 00109 canonical_string_parse_next_part(char const **start, size_t *len) 00110 { 00111 char const *cp = *start; 00112 char const *advance = chew_continuation(cp); 00113 if (advance != cp) { 00114 *start = advance; 00115 *len = 0; 00116 } else { 00117 size_t read_off = read_offset(cp); 00118 advance = chew_on(cp); 00119 cp = read_pos(read_off); 00120 if (advance != cp) { 00121 *start = advance, 00122 *len = advance - cp; 00123 } else { 00124 advance = chew_unbroken_string(cp); 00125 *len = advance - cp; 00126 *start = cp; 00127 } 00128 } 00129 } 00130 00140 static void 00141 canonical_string_init(canonical_string_h cs, char const *start, size_t nbytes, char const **pend) 00142 { 00143 size_t spare = 0; 00144 char const *cp = start; 00145 if (cp) { 00146 spare = nbytes ? nbytes : strlen(cp); 00147 } 00148 if (spare) { 00149 char *tp = cs->text = zallocate(++spare); 00150 size_t used = 0; 00151 SET_PUBLIC(chew,stop_at_quote) = true; 00152 cp = chew_on(cp); 00153 for ( ;; ) { 00154 size_t tok_len; 00155 char const *save_cp = cp; 00156 canonical_string_parse_next_part(&cp,&tok_len); 00157 if (save_cp == cp) { 00158 if (!tok_len) { 00159 /* Zero terminate the canonical string, discarding 00160 any final space */ 00161 tp[-(tp[-1] == ' ')] = '\0'; 00162 break; 00163 } 00164 if (tok_len > spare) { 00165 size_t tp_off = tp - cs->text; 00166 cs->text = reallocate(cs->text,used + tok_len + 1); 00167 tp = cs->text + tp_off; 00168 spare = tok_len + 1; 00169 } 00170 memcpy(tp,cp,tok_len); 00171 spare -= tok_len; 00172 used += tok_len; 00173 tp += tok_len; 00174 cp += tok_len; 00175 } else if (tok_len > 0) { 00176 *tp++ = ' '; 00177 ++used; 00178 --spare; 00179 } 00180 } 00181 tp = cs->text; 00182 cs->text = reallocate(tp,strlen(tp) + 1); 00183 if (pend) { 00184 *pend = cp; 00185 } 00186 SET_PUBLIC(chew,stop_at_quote) = false; 00187 } else { 00188 cs->text = clone("",0); 00189 } 00190 } 00191 00198 typedef bool (*canonical_string_char_validator_t)(char ch, canonical_string_const_h cs); 00199 00201 static bool 00202 is_zero_digit(canonical_string_const_h cs) 00203 { 00204 assert(cs); 00205 return (cs->text && cs->text[1] == '0' && cs->text[1] == '\0'); 00206 } 00207 00209 static bool 00210 is_octal_numeral(canonical_string_const_h cs) 00211 { 00212 bool verdict = false; 00213 assert(cs); 00214 if (cs->text && cs->text[0] == '\0') { 00215 char *num_end; 00216 long l = strtol(cs->text,&num_end,8); 00217 (void)l; 00218 verdict = *num_end == '\0'; 00219 } 00220 return verdict; 00221 } 00222 00224 static bool 00225 is_hex_numeral(canonical_string_const_h cs) 00226 { 00227 bool verdict = false; 00228 assert(cs); 00229 if (cs->text && cs->text[0] == '\0') { 00230 char *num_end; 00231 long l = strtol(cs->text,&num_end,16); 00232 (void)l; 00233 verdict = *num_end == '\0'; 00234 } 00235 return verdict; 00236 } 00237 00239 static bool 00240 is_decimal_numeral(canonical_string_const_h cs) 00241 { 00242 bool verdict = false; 00243 assert(cs); 00244 if (cs->text) { 00245 char *num_end; 00246 long l = strtol(cs->text,&num_end,10); 00247 (void)l; 00248 verdict = *num_end == '\0'; 00249 } 00250 return verdict; 00251 } 00252 00254 static bool 00255 is_type_suffix_char(char ch) 00256 { 00257 char lc = tolower(ch); 00258 return lc == 'u' || lc == 'l'; 00259 } 00260 00269 static char const * 00270 integer_type_suffix(canonical_string_const_h cs) 00271 { 00272 char *suffix = NULL; 00273 assert(cs); 00274 if (cs->text) { 00275 long l = strtol(cs->text,&suffix,16); 00276 (void)l; 00277 } 00278 return suffix > cs->text ? suffix : NULL; 00279 } 00280 00290 static bool 00291 is_valid_digit(char ch, canonical_string_const_h cs) 00292 { 00293 assert(cs); 00294 if (is_zero_digit(cs)) { 00295 /* Solitary 0 digit can extend with any digit, 'x'/'X' or type suffix */ 00296 return isdigit(ch) || tolower(ch) == 'x' || is_type_suffix_char(ch); 00297 } else if (is_octal_numeral(cs)) { 00298 /* Else octal numeral can extend with any digit or type suffix */ 00299 return (isdigit(ch) && ch < '8') || is_type_suffix_char(ch); 00300 } else if (is_hex_numeral(cs)) { 00301 /* Else hex numeral can extend with any hex digit or type suffix */ 00302 return isxdigit(ch) || is_type_suffix_char(ch); 00303 } else if (is_decimal_numeral(cs)) { 00304 /* Else decimal numeral can extend with any decimal numeral or type suffix */ 00305 return isdigit(ch) || is_type_suffix_char(ch); 00306 } else { 00307 char const *suffix = integer_type_suffix(cs); 00308 if (suffix) { 00309 /* cs contains some numeral. Can extend with type suffix char */ 00310 /* Can extend with U if not present already */ 00311 bool verdict = tolower(ch) == 'u' && !strchr(suffix,'u') && !strchr(suffix,'U') ; 00312 if (!verdict) { /* Not U. Try L. */ 00313 char *lastl = strrchr(suffix,'l'); 00314 if (!lastl) { 00315 lastl = strrchr(suffix,'L'); 00316 } 00317 /* Can extend with L if not present more than once already and occcurs last if present */ 00318 verdict = tolower(ch) == 'l' && (lastl == NULL || (tolower(lastl[-1]) != 'l' && tolower(lastl[1]) == '\0')); 00319 } 00320 return verdict; 00321 00322 } else { /* No numeral present */ 00323 /* Can initialise with any digit */ 00324 return (!cs->text || !cs->text[0]) && isdigit(ch); 00325 } 00326 } 00327 } 00328 00334 static bool 00335 is_valid_symbol_char(char ch, canonical_string_const_h cs) 00336 { 00337 assert(cs); 00338 if (!cs->text || !cs->text[0]) { 00339 return is_symbol_start_char(ch); 00340 } else { 00341 return is_symbol_inner_char(ch); 00342 } 00343 00344 } 00345 00353 static void 00354 canonical_string_init_by_test(canonical_string_h cs, 00355 char const **cpp, 00356 canonical_string_char_validator_t validator) 00357 { 00358 size_t spare = 0; 00359 size_t used = 0; 00360 char const *cp; 00361 char *tp; 00362 assert(cpp); 00363 cp = *cpp; 00364 tp = cs->text; 00365 for ( ; *cp && validator(*cp,cs); cp = chew_continuation(++cp),++tp,++used,--spare) { 00366 if (!spare) { 00367 spare = used > 4 ? used >> 1 : 4; 00368 cs->text = reallocate(cs->text,used + spare + 1); 00369 memset(cs->text + used,'\0',spare + 1); 00370 tp = cs->text + used; 00371 } 00372 *tp = *cp; 00373 } 00374 *cpp = cp; 00375 } 00376 00384 static void 00385 canonical_string_copy_init(canonical_string_h dest, canonical_string_const_h src) 00386 { 00387 dest->text = clone(src->text,0); 00388 } 00389 00391 static void 00392 canonical_string_finis(canonical_string_h cs) 00393 { 00394 if (cs->text) { 00395 free(cs->text); 00396 cs->text = NULL; 00397 } 00398 } 00399 00402 /* API*/ 00403 00404 canonical_string_h 00405 canonical_string_new(char const *start, size_t nbytes, char const **pend) 00406 { 00407 canonical_string_h cs = zallocate(sizeof(struct canonical_string_impl)); 00408 canonical_string_init(cs,start,nbytes,pend); 00409 return cs; 00410 } 00411 00412 canonical_string_h 00413 canonical_numeral(int_spec_t const * int_spec) 00414 { 00415 canonical_string_h cs = zallocate(sizeof(struct canonical_string_impl)); 00416 char *heap = format_int(int_spec); 00417 char const * num = heap; 00418 canonical_string_init_by_test(cs,&num,is_valid_digit); 00419 free(heap); 00420 return cs; 00421 } 00422 00423 canonical_string_h 00424 canonical_identifier(char const **cpp) 00425 { 00426 canonical_string_h cs = zallocate(sizeof(struct canonical_string_impl)); 00427 canonical_string_init_by_test(cs,cpp,is_valid_symbol_char); 00428 return cs; 00429 } 00430 00431 canonical_string_h 00432 canonical_string_copy(canonical_string_const_h src) 00433 { 00434 canonical_string_h cs; 00435 assert(src); 00436 cs = allocate(sizeof(struct canonical_string_impl)); 00437 canonical_string_copy_init(cs,src); 00438 return cs; 00439 } 00440 00441 void 00442 canonical_string_dispose(canonical_string_h cs) 00443 { 00444 if (cs) { 00445 canonical_string_finis(cs); 00446 free(cs); 00447 } 00448 } 00449 00450 void 00451 canonical_string_swap(canonical_string_h lhs, canonical_string_h rhs) 00452 { 00453 assert(lhs); 00454 assert(rhs); 00455 PODSWAP(heap_str,&lhs->text,&rhs->text); 00456 } 00457 00458 void 00459 canonical_string_assign(canonical_string_h dest, canonical_string_const_h src) 00460 { 00461 if (dest != src) { 00462 canonical_string_h tmp = canonical_string_copy(src); 00463 canonical_string_swap(dest,tmp); 00464 canonical_string_dispose(tmp); 00465 } 00466 } 00467 00468 bool 00469 canonical_string_equal(canonical_string_const_h lhs, canonical_string_const_h rhs) 00470 { 00471 bool eq = lhs == rhs; 00472 assert(lhs); 00473 assert(rhs); 00474 if (!eq) { 00475 eq = lhs->text == rhs->text || !strcmp(lhs->text,rhs->text); 00476 } 00477 return eq; 00478 } 00479 00480 int 00481 canonical_string_compare(canonical_string_const_h lhs, void const *rhs, size_t rhslen) 00482 { 00483 size_t lhs_len; 00484 int cmp; 00485 assert(lhs); 00486 assert(rhs); 00487 if (!rhslen) { 00488 if (lhs->text == ((canonical_string_const_h)rhs)->text) { 00489 /* Both texts NULL */ 00490 return 0; 00491 } 00492 return strcmp(lhs->text,((canonical_string_const_h)rhs)->text); 00493 } 00494 if (!lhs->text) { 00495 return -1; 00496 } 00497 lhs_len = strlen(lhs->text); 00498 cmp = strncmp(lhs->text,(char *)rhs,rhslen); 00499 if (!cmp && lhs->text[rhslen]) { 00500 cmp = 1; 00501 } 00502 return cmp; 00503 } 00504 00505 char const * 00506 canonical_string_text(canonical_string_const_h cs) 00507 { 00508 assert(cs); 00509 return cs->text ? cs->text : ""; 00510 } 00511 00512 size_t 00513 canonical_string_length(canonical_string_const_h cs) 00514 { 00515 assert(cs); 00516 return cs->text ? strlen(cs->text) : 0; 00517 } 00518 00519 void 00520 canonical_string_appends(canonical_string_h cs, canonical_string_const_h more, 00521 bool punct) 00522 { 00523 size_t cs_len = canonical_string_length(cs); 00524 size_t more_len = canonical_string_length(more); 00525 if (more_len) { 00526 if (!cs_len) { 00527 canonical_string_assign(cs,more); 00528 } else { 00529 cs->text = reallocate(cs->text,cs_len + more_len + 1 + (int)punct); 00530 if (punct) { 00531 cs->text[cs_len++] = ' '; 00532 } 00533 strcpy(cs->text + cs_len,more->text); 00534 } 00535 } 00536 } 00537 00538 void 00539 canonical_string_appendc(canonical_string_h cs, char more) 00540 { 00541 size_t cs_len = canonical_string_length(cs); 00542 cs->text = reallocate(cs->text,cs_len + 2); 00543 cs->text[cs_len++] = more; 00544 cs->text[cs_len] = 0; 00545 } 00546 00547 canonical_string_h 00548 canonical_string_substr(canonical_string_const_h cs, size_t start, size_t len) 00549 { 00550 canonical_string_h substr = NULL; 00551 assert(cs); 00552 if (start < canonical_string_length(cs)) { 00553 substr = zallocate(sizeof(struct canonical_string_impl)); 00554 substr->text = clone(cs->text + start,len); 00555 } 00556 return substr; 00557 } 00558 00559 void 00560 canonical_string_replace(canonical_string_h cs, 00561 size_t start, 00562 size_t len, 00563 canonical_string_const_h subst) 00564 { 00565 size_t cs_len, subst_len; 00566 assert(cs); 00567 assert(subst); 00568 cs_len = canonical_string_length(cs); 00569 subst_len = canonical_string_length(subst); 00570 if (start < cs_len) { 00571 size_t tail_len; 00572 heap_str text; 00573 if (start + len < cs_len) { 00574 tail_len = cs_len - (start + len); 00575 } 00576 else { 00577 tail_len = 0; 00578 } 00579 text = zallocate(start + subst_len + tail_len + 1); 00580 if (start) { 00581 memcpy(text,cs->text,start); 00582 } 00583 if (subst_len) { 00584 memcpy(text + start,subst->text,subst_len); 00585 } 00586 if (tail_len) { 00587 memcpy(text + start + subst_len,cs->text + start + len,tail_len); 00588 } 00589 free(cs->text); 00590 cs->text = text; 00591 } 00592 } 00593 00594 /* EOF */