coan 4.2.4
canonical_string.c
Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2007-2011 Mike Kinghan, imk@strudl.org                  *
00003  *   All rights reserved.                                                  *
00004  *                                                                         *
00005  *   Contributed originally by Mike Kinghan, imk@strudl.org                *
00006  *                                                                         *
00007  *   Redistribution and use in source and binary forms, with or without    *
00008  *   modification, are permitted provided that the following conditions    *
00009  *   are met:                                                              *
00010  *                                                                         *
00011  *   Redistributions of source code must retain the above copyright        *
00012  *   notice, this list of conditions and the following disclaimer.         *
00013  *                                                                         *
00014  *   Redistributions in binary form must reproduce the above copyright     *
00015  *   notice, this list of conditions and the following disclaimer in the   *
00016  *   documentation and/or other materials provided with the distribution.  *
00017  *                                                                         *
00018  *   Neither the name of Symbian Software Ltd. nor the names of its        *
00019  *   contributors may be used to endorse or promote products derived from  *
00020  *   this software without specific prior written permission.              *
00021  *                                                                         *
00022  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS   *
00023  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT     *
00024  *   LIMITED TO, THE IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS    *
00025  *   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE        *
00026  *   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,   *
00027  *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,  *
00028  *   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
00029  *   OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    *
00030  *   AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,*
00031  *   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF *
00032  *   THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH  *
00033  *   DAMAGE.                                                               *
00034  *                                                                         *
00035  **************************************************************************/
00036 #include "canonical_string.h"
00037 #include "chew.h"
00038 #include "swiss_army.h"
00039 #include "io.h"
00040 #include "report.h"
00041 
00050 
/* Private representation behind the canonical_string_h and
 * canonical_string_const_h handles used throughout this file.
 */
00052 struct canonical_string_impl {
00053     heap_str text;      /* Heap-owned, NUL-terminated text; may be NULL (treated as empty by the accessors in this file) */
00054 };
00055 
/* Locate the next part of the text being canonicalised.
 *
 * On entry *start addresses the scan position. On return:
 *  - if a continuation was consumed, *start is advanced past it and
 *    *len is 0;
 *  - else if chew_on() consumed something, *start is advanced past it and
 *    *len is the number of bytes consumed;
 *  - otherwise *start addresses the scan position and *len is the length of
 *    the run reported by chew_unbroken_string() (0 if there is none).
 *
 * NOTE(review): the read_offset()/read_pos() round trip presumably
 * re-derives the position in case chew_on() relocates the read buffer -
 * confirm against io.h.
 */
static void
canonical_string_parse_next_part(char const **start, size_t *len)
{
    char const *here = *start;
    char const *next = chew_continuation(here);
    size_t mark;

    if (next != here) {
        /* Skipped a continuation: nothing to copy */
        *start = next;
        *len = 0;
        return;
    }

    mark = read_offset(here);
    next = chew_on(here);
    here = read_pos(mark);
    if (next != here) {
        /* chew_on() consumed `next - here` bytes */
        *start = next;
        *len = next - here;
        return;
    }

    /* Nothing skipped: measure the unbroken run that begins here */
    next = chew_unbroken_string(here);
    *start = here;
    *len = next - here;
}
00130 
00140 static void
00141 canonical_string_init(canonical_string_h cs, char const *start, size_t nbytes, char const **pend)
00142 {
00143     size_t spare = 0;
00144     char const *cp = start;
00145     if (cp) {
00146         spare = nbytes ? nbytes : strlen(cp);
00147     }
00148     if (spare) {
00149         char *tp = cs->text = zallocate(++spare);
00150         size_t used = 0;
00151         SET_PUBLIC(chew,stop_at_quote) = true;
00152         cp = chew_on(cp);
00153         for (   ;;      ) {
00154             size_t tok_len;
00155             char const *save_cp = cp;
00156             canonical_string_parse_next_part(&cp,&tok_len);
00157             if (save_cp == cp) {
00158                 if (!tok_len) {
00159                     /* Zero terminate the canonical string, discarding
00160                         any final space */
00161                     tp[-(tp[-1] == ' ')] = '\0';
00162                     break;
00163                 }
00164                 if (tok_len > spare) {
00165                     size_t tp_off = tp - cs->text;
00166                     cs->text = reallocate(cs->text,used + tok_len + 1);
00167                     tp = cs->text + tp_off;
00168                     spare = tok_len + 1;
00169                 }
00170                 memcpy(tp,cp,tok_len);
00171                 spare -= tok_len;
00172                 used += tok_len;
00173                 tp += tok_len;
00174                 cp += tok_len;
00175             } else if (tok_len > 0) {
00176                 *tp++ = ' ';
00177                 ++used;
00178                 --spare;
00179             }
00180         }
00181         tp = cs->text;
00182         cs->text = reallocate(tp,strlen(tp) + 1);
00183         if (pend) {
00184             *pend = cp;
00185         }
00186         SET_PUBLIC(chew,stop_at_quote) = false;
00187     } else {
00188         cs->text = clone("",0);
00189     }
00190 }
00191 
/* Type of a predicate that says whether character `ch` is acceptable as the
 * next character of canonical string `cs`. Used by
 * canonical_string_init_by_test() to decide how far to read.
 */
00198 typedef bool (*canonical_string_char_validator_t)(char ch, canonical_string_const_h cs);
00199 
00201 static bool
00202 is_zero_digit(canonical_string_const_h cs)
00203 {
00204     assert(cs);
00205     return (cs->text && cs->text[1] == '0' && cs->text[1] == '\0');
00206 }
00207 
00209 static bool
00210 is_octal_numeral(canonical_string_const_h cs)
00211 {
00212     bool verdict = false;
00213     assert(cs);
00214     if (cs->text && cs->text[0] == '\0') {
00215         char *num_end;
00216         long l = strtol(cs->text,&num_end,8);
00217         (void)l;
00218         verdict = *num_end == '\0';
00219     }
00220     return verdict;
00221 }
00222 
00224 static bool
00225 is_hex_numeral(canonical_string_const_h cs)
00226 {
00227     bool verdict = false;
00228     assert(cs);
00229     if (cs->text && cs->text[0] == '\0') {
00230         char *num_end;
00231         long l = strtol(cs->text,&num_end,16);
00232         (void)l;
00233         verdict = *num_end == '\0';
00234     }
00235     return verdict;
00236 }
00237 
00239 static bool
00240 is_decimal_numeral(canonical_string_const_h cs)
00241 {
00242     bool verdict = false;
00243     assert(cs);
00244     if (cs->text) {
00245         char *num_end;
00246         long l = strtol(cs->text,&num_end,10);
00247         (void)l;
00248         verdict = *num_end == '\0';
00249     }
00250     return verdict;
00251 }
00252 
/* Say whether a character can appear in an integer type suffix, i.e. is
 * one of 'u', 'U', 'l', 'L'.
 */
static bool
is_type_suffix_char(char ch)
{
    /* <ctype.h> functions need a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour.
     */
    int const lc = tolower((unsigned char)ch);
    return lc == 'u' || lc == 'l';
}
00260 
00269 static char const *
00270 integer_type_suffix(canonical_string_const_h cs)
00271 {
00272     char *suffix = NULL;
00273     assert(cs);
00274     if (cs->text) {
00275         long l = strtol(cs->text,&suffix,16);
00276         (void)l;
00277     }
00278     return suffix > cs->text ? suffix : NULL;
00279 }
00280 
00290 static bool
00291 is_valid_digit(char ch, canonical_string_const_h cs)
00292 {
00293     assert(cs);
00294     if (is_zero_digit(cs)) {
00295         /* Solitary 0 digit can extend with any digit, 'x'/'X' or type suffix */
00296         return isdigit(ch) || tolower(ch) == 'x' || is_type_suffix_char(ch);
00297     } else if (is_octal_numeral(cs)) {
00298         /* Else octal numeral can extend with any digit or type suffix */
00299         return (isdigit(ch) && ch < '8') || is_type_suffix_char(ch);
00300     } else if (is_hex_numeral(cs)) {
00301         /* Else hex numeral can extend with any hex digit or type suffix */
00302         return isxdigit(ch) || is_type_suffix_char(ch);
00303     } else if (is_decimal_numeral(cs)) {
00304         /* Else decimal numeral can extend with any decimal numeral or type suffix */
00305         return isdigit(ch) || is_type_suffix_char(ch);
00306     } else {
00307         char const *suffix = integer_type_suffix(cs);
00308         if (suffix) {
00309             /* cs contains some numeral. Can extend with type suffix char */
00310             /* Can extend with U if not present already */
00311             bool verdict = tolower(ch) == 'u' && !strchr(suffix,'u') && !strchr(suffix,'U') ;
00312             if (!verdict) { /* Not U. Try L.  */
00313                 char *lastl = strrchr(suffix,'l');
00314                 if (!lastl) {
00315                     lastl = strrchr(suffix,'L');
00316                 }
00317                 /* Can extend with L if not present more than once already and occcurs last if present */
00318                 verdict =  tolower(ch) == 'l' && (lastl == NULL || (tolower(lastl[-1]) != 'l' && tolower(lastl[1]) == '\0'));
00319             }
00320             return verdict;
00321 
00322         } else { /* No numeral present */
00323             /* Can initialise with any digit */
00324             return (!cs->text || !cs->text[0]) && isdigit(ch);
00325         }
00326     }
00327 }
00328 
00334 static bool
00335 is_valid_symbol_char(char ch, canonical_string_const_h cs)
00336 {
00337     assert(cs);
00338     if (!cs->text || !cs->text[0]) {
00339         return is_symbol_start_char(ch);
00340     } else {
00341         return is_symbol_inner_char(ch);
00342     }
00343 
00344 }
00345 
00353 static void
00354 canonical_string_init_by_test(canonical_string_h cs,
00355                               char const **cpp,
00356                               canonical_string_char_validator_t validator)
00357 {
00358     size_t spare = 0;
00359     size_t used = 0;
00360     char const *cp;
00361         char *tp;
00362         assert(cpp);
00363         cp = *cpp;
00364     tp = cs->text;
00365     for (   ; *cp && validator(*cp,cs); cp = chew_continuation(++cp),++tp,++used,--spare) {
00366         if (!spare) {
00367             spare = used > 4 ? used >> 1 : 4;
00368             cs->text = reallocate(cs->text,used + spare + 1);
00369             memset(cs->text + used,'\0',spare + 1);
00370             tp = cs->text + used;
00371         }
00372         *tp = *cp;
00373     }
00374     *cpp = cp;
00375 }
00376 
00384 static void
00385 canonical_string_copy_init(canonical_string_h dest, canonical_string_const_h src)
00386 {
00387     dest->text = clone(src->text,0);
00388 }
00389 
00391 static void
00392 canonical_string_finis(canonical_string_h cs)
00393 {
00394     if (cs->text) {
00395         free(cs->text);
00396         cs->text = NULL;
00397     }
00398 }
00399 
00402 /* API*/
00403 
00404 canonical_string_h
00405 canonical_string_new(char const *start, size_t nbytes, char const **pend)
00406 {
00407     canonical_string_h cs = zallocate(sizeof(struct canonical_string_impl));
00408     canonical_string_init(cs,start,nbytes,pend);
00409     return cs;
00410 }
00411 
00412 canonical_string_h
00413 canonical_numeral(int_spec_t const * int_spec)
00414 {
00415     canonical_string_h cs = zallocate(sizeof(struct canonical_string_impl));
00416     char *heap = format_int(int_spec);
00417     char const * num = heap;
00418     canonical_string_init_by_test(cs,&num,is_valid_digit);
00419     free(heap);
00420     return cs;
00421 }
00422 
00423 canonical_string_h
00424 canonical_identifier(char const **cpp)
00425 {
00426     canonical_string_h cs = zallocate(sizeof(struct canonical_string_impl));
00427     canonical_string_init_by_test(cs,cpp,is_valid_symbol_char);
00428     return cs;
00429 }
00430 
00431 canonical_string_h
00432 canonical_string_copy(canonical_string_const_h src)
00433 {
00434     canonical_string_h cs;
00435     assert(src);
00436     cs = allocate(sizeof(struct canonical_string_impl));
00437     canonical_string_copy_init(cs,src);
00438     return cs;
00439 }
00440 
00441 void
00442 canonical_string_dispose(canonical_string_h cs)
00443 {
00444     if (cs) {
00445         canonical_string_finis(cs);
00446         free(cs);
00447     }
00448 }
00449 
00450 void
00451 canonical_string_swap(canonical_string_h lhs, canonical_string_h rhs)
00452 {
00453     assert(lhs);
00454     assert(rhs);
00455     PODSWAP(heap_str,&lhs->text,&rhs->text);
00456 }
00457 
00458 void
00459 canonical_string_assign(canonical_string_h dest, canonical_string_const_h src)
00460 {
00461     if (dest != src) {
00462         canonical_string_h tmp = canonical_string_copy(src);
00463         canonical_string_swap(dest,tmp);
00464         canonical_string_dispose(tmp);
00465     }
00466 }
00467 
00468 bool
00469 canonical_string_equal(canonical_string_const_h lhs, canonical_string_const_h rhs)
00470 {
00471     bool eq = lhs == rhs;
00472     assert(lhs);
00473     assert(rhs);
00474     if (!eq) {
00475         eq = lhs->text == rhs->text || !strcmp(lhs->text,rhs->text);
00476     }
00477     return eq;
00478 }
00479 
00480 int
00481 canonical_string_compare(canonical_string_const_h lhs, void const *rhs, size_t rhslen)
00482 {
00483     size_t lhs_len;
00484     int cmp;
00485     assert(lhs);
00486     assert(rhs);
00487     if (!rhslen) {
00488         if (lhs->text == ((canonical_string_const_h)rhs)->text) {
00489             /* Both texts NULL */
00490             return 0;
00491         }
00492         return strcmp(lhs->text,((canonical_string_const_h)rhs)->text);
00493     }
00494     if (!lhs->text) {
00495         return -1;
00496     }
00497     lhs_len = strlen(lhs->text);
00498     cmp = strncmp(lhs->text,(char *)rhs,rhslen);
00499     if (!cmp && lhs->text[rhslen]) {
00500         cmp = 1;
00501     }
00502     return cmp;
00503 }
00504 
00505 char const *
00506 canonical_string_text(canonical_string_const_h cs)
00507 {
00508     assert(cs);
00509     return cs->text ? cs->text : "";
00510 }
00511 
00512 size_t
00513 canonical_string_length(canonical_string_const_h cs)
00514 {
00515     assert(cs);
00516     return cs->text ? strlen(cs->text) : 0;
00517 }
00518 
00519 void
00520 canonical_string_appends(canonical_string_h cs, canonical_string_const_h more,
00521                         bool punct)
00522 {
00523     size_t cs_len = canonical_string_length(cs);
00524     size_t more_len = canonical_string_length(more);
00525     if (more_len) {
00526         if (!cs_len) {
00527             canonical_string_assign(cs,more);
00528         } else {
00529             cs->text = reallocate(cs->text,cs_len + more_len + 1 + (int)punct);
00530             if (punct) {
00531                 cs->text[cs_len++] = ' ';
00532             }
00533             strcpy(cs->text + cs_len,more->text);
00534         }
00535     }
00536 }
00537 
00538 void
00539 canonical_string_appendc(canonical_string_h cs, char more)
00540 {
00541     size_t cs_len = canonical_string_length(cs);
00542     cs->text = reallocate(cs->text,cs_len + 2);
00543     cs->text[cs_len++] = more;
00544     cs->text[cs_len] = 0;
00545 }
00546 
00547 canonical_string_h
00548 canonical_string_substr(canonical_string_const_h cs, size_t start, size_t len)
00549 {
00550     canonical_string_h substr = NULL;
00551     assert(cs);
00552     if (start < canonical_string_length(cs)) {
00553         substr = zallocate(sizeof(struct canonical_string_impl));
00554         substr->text = clone(cs->text + start,len);
00555     }
00556     return substr;
00557 }
00558 
00559 void
00560 canonical_string_replace(canonical_string_h cs,
00561                         size_t start,
00562                         size_t len,
00563                         canonical_string_const_h subst)
00564 {
00565     size_t cs_len, subst_len;
00566     assert(cs);
00567     assert(subst);
00568     cs_len = canonical_string_length(cs);
00569     subst_len = canonical_string_length(subst);
00570     if (start < cs_len) {
00571         size_t tail_len;
00572         heap_str text;
00573         if (start + len < cs_len) {
00574             tail_len = cs_len - (start + len);
00575         }
00576         else {
00577             tail_len = 0;
00578         }
00579         text = zallocate(start + subst_len + tail_len + 1);
00580         if (start) {
00581             memcpy(text,cs->text,start);
00582         }
00583         if (subst_len) {
00584             memcpy(text + start,subst->text,subst_len);
00585         }
00586         if (tail_len) {
00587             memcpy(text + start + subst_len,cs->text + start + len,tail_len);
00588         }
00589         free(cs->text);
00590         cs->text = text;
00591     }
00592 }
00593 
00594 /* EOF */
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines