coan 4.2.4
integer_constant.c
Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2004, 2006 Symbian Software Ltd.                        *
00003  *   All rights reserved.                                                  *
00004  *   Copyright (C) 2007-2011 Mike Kinghan, imk@strudl.org                  *
00005  *   All rights reserved.                                                  *
00006  *                                                                         *
00007  *   Contributed originally by Mike Kinghan, imk@strudl.org                *
00008  *                                                                         *
00009  *   Redistribution and use in source and binary forms, with or without    *
00010  *   modification, are permitted provided that the following conditions    *
00011  *   are met:                                                              *
00012  *                                                                         *
00013  *   Redistributions of source code must retain the above copyright        *
00014  *   notice, this list of conditions and the following disclaimer.         *
00015  *                                                                         *
00016  *   Redistributions in binary form must reproduce the above copyright     *
00017  *   notice, this list of conditions and the following disclaimer in the   *
00018  *   documentation and/or other materials provided with the distribution.  *
00019  *                                                                         *
00020  *   Neither the name of Symbian Software Ltd. nor the names of its        *
00021  *   contributors may be used to endorse or promote products derived from  *
00022  *   this software without specific prior written permission.              *
00023  *                                                                         *
00024  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS   *
00025  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT     *
00026  *   LIMITED TO, THE IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS    *
00027  *   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE        *
00028  *   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,   *
00029  *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,  *
00030  *   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
00031  *   OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    *
00032  *   AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,*
00033  *   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF *
00034  *   THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH  *
00035  *   DAMAGE.                                                               *
00036  *                                                                         *
00037  **************************************************************************/
00038 
00039 #include "integer_constant.h"
00040 #include "report.h"
00041 #include "chew.h"
00042 #include <stdio.h>
00043 #include <limits.h>
00044 
00045 
00046 
00054 
00055 /* Helpers */
00056 
/**
 * Evaluate one character, possibly backslash-escaped, from the body of a
 * character constant.
 *
 * \param cpp  On entry `*cpp` addresses the character to evaluate. On return
 *             `*cpp` addresses the position that `chew_continuation()`
 *             reports after the consumed character. It is left unchanged
 *             when `*cpp` already addresses the terminating NUL.
 *
 * \return The value of the character, or `EOF` when there is no character
 *         to evaluate: either `*cpp` addresses the NUL terminator, or it
 *         addresses a backslash that is followed by the NUL terminator.
 *
 * The simple escapes a, b, f, n, r, t and v evaluate to the corresponding
 * control characters. Any other escaped character evaluates to itself, which
 * covers the escaped quote, double-quote, question mark and backslash.
 *
 * NOTE(review): the unescaped value is read through plain `char`, so on a
 * platform where `char` is signed a byte with the top bit set yields a
 * negative value - confirm that callers expect this.
 */
static int
char_escape_val(char const **cpp)
{
    int val = EOF;
    char const *cp = *cpp;
    if (*cp == '\\') {
        cp = chew_continuation(++cp);
        switch(*cp) {
        case '\0':
            /* Dangling backslash at the end of the text. Stay on the
               terminator; advancing would read beyond the string. */
            *cpp = cp;
            return EOF;
        case 'a':
            val = '\a';
            break;
        case 'b':
            val = '\b';
            break;
        case 'f':
            val = '\f';
            break;
        case 'n':
            val = '\n';
            break;
        case 'r':
            val = '\r';
            break;
        case 't':
            val = '\t';
            break;
        case 'v':
            val = '\v';
            break;
        default:
            /* Any other escaped character stands for itself */
            val = *cp;
        }
        /* Consume the character that followed the backslash */
        cp = chew_continuation(++cp);
    } else if (*cp) {
        val = *cp;
        cp = chew_continuation(++cp);
    }
    *cpp = cp;
    return val;
}
00112 
/**
 * Decode one UTF-8 encoded character of 1 to 4 bytes.
 *
 * \param cpp  On entry `*cpp` addresses the lead byte. On success `*cpp` is
 *             advanced to the position that `chew_continuation()` reports
 *             after the last byte of the sequence. On failure `*cpp` is
 *             left unchanged.
 *
 * \return The decoded value, or `ULLONG_MAX` if the bytes at `*cpp` do not
 *         form a sequence this function accepts. That is the case when the
 *         lead byte is NUL, when the lead byte has none of the bit patterns
 *         0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx, or when a trailing byte
 *         does not have the pattern 10xxxxxx.
 *
 * Only the framing bits are checked: overlong encodings and out-of-range
 * values are not rejected. `chew_continuation()` is applied between the
 * bytes of a sequence, as it is elsewhere in this file.
 */
static unsigned long long
decode_utf8(char const **cpp)
{
    char const *cp = *cpp;
    unsigned int const lead = *cp & 0xff;
    unsigned long long val;
    unsigned int trailing; /* Number of continuation bytes expected */

    if (lead == 0) {
        /* End of text: nothing to decode, and we must not step past it */
        return ULLONG_MAX;
    } else if (lead >> 7 == 0) {
        /* Top bit of lead byte is 0. This is a plain ascii code */
        val = lead;
        trailing = 0;
    } else if (lead >> 5 == 0x6) {
        /* Top 3 bits of lead byte are 110. A 2-byte code, 5 payload bits */
        val = lead & 0x1F;
        trailing = 1;
    } else if (lead >> 4 == 0xe) {
        /* Top 4 bits of lead byte are 1110. A 3-byte code, 4 payload bits */
        val = lead & 0x0F;
        trailing = 2;
    } else if (lead >> 3 == 0x1e) {
        /* Top 5 bits of lead byte are 11110. A 4-byte code, 3 payload bits */
        val = lead & 0x07;
        trailing = 3;
    } else {
        /* A stray continuation byte or an invalid lead byte */
        return ULLONG_MAX;
    }
    for (; trailing > 0; --trailing) {
        unsigned int const next = 0xff & *(cp = chew_continuation(++cp));
        if (next >> 6 != 2) {
            /* Top 2 bits of a continuation byte must be 10. This also
               rejects a NUL, so a truncated sequence stops here. */
            return ULLONG_MAX;
        }
        /* Value is the concatenation of the non-control bits */
        val = (val << 6) | (next & 0x3F);
    }
    *cpp = chew_continuation(++cp);
    return val;
}
00201 
/**
 * Map a character to its value as a digit.
 *
 * \param ch  The character to classify.
 * \return 0 to 15 for the characters 0-9, a-f and A-F; 16 for any other
 *         character, including NUL.
 */
static unsigned int
numeral_digit_value(char ch)
{
    static char const lower_digits[] = "0123456789abcdef";
    static char const upper_digits[] = "0123456789ABCDEF";
    unsigned int value;
    for (value = 0; value < 16; ++value) {
        if (ch == lower_digits[value] || ch == upper_digits[value]) {
            return value;
        }
    }
    return 16;
}

/**
 * Accumulate the value of a run of digits in a given base.
 *
 * \param pnum      On entry `*pnum` addresses the first candidate digit. On
 *                  return it addresses the first character that is not a
 *                  digit of `base`.
 * \param base      The number base. A character is accepted as a digit when
 *                  its digit value is less than `base`.
 * \param overflow  Set to `false` on entry and to `true` if the accumulated
 *                  value wraps `unsigned long long` at any step. Digits are
 *                  still consumed after an overflow is detected.
 *
 * \return The accumulated value, which is 0 if no digit was consumed and is
 *         not meaningful when `*overflow` is set.
 */
static unsigned long long
eval_numeral_helper(char const **pnum, unsigned int base, bool *overflow)
{
    char const *cursor = *pnum;
    unsigned long long total = 0;

    *overflow = false;
    for (;;) {
        unsigned int const digit = numeral_digit_value(*cursor);
        unsigned long long scaled;
        if (digit >= base) {
            /* Not a digit of this base: the numeral ends here */
            break;
        }
        scaled = total * base;
        if (scaled / base != total) {
            /* The multiplication wrapped; the digit is not added */
            *overflow = true;
        } else if (digit != 0) {
            unsigned long long const before = scaled;
            scaled += digit;
            if (scaled <= before) {
                /* The addition wrapped */
                *overflow = true;
            }
        }
        total = scaled;
        cursor = chew_continuation(++cursor);
    }
    *pnum = cursor;
    return total;
}
00301 
00302 
/**
 * Evaluate a character constant written as an octal or hexadecimal escape,
 * for example '\101' or '\x41'.
 *
 * \param cpp   On entry `*cpp` addresses the opening quote. On success it is
 *              advanced to the position that `chew_continuation()` reports
 *              after the closing quote. On failure it is left unchanged.
 * \param base  8 for an octal escape or 16 for a hexadecimal escape. Any
 *              other value is a programming error and is asserted against.
 *
 * \return The value of the constant, or `ULLONG_MAX` if the text at `*cpp`
 *         is not a constant of the requested form, if its value overflows,
 *         or if its value exceeds `INT_MAX`.
 *
 * An octal escape is a backslash followed directly by octal digits; no
 * leading 0 is required. A hexadecimal escape is a backslash, a lower-case
 * x, then hexadecimal digits. The number of digits is not limited here; only
 * the resulting value is range-checked.
 *
 * NOTE(review): a hexadecimal escape with no digits after the x is accepted
 * and evaluates to 0, as it did before this change - confirm this is wanted.
 */
static unsigned long long
eval_numerical_character_constant(char const **cpp, int base)
{
    unsigned long long val;
    char const *cp = *cpp;
    bool overflow = false;

    assert(base == 8 || base == 16);
    if (*cp != '\'') {
        return ULLONG_MAX;
    }
    cp = chew_continuation(++cp);
    if (*cp != '\\') {
        return ULLONG_MAX;
    }
    cp = chew_continuation(++cp);
    if (base == 16) {
        if (*cp != 'x') {
            return ULLONG_MAX;
        }
        /* Consume the x that introduces the hex digits */
        cp = chew_continuation(++cp);
    } else if (*cp < '0' || *cp > '7') {
        /* An octal escape must start with an octal digit. Any octal digit
           will do: the first digit is part of the value, not a prefix. */
        return ULLONG_MAX;
    }
    val = eval_numeral_helper(&cp,base,&overflow);
    if (overflow || val > INT_MAX || *cp != '\'') {
        return ULLONG_MAX;
    }
    /* Consume the closing quote */
    *cpp = chew_continuation(++cp);
    return val;
}
00345 
/**
 * Evaluate a character constant written as an octal escape.
 *
 * Delegates to `eval_numerical_character_constant()` with base 8.
 *
 * \param cpp  Passed through unchanged; `*cpp` is expected to address the
 *             opening quote of the constant.
 * \return Whatever `eval_numerical_character_constant()` returns: the value
 *         of the constant, or `ULLONG_MAX` if it could not be evaluated.
 */
static unsigned long long
eval_octal_character_constant(char const **cpp)
{
    enum { OCTAL_BASE = 8 };
    unsigned long long const val =
        eval_numerical_character_constant(cpp,OCTAL_BASE);
    return val;
}
00363 
/**
 * Evaluate a character constant written as a hexadecimal escape.
 *
 * Delegates to `eval_numerical_character_constant()` with base 16.
 *
 * \param cpp  Passed through unchanged; `*cpp` is expected to address the
 *             opening quote of the constant.
 * \return Whatever `eval_numerical_character_constant()` returns: the value
 *         of the constant, or `ULLONG_MAX` if it could not be evaluated.
 */
static unsigned long long
eval_hex_character_constant(char const **cpp)
{
    enum { HEX_BASE = 16 };
    unsigned long long const val =
        eval_numerical_character_constant(cpp,HEX_BASE);
    return val;
}
00381 
00384 /* API*/
00385 
00386 int_spec_t
00387 eval_numeral(char const *num, char const **numend)
00388 {
00389     int_spec_t result = {INT_UNION_INITOR,INT_INSOLUBLE};
00390     unsigned int base = 10;
00391     unsigned long long val, max_val = ULLONG_MAX;
00392     bool overflow = false;
00393     int_type type = INT_LLONG;
00394     /*  Initially assume numeral may represent a long long
00395         until a type suffix confirms or disconfirms.
00396         Finally, in the absence of any type suffix, we will say the numeral
00397         is an int (signed or unsigned) if it is not too big */
00398     size_t num_len = 0;
00399     char const *start = num;
00400     char const *int_type = NULL;
00401     heap_str gripe = NULL;
00402     bool has_suffix = false;
00403     if (*num == 0) {
00404         return result;
00405     }
00406     if (*num == '0') {
00407         num = chew_continuation(++num);
00408         if (*num == 'x' || *num == 'X') {
00409             num = chew_continuation(++num);
00410             base = 16;
00411         } else {
00412             base = 8;
00413         }
00414     }
00415     val = eval_numeral_helper(&num,base,&overflow);
00416     if (overflow) {
00417         type = INT_ULLONG;
00418         int_type = "unsigned long long";
00419         max_val = ULLONG_MAX;
00420     }
00421     num_len = num - start;
00422     if (num_len == 1 && (*num == 'x' || *num == 'X')) {
00423         num = start;
00424     } else if (num_len > 0) {
00425         if (*num == 'u' || *num == 'U') {
00426             has_suffix = true;
00427             type = INT_UINT;
00428             num = chew_continuation(++num);
00429         }
00430         if (*num == 'l' || *num == 'L') {
00431             has_suffix = true;
00432             num = chew_continuation(++num);
00433             type = IS_SIGNED(type) ? INT_LONG : INT_ULONG;
00434         }
00435         if (*num == 'l' || *num == 'L') {
00436             has_suffix = true;
00437             num = chew_continuation(++num);
00438             type = IS_SIGNED(type) ? INT_LLONG : INT_ULLONG;
00439         }
00440         if (IS_SIGNED(type) && (*num == 'u' || *num == 'U')) {
00441             if (!has_suffix) {
00442                 has_suffix = true;
00443                 type = INT_UINT;
00444             } else {
00445                 type = MAKE_UNSIGNED(type);
00446             }
00447             num = chew_continuation(++num);
00448         }
00449 
00450     }
00451     *numend = num;
00452     if (num_len == 0) {
00453         return result;
00454     }
00455     if (IS_SIGNED(type) && val > LLONG_MAX && !has_suffix) {
00456         /* If we have no suffix explicitly specifying long or long long
00457             then we allow the value to be an unsigned long long and emit a warning.
00458             We store the warning at this point and will write it later if it
00459             is not superceded by an overflow warning */
00460         report(GRIPE_FORCED_UNSIGNED,&gripe,
00461                "Integer constant \"%.*s\" is so large it is treated as unsigned",
00462                num - start,start);
00463 
00464         type = INT_ULLONG;
00465     }
00466     if (type == INT_ULONG && val > ULONG_MAX) {
00467         overflow = true;
00468         int_type = "unsigned long";
00469         max_val = ULONG_MAX;
00470     } else if (type == INT_LONG && val > LONG_MAX) {
00471         overflow = true;
00472         int_type = "long";
00473         max_val = LONG_MAX;
00474     } else if (type == INT_LLONG && val > LLONG_MAX) {
00475         overflow = true;
00476         int_type = "long long";
00477         max_val = LLONG_MAX;
00478     }
00479     if (overflow) {
00480         release((void **)&gripe); /* On overflow discard any stored warning */
00481         report(GRIPE_INT_OVERFLOW,NULL,
00482                "Integer constant \"%.*s\" is too large for type %s "
00483                "(max %llu): expression will not be resolved",
00484                num - start,start,int_type,max_val);
00485         result.type = INT_TOO_BIG;
00486     } else {
00487         if (gripe) { /* Didn't overflow so emit any stored warning */
00488             report(GRIPE_FORCED_UNSIGNED,&gripe,NULL);
00489         }
00490         if (!has_suffix) {
00491             /* No type-suffix. Reduce type to (unsigned) int if value will fit */
00492             if (IS_SIGNED(type)) {
00493                 if (val <= (unsigned long long)INT_MAX) {
00494                     type = INT_INT;
00495                 }
00496             } else if (val <= (unsigned long long)UINT_MAX) {
00497                 type = INT_UINT;
00498             }
00499         }
00500         result.val.ull = val;
00501         result.type = type;
00502     }
00503     return result;
00504 }
00505 
00515 int_spec_t
00516 eval_character_constant(char const **cpp)
00517 {
00518     int_spec_t result = INT_SPEC_INITOR;
00519     char const *cp = *cpp;
00520     char const *int_type = "int";
00521     unsigned long long val = ULLONG_MAX;
00522     bool wide = false;
00523     int max_len = sizeof(int); /* Assume multi-byte constant. Can have sizeof(int) bytes */
00524     if (*cp == 'L') { /* No, this is a wide-character constant */
00525         cp = chew_continuation(++cp);
00526         max_len = sizeof(wchar_t); /* Can have sizeof(wchar_t) bytes */
00527         int_type = "wchar_t";
00528         wide = true;
00529     }
00530     if (*cp == '\'') {
00531         if (val == ULLONG_MAX) {
00532             val = eval_hex_character_constant(&cp);
00533         }
00534         if (val == ULLONG_MAX) {
00535             val = eval_octal_character_constant(&cp);
00536         }
00537         if (val == ULLONG_MAX) {
00538             /* Not a hex or octal constant. Try multi-byte or wide constant */
00539             int chval = 0;
00540             int nbytes = 0;
00541             val = 0;
00542             cp = chew_continuation(++cp); /* Consume opening quote */
00543             /* If not a wide character constant then we'll evaluate as a concatenation of
00544                 8-bit ints. But likewise even if it is a wide character constant that commences with
00545                 an escape, because an escaped character must be ascii. */
00546             if (!wide || (cp[0] == '\\' && cp[1] != '\n')) {
00547                 for (; nbytes < max_len && *cp && *cp != '\'' && (chval = char_escape_val(&cp)) != EOF;
00548                         val = (val << 8 | chval), ++nbytes) {}
00549             } else {
00550                 val = decode_utf8(&cp);
00551             }
00552             if (val == ULLONG_MAX) {}
00553             if (*cp != '\'') { /* Not a valid constant. No closing quote within max_len. */
00554                 val = ULLONG_MAX;
00555                 /* Look for that closing quote... */
00556                 for (; *(cp = chew_continuation(++cp)) != '\0' && *cp != '\n' && *cp != '\'';
00557                         cp = chew_continuation(++cp)) {};
00558                 if (!*cp || *cp == '\n') { /* Unclosed quotation */
00559                     report(GRIPE_UNCLOSED_QUOTE,NULL,
00560                            "Unclosed quotation after \"%.*s\"",cp - *cpp,*cpp);
00561                 } else { /* Constant is too long for type */
00562                     report(GRIPE_CHAR_CONSTANT_TOO_LONG,NULL,
00563                            "Character constant %.*s is too long for type %s (max %d bytes). Will not be resolved",
00564                            (cp - *cpp) + 1,*cpp,int_type,max_len);
00565                 }
00566             } else if (!wide && nbytes > 1) {
00567                 report(GRIPE_MULITBYTE_CHAR_CONSTANT,NULL,
00568                        "Multi-byte character constant %.*s",(cp - *cpp) + 1,*cpp);
00569             }
00570             cp = chew_continuation(++cp);
00571         }
00572     }
00573     if (val != ULLONG_MAX) {
00574         result.val.i = (int)val;
00575         result.type = INT_INT;
00576     } else {
00577         result.type = INT_INSOLUBLE;
00578     }
00579     *cpp = cp;
00580     return result;
00581 }
00582 
00583 
00584 /* EOF */
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines