coan 4.2.4
|
00001 /*************************************************************************** 00002 * Copyright (C) 2004, 2006 Symbian Software Ltd. * 00003 * All rights reserved. * 00004 * Copyright (C) 2007-2011 Mike Kinghan, imk@strudl.org * 00005 * All rights reserved. * 00006 * * 00007 * Contributed originally by Mike Kinghan, imk@strudl.org * 00008 * * 00009 * Redistribution and use in source and binary forms, with or without * 00010 * modification, are permitted provided that the following conditions * 00011 * are met: * 00012 * * 00013 * Redistributions of source code must retain the above copyright * 00014 * notice, this list of conditions and the following disclaimer. * 00015 * * 00016 * Redistributions in binary form must reproduce the above copyright * 00017 * notice, this list of conditions and the following disclaimer in the * 00018 * documentation and/or other materials provided with the distribution. * 00019 * * 00020 * Neither the name of Symbian Software Ltd. nor the names of its * 00021 * contributors may be used to endorse or promote products derived from * 00022 * this software without specific prior written permission. * 00023 * * 00024 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * 00025 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * 00026 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * 00027 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * 00028 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * 00029 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * 00030 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * 00031 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * 00032 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,* 00033 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * 00034 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * 00035 * DAMAGE. * 00036 * * 00037 **************************************************************************/ 00038 00039 #include "integer_constant.h" 00040 #include "report.h" 00041 #include "chew.h" 00042 #include <stdio.h> 00043 #include <limits.h> 00044 00045 00046 00054 00055 /* Helpers */ 00056 00070 static int 00071 char_escape_val(char const **cpp) 00072 { 00073 int val = EOF; 00074 char const *cp = *cpp; 00075 if (*cp == '\\') { 00076 cp = chew_continuation(++cp); 00077 switch(*cp) { 00078 case '\0': 00079 break; 00080 case 'a': 00081 val = '\a'; 00082 break; 00083 case 'b': 00084 val = '\b'; 00085 break; 00086 case 'f': 00087 val = '\f'; 00088 break; 00089 case 'n': 00090 val = '\n'; 00091 break; 00092 case 'r': 00093 val = '\r'; 00094 break; 00095 case 't': 00096 val = '\t'; 00097 break; 00098 case 'v': 00099 val = '\v'; 00100 break; 00101 default: 00102 val = *cp; 00103 } 00104 cp = chew_continuation(++cp); 00105 } else if (*cp) { 00106 val = *cp; 00107 cp = chew_continuation(++cp); 00108 } 00109 *cpp = cp; 00110 return val; 00111 } 00112 00122 static unsigned long long 00123 decode_utf8(char const **cpp) 00124 { 00125 unsigned long long val = ULLONG_MAX; 00126 char const *cp = *cpp; 00127 size_t bytes = 0; 00128 unsigned int byte1 = *cp & 0xff; 00129 unsigned int byte2, byte3, byte4; 00130 if (byte1 == EOF) { 00131 bytes = 0; 00132 } else if (byte1 >> 7 == 0) { 00133 /* Top bit of lead byte is 0. This is a plain ascii code */ 00134 bytes = 1; 00135 } else if (byte1 >> 5 == 0x6) { 00136 /* Top 3 bits of lead byte are 110. A 2-byte code. */ 00137 bytes = 2; 00138 } else if (byte1 >> 4 == 0xe) { 00139 /* Top 4 bits of lead byte are 1110. A 3-byte code. */ 00140 bytes = 3; 00141 } else if (bytes >> 3 == 0x1e) { 00142 /* Top 5 bits of lead byte are 11110. A 4-byte code. */ 00143 bytes = 4; 00144 } 00145 switch(bytes) { 00146 case 1: 00147 val = byte1; 00148 *cpp = chew_continuation(++cp); 00149 break; 00150 case 2: 00151 byte1 &= 0x1F; /* Clear top three bits of lead byte. */ 00152 byte2 = 0xff & *(cp = chew_continuation(++cp)); 00153 if (byte2 == EOF || byte2 >> 6 != 2) { /* Top 2 bits of continuation byte must be 10 */ 00154 break; 00155 } 00156 byte2 &= 0x3F; 00157 /* Value is concatenation of non-control bits */ 00158 val = ((byte1 << 6) | byte2); 00159 *cpp = chew_continuation(++cp); 00160 break; 00161 case 3: 00162 byte1 &= 0x1F; 00163 byte2 = 0xff & *(cp = chew_continuation(++cp)); 00164 if (byte2 == EOF || byte2 >> 6 != 2) { 00165 break; 00166 } 00167 byte2 &= 0x3F; 00168 byte3 = 0xff & *(cp = chew_continuation(++cp)); 00169 if (byte3 == EOF || byte3 >> 6 != 2) { 00170 break; 00171 } 00172 byte3 &= 0x3F; 00173 val = (byte1 << 12) | (byte2 << 6) | byte3; 00174 *cpp = chew_continuation(++cp); 00175 break; 00176 case 4: 00177 byte1 &= 0x1F; 00178 byte2 = 0xff & *(cp = chew_continuation(++cp)); 00179 if (byte2 == EOF || byte2 >> 6 != 2) { 00180 break; 00181 } 00182 byte2 &= 0x3F; 00183 byte3 = 0xff & *(cp = chew_continuation(++cp)); 00184 if (byte3 == EOF || byte3 >> 6 != 2) { 00185 break; 00186 } 00187 byte3 &= 0x3F; 00188 byte4 = 0xff & *(cp = chew_continuation(++cp)); 00189 if (byte4 == EOF || byte4 >> 6 != 2) { 00190 break; 00191 } 00192 byte4 &= 0x3F; 00193 val = (byte1 << 18) | (byte2 << 12) | (byte3 << 6) | byte4; 00194 *cpp = chew_continuation(++cp); 00195 break; 00196 default: 00197 break; 00198 } 00199 return val; 00200 } 00201 00215 static unsigned long long 00216 eval_numeral_helper(char const **pnum, unsigned int base, bool *overflow) 00217 { 00218 char const *num = *pnum; 00219 int dval = 0; 00220 unsigned long long val = 0; 00221 unsigned long long tmp; 00222 *overflow = false; 00223 for ( ;; num = chew_continuation(++num)) { 00224 switch(*num) { 00225 case '0': 00226 dval = 0; 00227 break; 00228 case '1': 00229 dval = 1; 00230 break; 00231 case '2': 00232 dval = 2; 00233 break; 00234 case '3': 00235 dval = 3; 00236 break; 00237 case '4': 00238 dval = 4; 00239 break; 00240 case '5': 00241 dval = 5; 00242 break; 00243 case '6': 00244 dval = 6; 00245 break; 00246 case '7': 00247 dval = 7; 00248 break; 00249 case '8': 00250 dval = 8; 00251 break; 00252 case '9': 00253 dval = 9; 00254 break; 00255 case 'a': 00256 case 'A': 00257 dval = 10; 00258 break; 00259 case 'b': 00260 case 'B': 00261 dval = 11; 00262 break; 00263 case 'c': 00264 case 'C': 00265 dval = 12; 00266 break; 00267 case 'd': 00268 case 'D': 00269 dval = 13; 00270 break; 00271 case 'e': 00272 case 'E': 00273 dval = 14; 00274 break; 00275 case 'f': 00276 case 'F': 00277 dval = 15; 00278 break; 00279 default: 00280 dval = 16; 00281 } 00282 if ((unsigned)dval < base) { 00283 tmp = val; 00284 val *= base; 00285 if (val / base != tmp) { 00286 *overflow = true; 00287 } else if (dval) { 00288 tmp = val; 00289 val += dval; 00290 if (val <= tmp) { 00291 *overflow = true; 00292 } 00293 } 00294 } else { 00295 break; 00296 } 00297 } 00298 *pnum = num; 00299 return val; 00300 } 00301 00302 00316 static unsigned long long 00317 eval_numerical_character_constant(char const **cpp, int base) 00318 { 00319 unsigned long long val = ULLONG_MAX; 00320 char const *cp = *cpp; 00321 char prefix = 0; 00322 assert(base == 8 || base == 16); 00323 if (base == 8) { 00324 prefix = '0'; 00325 } else if (base == 16) { 00326 prefix = 'x'; 00327 } else { 00328 assert(false); 00329 } 00330 if (*cp == '\'') { 00331 cp = chew_continuation(++cp); 00332 if (*cp == '\\' && *(cp = chew_continuation(++cp)) == prefix) { 00333 bool overflow; 00334 cp = chew_continuation(++cp); 00335 val = eval_numeral_helper(&cp,base,&overflow); 00336 if (overflow || val > INT_MAX || *cp != '\'') { 00337 val = ULLONG_MAX; 00338 } else { 00339 *cpp = chew_continuation(++cp); 00340 } 00341 } 00342 } 00343 return val; 00344 } 00345 00358 static unsigned long long 00359 eval_octal_character_constant(char const **cpp) 00360 { 00361 return eval_numerical_character_constant(cpp,8); 00362 } 00363 00376 static unsigned long long 00377 eval_hex_character_constant(char const **cpp) 00378 { 00379 return eval_numerical_character_constant(cpp,16); 00380 } 00381 00384 /* API*/ 00385 00386 int_spec_t 00387 eval_numeral(char const *num, char const **numend) 00388 { 00389 int_spec_t result = {INT_UNION_INITOR,INT_INSOLUBLE}; 00390 unsigned int base = 10; 00391 unsigned long long val, max_val = ULLONG_MAX; 00392 bool overflow = false; 00393 int_type type = INT_LLONG; 00394 /* Initially assume numeral may represent a long long 00395 until a type suffix confirms or disconfirms. 00396 Finally, in the absence of any type suffix, we will say the numeral 00397 is an int (signed or unsigned) if it is not too big */ 00398 size_t num_len = 0; 00399 char const *start = num; 00400 char const *int_type = NULL; 00401 heap_str gripe = NULL; 00402 bool has_suffix = false; 00403 if (*num == 0) { 00404 return result; 00405 } 00406 if (*num == '0') { 00407 num = chew_continuation(++num); 00408 if (*num == 'x' || *num == 'X') { 00409 num = chew_continuation(++num); 00410 base = 16; 00411 } else { 00412 base = 8; 00413 } 00414 } 00415 val = eval_numeral_helper(&num,base,&overflow); 00416 if (overflow) { 00417 type = INT_ULLONG; 00418 int_type = "unsigned long long"; 00419 max_val = ULLONG_MAX; 00420 } 00421 num_len = num - start; 00422 if (num_len == 1 && (*num == 'x' || *num == 'X')) { 00423 num = start; 00424 } else if (num_len > 0) { 00425 if (*num == 'u' || *num == 'U') { 00426 has_suffix = true; 00427 type = INT_UINT; 00428 num = chew_continuation(++num); 00429 } 00430 if (*num == 'l' || *num == 'L') { 00431 has_suffix = true; 00432 num = chew_continuation(++num); 00433 type = IS_SIGNED(type) ? INT_LONG : INT_ULONG; 00434 } 00435 if (*num == 'l' || *num == 'L') { 00436 has_suffix = true; 00437 num = chew_continuation(++num); 00438 type = IS_SIGNED(type) ? INT_LLONG : INT_ULLONG; 00439 } 00440 if (IS_SIGNED(type) && (*num == 'u' || *num == 'U')) { 00441 if (!has_suffix) { 00442 has_suffix = true; 00443 type = INT_UINT; 00444 } else { 00445 type = MAKE_UNSIGNED(type); 00446 } 00447 num = chew_continuation(++num); 00448 } 00449 00450 } 00451 *numend = num; 00452 if (num_len == 0) { 00453 return result; 00454 } 00455 if (IS_SIGNED(type) && val > LLONG_MAX && !has_suffix) { 00456 /* If we have no suffix explicitly specifying long or long long 00457 then we allow the value to be an unsigned long long and emit a warning. 00458 We store the warning at this point and will write it later if it 00459 is not superceded by an overflow warning */ 00460 report(GRIPE_FORCED_UNSIGNED,&gripe, 00461 "Integer constant \"%.*s\" is so large it is treated as unsigned", 00462 num - start,start); 00463 00464 type = INT_ULLONG; 00465 } 00466 if (type == INT_ULONG && val > ULONG_MAX) { 00467 overflow = true; 00468 int_type = "unsigned long"; 00469 max_val = ULONG_MAX; 00470 } else if (type == INT_LONG && val > LONG_MAX) { 00471 overflow = true; 00472 int_type = "long"; 00473 max_val = LONG_MAX; 00474 } else if (type == INT_LLONG && val > LLONG_MAX) { 00475 overflow = true; 00476 int_type = "long long"; 00477 max_val = LLONG_MAX; 00478 } 00479 if (overflow) { 00480 release((void **)&gripe); /* On overflow discard any stored warning */ 00481 report(GRIPE_INT_OVERFLOW,NULL, 00482 "Integer constant \"%.*s\" is too large for type %s " 00483 "(max %llu): expression will not be resolved", 00484 num - start,start,int_type,max_val); 00485 result.type = INT_TOO_BIG; 00486 } else { 00487 if (gripe) { /* Didn't overflow so emit any stored warning */ 00488 report(GRIPE_FORCED_UNSIGNED,&gripe,NULL); 00489 } 00490 if (!has_suffix) { 00491 /* No type-suffix. Reduce type to (unsigned) int if value will fit */ 00492 if (IS_SIGNED(type)) { 00493 if (val <= (unsigned long long)INT_MAX) { 00494 type = INT_INT; 00495 } 00496 } else if (val <= (unsigned long long)UINT_MAX) { 00497 type = INT_UINT; 00498 } 00499 } 00500 result.val.ull = val; 00501 result.type = type; 00502 } 00503 return result; 00504 } 00505 00515 int_spec_t 00516 eval_character_constant(char const **cpp) 00517 { 00518 int_spec_t result = INT_SPEC_INITOR; 00519 char const *cp = *cpp; 00520 char const *int_type = "int"; 00521 unsigned long long val = ULLONG_MAX; 00522 bool wide = false; 00523 int max_len = sizeof(int); /* Assume multi-byte constant. Can have sizeof(int) bytes */ 00524 if (*cp == 'L') { /* No, this is a wide-character constant */ 00525 cp = chew_continuation(++cp); 00526 max_len = sizeof(wchar_t); /* Can have sizeof(wchar_t) bytes */ 00527 int_type = "wchar_t"; 00528 wide = true; 00529 } 00530 if (*cp == '\'') { 00531 if (val == ULLONG_MAX) { 00532 val = eval_hex_character_constant(&cp); 00533 } 00534 if (val == ULLONG_MAX) { 00535 val = eval_octal_character_constant(&cp); 00536 } 00537 if (val == ULLONG_MAX) { 00538 /* Not a hex or octal constant. Try multi-byte or wide constant */ 00539 int chval = 0; 00540 int nbytes = 0; 00541 val = 0; 00542 cp = chew_continuation(++cp); /* Consume opening quote */ 00543 /* If not a wide character constant then we'll evaluate as a concatenation of 00544 8-bit ints. But likewise even if it is a wide character constant that commences with 00545 an escape, because an escaped character must be ascii. */ 00546 if (!wide || (cp[0] == '\\' && cp[1] != '\n')) { 00547 for (; nbytes < max_len && *cp && *cp != '\'' && (chval = char_escape_val(&cp)) != EOF; 00548 val = (val << 8 | chval), ++nbytes) {} 00549 } else { 00550 val = decode_utf8(&cp); 00551 } 00552 if (val == ULLONG_MAX) {} 00553 if (*cp != '\'') { /* Not a valid constant. No closing quote within max_len. */ 00554 val = ULLONG_MAX; 00555 /* Look for that closing quote... */ 00556 for (; *(cp = chew_continuation(++cp)) != '\0' && *cp != '\n' && *cp != '\''; 00557 cp = chew_continuation(++cp)) {}; 00558 if (!*cp || *cp == '\n') { /* Unclosed quotation */ 00559 report(GRIPE_UNCLOSED_QUOTE,NULL, 00560 "Unclosed quotation after \"%.*s\"",cp - *cpp,*cpp); 00561 } else { /* Constant is too long for type */ 00562 report(GRIPE_CHAR_CONSTANT_TOO_LONG,NULL, 00563 "Character constant %.*s is too long for type %s (max %d bytes). Will not be resolved", 00564 (cp - *cpp) + 1,*cpp,int_type,max_len); 00565 } 00566 } else if (!wide && nbytes > 1) { 00567 report(GRIPE_MULITBYTE_CHAR_CONSTANT,NULL, 00568 "Multi-byte character constant %.*s",(cp - *cpp) + 1,*cpp); 00569 } 00570 cp = chew_continuation(++cp); 00571 } 00572 } 00573 if (val != ULLONG_MAX) { 00574 result.val.i = (int)val; 00575 result.type = INT_INT; 00576 } else { 00577 result.type = INT_INSOLUBLE; 00578 } 00579 *cpp = cp; 00580 return result; 00581 } 00582 00583 00584 /* EOF */