coan  6.0.1
A C/C++ Configuration Analyzer
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
integer_constant.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2007-2013 Mike Kinghan, imk@burroingroingjoing.com *
3  * All rights reserved. *
4  * *
5  * Contributed originally by Mike Kinghan, imk@burroingroingjoing.com *
6  * *
7  * Redistribution and use in source and binary forms, with or without *
8  * modification, are permitted provided that the following conditions *
9  * are met: *
10  * *
11  * Redistributions of source code must retain the above copyright *
12  * notice, this list of conditions and the following disclaimer. *
13  * *
14  * Redistributions in binary form must reproduce the above copyright *
15  * notice, this list of conditions and the following disclaimer in the *
16  * documentation and/or other materials provided with the distribution. *
17  * *
18  * Neither the name of Mike Kinghan nor the names of its contributors *
19  * may be used to endorse or promote products derived from this software *
20  * without specific prior written permission. *
21  * *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
23  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS *
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE *
26  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, *
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, *
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
29  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED *
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,*
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF *
32  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH *
33  * DAMAGE. *
34  * *
35  **************************************************************************/
36 
37 #include "integer_constant.h"
38 #include "parse_buffer.h"
39 #include "diagnostic.h"
40 #include "chew.h"
41 #include <limits>
42 #include <cassert>
43 
48 
50 using namespace std;
51 
52 template<class CharSeq>
53 unsigned long long
55 {
57 
58  unsigned long long val = numeric_limits<unsigned long long>::max();
59  size_t bytes = 0;
60  unsigned byte1 = *chew & 0xff;
61  unsigned byte2, byte3, byte4;
62  if (byte1 == unsigned(EOF)) {
63  bytes = 0;
64  } else if (byte1 >> 7 == 0) {
65  /* Top bit of lead byte is 0. This is a plain ascii code */
66  bytes = 1;
67  } else if (byte1 >> 5 == 0x6) {
68  /* Top 3 bits of lead byte are 110. A 2-byte code. */
69  bytes = 2;
70  } else if (byte1 >> 4 == 0xe) {
71  /* Top 4 bits of lead byte are 1110. A 3-byte code. */
72  bytes = 3;
73  } else if (byte1 >> 3 == 0x1e) {
74  /* Top 5 bits of lead byte are 11110. A 4-byte code. */
75  bytes = 4;
76  }
77  switch(bytes) {
78  case 1:
79  val = byte1;
80  chew(+1,continuation);
81  break;
82  case 2:
83  byte1 &= 0x1F; /* Clear top three bits of lead byte. */
84  chew(+1,continuation);
85  byte2 = 0xff & *chew;
86  if (byte2 == unsigned(EOF) || byte2 >> 6 != 2) {
87  /* Top 2 bits of continuation byte must be 10 */
88  break;
89  }
90  byte2 &= 0x3F;
91  /* Value is concatenation of non-control bits */
92  val = ((byte1 << 6) | byte2);
93  chew(+1,continuation);
94  break;
95  case 3:
96  byte1 &= 0x1F;
97  chew(+1,continuation);
98  byte2 = 0xff & *chew;
99  if (byte2 == unsigned(EOF) || byte2 >> 6 != 2) {
100  break;
101  }
102  byte2 &= 0x3F;
103  chew(+1,continuation);
104  byte3 = 0xff & *chew;
105  if (byte3 == unsigned(EOF) || byte3 >> 6 != 2) {
106  break;
107  }
108  byte3 &= 0x3F;
109  val = (byte1 << 12) | (byte2 << 6) | byte3;
110  chew(+1,continuation);
111  break;
112  case 4:
113  byte1 &= 0x1F;
114  chew(+1,continuation);
115  byte2 = 0xff & *chew;
116  if (byte2 == unsigned(EOF) || byte2 >> 6 != 2) {
117  break;
118  }
119  byte2 &= 0x3F;
120  chew(+1,continuation);
121  byte3 = 0xff & *chew;
122  if (byte3 == unsigned(EOF) || byte3 >> 6 != 2) {
123  break;
124  }
125  byte3 &= 0x3F;
126  chew(+1,continuation);
127  byte4 = 0xff & *chew;
128  if (byte4 == unsigned(EOF) || byte4 >> 6 != 2) {
129  break;
130  }
131  byte4 &= 0x3F;
132  val = (byte1 << 18) | (byte2 << 12) | (byte3 << 6) | byte4;
133  chew(+1,continuation);
134  break;
135  default:
136  break;
137  }
138  return val;
139 }
140 
141 template<class CharSeq>
142 pair<unsigned long long,bool>
144  unsigned base, chewer<CharSeq> & chew)
145 {
147  int dval = 0;
148  unsigned long long val = 0;
149  unsigned long long tmp;
150  bool overflow = false;
151  for ( ;; chew(+1,continuation)) {
152  switch(*chew) {
153  case '0':
154  dval = 0;
155  break;
156  case '1':
157  dval = 1;
158  break;
159  case '2':
160  dval = 2;
161  break;
162  case '3':
163  dval = 3;
164  break;
165  case '4':
166  dval = 4;
167  break;
168  case '5':
169  dval = 5;
170  break;
171  case '6':
172  dval = 6;
173  break;
174  case '7':
175  dval = 7;
176  break;
177  case '8':
178  dval = 8;
179  break;
180  case '9':
181  dval = 9;
182  break;
183  case 'a':
184  case 'A':
185  dval = 10;
186  break;
187  case 'b':
188  case 'B':
189  dval = 11;
190  break;
191  case 'c':
192  case 'C':
193  dval = 12;
194  break;
195  case 'd':
196  case 'D':
197  dval = 13;
198  break;
199  case 'e':
200  case 'E':
201  dval = 14;
202  break;
203  case 'f':
204  case 'F':
205  dval = 15;
206  break;
207  default:
208  dval = 16;
209  }
210  if (unsigned(dval) < base) {
211  tmp = val;
212  val *= base;
213  if (val / base != tmp) {
214  overflow = true;
215  } else if (dval) {
216  tmp = val;
217  val += dval;
218  if (val <= tmp) {
219  overflow = true;
220  }
221  }
222  } else {
223  break;
224  }
225  }
226  return pair<unsigned long long, bool>(val,overflow);
227 }
228 
229 template<class CharSeq>
230 int
232 {
234  int val = EOF;
235  if (*chew == '\\') {
236  chew(+1,continuation);
237  switch(*chew) {
238  case '\0':
239  break;
240  case 'a':
241  val = '\a';
242  break;
243  case 'b':
244  val = '\b';
245  break;
246  case 'f':
247  val = '\f';
248  break;
249  case 'n':
250  val = '\n';
251  break;
252  case 'r':
253  val = '\r';
254  break;
255  case 't':
256  val = '\t';
257  break;
258  case 'v':
259  val = '\v';
260  break;
261  default:
262  val = *chew;
263  }
264  chew(+1,continuation);
265  } else if (chew) {
266  val = *chew;
267  chew(+1,continuation);
268  }
269  return val;
270 }
271 
272 template<class CharSeq>
273 unsigned long long
275 {
277 
278  unsigned long long const bad_val =
279  numeric_limits<unsigned long long>::max();
280  unsigned long long val = bad_val;
281  assert(base == 8 || base == 16);
282  size_t mark = size_t(chew);
283  if (*chew == '\'') {
284  chew(+1,continuation);
285  if (*chew == '\\') {
286  chew(+1,continuation);
287  if ( (base == 8 && *chew == '0') ||
288  (base == 16 && (*chew == 'x' || *chew == 'u' ||
289  *chew == 'U'))) {
290  chew(+1,continuation);
291  std::pair<unsigned long long,bool> verdict =
292  read_based_numeral(base,chew);
293  val = verdict.first;
294  bool overflow = verdict.second;
295  if (overflow ||
296  val > (unsigned long long)numeric_limits<int>::max()
297  || *chew != '\'') {
298  val = bad_val;
299  }
300  }
301  }
302  }
303  if (val == bad_val) {
304  chew = mark;
305  }
306  return val;
307 }
308 
309 template<class CharSeq>
310 integer
312 {
314  /* Initially assume numeral may represent a long long
315  until a type suffix confirms or disconfirms.
316  Finally, in the absence of any type suffix, we will say the numeral
317  is an int (signed or unsigned) if it is not too big
318  */
319  unsigned long long const bad_val =
320  numeric_limits<unsigned long long>::max();
321  integer result(INT_LLONG,bad_val);
322  unsigned base = 10;
323  unsigned long long max_val = bad_val;
324  bool overflow = false;
325  size_t num_len = 0;
326  size_t mark = size_t(chew);
327  char const *type_desc = nullptr;
328  bool has_suffix = false;
329  if (!chew) {
330  return result;
331  }
332  if (*chew == '0') {
333  chew(+1,continuation);
334  if (*chew == 'x' || *chew == 'X') {
335  chew(+1,continuation);
336  base = 16;
337  } else {
338  base = 8;
339  }
340  }
341  pair<unsigned long long, bool> verdict =
342  read_based_numeral(base,chew);
343  result._val = verdict.first;
344  overflow = verdict.second;
345  if (overflow) {
346  result._type = INT_ULLONG;
347  type_desc = "unsigned long long";
348  max_val = bad_val;
349  }
350  num_len = size_t(chew) - mark;
351  if (num_len == 1 && base == 16) {
352  chew = mark;
353  } else if (num_len > 0) {
354  if (*chew == 'u' || *chew == 'U') {
355  has_suffix = true;
356  result._type = INT_UINT;
357  chew(+1,continuation);
358  }
359  if (*chew == 'l' || *chew == 'L') {
360  has_suffix = true;
361  chew(+1,continuation);
362  result._type = result.is_signed() ? INT_LONG : INT_ULONG;
363  }
364  if (*chew == 'l' || *chew == 'L') {
365  has_suffix = true;
366  chew(+1,continuation);
367  result._type = result.is_signed() ? INT_LLONG : INT_ULLONG;
368  }
369  if (result.is_signed() && (*chew == 'u' || *chew == 'U')) {
370  if (!has_suffix) {
371  has_suffix = true;
372  result._type = INT_UINT;
373  } else {
374  result.make_unsigned();
375  }
376  chew(+1,continuation);
377  }
378  }
379  if (num_len == 0) {
380  return result;
381  }
382  if (result.is_signed() &&
383  result._val > (unsigned long long)numeric_limits<long long>::max() &&
384  !has_suffix) {
385  /* If we have no suffix explicitly specifying long or long long
386  then we allow the value to be an unsigned long long and emit
387  a warning. We store the warning at this point and will write it
388  later if it is not superceded by an overflow warning */
390  "Integer constant \"" <<
391  chew.buf().substr(mark,size_t(chew) - mark)
392  << "\" is so large it is treated as unsigned" << defer();
393 
394  result._type = INT_ULLONG;
395  }
396  if (result.type() == INT_ULONG &&
397  result._val > numeric_limits<unsigned long>::max()) {
398  overflow = true;
399  type_desc = "unsigned long";
400  max_val = numeric_limits<unsigned long>::max();
401  } else if (result.type() == INT_LONG &&
402  result._val > (unsigned long long)numeric_limits<long>::max()) {
403  overflow = true;
404  type_desc = "long";
405  max_val = numeric_limits<long>::max();
406  } else if (result.type() == INT_LLONG &&
407  result._val >
408  (unsigned long long)numeric_limits<long long>::max()) {
409  overflow = true;
410  type_desc = "long long";
411  max_val = numeric_limits<long long>::max();
412  }
413  if (overflow) {
414  /* On overflow discard any stored warning */
416  warning_int_overflow() << "Integer constant \"" <<
417  chew.buf().substr(mark,size_t(chew) - mark)
418  << "\" is too large for type " <<
419  type_desc << "(max " << max_val <<
420  "): expression will not be resolved" << emit();
421  result._type = INT_UNDEF;
422  } else {
423  warning_forced_unsigned::flush(); /* Emit any stored warning */
424  if (!has_suffix) {
425  /* No type-suffix. Reduce type to (unsigned) int if value will fit
426  */
427  if (result.is_signed()) {
428  if (result._val <=
429  (unsigned long long)numeric_limits<int>::max()) {
430  result._type = INT_INT;
431  }
432  } else if (result._val <=
433  (unsigned long long)numeric_limits<unsigned>::max()) {
434  result._type = INT_UINT;
435  }
436  }
437  }
438  return result;
439 }
440 
/* Two explicit instantiations of a function template returning `integer`.
   NOTE(review): the declarator line that should follow each `integer`
   (listing lines 443 and 446) is absent from this listing, so the function
   name and template arguments cannot be seen here. From their position
   these are presumably the read_numeral instantiations - confirm against
   the real source. */
441 template
442 integer
444 template
445 integer
447 
448 template<class CharSeq>
450 {
452  integer result(INT_UNDEF);
453  char const *type_desc = "int";
454  unsigned long long const bad_val =
455  numeric_limits<unsigned long long>::max();
456  unsigned long long val = bad_val;
457  unsigned long max_val = numeric_limits<int>::max();
458  size_t mark = size_t(chew);
459  /* Assume multi-byte constant.*/
460  if (*chew == 'L') { /* No, this is a wide-character constant */
461  max_val = numeric_limits<wchar_t>::max();
462  type_desc = "wchar_t";
463  chew(+1,continuation);
464  } else if (*chew == 'u') {
465  max_val = numeric_limits<char16_t>::max();
466  type_desc = "char16_t";
467  chew(+1,continuation);
468  } else if (*chew == 'U') {
469  max_val = numeric_limits<char32_t>::max();
470  type_desc = "char32_t";
471  chew(+1,continuation);
472  }
473  if (*chew != '\'') {
474  chew = mark;
475  return result;
476  }
477  val = read_encoded_char(16,chew); // hex?
478  if (val == bad_val) {
479  val = read_encoded_char(8,chew); // octal?
480  }
481  if (val == bad_val) {
482  /* Not a hex or octal constant. Try UTF-8 or multibyte */
483  chew(+1,continuation); // Consume opening quote
484  size_t restart = size_t(chew);
485  // Try to read UTF-8.
486  val = decode_utf8(chew);
487  chew(continuation);
488  if (*chew != '\'') { // Not UTF-8
489  chew = restart;
490  val = 0;
491  /* Last resort. Read as concatentation of 8 bit-ints,
492  possibly overflowing int.
493  */
494  int nbytes = 0;
495  for ( ; chew && *chew != '\'' && *chew != '\n'; ++nbytes) {
496  int chval =
498  if (chval == EOF) {
499  break;
500  }
501  val = (val << 8 | chval);
502  }
503  if (*chew == '\'') {
504  if (nbytes > 1) {
506  "Multi-byte character constant "
507  << chew.buf().substr(
508  mark, size_t(chew) - mark + 1)
509  << emit();
510  }
511  } else {
512  val = bad_val;
514  << "Missing \"'\" terminator after \""
515  << chew.buf().substr(mark) << '\"' << emit();
516  }
517  }
518  }
519  chew += (*chew == '\'');
520  if (val != bad_val && val > max_val) {
521  warning_char_constant_too_long() << "Character constant "
522  << chew.buf().substr(mark, size_t(chew) - mark)
523  << " overflows type " <<
524  type_desc << "(max " << max_val
525  << "). Will not be resolved" << emit();
526  val = bad_val;
527  }
528  if (val != bad_val) {
529  result._val = val;
530  result._type = INT_INT;
531 
532  } else {
533  result._type = INT_UNDEF;
534  }
535  return result;
536 }
537 
/* Two explicit instantiations of a function template returning `integer`.
   NOTE(review): the declarator line that should follow each `integer`
   (listing lines 540 and 543) is absent from this listing, so the function
   name and template arguments cannot be seen here. From their position
   these are presumably the read_char instantiations - confirm against the
   real source. */
538 template
539 integer
541 template
542 integer
544 
546 
547 // EOF
warning_msg< 18 > warning_int_overflow
Report that an integer constant evaluates > INT_MAX.
Definition: diagnostic.h:685
template struct traits::is_random_access_char_sequence<T> exports a static const boolean member value...
Definition: traits.h:166
static integer read_numeral(chewer< CharSeq > &chew)
Read a numeral from a chewer<CharSeq>
static std::pair< unsigned long long, bool > read_based_numeral(unsigned base, chewer< CharSeq > &chew)
Read a numeral of known base from a text pointer, returning a value and overflow indicator.
Type long long int
Definition: integer.h:59
warning_msg< 19 > warning_missing_terminator
Report missing terminator quotation.
Definition: diagnostic.h:687
static size_t flush()
Emit all deferred diagnostics of this type.
Definition: diagnostic.h:425
Type unsigned long
Definition: integer.h:57
static int read_char_escaping(chewer< CharSeq > &chew)
Read a possibly escaped ASCII character from a text offset, returning its escaped value...
Undetermined type or invalid.
Definition: integer.h:49
warning_msg< 21 > warning_forced_unsigned
Report a huge integer constant forced to be unsigned.
Definition: diagnostic.h:691
Class integer encapsulates an integer of some type.
Definition: integer.h:65
Type int
Definition: integer.h:51
warning_msg< 25 > warning_mulitbyte_char_constant
Report that character constant goes multi-byte.
Definition: diagnostic.h:699
sequence_type & buf()
Get a [const] reference to the associated sequence_type
Definition: chew.h:413
Type long
Definition: integer.h:55
static unsigned long long read_encoded_char(int base, chewer< CharSeq > &chew)
Read a numerically encoded character constant from a text offset, returning its value as an integer...
static unsigned long long decode_utf8(chewer< CharSeq > &chew)
Decode an UTF-8 encoded character from a text offset.
warning_msg< 24 > warning_char_constant_too_long
Report a character constant too long for current locale.
Definition: diagnostic.h:697
chew_mode::continuation const continuation
An exemplar chew_mode::continuation
Definition: chew.h:213
static size_t discard()
Forget all queued diagnostics of this type.
Definition: diagnostic.h:420
The tag class is inserted in a diagnostic_base to tell it to emit itself.
Definition: diagnostic.h:77
The tag class is inserted in a diagnostic_base to tell it to defer itself.
Definition: diagnostic.h:82
Type unsigned long long
Definition: integer.h:61
`template struct chewer<CharSeq> is a cursor-like type that is associated with a character-sequence t...
Definition: chew.h:248
static integer read_char(chewer< CharSeq > &chew)
Read a character constant from a text offset, returning its value as an integer.
Type unsigned
Definition: integer.h:53