coan 4.2.4
|
00001 /*************************************************************************** 00002 * Copyright (C) 2004, 2006 Symbian Software Ltd. * 00003 * All rights reserved. * 00004 * Copyright (C) 2002, 2003 Tony Finch <dot@dotat.at>. * 00005 * All rights reserved. * 00006 * Copyright (C) 1985, 1993 The Regents of the University of California. * 00007 * All rights reserved. * 00008 * Copyright (C) 2007-2011 Mike Kinghan, imk@strudl.org * 00009 * All rights reserved. * 00010 * * 00011 * Contributed by Mike Kinghan, imk@strudl.org, derived from the code * 00012 * of Tony Finch * 00013 * * 00014 * Redistribution and use in source and binary forms, with or without * 00015 * modification, are permitted provided that the following conditions * 00016 * are met: * 00017 * * 00018 * Redistributions of source code must retain the above copyright * 00019 * notice, this list of conditions and the following disclaimer. * 00020 * * 00021 * Redistributions in binary form must reproduce the above copyright * 00022 * notice, this list of conditions and the following disclaimer in the * 00023 * documentation and/or other materials provided with the distribution. * 00024 * * 00025 * Neither the name of Symbian Software Ltd. nor the names of its * 00026 * contributors may be used to endorse or promote products derived from * 00027 * this software without specific prior written permission. * 00028 * * 00029 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * 00030 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * 00031 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * 00032 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * 00033 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * 00034 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * 00035 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * 00036 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * 00037 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,* 00038 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * 00039 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * 00040 * DAMAGE. * 00041 * * 00042 **************************************************************************/ 00043 00044 #include "chew.h" 00045 #include "args.h" 00046 #include "io.h" 00047 #include "report.h" 00048 00056 00058 STATE_DEF(chew) 00059 { 00061 INCLUDE_PUBLIC(chew); 00062 size_t single_quote_length; 00078 bool escape; 00079 } 00080 STATE_T(chew); 00081 00086 IMPLEMENT(chew,ZERO_INITABLE) 00091 #if 0 /* Redundant for now */ 00092 00105 static char const * 00106 straighten_continuation(char const *cp) 00107 { 00108 bool reported = false; 00109 for ( ; cp[0] == '\\' && eol(cp + 1); ) { 00110 truncate_line(LINE_OFF(cp)); 00111 cp = read_more(cp); 00112 if (is_symbol_inner_char(cp[1]) && !reported) { 00113 report(GRIPE_STRAIGHTENED_LINE,NULL, 00114 "Obfuscating line-continuation(s) deleted"); 00115 reported = true; 00116 } 00117 00118 } 00119 return cp; 00120 } 00121 #endif 00122 00124 static bool 00125 is_line_continuation(char const *cp) 00126 { 00127 return *cp == '\\' && eol(cp + 1); 00128 } 00129 00130 00131 /* API **************************************************************/ 00132 00133 char const * 00134 chew_continuation(char const *cp) 00135 { 00136 size_t nl_len; 00137 for ( ; cp[0] == '\\' && (nl_len = eol(cp + 1)); cp += nl_len + 1) { 00138 if (cp[nl_len + 1] == 0) { 00139 cp = read_more(cp); 00140 } 00141 } 00142 return cp; 00143 } 00144 00145 char const * 00146 chew_symbol(char const *cp) 00147 { 00148 if (is_symbol_start_char(*cp)) { 00149 for ( ; is_symbol_inner_char(*cp); cp = chew_continuation(++cp)) {} 00150 } 00151 return (cp); 00152 } 00153 00154 char const * 00155 chew_string(char const *cp) 00156 { 00157 char const * save = cp; 00158 if (*cp == '\"') { 00159 for ( ++cp ; *cp && *cp != '\"' && !eol(cp); 00160 cp = chew_continuation(++cp)) {} 00161 if (!*cp && GET_PUBLIC(chew,in_source)) { 00162 report(GRIPE_UNCLOSED_QUOTE,NULL, 00163 "Unclosed quotation in context \"%s\"",save); 00164 } 00165 ++cp; 00166 } else for ( ; isgraph(*cp); cp = chew_continuation(++cp)) { 00167 size_t read_off = read_offset(cp); 00168 char const * leader = chew_on(cp); 00169 cp = read_pos(read_off); 00170 if (leader != cp) { 00171 break; 00172 } 00173 } 00174 return cp; 00175 } 00176 00177 char const * 00178 chew_unbroken_string(char const *cp) 00179 { 00180 char const * save = cp; 00181 if (*cp == '\"') { 00182 for ( ++cp ; *cp && *cp != '\"' && !eol(cp) && !is_line_continuation(cp); ++cp) {} 00183 if (!*cp && GET_PUBLIC(chew,in_source)) { 00184 report(GRIPE_UNCLOSED_QUOTE,NULL, 00185 "Unclosed quotation in context \"%s\"",save); 00186 } else if (*cp == '\"') { 00187 ++cp; 00188 } 00189 } else for ( ; isgraph(*cp) && !is_line_continuation(cp); ++cp) { 00190 size_t read_off = read_offset(cp); 00191 char const * leader = chew_on(cp); 00192 cp = read_pos(read_off); 00193 if (leader != cp) { 00194 break; 00195 } 00196 } 00197 return cp; 00198 } 00199 00200 char const * 00201 chew_macro_call(char const *cp) 00202 { 00203 cp = chew_symbol(cp); 00204 cp = chew_continuation(cp); 00205 if (*cp == '(') { 00206 cp = chew_continuation(++cp); 00207 while(*cp != ')') { 00208 char const * saved_cp; 00209 saved_cp = cp = chew_on(cp); 00210 cp = chew_string(cp); 00211 if (cp > saved_cp && cp[-1] == ')') { 00212 return cp; 00213 } 00214 cp = chew_on(cp); 00215 if (*cp == ',') { 00216 cp = chew_continuation(++cp); 00217 } else if (*cp == '(') { 00218 cp = chew_continuation(++cp); 00219 cp = chew_on(cp); 00220 cp = chew_macro_call(cp); 00221 } else if (!*cp) { 00222 break; 00223 } 00224 } 00225 if (*cp == ')') { 00226 ++cp; 00227 } else { 00228 for( ; isspace(cp[-1]); --cp) {} 00229 } 00230 } 00231 return cp; 00232 } 00233 00234 char const * 00235 chew_header_name(char const *cp) 00236 { 00237 char delim; 00238 if (*cp == '\"') { 00239 delim = *cp; 00240 } else if (*cp == '<') { 00241 delim = '>'; 00242 } else { 00243 delim = 0; 00244 } 00245 if (delim) { 00246 for (cp = chew_continuation(++cp); 00247 *cp != delim && 00248 (is_symbol_inner_char(*cp) || 00249 *cp == ' ' || 00250 *cp == '/' || 00251 #ifdef WINDOWS 00252 *cp == '\\' || 00253 #endif 00254 *cp == '.'); 00255 cp = chew_continuation(++cp)) {}; 00256 if (*cp == delim) { 00257 cp = chew_continuation(++cp); 00258 } 00259 } 00260 return cp; 00261 } 00262 00263 unsigned 00264 eol(char const * cp) 00265 { 00266 if (cp[0] == '\n') { 00267 return 1; 00268 } 00269 if (cp[0] == '\r' && cp[1] == '\n') { 00270 return 2; 00271 } 00272 return 0; 00273 } 00274 00275 00276 char const * 00277 chew_on(char const *cp) 00278 { 00279 char const *chew_start = cp; 00280 unsigned nl_len; 00281 /* Need to track single-quotes to detect quotations of the 00282 character '"', which must not count as opening or closing 00283 double-quotes. Is necessary only to recognise single-quotes 00284 enclosing a single character: if the enclosed charccter is 00285 escaped, e.g. '\"', then escape-tracking copes. 00286 */ 00287 if (GET_PUBLIC(args,plaintext)) { 00288 for (; isspace((unsigned char)*cp); ++cp) { 00289 if (eol(cp)) { 00290 SET_PUBLIC(chew,line_state) = LS_NEUTER; 00291 } 00292 } 00293 return (cp); 00294 } 00295 while (*cp != '\0') { 00296 if (GET_STATE(chew,single_quote_length)) { 00297 ++SET_STATE(chew,single_quote_length); 00298 } 00299 if (cp[0] == '\\') { 00300 if (eol(cp + 1)) { 00301 /* Line continuation "\\n" sequence cannot itself be escaped */ 00302 SET_STATE(chew,escape) = true; 00303 } else if (GET_STATE(chew,single_quote_length)) { 00304 /* Toggle escape state if we are in single quotes */ 00305 SET_STATE(chew,escape) = !GET_STATE(chew,escape); 00306 /* Discount an escape token from the length of a single quotation */ 00307 --SET_STATE(chew,single_quote_length); 00308 } else if (GET_PUBLIC(chew,in_double_quote)) { 00309 /* Also Toggle escape state if we are in double quotes */ 00310 SET_STATE(chew,escape) = !GET_STATE(chew,escape); 00311 } else if (GET_PUBLIC(chew,line_state) != LS_CODE || 00312 GET_PUBLIC(chew,comment_state) == NO_COMMENT) { 00313 /* This is a plain backslash within directive. Quit */ 00314 break; 00315 } 00316 ++cp; 00317 continue; 00318 } 00319 if (cp[0] == '"') { 00320 if (!GET_STATE(chew,escape)) { 00321 if (GET_PUBLIC(chew,stop_at_quote) && 00322 GET_PUBLIC(chew,comment_state) == NO_COMMENT) { 00323 break; 00324 } 00325 /* Can toggle double-quote state if we are not escaped. 00326 This action will be invalid if this double-quote is 00327 within single-quotes, but in that case we will 00328 detect and rectify the error when we close the 00329 single-quote. */ 00330 if ((SET_PUBLIC(chew,in_double_quote) = 00331 !GET_PUBLIC(chew,in_double_quote)) == true) { 00332 SET_PUBLIC(chew,last_quote_start_line) = 00333 GET_PUBLIC(io,line_num); 00334 } 00335 } 00336 ++cp; 00337 } else if (cp[0] == '\'') { 00338 if (!GET_STATE(chew,escape)) { 00339 size_t single_quote_len; 00340 if (GET_PUBLIC(chew,stop_at_quote) && 00341 GET_PUBLIC(chew,comment_state) == NO_COMMENT) { 00342 break; 00343 } 00344 /* Can only enter a single-quote state is we are not escaped */ 00345 single_quote_len = GET_STATE(chew,single_quote_length); 00346 if (single_quote_len == 0 && 00347 !GET_PUBLIC(chew,in_double_quote)) { 00348 /* Can enter single-quotation if not already in 00349 single- or double-quotatioon */ 00350 ++SET_STATE(chew,single_quote_length); 00351 } else if (single_quote_len > 0) { /* Leaving single quotation */ 00352 if (GET_PUBLIC(chew,in_double_quote)) { 00353 /* If we are leaving single-quotation then we 00354 cannot have been within double-quotation when 00355 we entered single-quotation because we forbid 00356 entry to single-quotation within double-quotation. 00357 Therefore we have entered double-quotation within 00358 single-quotation. If the last '"'-character 00359 parsed was escaped we would not have entered double 00360 quotation. And since we are still within 00361 single-quotation now, we know that the previous 00362 character parsed was an unescaped '"', which 00363 we falsely interpreted as the start of 00364 double-quotation. So we correct that error 00365 now by exiting double-quotation.*/ 00366 SET_PUBLIC(chew,in_double_quote) = false; 00367 } 00368 SET_STATE(chew,single_quote_length) = 0; 00369 } 00370 ++cp; 00371 } 00372 else if (GET_PUBLIC(chew,in_double_quote) || GET_STATE(chew,single_quote_length) > 0 || 00373 GET_PUBLIC(chew,comment_state) != NO_COMMENT) { 00374 /* If we're in double quotation or comments then apostrophe is not start of a 00375 character constant */ 00376 ++cp; 00377 } 00378 else { /* Apostrophe may be start of a character constant */ 00379 break; 00380 } 00381 } else if ((nl_len = eol(cp)) != 0) { /* Newline*/ 00382 if (GET_STATE(chew,escape)) { 00383 /* Line continuation */ 00384 if (cp[nl_len] == 0) { 00385 /* Newline is at actual end of line, so extend line */ 00386 ++SET_PUBLIC(io,extension_lines); 00387 cp = read_more(cp); 00388 } 00389 if (GET_STATE(chew,single_quote_length)) { 00390 /* A line continuation doesn't count as a character, 00391 so discount both a '\' and preceding '\\' from the length of the single-quote */ 00392 SET_STATE(chew,single_quote_length) -= 2; 00393 } 00394 } else { /* Plain newline*/ 00395 SET_PUBLIC(chew,line_state) = LS_NEUTER; 00396 if (GET_PUBLIC(chew,comment_state) == CXX_COMMENT) { 00397 /* Newline terminates C++ comment*/ 00398 SET_PUBLIC(chew,comment_state) = NO_COMMENT; 00399 SET_PUBLIC(chew,in_double_quote) = false; 00400 } else if (GET_PUBLIC(chew,comment_state) == C_COMMENT) { 00401 SET_PUBLIC(chew,in_double_quote) = false; 00402 ++SET_PUBLIC(io,extension_lines); 00403 cp = read_more(cp); 00404 } else if (GET_PUBLIC(chew,in_double_quote)) { 00405 SET_PUBLIC(chew,in_double_quote) = false; 00406 if (GET_PUBLIC(chew,must_balance_quotes && 00407 GET_PUBLIC(chew,in_source))) { 00408 /* Dangling quotation is not in comment. Error*/ 00409 parse_error(GRIPE_NEWLINE_IN_QUOTE, 00410 "Newline within quotation"); 00411 } 00412 } 00413 } 00414 if (*cp == '\r') { 00415 /* Detected Windows line-end in Unix environment*/ 00416 ++cp; 00417 } 00418 ++cp; 00419 } else if (cp[0] == ' ' || cp[0] == '\t') { /* Some whitespace*/ 00420 ++cp; 00421 } 00422 /* Could be at the start or end of a comment*/ 00423 else if (GET_PUBLIC(chew,comment_state) == NO_COMMENT) { 00424 /* Not in C or C++ comment*/ 00425 /* Check for start of comment*/ 00426 if (!GET_PUBLIC(chew,in_double_quote) && !GET_STATE(chew,single_quote_length)) { 00427 /* Don't let comments start within quotation*/ 00428 if (!strncmp(cp, "/\\\n",3)) { 00429 SET_PUBLIC(chew,comment_state) = STARTING_COMMENT; 00430 SET_PUBLIC(chew,last_comment_start_line) = 00431 GET_PUBLIC(io,line_num); 00432 cp += 3; 00433 } 00434 if (!strncmp(cp, "/\\\r\n",4)) { 00435 SET_PUBLIC(chew,comment_state) = STARTING_COMMENT; 00436 SET_PUBLIC(chew,last_comment_start_line) = 00437 GET_PUBLIC(io,line_num); 00438 cp += 4; 00439 } else if (cp[0] == '/' && cp[1] == '*') { 00440 SET_PUBLIC(chew,comment_state) = C_COMMENT; 00441 SET_PUBLIC(chew,last_comment_start_line) = 00442 GET_PUBLIC(io,line_num); 00443 cp += 2; 00444 } else if (cp[0] == '/' && cp[1] == cp[0]) { 00445 SET_PUBLIC(chew,comment_state) = CXX_COMMENT; 00446 SET_PUBLIC(chew,last_comment_start_line) = 00447 GET_PUBLIC(io,line_num); 00448 cp += 2; 00449 } else if (GET_PUBLIC(chew,line_state) == LS_CODE) { 00450 /* Cool if we have got into code. Carry on */ 00451 //TODO See how much we get here! */ 00452 ++cp; 00453 } else { 00454 /* Got some non-chewable char within directive. Quit */ 00455 break; 00456 } 00457 } else { /* We're inside quotation. Truck on*/ 00458 ++cp; 00459 } 00460 } 00461 /* We are in a comment state. Check for end of comment*/ 00462 /* No need to test for end of C++ comment 'cos newline case 00463 covers it*/ 00464 else if (GET_PUBLIC(chew,comment_state) == C_COMMENT) { 00465 if (!strncmp(cp, "*\\\n",3)) { 00466 SET_PUBLIC(chew,comment_state) = FINISHING_COMMENT; 00467 cp += 3; 00468 } 00469 if (!strncmp(cp, "*\\\r\n",4)) { 00470 SET_PUBLIC(chew,comment_state) = FINISHING_COMMENT; 00471 cp += 4; 00472 } else if (cp[0] == '*' && cp[1] == '/') { 00473 SET_PUBLIC(chew,comment_state) = NO_COMMENT; 00474 SET_PUBLIC(chew,in_double_quote) = false; 00475 cp += 2; 00476 } else { 00477 ++cp; 00478 } 00479 } else if (GET_PUBLIC(chew,comment_state) == STARTING_COMMENT) { 00480 if (*cp == '*') { 00481 SET_PUBLIC(chew,comment_state) = C_COMMENT; 00482 ++cp; 00483 } else if (*cp == '/') { 00484 SET_PUBLIC(chew,comment_state) = CXX_COMMENT; 00485 ++cp; 00486 } else { 00487 SET_PUBLIC(chew,comment_state) = NO_COMMENT; 00488 SET_PUBLIC(chew,line_state) = LS_CODE; 00489 } 00490 } else if (GET_PUBLIC(chew,comment_state) == FINISHING_COMMENT) { 00491 if (*cp == '/') { 00492 SET_PUBLIC(chew,comment_state) = NO_COMMENT; 00493 ++cp; 00494 } else { 00495 SET_PUBLIC(chew,comment_state) = C_COMMENT; 00496 } 00497 } else { 00498 ++cp; 00499 } 00500 SET_STATE(chew,escape) = false; 00501 if (GET_STATE(chew,single_quote_length) > 3) { 00502 SET_STATE(chew,single_quote_length) = 0; 00503 } 00504 } 00505 if (GET_STATE(chew,escape)) { 00506 SET_STATE(chew,escape) = false; 00507 if (GET_PUBLIC(chew,in_source)) { 00508 report(GRIPE_STRAY_ESCAPE,NULL,"Stray '\\' ignored"); 00509 } 00510 } 00511 if (GET_PUBLIC(chew,in_double_quote) && GET_PUBLIC(chew,in_source)) { 00512 report(GRIPE_UNCLOSED_QUOTE,NULL,"Unclosed quotation in context \"%s\"", 00513 chew_start); 00514 } 00515 return cp; 00516 } 00517 00518 void 00519 chew_toplevel(void) 00520 { 00521 SET_PUBLIC(chew,line_state) = LS_NEUTER; 00522 SET_PUBLIC(chew,comment_state) = NO_COMMENT; 00523 SET_PUBLIC(chew,in_double_quote) = false; 00524 SET_STATE(chew,single_quote_length) = 0; 00525 SET_PUBLIC(chew,stop_at_quote) = false; 00526 SET_PUBLIC(chew,must_balance_quotes) = true; 00527 SET_PUBLIC(chew,in_source) = true; 00528 } 00529 00530 /* EOF*/ 00531