coan 4.2.4
chew.c
Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2004, 2006 Symbian Software Ltd.                        *
00003  *   All rights reserved.                                                  *
00004  *   Copyright (C) 2002, 2003 Tony Finch <dot@dotat.at>.                   *
00005  *   All rights reserved.                                                  *
00006  *   Copyright (C) 1985, 1993 The Regents of the University of California. *
00007  *   All rights reserved.                                                  *
00008  *   Copyright (C) 2007-2011 Mike Kinghan, imk@strudl.org                  *
00009  *   All rights reserved.                                                  *
00010  *                                                                         *
00011  *   Contributed by Mike Kinghan, imk@strudl.org, derived from the code    *
00012  *   of Tony Finch                                                         *
00013  *                                                                         *
00014  *   Redistribution and use in source and binary forms, with or without    *
00015  *   modification, are permitted provided that the following conditions    *
00016  *   are met:                                                              *
00017  *                                                                         *
00018  *   Redistributions of source code must retain the above copyright        *
00019  *   notice, this list of conditions and the following disclaimer.         *
00020  *                                                                         *
00021  *   Redistributions in binary form must reproduce the above copyright     *
00022  *   notice, this list of conditions and the following disclaimer in the   *
00023  *   documentation and/or other materials provided with the distribution.  *
00024  *                                                                         *
00025  *   Neither the name of Symbian Software Ltd. nor the names of its        *
00026  *   contributors may be used to endorse or promote products derived from  *
00027  *   this software without specific prior written permission.              *
00028  *                                                                         *
00029  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS   *
00030  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT     *
00031  *   LIMITED TO, THE IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS    *
00032  *   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE        *
00033  *   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,   *
00034  *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,  *
00035  *   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
00036  *   OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    *
00037  *   AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,*
00038  *   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF *
00039  *   THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH  *
00040  *   DAMAGE.                                                               *
00041  *                                                                         *
00042  **************************************************************************/
00043 
00044 #include "chew.h"
00045 #include "args.h"
00046 #include "io.h"
00047 #include "report.h"
00048 
00056 
/*
 * State of the chew module.
 *
 * NOTE(review): STATE_DEF, INCLUDE_PUBLIC, STATE_T and IMPLEMENT are project
 * macros defined outside this file; presumably they declare the module's
 * state structure, embed its publicly visible members and generate the
 * accessors used below (GET_STATE/SET_STATE/GET_PUBLIC/SET_PUBLIC) - confirm
 * against their definitions.
 */
STATE_DEF(chew)
{
    /* Members also reached through GET_PUBLIC/SET_PUBLIC(chew,...), e.g.
     * line_state, comment_state, in_double_quote (presumably declared in
     * chew.h - confirm). */
    INCLUDE_PUBLIC(chew);
    /* Zero when chew_on() is not within a single-quotation. Set to 1 at the
     * opening single-quote and incremented for each later character, with
     * backslashes and line-continuations discounted. chew_on() resets it to
     * zero at the closing single-quote or once it exceeds 3. */
    size_t single_quote_length;
    /* True while the last character seen by chew_on() was a backslash that
     * escapes the next character; cleared once that character is processed. */
    bool                escape; 
}
STATE_T(chew);

/* NOTE(review): presumably instantiates the state as zero-initialisable -
 * confirm against the definition of IMPLEMENT. */
IMPLEMENT(chew,ZERO_INITABLE)
#if 0 /* Redundant for now */

/*
 * Delete each line-continuation found at `cp`, joining the continued line
 * onto the current one, and complain (once only) if a deleted continuation
 * was followed by a symbol character.
 *
 * cp       Text position to examine.
 * return   The position reached once no line-continuation is at hand.
 */
static char const *
straighten_continuation(char const *cp)
{
    bool gripe_pending = true;
    while (cp[0] == '\\' && eol(cp + 1)) {
        /* Cut the line at the backslash and pull in the next line */
        truncate_line(LINE_OFF(cp));
        cp = read_more(cp);
        if (gripe_pending && is_symbol_inner_char(cp[1])) {
            report(GRIPE_STRAIGHTENED_LINE,NULL,
                   "Obfuscating line-continuation(s) deleted");
            gripe_pending = false;
        }
    }
    return cp;
}
#endif
00122 
/*
 * Say whether `cp` addresses a line-continuation, i.e. a backslash that is
 * immediately followed by a newline sequence recognised by eol().
 */
static bool
is_line_continuation(char const *cp)
{
    if (cp[0] != '\\') {
        return false;
    }
    return eol(cp + 1) != 0;
}
00129 
00130 
00131 /* API **************************************************************/
00132 
/*
 * Skip over any run of line-continuations (backslash + newline) at `cp`.
 *
 * When a continuation's newline is the last thing in the buffered text,
 * read_more() is called to extend the text before stepping over it.
 *
 * cp       Text position to examine.
 * return   The first position that does not start a line-continuation.
 */
char const *
chew_continuation(char const *cp)
{
    while (cp[0] == '\\') {
        size_t newline_len = eol(cp + 1);
        if (newline_len == 0) {
            /* A backslash, but not a line-continuation */
            break;
        }
        if (cp[newline_len + 1] == '\0') {
            /* Nothing buffered beyond the newline: fetch more input */
            cp = read_more(cp);
        }
        /* Step over the backslash and the newline sequence */
        cp += newline_len + 1;
    }
    return cp;
}
00144 
/*
 * Consume a symbol (identifier) starting at `cp`, splicing any
 * line-continuations embedded in it.
 *
 * cp       Text position to examine.
 * return   `cp` unchanged if it does not address a symbol-start character,
 *          otherwise the first position that is not a symbol character.
 */
char const *
chew_symbol(char const *cp)
{
    if (!is_symbol_start_char(*cp)) {
        return cp;
    }
    while (is_symbol_inner_char(*cp)) {
        /* Advance one character, then skip continuations that follow it */
        cp = chew_continuation(cp + 1);
    }
    return cp;
}
00153 
00154 char const *
00155 chew_string(char const *cp)
00156 {
00157     char const * save = cp;
00158     if (*cp == '\"') {
00159         for ( ++cp ; *cp && *cp != '\"' && !eol(cp);
00160                 cp = chew_continuation(++cp)) {}
00161         if (!*cp && GET_PUBLIC(chew,in_source)) {
00162             report(GRIPE_UNCLOSED_QUOTE,NULL,
00163                    "Unclosed quotation in context \"%s\"",save);
00164         }
00165         ++cp;
00166     } else for (        ; isgraph(*cp); cp = chew_continuation(++cp)) {
00167             size_t read_off = read_offset(cp);
00168             char const * leader = chew_on(cp);
00169             cp = read_pos(read_off);
00170             if (leader != cp) {
00171                 break;
00172             }
00173         }
00174     return cp;
00175 }
00176 
00177 char const *
00178 chew_unbroken_string(char const *cp)
00179 {
00180     char const * save = cp;
00181     if (*cp == '\"') {
00182         for ( ++cp ; *cp && *cp != '\"' && !eol(cp) && !is_line_continuation(cp); ++cp) {}
00183         if (!*cp && GET_PUBLIC(chew,in_source)) {
00184             report(GRIPE_UNCLOSED_QUOTE,NULL,
00185                    "Unclosed quotation in context \"%s\"",save);
00186         } else if (*cp == '\"') {
00187             ++cp;
00188         }
00189     } else for (        ; isgraph(*cp) && !is_line_continuation(cp); ++cp) {
00190             size_t read_off = read_offset(cp);
00191             char const * leader = chew_on(cp);
00192             cp = read_pos(read_off);
00193             if (leader != cp) {
00194                 break;
00195             }
00196         }
00197     return cp;
00198 }
00199 
/*
 * Consume a macro reference at `cp`: a symbol, optionally followed by a
 * parenthesised argument list. Parenthesised sub-expressions within the
 * arguments are consumed by recursion.
 *
 * cp       Text position to examine.
 * return   The position after the symbol if no '(' follows it; otherwise
 *          the position after the closing ')'. If the text ends before the
 *          argument list is closed, the position reached, backed up over
 *          any trailing whitespace.
 */
char const *
chew_macro_call(char const *cp)
{
    cp = chew_symbol(cp);
    cp = chew_continuation(cp);
    if (*cp == '(') {
        cp = chew_continuation(++cp);
        while(*cp != ')') {
            char const * saved_cp;
            saved_cp = cp = chew_on(cp);
            cp = chew_string(cp);
            if (cp > saved_cp && cp[-1] == ')') {
                /* An unquoted argument is a run of graphic characters, so
                   chew_string() may have swallowed the closing ')'. */
                return cp;
            }
            cp = chew_on(cp);
            if (*cp == ',') {
                /* Next argument */
                cp = chew_continuation(++cp);
            } else if (*cp == '(') {
                /* Nested parentheses within an argument */
                cp = chew_continuation(++cp);
                cp = chew_on(cp);
                cp = chew_macro_call(cp);
            } else if (!*cp) {
                /* Ran out of text before the closing ')' */
                break;
            }
        }
        if (*cp == ')') {
            ++cp;
        } else {
            /* Unterminated call: give back trailing whitespace. The cast
               keeps the <ctype.h> argument in the range of unsigned char. */
            for(        ; isspace((unsigned char)cp[-1]); --cp) {}
        }
    }
    return cp;
}
00233 
/*
 * Consume a header name of the form "name" or <name> at `cp`, splicing
 * line-continuations.
 *
 * Characters accepted between the delimiters are symbol characters, space,
 * '/', '.' and, when WINDOWS is defined, '\\'.
 *
 * cp       Text position to examine.
 * return   `cp` unchanged if it addresses neither '"' nor '<'. Otherwise
 *          the position after the closing delimiter, or the position of the
 *          first unacceptable character if the name is not closed.
 */
char const *
chew_header_name(char const *cp)
{
    char closer;
    switch (*cp) {
    case '\"':
        closer = '\"';
        break;
    case '<':
        closer = '>';
        break;
    default:
        /* Not a header name at all */
        return cp;
    }
    cp = chew_continuation(cp + 1);
    while (*cp != closer) {
        bool name_char =
            is_symbol_inner_char(*cp) ||
            *cp == ' ' ||
            *cp == '/' ||
#ifdef WINDOWS
            *cp == '\\' ||
#endif
            *cp == '.';
        if (!name_char) {
            break;
        }
        cp = chew_continuation(cp + 1);
    }
    if (*cp == closer) {
        /* Step over the closing delimiter */
        cp = chew_continuation(cp + 1);
    }
    return cp;
}
00262 
/*
 * Measure the newline sequence, if any, at `cp`.
 *
 * cp       Text position to examine.
 * return   1 for a Unix newline "\n", 2 for a Windows newline "\r\n",
 *          0 if `cp` does not address a newline sequence.
 */
unsigned
eol(char const * cp)
{
    switch (cp[0]) {
    case '\n':
        return 1;
    case '\r':
        /* A carriage-return counts only as part of "\r\n" */
        return cp[1] == '\n' ? 2 : 0;
    default:
        return 0;
    }
}
00274 
00275 
00276 char const *
00277 chew_on(char const *cp)
00278 {
00279     char const *chew_start = cp;
00280     unsigned nl_len;
00281     /*  Need to track single-quotes to detect quotations of the
00282         character '"', which must not count as opening or closing
00283         double-quotes. Is necessary only to recognise single-quotes
00284         enclosing a single character: if the enclosed charccter is
00285         escaped, e.g. '\"', then escape-tracking copes.
00286     */
00287     if (GET_PUBLIC(args,plaintext)) {
00288         for (; isspace((unsigned char)*cp); ++cp) {
00289             if (eol(cp)) {
00290                 SET_PUBLIC(chew,line_state) = LS_NEUTER;
00291             }
00292         }
00293         return (cp);
00294     }
00295     while (*cp != '\0') {
00296         if (GET_STATE(chew,single_quote_length)) {
00297             ++SET_STATE(chew,single_quote_length);
00298         }
00299         if (cp[0] == '\\') {
00300             if (eol(cp + 1)) {
00301                 /* Line continuation "\\n" sequence cannot itself be escaped */
00302                 SET_STATE(chew,escape) = true;
00303             } else if (GET_STATE(chew,single_quote_length)) {
00304                 /* Toggle escape state if we are in single quotes */
00305                 SET_STATE(chew,escape) = !GET_STATE(chew,escape);
00306                 /* Discount an escape token from the length of a single quotation */
00307                 --SET_STATE(chew,single_quote_length);
00308             } else if (GET_PUBLIC(chew,in_double_quote)) {
00309                 /* Also Toggle escape state if we are in double quotes */
00310                 SET_STATE(chew,escape) = !GET_STATE(chew,escape);
00311             } else if (GET_PUBLIC(chew,line_state) != LS_CODE ||
00312                        GET_PUBLIC(chew,comment_state) == NO_COMMENT) {
00313                 /* This is a plain backslash within directive. Quit */
00314                 break;
00315             }
00316             ++cp;
00317             continue;
00318         }
00319         if (cp[0] == '"') {
00320             if (!GET_STATE(chew,escape)) {
00321                 if (GET_PUBLIC(chew,stop_at_quote) &&
00322                         GET_PUBLIC(chew,comment_state) == NO_COMMENT) {
00323                     break;
00324                 }
00325                 /*      Can toggle double-quote state if we are not escaped.
00326                     This action will be invalid if this double-quote is
00327                     within single-quotes, but in that case we will
00328                     detect and rectify the error when we close the
00329                     single-quote. */
00330                 if ((SET_PUBLIC(chew,in_double_quote) =
00331                             !GET_PUBLIC(chew,in_double_quote)) == true) {
00332                     SET_PUBLIC(chew,last_quote_start_line) =
00333                         GET_PUBLIC(io,line_num);
00334                 }
00335             }
00336             ++cp;
00337         } else if (cp[0] == '\'') {
00338             if (!GET_STATE(chew,escape)) {
00339                 size_t single_quote_len;
00340                 if (GET_PUBLIC(chew,stop_at_quote) &&
00341                         GET_PUBLIC(chew,comment_state) == NO_COMMENT) {
00342                     break;
00343                 }
00344                 /* Can only enter a single-quote state is we are not escaped */
00345                 single_quote_len = GET_STATE(chew,single_quote_length);
00346                 if (single_quote_len == 0 &&
00347                         !GET_PUBLIC(chew,in_double_quote)) {
00348                     /* Can enter single-quotation if not already in
00349                     single- or double-quotatioon */
00350                     ++SET_STATE(chew,single_quote_length);
00351                 } else if (single_quote_len > 0) { /* Leaving single quotation */
00352                     if (GET_PUBLIC(chew,in_double_quote)) {
00353                         /* If we are leaving single-quotation then we
00354                         cannot have been within double-quotation when
00355                         we entered single-quotation because we forbid
00356                         entry to single-quotation within double-quotation.
00357                         Therefore we have entered double-quotation within
00358                         single-quotation. If the last '"'-character
00359                         parsed was escaped we would not have entered double
00360                         quotation. And since we are     still within
00361                         single-quotation now, we know that the previous
00362                         character parsed was an unescaped '"', which
00363                         we falsely interpreted as the start of
00364                         double-quotation. So we correct that error
00365                         now by exiting double-quotation.*/
00366                         SET_PUBLIC(chew,in_double_quote) = false;
00367                     }
00368                     SET_STATE(chew,single_quote_length) = 0;
00369                 }
00370                 ++cp;
00371             }
00372             else if (GET_PUBLIC(chew,in_double_quote) || GET_STATE(chew,single_quote_length) > 0 ||
00373                 GET_PUBLIC(chew,comment_state) != NO_COMMENT) {
00374                 /* If we're in double quotation or comments then apostrophe is not start of a
00375                     character constant */
00376                 ++cp;
00377             }
00378             else { /* Apostrophe may be start of a character constant */
00379                 break;
00380             }
00381         } else if ((nl_len = eol(cp)) != 0) { /* Newline*/
00382             if (GET_STATE(chew,escape)) {
00383                 /* Line continuation */
00384                 if (cp[nl_len] == 0) {
00385                     /* Newline is at actual end of line, so extend line */
00386                     ++SET_PUBLIC(io,extension_lines);
00387                     cp = read_more(cp);
00388                 }
00389                 if (GET_STATE(chew,single_quote_length)) {
00390                     /*  A line continuation doesn't count as a character,
00391                         so discount both a '\' and preceding '\\' from the length of the single-quote */
00392                     SET_STATE(chew,single_quote_length) -= 2;
00393                 }
00394             } else {    /* Plain newline*/
00395                 SET_PUBLIC(chew,line_state) = LS_NEUTER;
00396                 if (GET_PUBLIC(chew,comment_state) == CXX_COMMENT) {
00397                     /* Newline terminates C++ comment*/
00398                     SET_PUBLIC(chew,comment_state) = NO_COMMENT;
00399                     SET_PUBLIC(chew,in_double_quote) = false;
00400                 } else if (GET_PUBLIC(chew,comment_state) == C_COMMENT) {
00401                     SET_PUBLIC(chew,in_double_quote) = false;
00402                     ++SET_PUBLIC(io,extension_lines);
00403                     cp = read_more(cp);
00404                 } else if (GET_PUBLIC(chew,in_double_quote)) {
00405                     SET_PUBLIC(chew,in_double_quote) = false;
00406                     if (GET_PUBLIC(chew,must_balance_quotes &&
00407                                    GET_PUBLIC(chew,in_source))) {
00408                         /* Dangling quotation is not in comment. Error*/
00409                         parse_error(GRIPE_NEWLINE_IN_QUOTE,
00410                                     "Newline within quotation");
00411                     }
00412                 }
00413             }
00414             if (*cp == '\r') {
00415                 /* Detected Windows line-end in Unix environment*/
00416                 ++cp;
00417             }
00418             ++cp;
00419         } else if (cp[0] == ' ' || cp[0] == '\t') { /* Some whitespace*/
00420             ++cp;
00421         }
00422         /* Could be at the start or end of a comment*/
00423         else if (GET_PUBLIC(chew,comment_state) == NO_COMMENT) {
00424             /* Not in C or C++ comment*/
00425             /* Check for start of comment*/
00426             if (!GET_PUBLIC(chew,in_double_quote) && !GET_STATE(chew,single_quote_length)) {
00427                 /* Don't let comments start within quotation*/
00428                 if (!strncmp(cp, "/\\\n",3)) {
00429                     SET_PUBLIC(chew,comment_state) = STARTING_COMMENT;
00430                     SET_PUBLIC(chew,last_comment_start_line) =
00431                         GET_PUBLIC(io,line_num);
00432                     cp += 3;
00433                 }
00434                 if (!strncmp(cp, "/\\\r\n",4)) {
00435                     SET_PUBLIC(chew,comment_state) = STARTING_COMMENT;
00436                     SET_PUBLIC(chew,last_comment_start_line) =
00437                         GET_PUBLIC(io,line_num);
00438                     cp += 4;
00439                 } else if (cp[0] == '/' && cp[1] == '*') {
00440                     SET_PUBLIC(chew,comment_state) = C_COMMENT;
00441                     SET_PUBLIC(chew,last_comment_start_line) =
00442                         GET_PUBLIC(io,line_num);
00443                     cp += 2;
00444                 } else if (cp[0] == '/' && cp[1] == cp[0]) {
00445                     SET_PUBLIC(chew,comment_state) = CXX_COMMENT;
00446                     SET_PUBLIC(chew,last_comment_start_line) =
00447                         GET_PUBLIC(io,line_num);
00448                     cp += 2;
00449                 } else if (GET_PUBLIC(chew,line_state) == LS_CODE) {
00450                     /* Cool if we have got into code. Carry on */
00451                     //TODO See how much we get here! */
00452                     ++cp;
00453                 } else {
00454                     /* Got some non-chewable char within directive. Quit */
00455                     break;
00456                 }
00457             } else {    /* We're inside quotation. Truck on*/
00458                 ++cp;
00459             }
00460         }
00461         /* We are in a comment state. Check for end of comment*/
00462         /* No need to test for end of C++ comment 'cos newline case
00463                 covers it*/
00464         else if (GET_PUBLIC(chew,comment_state) == C_COMMENT) {
00465             if (!strncmp(cp, "*\\\n",3)) {
00466                 SET_PUBLIC(chew,comment_state) = FINISHING_COMMENT;
00467                 cp += 3;
00468             }
00469             if (!strncmp(cp, "*\\\r\n",4)) {
00470                 SET_PUBLIC(chew,comment_state) = FINISHING_COMMENT;
00471                 cp += 4;
00472             } else if (cp[0] == '*' && cp[1] == '/') {
00473                 SET_PUBLIC(chew,comment_state) = NO_COMMENT;
00474                 SET_PUBLIC(chew,in_double_quote) = false;
00475                 cp += 2;
00476             } else {
00477                 ++cp;
00478             }
00479         } else if (GET_PUBLIC(chew,comment_state) == STARTING_COMMENT) {
00480             if (*cp == '*') {
00481                 SET_PUBLIC(chew,comment_state) = C_COMMENT;
00482                 ++cp;
00483             } else if (*cp == '/') {
00484                 SET_PUBLIC(chew,comment_state) = CXX_COMMENT;
00485                 ++cp;
00486             } else {
00487                 SET_PUBLIC(chew,comment_state) = NO_COMMENT;
00488                 SET_PUBLIC(chew,line_state) = LS_CODE;
00489             }
00490         } else if (GET_PUBLIC(chew,comment_state) == FINISHING_COMMENT) {
00491             if (*cp == '/') {
00492                 SET_PUBLIC(chew,comment_state) = NO_COMMENT;
00493                 ++cp;
00494             } else {
00495                 SET_PUBLIC(chew,comment_state) = C_COMMENT;
00496             }
00497         } else {
00498             ++cp;
00499         }
00500         SET_STATE(chew,escape) = false;
00501         if (GET_STATE(chew,single_quote_length) > 3) {
00502             SET_STATE(chew,single_quote_length) = 0;
00503         }
00504     }
00505     if (GET_STATE(chew,escape)) {
00506         SET_STATE(chew,escape) = false;
00507         if (GET_PUBLIC(chew,in_source)) {
00508             report(GRIPE_STRAY_ESCAPE,NULL,"Stray '\\' ignored");
00509         }
00510     }
00511     if (GET_PUBLIC(chew,in_double_quote) && GET_PUBLIC(chew,in_source)) {
00512         report(GRIPE_UNCLOSED_QUOTE,NULL,"Unclosed quotation in context \"%s\"",
00513                chew_start);
00514     }
00515     return cp;
00516 }
00517 
00518 void
00519 chew_toplevel(void)
00520 {
00521     SET_PUBLIC(chew,line_state) = LS_NEUTER;
00522     SET_PUBLIC(chew,comment_state) = NO_COMMENT;
00523     SET_PUBLIC(chew,in_double_quote) = false;
00524     SET_STATE(chew,single_quote_length) = 0;
00525     SET_PUBLIC(chew,stop_at_quote) = false;
00526     SET_PUBLIC(chew,must_balance_quotes) = true;
00527     SET_PUBLIC(chew,in_source) = true;
00528 }
00529 
00530 /* EOF*/
00531 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines