reSIProcate/stack  9694
MsgHeaderScanner.cxx
Go to the documentation of this file.
00001 #if defined(HAVE_CONFIG_H)
00002 #include "config.h"
00003 #endif
00004 
00005 #include <ctype.h>
00006 #include <limits.h>
00007 #include <stdio.h>
00008 #include "resip/stack/HeaderTypes.hxx"
00009 #include "resip/stack/SipMessage.hxx"
00010 #include "resip/stack/MsgHeaderScanner.hxx"
00011 #include "rutil/WinLeakCheck.hxx"
00012 
00013 namespace resip 
00014 {
00015 
// The chunk-terminating sentinel may be any character whose category would
// otherwise be "ccOther".  The null character is used because it seldom
// shows up in real input -- although it is fine if it does.

enum { chunkTermSentinelChar = '\0' };

// How the scanner classifies a single input character.  The values are
// consecutive from zero and "numCharCategories" (the count) must stay last.
enum CharCategoryEnum
{
   ccChunkTermSentinel   = 0,
   ccOther               = 1,
   ccFieldName           = 2,
   ccWhitespace          = 3,
   ccColon               = 4,
   ccDoubleQuotationMark = 5,
   ccLeftAngleBracket    = 6,
   ccRightAngleBracket   = 7,
   ccBackslash           = 8,
   ccComma               = 9,
   ccCarriageReturn      = 10,
   ccLineFeed            = 11,
   numCharCategories     = 12
};

// Storage type used wherever a CharCategoryEnum value is kept or passed.
typedef char CharCategory;
00041 
00042 char* 
00043 MsgHeaderScanner::allocateBuffer(int size)
00044 {
00045    return new char[size + MaxNumCharsChunkOverflow];
00046 }
00047 
00048 struct CharInfo
00049 {
00050       CharCategory category;
00051       MsgHeaderScanner::TextPropBitMask textPropBitMask;
00052 };
00053     
00054 static CharInfo charInfoArray[UCHAR_MAX+1];
00055     
// Widen a character to an int in the range 0..UCHAR_MAX so that it can be
// used as an array index.
static inline int c2i(unsigned char c)
{
   const int index = c;
   return index;
}
00060 
00061 static void initCharInfoArray()
00062 {
00063    for(unsigned int charIndex = 0; charIndex <= UCHAR_MAX; ++charIndex) 
00064    {
00065       charInfoArray[charIndex].category = ccOther;
00066       charInfoArray[charIndex].textPropBitMask = 0;
00067    }
00068 
00069    for(const char *charPtr = "abcdefghijklmnopqrstuvwxyz"
00070           "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-.!%*_+`'~";
00071        *charPtr;
00072        ++charPtr)
00073    {
00074       charInfoArray[c2i(*charPtr)].category = ccFieldName;
00075    }
00076 
00077    charInfoArray[c2i(' ')].category  = ccWhitespace;
00078    charInfoArray[c2i('\t')].category = ccWhitespace;
00079    charInfoArray[c2i(':')].category  = ccColon;
00080    charInfoArray[c2i('"')].category  = ccDoubleQuotationMark;
00081    charInfoArray[c2i('<')].category  = ccLeftAngleBracket;
00082    charInfoArray[c2i('>')].category  = ccRightAngleBracket;
00083    charInfoArray[c2i('\\')].category  = ccBackslash;
00084    charInfoArray[c2i(',')].category  = ccComma;
00085    charInfoArray[c2i('\r')].category = ccCarriageReturn;
00086    charInfoArray[c2i('\n')].category = ccLineFeed;
00087    // Assert: "chunkTermSentinelChar"'s category is still the default "ccOther".
00088    charInfoArray[c2i(chunkTermSentinelChar)].category = ccChunkTermSentinel;
00089    // Init text property bit masks.
00090    charInfoArray[c2i('\r')].textPropBitMask =
00091       MsgHeaderScanner::tpbmContainsLineBreak;
00092    charInfoArray[c2i('\n')].textPropBitMask =
00093       MsgHeaderScanner::tpbmContainsLineBreak;
00094    charInfoArray[c2i(' ')].textPropBitMask =
00095       MsgHeaderScanner::tpbmContainsWhitespace;
00096    charInfoArray[c2i('\t')].textPropBitMask =
00097       MsgHeaderScanner::tpbmContainsWhitespace;
00098    charInfoArray[c2i('\\')].textPropBitMask =
00099       MsgHeaderScanner::tpbmContainsBackslash;
00100    charInfoArray[c2i('%')].textPropBitMask =
00101       MsgHeaderScanner::tpbmContainsPercent;
00102    charInfoArray[c2i(';')].textPropBitMask =
00103       MsgHeaderScanner::tpbmContainsSemicolon;
00104    charInfoArray[c2i('(')].textPropBitMask =
00105       MsgHeaderScanner::tpbmContainsParen;
00106    charInfoArray[c2i(')')].textPropBitMask =
00107       MsgHeaderScanner::tpbmContainsParen;
00108 }
00109 
//   States whose name contains '1' scan ordinary (single) values; states
//   whose name contains 'N' scan comma-separated multi-values.

enum StateEnum
{
   // Start of message and status line.
   sMsgStart                               = 0,
   sHalfLineBreakAtMsgStart                = 1,
   sScanStatusLine                         = 2,
   sHalfLineBreakAfterStatusLine           = 3,
   sAfterLineBreakAfterStatusLine          = 4,

   // Field name.
   sScanFieldName                          = 5,

   // Paired '1' / 'N' states: each 'N' state directly follows its '1' twin.
   sScanWhitespaceAfter1FieldName          = 6,
   sScanWhitespaceAfterNFieldName          = 7,
   sScanWhitespaceOr1Value                 = 8,
   sScanWhitespaceOrNValue                 = 9,
   sHalfLineBreakInWhitespaceBefore1Value  = 10,
   sHalfLineBreakInWhitespaceBeforeNValue  = 11,
   sAfterLineBreakInWhitespaceBefore1Value = 12,
   sAfterLineBreakInWhitespaceBeforeNValue = 13,
   sScan1Value                             = 14,
   sScanNValue                             = 15,
   sHalfLineBreakIn1Value                  = 16,
   sHalfLineBreakInNValue                  = 17,
   sAfterLineBreakIn1Value                 = 18,
   sAfterLineBreakInNValue                 = 19,

   // Multi-value only: inside double quotes.
   sScanNValueInQuotes                     = 20,
   sAfterEscCharInQuotesInNValue           = 21,
   sHalfLineBreakInQuotesInNValue          = 22,
   sAfterLineBreakInQuotesInNValue         = 23,

   // Multi-value only: inside angle brackets.
   sScanNValueInAngles                     = 24,
   sHalfLineBreakInAnglesInNValue          = 25,
   sAfterLineBreakInAnglesInNValue         = 26,

   // Carriage return seen directly after a line break.
   sHalfLineBreakAfterLineBreak            = 27,

   numStates                               = 28
};

// Storage type used wherever a StateEnum value is kept or passed.
typedef char State;

// Adding "deltaOfNStateFrom1State" to a '1' state yields its 'N' counterpart.
enum { deltaOfNStateFrom1State = 1 };
00150 
00152     
// Action attached to a state-machine transition.
enum TransitionActionEnum
{
   // No action.
   taNone                    = 0,

   // The current character terminates the status line.
   taTermStatusLine          = 1,

   // The current character terminates a field name.  If the field supports
   // multi-values, shift the state machine into multi-value scanning.
   taTermFieldName           = 2,

   // The current character terminates an empty value.  Implies taStartText.
   taBeyondEmptyValue        = 3,

   // The previous two characters are a linebreak terminating a value.
   // Implies taStartText.
   taTermValueAfterLineBreak = 4,

   // The current character terminates a value.
   taTermValue               = 5,

   // The current character starts a text unit (the status line, a field
   // name, or a value).
   taStartText               = 6,

   // The current character ends the header.
   taEndHeader               = 7,

   // Either the current character terminates the current chunk or it is an
   // ordinary character.
   taChunkTermSentinel       = 8,

   // The input is erroneous.
   taError                   = 9
};

// Storage type used wherever a TransitionActionEnum value is kept or passed.
typedef char TransitionAction;
00174 
00175 
00176 struct TransitionInfo
00177 {
00178       TransitionAction  action;
00179       State             nextState;
00180 };
00181 
00182 static TransitionInfo stateMachine[numStates][numCharCategories];
00183 
00184 inline void specTransition(State state,
00185                            CharCategory charCategory,
00186                            TransitionAction action,
00187                            State nextState)
00188 {
00189    stateMachine[c2i(state)][c2i(charCategory)].action = action;
00190    stateMachine[c2i(state)][c2i(charCategory)].nextState = nextState;
00191 }
00192 
00193 static void specDefaultTransition(State state,
00194                                   TransitionAction action,
00195                                   State nextState)
00196 {
00197    for (int charCategory = 0;
00198         charCategory < numCharCategories;
00199         ++charCategory) 
00200    {
00201       specTransition(state, charCategory, action, nextState);
00202    }
00203    specTransition(state, ccCarriageReturn, taError, state);
00204    specTransition(state, ccLineFeed, taError, state);
00205    specTransition(state, ccChunkTermSentinel, taChunkTermSentinel, state);
00206 }
00207 
00208 static void specHalfLineBreakState(State halfLineBreakState,
00209                                    State  afterLineBreakState)
00210 {
00211    specDefaultTransition(halfLineBreakState, taError, halfLineBreakState);
00212    specTransition(halfLineBreakState, ccLineFeed, taNone, afterLineBreakState);
00213 }
00214 
00215 
00216 //   Single-value (1) scanning and multi-value (N) scanning involves several nearly
00217 //   identical states.
00218 //   "stateDelta" is either 0 or "deltaOfNStateFrom1State".
00219 
00220 static void specXValueStates(int  stateDelta)
00221 {
00222    specDefaultTransition(sScanWhitespaceAfter1FieldName + stateDelta,
00223                          taError,
00224                          sScanWhitespaceAfter1FieldName + stateDelta);
00225    specTransition(sScanWhitespaceAfter1FieldName + stateDelta,
00226                   ccWhitespace,
00227                   taNone,
00228                   sScanWhitespaceAfter1FieldName + stateDelta);
00229    specTransition(sScanWhitespaceAfter1FieldName + stateDelta,
00230                   ccColon,
00231                   taNone,
00232                   sScanWhitespaceOr1Value + stateDelta);
00233    specDefaultTransition(sScanWhitespaceOr1Value + stateDelta,
00234                          taStartText,
00235                          sScan1Value + stateDelta);
00236    specTransition(sScanWhitespaceOr1Value + stateDelta,
00237                   ccWhitespace,
00238                   taNone,
00239                   sScanWhitespaceOr1Value + stateDelta);
00240    if (stateDelta == deltaOfNStateFrom1State)
00241    {
00242       specTransition(sScanWhitespaceOr1Value + stateDelta,
00243                      ccComma,
00244                      taError,
00245                      sScanWhitespaceOr1Value + stateDelta);
00246       specTransition(sScanWhitespaceOr1Value + stateDelta,
00247                      ccLeftAngleBracket,
00248                      taStartText,
00249                      sScanNValueInAngles);
00250       specTransition(sScanWhitespaceOr1Value + stateDelta,
00251                      ccDoubleQuotationMark,
00252                      taStartText,
00253                      sScanNValueInQuotes);
00254    }
00255    specTransition(sScanWhitespaceOr1Value + stateDelta,
00256                   ccCarriageReturn,
00257                   taNone,
00258                   sHalfLineBreakInWhitespaceBefore1Value + stateDelta);
00259    specHalfLineBreakState(sHalfLineBreakInWhitespaceBefore1Value + stateDelta,
00260                           sAfterLineBreakInWhitespaceBefore1Value + stateDelta);
00261    specDefaultTransition(sAfterLineBreakInWhitespaceBefore1Value + stateDelta,
00262                          taError,
00263                          sAfterLineBreakInWhitespaceBefore1Value + stateDelta);
00264    specTransition(sAfterLineBreakInWhitespaceBefore1Value + stateDelta,
00265                   ccFieldName,
00266                   taBeyondEmptyValue,
00267                   sScanFieldName);
00268    specTransition(sAfterLineBreakInWhitespaceBefore1Value + stateDelta,
00269                   ccWhitespace,
00270                   taNone,
00271                   sScanWhitespaceOr1Value + stateDelta);
00272    specTransition(sAfterLineBreakInWhitespaceBefore1Value + stateDelta,
00273                   ccCarriageReturn,
00274                   taBeyondEmptyValue,
00275                   sHalfLineBreakAfterLineBreak);
00276    specDefaultTransition(sScan1Value + stateDelta,
00277                          taNone,
00278                          sScan1Value + stateDelta);
00279    if (stateDelta == deltaOfNStateFrom1State)
00280    {
00281       specTransition(sScan1Value + stateDelta,
00282                      ccComma,
00283                      taTermValue,
00284                      sScanWhitespaceOr1Value + stateDelta);
00285       specTransition(sScan1Value + stateDelta,
00286                      ccLeftAngleBracket,
00287                      taNone,
00288                      sScanNValueInAngles);
00289       specTransition(sScan1Value + stateDelta,
00290                      ccDoubleQuotationMark,
00291                      taNone,
00292                      sScanNValueInQuotes);
00293    }
00294    specTransition(sScan1Value + stateDelta,
00295                   ccCarriageReturn,
00296                   taNone,
00297                   sHalfLineBreakIn1Value + stateDelta);
00298    specHalfLineBreakState(sHalfLineBreakIn1Value + stateDelta,
00299                           sAfterLineBreakIn1Value + stateDelta);
00300    specDefaultTransition(sAfterLineBreakIn1Value + stateDelta,
00301                          taError,
00302                          sAfterLineBreakIn1Value + stateDelta);
00303    specTransition(sAfterLineBreakIn1Value + stateDelta,
00304                   ccFieldName,
00305                   taTermValueAfterLineBreak,
00306                   sScanFieldName);
00307    specTransition(sAfterLineBreakIn1Value + stateDelta,
00308                   ccWhitespace,
00309                   taNone,
00310                   sScan1Value + stateDelta);
00311    specTransition(sAfterLineBreakIn1Value + stateDelta,
00312                   ccCarriageReturn,
00313                   taTermValueAfterLineBreak,
00314                   sHalfLineBreakAfterLineBreak);
00315 }
00316 
00317 static void initStateMachine()
00318 {
00319    // By convention, error transitions maintain the same state.
00320    specDefaultTransition(sMsgStart, taStartText, sScanStatusLine);
00321    specTransition(sMsgStart,
00322                   ccCarriageReturn,
00323                   taNone,
00324                   sHalfLineBreakAtMsgStart);
00325    specTransition(sMsgStart, ccLineFeed, taError, sMsgStart);
00326    specHalfLineBreakState(sHalfLineBreakAtMsgStart, sMsgStart);
00327    specDefaultTransition(sScanStatusLine, taNone, sScanStatusLine);
00328    specTransition(sScanStatusLine,
00329                   ccCarriageReturn,
00330                   taTermStatusLine,
00331                   sHalfLineBreakAfterStatusLine);
00332    specHalfLineBreakState(sHalfLineBreakAfterStatusLine,
00333                           sAfterLineBreakAfterStatusLine);
00334    specDefaultTransition(sAfterLineBreakAfterStatusLine,
00335                          taError,
00336                          sAfterLineBreakAfterStatusLine);
00337    specTransition(sAfterLineBreakAfterStatusLine,
00338                   ccFieldName,
00339                   taStartText,
00340                   sScanFieldName);
00341    specTransition(sAfterLineBreakAfterStatusLine,
00342                   ccWhitespace,
00343                   taError,
00344                   sAfterLineBreakAfterStatusLine);
00345    specTransition(sAfterLineBreakAfterStatusLine,
00346                   ccCarriageReturn,
00347                   taNone,
00348                   sHalfLineBreakAfterLineBreak);
00349    specDefaultTransition(sScanFieldName, taError, sScanFieldName);
00350    specTransition(sScanFieldName, ccFieldName, taNone, sScanFieldName);
00351    specTransition(sScanFieldName,
00352                   ccWhitespace,
00353                   taTermFieldName,
00354                   sScanWhitespaceAfter1FieldName);
00355    specTransition(sScanFieldName,
00356                   ccColon,
00357                   taTermFieldName,
00358                   sScanWhitespaceOr1Value);
00359    specXValueStates(0);
00360    specXValueStates(deltaOfNStateFrom1State);
00361    specDefaultTransition(sScanNValueInQuotes, taNone, sScanNValueInQuotes);
00362    specTransition(sScanNValueInQuotes,
00363                   ccDoubleQuotationMark,
00364                   taNone,
00365                   sScanNValue);
00366    specTransition(sScanNValueInQuotes,
00367                   ccBackslash,
00368                   taNone,
00369                   sAfterEscCharInQuotesInNValue);
00370    specTransition(sScanNValueInQuotes,
00371                   ccCarriageReturn,
00372                   taNone,
00373                   sHalfLineBreakInQuotesInNValue);
00374    specDefaultTransition(sAfterEscCharInQuotesInNValue,
00375                          taNone,
00376                          sScanNValueInQuotes);
00377    specHalfLineBreakState(sHalfLineBreakInQuotesInNValue,
00378                           sAfterLineBreakInQuotesInNValue);
00379    specDefaultTransition(sAfterLineBreakInQuotesInNValue,
00380                          taError,
00381                          sAfterLineBreakInQuotesInNValue);
00382    specTransition(sAfterLineBreakInQuotesInNValue,
00383                   ccWhitespace,
00384                   taNone,
00385                   sScanNValueInQuotes);
00386    specDefaultTransition(sScanNValueInAngles, taNone, sScanNValueInAngles);
00387    specTransition(sScanNValueInAngles,
00388                   ccRightAngleBracket,
00389                   taNone,
00390                   sScanNValue);
00391    specTransition(sScanNValueInAngles,
00392                   ccCarriageReturn,
00393                   taNone,
00394                   sHalfLineBreakInAnglesInNValue);
00395    specHalfLineBreakState(sHalfLineBreakInAnglesInNValue,
00396                           sAfterLineBreakInAnglesInNValue);
00397    specDefaultTransition(sAfterLineBreakInAnglesInNValue,
00398                          taError,
00399                          sAfterLineBreakInAnglesInNValue);
00400    specTransition(sAfterLineBreakInAnglesInNValue,
00401                   ccWhitespace,
00402                   taNone,
00403                   sScanNValueInAngles);
00404    specHalfLineBreakState(sHalfLineBreakAfterLineBreak, sMsgStart);
00405 
00406    // Most half-line-break states do nothing when they read a line feed,
00407    // but sHalfLineBreakAfterLineBreak must end the message header scanning.
00408 
00409    specTransition(sHalfLineBreakAfterLineBreak,
00410                   ccLineFeed,
00411                   taEndHeader,
00412                   sMsgStart); // Arbitrary but possibly handy.
00413 }
00414 
00415 // Debug follows
00416 #if defined(RESIP_MSG_HEADER_SCANNER_DEBUG)  
00417 
// Print "textLength" characters of "text" to stdout, showing backslash,
// carriage return, line feed, tab and the null character as C-style escape
// sequences and every other character unchanged.
static void printText(const char *  text,
                      unsigned int  textLength)
{
   for (unsigned int index = 0; index < textLength; ++index)
   {
      const char c = text[index];
      if (c == '\\')
      {
         printf("\\\\");
      }
      else if (c == '\r')
      {
         printf("\\r");
      }
      else if (c == '\n')
      {
         printf("\\n");
      }
      else if (c == '\t')
      {
         printf("\\t");
      }
      else if (c == '\0')
      {
         printf("\\0");
      }
      else
      {
         putchar(c);
      }
   }
}
00441 
00442 static const char *
00443 categorySymbol(CharCategory c)
00444 {
00445    switch(c)
00446    {
00447       case ccChunkTermSentinel: return "TERM";
00448       case ccOther: return "*";
00449       case ccFieldName: return "FName";
00450       case ccWhitespace: return "WS";
00451       case ccColon: return "\\\":\\\"";
00452       case ccDoubleQuotationMark: return "\\\"";
00453       case ccLeftAngleBracket: return "\\\"<\\\"";
00454       case ccRightAngleBracket: return "\\\">\\\"";
00455       case ccBackslash: return "\\\"\\\\\\\"";
00456       case ccComma: return "\\\",\\\"";
00457       case ccCarriageReturn: return "CR";
00458       case ccLineFeed: return "LF";
00459    }
00460    return "??CC??";
00461 }
00462 
00463 static const char *
00464 categoryName(CharCategory c)
00465 {
00466    switch(c)
00467    {
00468       case ccChunkTermSentinel: return "ccChunkTermSentinel";
00469       case ccOther: return "ccOther";
00470       case ccFieldName: return "ccFieldName";
00471       case ccWhitespace: return "ccWhitespace";
00472       case ccColon: return "ccColon";
00473       case ccDoubleQuotationMark: return "ccDoubleQuotationMark";
00474       case ccLeftAngleBracket: return "ccLeftAngleBracket";
00475       case ccRightAngleBracket: return "ccRightAngleBracket";
00476       case ccBackslash: return "ccBackslash";
00477       case ccComma: return "ccComma";
00478       case ccCarriageReturn: return "ccCarriageReturn";
00479       case ccLineFeed: return "ccLineFeed";
00480    }
00481    return "UNKNOWNCC";
00482 }
00483 
// Strip the leading type-noise ("cc", "s", "ta", ...) from an enumerator
// name so that it prints more compactly.
//
// A leader is removed only when "name" starts with it and the character
// right after it is an uppercase letter; the first leader in the table that
// qualifies wins.  The result points into "name" itself (no copy is made),
// and is "name" unchanged when no leader qualifies.
static const char *
cleanName(const char * name)
{
   // String literals are const, so the table is const as well.
   static const char *const leaders[] = {
      "cc",
      "s",
      "taChunkTerm", // hack to make ChunkTermSentinel smaller; must precede "ta"
      "ta"
   };
   const size_t nLeaders = sizeof(leaders)/sizeof(*leaders);
   const size_t nameLength = strlen(name);
   for (size_t i = 0; i < nLeaders; ++i)
   {
      const size_t leaderLength = strlen(leaders[i]);
      // The length test comes first so that name[leaderLength] is never the
      // terminating null.  The cast is needed because passing a negative
      // char value to isupper() is undefined.
      if (nameLength > leaderLength &&
          strncmp(name, leaders[i], leaderLength) == 0 &&
          isupper(static_cast<unsigned char>(name[leaderLength])))
      {
         return name + leaderLength;
      }
   }
   return name;
}
00509 
00510 static const char * 
00511 stateName(State state)
00512 {
00513    const char *stateName;
00514    switch (state) 
00515    {
00516       case sMsgStart:
00517          stateName = "sMsgStart";
00518          break;
00519       case sHalfLineBreakAtMsgStart:
00520          stateName = "sHalfLineBreakAtMsgStart";
00521          break;
00522       case sScanStatusLine:
00523          stateName = "sScanStatusLine";
00524          break;
00525       case sHalfLineBreakAfterStatusLine:
00526          stateName = "sHalfLineBreakAfterStatusLine";
00527          break;
00528       case sAfterLineBreakAfterStatusLine:
00529          stateName = "sAfterLineBreakAfterStatusLine";
00530          break;
00531       case sScanFieldName:
00532          stateName = "sScanFieldName";
00533          break;
00534       case sScanWhitespaceAfter1FieldName:
00535          stateName = "sScanWhitespaceAfter1FieldName";
00536          break;
00537       case sScanWhitespaceAfterNFieldName:
00538          stateName = "sScanWhitespaceAfterNFieldName";
00539          break;
00540       case sScanWhitespaceOr1Value:
00541          stateName = "sScanWhitespaceOr1Value";
00542          break;
00543       case sScanWhitespaceOrNValue:
00544          stateName = "sScanWhitespaceOrNValue";
00545          break;
00546       case sHalfLineBreakInWhitespaceBefore1Value:
00547          stateName = "sHalfLineBreakInWhitespaceBefore1Value";
00548          break;
00549       case sHalfLineBreakInWhitespaceBeforeNValue:
00550          stateName = "sHalfLineBreakInWhitespaceBeforeNValue";
00551          break;
00552       case sAfterLineBreakInWhitespaceBefore1Value:
00553          stateName = "sAfterLineBreakInWhitespaceBefore1Value";
00554          break;
00555       case sAfterLineBreakInWhitespaceBeforeNValue:
00556          stateName = "sAfterLineBreakInWhitespaceBeforeNValue";
00557          break;
00558       case sScan1Value:
00559          stateName = "sScan1Value";
00560          break;
00561       case sScanNValue:
00562          stateName = "sScanNValue";
00563          break;
00564       case sHalfLineBreakIn1Value:
00565          stateName = "sHalfLineBreakIn1Value";
00566          break;
00567       case sHalfLineBreakInNValue:
00568          stateName = "sHalfLineBreakInNValue";
00569          break;
00570       case sAfterLineBreakIn1Value:
00571          stateName = "sAfterLineBreakIn1Value";
00572          break;
00573       case sAfterLineBreakInNValue:
00574          stateName = "sAfterLineBreakInNValue";
00575          break;
00576       case sScanNValueInQuotes:
00577          stateName = "sScanNValueInQuotes";
00578          break;
00579       case sAfterEscCharInQuotesInNValue:
00580          stateName = "sAfterEscCharInQuotesInNValue";
00581          break;
00582       case sHalfLineBreakInQuotesInNValue:
00583          stateName = "sHalfLineBreakInQuotesInNValue";
00584          break;
00585       case sAfterLineBreakInQuotesInNValue:
00586          stateName = "sAfterLineBreakInQuotesInNValue";
00587          break;
00588       case sScanNValueInAngles:
00589          stateName = "sScanNValueInAngles";
00590          break;
00591       case sHalfLineBreakInAnglesInNValue:
00592          stateName = "sHalfLineBreakInAnglesInNValue";
00593          break;
00594       case sAfterLineBreakInAnglesInNValue:
00595          stateName = "sAfterLineBreakInAnglesInNValue";
00596          break;
00597       case sHalfLineBreakAfterLineBreak:
00598          stateName = "sHalfLineBreakAfterLineBreak";
00599          break;
00600       default:
00601          stateName = "<unknown>";
00602    }//switch
00603    return stateName;
00604 }
00605 
00606 static const char *
00607 trActionName(TransitionAction transitionAction)
00608 {  
00609    const char *transitionActionName;
00610    switch (transitionAction)
00611    {
00612       case taNone:
00613          transitionActionName = "taNone";
00614          break;
00615       case taTermStatusLine:
00616          transitionActionName = "taTermStatusLine";
00617          break;
00618       case taTermFieldName:
00619          transitionActionName = "taTermFieldName";
00620          break;
00621       case taBeyondEmptyValue:
00622          transitionActionName = "taBeyondEmptyValue";
00623          break;
00624       case taTermValueAfterLineBreak:
00625          transitionActionName = "taTermValueAfterLineBreak";
00626          break;
00627       case taTermValue:
00628          transitionActionName = "taTermValue";
00629          break;
00630       case taStartText:
00631          transitionActionName = "taStartText";
00632          break;
00633       case taEndHeader:
00634          transitionActionName = "taEndHeader";
00635          break;
00636       case taChunkTermSentinel:
00637          transitionActionName = "taChunkTermSentinel";
00638          break;
00639       case taError:
00640          transitionActionName = "taError";
00641          break;
00642       default:
00643          transitionActionName = "<unknown>";
00644    }
00645    return transitionActionName;
00646 }
00647 
00648 static void
00649 printStateTransition(State state,
00650                      char character,
00651                      TransitionAction transitionAction)
00652 {
00653    printf("                %s['", cleanName(stateName(state)));
00654    printText(&character, 1);
00655    printf("']: %s\n", cleanName(trActionName(transitionAction)));
00656 }
// Placeholder name functions for builds without scanner debugging.
// NOTE(review): this guard sits inside the enclosing
// "#if defined(RESIP_MSG_HEADER_SCANNER_DEBUG)" region, so as written the
// condition can never hold and these stubs are never compiled -- confirm
// whether they were meant to live outside that region.
#if !defined(RESIP_MSG_HEADER_SCANNER_DEBUG)
static const char* stateName(const char*)
{
   return "RECOMPILE_WITH_SCANNER_DEBUG";
}

static const char* trActionName(const char*)
{
   return stateName(0);
}
#endif
00663 
00664 
00665 
00666 
00667 int
00668 MsgHeaderScanner::dumpStateMachine(int fd)
00669 {
00670    FILE *fp = fdopen(fd,"w");
00671    if (!fp) 
00672    {
00673       fprintf(stderr,"MsgHeaderScanner:: unable to open output file\n");
00674       return -1;
00675    }
00676    // Force instance so things are initialized -- YUCK! 
00677    MsgHeaderScanner scanner;(void)scanner;
00678    fprintf(fp,"digraph MsgHeaderScannerFSM {\n");
00679    fprintf(fp,"\tnode[shape=record\n\t\tfontsize=8\n\t\tfontname=\"Helvetica\"\n\t]\n");
00680    fprintf(fp,"\tedge [ fontsize=6 fontname=\"Helvetica\"]\n");
00681    
00682    fprintf(fp,"\tgraph [ ratio=0.8\n\t\tfontsize=6 compound=true ]");
00683    for(int state  = 0 ; state < numStates; ++state)
00684    {
00685       fprintf(fp,
00686               "  %s [ label = \"%d|%s\" ]\n",
00687               cleanName(stateName(state)),
00688               state,
00689               cleanName(stateName(state))
00690          );
00691       for(int category = 0 ; category < numCharCategories; ++category)
00692       {
00693          // Skip Verbose Error or Empty Transitions
00694          if (stateMachine[state][category].nextState == state &&
00695              (stateMachine[state][category].action == taError ||
00696               stateMachine[state][category].action == taNone
00697                 )) continue;
00698               
00699          fprintf(fp,
00700                  "    %s -> %s [label=\"%s\\n%s\" ]\n",
00701                  cleanName(stateName(state)),
00702                  cleanName(stateName(stateMachine[state][category].nextState)),
00703                  categorySymbol(category),
00704                  cleanName(trActionName(stateMachine[state][category].action)));
00705       }
00706       fprintf(fp,"\n");
00707    }
00708    fprintf(fp,"}\n");
00709 
00710    return 0;
00711 }
00712 
00713 #endif //defined(RESIP_MSG_HEADER_SCANNER_DEBUG) 
00714 
00715 
00716 
00717 #if defined(RESIP_MSG_HEADER_SCANNER_DEBUG)  
00718 
00719 static const char *const multiValuedFieldNameArray[] = {
00720    "allow-events",
00721    "accept-encoding",
00722    "accept-language",
00723    "allow",
00724    "content-language",
00725    "proxy-require",
00726    "require",
00727    "supported",
00728    "subscription-state",
00729    "unsupported",
00730    "security-client",
00731    "security-server",
00732    "security-verify",
00733    "accept",
00734    "call-info",
00735    "alert-info",
00736    "error-info",
00737    "record-route",
00738    "route",
00739    "contact",
00740    "authorization",
00741    "proxy-authenticate",
00742    "proxy-authorization",
00743    "www-authenticate",
00744    "via",
00745    0
00746 };
00747 
00748 extern
00749 void
00750 lookupMsgHeaderFieldInfo(
00751    char *                             fieldName,               //inout
00752    unsigned int                       *fieldNameLength,        //inout
00753    MsgHeaderScanner::TextPropBitMask  fieldNameTextPropBitMask,
00754    int                                *fieldKind,              //out
00755    bool                               *isMultiValueAllowed)    //out
00756 {
00757    *isMultiValueAllowed = false;
00758    const char *const *multiValuedFieldNamePtr = multiValuedFieldNameArray;
00759    for (;;)
00760    {
00761       const char *multiValuedFieldName = *multiValuedFieldNamePtr;
00762       if (!multiValuedFieldName) 
00763       {
00764          break;
00765       }
00766       if (strncmp(fieldName, multiValuedFieldName, *fieldNameLength) == 0) 
00767       {
00768          *isMultiValueAllowed = true;
00769          break;
00770       }
00771       ++multiValuedFieldNamePtr;
00772    }//for
00773 }
00774 
00775 static
00776 bool
00777 processMsgHeaderStatusLine(
00778    SipMessage *                       msg,
00779    char *                             lineText,
00780    unsigned int                       lineTextLength,
00781    MsgHeaderScanner::TextPropBitMask  lineTextPropBitMask)
00782 {
00783    printf("status line: ");
00784    printText(lineText, lineTextLength);
00785    printf("\n");
00786    return true;
00787 }
00788 
00789 static
00790 void
00791 processMsgHeaderFieldNameAndValue(
00792    SipMessage *                       msg,
00793    int                                fieldKind,
00794    const char *                       fieldName,
00795    unsigned int                       fieldNameLength,
00796    char *                             valueText,
00797    unsigned int                       valueTextLength,
00798    MsgHeaderScanner::TextPropBitMask  valueTextPropBitMask)
00799 {
00800    printText(fieldName, fieldNameLength);
00801    printf(": [[[[");
00802    printText(valueText, valueTextLength);
00803    printf("]]]]\n");
00804 }
00805 
00806 #else //!defined(RESIP_MSG_HEADER_SCANNER_DEBUG) } {
00807 
00808 
00809 //   Determine a field's kind and whether it allows comma-separated multi-values.
00810 //   "fieldName" is not empty and contains only legal characters.  It points
      //   into the scanned chunk, so its extent is given by "*fieldNameLength".
00811 //   The text in "fieldName" may be canonicalized (e.g., translating % escapes),
00812 //   including shrinking it if necessary; that is why the name and its length
      //   are passed by non-const pointer.  The code below writes through neither.
00813 //   Outputs: "*fieldKind" receives the result of Headers::getType() and
      //   "*isMultiValueAllowed" the result of Headers::isCommaTokenizing() for it.
00814 inline void
00815 lookupMsgHeaderFieldInfo(char * fieldName,
00816                          unsigned int *fieldNameLength,   
00817                          MsgHeaderScanner::TextPropBitMask fieldNameTextPropBitMask,
00818                          int *fieldKind,             
00819                          bool *isMultiValueAllowed)    
00820 {
00821    //.jacob. Don't ignore fieldNameTextPropBitMask.  (It is currently unused.)
00822    *fieldKind = Headers::getType(fieldName, *fieldNameLength);
00823    *isMultiValueAllowed =
00824       Headers::isCommaTokenizing(static_cast<Headers::Type>(*fieldKind));
00825 }
00826 
00827 
00828 // Hand the scanned start line to "msg" via SipMessage::setStartLine().
      // "lineText" contains no carriage returns and no line feeds.
00829 // Return true on success, false on failure.  A false result makes
      // scanChunk() stop with scrError; this implementation always returns true.
00830 
00831 inline bool
00832 processMsgHeaderStatusLine(SipMessage * msg,
00833                            char * lineText,
00834                            unsigned int lineTextLength,
00835                            MsgHeaderScanner::TextPropBitMask lineTextPropBitMask)
00836 {
00837    //.jacob. Don't ignore lineTextPropBitMask, and don't always return true.
00838    msg->setStartLine(lineText, lineTextLength);
00839    return true;
00840 }
00841 
00842 // This function is called once for a field with one value.  (The value could be
00843 // several values, but separated by something other than commas.)
00844 // This function is called once for a field with 0 comma-separated values, with
00845 // an empty value.  (For an empty value scanChunk() passes a null "valueText"
      // with a length of 0.)
00846 // This function is called N times for a field with N comma-separated values,
00847 // but with the same value of "fieldName" each time.
00848 // "fieldName" is not empty and contains only legal characters.
00849 // "valueText" may be empty, has no leading whitespace, may contain trailing
00850 // whitespace, contains carriage returns and line feeds only in correct pairs
00851 // and followed by whitespace, and, if the field is multi-valued, contains
00852 // balanced '<'/'>' and '"' pairs, contains ',' only within '<'/'>' or '"'
00853 // pairs, and respects '\\'s within '"' pairs.
00854 // The text in "valueText" may be canonicalized (e.g., translating % escapes),
00855 // including shrinking it if necessary.
00856 // "fieldKind" is the int produced by lookupMsgHeaderFieldInfo(); it is cast
      // back to Headers::Type before being passed to SipMessage::addHeader().
00857 inline void
00858 processMsgHeaderFieldNameAndValue(SipMessage * msg,
00859                                   int fieldKind,
00860                                   const char * fieldName,
00861                                   unsigned int fieldNameLength,
00862                                   char * valueText,
00863                                   unsigned int valueTextLength,
00864                                   MsgHeaderScanner::TextPropBitMask valueTextPropBitMask)
00865 {
00866    //.jacob. Don't ignore valueTextPropBitMask, particularly for '\r' & '\n'.
00867    msg->addHeader(static_cast<Headers::Type>(fieldKind),
00868                   fieldName,
00869                   fieldNameLength,
00870                   valueText,
00871                   valueTextLength);
00872 }
00873 
00874 #endif //!defined(RESIP_MSG_HEADER_SCANNER_DEBUG) }
00875 // Set once initialize() has been requested; shared by all scanner instances.
00876 bool MsgHeaderScanner::mInitialized = false;
00877 // The first scanner constructed triggers initialize(); later ones skip it.
      // This constructor body sets no per-message state, so prepareForMessage()
      // or prepareForFrag() is expected to run before scanChunk().
      // NOTE(review): the flag is tested and set without synchronization, and it
      // is set before initialize() has run -- confirm that scanners are never
      // first constructed concurrently on different threads.
00878 MsgHeaderScanner::MsgHeaderScanner()
00879 {
00880    if (!mInitialized)
00881    {
00882       mInitialized = true;
00883       initialize();
00884    }
00885 }
00886 // Reset the scanner to parse a complete message header into "msg": start in
      // state sMsgStart with no text carried over from a previous chunk and a
      // header count of zero.
      // NOTE(review): mTextPropBitMask is not reset here although scanChunk()
      // reads it on entry; presumably harmless because the mask is cleared when
      // the first text starts -- confirm.
00887 void
00888 MsgHeaderScanner::prepareForMessage(SipMessage *  msg)
00889 {
00890    mMsg = msg;
00891    mState = sMsgStart;
00892    mPrevScanChunkNumSavedTextChars = 0;
00893    mNumHeaders=0;
00894 }
00895 // Reset the scanner to parse a message fragment into "msg".  Behaves like
      // prepareForMessage() except that, when "hasStartLine" is false, scanning
      // begins in the state that follows the status line's line break, i.e. the
      // fragment is expected to begin directly with header fields.
      // NOTE(review): as in prepareForMessage(), mTextPropBitMask is not reset.
00896 void
00897 MsgHeaderScanner::prepareForFrag(SipMessage *  msg, bool hasStartLine)
00898 {
00899    mMsg = msg;
00900    if (hasStartLine)
00901    {
00902       mState = sMsgStart;
00903    }
00904    else
00905    {
00906       mState = sAfterLineBreakAfterStatusLine;
00907    }
00908    mPrevScanChunkNumSavedTextChars = 0;
00909    mNumHeaders=0;
00910 }
00911 // Scan one chunk of message-header text, driving the character state machine
      // and passing each completed status line / field value on to mMsg.
      //
      // "chunk" must have at least one writable char beyond "chunkLength":
      // chunk[chunkLength] is temporarily overwritten with the sentinel char and
      // restored before returning.  The first mPrevScanChunkNumSavedTextChars
      // chars of "chunk" are treated as the unfinished text carried over from
      // the previous call and are not scanned again.
      //
      // Returns, with "*unprocessedCharPtr" set as follows:
      //   scrEnd       - the end of the header was reached; points just past
      //                  the last header char.
      //   scrNextChunk - the chunk was used up; points at the start of the
      //                  unfinished text (the chunk end if there is none).  The
      //                  next chunk is expected to begin with that text.
      //   scrError     - the status line was rejected or an unknown transition
      //                  action was met; points at the current char.
      //
      // NOTE(review): mFieldName keeps pointing into the chunk in which the field
      // name ended; presumably the caller keeps that buffer alive while later
      // chunks of the same field are scanned -- confirm.
00912 MsgHeaderScanner::ScanChunkResult
00913 MsgHeaderScanner::scanChunk(char * chunk,
00914                             unsigned int chunkLength,
00915                             char ** unprocessedCharPtr)
00916 {
00917    MsgHeaderScanner::ScanChunkResult result;
         // Local copies of the tables and state used by the per-character loop.
00918    CharInfo* localCharInfoArray = charInfoArray;
00919    TransitionInfo (*localStateMachine)[numCharCategories] = stateMachine;
00920    State localState = mState;
         // Resume after any text carried over from the previous chunk.
00921    char *charPtr = chunk + mPrevScanChunkNumSavedTextChars;
         // Plant the sentinel just past the chunk; the original char is put
         // back at "endOfFunction".
00922    char *termCharPtr = chunk + chunkLength;
00923    char saveChunkTermChar = *termCharPtr;
00924    *termCharPtr = chunkTermSentinelChar;
         // Start of the text item being accumulated, or 0 when none is open.
00925    char *textStartCharPtr;
00926    MsgHeaderScanner::TextPropBitMask localTextPropBitMask = mTextPropBitMask;
00927    if (mPrevScanChunkNumSavedTextChars == 0)
00928    {
00929       textStartCharPtr = 0;
00930    }
00931    else
00932    {
            // The carried-over text sits at the very start of this chunk.
00933       textStartCharPtr = chunk;
00934    }
00935    --charPtr;  // The loop starts by advancing "charPtr", so pre-adjust it.
00936    for (;;)
00937    {
00938       // BEGIN message header character scan block BEGIN
00939       // The code in this block is executed once per message header character.
00940       // This entire file is designed specifically to minimize this block's size.
00941       ++charPtr;
00942       CharInfo *charInfo = &localCharInfoArray[((unsigned char) (*charPtr))];
00943       CharCategory charCategory = charInfo->category;
00944       localTextPropBitMask |= charInfo->textPropBitMask;
            // Re-entry point used when a sentinel-valued char turns out to be
            // ordinary data (see taChunkTermSentinel below).
00945      determineTransitionFromCharCategory:
00946       TransitionInfo *transitionInfo =
00947          &(localStateMachine[(unsigned)localState][(size_t)charCategory]);
00948       TransitionAction transitionAction = transitionInfo->action;
00949 #if defined(RESIP_MSG_HEADER_SCANNER_DEBUG)  
00950       printStateTransition(localState, *charPtr, transitionAction);
00951 #endif
00952       localState = transitionInfo->nextState;
00953       if (transitionAction == taNone) continue;
00954       // END message header character scan block END
00955       // The loop remainder is executed about 4-5 times per message header line.
00956       switch (transitionAction)
00957       {
00958          case taTermStatusLine:
                  // The status line is [textStartCharPtr, charPtr).
00959             if (!processMsgHeaderStatusLine(mMsg,
00960                                             textStartCharPtr,
00961                                             (unsigned int)(charPtr - textStartCharPtr),
00962                                             localTextPropBitMask))
00963             {
00964                result = MsgHeaderScanner::scrError;
00965                *unprocessedCharPtr = charPtr;
00966                goto endOfFunction;
00967             }
00968             textStartCharPtr = 0;
00969             break;
00970          case taTermFieldName:
00971          {
                  // The field name is [textStartCharPtr, charPtr); remember it
                  // for the value(s) that follow.
00972             mFieldNameLength = (unsigned int)(charPtr - textStartCharPtr);
00973             bool isMultiValueAllowed;
00974             lookupMsgHeaderFieldInfo(textStartCharPtr,
00975                                      &mFieldNameLength,
00976                                      localTextPropBitMask,
00977                                      &mFieldKind,
00978                                      &isMultiValueAllowed);
00979             mFieldName = textStartCharPtr;
00980             textStartCharPtr = 0;
00981             if (isMultiValueAllowed) 
00982             {
                     // Shift from the single-value state to its multi-value
                     // counterpart, which is a fixed offset away.
00983                localState += deltaOfNStateFrom1State;
00984             }
00985          }
00986          break;
00987          case taBeyondEmptyValue:
                  // Report an empty value (null text, zero length); the current
                  // char then starts the next text item.
00988             processMsgHeaderFieldNameAndValue(mMsg,
00989                                               mFieldKind,
00990                                               mFieldName,
00991                                               mFieldNameLength,
00992                                               0,
00993                                               0,
00994                                               0);
00995             ++mNumHeaders;
00996             goto performStartTextAction;
00997          case taTermValueAfterLineBreak:
                  // The value ended at the line break just scanned, so drop the
                  // two line-break chars from its length; the current char
                  // starts the next text item.
00998             processMsgHeaderFieldNameAndValue(mMsg,
00999                                               mFieldKind,
01000                                               mFieldName,
01001                                               mFieldNameLength,
01002                                               textStartCharPtr,
01003                                               (unsigned int)((charPtr - textStartCharPtr) - 2),
01004                                               localTextPropBitMask);       //^:CRLF
01005             ++mNumHeaders;
01006             goto performStartTextAction;
01007          case taTermValue:
                  // The value is [textStartCharPtr, charPtr); no new text item
                  // is opened yet.
01008             processMsgHeaderFieldNameAndValue(mMsg,
01009                                               mFieldKind,
01010                                               mFieldName,
01011                                               mFieldNameLength,
01012                                               textStartCharPtr,
01013                                               (unsigned int)(charPtr - textStartCharPtr),
01014                                               localTextPropBitMask);
01015             textStartCharPtr = 0;
01016             ++mNumHeaders;
01017             break;
01018          case taStartText:
01019         performStartTextAction:
                  // A new text item begins at the current char, with a fresh
                  // property mask.
01020             textStartCharPtr = charPtr;
01021             localTextPropBitMask = 0;
01022             break;
01023          case taEndHeader:
01024             // textStartCharPtr is not 0.  Not currently relevant.
01025             result = MsgHeaderScanner::scrEnd;
01026             *unprocessedCharPtr = charPtr + 1;  // The current char is processed.
01027             goto endOfFunction;
01028             break;  // Not reached.
01029          case taChunkTermSentinel:
01030             if (charPtr == termCharPtr)
01031             {
01032                // The chunk has been consumed.  Save some state and request another.
01033                mState = localState;
01034                if (textStartCharPtr == 0) 
01035                {
01036                   mPrevScanChunkNumSavedTextChars = 0;
01037                }
01038                else
01039                {
                        // Count of chars in the unfinished text item.
01040                   mPrevScanChunkNumSavedTextChars = (unsigned int)(termCharPtr - textStartCharPtr);
01041                }
01042                mTextPropBitMask = localTextPropBitMask;
01043                result = MsgHeaderScanner::scrNextChunk;
01044                *unprocessedCharPtr = termCharPtr - mPrevScanChunkNumSavedTextChars;
01045                goto endOfFunction;
01046             }
01047             else
01048             {
01049                // The character is not the sentinel.  Treat it like any other.
                     // NOTE(review): localState was already replaced by this
                     // transition's nextState above; presumably the sentinel
                     // transitions leave the state unchanged -- confirm.
01050                charCategory = ccOther;
01051                goto determineTransitionFromCharCategory;
01052             }
01053             break;  // Not reached.
01054          default:
                  // Any other action is reported as a scan error at this char.
01055             result = MsgHeaderScanner::scrError;
01056             *unprocessedCharPtr = charPtr;
01057             goto endOfFunction;
01058       }//switch
01059    }//for
01060   endOfFunction:
         // Put back the char that the sentinel replaced.
01061    *termCharPtr = saveChunkTermChar;
01062    return result;
01063 }
01064 // Build the tables scanChunk() relies on: the per-character info array and
      // the state machine.  Always returns true.
01065 bool
01066 MsgHeaderScanner::initialize()
01067 {
01068    initCharInfoArray();
01069    initStateMachine();
01070    return true;
01071 }
01072 
01073 
01074 } //namespace resip
01075 
01076 
01077 
01078 #if defined(RESIP_MSG_HEADER_SCANNER_DEBUG) && defined(MSG_SCANNER_STANDALONE)
01079 // Standalone debug driver: scans a fixed three-line header in two chunks.
      // The first chunk is the first 21 chars, which ends between the CR and LF
      // that terminate the "bobby" line; the second chunk starts at the char the
      // scanner reported as unprocessed and runs to the end of the text.
      // Prints a diagnostic unless the scan finishes with scrEnd.  Always returns 0.
      // NOTE(review): this signature (unsigned int, const char **) is not one of
      // the standard forms of main; strlen()/strcpy() are used although no
      // <string.h> include is visible at the top of the file; "text" binds a
      // string literal to a non-const char *.  Confirm this still builds.
01080 extern
01081 int
01082 main(unsigned int   numArgs,
01083      const char * * argVector)
01084 {
01085    ::resip::MsgHeaderScanner scanner;
         // A null message is passed: the debug-build handlers only print.
01086    scanner.prepareForMessage(0);
01087    char *text =
01088       "status\r\n"
01089       "bobby: dummy\r\n"
01090       "allow: foo, bar, \"don,\\\"xyz\r\n zy\", buzz\r\n\r\n";
01091    unsigned int textLength = strlen(text);
         // Scan from a writable copy: scanChunk() writes a sentinel into its buffer.
01092    char chunk[10000];
01093    strcpy(chunk, text);
01094    ::resip::MsgHeaderScanner::ScanChunkResult scanChunkResult;
01095    char *unprocessedCharPtr;
01096    scanChunkResult = scanner.scanChunk(chunk, 21, &unprocessedCharPtr);
01097    if (scanChunkResult == ::resip::MsgHeaderScanner::scrNextChunk)
01098    {
            // Show the first char of the second chunk, then scan the remainder.
01099       printf("Scanning another chunk '.");
01100       ::resip::printText(unprocessedCharPtr, 1);
01101       printf("'\n");
01102       scanChunkResult =
01103          scanner.scanChunk(unprocessedCharPtr,
01104                            (chunk + textLength) - unprocessedCharPtr,
01105                            &unprocessedCharPtr);
01106    }
01107    if (scanChunkResult != ::resip::MsgHeaderScanner::scrEnd)
01108    {
            // NOTE(review): the second "%d" receives a pointer difference, which
            // is not an int on every platform -- confirm the format is adequate.
01109       printf("Error %d at character %d.\n",
01110              scanChunkResult,
01111              unprocessedCharPtr - chunk);
01112    }
01113    return 0;
01114 }
01115 
01116 #endif // defined(RESIP_MSG_HEADER_SCANNER_DEBUG) && defined(MSG_SCANNER_STANDALONE)
01117 
01118 /* ====================================================================
01119  * The Vovida Software License, Version 1.0 
01120  * 
01121  * Copyright (c) 2000-2005
01122  * 
01123  * Redistribution and use in source and binary forms, with or without
01124  * modification, are permitted provided that the following conditions
01125  * are met:
01126  * 
01127  * 1. Redistributions of source code must retain the above copyright
01128  *    notice, this list of conditions and the following disclaimer.
01129  * 
01130  * 2. Redistributions in binary form must reproduce the above copyright
01131  *    notice, this list of conditions and the following disclaimer in
01132  *    the documentation and/or other materials provided with the
01133  *    distribution.
01134  * 
01135  * 3. The names "VOCAL", "Vovida Open Communication Application Library",
01136  *    and "Vovida Open Communication Application Library (VOCAL)" must
01137  *    not be used to endorse or promote products derived from this
01138  *    software without prior written permission. For written
01139  *    permission, please contact vocal@vovida.org.
01140  *
01141  * 4. Products derived from this software may not be called "VOCAL", nor
01142  *    may "VOCAL" appear in their name, without prior written
01143  *    permission of Vovida Networks, Inc.
01144  * 
01145  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
01146  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
01147  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
01148  * NON-INFRINGEMENT ARE DISCLAIMED.  IN NO EVENT SHALL VOVIDA
01149  * NETWORKS, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT DAMAGES
01150  * IN EXCESS OF $1,000, NOR FOR ANY INDIRECT, INCIDENTAL, SPECIAL,
01151  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
01152  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
01153  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
01154  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
01155  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
01156  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
01157  * DAMAGE.
01158  * 
01159  * ====================================================================
01160  * 
01161  * This software consists of voluntary contributions made by Vovida
01162  * Networks, Inc. and many individuals on behalf of Vovida Networks,
01163  * Inc.  For more information on Vovida Networks, Inc., please see
01164  * <http://www.vovida.org/>.
01165  *
01166  */