reSIProcate/stack  9373
XMLCursor.cxx
Go to the documentation of this file.
00001 #if defined(HAVE_CONFIG_H)
00002 #include "resip/stack/config.hxx"
00003 #endif
00004 
00005 #include "resip/stack/XMLCursor.hxx"
00006 #include "resip/stack/Symbols.hxx"
00007 #include "rutil/Logger.hxx"
00008 #include "rutil/WinLeakCheck.hxx"
00009 
00010 using namespace resip;
00011 using namespace std;
00012 
00013 #define RESIPROCATE_SUBSYSTEM Subsystem::CONTENTS
00014 
00040 static char BANG[] = "!";
00041 static char HYPHEN[] = "-";
00042 //http://www.w3.org/TR/1998/REC-xml-19980210
00043 static const Data COMMENT_START("<!--");
00044 static const Data COMMENT_END("-->");
00045 
00046 // An alternative to stripping comments out in preparse
00047 // is to deal with them in the parse; ignore when after non-leaf element
00048 // put a leaf after a comment after a leaf in the first leaf's children
00049 // getValue() needs to copy first leaf and all 'child' leaves to mValue
00050 //
00051 // has the advantage of allowing
00052 // 1. lazier parsing
00053 // 2. embedded wierdnesses like <! > and <? >
00054 XMLCursor::XMLCursor(const ParseBuffer& pb)
00055    : mRoot(0),
00056      mCursor(0),
00057      mAttributesSet(false)
00058 {
00059    ParseBuffer lPb(pb);
00060 
00061    skipProlog(lPb);
00062    const char* start = lPb.position();
00063 
00064    lPb.skipToChars(COMMENT_START);
00065    if (!lPb.eof())
00066    {
00067       StackLog(<< "removing comments");
00068       lPb.reset(start);
00069       mData.reserve(lPb.end() - lPb.start());
00070 
00071       const char* anchor = start;
00072       {
00073          DataStream str(mData);
00074          Data temp;
00075          while (true)
00076          {
00077             lPb.skipToChars(COMMENT_START);
00078             if (!lPb.eof())
00079             {
00080                lPb.data(temp, anchor);
00081                str << temp;
00082                anchor = Node::skipComments(lPb);
00083             }
00084             else
00085             {
00086                lPb.data(temp, anchor);
00087                str << temp;
00088                break;
00089             }
00090          }
00091       }
00092       mRoot = new Node(ParseBuffer(mData.data(), mData.size()));
00093    }
00094    else
00095    {
00096       mRoot = new Node(ParseBuffer(start, pb.end() - start));
00097    }
00098    mCursor = mRoot;
00099 
00100    if (mRoot->extractTag())
00101    {
00102       InfoLog(<< "XML: empty element no a legal root");
00103       mRoot->mPb.fail(__FILE__, __LINE__);
00104    }
00105 
00106    mTag = mRoot->mTag;
00107    decodeName(mRoot->mTag);
00108 
00109    // check for # & and note -- make decode, decodeName do stuff if set
00110 
00111    //<top></top> // no children
00112    ParseBuffer pbtemp(mRoot->mPb);
00113    pbtemp.skipToChar(Symbols::RA_QUOTE[0]);
00114    pbtemp.skipChar();
00115    if (!WhitespaceSignificant)
00116    {
00117       pbtemp.skipWhitespace();
00118    }
00119    if (*pbtemp.position() == Symbols::LA_QUOTE[0] &&
00120        *(pbtemp.position()+1) == Symbols::SLASH[0])
00121    {
00122       pbtemp.skipChar();
00123       pbtemp.skipChar();
00124       if (strncmp(mRoot->mTag.data(), pbtemp.position(), mRoot->mTag.size()) == 0)
00125       {
00126          // no children ever
00127          mRoot->mPb.reset(mRoot->mPb.end());
00128          return;
00129       }
00130    }
00131 }
00132 
00133 XMLCursor::~XMLCursor()
00134 {
00135    delete mRoot;
00136 }
00137 
00138 static const Data QUESTION_RA_QUOTE("?>");
00139 void
00140 XMLCursor::skipProlog(ParseBuffer& pb)
00141 {
00142    //'<?xml' VersionInfo '<xml?' EncodingDecl '?>'? '<?xml' SDDecl '?>'? S? '?>
00143 
00144    // !dlb! much more complicated than this.. can contain comments
00145    const char* start = pb.position();
00146    pb.skipToChars(QUESTION_RA_QUOTE);
00147    if(pb.eof()) 
00148    {
00149       // No Prolog
00150       pb.reset(start);
00151       return;
00152    }
00153    pb.skipN(2);
00154    pb.skipWhitespace();
00155 }
00156 
00157 void
00158 XMLCursor::decode(Data& text)
00159 {
00160 }
00161 
00162 void
00163 XMLCursor::decodeName(Data& name)
00164 {
00165 }
00166 
00167 void
00168 XMLCursor::parseNextRootChild()
00169 {
00170    // no next child to parse?
00171    if (mRoot->mPb.eof())
00172    {
00173       return;
00174    }
00175 
00176    // next child already parsed?
00177    if (mRoot->mNext != mRoot->mChildren.end())
00178    {
00179       return;
00180    }
00181 
00182    // skip self tag
00183    if (mRoot->mPb.position() == mRoot->mPb.start())
00184    {
00185       mRoot->mPb.skipToChar(Symbols::RA_QUOTE[0]);
00186       mRoot->mPb.skipChar();
00187    }
00188 
00189    if (!WhitespaceSignificant)
00190    {
00191       mRoot->mPb.skipWhitespace();
00192    }
00193 
00194    // root end tag?
00195    if (*mRoot->mPb.position() == Symbols::LA_QUOTE[0])
00196    {
00197       ParseBuffer pb(mRoot->mPb.position(), 
00198                      mRoot->mPb.end() - mRoot->mPb.position());
00199       pb.skipChar();
00200       if (!pb.eof() && *pb.position() == Symbols::SLASH[0])
00201       {
00202          pb.skipChar();
00203          // CodeWarrior isn't helpful enough to pick the "obvious" operator definition
00204          // so we add volatile here so CW is completely unconfused what to do.
00205                  // second note - MSVC 7.0 won't compile the volatile - tried the following to fix
00206                  const char* end = pb.position();
00207          if ( (const char*)pb.end() < end + mTag.size() )
00208          {
00209             InfoLog(<< "XML: unexpected end");
00210             pb.fail(__FILE__, __LINE__);
00211          }
00212          
00213          if (strncmp(mTag.data(), pb.position(), mRoot->mTag.size()) == 0)
00214          {
00215             mRoot->mPb.skipToEnd();
00216             return;
00217          }
00218       }
00219    }
00220 
00221    // leaf?
00222    if (*mRoot->mPb.position() != Symbols::LA_QUOTE[0])
00223    {
00224       const char* anchor = mRoot->mPb.position();
00225       mRoot->mPb.skipToChar(Symbols::LA_QUOTE[0]);
00226       Node* leaf = new Node(ParseBuffer(anchor, mRoot->mPb.position() - anchor));
00227       leaf->mIsLeaf = true;
00228       mRoot->addChild(leaf);
00229    }
00230    else
00231    {
00232       Node* child = new Node(mRoot->mPb);
00233       child->skipToEndTag();
00234 
00235       // leave the parse buffer after the child
00236       mRoot->mPb.reset(child->mPb.end());
00237 
00238       mRoot->addChild(child);
00239    }
00240 
00241    // mNext always points at cursored child
00242    mRoot->mNext = mRoot->mChildren.end();
00243    mRoot->mNext--;
00244 }
00245 
00246 bool
00247 XMLCursor::nextSibling()
00248 {
00249    if (atRoot())
00250    {
00251       StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " <<root>>");
00252       return false;
00253    }
00254 
00255    StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " " << *this->mCursor->mParent);
00256    if (mCursor->mParent == mRoot)
00257    {
00258       parseNextRootChild();
00259    }
00260 
00261    if (mCursor->mParent->mNext != mCursor->mParent->mChildren.end())
00262    {
00263       mCursor = *((mCursor->mParent->mNext)++);
00264       mAttributesSet = false;
00265       return true;
00266    }
00267    else
00268    {
00269       return false;
00270    }
00271 }
00272 
00273 bool
00274 XMLCursor::firstChild()
00275 {
00276    if (atRoot() &&
00277        mRoot->mChildren.empty())
00278    {
00279       parseNextRootChild();
00280    }
00281 
00282    if (mCursor->mChildren.empty())
00283    {
00284       return false;
00285    }
00286    else
00287    {
00288       // mNext always points after cursored child
00289       mCursor->mNext = mCursor->mChildren.begin();
00290       mCursor->mNext++;
00291       mCursor = mCursor->mChildren.front();
00292       mAttributesSet = false;
00293       return true;
00294    }
00295 }
00296 
00297 bool
00298 XMLCursor::parent()
00299 {
00300    if (atRoot())
00301    {
00302       return false;
00303    }
00304 
00305    mCursor = mCursor->mParent;
00306    mAttributesSet = false;
00307    return true;
00308 }
00309 
00310 void
00311 XMLCursor::reset()
00312 {
00313    mCursor = mRoot;
00314    mAttributesSet = false;
00315 }
00316 
00317 bool
00318 XMLCursor::atRoot() const
00319 {
00320    return mCursor == mRoot;
00321 }
00322 
00323 bool
00324 XMLCursor::atLeaf() const
00325 {
00326    return mCursor->mIsLeaf;
00327 }
00328 
00329 const Data&
00330 XMLCursor::getTag() const
00331 {
00332    return mCursor->mTag;
00333 }
00334 
00335 //<foo >
00336 //<foo>
00337 //<foo/>
00338 //<foo attr = 'value'   attr="value">
00339 //<foo attr = 'value'   attr="value" >
00340 //
00341 //<foo attr = 'value'   attr="value" />
00342 static const Data RA_QUOTE_SLASH(">/");
00343 const XMLCursor::AttributeMap&
00344 XMLCursor::getAttributes() const
00345 {
00346    if (!atLeaf() &&
00347        !mAttributesSet)
00348    {
00349       mAttributes.clear();
00350       mAttributesSet = true;
00351    
00352       ParseBuffer pb(mCursor->mPb);
00353       pb.reset(mCursor->mPb.start());
00354 
00355       Data attribute;
00356       Data value;
00357 
00358       pb.skipToOneOf(ParseBuffer::Whitespace, RA_QUOTE_SLASH);
00359 
00360       while (!pb.eof() && 
00361              *pb.position() != Symbols::RA_QUOTE[0] &&
00362              *pb.position() != Symbols::SLASH[0])
00363       {
00364          attribute.clear();
00365          value.clear();
00366 
00367          const char* anchor = pb.skipWhitespace();
00368          pb.skipToOneOf(ParseBuffer::Whitespace, Symbols::EQUALS);
00369          pb.data(attribute, anchor);
00370          XMLCursor::decodeName(attribute);
00371 
00372          StackLog(<< "attribute: " << attribute);
00373 
00374          pb.skipWhitespace();
00375          pb.skipToChar(Symbols::EQUALS[0]);
00376          pb.skipChar();
00377          pb.skipWhitespace();
00378          if (!pb.eof())
00379          {
00380             const char quote = *pb.position();
00381 
00382             StackLog(<< "quote is <" << quote << ">");
00383             
00384             if (quote != Symbols::DOUBLE_QUOTE[0] &&
00385                 quote != '\'')
00386             {
00387                InfoLog(<< "XML: badly quoted attribute value");
00388                pb.fail(__FILE__, __LINE__);
00389             }
00390             anchor = pb.skipChar();
00391             pb.skipToChar(quote);
00392             pb.data(value, anchor);
00393             XMLCursor::decode(value);
00394             pb.skipChar();
00395             mAttributes[attribute] = value;
00396          }
00397          pb.skipWhitespace();
00398       }
00399    }
00400 
00401    return mAttributes;
00402 }
00403 
00404 const Data&
00405 XMLCursor::getValue() const
00406 {
00407    if (atLeaf())
00408    {
00409       ParseBuffer pb(mCursor->mPb);
00410       pb.skipToEnd();
00411       mValue = pb.data(pb.start());
00412       XMLCursor::decode(mValue);
00413    }
00414    else
00415    {
00416       mValue.clear();
00417    }
00418    return mValue;
00419 }
00420 
00421 EncodeStream&
00422 XMLCursor::encode(EncodeStream& str, const AttributeMap& attrs)
00423 {
00424    for(AttributeMap::const_iterator i = attrs.begin();
00425        i != attrs.end(); ++i)
00426    {
00427       if (i != attrs.begin())
00428       {
00429          str << " ";
00430       }
00431       // !dlb! some sort of character encoding required here
00432       str << i->first << "=\"" << i->second << "\"";
00433    }
00434 
00435    return str;
00436 }
00437 
00438 XMLCursor::Node::Node(const ParseBuffer& pb)
00439    : mPb(pb.position(), pb.end() - pb.position()),
00440      mParent(0),
00441      mChildren(),
00442      mNext(mChildren.begin()),
00443      mIsLeaf(false)
00444 {
00445    mPb.assertNotEof();
00446    StackLog(<< "XMLCursor::Node::Node" << *this);
00447 }
00448 
00449 XMLCursor::Node::~Node()
00450 {
00451    for (vector<Node*>::iterator i = mChildren.begin();
00452         i != mChildren.end(); ++i)
00453    {
00454       delete *i;
00455    }
00456 }
00457 
00458 // start:
00459 //<foo >
00460 //^
00461 // end:
00462 //<foo >
00463 //      ^
00464 static Data SLASH_RA_QUOTE("/>");
00465 bool
00466 XMLCursor::Node::extractTag()
00467 {
00468    ParseBuffer pb(mPb);
00469    const char* anchor = pb.skipChar();
00470    pb.skipToOneOf(ParseBuffer::Whitespace, SLASH_RA_QUOTE);
00471    pb.assertNotEof();
00472    pb.data(mTag, anchor);
00473 
00474    return !pb.eof() && *pb.position() == Symbols::SLASH[0];
00475 }
00476 
00477 void
00478 XMLCursor::Node::addChild(Node* child)
00479 {
00480    mChildren.push_back(child);
00481    child->mParent = this;
00482 }
00483 
00484 //<foo> <bar> </bar> <baz> </baz> </foo>
00485 //^start
00486 //      ^child      
00487 //                   ^child
00488 //                                ^end
00489 //
00490 //<foo> sdfsf sadfsf <bar> asdfdf </bar> sadfsdf </foo>
00491 //^start
00492 //      ^child
00493 //                   ^child sub 
00494 //                                      ^child
00495 void
00496 XMLCursor::Node::skipToEndTag()
00497 {
00498    extractTag();
00499    StackLog(<< "XMLCursor::Node::skipToEndTag(" <<  mTag << ")");
00500    //StackLog(<< "XMLCursor::Node::skipToEndTag(" << Data(mPb.position(), mPb.end() - mPb.position()) << ")");
00501 
00502    //<foo />
00503    mPb.skipToChar(Symbols::RA_QUOTE[0]);
00504    if (*(mPb.position()-1) == Symbols::SLASH[0])
00505    {
00506       mPb.skipChar();
00507       mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start());
00508       return;
00509    }
00510 
00511    //<foo> ...<child> ... </child> </foo>
00512    //    ^
00513    mPb.skipChar();
00514    //<foo> ...<child> ... </child> </foo>
00515    //     ^
00516    while (true)
00517    {
00518       if (!WhitespaceSignificant)
00519       {
00520          mPb.skipWhitespace();
00521       }
00522 
00523       // Some text contents ...<
00524       // ^                     ^
00525       if (*mPb.position() != Symbols::LA_QUOTE[0])
00526       {
00527          const char* anchor = mPb.position();
00528          mPb.skipToChar(Symbols::LA_QUOTE[0]);
00529          Node* leaf = new Node(ParseBuffer(anchor, mPb.position() - anchor));
00530          leaf->mIsLeaf = true;
00531          addChild(leaf);
00532       }
00533 
00534       //<...
00535       //^
00536       mPb.skipChar();
00537       //<...
00538       // ^
00539 
00540       // exit condition
00541       //</foo>
00542       if (*mPb.position() == Symbols::SLASH[0])
00543       {
00544          mPb.skipChar();
00545          // CodeWarrior isn't helpful enough to pick the "obvious" operator definition
00546          // so we add volatile here so CW is completely unconfused what to do.
00547                  // second note - MSVC 7.0 won't compile the volatile - tried the following to fix
00548                  const char* end = mPb.position();
00549          if ( (const char*)mPb.end() < end + mTag.size() )
00550          {
00551             InfoLog(<< "XML: unexpected end");
00552             mPb.fail(__FILE__, __LINE__);
00553          }
00554 
00555          if (strncmp(mTag.data(), mPb.position(), mTag.size()) == 0)
00556          {
00557             mPb.skipToChar(Symbols::RA_QUOTE[0]);
00558             mPb.skipChar();
00559             mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start());
00560             return;
00561          }
00562          else
00563          {
00564             InfoLog(<< "Badly formed XML: unexpected endtag");
00565             mPb.fail(__FILE__, __LINE__);
00566          }
00567       }
00568 
00569       //<child>...
00570       // ^
00571       if (mPb.position() == mPb.start())
00572       {
00573          InfoLog(<< "XML: badly formed element");
00574          mPb.fail(__FILE__, __LINE__);
00575       }
00576 
00577       mPb.reset(mPb.position()-1);
00578       //<child>...
00579       //^
00580       Node* child = new Node(mPb);
00581       addChild(child);
00582       child->skipToEndTag();
00583       mPb.reset(child->mPb.end());
00584       XMLCursor::decodeName(child->mTag);
00585       StackLog(<< mTag << "(" << child->mTag << ")");
00586     }
00587 }
00588 
00589 //<!-- declarations for <head> & <body> -->
00590 const char*
00591 XMLCursor::Node::skipComments(ParseBuffer& pb)
00592 {
00593    while (*pb.position() == Symbols::LA_QUOTE[0] &&
00594           *(pb.position()+1) == BANG[0] &&
00595           *(pb.position()+2) == HYPHEN[0] &&
00596           *(pb.position()+3) == HYPHEN[0])
00597    {
00598       pb.skipToChars(COMMENT_END);
00599       pb.skipChars(COMMENT_END);
00600       pb.skipWhitespace();
00601       if(pb.eof())
00602       {
00603          return pb.end();
00604       }
00605    }
00606 
00607    return pb.position();
00608 }
00609 
00610 EncodeStream&
00611 resip::operator<<(EncodeStream& str, const XMLCursor::Node& node)
00612 {
00613    Data::size_type size = node.mPb.end() - node.mPb.start();
00614 
00615    static const Data::size_type showSize(35);
00616 
00617    str << &node << "[" 
00618        << Data(node.mPb.start(), 
00619                min(showSize, size))
00620         << "]" << (size ? "" : "...");
00621 
00622    return str;
00623 }
00624 
00625 EncodeStream&
00626 resip::operator<<(EncodeStream& str, const XMLCursor& cursor)
00627 {
00628    str << "XMLCursor " << *cursor.mCursor;
00629    return str;
00630 }
00631 
00632 /* ====================================================================
00633  * The Vovida Software License, Version 1.0 
00634  * 
00635  * Copyright (c) 2000 Vovida Networks, Inc.  All rights reserved.
00636  * 
00637  * Redistribution and use in source and binary forms, with or without
00638  * modification, are permitted provided that the following conditions
00639  * are met:
00640  * 
00641  * 1. Redistributions of source code must retain the above copyright
00642  *    notice, this list of conditions and the following disclaimer.
00643  * 
00644  * 2. Redistributions in binary form must reproduce the above copyright
00645  *    notice, this list of conditions and the following disclaimer in
00646  *    the documentation and/or other materials provided with the
00647  *    distribution.
00648  * 
00649  * 3. The names "VOCAL", "Vovida Open Communication Application Library",
00650  *    and "Vovida Open Communication Application Library (VOCAL)" must
00651  *    not be used to endorse or promote products derived from this
00652  *    software without prior written permission. For written
00653  *    permission, please contact vocal@vovida.org.
00654  *
00655  * 4. Products derived from this software may not be called "VOCAL", nor
00656  *    may "VOCAL" appear in their name, without prior written
00657  *    permission of Vovida Networks, Inc.
00658  * 
00659  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
00660  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00661  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
00662  * NON-INFRINGEMENT ARE DISCLAIMED.  IN NO EVENT SHALL VOVIDA
00663  * NETWORKS, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT DAMAGES
00664  * IN EXCESS OF $1,000, NOR FOR ANY INDIRECT, INCIDENTAL, SPECIAL,
00665  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00666  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00667  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
00668  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00669  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
00670  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
00671  * DAMAGE.
00672  * 
00673  * ====================================================================
00674  * 
00675  * This software consists of voluntary contributions made by Vovida
00676  * Networks, Inc. and many individuals on behalf of Vovida Networks,
00677  * Inc.  For more information on Vovida Networks, Inc., please see
00678  * <http://www.vovida.org/>.
00679  *
00680  */