|
reSIProcate/stack
9373
|
00001 #if defined(HAVE_CONFIG_H) 00002 #include "resip/stack/config.hxx" 00003 #endif 00004 00005 #include "resip/stack/XMLCursor.hxx" 00006 #include "resip/stack/Symbols.hxx" 00007 #include "rutil/Logger.hxx" 00008 #include "rutil/WinLeakCheck.hxx" 00009 00010 using namespace resip; 00011 using namespace std; 00012 00013 #define RESIPROCATE_SUBSYSTEM Subsystem::CONTENTS 00014 00040 static char BANG[] = "!"; 00041 static char HYPHEN[] = "-"; 00042 //http://www.w3.org/TR/1998/REC-xml-19980210 00043 static const Data COMMENT_START("<!--"); 00044 static const Data COMMENT_END("-->"); 00045 00046 // An alternative to stripping comments out in preparse 00047 // is to deal with them in the parse; ignore when after non-leaf element 00048 // put a leaf after a comment after a leaf in the first leaf's children 00049 // getValue() needs to copy first leaf and all 'child' leaves to mValue 00050 // 00051 // has the advantage of allowing 00052 // 1. lazier parsing 00053 // 2. embedded wierdnesses like <! > and <? > 00054 XMLCursor::XMLCursor(const ParseBuffer& pb) 00055 : mRoot(0), 00056 mCursor(0), 00057 mAttributesSet(false) 00058 { 00059 ParseBuffer lPb(pb); 00060 00061 skipProlog(lPb); 00062 const char* start = lPb.position(); 00063 00064 lPb.skipToChars(COMMENT_START); 00065 if (!lPb.eof()) 00066 { 00067 StackLog(<< "removing comments"); 00068 lPb.reset(start); 00069 mData.reserve(lPb.end() - lPb.start()); 00070 00071 const char* anchor = start; 00072 { 00073 DataStream str(mData); 00074 Data temp; 00075 while (true) 00076 { 00077 lPb.skipToChars(COMMENT_START); 00078 if (!lPb.eof()) 00079 { 00080 lPb.data(temp, anchor); 00081 str << temp; 00082 anchor = Node::skipComments(lPb); 00083 } 00084 else 00085 { 00086 lPb.data(temp, anchor); 00087 str << temp; 00088 break; 00089 } 00090 } 00091 } 00092 mRoot = new Node(ParseBuffer(mData.data(), mData.size())); 00093 } 00094 else 00095 { 00096 mRoot = new Node(ParseBuffer(start, pb.end() - start)); 00097 } 00098 mCursor = mRoot; 00099 00100 if (mRoot->extractTag()) 00101 { 00102 InfoLog(<< "XML: empty element no a legal root"); 00103 mRoot->mPb.fail(__FILE__, __LINE__); 00104 } 00105 00106 mTag = mRoot->mTag; 00107 decodeName(mRoot->mTag); 00108 00109 // check for # & and note -- make decode, decodeName do stuff if set 00110 00111 //<top></top> // no children 00112 ParseBuffer pbtemp(mRoot->mPb); 00113 pbtemp.skipToChar(Symbols::RA_QUOTE[0]); 00114 pbtemp.skipChar(); 00115 if (!WhitespaceSignificant) 00116 { 00117 pbtemp.skipWhitespace(); 00118 } 00119 if (*pbtemp.position() == Symbols::LA_QUOTE[0] && 00120 *(pbtemp.position()+1) == Symbols::SLASH[0]) 00121 { 00122 pbtemp.skipChar(); 00123 pbtemp.skipChar(); 00124 if (strncmp(mRoot->mTag.data(), pbtemp.position(), mRoot->mTag.size()) == 0) 00125 { 00126 // no children ever 00127 mRoot->mPb.reset(mRoot->mPb.end()); 00128 return; 00129 } 00130 } 00131 } 00132 00133 XMLCursor::~XMLCursor() 00134 { 00135 delete mRoot; 00136 } 00137 00138 static const Data QUESTION_RA_QUOTE("?>"); 00139 void 00140 XMLCursor::skipProlog(ParseBuffer& pb) 00141 { 00142 //'<?xml' VersionInfo '<xml?' EncodingDecl '?>'? '<?xml' SDDecl '?>'? S? '?> 00143 00144 // !dlb! much more complicated than this.. can contain comments 00145 const char* start = pb.position(); 00146 pb.skipToChars(QUESTION_RA_QUOTE); 00147 if(pb.eof()) 00148 { 00149 // No Prolog 00150 pb.reset(start); 00151 return; 00152 } 00153 pb.skipN(2); 00154 pb.skipWhitespace(); 00155 } 00156 00157 void 00158 XMLCursor::decode(Data& text) 00159 { 00160 } 00161 00162 void 00163 XMLCursor::decodeName(Data& name) 00164 { 00165 } 00166 00167 void 00168 XMLCursor::parseNextRootChild() 00169 { 00170 // no next child to parse? 00171 if (mRoot->mPb.eof()) 00172 { 00173 return; 00174 } 00175 00176 // next child already parsed? 00177 if (mRoot->mNext != mRoot->mChildren.end()) 00178 { 00179 return; 00180 } 00181 00182 // skip self tag 00183 if (mRoot->mPb.position() == mRoot->mPb.start()) 00184 { 00185 mRoot->mPb.skipToChar(Symbols::RA_QUOTE[0]); 00186 mRoot->mPb.skipChar(); 00187 } 00188 00189 if (!WhitespaceSignificant) 00190 { 00191 mRoot->mPb.skipWhitespace(); 00192 } 00193 00194 // root end tag? 00195 if (*mRoot->mPb.position() == Symbols::LA_QUOTE[0]) 00196 { 00197 ParseBuffer pb(mRoot->mPb.position(), 00198 mRoot->mPb.end() - mRoot->mPb.position()); 00199 pb.skipChar(); 00200 if (!pb.eof() && *pb.position() == Symbols::SLASH[0]) 00201 { 00202 pb.skipChar(); 00203 // CodeWarrior isn't helpful enough to pick the "obvious" operator definition 00204 // so we add volatile here so CW is completely unconfused what to do. 00205 // second note - MSVC 7.0 won't compile the volatile - tried the following to fix 00206 const char* end = pb.position(); 00207 if ( (const char*)pb.end() < end + mTag.size() ) 00208 { 00209 InfoLog(<< "XML: unexpected end"); 00210 pb.fail(__FILE__, __LINE__); 00211 } 00212 00213 if (strncmp(mTag.data(), pb.position(), mRoot->mTag.size()) == 0) 00214 { 00215 mRoot->mPb.skipToEnd(); 00216 return; 00217 } 00218 } 00219 } 00220 00221 // leaf? 00222 if (*mRoot->mPb.position() != Symbols::LA_QUOTE[0]) 00223 { 00224 const char* anchor = mRoot->mPb.position(); 00225 mRoot->mPb.skipToChar(Symbols::LA_QUOTE[0]); 00226 Node* leaf = new Node(ParseBuffer(anchor, mRoot->mPb.position() - anchor)); 00227 leaf->mIsLeaf = true; 00228 mRoot->addChild(leaf); 00229 } 00230 else 00231 { 00232 Node* child = new Node(mRoot->mPb); 00233 child->skipToEndTag(); 00234 00235 // leave the parse buffer after the child 00236 mRoot->mPb.reset(child->mPb.end()); 00237 00238 mRoot->addChild(child); 00239 } 00240 00241 // mNext always points at cursored child 00242 mRoot->mNext = mRoot->mChildren.end(); 00243 mRoot->mNext--; 00244 } 00245 00246 bool 00247 XMLCursor::nextSibling() 00248 { 00249 if (atRoot()) 00250 { 00251 StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " <<root>>"); 00252 return false; 00253 } 00254 00255 StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " " << *this->mCursor->mParent); 00256 if (mCursor->mParent == mRoot) 00257 { 00258 parseNextRootChild(); 00259 } 00260 00261 if (mCursor->mParent->mNext != mCursor->mParent->mChildren.end()) 00262 { 00263 mCursor = *((mCursor->mParent->mNext)++); 00264 mAttributesSet = false; 00265 return true; 00266 } 00267 else 00268 { 00269 return false; 00270 } 00271 } 00272 00273 bool 00274 XMLCursor::firstChild() 00275 { 00276 if (atRoot() && 00277 mRoot->mChildren.empty()) 00278 { 00279 parseNextRootChild(); 00280 } 00281 00282 if (mCursor->mChildren.empty()) 00283 { 00284 return false; 00285 } 00286 else 00287 { 00288 // mNext always points after cursored child 00289 mCursor->mNext = mCursor->mChildren.begin(); 00290 mCursor->mNext++; 00291 mCursor = mCursor->mChildren.front(); 00292 mAttributesSet = false; 00293 return true; 00294 } 00295 } 00296 00297 bool 00298 XMLCursor::parent() 00299 { 00300 if (atRoot()) 00301 { 00302 return false; 00303 } 00304 00305 mCursor = mCursor->mParent; 00306 mAttributesSet = false; 00307 return true; 00308 } 00309 00310 void 00311 XMLCursor::reset() 00312 { 00313 mCursor = mRoot; 00314 mAttributesSet = false; 00315 } 00316 00317 bool 00318 XMLCursor::atRoot() const 00319 { 00320 return mCursor == mRoot; 00321 } 00322 00323 bool 00324 XMLCursor::atLeaf() const 00325 { 00326 return mCursor->mIsLeaf; 00327 } 00328 00329 const Data& 00330 XMLCursor::getTag() const 00331 { 00332 return mCursor->mTag; 00333 } 00334 00335 //<foo > 00336 //<foo> 00337 //<foo/> 00338 //<foo attr = 'value' attr="value"> 00339 //<foo attr = 'value' attr="value" > 00340 // 00341 //<foo attr = 'value' attr="value" /> 00342 static const Data RA_QUOTE_SLASH(">/"); 00343 const XMLCursor::AttributeMap& 00344 XMLCursor::getAttributes() const 00345 { 00346 if (!atLeaf() && 00347 !mAttributesSet) 00348 { 00349 mAttributes.clear(); 00350 mAttributesSet = true; 00351 00352 ParseBuffer pb(mCursor->mPb); 00353 pb.reset(mCursor->mPb.start()); 00354 00355 Data attribute; 00356 Data value; 00357 00358 pb.skipToOneOf(ParseBuffer::Whitespace, RA_QUOTE_SLASH); 00359 00360 while (!pb.eof() && 00361 *pb.position() != Symbols::RA_QUOTE[0] && 00362 *pb.position() != Symbols::SLASH[0]) 00363 { 00364 attribute.clear(); 00365 value.clear(); 00366 00367 const char* anchor = pb.skipWhitespace(); 00368 pb.skipToOneOf(ParseBuffer::Whitespace, Symbols::EQUALS); 00369 pb.data(attribute, anchor); 00370 XMLCursor::decodeName(attribute); 00371 00372 StackLog(<< "attribute: " << attribute); 00373 00374 pb.skipWhitespace(); 00375 pb.skipToChar(Symbols::EQUALS[0]); 00376 pb.skipChar(); 00377 pb.skipWhitespace(); 00378 if (!pb.eof()) 00379 { 00380 const char quote = *pb.position(); 00381 00382 StackLog(<< "quote is <" << quote << ">"); 00383 00384 if (quote != Symbols::DOUBLE_QUOTE[0] && 00385 quote != '\'') 00386 { 00387 InfoLog(<< "XML: badly quoted attribute value"); 00388 pb.fail(__FILE__, __LINE__); 00389 } 00390 anchor = pb.skipChar(); 00391 pb.skipToChar(quote); 00392 pb.data(value, anchor); 00393 XMLCursor::decode(value); 00394 pb.skipChar(); 00395 mAttributes[attribute] = value; 00396 } 00397 pb.skipWhitespace(); 00398 } 00399 } 00400 00401 return mAttributes; 00402 } 00403 00404 const Data& 00405 XMLCursor::getValue() const 00406 { 00407 if (atLeaf()) 00408 { 00409 ParseBuffer pb(mCursor->mPb); 00410 pb.skipToEnd(); 00411 mValue = pb.data(pb.start()); 00412 XMLCursor::decode(mValue); 00413 } 00414 else 00415 { 00416 mValue.clear(); 00417 } 00418 return mValue; 00419 } 00420 00421 EncodeStream& 00422 XMLCursor::encode(EncodeStream& str, const AttributeMap& attrs) 00423 { 00424 for(AttributeMap::const_iterator i = attrs.begin(); 00425 i != attrs.end(); ++i) 00426 { 00427 if (i != attrs.begin()) 00428 { 00429 str << " "; 00430 } 00431 // !dlb! some sort of character encoding required here 00432 str << i->first << "=\"" << i->second << "\""; 00433 } 00434 00435 return str; 00436 } 00437 00438 XMLCursor::Node::Node(const ParseBuffer& pb) 00439 : mPb(pb.position(), pb.end() - pb.position()), 00440 mParent(0), 00441 mChildren(), 00442 mNext(mChildren.begin()), 00443 mIsLeaf(false) 00444 { 00445 mPb.assertNotEof(); 00446 StackLog(<< "XMLCursor::Node::Node" << *this); 00447 } 00448 00449 XMLCursor::Node::~Node() 00450 { 00451 for (vector<Node*>::iterator i = mChildren.begin(); 00452 i != mChildren.end(); ++i) 00453 { 00454 delete *i; 00455 } 00456 } 00457 00458 // start: 00459 //<foo > 00460 //^ 00461 // end: 00462 //<foo > 00463 // ^ 00464 static Data SLASH_RA_QUOTE("/>"); 00465 bool 00466 XMLCursor::Node::extractTag() 00467 { 00468 ParseBuffer pb(mPb); 00469 const char* anchor = pb.skipChar(); 00470 pb.skipToOneOf(ParseBuffer::Whitespace, SLASH_RA_QUOTE); 00471 pb.assertNotEof(); 00472 pb.data(mTag, anchor); 00473 00474 return !pb.eof() && *pb.position() == Symbols::SLASH[0]; 00475 } 00476 00477 void 00478 XMLCursor::Node::addChild(Node* child) 00479 { 00480 mChildren.push_back(child); 00481 child->mParent = this; 00482 } 00483 00484 //<foo> <bar> </bar> <baz> </baz> </foo> 00485 //^start 00486 // ^child 00487 // ^child 00488 // ^end 00489 // 00490 //<foo> sdfsf sadfsf <bar> asdfdf </bar> sadfsdf </foo> 00491 //^start 00492 // ^child 00493 // ^child sub 00494 // ^child 00495 void 00496 XMLCursor::Node::skipToEndTag() 00497 { 00498 extractTag(); 00499 StackLog(<< "XMLCursor::Node::skipToEndTag(" << mTag << ")"); 00500 //StackLog(<< "XMLCursor::Node::skipToEndTag(" << Data(mPb.position(), mPb.end() - mPb.position()) << ")"); 00501 00502 //<foo /> 00503 mPb.skipToChar(Symbols::RA_QUOTE[0]); 00504 if (*(mPb.position()-1) == Symbols::SLASH[0]) 00505 { 00506 mPb.skipChar(); 00507 mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start()); 00508 return; 00509 } 00510 00511 //<foo> ...<child> ... </child> </foo> 00512 // ^ 00513 mPb.skipChar(); 00514 //<foo> ...<child> ... </child> </foo> 00515 // ^ 00516 while (true) 00517 { 00518 if (!WhitespaceSignificant) 00519 { 00520 mPb.skipWhitespace(); 00521 } 00522 00523 // Some text contents ...< 00524 // ^ ^ 00525 if (*mPb.position() != Symbols::LA_QUOTE[0]) 00526 { 00527 const char* anchor = mPb.position(); 00528 mPb.skipToChar(Symbols::LA_QUOTE[0]); 00529 Node* leaf = new Node(ParseBuffer(anchor, mPb.position() - anchor)); 00530 leaf->mIsLeaf = true; 00531 addChild(leaf); 00532 } 00533 00534 //<... 00535 //^ 00536 mPb.skipChar(); 00537 //<... 00538 // ^ 00539 00540 // exit condition 00541 //</foo> 00542 if (*mPb.position() == Symbols::SLASH[0]) 00543 { 00544 mPb.skipChar(); 00545 // CodeWarrior isn't helpful enough to pick the "obvious" operator definition 00546 // so we add volatile here so CW is completely unconfused what to do. 00547 // second note - MSVC 7.0 won't compile the volatile - tried the following to fix 00548 const char* end = mPb.position(); 00549 if ( (const char*)mPb.end() < end + mTag.size() ) 00550 { 00551 InfoLog(<< "XML: unexpected end"); 00552 mPb.fail(__FILE__, __LINE__); 00553 } 00554 00555 if (strncmp(mTag.data(), mPb.position(), mTag.size()) == 0) 00556 { 00557 mPb.skipToChar(Symbols::RA_QUOTE[0]); 00558 mPb.skipChar(); 00559 mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start()); 00560 return; 00561 } 00562 else 00563 { 00564 InfoLog(<< "Badly formed XML: unexpected endtag"); 00565 mPb.fail(__FILE__, __LINE__); 00566 } 00567 } 00568 00569 //<child>... 00570 // ^ 00571 if (mPb.position() == mPb.start()) 00572 { 00573 InfoLog(<< "XML: badly formed element"); 00574 mPb.fail(__FILE__, __LINE__); 00575 } 00576 00577 mPb.reset(mPb.position()-1); 00578 //<child>... 00579 //^ 00580 Node* child = new Node(mPb); 00581 addChild(child); 00582 child->skipToEndTag(); 00583 mPb.reset(child->mPb.end()); 00584 XMLCursor::decodeName(child->mTag); 00585 StackLog(<< mTag << "(" << child->mTag << ")"); 00586 } 00587 } 00588 00589 //<!-- declarations for <head> & <body> --> 00590 const char* 00591 XMLCursor::Node::skipComments(ParseBuffer& pb) 00592 { 00593 while (*pb.position() == Symbols::LA_QUOTE[0] && 00594 *(pb.position()+1) == BANG[0] && 00595 *(pb.position()+2) == HYPHEN[0] && 00596 *(pb.position()+3) == HYPHEN[0]) 00597 { 00598 pb.skipToChars(COMMENT_END); 00599 pb.skipChars(COMMENT_END); 00600 pb.skipWhitespace(); 00601 if(pb.eof()) 00602 { 00603 return pb.end(); 00604 } 00605 } 00606 00607 return pb.position(); 00608 } 00609 00610 EncodeStream& 00611 resip::operator<<(EncodeStream& str, const XMLCursor::Node& node) 00612 { 00613 Data::size_type size = node.mPb.end() - node.mPb.start(); 00614 00615 static const Data::size_type showSize(35); 00616 00617 str << &node << "[" 00618 << Data(node.mPb.start(), 00619 min(showSize, size)) 00620 << "]" << (size ? "" : "..."); 00621 00622 return str; 00623 } 00624 00625 EncodeStream& 00626 resip::operator<<(EncodeStream& str, const XMLCursor& cursor) 00627 { 00628 str << "XMLCursor " << *cursor.mCursor; 00629 return str; 00630 } 00631 00632 /* ==================================================================== 00633 * The Vovida Software License, Version 1.0 00634 * 00635 * Copyright (c) 2000 Vovida Networks, Inc. All rights reserved. 00636 * 00637 * Redistribution and use in source and binary forms, with or without 00638 * modification, are permitted provided that the following conditions 00639 * are met: 00640 * 00641 * 1. Redistributions of source code must retain the above copyright 00642 * notice, this list of conditions and the following disclaimer. 00643 * 00644 * 2. Redistributions in binary form must reproduce the above copyright 00645 * notice, this list of conditions and the following disclaimer in 00646 * the documentation and/or other materials provided with the 00647 * distribution. 00648 * 00649 * 3. The names "VOCAL", "Vovida Open Communication Application Library", 00650 * and "Vovida Open Communication Application Library (VOCAL)" must 00651 * not be used to endorse or promote products derived from this 00652 * software without prior written permission. For written 00653 * permission, please contact vocal@vovida.org. 00654 * 00655 * 4. Products derived from this software may not be called "VOCAL", nor 00656 * may "VOCAL" appear in their name, without prior written 00657 * permission of Vovida Networks, Inc. 00658 * 00659 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED 00660 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00661 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND 00662 * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL VOVIDA 00663 * NETWORKS, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT DAMAGES 00664 * IN EXCESS OF $1,000, NOR FOR ANY INDIRECT, INCIDENTAL, SPECIAL, 00665 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00666 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00667 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 00668 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00669 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 00670 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 00671 * DAMAGE. 00672 * 00673 * ==================================================================== 00674 * 00675 * This software consists of voluntary contributions made by Vovida 00676 * Networks, Inc. and many individuals on behalf of Vovida Networks, 00677 * Inc. For more information on Vovida Networks, Inc., please see 00678 * <http://www.vovida.org/>. 00679 * 00680 */
1.7.5.1