/[resiprocate]/branches/b-directory-reorg/sip/resiprocate/XMLCursor.cxx
ViewVC logotype

Contents of /branches/b-directory-reorg/sip/resiprocate/XMLCursor.cxx

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5271 - (show annotations) (download)
Thu Aug 18 23:43:07 2005 UTC (14 years, 3 months ago) by jason
File size: 16481 byte(s)
new directory reorg proposal
1 #if defined(HAVE_CONFIG_H)
2 #include "resiprocate/config.hxx"
3 #endif
4
5 #include "resiprocate/XMLCursor.hxx"
6 #include "resiprocate/Symbols.hxx"
7 #include "resiprocate/os/Logger.hxx"
8 #include "resiprocate/os/WinLeakCheck.hxx"
9
10 //#ifndef `WIN32 // !cj! TODO FIX
11 #if 1
12
13 using namespace resip;
14 using namespace std;
15
16 #define RESIPROCATE_SUBSYSTEM Subsystem::CONTENTS
17
18 /**
19 Whitespace handling:
20 Are the following XML fragments equivalent?
21
22 Strictly interpreted, the root of the first XML document has one
23 child while the root of the second XML document has three children.
24 The line breaks and spaces after the <root> and before </root> are
25 tagless children.
26
27 --->
28 <root><child>child content</child></root>
29 <--
30 vs.
31 --->
32 <root>
33 <child>child content</child>
34 </root>
35 <--
36
37 Treating whitespace as children is consistent with the spec but not usually
38 convenient. <!ATTLIST poem xml:space (default|preserve) 'preserve'> is used to
39 control whitespace handling. Supporting this switch is painful. For now, treat
40 whitespace as non-significant.
41 */
42
43 static char BANG[] = "!";
44 static char HYPHEN[] = "-";
45 //http://www.w3.org/TR/1998/REC-xml-19980210
46 static const Data COMMENT_START("<!--");
47 static const Data COMMENT_END("-->");
48
49 // An alternative to stripping comments out in preparse
50 // is to deal with them in the parse; ignore when after non-leaf element
51 // put a leaf after a comment after a leaf in the first leaf's children
52 // getValue() needs to copy first leaf and all 'child' leaves to mValue
53 //
54 // has the advantage of allowing
55 // 1. lazier parsing
56 // 2. embedded weirdnesses like <! > and <? >
57 XMLCursor::XMLCursor(const ParseBuffer& pb)
58 : mRoot(0),
59 mCursor(0),
60 mAttributesSet(false)
61 {
62 ParseBuffer lPb(pb);
63
64 skipProlog(lPb);
65 const char* start = lPb.position();
66
67 lPb.skipToChars(COMMENT_START);
68 if (!lPb.eof())
69 {
70 StackLog(<< "removing comments");
71 lPb.reset(start);
72 mData.reserve(lPb.end() - lPb.start());
73
74 const char* anchor = lPb.position();
75 {
76 DataStream str(mData);
77 Data temp;
78 while (true)
79 {
80 lPb.skipToChars(COMMENT_START);
81 if (!lPb.eof())
82 {
83 lPb.data(temp, anchor);
84 str << temp;
85 anchor = Node::skipComments(lPb);
86 }
87 else
88 {
89 break;
90 }
91 }
92 }
93 mRoot = new Node(ParseBuffer(mData.data(), mData.size()));
94 }
95 else
96 {
97 mRoot = new Node(ParseBuffer(start, pb.end() - start));
98 }
99 mCursor = mRoot;
100
101 if (mRoot->extractTag())
102 {
103 InfoLog(<< "XML: empty element no a legal root");
104 mRoot->mPb.fail(__FILE__, __LINE__);
105 }
106
107 mTag = mRoot->mTag;
108 decodeName(mRoot->mTag);
109
110 // check for # & and note -- make decode, decodeName do stuff if set
111
112 //<top></top> // no children
113 lPb.reset(lPb.start());
114 lPb.skipToChar(Symbols::RA_QUOTE[0]);
115 lPb.skipChar();
116 if (!WhitespaceSignificant)
117 {
118 lPb.skipWhitespace();
119 }
120 if (*lPb.position() == Symbols::LA_QUOTE[0] &&
121 *(lPb.position()+1) == Symbols::SLASH[0])
122 {
123 lPb.skipChar();
124 lPb.skipChar();
125 if (strncmp(mRoot->mTag.data(), lPb.position(), mRoot->mTag.size()) == 0)
126 {
127 // no children ever
128 mRoot->mPb.reset(mRoot->mPb.end());
129 return;
130 }
131 }
132 }
133
134 XMLCursor::~XMLCursor()
135 {
136 delete mRoot;
137 }
138
139 static const Data QUESTION_RA_QUOTE("?>");
140 void
141 XMLCursor::skipProlog(ParseBuffer& pb)
142 {
143 //'<?xml' VersionInfo '<xml?' EncodingDecl '?>'? '<?xml' SDDecl '?>'? S? '?>
144
145 // !dlb! much more complicated than this.. can contain comments
146 pb.skipToChars(QUESTION_RA_QUOTE);
147 pb.skipN(2);
148 pb.skipWhitespace();
149 }
150
151 void
152 XMLCursor::decode(Data& text)
153 {
154 }
155
156 void
157 XMLCursor::decodeName(Data& name)
158 {
159 }
160
161 void
162 XMLCursor::parseNextRootChild()
163 {
164 // no next child to parse?
165 if (mRoot->mPb.eof())
166 {
167 return;
168 }
169
170 // next child already parsed?
171 if (mRoot->mNext != mRoot->mChildren.end())
172 {
173 return;
174 }
175
176 // skip self tag
177 if (mRoot->mPb.position() == mRoot->mPb.start())
178 {
179 mRoot->mPb.skipToChar(Symbols::RA_QUOTE[0]);
180 mRoot->mPb.skipChar();
181 }
182
183 if (!WhitespaceSignificant)
184 {
185 mRoot->mPb.skipWhitespace();
186 }
187
188 // root end tag?
189 if (*mRoot->mPb.position() == Symbols::LA_QUOTE[0])
190 {
191 ParseBuffer pb(mRoot->mPb.position(),
192 mRoot->mPb.end() - mRoot->mPb.position());
193 pb.skipChar();
194 if (!pb.eof() && *pb.position() == Symbols::SLASH[0])
195 {
196 pb.skipChar();
197 // CodeWarrior isn't helpful enough to pick the "obvious" operator definition
198 // so we add volatile here so CW is completely unconfused what to do.
199 // second note - MSVC 7.0 won't compile the volatile - tried the following to fix
200 const char* end = pb.position();
201 if ( pb.end() < end + mTag.size() )
202 {
203 InfoLog(<< "XML: unexpected end");
204 pb.fail(__FILE__, __LINE__);
205 }
206
207 if (strncmp(mTag.data(), pb.position(), mRoot->mTag.size()) == 0)
208 {
209 mRoot->mPb.skipToEnd();
210 return;
211 }
212 }
213 }
214
215 // leaf?
216 if (*mRoot->mPb.position() != Symbols::LA_QUOTE[0])
217 {
218 const char* anchor = mRoot->mPb.position();
219 mRoot->mPb.skipToChar(Symbols::LA_QUOTE[0]);
220 Node* leaf = new Node(ParseBuffer(anchor, mRoot->mPb.position() - anchor));
221 leaf->mIsLeaf = true;
222 mRoot->addChild(leaf);
223 }
224 else
225 {
226 Node* child = new Node(mRoot->mPb);
227 child->skipToEndTag();
228
229 // leave the parse buffer after the child
230 mRoot->mPb.reset(child->mPb.end());
231
232 mRoot->addChild(child);
233 }
234
235 // mNext always points at cursored child
236 mRoot->mNext = mRoot->mChildren.end();
237 mRoot->mNext--;
238 }
239
240 bool
241 XMLCursor::nextSibling()
242 {
243 if (atRoot())
244 {
245 StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " <<root>>");
246 return false;
247 }
248
249 StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " " << *this->mCursor->mParent);
250 if (mCursor->mParent == mRoot)
251 {
252 parseNextRootChild();
253 }
254
255 if (mCursor->mParent->mNext != mCursor->mParent->mChildren.end())
256 {
257 mCursor = *((mCursor->mParent->mNext)++);
258 mAttributesSet = false;
259 return true;
260 }
261 else
262 {
263 return false;
264 }
265 }
266
267 bool
268 XMLCursor::firstChild()
269 {
270 if (atRoot() &&
271 mRoot->mChildren.empty())
272 {
273 parseNextRootChild();
274 }
275
276 if (mCursor->mChildren.empty())
277 {
278 return false;
279 }
280 else
281 {
282 // mNext always points after cursored child
283 mCursor->mNext = mCursor->mChildren.begin();
284 mCursor->mNext++;
285 mCursor = mCursor->mChildren.front();
286 mAttributesSet = false;
287 return true;
288 }
289 }
290
291 bool
292 XMLCursor::parent()
293 {
294 if (atRoot())
295 {
296 return false;
297 }
298
299 mCursor = mCursor->mParent;
300 mAttributesSet = false;
301 return true;
302 }
303
304 void
305 XMLCursor::reset()
306 {
307 mCursor = mRoot;
308 mAttributesSet = false;
309 }
310
311 bool
312 XMLCursor::atRoot() const
313 {
314 return mCursor == mRoot;
315 }
316
317 bool
318 XMLCursor::atLeaf() const
319 {
320 return mCursor->mIsLeaf;
321 }
322
323 const Data&
324 XMLCursor::getTag() const
325 {
326 return mCursor->mTag;
327 }
328
329 //<foo >
330 //<foo>
331 //<foo/>
332 //<foo attr = 'value' attr="value">
333 //<foo attr = 'value' attr="value" >
334 //
335 //<foo attr = 'value' attr="value" />
336 const XMLCursor::AttributeMap&
337 XMLCursor::getAttributes() const
338 {
339 if (!atLeaf() &&
340 !mAttributesSet)
341 {
342 mAttributes.clear();
343 mAttributesSet = true;
344
345 ParseBuffer pb(mCursor->mPb);
346 pb.reset(mCursor->mPb.start());
347
348 Data attribute;
349 Data value;
350
351 static const Data term(">/");
352 pb.skipToOneOf(ParseBuffer::Whitespace, term);
353
354 while (!pb.eof() &&
355 *pb.position() != Symbols::RA_QUOTE[0] &&
356 *pb.position() != Symbols::SLASH[0])
357 {
358 attribute.clear();
359 value.clear();
360
361 const char* anchor = pb.skipWhitespace();
362 pb.skipToOneOf(ParseBuffer::Whitespace, Symbols::EQUALS);
363 pb.data(attribute, anchor);
364 XMLCursor::decodeName(attribute);
365
366 StackLog(<< "attribute: " << attribute);
367
368 pb.skipWhitespace();
369 pb.skipToChar(Symbols::EQUALS[0]);
370 pb.skipChar();
371 pb.skipWhitespace();
372 if (!pb.eof())
373 {
374 const char quote = *pb.position();
375
376 StackLog(<< "quote is <" << quote << ">");
377
378 if (quote != Symbols::DOUBLE_QUOTE[0] &&
379 quote != '\'')
380 {
381 InfoLog(<< "XML: badly quoted attribute value");
382 pb.fail(__FILE__, __LINE__);
383 }
384 anchor = pb.skipChar();
385 pb.skipToChar(quote);
386 pb.data(value, anchor);
387 XMLCursor::decode(value);
388 pb.skipChar();
389 mAttributes[attribute] = value;
390 }
391 pb.skipWhitespace();
392 }
393 }
394
395 return mAttributes;
396 }
397
398 const Data&
399 XMLCursor::getValue() const
400 {
401 if (atLeaf())
402 {
403 ParseBuffer pb(mCursor->mPb);
404 pb.skipToEnd();
405 mValue = pb.data(pb.start());
406 XMLCursor::decode(mValue);
407 }
408 else
409 {
410 mValue.clear();
411 }
412 return mValue;
413 }
414
415 std::ostream&
416 XMLCursor::encode(std::ostream& str, const AttributeMap& attrs)
417 {
418 for(AttributeMap::const_iterator i = attrs.begin();
419 i != attrs.end(); ++i)
420 {
421 if (i != attrs.begin())
422 {
423 str << " ";
424 }
425 // !dlb! some sort of character encoding required here
426 str << i->first << "=\"" << i->second << "\"";
427 }
428
429 return str;
430 }
431
432 XMLCursor::Node::Node(const ParseBuffer& pb)
433 : mPb(pb.position(), pb.end() - pb.position()),
434 mParent(0),
435 mChildren(),
436 mNext(mChildren.begin()),
437 mIsLeaf(false)
438 {
439 mPb.assertNotEof();
440 StackLog(<< "XMLCursor::Node::Node" << *this);
441 }
442
443 XMLCursor::Node::~Node()
444 {
445 for (vector<Node*>::iterator i = mChildren.begin();
446 i != mChildren.end(); ++i)
447 {
448 delete *i;
449 }
450 }
451
452 // start:
453 //<foo >
454 //^
455 // end:
456 //<foo >
457 // ^
458 bool
459 XMLCursor::Node::extractTag()
460 {
461 ParseBuffer pb(mPb);
462 const char* anchor = pb.skipChar();
463 static Data SLASH_RA_QUOTE("/>");
464 pb.skipToOneOf(ParseBuffer::Whitespace, SLASH_RA_QUOTE);
465 pb.assertNotEof();
466 pb.data(mTag, anchor);
467
468 return !pb.eof() && *pb.position() == Symbols::SLASH[0];
469 }
470
471 void
472 XMLCursor::Node::addChild(Node* child)
473 {
474 mChildren.push_back(child);
475 child->mParent = this;
476 }
477
478 //<foo> <bar> </bar> <baz> </baz> </foo>
479 //^start
480 // ^child
481 // ^child
482 // ^end
483 //
484 //<foo> sdfsf sadfsf <bar> asdfdf </bar> sadfsdf </foo>
485 //^start
486 // ^child
487 // ^child sub
488 // ^child
489 void
490 XMLCursor::Node::skipToEndTag()
491 {
492 extractTag();
493 StackLog(<< "XMLCursor::Node::skipToEndTag(" << mTag << ")");
494 //StackLog(<< "XMLCursor::Node::skipToEndTag(" << Data(mPb.position(), mPb.end() - mPb.position()) << ")");
495
496 //<foo />
497 mPb.skipToChar(Symbols::RA_QUOTE[0]);
498 if (*(mPb.position()-1) == Symbols::SLASH[0])
499 {
500 mPb.skipChar();
501 mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start());
502 return;
503 }
504
505 //<foo> ...<child> ... </child> </foo>
506 // ^
507 mPb.skipChar();
508 //<foo> ...<child> ... </child> </foo>
509 // ^
510 while (true)
511 {
512 if (!WhitespaceSignificant)
513 {
514 mPb.skipWhitespace();
515 }
516
517 // Some text contents ...<
518 // ^ ^
519 if (*mPb.position() != Symbols::LA_QUOTE[0])
520 {
521 const char* anchor = mPb.position();
522 mPb.skipToChar(Symbols::LA_QUOTE[0]);
523 Node* leaf = new Node(ParseBuffer(anchor, mPb.position() - anchor));
524 leaf->mIsLeaf = true;
525 addChild(leaf);
526 }
527
528 //<...
529 //^
530 mPb.skipChar();
531 //<...
532 // ^
533
534 // exit condition
535 //</foo>
536 if (*mPb.position() == Symbols::SLASH[0])
537 {
538 mPb.skipChar();
539 // CodeWarrior isn't helpful enough to pick the "obvious" operator definition
540 // so we add volatile here so CW is completely unconfused what to do.
541 // second note - MSVC 7.0 won't compile the volatile - tried the following to fix
542 const char* end = mPb.position();
543 if ( mPb.end() < end + mTag.size() )
544 {
545 InfoLog(<< "XML: unexpected end");
546 mPb.fail(__FILE__, __LINE__);
547 }
548
549 if (strncmp(mTag.data(), mPb.position(), mTag.size()) == 0)
550 {
551 mPb.skipToChar(Symbols::RA_QUOTE[0]);
552 mPb.skipChar();
553 mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start());
554 return;
555 }
556 else
557 {
558 InfoLog(<< "Badly formed XML: unexpected endtag");
559 mPb.fail(__FILE__, __LINE__);
560 }
561 }
562
563 //<child>...
564 // ^
565 if (mPb.position() == mPb.start())
566 {
567 InfoLog(<< "XML: badly formed element");
568 mPb.fail(__FILE__, __LINE__);
569 }
570
571 mPb.reset(mPb.position()-1);
572 //<child>...
573 //^
574 Node* child = new Node(mPb);
575 addChild(child);
576 child->skipToEndTag();
577 mPb.reset(child->mPb.end());
578 XMLCursor::decodeName(child->mTag);
579 StackLog(<< mTag << "(" << child->mTag << ")");
580 }
581 }
582
583 //<!-- declarations for <head> & <body> -->
584 const char*
585 XMLCursor::Node::skipComments(ParseBuffer& pb)
586 {
587 while (*pb.position() == Symbols::LA_QUOTE[0] &&
588 *(pb.position()+1) == BANG[0] &&
589 *(pb.position()+2) == HYPHEN[0] &&
590 *(pb.position()+3) == HYPHEN[0])
591 {
592 pb.skipToChars(COMMENT_END);
593 pb.assertNotEof();
594 }
595
596 return pb.position();
597 }
598
599 std::ostream&
600 resip::operator<<(std::ostream& str, const XMLCursor::Node& node)
601 {
602 Data::size_type size = node.mPb.end() - node.mPb.start();
603
604 static const Data::size_type showSize(35);
605
606 str << &node << "["
607 << Data(node.mPb.start(),
608 min(showSize, size))
609 << "]" << (size ? "" : "...");
610
611 return str;
612 }
613
614 std::ostream&
615 resip::operator<<(std::ostream& str, const XMLCursor& cursor)
616 {
617 str << "XMLCursor " << *cursor.mCursor;
618 return str;
619 }
620
621 #endif // WIN32
622 /* ====================================================================
623 * The Vovida Software License, Version 1.0
624 *
625 * Copyright (c) 2000 Vovida Networks, Inc. All rights reserved.
626 *
627 * Redistribution and use in source and binary forms, with or without
628 * modification, are permitted provided that the following conditions
629 * are met:
630 *
631 * 1. Redistributions of source code must retain the above copyright
632 * notice, this list of conditions and the following disclaimer.
633 *
634 * 2. Redistributions in binary form must reproduce the above copyright
635 * notice, this list of conditions and the following disclaimer in
636 * the documentation and/or other materials provided with the
637 * distribution.
638 *
639 * 3. The names "VOCAL", "Vovida Open Communication Application Library",
640 * and "Vovida Open Communication Application Library (VOCAL)" must
641 * not be used to endorse or promote products derived from this
642 * software without prior written permission. For written
643 * permission, please contact vocal@vovida.org.
644 *
645 * 4. Products derived from this software may not be called "VOCAL", nor
646 * may "VOCAL" appear in their name, without prior written
647 * permission of Vovida Networks, Inc.
648 *
649 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
650 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
651 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
652 * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL VOVIDA
653 * NETWORKS, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT DAMAGES
654 * IN EXCESS OF $1,000, NOR FOR ANY INDIRECT, INCIDENTAL, SPECIAL,
655 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
656 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
657 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
658 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
659 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
660 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
661 * DAMAGE.
662 *
663 * ====================================================================
664 *
665 * This software consists of voluntary contributions made by Vovida
666 * Networks, Inc. and many individuals on behalf of Vovida Networks,
667 * Inc. For more information on Vovida Networks, Inc., please see
668 * <http://www.vovida.org/>.
669 *
670 */

Properties

Name Value
svn:eol-style LF

webmaster AT resiprocate DOT org
ViewVC Help
Powered by ViewVC 1.1.27