ProteoWizard
SAXParserTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: SAXParserTest.cpp 4243 2012-12-28 20:24:37Z chambm $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2007 Spielberg Family Center for Applied Proteomics
8 // Cedars-Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
25 #include "SAXParser.hpp"
28 #include <cstring>
29 
30 
31 using namespace pwiz::util;
32 using namespace pwiz::minimxml;
33 using namespace pwiz::minimxml::SAXParser;
34 
35 
36 ostream* os_;
37 
38 // note: this tests single-quoted double quotes
39 const char* sampleXML =
40  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
41  "<!DOCTYPE foo>\n"
42  "<RootElement param=\"value\">\n"
43  " <FirstElement escaped_attribute=\"&quot;&lt;&amp;lt;&gt;&quot;\">\n"
44  " Some Text with Entity References: &lt;&amp;&gt;\n"
45  " </FirstElement>\n"
46  " <SecondElement param2=\"something\" param3=\"something.else 1234-56\">\n"
47  " Pre-Text <Inline>Inlined text with <![CDATA[<&\">]]></Inline> Post-text. <br/>\n"
48  " </SecondElement>\n"
49  " <prefix:ThirdElement goober:name=\"value\">\n"
50  " <!--this is a comment-->\n"
51  " <empty_with_space />\n"
52  " </prefix:ThirdElement>\n"
53  " <FifthElement leeloo='>Leeloo > mul-\"tipass'>\n"
54  " You're a monster, Zorg.>I know.\n"
55  " </FifthElement>\n"
56  "</RootElement>\n"
57  "<AnotherRoot>The quick brown fox jumps over the lazy dog.</AnotherRoot>\n";
58 
59 
60 //
61 // demo of event handling
62 //
63 
64 
66 {
67  PrintAttribute(ostream& os) : os_(os) {}
68  ostream& os_;
69 
71  {
72  os_ << " (" << attr.getName() << "," << attr.getValue() << ")";
73  }
74 };
75 
76 
77 class PrintEventHandler : public Handler
78 {
79  public:
80 
81  PrintEventHandler(ostream& os)
82  : os_(os)
83  {}
84 
85  virtual Status processingInstruction(const string& name,
86  const string& value,
88  {
89  os_ << "[0x" << hex << position << "] processingInstruction: (" << name << "," << value << ")\n";
90  return Status::Ok;
91  };
92 
93  virtual Status startElement(const string& name,
94  const Attributes& attributes,
96  {
97  os_ << "[0x" << hex << position << "] startElement: " << name;
98  for_each(attributes.begin(), attributes.end(), PrintAttribute(os_));
99  os_ << endl;
100  return Status::Ok;
101  };
102 
103  virtual Status endElement(const string& name, stream_offset position)
104  {
105  os_ << "[0x" << hex << position << "] endElement: " << name << endl;
106  return Status::Ok;
107  }
108 
110  {
111  os_ << "[0x" << hex << position << "] text: " << text << endl;
112  return Status::Ok;
113  }
114 
115  private:
116  ostream& os_;
117 };
118 
119 
120 void demo()
121 {
122  if (os_)
123  {
124  *os_ << "sampleXML:\n" << sampleXML << endl;
125 
126  istringstream is(sampleXML);
127  PrintEventHandler handler(*os_);
128 
129  *os_ << "first parse events:\n";
130  parse(is, handler);
131  *os_ << endl;
132 
133  *os_ << "second parse events:\n";
134  parse(is, handler);
135  *os_ << endl;
136  }
137 }
138 
139 
140 //
141 // C++ model of the sample XML
142 //
143 
144 
// Model of <FirstElement>; populated by FirstHandler.
struct First
{
    std::string escaped_attribute; // value of the "escaped_attribute" attribute
    std::string text;              // character data reported for the element
};
150 
151 
// Model of <SecondElement>; populated by SecondHandler.
struct Second
{
    std::string param2;            // value of the "param2" attribute
    std::string param3;            // value of the "param3" attribute
    std::vector<std::string> text; // one entry per characters() callback
};
158 
159 
// Model of <FifthElement>; populated by FifthHandler.
struct Fifth
{
    std::string leeloo;  // value of the "leeloo" attribute
    std::string mr_zorg; // character data reported for the element
};
165 
166 
167 struct Root
168 {
169  string param;
173 };
174 
175 
176 //
177 //
178 // Handlers to connect XML to C++ model
179 //
180 
181 
182 void readAttribute(const Handler::Attributes& attributes,
183  const string& attributeName,
184  string& result)
185 {
186  Handler::Attributes::attribute_list::const_iterator it = attributes.find(attributeName);
187  if (it != attributes.end())
188  result = it->getValue();
189 }
190 
191 
192 class FirstHandler : public Handler
193 {
194  public:
195 
196  FirstHandler(First& first, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
197  : object_(first)
198  {
199  parseCharacters = true;
200  this->autoUnescapeAttributes = autoUnescapeAttributes;
201  this->autoUnescapeCharacters = autoUnescapeCharacters;
202  }
203 
204  virtual Status startElement(const string& name,
205  const Handler::Attributes& attributes,
207  {
208  if (name == "FirstElement")
209  readAttribute(attributes, "escaped_attribute", object_.escaped_attribute);
210  return Status::Ok;
211  }
212 
214  {
215  unit_assert_operator_equal(158, position);
216  object_.text = text.c_str();
217  return Status::Ok;
218  }
219 
220  virtual Status endElement(const string& name, stream_offset position)
221  {
222  unit_assert_operator_equal(210, position);
223  return Status::Ok;
224  }
225 
226  private:
228 };
229 
230 
231 class SecondHandler : public Handler
232 {
233  public:
234 
235  SecondHandler(Second& object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
236  : object_(object)
237  {
238  parseCharacters = true;
239  this->autoUnescapeAttributes = autoUnescapeAttributes;
240  this->autoUnescapeCharacters = autoUnescapeCharacters;
241  }
242 
243  virtual Status startElement(const string& name,
244  const Handler::Attributes& attributes,
246  {
247  if (name == "SecondElement")
248  {
249  readAttribute(attributes, "param2", object_.param2);
250  readAttribute(attributes, "param3", object_.param3);
251  // long as we're here, verify copyability of Handler::Attributes
252  Handler::Attributes *copy1 = new Handler::Attributes(attributes);
253  Handler::Attributes copy2(*copy1);
254  delete copy1;
255  std::string str;
256  readAttribute(copy2, "param2", str);
257  unit_assert(str==object_.param2);
258  }
259 
260  return Status::Ok;
261  }
262 
264  {
265  object_.text.push_back(text.c_str());
266  return Status::Ok;
267  }
268 
269  private:
271 };
272 
273 
274 class FifthHandler : public Handler
275 {
276  public:
277 
278  FifthHandler(Fifth& object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
279  : object_(object)
280  {
281  parseCharacters = true;
282  this->autoUnescapeAttributes = autoUnescapeAttributes;
283  this->autoUnescapeCharacters = autoUnescapeCharacters;
284  }
285 
286  virtual Status startElement(const string& name,
287  const Handler::Attributes& attributes,
289  {
290  if (name == "FifthElement")
291  {
292  getAttribute(attributes, "leeloo", object_.leeloo);
293  }
294 
295  return Status::Ok;
296  }
297 
299  {
300  object_.mr_zorg = text.c_str();
301  return Status::Ok;
302  }
303 
304  virtual Status endElement(const string& name, stream_offset position)
305  {
306  unit_assert_operator_equal(625, position);
307  return Status::Ok;
308  }
309 
310  private:
312 };
313 
314 
315 class RootHandler : public Handler
316 {
317  public:
318 
319  RootHandler(Root& root, bool autoUnescapeAttributes = true, bool autoUnescapeCharacters = true)
320  : object_(root),
321  firstHandler_(object_.first, autoUnescapeAttributes, autoUnescapeCharacters),
322  secondHandler_(object_.second, autoUnescapeAttributes, autoUnescapeCharacters),
323  fifthHandler_(object_.fifth, autoUnescapeAttributes, autoUnescapeCharacters)
324  {
325  parseCharacters = true;
326  this->autoUnescapeAttributes = autoUnescapeAttributes;
327  this->autoUnescapeCharacters = autoUnescapeCharacters;
328  }
329 
330  virtual Status startElement(const string& name,
331  const Attributes& attributes,
333  {
334  if (name == "RootElement")
335  {
336  readAttribute(attributes, "param", object_.param);
337  unit_assert_operator_equal(54, position);
338  }
339  else if (name == "FirstElement")
340  {
341  // delegate handling to a FirstHandler
342  unit_assert_operator_equal(86, position);
343  return Status(Status::Delegate, &firstHandler_);
344  }
345  else if (name == "SecondElement")
346  {
347  // delegate handling to a SecondHandler
348  return Status(Status::Delegate, &secondHandler_);
349  }
350  else if (name == "FifthElement")
351  {
352  // delegate handling to a FifthHandler
353  return Status(Status::Delegate, &fifthHandler_);
354  }
355 
356  return Status::Ok;
357  }
358 
359  private:
364 };
365 
366 
367 void test()
368 {
369  if (os_) *os_ << "test()\n";
370 
371  istringstream is(sampleXML);
372  Root root;
373  RootHandler rootHandler(root);
374  parse(is, rootHandler);
375 
376  if (os_)
377  {
378  *os_ << "root.param: " << root.param << endl
379  << "first.escaped_attribute: " << root.first.escaped_attribute << endl
380  << "first.text: " << root.first.text << endl
381  << "second.param2: " << root.second.param2 << endl
382  << "second.param3: " << root.second.param3 << endl
383  << "second.text: ";
384  copy(root.second.text.begin(), root.second.text.end(), ostream_iterator<string>(*os_,"|"));
385  *os_ << "\nfifth.leeloo: " << root.fifth.leeloo << endl
386  << "fifth.mr_zorg: " << root.fifth.mr_zorg << endl
387  << "\n";
388  }
389 
390  unit_assert_operator_equal("value", root.param);
392  unit_assert_operator_equal("Some Text with Entity References: <&>", root.first.text);
393  unit_assert_operator_equal("something", root.second.param2);
394  unit_assert_operator_equal("something.else 1234-56", root.second.param3);
395  unit_assert_operator_equal(4, root.second.text.size());
396  unit_assert_operator_equal("Pre-Text", root.second.text[0]);
397  unit_assert_operator_equal("Inlined text with", root.second.text[1]);
398  unit_assert_operator_equal("<&\">", root.second.text[2]);
399  unit_assert_operator_equal("Post-text.", root.second.text[3]);
400  unit_assert_operator_equal(">Leeloo > mul-\"tipass", root.fifth.leeloo);
401  unit_assert_operator_equal("You're a monster, Zorg.>I know.", root.fifth.mr_zorg);
402 }
403 
404 
406 {
407  if (os_) *os_ << "testNoAutoUnescape()\n";
408 
409  istringstream is(sampleXML);
410  Root root;
411  RootHandler rootHandler(root, false, false);
412  parse(is, rootHandler);
413 
414  if (os_)
415  {
416  *os_ << "root.param: " << root.param << endl
417  << "first.escaped_attribute: " << root.first.escaped_attribute << endl
418  << "first.text: " << root.first.text << endl
419  << "second.param2: " << root.second.param2 << endl
420  << "second.param3: " << root.second.param3 << endl
421  << "second.text: ";
422  copy(root.second.text.begin(), root.second.text.end(), ostream_iterator<string>(*os_,"|"));
423  *os_ << "\n\n";
424  }
425 
426  unit_assert_operator_equal("value", root.param);
427  unit_assert_operator_equal("&quot;&lt;&amp;lt;&gt;&quot;", root.first.escaped_attribute);
428  unit_assert_operator_equal("Some Text with Entity References: &lt;&amp;&gt;", root.first.text);
429  unit_assert_operator_equal("something", root.second.param2);
430  unit_assert_operator_equal("something.else 1234-56", root.second.param3);
431  unit_assert_operator_equal(4, root.second.text.size());
432  unit_assert_operator_equal("Pre-Text", root.second.text[0]);
433  unit_assert_operator_equal("Inlined text with", root.second.text[1]);
434  unit_assert_operator_equal("<&\">", root.second.text[2]);
435  unit_assert_operator_equal("Post-text.", root.second.text[3]);
436 }
437 
438 
440 {
441  public:
442 
443  virtual Status startElement(const string& name,
444  const Attributes& attributes,
446  {
447  if (name == "AnotherRoot")
448  {
449  unit_assert_operator_equal(656, position);
450  return Status::Done;
451  }
452 
453  return Status::Ok;
454  }
455 };
456 
457 
458 void testDone()
459 {
460  if (os_) *os_ << "testDone()\n";
461 
462  istringstream is(sampleXML);
463  AnotherRootHandler handler;
464  parse(is, handler); // parses <RootElement> ... </RootElement>
465  parse(is, handler); // parses <AnotherRootElement> and aborts
466 
467  string buffer;
468  getline(is, buffer, '<');
469 
470  if (os_) *os_ << "buffer: " << buffer << "\n\n";
471  unit_assert_operator_equal("The quick brown fox jumps over the lazy dog.", buffer);
472 }
473 
474 
476 {
477  if (os_) *os_ << "testBadXML()\n";
478 
479  const char* bad = "<A><B></A></B>";
480  istringstream is(bad);
481  Handler handler;
482 
483  try
484  {
485  parse(is, handler);
486  }
487  catch (exception& e)
488  {
489  if (os_) *os_ << e.what() << "\nOK: Parser caught bad XML.\n\n";
490  return;
491  }
492 
493  throw runtime_error("Parser failed to catch bad XML.");
494 }
495 
496 
498 {
499  int count;
500  NestedHandler() : count(0) {}
501 
502  virtual Status endElement(const string& name, stream_offset position)
503  {
504  count++;
505  return Status::Ok;
506  }
507 };
508 
509 
511 {
512  if (os_) *os_ << "testNested()\n";
513  const char* nested = "<a><a></a></a>";
514  istringstream is(nested);
515 
516  NestedHandler nestedHandler;
517  parse(is, nestedHandler);
518  if (os_) *os_ << "count: " << nestedHandler.count << "\n\n";
519  unit_assert_operator_equal(2, nestedHandler.count);
520 }
521 
522 
524 {
525  if (os_) *os_ << "testRootElement()\n";
526 
527  string RootElement = "RootElement";
529 
530  istringstream sampleXMLStream(sampleXML);
531  unit_assert_operator_equal(RootElement, xml_root_element(sampleXMLStream));
532 
533  {ofstream sampleXMLFile("testRootElement.xml"); sampleXMLFile << sampleXML;}
534  unit_assert_operator_equal(RootElement, xml_root_element_from_file("testRootElement.xml"));
535  bfs::remove("testRootElement.xml");
536 
537  unit_assert_operator_equal(RootElement, xml_root_element("<?xml?><RootElement>"));
538  unit_assert_operator_equal(RootElement, xml_root_element("<?xml?><RootElement name='value'"));
539 
540  unit_assert_throws(xml_root_element("not-xml"), runtime_error);
541 }
542 
543 
545 {
546  string id1("_x0031_invalid_x0020_ID");
547  unit_assert_operator_equal("1invalid ID", decode_xml_id_copy(id1));
548  unit_assert_operator_equal((void *)&id1, (void *)&decode_xml_id(id1)); // should return reference to id1
549  unit_assert_operator_equal("1invalid ID", id1);
550 
551  string id2("_invalid-ID__x0023_2__x003c_3_x003e_");
552  unit_assert_operator_equal("_invalid-ID_#2_<3>", decode_xml_id_copy(id2));
553  unit_assert_operator_equal("_invalid-ID_#2_<3>", decode_xml_id(id2));
554 
555  string crazyId("_x0021__x0021__x0021_");
556  unit_assert_operator_equal("!!!", decode_xml_id(crazyId));
557 }
558 
560 {
561  std::string str = " \t foo \n";
562  saxstring xstr = str;
563  unit_assert_operator_equal(xstr,str);
564  unit_assert_operator_equal(xstr,str.c_str());
565  unit_assert_operator_equal(str.length(),xstr.length());
566  xstr.trim_lead_ws();
567  unit_assert_operator_equal(xstr.length(),str.length()-3);
568  unit_assert_operator_equal(xstr,str.substr(3));
569  xstr.trim_trail_ws();
570  unit_assert_operator_equal(xstr.length(),str.length()-5);
571  unit_assert_operator_equal(xstr,str.substr(3,3));
572  unit_assert_operator_equal(xstr[1],'o');
573  xstr[1] = '0';
574  unit_assert_operator_equal(xstr[1],'0');
575  std::string str2(xstr.data());
576  unit_assert_operator_equal(str2,"f0o");
577  std::string str3(xstr.c_str());
578  unit_assert_operator_equal(str2,str3);
579  saxstring xstr2(xstr);
580  unit_assert_operator_equal(xstr2,xstr);
581  saxstring xstr3;
582  unit_assert_operator_equal(xstr3.c_str(),std::string());
583 }
584 
585 int main(int argc, char* argv[])
586 {
587  TEST_PROLOG(argc, argv)
588 
589  try
590  {
591  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
592  demo();
594  test();
596  testDone();
597  testBadXML();
598  testNested();
599  testRootElement();
600  testDecoding();
601  }
602  catch (exception& e)
603  {
604  TEST_FAILED(e.what())
605  }
606  catch (...)
607  {
608  TEST_FAILED("Caught unknown exception.")
609  }
610 
612 }
613 
void test()
string escaped_attribute
const char * sampleXML
PWIZ_API_DECL std::string xml_root_element_from_file(const std::string &filepath)
Returns the root element from an XML file; throws runtime_error if no element is found.
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)
#define unit_assert_throws(x, exception)
Definition: unit.hpp:106
virtual Status endElement(const string &name, stream_offset position)
SecondHandler secondHandler_
string param3
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)
void testDecoding()
virtual Status endElement(const string &name, stream_offset position)
virtual Status startElement(const string &name, const Handler::Attributes &attributes, stream_offset position)
RootHandler(Root &root, bool autoUnescapeAttributes=true, bool autoUnescapeCharacters=true)
SAX event handler interface.
Definition: SAXParser.hpp:315
virtual Status startElement(const string &name, const Attributes &attributes, stream_offset position)
PrintAttribute(ostream &os)
Fifth fifth
void testRootElement()
PrintEventHandler(ostream &os)
attribute_list::const_iterator end() const
Definition: SAXParser.hpp:532
void testDone()
virtual Status endElement(const string &name, stream_offset position)
#define TEST_EPILOG
Definition: unit.hpp:182
Second second
An extended SAX interface for custom XML stream parsing.
Definition: SAXParser.hpp:54
virtual Status startElement(const string &name, const Handler::Attributes &attributes, stream_offset position)
FirstHandler(First &first, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
int main(int argc, char *argv[])
string param2
boost::iostreams::stream_offset stream_offset
Definition: SAXParser.hpp:583
PWIZ_API_DECL std::string xml_root_element(const std::string &fileheader)
Returns the root element from an XML buffer; throws runtime_error if no element is found...
void operator()(const Handler::Attributes::attribute &attr)
PWIZ_API_DECL std::map< std::string, std::string > parse(const std::string &id)
parses an id string into a map<string,string>
string leeloo
virtual Status startElement(const string &name, const Handler::Attributes &attributes, stream_offset position)
#define unit_assert_operator_equal(expected, actual)
Definition: unit.hpp:92
string mr_zorg
Handler returns the Status struct as a means of changing the parser's behavior.
Definition: SAXParser.hpp:332
std::string getValue(XMLUnescapeBehavior_t Unescape=XMLUnescapeDefault) const
Definition: SAXParser.hpp:493
FifthHandler(Fifth &object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
string param
FirstHandler firstHandler_
attribute_list::const_iterator begin() const
Definition: SAXParser.hpp:527
void testSaxParserString()
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)
string text
vector< string > text
void readAttribute(const Handler::Attributes &attributes, const string &attributeName, string &result)
void testBadXML()
virtual Status processingInstruction(const string &name, const string &value, stream_offset position)
ostream * os_
virtual Status endElement(const string &name, stream_offset position)
#define TEST_FAILED(x)
Definition: unit.hpp:176
PWIZ_API_DECL Position position(CVID cvid=CVID_Unknown)
returns a Position corresponding to one of the following CVIDs: CVID_Unknown: Position::Anywhere MS_m...
First first
virtual Status startElement(const string &name, const Attributes &attributes, stream_offset position)
#define TEST_PROLOG(argc, argv)
Definition: unit.hpp:174
void testNested()
void testNoAutoUnescape()
void demo()
virtual Status startElement(const string &name, const Attributes &attributes, stream_offset position)
PWIZ_API_DECL std::string value(const std::string &id, const std::string &name)
convenience function to extract a named value from an id string
PWIZ_API_DECL std::string & decode_xml_id(std::string &str)
Decodes any characters encoded with their hexadecimal value, e.g.
FifthHandler fifthHandler_
SecondHandler(Second &object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
#define unit_assert(x)
Definition: unit.hpp:85
attribute_list::const_iterator find(const std::string &name) const
Definition: SAXParser.hpp:537
PWIZ_API_DECL std::string decode_xml_id_copy(const std::string &str)
Decodes any characters encoded with their hexadecimal value, e.g.
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)