libdap  Updated for version 3.17.2
DDXParserSAX2.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 //#define DODS_DEBUG 1
29 //#define DODS_DEBUG2 1
30 
31 #include <cstring>
32 #include <cstdarg>
33 
34 #include "BaseType.h"
35 #include "Byte.h"
36 #include "Int16.h"
37 #include "UInt16.h"
38 #include "Int32.h"
39 #include "UInt32.h"
40 #include "Float32.h"
41 #include "Float64.h"
42 #include "Str.h"
43 #include "Url.h"
44 #include "Array.h"
45 #include "Structure.h"
46 #include "Sequence.h"
47 #include "Grid.h"
48 
49 #include "DDXParserSAX2.h"
50 
51 #include "util.h"
52 #include "mime_util.h"
53 #include "debug.h"
54 
55 namespace libdap {
56 
// Printable names for the parser states. The debugging macros in this file
// index this table with the value returned by DDXParser::get_state(), so the
// order of the entries has to follow the ParseState enum (declared in the
// header -- keep the two in sync).
//
// The guard must name both debug macros: DBG2() statements below use the
// table when only DODS_DEBUG2 is defined.
#if defined(DODS_DEBUG) || defined(DODS_DEBUG2)
static const char *states[] =
    {
        "start",

        "dataset",

        "attribute_container",
        "attribute",
        "attribute_value",
        "other_xml_attribute",

        "alias",

        "simple_type",

        "array",
        "dimension",

        "grid",
        "map",

        "structure",
        "sequence",

        "blob href",

        "unknown",
        "error"
    };
#endif
88 // Glue the BaseTypeFactory to the enum-based factory defined statically
89 // here.
90 
91 BaseType *DDXParser::factory(Type t, const string & name)
92 {
93  switch (t) {
94  case dods_byte_c:
95  return d_factory->NewByte(name);
96  break;
97 
98  case dods_int16_c:
99  return d_factory->NewInt16(name);
100  break;
101 
102  case dods_uint16_c:
103  return d_factory->NewUInt16(name);
104  break;
105 
106  case dods_int32_c:
107  return d_factory->NewInt32(name);
108  break;
109 
110  case dods_uint32_c:
111  return d_factory->NewUInt32(name);
112  break;
113 
114  case dods_float32_c:
115  return d_factory->NewFloat32(name);
116  break;
117 
118  case dods_float64_c:
119  return d_factory->NewFloat64(name);
120  break;
121 
122  case dods_str_c:
123  return d_factory->NewStr(name);
124  break;
125 
126  case dods_url_c:
127  return d_factory->NewUrl(name);
128  break;
129 
130  case dods_array_c:
131  return d_factory->NewArray(name);
132  break;
133 
134  case dods_structure_c:
135  return d_factory->NewStructure(name);
136  break;
137 
138  case dods_sequence_c:
139  return d_factory->NewSequence(name);
140  break;
141 
142  case dods_grid_c:
143  return d_factory->NewGrid(name);
144  break;
145 
146  default:
147  return 0;
148  }
149 }
150 
151 #if 0
152 
153 static Type get_type(const char *name)
154 {
155  if (strcmp(name, "Byte") == 0)
156  return dods_byte_c;
157 
158  if (strcmp(name, "Int16") == 0)
159  return dods_int16_c;
160 
161  if (strcmp(name, "UInt16") == 0)
162  return dods_uint16_c;
163 
164  if (strcmp(name, "Int32") == 0)
165  return dods_int32_c;
166 
167  if (strcmp(name, "UInt32") == 0)
168  return dods_uint32_c;
169 
170  if (strcmp(name, "Float32") == 0)
171  return dods_float32_c;
172 
173  if (strcmp(name, "Float64") == 0)
174  return dods_float64_c;
175 
176  if (strcmp(name, "String") == 0)
177  return dods_str_c;
178 
179  if (strcmp(name, "Url") == 0)
180  return dods_url_c;
181 
182  if (strcmp(name, "Array") == 0)
183  return dods_array_c;
184 
185  if (strcmp(name, "Structure") == 0)
186  return dods_structure_c;
187 
188  if (strcmp(name, "Sequence") == 0)
189  return dods_sequence_c;
190 
191  if (strcmp(name, "Grid") == 0)
192  return dods_grid_c;
193 
194  return dods_null_c;
195 }
196 #endif
197 
198 #if 0
199 // Not used. jhrg 1/17/13
200 static Type is_simple_type(const char *name)
201 {
202  Type t = get_type(name);
203  switch (t) {
204  case dods_byte_c:
205  case dods_int16_c:
206  case dods_uint16_c:
207  case dods_int32_c:
208  case dods_uint32_c:
209  case dods_float32_c:
210  case dods_float64_c:
211  case dods_str_c:
212  case dods_url_c:
213  return t;
214  default:
215  return dods_null_c;
216  }
217 }
218 #endif
219 
/** Test that an element name differs from a given tag. The comparison is
    case sensitive.

    @param name The element name to test.
    @param tag The tag to compare against.
    @return True when \c name and \c tag are different strings. A null
    pointer is never passed to strcmp(); a null argument is considered
    different from any non-null string. */
static bool is_not(const char *name, const char *tag)
{
    if (!name || !tag)
        return name != tag;

    return strcmp(name, tag) != 0;
}
224 
225 void DDXParser::set_state(DDXParser::ParseState state)
226 {
227  s.push(state);
228 }
229 
230 DDXParser::ParseState DDXParser::get_state() const
231 {
232  return s.top();
233 }
234 
235 void DDXParser::pop_state()
236 {
237  s.pop();
238 }
239 
243 void DDXParser::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
244 {
245  if (!attribute_table.empty())
246  attribute_table.clear(); // erase old attributes
247 
248  unsigned int index = 0;
249  for (int i = 0; i < nb_attributes; ++i, index += 5) {
250  // Make a value using the attribute name and the prefix, namespace URI
251  // and the value. The prefix might be null.
252  attribute_table.insert(map<string, XMLAttribute>::value_type(
253  string((const char *)attributes[index]),
254  XMLAttribute(attributes + index + 1)));
255 
256  DBG(cerr << "Attribute '" << (const char *)attributes[index] << "': "
257  << attribute_table[(const char *)attributes[index]].value << endl);
258  }
259 }
260 
261 void DDXParser::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
262 {
263  for (int i = 0; i < nb_namespaces; ++i ) {
264  // make a value with the prefix and namespace URI. The prefix might be
265  // null.
266  namespace_table.insert(map<string,string>::value_type(
267  namespaces[i*2] != 0 ? (const char *)namespaces[i*2] : "",
268  (const char *)namespaces[i*2+1]));
269  }
270 }
271 
276 bool DDXParser::check_required_attribute(const string & attr)
277 {
278  map < string, XMLAttribute >::iterator i = attribute_table.find(attr);
279  if (i == attribute_table.end())
280  ddx_fatal_error(this, "Required attribute '%s' not found.",
281  attr.c_str());
282  return true;
283 }
284 
290 bool DDXParser::check_attribute(const string & attr)
291 {
292  return (attribute_table.find(attr) != attribute_table.end());
293 }
294 
303 void DDXParser::process_attribute_element(const xmlChar **attrs, int nb_attributes)
304 {
305  // These methods set the state to parser_error if a problem is found.
306  transfer_xml_attrs(attrs, nb_attributes);
307 
308  bool error = !(check_required_attribute(string("name"))
309  && check_required_attribute(string("type")));
310  if (error)
311  return;
312 
313  if (attribute_table["type"].value == "Container") {
314  set_state(inside_attribute_container);
315 
316  AttrTable *child;
317  AttrTable *parent = at_stack.top();
318 
319  child = parent->append_container(attribute_table["name"].value);
320  at_stack.push(child); // save.
321  DBG2(cerr << "Pushing at" << endl);
322  }
323  else if (attribute_table["type"].value == "OtherXML") {
324  set_state(inside_other_xml_attribute);
325 
326  dods_attr_name = attribute_table["name"].value;
327  dods_attr_type = attribute_table["type"].value;
328  }
329  else {
330  set_state(inside_attribute);
331  // *** Modify parser. Add a special state for inside OtherXML since it
332  // does not use the <value> element.
333 
334  dods_attr_name = attribute_table["name"].value;
335  dods_attr_type = attribute_table["type"].value;
336  }
337 }
338 
342 void DDXParser::process_attribute_alias(const xmlChar **attrs, int nb_attributes)
343 {
344  transfer_xml_attrs(attrs, nb_attributes);
345  if (check_required_attribute(string("name"))
346  && check_required_attribute(string("attribute"))) {
347  set_state(inside_alias);
348  at_stack.top()->attr_alias(attribute_table["name"].value,
349  attribute_table["attribute"].value);
350  }
351 }
352 
360 void DDXParser::process_variable(Type t, ParseState s, const xmlChar **attrs,
361  int nb_attributes)
362 {
363  transfer_xml_attrs(attrs, nb_attributes);
364 
365  set_state(s);
366 
367  if (bt_stack.top()->type() == dods_array_c
368  || check_required_attribute("name")) { // throws on error/false
369  BaseType *btp = factory(t, attribute_table["name"].value);
370  if (!btp) {
371  ddx_fatal_error(this, "Internal parser error; could not instantiate the variable '%s'.",
372  attribute_table["name"].value.c_str());
373  }
374  else {
375  // Only run this code if btp is not null! jhrg 9/14/15
376  // Once we make the new variable, we not only load it on to the
377  // BaseType stack, we also load its AttrTable on the AttrTable stack.
378  // The attribute processing software always operates on the AttrTable
379  // at the top of the AttrTable stack (at_stack).
380  bt_stack.push(btp);
381  at_stack.push(&btp->get_attr_table());
382  }
383  }
384 }
385 
389 void DDXParser::process_dimension(const xmlChar **attrs, int nb_attributes)
390 {
391  transfer_xml_attrs(attrs, nb_attributes);
392  if (check_required_attribute(string("size"))) {
393  set_state(inside_dimension);
394  Array *ap = dynamic_cast < Array * >(bt_stack.top());
395  if (!ap) {
396  ddx_fatal_error(this, "Parse error: Expected an array variable.");
397  return;
398  }
399 
400  ap->append_dim(atoi(attribute_table["size"].value.c_str()),
401  attribute_table["name"].value);
402  }
403 }
404 
407 void DDXParser::process_blob(const xmlChar **attrs, int nb_attributes)
408 {
409  transfer_xml_attrs(attrs, nb_attributes);
410  if (check_required_attribute(string("href"))) {
411  set_state(inside_blob_href);
412  *blob_href = attribute_table["href"].value;
413  }
414 }
415 
422 inline bool
423 DDXParser::is_attribute_or_alias(const char *name, const xmlChar **attrs,
424  int nb_attributes)
425 {
426  if (strcmp(name, "Attribute") == 0) {
427  process_attribute_element(attrs, nb_attributes);
428  // next state: inside_attribtue or inside_attribute_container
429  return true;
430  }
431  else if (strcmp(name, "Alias") == 0) {
432  process_attribute_alias(attrs, nb_attributes);
433  // next state: inside_alias
434  return true;
435  }
436 
437  return false;
438 }
439 
445 inline bool DDXParser::is_variable(const char *name, const xmlChar **attrs,
446  int nb_attributes)
447 {
448  Type t = get_type(name);
449  //if ((t = is_simple_type(name)) != dods_null_c) {
450  if (is_simple_type(t)) {
451  process_variable(t, inside_simple_type, attrs, nb_attributes);
452  return true;
453  }
454  else if (strcmp(name, "Array") == 0) {
455  process_variable(dods_array_c, inside_array, attrs, nb_attributes);
456  return true;
457  }
458  else if (strcmp(name, "Structure") == 0) {
459  process_variable(dods_structure_c, inside_structure, attrs, nb_attributes);
460  return true;
461  }
462  else if (strcmp(name, "Sequence") == 0) {
463  process_variable(dods_sequence_c, inside_sequence, attrs, nb_attributes);
464  return true;
465  }
466  else if (strcmp(name, "Grid") == 0) {
467  process_variable(dods_grid_c, inside_grid, attrs, nb_attributes);
468  return true;
469  }
470 
471  return false;
472 }
473 
474 void DDXParser::finish_variable(const char *tag, Type t, const char *expected)
475 {
476  if (strcmp(tag, expected) != 0) {
478  "Expected an end tag for a %s; found '%s' instead.",
479  expected, tag);
480  return;
481  }
482 
483  pop_state();
484 
485  BaseType *btp = bt_stack.top();
486 
487  bt_stack.pop();
488  at_stack.pop();
489 
490  if (btp->type() != t) {
492  "Internal error: Expected a %s variable.",
493  expected);
494  delete btp;
495  return;
496  }
497  // Once libxml2 validates, this can go away. 05/30/03 jhrg
498  if (t == dods_array_c
499  && static_cast<Array*>(btp)->dimensions() == 0) {
501  "No dimension element included in the Array '%s'.",
502  btp->name().c_str());
503  delete btp;
504  return;
505  }
506 
507  BaseType *parent = bt_stack.top();
508 
509  if (!(parent->is_vector_type() || parent->is_constructor_type())) {
511  "Tried to add the array variable '%s' to a non-constructor type (%s %s).",
512  tag,
513  bt_stack.top()->type_name().c_str(),
514  bt_stack.top()->name().c_str());
515  delete btp;
516  return;
517  }
518 
519  parent->add_var_nocopy(btp);
520 }
521 
528 
534 {
535  DDXParser *parser = static_cast<DDXParser*>(p);
536  parser->error_msg = "";
537  parser->char_data = "";
538 
539  // init attr table stack.
540  parser->at_stack.push(&parser->dds->get_attr_table());
541 
542  // Trick; DDS *should* be a child of Structure. To simplify parsing,
543  // stuff a Structure on the bt_stack and dump the top level variables
544  // there. Once we're done, transfer the variables to the DDS.
545  parser->bt_stack.push(new Structure("dummy_dds"));
546 
547  parser->set_state(parser_start);
548 
549  DBG2(cerr << "Parser state: " << states[parser->get_state()] << endl);
550 }
551 
555 {
556  DDXParser *parser = static_cast<DDXParser*>(p);
557  DBG2(cerr << "Ending state == " << states[parser->get_state()] <<
558  endl);
559 
560  if (parser->get_state() != parser_start)
561  DDXParser::ddx_fatal_error(parser, "The document contained unbalanced tags.");
562 
563  // If we've found any sort of error, don't make the DDX; intern() will
564  // take care of the error.
565  if (parser->get_state() == parser_error) {
566  return;
567  }
568 
569  // Pop the temporary Structure off the stack and transfer its variables
570  // to the DDS.
571  Constructor *cp = dynamic_cast < Constructor * >(parser->bt_stack.top());
572  if (!cp) {
573  delete parser->bt_stack.top();
574  parser->bt_stack.pop();
575  ddx_fatal_error(parser, "Parse error: Expected a Structure, Sequence or Grid variable.");
576  return;
577  }
578 
579  for (Constructor::Vars_iter i = cp->var_begin(); i != cp->var_end(); ++i) {
580  (*i)->set_parent(0); // top-level vars have no parents
581  parser->dds->add_var(*i);
582  }
583 
584  delete parser->bt_stack.top();
585  parser->bt_stack.pop();
586 }
587 
588 void DDXParser::ddx_sax2_start_element(void *p,
589  const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
590  int nb_namespaces, const xmlChar **namespaces,
591  int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
592 {
593  DDXParser *parser = static_cast<DDXParser*>(p);
594  const char *localname = (const char *)l;
595 
596  DBG2(cerr << "start element: " << localname << ", states: "
597  << states[parser->get_state()]);
598 
599  switch (parser->get_state()) {
600  case parser_start:
601  if (strcmp(localname, "Dataset") == 0) {
602  parser->set_state(inside_dataset);
603  parser->root_ns = URI != 0 ? (const char *)URI: "";
604  parser->transfer_xml_attrs(attributes, nb_attributes);
605 
606  if (parser->check_required_attribute(string("name")))
607  parser->dds->set_dataset_name(parser->attribute_table["name"].value);
608 
609  if (parser->check_attribute("dapVersion"))
610  parser->dds->set_dap_version(parser->attribute_table["dapVersion"].value);
611  }
612  else
614  "Expected response to start with a Dataset element; found '%s' instead.",
615  localname);
616  break;
617 
618  case inside_dataset:
619  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
620  break;
621  else if (parser->is_variable(localname, attributes, nb_attributes))
622  break;
623  else if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0) {
624  parser->process_blob(attributes, nb_attributes);
625  // next state: inside_data_blob
626  }
627  else
629  "Expected an Attribute, Alias or variable element; found '%s' instead.",
630  localname);
631  break;
632 
633  case inside_attribute_container:
634  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
635  break;
636  else
638  "Expected an Attribute or Alias element; found '%s' instead.",
639  localname);
640  break;
641 
642  case inside_attribute:
643  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
644  break;
645  else if (strcmp(localname, "value") == 0)
646  parser->set_state(inside_attribute_value);
647  else
648  ddx_fatal_error(parser,
649  "Expected an 'Attribute', 'Alias' or 'value' element; found '%s' instead.",
650  localname);
651  break;
652 
653  case inside_attribute_value:
654  ddx_fatal_error(parser,
655  "Internal parser error; unexpected state, inside value while processing element '%s'.",
656  localname);
657  break;
658 
659  case inside_other_xml_attribute:
660  DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname << endl);
661 
662  parser->other_xml_depth++;
663 
664  // Accumulate the elements here
665 
666  parser->other_xml.append("<");
667  if (prefix) {
668  parser->other_xml.append((const char *)prefix);
669  parser->other_xml.append(":");
670  }
671  parser->other_xml.append(localname);
672 
673  if (nb_namespaces != 0) {
674  parser->transfer_xml_ns(namespaces, nb_namespaces);
675 
676  for (map<string,string>::iterator i = parser->namespace_table.begin();
677  i != parser->namespace_table.end();
678  ++i) {
679  parser->other_xml.append(" xmlns");
680  if (!i->first.empty()) {
681  parser->other_xml.append(":");
682  parser->other_xml.append(i->first);
683  }
684  parser->other_xml.append("=\"");
685  parser->other_xml.append(i->second);
686  parser->other_xml.append("\"");
687  }
688  }
689 
690  if (nb_attributes != 0) {
691  parser->transfer_xml_attrs(attributes, nb_attributes);
692  for (XMLAttrMap::iterator i = parser->attr_table_begin();
693  i != parser->attr_table_end();
694  ++i) {
695  parser->other_xml.append(" ");
696  if (!i->second.prefix.empty()) {
697  parser->other_xml.append(i->second.prefix);
698  parser->other_xml.append(":");
699  }
700  parser->other_xml.append(i->first);
701  parser->other_xml.append("=\"");
702  parser->other_xml.append(i->second.value);
703  parser->other_xml.append("\"");
704  }
705  }
706 
707  parser->other_xml.append(">");
708  break;
709 
710  case inside_alias:
711  ddx_fatal_error(parser,
712  "Internal parser error; unexpected state, inside alias while processing element '%s'.",
713  localname);
714  break;
715 
716  case inside_simple_type:
717  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
718  break;
719  else
720  ddx_fatal_error(parser,
721  "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
722  localname);
723  break;
724 
725  case inside_array:
726  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
727  break;
728  else if (is_not(localname, "Array")
729  && parser->is_variable(localname, attributes, nb_attributes))
730  break;
731  else if (strcmp(localname, "dimension") == 0) {
732  parser->process_dimension(attributes, nb_attributes);
733  // next state: inside_dimension
734  }
735  else
736  ddx_fatal_error(parser,
737  "Expected an 'Attribute' or 'Alias' element; found '%s' instead.",
738  localname);
739  break;
740 
741  case inside_dimension:
742  ddx_fatal_error(parser,
743  "Internal parser error; unexpected state, inside dimension while processing element '%s'.",
744  localname);
745  break;
746 
747  case inside_structure:
748  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
749  break;
750  else if (parser->is_variable(localname, attributes, nb_attributes))
751  break;
752  else
754  "Expected an Attribute, Alias or variable element; found '%s' instead.",
755  localname);
756  break;
757 
758  case inside_sequence:
759  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
760  break;
761  else if (parser->is_variable(localname, attributes, nb_attributes))
762  break;
763  else
765  "Expected an Attribute, Alias or variable element; found '%s' instead.",
766  localname);
767  break;
768 
769  case inside_grid:
770  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
771  break;
772  else if (strcmp(localname, "Array") == 0)
773  parser->process_variable(dods_array_c, inside_array, attributes, nb_attributes);
774  else if (strcmp(localname, "Map") == 0)
775  parser->process_variable(dods_array_c, inside_map, attributes, nb_attributes);
776  else
778  "Expected an Attribute, Alias or variable element; found '%s' instead.",
779  localname);
780  break;
781 
782  case inside_map:
783  if (parser->is_attribute_or_alias(localname, attributes, nb_attributes))
784  break;
785  else if (is_not(localname, "Array") && is_not(localname, "Sequence")
786  && is_not(localname, "Grid")
787  && parser->is_variable(localname, attributes, nb_attributes))
788  break;
789  else if (strcmp(localname, "dimension") == 0) {
790  parser->process_dimension(attributes, nb_attributes);
791  // next state: inside_dimension
792  }
793  else
794  ddx_fatal_error(parser,
795  "Expected an 'Attribute', 'Alias', variable or 'dimension' element; found '%s' instead.",
796  localname);
797  break;
798 
799  case inside_blob_href:
800  ddx_fatal_error(parser,
801  "Internal parser error; unexpected state, inside blob href while processing element '%s'.",
802  localname);
803  break;
804 
805  case parser_unknown:
806  // *** Never used? If so remove/error
807  parser->set_state(parser_unknown);
808  break;
809 
810  case parser_error:
811  break;
812  }
813 
814  DBGN(cerr << " ... " << states[parser->get_state()] << endl);
815 }
816 
817 void DDXParser::ddx_sax2_end_element(void *p, const xmlChar *l,
818  const xmlChar *prefix, const xmlChar *URI)
819 {
820  DDXParser *parser = static_cast<DDXParser*>(p);
821  const char *localname = (const char *)l;
822 
823  DBG2(cerr << "End element " << localname << " (state "
824  << states[parser->get_state()] << ")" << endl);
825 
826  switch (parser->get_state()) {
827  case parser_start:
828  ddx_fatal_error(parser,
829  "Internal parser error; unexpected state, inside start state while processing element '%s'.",
830  localname);
831  break;
832 
833  case inside_dataset:
834  if (strcmp(localname, "Dataset") == 0)
835  parser->pop_state();
836  else
838  "Expected an end Dataset tag; found '%s' instead.",
839  localname);
840  break;
841 
842  case inside_attribute_container:
843  if (strcmp(localname, "Attribute") == 0) {
844  parser->pop_state();
845  parser->at_stack.pop(); // pop when leaving a container.
846  }
847  else
849  "Expected an end Attribute tag; found '%s' instead.",
850  localname);
851  break;
852 
853  case inside_attribute:
854  if (strcmp(localname, "Attribute") == 0)
855  parser->pop_state();
856  else
858  "Expected an end Attribute tag; found '%s' instead.",
859  localname);
860  break;
861 
862  case inside_attribute_value:
863  if (strcmp(localname, "value") == 0) {
864  parser->pop_state();
865  AttrTable *atp = parser->at_stack.top();
866  atp->append_attr(parser->dods_attr_name,
867  parser->dods_attr_type, parser->char_data);
868  parser->char_data = ""; // Null this after use.
869  }
870  else
872  "Expected an end value tag; found '%s' instead.",
873  localname);
874 
875  break;
876 
877  case inside_other_xml_attribute: {
878  if (strcmp(localname, "Attribute") == 0
879  && parser->root_ns == (const char *)URI) {
880 
881  DBGN(cerr << endl << "\t Popping the 'inside_other_xml_attribute' state"
882  << endl);
883 
884  parser->pop_state();
885 
886  AttrTable *atp = parser->at_stack.top();
887  atp->append_attr(parser->dods_attr_name,
888  parser->dods_attr_type, parser->other_xml);
889 
890  parser->other_xml = ""; // Null this after use.
891  }
892  else {
893  DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname
894  << ", depth: " << parser->other_xml_depth << endl);
895  if (parser->other_xml_depth == 0)
897  "Expected an OtherXML attribute to end! Instead I found '%s'",
898  localname);
899  parser->other_xml_depth--;
900 
901  parser->other_xml.append("</");
902  if (prefix) {
903  parser->other_xml.append((const char *)prefix);
904  parser->other_xml.append(":");
905  }
906  parser->other_xml.append(localname);
907  parser->other_xml.append(">");
908  }
909  break;
910  }
911  // Alias is busted in libdap++ 05/29/03 jhrg
912  case inside_alias:
913  parser->pop_state();
914  break;
915 
916  case inside_simple_type: {
917  Type t = get_type(localname);
918  if (is_simple_type(t)) {
919  parser->pop_state();
920  BaseType *btp = parser->bt_stack.top();
921  parser->bt_stack.pop();
922  parser->at_stack.pop();
923 
924  BaseType *parent = parser->bt_stack.top();
925 
926  if (parent->is_vector_type() || parent->is_constructor_type()) {
927  parent->add_var(btp);
928  delete btp;
929  }
930  else {
932  "Tried to add the simple-type variable '%s' to a non-constructor type (%s %s).",
933  localname,
934  parser->bt_stack.top()->
935  type_name().c_str(),
936  parser->bt_stack.top()->name().
937  c_str());
938  delete btp;
939  }
940  }
941  else {
943  "Expected an end tag for a simple type; found '%s' instead.",
944  localname);
945  }
946  break;
947  }
948 
949  case inside_array:
950  parser->finish_variable(localname, dods_array_c, "Array");
951  break;
952 
953  case inside_dimension:
954  if (strcmp(localname, "dimension") == 0)
955  parser->pop_state();
956  else
958  "Expected an end dimension tag; found '%s' instead.",
959  localname);
960  break;
961 
962  case inside_structure:
963  parser->finish_variable(localname, dods_structure_c, "Structure");
964  break;
965 
966  case inside_sequence:
967  parser->finish_variable(localname, dods_sequence_c, "Sequence");
968  break;
969 
970  case inside_grid:
971  parser->finish_variable(localname, dods_grid_c, "Grid");
972  break;
973 
974  case inside_map:
975  parser->finish_variable(localname, dods_array_c, "Map");
976  break;
977 
978  case inside_blob_href:
979  if (strcmp(localname, "blob") == 0 || strcmp(localname, "dataBLOB") == 0)
980  parser->pop_state();
981  else
983  "Expected an end dataBLOB/blob tag; found '%s' instead.",
984  localname);
985  break;
986 
987  case parser_unknown:
988  parser->pop_state();
989  break;
990 
991  case parser_error:
992  break;
993  }
994 
995 
996  DBGN(cerr << " ... " << states[parser->get_state()] << endl);
997 }
998 
1002 void DDXParser::ddx_get_characters(void * p, const xmlChar * ch, int len)
1003 {
1004  DDXParser *parser = static_cast<DDXParser*>(p);
1005 
1006  switch (parser->get_state()) {
1007  case inside_attribute_value:
1008  parser->char_data.append((const char *)(ch), len);
1009  DBG2(cerr << "Characters: '" << parser->char_data << "'" << endl);
1010  break;
1011 
1012  case inside_other_xml_attribute:
1013  parser->other_xml.append((const char *)(ch), len);
1014  DBG2(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1015  break;
1016 
1017  default:
1018  break;
1019  }
1020 }
1021 
1026 void DDXParser::ddx_ignoreable_whitespace(void *p, const xmlChar *ch,
1027  int len)
1028 {
1029  DDXParser *parser = static_cast<DDXParser*>(p);
1030 
1031  switch (parser->get_state()) {
1032  case inside_other_xml_attribute:
1033  parser->other_xml.append((const char *)(ch), len);
1034  break;
1035 
1036  default:
1037  break;
1038  }
1039 }
1040 
1046 void DDXParser::ddx_get_cdata(void *p, const xmlChar *value, int len)
1047 {
1048  DDXParser *parser = static_cast<DDXParser*>(p);
1049 
1050  switch (parser->get_state()) {
1051  case inside_other_xml_attribute:
1052  parser->other_xml.append((const char *)(value), len);
1053  break;
1054 
1055  case parser_unknown:
1056  break;
1057 
1058  default:
1060  "Found a CData block but none are allowed by DAP.");
1061 
1062  break;
1063  }
1064 }
1065 
1070 xmlEntityPtr DDXParser::ddx_get_entity(void *, const xmlChar * name)
1071 {
1072  return xmlGetPredefinedEntity(name);
1073 }
1074 
1082 void DDXParser::ddx_fatal_error(void * p, const char *msg, ...)
1083 {
1084  va_list args;
1085  DDXParser *parser = static_cast<DDXParser*>(p);
1086 
1087  parser->set_state(parser_error);
1088 
1089  va_start(args, msg);
1090  char str[1024];
1091  vsnprintf(str, 1024, msg, args);
1092  va_end(args);
1093 
1094  int line = xmlSAX2GetLineNumber(parser->ctxt);
1095 
1096  parser->error_msg += "At line " + long_to_string(line) + ": ";
1097  parser->error_msg += string(str) + string("\n");
1098 }
1099 
1101 
1102 void DDXParser::cleanup_parse(xmlParserCtxtPtr & context)
1103 {
1104  bool wellFormed = context->wellFormed;
1105  bool valid = context->valid;
1106 
1107  context->sax = NULL;
1108  xmlFreeParserCtxt(context);
1109 
1110  // If there's an error, there may still be items on the stack at the
1111  // end of the parse.
1112  while (!bt_stack.empty()) {
1113  delete bt_stack.top();
1114  bt_stack.pop();
1115  }
1116 
1117  if (!wellFormed) {
1118  throw DDXParseFailed(string("\nThe DDX is not a well formed XML document.\n") + error_msg);
1119  }
1120 
1121  if (!valid) {
1122  throw DDXParseFailed(string("\nThe DDX is not a valid document.\n") + error_msg);
1123  }
1124 
1125  if (get_state() == parser_error) {
1126  throw DDXParseFailed(string("\nError parsing DDX response.\n") + error_msg);
1127  }
1128 }
1129 
1137 void DDXParser::intern_stream(istream &in, DDS *dest_dds, string &cid, const string &boundary)
1138 {
1139  // Code example from libxml2 docs re: read from a stream.
1140  if (!in || in.eof())
1141  throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
1142 
1143  const int size = 1024;
1144  char chars[size + 1];
1145 
1146  // int res = fread(chars, 1, 4, in);
1147  in.readsome(chars, 4);
1148  int res = in.gcount();
1149  if (res > 0) {
1150  chars[4]='\0';
1151  xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1152 
1153  ctxt = context; // need ctxt for error messages
1154  dds = dest_dds; // dump values here
1155  blob_href = &cid; // cid goes here
1156 
1157  xmlSAXHandler ddx_sax_parser;
1158  memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1159 
1160  ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1161  ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1162  ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1163  ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1164  ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1165  ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1166  ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1167  ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1168  ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1169  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1170  ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1171  ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1172 
1173  context->sax = &ddx_sax_parser;
1174  context->userData = this;
1175  context->validate = true;
1176 
1177  in.getline(chars, size); // chars has size+1 elements
1178  res = in.gcount();
1179  chars[res-1] = '\n'; // libxml needs the newline; w/o it the parse will fail
1180  chars[res] = '\0';
1181  while (res > 0 && !is_boundary(chars, boundary)) {
1182  DBG(cerr << "line (" << res << "): " << chars << endl);
1183  xmlParseChunk(ctxt, chars, res, 0);
1184 
1185  in.getline(chars, size); // chars has size+1 elements
1186  res = in.gcount();
1187  if (res > 0) {
1188  chars[res-1] = '\n';
1189  chars[res] = '\0';
1190  }
1191  }
1192 
1193  // This call ends the parse: The fourth argument of xmlParseChunk is
1194  // the bool 'terminate.'
1195  xmlParseChunk(ctxt, chars, 0, 1);
1196 
1197  cleanup_parse(context);
1198  }
1199 }
1200 
1203 void DDXParser::intern_stream(FILE *in, DDS *dest_dds, string &cid, const string &boundary)
1204 {
1205  // Code example from libxml2 docs re: read from a stream.
1206  if (!in || feof(in) || ferror(in))
1207  throw InternalErr(__FILE__, __LINE__,
1208  "Input stream not open or read error");
1209 
1210  const int size = 1024;
1211  char chars[size];
1212 
1213  int res = fread(chars, 1, 4, in);
1214  if (res > 0) {
1215  chars[4]='\0';
1216  xmlParserCtxtPtr context =
1217  xmlCreatePushParserCtxt(NULL, NULL, chars, res, "stream");
1218 
1219  ctxt = context; // need ctxt for error messages
1220  dds = dest_dds; // dump values here
1221  blob_href = &cid; // cid goes here
1222 
1223  xmlSAXHandler ddx_sax_parser;
1224  memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1225 
1226  ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1227  ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1228  ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1229  ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1230  ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1231  ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1232  ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1233  ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1234  ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1235  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1236  ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1237  ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1238 
1239  context->sax = &ddx_sax_parser;
1240  context->userData = this;
1241  context->validate = true;
1242 
1243 
1244  while ((fgets(chars, size, in) != 0) && !is_boundary(chars, boundary)) {
1245  DBG(cerr << "line (" << strlen(chars) << "): " << chars << endl);
1246  xmlParseChunk(ctxt, chars, strlen(chars), 0);
1247  }
1248  // This call ends the parse: The fourth argument of xmlParseChunk is
1249  // the bool 'terminate.'
1250  xmlParseChunk(ctxt, chars, 0, 1);
1251 
1252  cleanup_parse(context);
1253  }
1254 }
1255 
1256 
1268 void DDXParser::intern(const string & document, DDS * dest_dds, string &cid)
1269 {
1270  // Create the context pointer explicitly so that we can store a pointer
1271  // to it in the DDXParser instance. This provides a way to generate our
1272  // own error messages *with* line numbers. The messages are pretty
1273  // meaningless otherwise. This means that we use an interface from the
1274  // 'parser internals' header, and not the 'parser' header. However, this
1275  // interface is also used in one of the documented examples, so it's
1276  // probably pretty stable. 06/02/03 jhrg
1277  xmlParserCtxtPtr context = xmlCreateFileParserCtxt(document.c_str());
1278  if (!context)
1279  throw
1280  DDXParseFailed(string
1281  ("Could not initialize the parser with the file: '")
1282  + document + string("'."));
1283 
1284  dds = dest_dds; // dump values here
1285  blob_href = &cid;
1286  ctxt = context; // need ctxt for error messages
1287 
1288  xmlSAXHandler ddx_sax_parser;
1289  memset( &ddx_sax_parser, 0, sizeof(xmlSAXHandler) );
1290 
1291  ddx_sax_parser.getEntity = &DDXParser::ddx_get_entity;
1292  ddx_sax_parser.startDocument = &DDXParser::ddx_start_document;
1293  ddx_sax_parser.endDocument = &DDXParser::ddx_end_document;
1294  ddx_sax_parser.characters = &DDXParser::ddx_get_characters;
1295  ddx_sax_parser.ignorableWhitespace = &DDXParser::ddx_ignoreable_whitespace;
1296  ddx_sax_parser.cdataBlock = &DDXParser::ddx_get_cdata;
1297  ddx_sax_parser.warning = &DDXParser::ddx_fatal_error;
1298  ddx_sax_parser.error = &DDXParser::ddx_fatal_error;
1299  ddx_sax_parser.fatalError = &DDXParser::ddx_fatal_error;
1300  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1301  ddx_sax_parser.startElementNs = &DDXParser::ddx_sax2_start_element;
1302  ddx_sax_parser.endElementNs = &DDXParser::ddx_sax2_end_element;
1303 
1304  context->sax = &ddx_sax_parser;
1305  context->userData = this;
1306  context->validate = false;
1307 
1308  xmlParseDocument(context);
1309 
1310  cleanup_parse(context);
1311 }
1312 
1313 } // namespace libdap
void intern_stream(FILE *in, DDS *dds, string &cid, const string &boundary="")
Read the DDX from a stream instead of a file.
Contains the attributes for a dataset.
Definition: AttrTable.h:142
static void ddx_start_document(void *parser)
static void ddx_get_cdata(void *parser, const xmlChar *value, int len)
Holds a structure (aggregate) type.
Definition: Structure.h:83
virtual void add_var(BaseType *bt, Part part=nil)
Add a variable.
Definition: BaseType.cc:736
Type
Identifies the data type.
Definition: Type.h:94
A class for software fault reporting.
Definition: InternalErr.h:64
virtual bool is_constructor_type() const
Returns true if the instance is a constructor (i.e., Structure, Sequence or Grid) type variable...
Definition: BaseType.cc:357
static void ddx_fatal_error(void *parser, const char *msg,...)
ObjectType get_type(const string &value)
Definition: mime_util.cc:326
virtual bool is_vector_type() const
Returns true if the instance is a vector (i.e., array) type variable.
Definition: BaseType.cc:347
static void ddx_end_document(void *parser)
static xmlEntityPtr ddx_get_entity(void *parser, const xmlChar *name)
static void ddx_get_characters(void *parser, const xmlChar *ch, int len)
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:771
virtual AttrTable & get_attr_table()
Definition: DDS.cc:373
bool is_boundary(const char *line, const string &boundary)
Definition: mime_util.cc:927
virtual unsigned int append_attr(const string &name, const string &type, const string &value)
Add an attribute to the table.
Definition: AttrTable.cc:306
void set_dataset_name(const string &n)
Definition: DDS.cc:364
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
Vars_iter var_begin()
Definition: Constructor.cc:331
static void ddx_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
Vars_iter var_end()
Definition: Constructor.cc:339
void intern(const string &document, DDS *dest_dds, string &cid)
string type_name(Type t)
Definition: util.cc:756
void set_dap_version(const string &version_string="2.0")
Definition: DDS.cc:441
void add_var(BaseType *bt)
Adds a copy of the variable to the DDS. Using the ptr_duplicate() method, perform a deep copy on the ...
Definition: DDS.cc:587