libdap  Updated for version 3.17.2
chunked_istream.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 //
25 // Portions of this code were taken verbatim from Josuttis,
26 // "The C++ Standard Library," p.672
27 
28 #include "config.h"
29 
30 #include <stdint.h>
31 #include <byteswap.h>
32 #include <arpa/inet.h>
33 
34 #include <cstring>
35 #include <vector>
36 
37 #include "chunked_stream.h"
38 #include "chunked_istream.h"
39 
40 #include "Error.h"
41 
42 //#define DODS_DEBUG
43 //#define DODS_DEBUG2
44 #ifdef DODS_DEBUG
45 #include <iostream>
46 #endif
47 
48 #include "util.h"
49 #include "debug.h"
50 
51 namespace libdap {
52 
53 /*
54  This code does not use a 'put back' buffer, but here's a picture of the
55  d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
56  the I/O Stream library's streambuf class works. For the case with no
57  putback, just imagine it as zero and eliminate the leftmost extension. This
58  might also come in useful if the code was extended to support put back. I
59  removed that feature because I don't see it being used with our chunked
60  transmission protocol and it requires an extra call to memcpy() when data
61  are added to the internal buffer.
62 
63  d_buffer  d_buffer + putBack
64  |         |
65  v         v
66  |---------|--------------------------------------------|....
67  |         |                                            |   .
68  |---------|--------------------------------------------|....
69            ^                         ^                  ^
70            |                         |                  |
71            eback()                   gptr()             egptr()
72 
73  */
74 
84 std::streambuf::int_type
86 {
87  DBG(cerr << "underflow..." << endl);
88  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
89 
90  // return the next character; uflow() increments the puffer pointer.
91  if (gptr() < egptr())
92  return traits_type::to_int_type(*gptr());
93 
94  // gptr() == egptr() so read more data from the underlying input source.
95 
96  // To read data from the chunked stream, first read the header
97  uint32_t header;
98  d_is.read((char *) &header, 4);
99 #if !BYTE_ORDER_PREFIX
100  // When the endian nature of the server is encoded in the chunk header, the header is
101  // sent using network byte order
102  ntohl(header);
103 #endif
104 
105  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
106  // it holds data. In the latter case, bytes those will be read and moved into the
107  // buffer. Once those data are consumed, we'll be back here again and this read()
108  // will return EOF. See below for the other case...
109  if (d_is.eof()) return traits_type::eof();
110 #if BYTE_ORDER_PREFIX
111  if (d_twiddle_bytes) header = bswap_32(header);
112 #else
113  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
114  if (!d_set_twiddle) {
115  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
116  d_set_twiddle = true;
117  }
118 #endif
119  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
120 
121  DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
122  DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
123  DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
124 
125  // Handle the case where the buffer is not big enough to hold the incoming chunk
126  if (chunk_size > d_buf_size) {
127  d_buf_size = chunk_size;
128  m_buffer_alloc();
129  }
130 
131  // If the END chunk has zero bytes, return EOF. See above for more information
132  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
133 
134  // Read the chunk's data
135  d_is.read(d_buffer, chunk_size);
136  DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
137  if (d_is.bad()) return traits_type::eof();
138 
139  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
140  setg(d_buffer, // beginning of put back area
141  d_buffer, // read position (gptr() == eback())
142  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
143 
144  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
145 
146  switch (header & CHUNK_TYPE_MASK) {
147  case CHUNK_END:
148  DBG2(cerr << "Found end chunk" << endl);
149  return traits_type::to_int_type(*gptr());
150  case CHUNK_DATA:
151  return traits_type::to_int_type(*gptr());
152 
153  case CHUNK_ERR:
154  // this is pretty much the end of the show... Assume the buffer/chunk holds
155  // the error message text.
156  d_error = true;
157  d_error_message = string(d_buffer, chunk_size);
158  return traits_type::eof();
159  default:
160  d_error = true;
161  d_error_message = "Failed to read known chunk header type.";
162  return traits_type::eof();
163  }
164 
165  return traits_type::eof(); // Can never get here; this quiets g++
166 }
167 
184 std::streamsize
185 chunked_inbuf::xsgetn(char* s, std::streamsize num)
186 {
187  DBG(cerr << "xsgetn... num: " << num << endl);
188 
189  // if num is <= the chars currently in the buffer
190  if (num <= (egptr() - gptr())) {
191  memcpy(s, gptr(), num);
192  gbump(num);
193 
194  return traits_type::not_eof(num);
195  }
196 
197  // else they asked for more
198  uint32_t bytes_left_to_read = num;
199 
200  // are there any bytes in the buffer? if so grab them first
201  if (gptr() < egptr()) {
202  int bytes_to_transfer = egptr() - gptr();
203  memcpy(s, gptr(), bytes_to_transfer);
204  gbump(bytes_to_transfer);
205  s += bytes_to_transfer;
206  bytes_left_to_read -= bytes_to_transfer;
207  }
208 
209  // We need to get more bytes from the underlying stream; at this
210  // point the internal buffer is empty.
211 
212  // read the remaining bytes to transfer, a chunk at a time,
213  // and put any leftover stuff in the buffer.
214 
215  // note that when the code is here, gptr() == egptr(), so the
216  // next call to read() will fall through the previous tests and
217  // read at least one chunk here.
218  bool done = false;
219  while (!done) {
220  // Get a chunk header
221  uint32_t header;
222  d_is.read((char *) &header, 4);
223 #if !BYTE_ORDER_PREFIX
224  ntohl(header);
225 #endif
226 
227  // There are two EOF cases: One where the END chunk is zero bytes and one where
228  // it holds data. In the latter case, those will be read and moved into the
229  // buffer. Once those data are consumed, we'll be back here again and this read()
230  // will return EOF. See below for the other case...
231  if (d_is.eof()) return traits_type::eof();
232 #if BYTE_ORDER_PREFIX
233  if (d_twiddle_bytes) header = bswap_32(header);
234 #else
235  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
236  if (!d_set_twiddle) {
237  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
238  d_set_twiddle = true;
239  }
240 #endif
241 
242  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
243  DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
244  DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
245  DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
246 
247  // handle error chunks here
248  if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
249  d_error = true;
250  // Note that d_buffer is not used to avoid calling resize if it is too
251  // small to hold the error message. At this point, there's not much reason
252  // to optimize transport efficiency, however.
253  std::vector<char> message(chunk_size);
254  d_is.read(&message[0], chunk_size);
255  d_error_message = string(&message[0], chunk_size);
256  // leave the buffer and gptr(), ..., in a consistent state (empty)
257  setg(d_buffer, d_buffer, d_buffer);
258  }
259  // And zero-length END chunks here.
260  else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
261  return traits_type::not_eof(num-bytes_left_to_read);
262  }
263  // The next case is complicated because we read some data from the current
264  // chunk into 's' an some into the internal buffer.
265  else if (chunk_size > bytes_left_to_read) {
266  d_is.read(s, bytes_left_to_read);
267  if (d_is.bad()) return traits_type::eof();
268 
269  // Now slurp up the remain part of the chunk and store it in the buffer
270  uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
271  // expand the internal buffer if needed
272  if (bytes_leftover > d_buf_size) {
273  d_buf_size = chunk_size;
274  m_buffer_alloc();
275  }
276  // read the remain stuff in to d_buffer
277  d_is.read(d_buffer, bytes_leftover);
278  if (d_is.bad()) return traits_type::eof();
279 
280  setg(d_buffer, // beginning of put back area
281  d_buffer, // read position (gptr() == eback())
282  d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
283 
284  bytes_left_to_read = 0 /* -= d_is.gcount()*/;
285  }
286  else {
287  // expand the internal buffer if needed
288  if (chunk_size > d_buf_size) {
289  d_buf_size = chunk_size;
290  m_buffer_alloc();
291  }
292  // If we get a chunk that's zero bytes, Don't call read()
293  // to save the kernel context switch overhead.
294  if (chunk_size > 0) {
295  d_is.read(s, chunk_size);
296  if (d_is.bad()) return traits_type::eof();
297  bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
298  s += chunk_size;
299  }
300  }
301 
302  switch (header & CHUNK_TYPE_MASK) {
303  case CHUNK_END:
304  DBG(cerr << "Found end chunk" << endl);
305  // in this case bytes_left_to_read can be > 0 because we ran out of data
306  // before reading all the requested bytes. The next read() call will return
307  // eof; this call returns the number of bytes read and transferred to 's'.
308  done = true;
309  break;
310  case CHUNK_DATA:
311  done = bytes_left_to_read == 0;
312  break;
313  case CHUNK_ERR:
314  // this is pretty much the end of the show... The error message has
315  // already been read above
316  return traits_type::eof();
317  break;
318  default:
319  d_error = true;
320  d_error_message = "Failed to read known chunk header type.";
321  return traits_type::eof();
322  }
323  }
324 
325  return traits_type::not_eof(num-bytes_left_to_read);
326 }
327 
340 std::streambuf::int_type
342 {
343  // To read data from the chunked stream, first read the header
344  uint32_t header;
345  d_is.read((char *) &header, 4);
346 #if !BYTE_ORDER_PREFIX
347  ntohl(header);
348 #endif
349 
350  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
351  // it holds data. In the latter case, bytes those will be read and moved into the
352  // buffer. Once those data are consumed, we'll be back here again and this read()
353  // will return EOF. See below for the other case...
354  if (d_is.eof()) return traits_type::eof();
355 #if BYTE_ORDER_PREFIX
356  if (d_twiddle_bytes) header = bswap_32(header);
357 #else
358  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
359  if (!d_set_twiddle) {
360  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
361  d_set_twiddle = true;
362  }
363 #endif
364 
365  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
366 
367  DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
368  DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
369  DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
370 
371  // Handle the case where the buffer is not big enough to hold the incoming chunk
372  if (chunk_size > d_buf_size) {
373  d_buf_size = chunk_size;
374  m_buffer_alloc();
375  }
376 
377  // If the END chunk has zero bytes, return EOF. See above for more information
378  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
379 
380  // Read the chunk's data
381  d_is.read(d_buffer, chunk_size);
382  DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
383  if (d_is.bad()) return traits_type::eof();
384 
385  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
386  setg(d_buffer, // beginning of put back area
387  d_buffer, // read position (gptr() == eback())
388  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
389 
390  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
391 
392  switch (header & CHUNK_TYPE_MASK) {
393  case CHUNK_END:
394  DBG(cerr << "Found end chunk" << endl);
395  return traits_type::not_eof(chunk_size);
396  case CHUNK_DATA:
397  return traits_type::not_eof(chunk_size);
398 
399  case CHUNK_ERR:
400  // this is pretty much the end of the show... Assume the buffer/chunk holds
401  // the error message text.
402  d_error = true;
403  d_error_message = string(d_buffer, chunk_size);
404  return traits_type::eof();
405  default:
406  d_error = true;
407  d_error_message = "Failed to read known chunk header type.";
408  return traits_type::eof();
409  }
410 
411  return traits_type::eof(); // Can never get here; this quiets g++
412 }
413 
414 }
int_type read_next_chunk()
Read a chunk Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
virtual int_type underflow()
Insert new characters into the buffer This specialization of underflow is called when the gptr() is a...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data This specialization of xsgetn() reads num bytes and puts them in s first reading...
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition: util.cc:93