libdap Updated for version 3.20.5
libdap4 is an implementation of OPeNDAP's DAP protocol.
chunked_istream.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24//
25// Portions of this code were taken verbatim from Josuttis,
26// "The C++ Standard Library," p.672
27
28#include "config.h"
29
30#include <stdint.h>
31#include <byteswap.h>
32#include <arpa/inet.h>
33
34#include <cstring>
35#include <vector>
36
37#include "chunked_stream.h"
38#include "chunked_istream.h"
39
40#include "Error.h"
41
42//#define DODS_DEBUG
43//#define DODS_DEBUG2
44#ifdef DODS_DEBUG
45#include <iostream>
46#endif
47
48#include "util.h"
49#include "debug.h"
50
51namespace libdap {
52
/*
 This code does not use a 'put back' buffer, but here's a picture of the
 d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
 the I/O Stream library's streambuf class works. For the case with no
 putback, just imagine it as zero and eliminate the leftmost extension. This
 might also come in useful if the code was extended to support put back. I
 removed that feature because I don't see it being used with our chunked
 transmission protocol and it requires an extra call to memcpy() when data
 are added to the internal buffer.

 d_buffer  d_buffer + putBack
 |         |
 v         v
 |---------|--------------------------------------------|....
 |         |                                            |   .
 |---------|--------------------------------------------|....
           ^                         ^                   ^
           |                         |                   |
           eback()                   gptr()              egptr()

 */
74
84std::streambuf::int_type
86{
87 DBG(cerr << "underflow..." << endl);
88 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
89
90 // return the next character; uflow() increments the puffer pointer.
91 if (gptr() < egptr())
92 return traits_type::to_int_type(*gptr());
93
94 // gptr() == egptr() so read more data from the underlying input source.
95
96 // To read data from the chunked stream, first read the header
97 uint32_t header;
98 d_is.read((char *) &header, 4);
99#if !BYTE_ORDER_PREFIX && HEADER_IN_NETWORK_BYTE_ORDER
100 // When the endian nature of the server is encoded in the chunk header, the header is
101 // sent using network byte order
102 header = ntohl(header);
103#endif
104
105 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
106 // it holds data. In the latter case, bytes those will be read and moved into the
107 // buffer. Once those data are consumed, we'll be back here again and this read()
108 // will return EOF. See below for the other case...
109 if (d_is.eof()) return traits_type::eof();
110#if BYTE_ORDER_PREFIX
111 if (d_twiddle_bytes) header = bswap_32(header);
112#else
113 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
114 if (!d_set_twiddle) {
115 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
116 d_set_twiddle = true;
117 }
118#endif
119 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
120
121 DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
122 DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
123 DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
124
125 // Handle the case where the buffer is not big enough to hold the incoming chunk
126 if (chunk_size > d_buf_size) {
127 d_buf_size = chunk_size;
128 m_buffer_alloc();
129 }
130
131 // If the END chunk has zero bytes, return EOF. See above for more information
132 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
133
134 // Read the chunk's data
135 d_is.read(d_buffer, chunk_size);
136 DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
137 if (d_is.bad()) return traits_type::eof();
138
139 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
140 setg(d_buffer, // beginning of put back area
141 d_buffer, // read position (gptr() == eback())
142 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
143
144 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
145
146 switch (header & CHUNK_TYPE_MASK) {
147 case CHUNK_END:
148 DBG2(cerr << "Found end chunk" << endl);
149 return traits_type::to_int_type(*gptr());
150 case CHUNK_DATA:
151 return traits_type::to_int_type(*gptr());
152
153 case CHUNK_ERR:
154 // this is pretty much the end of the show... Assume the buffer/chunk holds
155 // the error message text.
156 d_error = true;
157 d_error_message = string(d_buffer, chunk_size);
158 return traits_type::eof();
159 default:
160 d_error = true;
161 d_error_message = "Failed to read known chunk header type.";
162 return traits_type::eof();
163 }
164}
165
182std::streamsize
183chunked_inbuf::xsgetn(char* s, std::streamsize num)
184{
185 DBG(cerr << "xsgetn... num: " << num << endl);
186
187 // if num is <= the chars currently in the buffer
188 if (num <= (egptr() - gptr())) {
189 memcpy(s, gptr(), num);
190 gbump(num);
191
192 return traits_type::not_eof(num);
193 }
194
195 // else they asked for more
196 uint32_t bytes_left_to_read = num;
197
198 // are there any bytes in the buffer? if so grab them first
199 if (gptr() < egptr()) {
200 int bytes_to_transfer = egptr() - gptr();
201 memcpy(s, gptr(), bytes_to_transfer);
202 gbump(bytes_to_transfer);
203 s += bytes_to_transfer;
204 bytes_left_to_read -= bytes_to_transfer;
205 }
206
207 // We need to get more bytes from the underlying stream; at this
208 // point the internal buffer is empty.
209
210 // read the remaining bytes to transfer, a chunk at a time,
211 // and put any leftover stuff in the buffer.
212
213 // note that when the code is here, gptr() == egptr(), so the
214 // next call to read() will fall through the previous tests and
215 // read at least one chunk here.
216 bool done = false;
217 while (!done) {
218 // Get a chunk header
219 uint32_t header;
220 d_is.read((char *) &header, 4);
221
222#if !BYTE_ORDER_PREFIX && HEADER_IN_NETWORK_BYTE_ORDER
223 header = ntohl(header);
224#endif
225
226 // There are two EOF cases: One where the END chunk is zero bytes and one where
227 // it holds data. In the latter case, those will be read and moved into the
228 // buffer. Once those data are consumed, we'll be back here again and this read()
229 // will return EOF. See below for the other case...
230 if (d_is.eof()) return traits_type::eof();
231#if BYTE_ORDER_PREFIX
232 if (d_twiddle_bytes) header = bswap_32(header);
233#else
234 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
235 if (!d_set_twiddle) {
236 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
237 d_set_twiddle = true;
238 }
239#endif
240
241 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
242 DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
243 DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
244 DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
245
246 // handle error chunks here
247 if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
248 d_error = true;
249 // Note that d_buffer is not used to avoid calling resize if it is too
250 // small to hold the error message. At this point, there's not much reason
251 // to optimize transport efficiency, however.
252 std::vector<char> message(chunk_size);
253 d_is.read(&message[0], chunk_size);
254 d_error_message = string(&message[0], chunk_size);
255 // leave the buffer and gptr(), ..., in a consistent state (empty)
256 setg(d_buffer, d_buffer, d_buffer);
257 }
258 // And zero-length END chunks here.
259 else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
260 return traits_type::not_eof(num-bytes_left_to_read);
261 }
262 // The next case is complicated because we read some data from the current
263 // chunk into 's' an some into the internal buffer.
264 else if (chunk_size > bytes_left_to_read) {
265 d_is.read(s, bytes_left_to_read);
266 if (d_is.bad()) return traits_type::eof();
267
268 // Now slurp up the remain part of the chunk and store it in the buffer
269 uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
270 // expand the internal buffer if needed
271 if (bytes_leftover > d_buf_size) {
272 d_buf_size = chunk_size;
273 m_buffer_alloc();
274 }
275 // read the remain stuff in to d_buffer
276 d_is.read(d_buffer, bytes_leftover);
277 if (d_is.bad()) return traits_type::eof();
278
279 setg(d_buffer, // beginning of put back area
280 d_buffer, // read position (gptr() == eback())
281 d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
282
283 bytes_left_to_read = 0 /* -= d_is.gcount()*/;
284 }
285 else {
286 // expand the internal buffer if needed
287 if (chunk_size > d_buf_size) {
288 d_buf_size = chunk_size;
289 m_buffer_alloc();
290 }
291 // If we get a chunk that's zero bytes, Don't call read()
292 // to save the kernel context switch overhead.
293 if (chunk_size > 0) {
294 d_is.read(s, chunk_size);
295 if (d_is.bad()) return traits_type::eof();
296 bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
297 s += chunk_size;
298 }
299 }
300
301 switch (header & CHUNK_TYPE_MASK) {
302 case CHUNK_END:
303 DBG(cerr << "Found end chunk" << endl);
304 // in this case bytes_left_to_read can be > 0 because we ran out of data
305 // before reading all the requested bytes. The next read() call will return
306 // eof; this call returns the number of bytes read and transferred to 's'.
307 done = true;
308 break;
309
310 case CHUNK_DATA:
311 done = bytes_left_to_read == 0;
312 break;
313
314 case CHUNK_ERR:
315 // this is pretty much the end of the show... The error message has
316 // already been read above
317 return traits_type::eof();
318
319 default:
320 d_error = true;
321 d_error_message = "Failed to read known chunk header type.";
322 return traits_type::eof();
323 }
324 }
325
326 return traits_type::not_eof(num-bytes_left_to_read);
327}
328
341std::streambuf::int_type
343{
344 // To read data from the chunked stream, first read the header
345 uint32_t header;
346 d_is.read((char *) &header, 4);
347
348#if !BYTE_ORDER_PREFIX && HEADER_IN_NETWORK_BYTE_ORDER
349 header = ntohl(header);
350#endif
351
352 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
353 // it holds data. In the latter case, bytes those will be read and moved into the
354 // buffer. Once those data are consumed, we'll be back here again and this read()
355 // will return EOF. See below for the other case...
356 if (d_is.eof()) return traits_type::eof();
357#if BYTE_ORDER_PREFIX
358 if (d_twiddle_bytes) header = bswap_32(header);
359#else
360 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
361 if (!d_set_twiddle) {
362 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
363 d_set_twiddle = true;
364 }
365#endif
366
367 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
368
369 DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
370 DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
371 DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
372
373 // Handle the case where the buffer is not big enough to hold the incoming chunk
374 if (chunk_size > d_buf_size) {
375 d_buf_size = chunk_size;
376 m_buffer_alloc();
377 }
378
379 // If the END chunk has zero bytes, return EOF. See above for more information
380 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
381
382 // Read the chunk's data
383 d_is.read(d_buffer, chunk_size);
384 DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
385 if (d_is.bad()) return traits_type::eof();
386
387 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
388 setg(d_buffer, // beginning of put back area
389 d_buffer, // read position (gptr() == eback())
390 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
391
392 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
393
394 switch (header & CHUNK_TYPE_MASK) {
395 case CHUNK_END:
396 DBG(cerr << "Found end chunk" << endl);
397 return traits_type::not_eof(chunk_size);
398
399 case CHUNK_DATA:
400 return traits_type::not_eof(chunk_size);
401
402 case CHUNK_ERR:
403 // this is pretty much the end of the show... Assume the buffer/chunk holds
404 // the error message text.
405 d_error = true;
406 d_error_message = string(d_buffer, chunk_size);
407 return traits_type::eof();
408
409 default:
410 d_error = true;
411 d_error_message = "Failed to read known chunk header type.";
412 return traits_type::eof();
413 }
414}
415
416}
virtual int_type underflow()
Insert new characters into the buffer This specialization of underflow is called when the gptr() is a...
int_type read_next_chunk()
Read a chunk Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data This specialization of xsgetn() reads num bytes and puts them in s first reading...
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition: util.cc:94