ProteoWizard
BinaryDataEncoderTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: BinaryDataEncoderTest.cpp 5084 2013-10-28 23:32:24Z pcbrefugee $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2007 Spielberg Family Center for Applied Proteomics
8 // Cedars Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #include "BinaryDataEncoder.hpp"
26 #include "boost/filesystem.hpp"
28 #include <cstring>
29 
30 
31 using namespace pwiz::util;
32 using namespace pwiz::cv;
33 using namespace pwiz::msdata;
34 namespace bfs = boost::filesystem;
35 
36 
// Optional verbose-output stream. Null by default; main() points it at cout
// when "-v" is passed, and every diagnostic print in this file is guarded by
// a null check on it.
37 ostream* os_ = 0;
38 
39 
// Sample values used by testConfiguration(): they are copied into a
// vector<double>, encoded, compared against the regression strings below,
// then decoded and compared with the original values.
// NOTE(review): the two-column layout looks like interleaved (m/z, intensity)
// pairs -- confirm against the encoder's callers before relying on that.
40 double sampleData_[] =
41 {
42  200.00018816645022000000, 0.00000000000000000000,
43  200.00043034083151000000, 0.00000000000000000000,
44  200.00067251579924000000, 0.00000000000000000000,
45  200.00091469135347000000, 0.00000000000000000000,
46  201.10647068550810000000, 0.00000000000000000000,
47  201.10671554643099000000, 0.00000000000000000000,
48  201.10696040795017000000, 0.00000000000000000000,
49  201.10720527006566000000, 0.00000000000000000000,
50  201.10745013277739000000, 908.68475341796875000000,
51  201.10769499608537000000, 1266.26928710937500000000,
52  201.10793985998967000000, 1258.11450195312500000000,
53  201.10818472449023000000, 848.79339599609375000000,
54  201.10842958958708000000, 0.00000000000000000000,
55  201.10867445528024000000, 0.00000000000000000000,
56  201.10891932156963000000, 0.0000000000000000000,
57  200, 0,
58  300, 1,
59  400, 10,
60  500, 100,
61  600, 1000,
62 };
63 
64 
// Number of doubles stored in sampleData_ (total element count of the array).
65 const int sampleDataSize_ = sizeof(sampleData_)/sizeof(double);
66 
67 
68 // regression test strings
// Each constant is the encoder output expected for sampleData_ under one
// configuration; regressionTest() selects among them. The names spell out the
// configuration: 32/64 = precision, Big/Little = byte order, a Zlib suffix =
// zlib compression, and NumpressLinear/Slof/Pic = the numpress mode.
// NOTE(review): sampleEncodedModified64BigZlib_ is presumably the expected
// output for the modified sample data used when numpress is requested but
// rejected by the error-tolerance check -- confirm against the encoder.
// These strings must stay byte-identical; any change breaks the regression test.
69 const char* sampleEncoded32Big_ = "Q0gADAAAAABDSAAcAAAAAENIACwAAAAAQ0gAPAAAAABDSRtCAAAAAENJG1IAAAAAQ0kbYgAAAABDSRtyAAAAAENJG4JEYyvTQ0kbkkSeSJ5DSRuiRJ1DqkNJG7JEVDLHQ0kbwgAAAABDSRvSAAAAAENJG+IAAAAAQ0gAAAAAAABDlgAAP4AAAEPIAABBIAAAQ/oAAELIAABEFgAARHoAAA==";
70 const char* sampleEncoded32Little_ = "DABIQwAAAAAcAEhDAAAAACwASEMAAAAAPABIQwAAAABCG0lDAAAAAFIbSUMAAAAAYhtJQwAAAAByG0lDAAAAAIIbSUPTK2NEkhtJQ55InkSiG0lDqkOdRLIbSUPHMlREwhtJQwAAAADSG0lDAAAAAOIbSUMAAAAAAABIQwAAAAAAAJZDAACAPwAAyEMAACBBAAD6QwAAyEIAABZEAAB6RA==";
71 const char* sampleEncoded64Little_ = "/xedigEAaUAAAAAAAAAAAIV5fYYDAGlAAAAAAAAAAACkK16CBQBpQAAAAAAAAAAAXy4/fgcAaUAAAAAAAAAAAK4HNjVoI2lAAAAAAAAAAACrvLg2aiNpQAAAAAAAAAAAnMM7OGwjaUAAAAAAAAAAAIIcvzluI2lAAAAAAAAAAABax0I7cCNpQAAAAGB6ZYxAJcTGPHIjaUAAAADAE8mTQOUSSz50I2lAAAAAQHWok0CYs88/diNpQAAAAOBYhopAP6ZUQXgjaUAAAAAAAAAAANvq2UJ6I2lAAAAAAAAAAABpgV9EfCNpQAAAAAAAAAAAAAAAAAAAaUAAAAAAAAAAAAAAAAAAwHJAAAAAAAAA8D8AAAAAAAB5QAAAAAAAACRAAAAAAABAf0AAAAAAAABZQAAAAAAAwIJAAAAAAABAj0A=";
72 const char* sampleEncoded64Big_ = "QGkAAYqdF/8AAAAAAAAAAEBpAAOGfXmFAAAAAAAAAABAaQAFgl4rpAAAAAAAAAAAQGkAB34/Ll8AAAAAAAAAAEBpI2g1NgeuAAAAAAAAAABAaSNqNri8qwAAAAAAAAAAQGkjbDg7w5wAAAAAAAAAAEBpI245vxyCAAAAAAAAAABAaSNwO0LHWkCMZXpgAAAAQGkjcjzGxCVAk8kTwAAAAEBpI3Q+SxLlQJOodUAAAABAaSN2P8+zmECKhljgAAAAQGkjeEFUpj8AAAAAAAAAAEBpI3pC2erbAAAAAAAAAABAaSN8RF+BaQAAAAAAAAAAQGkAAAAAAAAAAAAAAAAAAEBywAAAAAAAP/AAAAAAAABAeQAAAAAAAEAkAAAAAAAAQH9AAAAAAABAWQAAAAAAAECCwAAAAAAAQI9AAAAAAAA=";
73 const char* sampleEncoded32LittleZlib_ = "eJzjYfBwZgACGSitA6VtoLSTtCeYDoLSSVC6CEo3AenL2skuk4D0PI95LouA9CrnuS6bgPRxoxCXQ1B1l6D0IyjNADWfgWEakG6wZ2A4AaQVHBkYfgHpE04MDGIuDAxVLgB7LB3q";
74 const char* sampleEncoded32BigZlib_ = "eJxz9mDgYQACZw8GGSitA6VtwLSntBOUDoLSSVC6CEo3uSRrXwbSk1zmecwD0otc5jqvAtKbXEKMjgPpQ1B1l6D0I6j5YOA8jYHBvgFIn2BgcFQA0r8YGJyAbBcxIK5iYAAAfLkd6g==";
75 const char* sampleEncoded64LittleZlib_ = "eJz7Lz63i5Eh04EBClora9uYkfhLtOOaWJH48Xr2dexI/HXsZqYZygj+6j07zLKQ+HMOW1vkIPGbZPZb5iHxo447WRdA+AlVqT0OqkeO2RRB+AeET052eCrkbVcC4TuUrpjsMGPzefsyCP9BRFuXg/2yEMcKJPNuv7rpVIXEz2yMd6lB4kNFUfkHiqD8D/YQuhLKV4HQDvVQfiSEPtAEFe93AACmsz1+";
76 const char* sampleEncoded64BigZlib_ = "eJxzyGRg7Jor/p8BChwyGZjbaitbkfisTXHaS5D47HX2evEIvnKGqRn7OiR+ltmOPauR+DkW1ofnIPHzLPfLNCHxC6ydjkc59KRWJUD4RTbHjqg6TD4pfADCL7HzFnrqMHlFqQOEX2Z/fvMMh662iAcQfoVjyDJ7JPOqnG6+uo3Er3GJb8xEcj8KcCg6AKbtP0D5lVBaBUrXO0DoSCi/CaLeoR8iDgC0Qj1+";
77 const char* sampleEncodedNumpressLinear_ = "QS69PAAAAAAu7AEMAAAAAA9J0wgQ61LPfgY70wgQbTLPfg4d0wgQ7hLPfgMM1BgQwGKtfgvq1SgQ4UKtfgjc1SgQIyKtfgXO1SgQRAKtfgKw5SgQ78OG4QNVqQugf3Tmpg+6yRCARe2G9wiYdBGAecaFZgs+qjKwizv8oQVa5SgQS0GtfgJM5SgQjCGtfgwC5BgQApLPfgicxA4Q5MmQzQzK9+kgoDYaDQAvNdQwS+AZrAhzqAY5hKD/kA==";
78 const char* sampleEncodedNumpressLinearZlib_ = "eJxz1NtrwwAEem8YeUA0v+dlDoHXQefr2KyBjFyj83V8skDGO6Hzdcw8VyQEDiStreN+dVVD4KHT2jqOO0CGstLaOtZzQIYL09o6pg1PNQTeH257yBy6kntBfcmzZfy7Tgo0uL5t+84xo0SwofJYaxq33SqjDd3WfxayRgEVezsCdfkAGT2Ka+t4mJ5ICDBNOl/HMecIn8CTkxPO8pz6/lJhgZkUL4O+6RUD7weSaziKV7BZtiz4PwEAkp1KXg==";
79 const char* sampleEncodedNumpressSlof_ = "QMHqAAAAAAACvgAAAr4AAAK+AAACvgAANL4AADS+AAA0vgAANL4AADS+GvQ0vvr/NL6//zS+qfE0vgAANL4AADS+AAACvgAAeszWGMHW6VW73lqlQOWH9w==";
80 const char* sampleEncodedNumpressSlofZlib_ = "eJxzOPiKAQSY9qFiEwws9cVk36//Jvv2A/HKj8hyIPVVZ65JHLz2MnT3vailDk/bvwMAn1ogtQ==";
81 const char* sampleEncodedNumpressPic_ = "aMhoyGjIaMhpyGnIachpyGnF2DacUvRpxa5GnFFTachpyGnIaMhcIXFQkXpU8WRlhSWOMA==";
82 const char* sampleEncodedNumpressPicZlib_ = "eJzLOJEBhpkwePSG2ZygL5lH17nNCQyGiGWciFEsDJhYFfIxJbVVtc8AAAjsG4c=";
83 const char* sampleEncodedModified64BigZlib_ = "eJxzyGRg7Jor/r/+/X8wcMhkYG6rrWz9j+CzNsVpL6m/D+ez19nrxf+H85UzTM3Y1zFAAZCfZbZjz2okfo6F9eE5SPw8y/0yTUj8Amun41EOPalVCRB+kc2xI6oOk08KH4DwS+y8hZ46TF5R6gDhl9mf3zzDoast4gGEX+EYssweybwqp5uvbiPxa1ziGzMRfAYU4FB0AEzbf4DyK6G0CpSud4DQkVB+E0S9Qz9EHACREFv+";
84 
85 const char* regressionTest(const BinaryDataEncoder::Config& config,bool expectNumpressIgnored)
86 {
87  if (expectNumpressIgnored) // when set, expecting numpress not to be used even though it was requested
88  {
90  }
91  else
92  {
93  if (config.numpress == BinaryDataEncoder::Numpress_Linear)
94  return (BinaryDataEncoder::Compression_Zlib==config.compression)?sampleEncodedNumpressLinearZlib_:sampleEncodedNumpressLinear_;
95 
96  if (config.numpress == BinaryDataEncoder::Numpress_Pic)
97  return (BinaryDataEncoder::Compression_Zlib==config.compression)?sampleEncodedNumpressPicZlib_:sampleEncodedNumpressPic_;
98 
99  if (config.numpress == BinaryDataEncoder::Numpress_Slof)
100  return (BinaryDataEncoder::Compression_Zlib==config.compression)?sampleEncodedNumpressSlofZlib_:sampleEncodedNumpressSlof_;
101  }
102  if (config.precision == BinaryDataEncoder::Precision_32 &&
103  config.byteOrder == BinaryDataEncoder::ByteOrder_LittleEndian &&
104  config.compression == BinaryDataEncoder::Compression_None)
105  return sampleEncoded32Little_;
106 
107  if (config.precision == BinaryDataEncoder::Precision_32 &&
108  config.byteOrder == BinaryDataEncoder::ByteOrder_BigEndian &&
109  config.compression == BinaryDataEncoder::Compression_None)
110  return sampleEncoded32Big_;
111 
112  if (config.precision == BinaryDataEncoder::Precision_64 &&
113  config.byteOrder == BinaryDataEncoder::ByteOrder_LittleEndian &&
114  config.compression == BinaryDataEncoder::Compression_None)
115  return sampleEncoded64Little_;
116 
117  if (config.precision == BinaryDataEncoder::Precision_64 &&
118  config.byteOrder == BinaryDataEncoder::ByteOrder_BigEndian &&
119  config.compression == BinaryDataEncoder::Compression_None)
120  return sampleEncoded64Big_;
121 
122  if (config.precision == BinaryDataEncoder::Precision_32 &&
123  config.byteOrder == BinaryDataEncoder::ByteOrder_LittleEndian &&
124  config.compression == BinaryDataEncoder::Compression_Zlib)
126 
127  if (config.precision == BinaryDataEncoder::Precision_32 &&
128  config.byteOrder == BinaryDataEncoder::ByteOrder_BigEndian &&
129  config.compression == BinaryDataEncoder::Compression_Zlib)
131 
132  if (config.precision == BinaryDataEncoder::Precision_64 &&
133  config.byteOrder == BinaryDataEncoder::ByteOrder_LittleEndian &&
134  config.compression == BinaryDataEncoder::Compression_Zlib)
136 
137  if (config.precision == BinaryDataEncoder::Precision_64 &&
138  config.byteOrder == BinaryDataEncoder::ByteOrder_BigEndian &&
139  config.compression == BinaryDataEncoder::Compression_Zlib)
141 
142  throw runtime_error("[BinaryDataEncoderTest::regressionTest()] Untested configuration.");
143 }
144 
145 
147 {
148  BinaryDataEncoder::Config config(config_in);
149  if (os_)
150  *os_ << "testConfiguration: " << config << endl;
151 
152  // initialize scan data
153 
154  vector<double> binary(sampleDataSize_);
155  copy(sampleData_, sampleData_+sampleDataSize_, binary.begin());
156 
157  bool checkNumpressMaxErrorSupression = (BinaryDataEncoder::Numpress_None != config.numpress)&&(config.numpressLinearErrorTolerance>0);
158  if (checkNumpressMaxErrorSupression)
159  {
160  binary[1] = numeric_limits<double>::max( )-.1; // attempt to blow out the numpress lossiness limiter
161  binary[3] = -binary[1]; // attempt to blow out the numpress lossiness limiter
162  binary[5] = .5*binary[1]; // attempt to blow out the numpress lossiness limiter
163  binary[7] = .5*binary[3]; // attempt to blow out the numpress lossiness limiter
164  }
165 
166  if (os_)
167  {
168  *os_ << "original: " << binary.size() << endl;
169  *os_ << setprecision(20) << fixed;
170  copy(binary.begin(), binary.end(), ostream_iterator<double>(*os_, "\n"));
171  }
172 
173  // instantiate encoder
174 
175  BinaryDataEncoder encoder(config);
176 
177  // encode
178 
179  string encoded;
180  encoder.encode(binary, encoded);
181 
182  if (os_)
183  *os_ << "encoded: " << encoded.size() << endl << encoded << endl;
184 
185  // regression testing for encoding
186 
187  unit_assert(encoded == regressionTest(config,checkNumpressMaxErrorSupression));
188 
189  // decode
190 
191  vector<double> decoded;
192  encoder.decode(encoded, decoded);
193 
194  if (os_)
195  {
196  *os_ << "decoded: " << decoded.size() << endl;
197  copy(decoded.begin(), decoded.end(), ostream_iterator<double>(*os_, "\n"));
198  }
199 
200  // validate by comparing scan data before/after encode/decode
201 
202  unit_assert(binary.size() == decoded.size());
203 
204  const double epsilon = config.precision == BinaryDataEncoder::Precision_64 ? 1e-14 : 1e-5 ;
205 
206  switch (config.numpress)
207  {
208  case BinaryDataEncoder::Numpress_Linear:
209  case BinaryDataEncoder::Numpress_Slof:
210  case BinaryDataEncoder::Numpress_Pic:
211  // lossy compression
212  for (vector<double>::const_iterator it=binary.begin(), jt=decoded.begin();
213  it!=binary.end(); ++it, ++jt)
214  {
215  if (0==*it || 0==*jt)
216  unit_assert_equal(*it, *jt, 0.1);
217  else if (*it > *jt)
218  unit_assert((*jt)/(*it) > .999 );
219  else
220  unit_assert((*it)/(*jt) > .999 );
221  }
222  break;
223  default:
224  for (vector<double>::const_iterator it=binary.begin(), jt=decoded.begin();
225  it!=binary.end(); ++it, ++jt)
226  {
227  unit_assert_equal(*it, *jt, epsilon);
228  }
229  break;
230  }
231  if (os_) *os_ << "validated with epsilon: " << fixed << setprecision(1) << scientific << epsilon << "\n\n";
232 }
233 
234 
235 void test()
236 {
238 
239  config.precision = BinaryDataEncoder::Precision_32;
240  config.byteOrder = BinaryDataEncoder::ByteOrder_LittleEndian;
241  testConfiguration(config);
242 
243  config.precision = BinaryDataEncoder::Precision_32;
244  config.byteOrder = BinaryDataEncoder::ByteOrder_BigEndian;
245  testConfiguration(config);
246 
247  config.precision = BinaryDataEncoder::Precision_64;
248  config.byteOrder = BinaryDataEncoder::ByteOrder_LittleEndian;
249  testConfiguration(config);
250 
251  config.precision = BinaryDataEncoder::Precision_64;
252  config.byteOrder = BinaryDataEncoder::ByteOrder_BigEndian;
253  testConfiguration(config);
254 
255  config.precision = BinaryDataEncoder::Precision_32;
256  config.byteOrder = BinaryDataEncoder::ByteOrder_LittleEndian;
257  config.compression = BinaryDataEncoder::Compression_Zlib;
258  testConfiguration(config);
259 
260  config.precision = BinaryDataEncoder::Precision_32;
261  config.byteOrder = BinaryDataEncoder::ByteOrder_BigEndian;
262  config.compression = BinaryDataEncoder::Compression_Zlib;
263  testConfiguration(config);
264 
265  config.precision = BinaryDataEncoder::Precision_64;
266  config.byteOrder = BinaryDataEncoder::ByteOrder_LittleEndian;
267  config.compression = BinaryDataEncoder::Compression_Zlib;
268  testConfiguration(config);
269 
270  config.precision = BinaryDataEncoder::Precision_64;
271  config.byteOrder = BinaryDataEncoder::ByteOrder_BigEndian;
272  config.compression = BinaryDataEncoder::Compression_Zlib;
273  testConfiguration(config);
274 
275  // test the numpress stuff with and without zlib, and to see if it honors error limits
276  config.compression = BinaryDataEncoder::Compression_None;
277  config.numpressLinearErrorTolerance = 0; // means don't do tolerance checks
278  config.numpressSlofErrorTolerance = 0; // means don't do tolerance checks
279  for (int zloop=3;zloop--;)
280  {
281  config.numpress = BinaryDataEncoder::Numpress_Linear;
282  testConfiguration(config);
283 
284  config.numpress = BinaryDataEncoder::Numpress_Slof;
285  testConfiguration(config);
286 
287  config.numpress = BinaryDataEncoder::Numpress_Pic;
288  testConfiguration(config);
289 
290  config.compression = BinaryDataEncoder::Compression_Zlib; // and again with zlib
291  if (1==zloop) // and finally test numpress excessive error avoidance
292  {
293  config.numpressLinearErrorTolerance = .01;
294  config.numpressSlofErrorTolerance = .01;
295  }
296  }
297 
298 }
299 
300 
301 void testBadFile(const string& filename)
302 {
303  if (os_) *os_ << "testBadFile: " << filename << flush;
304 
305  size_t filesize = 0;
306 
307  try
308  {
309  filesize = (size_t) bfs::file_size(filename);
310  }
311  catch (exception&)
312  {
313  cerr << "\nUnable to find file " << filename << endl;
314  return;
315  }
316 
317  if (os_) *os_ << " (" << filesize << " bytes)\n";
318 
319  unit_assert(filesize%sizeof(double) == 0);
320 
321  // read data from file into memory
322 
323  vector<double> data(filesize/sizeof(double));
324  ifstream is(filename.c_str(), ios::binary);
325  is.read((char*)&data[0], filesize);
326 
327  // set configuration to produce the error
328 
330 
331  if (filename.find("BinaryDataEncoderTest.bad.bin")!=string::npos)
332  {
333  // zlib compression encoding error with this configuration
334  config.precision = BinaryDataEncoder::Precision_32;
335  config.byteOrder = BinaryDataEncoder::ByteOrder_LittleEndian;
336  config.compression = BinaryDataEncoder::Compression_Zlib;
337  }
338 
339  // encode and decode
340 
341  BinaryDataEncoder encoder(config);
342  string encoded;
343  encoder.encode(data, encoded);
344 
345  vector<double> decoded;
346  encoder.decode(encoded, decoded);
347 
348  // verify
349 
350  unit_assert(decoded.size() == data.size());
351  for (size_t i=0; i<decoded.size(); i++)
352  unit_assert(decoded[i] == data[i]);
353 }
354 
355 
356 int main(int argc, char* argv[])
357 {
358  TEST_PROLOG(argc, argv)
359 
360  try
361  {
362  vector<string> filenames;
363 
364  for (int i=1; i<argc; i++)
365  {
366  if (!strcmp(argv[i],"-v")) os_ = &cout;
367  else if (bal::starts_with(argv[i], "--")) continue;
368  else filenames.push_back(argv[i]);
369  }
370 
371  if (os_) *os_ << "BinaryDataEncoderTest\n\n";
372  test();
373  for_each(filenames.begin(), filenames.end(), testBadFile);
374 
375  }
376  catch (exception& e)
377  {
378  TEST_FAILED(e.what())
379  }
380  catch (...)
381  {
382  TEST_FAILED("Caught unknown exception.")
383  }
384 
386 }
387 
388 
ostream * os_
const char * filenames[]
const char * sampleEncoded32LittleZlib_
const char * sampleEncodedNumpressSlof_
const char * sampleEncoded64Little_
const char * sampleEncodedModified64BigZlib_
const double epsilon
Definition: DiffTest.cpp:41
#define TEST_EPILOG
Definition: unit.hpp:182
#define unit_assert_equal(x, y, epsilon)
Definition: unit.hpp:99
const char * sampleEncodedNumpressLinear_
const int sampleDataSize_
void encode(const std::vector< double > &data, std::string &result, size_t *binaryByteCount=NULL) const
encode binary data as a text string
const char * sampleEncodedNumpressLinearZlib_
const char * regressionTest(const BinaryDataEncoder::Config &config, bool expectNumpressIgnored)
void test()
double sampleData_[]
const char * sampleEncodedNumpressSlofZlib_
int main(int argc, char *argv[])
const char * sampleEncoded32Little_
void testBadFile(const string &filename)
const char * sampleEncoded32Big_
encoding/decoding configuration
#define TEST_FAILED(x)
Definition: unit.hpp:176
const char * sampleEncoded64Big_
void testConfiguration(const BinaryDataEncoder::Config &config_in)
#define TEST_PROLOG(argc, argv)
Definition: unit.hpp:174
const char * sampleEncoded64LittleZlib_
const char * sampleEncodedNumpressPic_
const char * sampleEncoded32BigZlib_
const char * sampleEncoded64BigZlib_
#define unit_assert(x)
Definition: unit.hpp:85
Definition: cv.hpp:91
const char * sampleEncodedNumpressPicZlib_
void decode(const char *encodedData, size_t len, std::vector< double > &result) const
decode text-encoded data as binary