ProteoWizard
Classes | Functions | Variables
MSDataFileTest.cpp File Reference
#include "MSDataFile.hpp"
#include "Diff.hpp"
#include "IO.hpp"
#include "SpectrumListBase.hpp"
#include "ChromatogramListBase.hpp"
#include "examples.hpp"
#include "pwiz/utility/misc/unit.hpp"
#include "pwiz/utility/misc/Filesystem.hpp"
#include "pwiz/utility/misc/Std.hpp"
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/device/file_descriptor.hpp>
#include <boost/iostreams/copy.hpp>

Go to the source code of this file.

Classes

class  TestReader
 

Functions

void hackInMemoryMSData (MSData &msd)
 
void validateMmgfMzxmlRoundTrip ()
 
void validateWriteRead (const MSDataFile::WriteConfig &writeConfig, const DiffConfig diffConfig)
 
void test ()
 
void demo ()
 
void testReader ()
 
void testSHA1 ()
 
int main (int argc, char *argv[])
 

Variables

ostream * os_ = 0
 
string filenameBase_ = "temp.MSDataFileTest"
 
const char rawHeader_ []
 

Function Documentation

§ hackInMemoryMSData()

void hackInMemoryMSData ( MSData & msd)

Definition at line 51 of file MSDataFileTest.cpp.

References pwiz::msdata::Run::chromatogramListPtr, pwiz::msdata::MSData::fileDescription, pwiz::msdata::MSData::run, pwiz::msdata::ChromatogramListBase::setDataProcessingPtr(), pwiz::msdata::SpectrumListBase::setDataProcessingPtr(), pwiz::msdata::MSData::softwarePtrs, pwiz::msdata::FileDescription::sourceFilePtrs, and pwiz::msdata::Run::spectrumListPtr.

Referenced by validateWriteRead().

52 {
53  // remove metadata ptrs appended on read
54  vector<SourceFilePtr>& sfs = msd.fileDescription.sourceFilePtrs;
55  if (!sfs.empty()) sfs.erase(sfs.end()-1);
56  vector<SoftwarePtr>& sws = msd.softwarePtrs;
57  if (!sws.empty()) sws.erase(sws.end()-1);
58 
59  // remove current DataProcessing created on read
60  SpectrumListBase* sl = dynamic_cast<SpectrumListBase*>(msd.run.spectrumListPtr.get());
61  ChromatogramListBase* cl = dynamic_cast<ChromatogramListBase*>(msd.run.chromatogramListPtr.get());
64 }
common functionality for base SpectrumList implementations
common functionality for base ChromatogramList implementations
std::vector< SourceFilePtr > sourceFilePtrs
list and descriptions of the source files this mzML document was generated or derived from...
Definition: MSData.hpp:89
virtual void setDataProcessingPtr(DataProcessingPtr dp)
set DataProcessing
ChromatogramListPtr chromatogramListPtr
all chromatograms for this run.
Definition: MSData.hpp:826
virtual void setDataProcessingPtr(DataProcessingPtr dp)
set DataProcessing
boost::shared_ptr< DataProcessing > DataProcessingPtr
Definition: MSData.hpp:287
FileDescription fileDescription
information pertaining to the entire mzML file (i.e. not specific to any part of the data set) is sto...
Definition: MSData.hpp:858
Run run
a run in mzML should correspond to a single, consecutive and coherent set of scans on an instrument...
Definition: MSData.hpp:882
SpectrumListPtr spectrumListPtr
all mass spectra and the acquisitions underlying them are described and attached here. Subsidiary data arrays are also both described and attached here.
Definition: MSData.hpp:823
std::vector< SoftwarePtr > softwarePtrs
list and descriptions of software used to acquire and/or process the data in this mzML file...
Definition: MSData.hpp:867

§ validateMmgfMzxmlRoundTrip()

void validateMmgfMzxmlRoundTrip ( )

Definition at line 66 of file MSDataFileTest.cpp.

References filename1, filenameBase_, pwiz::msdata::MSDataFile::WriteConfig::format, Format_MGF, Format_mzXML, pwiz::msdata::MSData::run, pwiz::msdata::SpectrumList::spectrum(), pwiz::msdata::Run::spectrumListPtr, unit_assert, and pwiz::identdata::IO::write().

Referenced by test().

67 {
68  string filename1 = filenameBase_ + ".mgf";
69  string filename2 = filenameBase_ + ".mzXML";
70 
71  ofstream ofs(filename1.c_str());
72  string mgf = "CHARGE=2+ and 3+\nBEGIN IONS\nPEPMASS=952.924194 145032.0000\nCHARGE=2+\nRTINSECONDS=301.48\n271.0874 2\n298.1747 4\nEND IONS\nBEGIN IONS\nPEPMASS=503.800000 67522.2000\nCHARGE=2+\nRTINSECONDS=302.51\n147.1840 3\n154.3668 3\n162.2118 2\n162.9007 1\n167.3297 1\n175.2387 2\n184.9460 3\nEND IONS\n";
73  ofs.write(mgf.c_str(), mgf.length());
74  ofs.close();
75 
76  // make sure that round trip doesn't systematically increase converted scan numbers
77  for (int loop = 3; loop--; )
78  {
79  MSDataFile msd1(filename1); // read back the MGF
80  const SpectrumList& sl = *msd1.run.spectrumListPtr;
81  SpectrumPtr spectrum = sl.spectrum(0);
82  unit_assert(spectrum->id == "index=0");
83  MSDataFile::WriteConfig writeConfig;
84  writeConfig.format = MSDataFile::Format_mzXML;
85  MSDataFile::write(msd1, filename2, writeConfig); // write as mzXML
86  MSDataFile msd2(filename2); // read back the mzXML
87  const SpectrumList& sl2= *msd2.run.spectrumListPtr;
88  SpectrumPtr spectrum2 = sl2.spectrum(0);
89  unit_assert(spectrum2->id == "index=1"); // mzXML is 1-based
90  MSDataFile::WriteConfig writeConfig2;
91  writeConfig2.format = MSDataFile::Format_MGF;
92  MSDataFile::write(msd2, filename1, writeConfig2); // write as mgf
93  }
94 
95  // remove temp files
96  boost::filesystem::remove(filename1);
97  boost::filesystem::remove(filename2);
98 }
boost::shared_ptr< Spectrum > SpectrumPtr
Definition: MSData.hpp:569
string filename1
virtual SpectrumPtr spectrum(size_t index, bool getBinaryData=false) const =0
retrieve a spectrum by index
configuration for write()
Definition: MSDataFile.hpp:52
Interface for accessing spectra, which may be stored in memory or backed by a data file (RAW...
Definition: MSData.hpp:656
PWIZ_API_DECL void write(minimxml::XMLWriter &writer, const CV &cv)
Format_mzXML
Definition: MSDataFile.hpp:49
MSData object plus file I/O.
Definition: MSDataFile.hpp:40
Format_MGF
Definition: MSDataFile.hpp:49
string filenameBase_
#define unit_assert(x)
Definition: unit.hpp:85

§ validateWriteRead()

void validateWriteRead ( const MSDataFile::WriteConfig & writeConfig,
const DiffConfig  diffConfig 
)

Definition at line 101 of file MSDataFileTest.cpp.

References diff(), filename1, filenameBase_, pwiz::msdata::MSDataFile::WriteConfig::format, Format_mzXML, hackInMemoryMSData(), pwiz::identdata::examples::initializeTiny(), os_, pwiz::msdata::MSData::run, pwiz::msdata::Run::spectrumListPtr, unit_assert, pwiz::identdata::IO::write(), and pwiz::msdata::MSDataFile::write().

Referenced by test().

103 {
104  if (os_) *os_ << "validateWriteRead()\n " << writeConfig << endl;
105 
106  string filename1 = filenameBase_ + ".1";
107  string filename2 = filenameBase_ + ".2";
108  string filename3 = filenameBase_ + ".3";
109  string filename4 = filenameBase_ + ".\xE4\xB8\x80\xE4\xB8\xAA\xE8\xAF\x95.4";
110  // FIXME: 4-byte UTF-8 not working: string filename5 = filenameBase_ + ".\x01\x04\xA4\x01\x04\xA2.5";
111 
112  {
113  // create MSData object in memory
114  MSData tiny;
116 
117  if (writeConfig.format == MSDataFile::Format_mzXML)
118  {
119  // remove s22 since it is not written to mzXML
120  static_cast<SpectrumListSimple&>(*tiny.run.spectrumListPtr).spectra.pop_back();
121  }
122 
123  // write to file #1 (static)
124  MSDataFile::write(tiny, filename1, writeConfig);
125 
126  // simulate CLI garbage collect behavior, wherein delayed deletes stress
127  // memory and file handle usage
128  {
129  std::vector< boost::shared_ptr< MSDataFile > > msds;
130  for (int i=0;i<100;i++)
131  {
132  boost::shared_ptr<MSDataFile> msd1(new MSDataFile(filename1));
133  msds.push_back(msd1);
134  hackInMemoryMSData(*msd1);
135  Diff<MSData, DiffConfig> diff(tiny, *msd1, diffConfig);
136  }
137  }
138 
139  // read back into an MSDataFile object
140  MSDataFile msd1(filename1);
141  hackInMemoryMSData(msd1);
142 
143  // compare
144  Diff<MSData, DiffConfig> diff(tiny, msd1, diffConfig);
145  if (diff && os_) *os_ << diff << endl;
146  unit_assert(!diff);
147 
148  // write to file #2 (member)
149  msd1.write(filename2, writeConfig);
150 
151  // read back into another MSDataFile object
152  MSDataFile msd2(filename2);
153  hackInMemoryMSData(msd2);
154 
155  // compare
156  diff(tiny, msd2);
157  if (diff && os_) *os_ << diff << endl;
158  unit_assert(!diff);
159 
160  // now give the gzip read a workout
161  bio::filtering_istream tinyGZ(bio::gzip_compressor() | bio::file_descriptor_source(filename1));
162  bio::copy(tinyGZ, bio::file_descriptor_sink(filename1+".gz", ios::out|ios::binary));
163 
164  MSDataFile msd3(filename1+".gz");
165  hackInMemoryMSData(msd3);
166 
167  // compare
168  diff(tiny, msd3);
169  if (diff && os_) *os_ << diff << endl;
170  unit_assert(!diff);
171 
172  // test writing to a stream
173  ostringstream oss;
174  msd1.write(oss, writeConfig);
175  string ossStr = oss.str();
176  ofstream ofs(filename3.c_str());
177  ofs.write(ossStr.c_str(), ossStr.length());
178  ofs.close();
179 
180  // read back into another MSDataFile object
181  MSDataFile msd4(filename3);
182  hackInMemoryMSData(msd4);
183 
184  // compare
185  diff(tiny, msd4);
186  if (diff && os_) *os_ << diff << endl;
187  unit_assert(!diff);
188 
189 
190  // write to file #4 (testing two byte UTF-8 code points)
191  msd1.write(filename4, writeConfig);
192 
193  // read back into another MSDataFile object
194  MSDataFile msd5(filename4);
195  hackInMemoryMSData(msd5);
196 
197  // compare
198  diff(tiny, msd5);
199  if (diff && os_) *os_ << diff << endl;
200  unit_assert(!diff);
201 
202 
203  // write to file #5 (testing four byte UTF-8 code points)
204  /*msd1.write(filename5, writeConfig);
205 
206  // read back into another MSDataFile object
207  MSDataFile msd6(filename5);
208  hackInMemoryMSData(msd6);
209 
210  // compare
211  diff(tiny, msd6);
212  if (diff && os_) *os_ << diff << endl;
213  unit_assert(!diff);*/
214  }
215 
216  // remove temp files
217  boost::filesystem::remove(filename1);
218  boost::filesystem::remove(filename2);
219  boost::filesystem::remove(filename1 + ".gz");
220  boost::filesystem::remove(filename3);
221  boost::filesystem::remove(filename4);
222  //boost::filesystem::remove(filename5);
223 }
Calculate diffs of objects in a ProteoWizard data model hierarchy.
Definition: diff_std.hpp:142
string filename1
void diff(const string &filename1, const string &filename2)
Run run
a run in mzML should correspond to a single, consecutive and coherent set of scans on an instrument...
Definition: MSData.hpp:882
PWIZ_API_DECL void write(minimxml::XMLWriter &writer, const CV &cv)
Format_mzXML
Definition: MSDataFile.hpp:49
void hackInMemoryMSData(MSData &msd)
MSData object plus file I/O.
Definition: MSDataFile.hpp:40
SpectrumListPtr spectrumListPtr
all mass spectra and the acquisitions underlying them are described and attached here. Subsidiary data arrays are also both described and attached here.
Definition: MSData.hpp:823
PWIZ_API_DECL void initializeTiny(IdentData &mzid)
string filenameBase_
This is the root element of ProteoWizard; it represents the mzML element, defined as: intended to cap...
Definition: MSData.hpp:845
ostream * os_
Simple writeable in-memory implementation of SpectrumList.
Definition: MSData.hpp:712
#define unit_assert(x)
Definition: unit.hpp:85

§ test()

void test ( )

Definition at line 225 of file MSDataFileTest.cpp.

References pwiz::msdata::MSDataFile::WriteConfig::binaryDataEncoderConfig, pwiz::msdata::MSDataFile::WriteConfig::format, Format_mzXML, pwiz::msdata::DiffConfig::ignoreChromatograms, pwiz::msdata::DiffConfig::ignoreMetadata, pwiz::msdata::MSDataFile::WriteConfig::indexed, pwiz::msdata::BinaryDataEncoder::Config::precision, validateMmgfMzxmlRoundTrip(), and validateWriteRead().

Referenced by main().

226 {
227  MSDataFile::WriteConfig writeConfig;
228  DiffConfig diffConfig;
229 
231 
232  // mzML 64-bit, full diff
233  validateWriteRead(writeConfig, diffConfig);
234 
235  writeConfig.indexed = false;
236  validateWriteRead(writeConfig, diffConfig); // no index
237  writeConfig.indexed = true;
238 
239  // mzML 32-bit, full diff
240  writeConfig.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_32;
241  validateWriteRead(writeConfig, diffConfig);
242 
243  // mzXML 32-bit, diff ignoring metadata and chromatograms
244  writeConfig.format = MSDataFile::Format_mzXML;
245  diffConfig.ignoreMetadata = true;
246  diffConfig.ignoreChromatograms = true;
247  validateWriteRead(writeConfig, diffConfig);
248 
249  // mzXML 64-bit, diff ignoring metadata and chromatograms
250  writeConfig.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_64;
251  validateWriteRead(writeConfig, diffConfig);
252 
253  writeConfig.indexed = false;
254  validateWriteRead(writeConfig, diffConfig); // no index
255  writeConfig.indexed = true;
256 }
bool ignoreMetadata
ignore all file level metadata, and most scan level metadata, i.e.
Definition: Diff.hpp:214
configuration for write()
Definition: MSDataFile.hpp:52
Format_mzXML
Definition: MSDataFile.hpp:49
configuration struct for diffing MSData types
Definition: Diff.hpp:205
void validateMmgfMzxmlRoundTrip()
void validateWriteRead(const MSDataFile::WriteConfig &writeConfig, const DiffConfig diffConfig)
BinaryDataEncoder::Config binaryDataEncoderConfig
Definition: MSDataFile.hpp:55

§ demo()

void demo ( )

Definition at line 259 of file MSDataFileTest.cpp.

References pwiz::msdata::MSDataFile::WriteConfig::binaryDataEncoderConfig, filenameBase_, pwiz::msdata::MSDataFile::WriteConfig::format, Format_mzXML, Format_Text, pwiz::identdata::examples::initializeTiny(), pwiz::msdata::BinaryDataEncoder::Config::precision, and pwiz::identdata::IO::write().

260 {
261  MSData tiny;
263 
265  MSDataFile::write(tiny, filenameBase_ + ".64.mzML", config);
266 
267  config.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_32;
268  MSDataFile::write(tiny, filenameBase_ + ".32.mzML", config);
269 
271  MSDataFile::write(tiny, filenameBase_ + ".txt", config);
272 
274  MSDataFile::write(tiny, filenameBase_ + ".32.mzXML", config);
275 
276  config.binaryDataEncoderConfig.precision = BinaryDataEncoder::Precision_64;
277  MSDataFile::write(tiny, filenameBase_ + ".64.mzXML", config);
278 }
Format_Text
configuration for write()
Definition: MSDataFile.hpp:52
PWIZ_API_DECL void write(minimxml::XMLWriter &writer, const CV &cv)
Format_mzXML
Definition: MSDataFile.hpp:49
PWIZ_API_DECL void initializeTiny(IdentData &mzid)
string filenameBase_
BinaryDataEncoder::Config binaryDataEncoderConfig
Definition: MSDataFile.hpp:55
This is the root element of ProteoWizard; it represents the mzML element, defined as: intended to cap...
Definition: MSData.hpp:845

§ testReader()

void testReader ( )

Definition at line 326 of file MSDataFileTest.cpp.

References TestReader::count, filenameBase_, os_, rawHeader_, and unit_assert.

327 {
328  // create a file
329  string filename = filenameBase_ + ".RAW";
330  ofstream os(filename.c_str());
331  os.write(rawHeader_, 18);
332  os.close();
333 
334  // open the file with our Reader
335  TestReader reader;
336  MSDataFile msd(filename, &reader);
337 
338  // verify that our reader got called properly
339  unit_assert(reader.count == 2);
340 
341  // remove temp file
342  boost::filesystem::remove(filename);
343 
344  if (os_) *os_ << endl;
345 }
MSData object plus file I/O.
Definition: MSDataFile.hpp:40
string filenameBase_
const char rawHeader_[]
ostream * os_
#define unit_assert(x)
Definition: unit.hpp:85

§ testSHA1()

void testSHA1 ( )

Definition at line 348 of file MSDataFileTest.cpp.

References pwiz::msdata::MSData::fileDescription, filenameBase_, pwiz::identdata::examples::initializeTiny(), MS_SHA_1, os_, pwiz::msdata::FileDescription::sourceFilePtrs, unit_assert, and pwiz::identdata::IO::write().

Referenced by main().

349 {
350  if (os_) *os_ << "testSHA1()\n";
351 
352  // write out a test file
353 
354  string filename = filenameBase_ + ".SHA1Test";
355  MSData tiny;
357  MSDataFile::write(tiny, filename);
358 
359  {
360  // read in without SHA-1 calculation
361  MSDataFile msd(filename);
362 
363  if (os_)
364  {
365  *os_ << "no SHA-1:\n";
367  IO::write(writer, *msd.fileDescription.sourceFilePtrs.back());
368  }
369 
370  unit_assert(!msd.fileDescription.sourceFilePtrs.empty());
371  unit_assert(!msd.fileDescription.sourceFilePtrs.back()->hasCVParam(MS_SHA_1));
372 
373  // read in with SHA-1 calculation
374 
375  MSDataFile msd_sha1(filename, 0, true);
376 
377  if (os_)
378  {
379  *os_ << "with SHA-1:\n";
381  IO::write(writer, *msd_sha1.fileDescription.sourceFilePtrs.back());
382  }
383 
384  unit_assert(!msd_sha1.fileDescription.sourceFilePtrs.empty());
385  unit_assert(msd_sha1.fileDescription.sourceFilePtrs.back()->hasCVParam(MS_SHA_1));
386  }
387 
388  // clean up
389 
390  boost::filesystem::remove(filename);
391  if (os_) *os_ << endl;
392 }
The XMLWriter class provides simple, tag-level XML syntax writing.
Definition: XMLWriter.hpp:47
MS_SHA_1
SHA-1: SHA-1 (Secure Hash Algorithm-1) is a cryptographic hash function designed by the National Secu...
Definition: cv.hpp:2164
PWIZ_API_DECL void write(minimxml::XMLWriter &writer, const CV &cv)
MSData object plus file I/O.
Definition: MSDataFile.hpp:40
PWIZ_API_DECL void initializeTiny(IdentData &mzid)
string filenameBase_
This is the root element of ProteoWizard; it represents the mzML element, defined as: intended to cap...
Definition: MSData.hpp:845
ostream * os_
#define unit_assert(x)
Definition: unit.hpp:85

§ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 395 of file MSDataFileTest.cpp.

References os_, test(), TEST_EPILOG, TEST_FAILED, TEST_PROLOG, pwiz::util::testReader(), and testSHA1().

396 {
397  TEST_PROLOG(argc, argv)
398 
399  try
400  {
401  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
402  test();
403  //demo();
404  testReader();
405  testSHA1();
406  }
407  catch (exception& e)
408  {
409  TEST_FAILED(e.what())
410  }
411  catch (...)
412  {
413  TEST_FAILED("Caught unknown exception.")
414  }
415 
417 }
#define TEST_EPILOG
Definition: unit.hpp:182
void testSHA1()
#define TEST_FAILED(x)
Definition: unit.hpp:176
void test()
#define TEST_PROLOG(argc, argv)
Definition: unit.hpp:174
ostream * os_
void testReader()

Variable Documentation

§ os_

ostream* os_ = 0

Definition at line 45 of file MSDataFileTest.cpp.

Referenced by main(), testReader(), testSHA1(), and validateWriteRead().

§ filenameBase_

string filenameBase_ = "temp.MSDataFileTest"

§ rawHeader_

const char rawHeader_[]
Initial value:
= {'\x01', '\xA1',
'F', '\0', 'i', '\0', 'n', '\0', 'n', '\0',
'i', '\0', 'g', '\0', 'a', '\0', 'n', '\0'}

Definition at line 281 of file MSDataFileTest.cpp.

Referenced by TestReader::identify(), and testReader().