ProteoWizard
MSDataAnalyzer.hpp
Go to the documentation of this file.
1 //
2 // $Id: MSDataAnalyzer.hpp 4171 2012-12-06 18:45:39Z pcbrefugee $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2008 Spielberg Family Center for Applied Proteomics
8 // Cedars-Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #ifndef _MSDATAANALYZER_HPP_
25 #define _MSDATAANALYZER_HPP_
26 
27 
31 #include <iosfwd>
32 
33 
34 namespace pwiz {
35 namespace analysis {
36 
37 
38 using namespace msdata;
39 
40 
41 ///
42 /// Interface for MSData analyzers.
43 ///
44 /// MSDataAnalyzer encapsulates a passive update strategy. The MSDataAnalyzer expects to
45 /// handle events generated from an outside driver. This allows the driver to
46 /// control access to the MSData object -- in particular, the driver can ensure
47 /// that scans are read from file only once.
48 ///
49 /// Event sequence:
50 /// - open
51 /// - loop:
52 /// - updateRequested
53 /// - update
54 /// - close
55 ///
56 /// UpdateRequest_Ok handles the following use case: a spectrum cache wants to cache
57 /// only those spectra that are requested by other MSDataAnalyzers; it won't request
58 /// any updates, but it needs to see any update requested by someone else.
59 ///
61 {
62  public:
63 
64  /// information about the data to be analyzed
66  {
67  const MSData& msd;
68  std::string sourceFilename;
69  std::string outputDirectory;
70  std::ostream* log;
71 
72  DataInfo(const MSData& _msd) : msd(_msd), log(0) {}
73  };
74 
75  enum PWIZ_API_DECL UpdateRequest
76  {
77  UpdateRequest_None, // do not update
78  UpdateRequest_Ok, // will accept an update
79  UpdateRequest_NoBinary, // update requested, no binary data needed
80  UpdateRequest_Full // update requested, with binary data
81  };
82 
83  /// \name Event Handling
84  //@{
85 
86  /// start analysis of the data
87  virtual void open(const DataInfo& dataInfo) {}
88 
89  /// ask analyzer if it wants an update
90  virtual UpdateRequest updateRequested(const DataInfo& dataInfo,
91  const SpectrumIdentity& spectrumIdentity) const
92  {
93  return UpdateRequest_None;
94  }
95 
96  /// analyze a single spectrum
97  virtual void update(const DataInfo& dataInfo,
98  const Spectrum& spectrum) {}
99 
100  /// end analysis of the data
101  virtual void close(const DataInfo& dataInfo) {}
102  //@}
103 
104  virtual ~MSDataAnalyzer() {}
105 };
106 
107 
108 typedef boost::shared_ptr<MSDataAnalyzer> MSDataAnalyzerPtr;
109 
110 
111 /// This auxiliary class should be specialized for MSDataAnalyzers
112 /// whose instantiation is controlled by user-supplied strings
113 /// (via command line, config file, etc.).
114 template <typename analyzer_type>
116 {
117  /// string identifier for the analyzer
118  static const char* id() {return "analyzer_traits not specialized";}
119 
120  /// description of the analyzer
121  static const char* description() {return typeid(analyzer_type).name();}
122 
123  /// format of args string
124  static const char* argsFormat() {return "";}
125 
126  /// description of args string options
127  static std::vector<std::string> argsUsage() {return std::vector<std::string>();}
128 };
129 
130 
131 ///
132 /// container of MSDataAnalyzer (composite pattern)
133 ///
135  public std::vector<MSDataAnalyzerPtr>
136 {
137  public:
138 
139  /// \name MSDataAnalyzer interface
140  //@{
141  virtual void open(const DataInfo& dataInfo);
142 
143  virtual UpdateRequest updateRequested(const DataInfo& dataInfo,
144  const SpectrumIdentity& spectrumIdentity) const;
145 
146  virtual void update(const DataInfo& dataInfo,
147  const Spectrum& spectrum);
148 
149  virtual void close(const DataInfo& dataInfo);
150  //@}
151 };
152 
153 
154 ///
155 /// event generator for MSDataAnalyzer
156 ///
158 {
159  public:
160 
161  /// instantiate with an MSDataAnalyzer
163 
164  enum PWIZ_API_DECL Status {Status_Ok, Status_Cancel};
165 
166  /// progress callback interface
168  {
169  public:
170  virtual size_t iterationsPerCallback() const {return 100;}
171  virtual Status progress(size_t index, size_t size) {return Status_Ok;}
172  virtual ~ProgressCallback(){}
173  };
174 
175  ///
176  /// analyze a single MSData object, calling back to client if requested
177  ///
178  /// If progressCallback->progress() returns Status_Cancel, analysis
179  /// is canceled and Status_Cancel is returned.
180  ///
181  Status analyze(const MSDataAnalyzer::DataInfo& dataInfo,
182  ProgressCallback* progressCallback = 0) const;
183 
184  private:
186 };
187 
188 // helper function for argument parsing
189 // return true iff text contains desiredArg followed by '=' and
190 // a range of form [a,b] or [a] or a,b or a or [a-b] or a-b
191 // iff true then populates result
192 template <typename value_type>
193 bool parseRange(const std::string &desiredArg, const std::string& text, std::pair<value_type,value_type>& result, const std::string& callerName)
194 {
195  if (!text.compare(0,desiredArg.size()+1,desiredArg+"="))
196  {
197  std::string val = text.substr(desiredArg.size()+1);
198  std::string::size_type indexPairSeperator = val.find(',');
199  if (std::string::npos == indexPairSeperator)
200  { // no comma, perhaps a dash instead?
201  indexPairSeperator = val.find('-');
202  if (0==indexPairSeperator) // assume that's just a negative value
203  {
204  indexPairSeperator = string::npos;
205  }
206  }
207  int bracket = (val[0] == '[')?1:0;
208  if (val.empty() ||
209  ((bracket!=0) && val[val.size()-1] != ']'))
210  {
211  std::cerr << "[" << callerName << "] Unable to parse range: " << text << endl;
212  return false;
213  }
214 
215  try
216  {
217  if (std::string::npos == indexPairSeperator)
218  { // form "<start>", read as "<start>-<start>"
219  std::string first = val.substr(bracket,val.size()-(2*bracket));
220  result.first = result.second = lexical_cast<value_type>(first);
221  }
222  else
223  { // form "<start>-<end>" or "<start>-"
224  std::string first = val.substr(bracket, indexPairSeperator-bracket);
225  std::string second = val.substr(indexPairSeperator+1, val.size()-indexPairSeperator-(1+bracket));
226  result.first = lexical_cast<value_type>(first);
227  if (second.size()) // form "<start>-<end>"
228  result.second = lexical_cast<value_type>(second);
229  else // form "<start>-", assume that's "<start>-maxval"
230  result.second = numeric_limits<value_type>::max();
231  }
232  return true;
233  }
234  catch (boost::bad_lexical_cast&)
235  {
236  std::cerr << "[" << callerName << "] Unable to parse range: " << text << endl;
237  }
238  }
239 
240  return false;
241 }
242 template <typename value_type>
243 bool parseValue(const std::string &desiredArg, const std::string& text, value_type& result, const std::string& callerName)
244 {
245  if (!text.compare(0,desiredArg.size()+1,desiredArg+"="))
246  {
247  std::string val = text.substr(desiredArg.size()+1);
248  if (val.empty())
249  {
250  std::cerr << "[" << callerName << "] Unable to parse value: " << text << endl;
251  return false;
252  }
253 
254  try
255  {
256  result = lexical_cast<value_type>(val);
257  return true;
258  }
259  catch (boost::bad_lexical_cast&)
260  {
261  std::cerr << "[" << callerName << "] Unable to parse value: " << text << endl;
262  }
263  }
264 
265  return false;
266 }
267 
268 } // namespace analysis
269 } // namespace pwiz
270 
271 
272 #endif // _MSDATAANALYZER_HPP_
273 
UpdateRequest_None
bool parseValue(const std::string &desiredArg, const std::string &text, value_type &result, const std::string &callerName)
Status_Ok
boost::shared_ptr< MSDataAnalyzer > MSDataAnalyzerPtr
virtual Status progress(size_t index, size_t size)
static std::vector< std::string > argsUsage()
description of args string options
virtual UpdateRequest updateRequested(const DataInfo &dataInfo, const SpectrumIdentity &spectrumIdentity) const
ask analyzer if it wants an update
float lexical_cast(const std::string &str)
UpdateRequest_Ok
container of MSDataAnalyzer (composite pattern)
static const char * description()
description of the analyzer
virtual void update(const DataInfo &dataInfo, const Spectrum &spectrum)
analyze a single spectrum
UpdateRequest_NoBinary
event generator for MSDataAnalyzer
#define PWIZ_API_DECL
Definition: Export.hpp:32
This auxilliary class should be specialized for MSDataAnalyzers whose instantiation is controlled by ...
virtual void open(const DataInfo &dataInfo)
start analysis of the data
Identifying information for a spectrum.
Definition: MSData.hpp:469
static const char * argsFormat()
format of args string
information about the data to be analyzed
bool parseRange(const std::string &desiredArg, const std::string &text, std::pair< value_type, value_type > &result, const std::string &callerName)
The structure that captures the generation of a peak list (including the underlying acquisitions) ...
Definition: MSData.hpp:504
virtual void close(const DataInfo &dataInfo)
end analysis of the data
This is the root element of ProteoWizard; it represents the mzML element, defined as: intended to cap...
Definition: MSData.hpp:845
Interface for MSData analyzers.
static const char * id()
string identifier for the analyzer