ProteoWizard
MSNumpress.hpp
Go to the documentation of this file.
1 //
2 // $Id: MSNumpress.hpp 6945 2014-11-26 18:58:33Z chambm $
3 //
4 //
5 // Original author: Johan Teleman <johan.teleman@immun.lth.se>
6 //
7 // Copyright 2013 Johan Teleman
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // Unless required by applicable law or agreed to in writing, software
16 // distributed under the License is distributed on an "AS IS" BASIS,
17 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 // See the License for the specific language governing permissions and
19 // limitations under the License.
20 //
21 
22 
23 /*
24  ==================== encodeInt ====================
25  Some of the encodings described below use an integer compression referred to simply as
26 
27  encodeInt()
28 
29  This encoding works on a 4 byte integer, by truncating initial zeros or ones.
30  If the initial (most significant) half byte is 0x0 or 0xf, the number of such
31  halfbytes starting from the most significant is stored in a halfbyte. This initial
32  count is then followed by the rest of the int's halfbytes, in little-endian order.
33  A count halfbyte c of
34 
35  0 <= c <= 8 is interpreted as an initial c 0x0 halfbytes
36  9 <= c <= 15 is interpreted as an initial (c-8) 0xf halfbytes
37 
38  Ex:
39  int c rest
40  0 => 0x8
41  -1 => 0xf 0xf
42  23 => 0x6 0x7 0x1
43  */
44 
45 #ifndef _MSNUMPRESS_HPP_
46 #define _MSNUMPRESS_HPP_
47 
48 
50 #include <cstddef>
51 #include <vector>
52 
53 
54 namespace pwiz {
55 namespace msdata {
56 
57 namespace MSNumpress {
58 
60  const double *data,
61  size_t dataSize);
62 
63  /**
64  * Encodes the doubles in data by first using a
65  * - lossy conversion to a 4 byte 5 decimal fixed point representation
66  * - storing the residuals from a linear prediction after the first two values
67  * - encoding by encodeInt (see above)
68  *
69  * The resulting binary is maximally dataSize * 5 bytes, but much less if the
70  * data is reasonably smooth on the first order.
71  *
72  * This encoding is suitable for typical m/z or retention time binary arrays.
73  * For masses above 100 m/z the encoding is accurate to at least 0.1 ppm.
74  *
75  * @data pointer to array of double to be encoded (need memorycont. repr.)
76  * @dataSize number of doubles from *data to encode
77  * @result pointer to where resulting bytes should be stored
78  * @fixedPoint the scaling factor used for getting the fixed point repr.
79  * This is stored in the binary and automatically extracted
80  * on decoding. Automatically (and maybe slowly) determined if 0.
81  * @return the number of encoded bytes
82  */
84  const double *data,
85  const size_t dataSize,
86  unsigned char *result,
87  double fixedPoint);
88 
89  /**
90  * Calls lower level encodeLinear while handling vector sizes appropriately
91  *
92  * @data vector of doubles to be encoded
93  * @result vector of resulting bytes (will be resized to the number of bytes)
94  */
96  const std::vector<double> &data,
97  std::vector<unsigned char> &result,
98  double fixedPoint);
99 
100  /**
101  * Decodes data encoded by encodeLinear. Note that the compression
102  * discards any information < 1e-5, so data is only guaranteed
103  * to be within +- 5e-6 of the original value.
104  *
105  * Further, values > ~42000 will also be truncated because of the
106  * fixed point representation, so this scheme is strongly discouraged
107  * if values might exceed this size.
108  *
109  * The result vector is guaranteed to be shorter than twice the data length (in nbr of values)
110  *
111  * @data pointer to array of bytes to be decoded (need memorycont. repr.)
112  * @dataSize number of bytes from *data to decode
113  * @result pointer to where resulting doubles should be stored
114  * @return the number of decoded doubles, or -1 if dataSize < 4 or 4 < dataSize < 8
115  */
117  const unsigned char *data,
118  const size_t dataSize,
119  double *result);
120 
121  /**
122  * Calls lower level decodeLinear while handling vector sizes appropriately
123  *
124  * @data vector of bytes to be decoded
125  * @result vector of resulting double (will be resized to the number of doubles)
126  */
128  const std::vector<unsigned char> &data,
129  std::vector<double> &result);
130 
131 /////////////////////////////////////////////////////////////
132 
133  /**
134  * Encodes ion counts by simply rounding to the nearest 4 byte integer,
135  * and compressing each integer with encodeInt.
136  *
137  * The range that can be handled is therefore 0 -> 4294967294.
138  * The resulting binary is maximally dataSize * 5 bytes, but much less if the
139  * data is close to 0 on average.
140  *
141  * @data pointer to array of doubles to be encoded (must be contiguous in memory)
142  * @dataSize number of doubles from *data to encode
143  * @result pointer to where the resulting bytes should be stored
144  * @return the number of encoded bytes
145  */
146  size_t PWIZ_API_DECL encodePic(
147  const double *data,
148  const size_t dataSize,
149  unsigned char *result);
150 
151  /**
152  * Calls lower level encodePic while handling vector sizes appropriately
153  *
154  * @data vector of doubles to be encoded
155  * @result vector of resulting bytes (will be resized to the number of bytes)
156  */
158  const std::vector<double> &data,
159  std::vector<unsigned char> &result);
160 
161  /**
162  * Decodes data encoded by encodePic
163  *
164  * result vector guaranteedly shorter than twice the data length (in nbr of values)
165  *
166  * @data pointer to array of bytes to be decoded (need memorycont. repr.)
167  * @dataSize number of bytes from *data to decode
168  * @result pointer to were resulting doubles should be stored
169  * @return the number of decoded doubles
170  */
172  const std::vector<unsigned char> &data,
173  std::vector<double> &result);
174 
175  /**
176  * Decodes data encoded by encodePic and returns the number of decoded doubles
177  * @data pointer to array of bytes to be decoded (must be contiguous in memory)
178  * @dataSize number of bytes from *data to decode
179  * @result pointer to where the resulting doubles should be stored
180  */
181  size_t PWIZ_API_DECL decodePic(
182  const unsigned char *data,
183  const size_t dataSize,
184  double *result);
185 
186 /////////////////////////////////////////////////////////////
187 
188 
190  const double *data,
191  size_t dataSize);
192 
193  /**
194  * Encodes ion counts by taking the natural logarithm, and storing a
195  * fixed point representation of this. This is calculated as
196  *
197  * unsigned short fp = log(d + 1) * 3000.0 + 0.5
198  *
199  * Note that this fixed point will mean any d < 0.00016667 will be
200  * stored as a zero and mapped back to a zero.
201  *
202  * result vector is exactly twice the data length (in nbr of values)
203  *
204  * @data pointer to array of doubles to be encoded (must be contiguous in memory)
205  * @dataSize number of doubles from *data to encode
206  * @result pointer to where the resulting bytes should be stored
207  * @fixedPoint automatically (and maybe slowly) determined if 0.
208  * @return the number of encoded bytes
209  */
210  size_t PWIZ_API_DECL encodeSlof(
211  const double *data,
212  const size_t dataSize,
213  unsigned char *result,
214  double fixedPoint);
215 
216  /**
217  * Calls lower level encodeSlof while handling vector sizes appropriately
218  *
219  * @data vector of doubles to be encoded
220  * @result vector of resulting bytes (will be resized to the number of bytes)
221  */
223  const std::vector<double> &data,
224  std::vector<unsigned char> &result,
225  double fixedPoint);
226 
227  /**
228  * Decodes data encoded by encodeSlof
229  *
230  * @data pointer to array of bytes to be decoded (must be contiguous in memory)
231  * @dataSize number of bytes from *data to decode
232  * @result pointer to where the resulting doubles should be stored
233  * @return the number of decoded doubles
234  */
235  size_t PWIZ_API_DECL decodeSlof(
236  const unsigned char *data,
237  const size_t dataSize,
238  double *result);
239 
240  /**
241  * Calls lower level decodeSlof while handling vector sizes appropriately
242  *
243  * @data vector of bytes to be decoded
244  * @result vector of resulting double (will be resized to the number of doubles)
245  */
247  const std::vector<unsigned char> &data,
248  std::vector<double> &result);
249 
250 } // namespace MSNumpress
251 } // namespace msdata
252 } // namespace pwiz
253 
254 #endif // _MSNUMPRESS_HPP_
size_t PWIZ_API_DECL encodeLinear(const double *data, const size_t dataSize, unsigned char *result, double fixedPoint)
Encodes the doubles in data by first using a.
size_t PWIZ_API_DECL encodePic(const double *data, const size_t dataSize, unsigned char *result)
Encodes ion counts by simply rounding to the nearest 4 byte integer, and compressing each integer wit...
size_t PWIZ_API_DECL decodeSlof(const unsigned char *data, const size_t dataSize, double *result)
Decodes data encoded by encodeSlof.
double PWIZ_API_DECL optimalLinearFixedPoint(const double *data, size_t dataSize)
size_t PWIZ_API_DECL encodeSlof(const double *data, const size_t dataSize, unsigned char *result, double fixedPoint)
Encodes ion counts by taking the natural logarithm, and storing a fixed point representation of this...
#define PWIZ_API_DECL
Definition: Export.hpp:32
size_t PWIZ_API_DECL decodeLinear(const unsigned char *data, const size_t dataSize, double *result)
Decodes data encoded by encodeLinear.
void PWIZ_API_DECL decodePic(const std::vector< unsigned char > &data, std::vector< double > &result)
Decodes data encoded by encodePic.
double PWIZ_API_DECL optimalSlofFixedPoint(const double *data, size_t dataSize)