View Javadoc
1   package nom.tam.util;
2   
3   import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
4   
5   /*
6    * #%L
7    * nom.tam FITS library
8    * %%
9    * Copyright (C) 2004 - 2024 nom-tam-fits
10   * %%
11   * This is free and unencumbered software released into the public domain.
12   *
13   * Anyone is free to copy, modify, publish, use, compile, sell, or
14   * distribute this software, either in source code form or as a compiled
15   * binary, for any purpose, commercial or non-commercial, and by any
16   * means.
17   *
18   * In jurisdictions that recognize copyright laws, the author or authors
19   * of this software dedicate any and all copyright interest in the
20   * software to the public domain. We make this dedication for the benefit
21   * of the public at large and to the detriment of our heirs and
22   * successors. We intend this dedication to be an overt act of
23   * relinquishment in perpetuity of all present and future rights to this
24   * software under copyright law.
25   *
26   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
29   * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
30   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
31   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
32   * OTHER DEALINGS IN THE SOFTWARE.
33   * #L%
34   */
35  
36  /**
37   * This class provides routines for efficient parsing of data stored in a byte array. This routine is optimized (in
38   * theory at least!) for efficiency rather than accuracy. The values read in for doubles or floats may differ in the
39   * last bit or so from the standard input utilities, especially in the case where a float is specified as a very long
40   * string of digits (substantially longer than the precision of the type).
41   * <p>
42   * The get methods generally are available with or without a length parameter specified. When a length parameter is
43   * specified only the bytes with the specified range from the current offset will be search for the number. If no length
44   * is specified, the entire buffer from the current offset will be searched.
45   * <p>
46   * The getString method returns a string with leading and trailing white space left intact. For all other get calls,
47   * leading white space is ignored. If fillFields is set, then the get methods check that only white space follows valid
48   * data and a FormatException is thrown if that is not the case. If fillFields is not set and valid data is found, then
49   * the methods return having read as much as possible. E.g., for the sequence "T123.258E13", a getBoolean, getInteger
50   * and getFloat call would return true, 123, and 2.58e12 when called in succession.
51   * 
52   * @deprecated This class should not be exposed in the public API and is intended for internal use only in ASCII tables.
53   *                 Also, it may have overlapping functionality with other classes, which should probably be eliminated
54   *                 for simplicity's sake (and thus less chance of nasty bugs).
55   * 
56   * @see        ByteFormatter
57   */
58  @Deprecated
59  public class ByteParser {
60  
61      private static final int EXPONENT_DENORMALISATION_CORR_LIMIT = -300;
62  
63      private static final double EXPONENT_DENORMALISATION_FACTOR = 1.e-300;
64  
65      private static final byte[] INFINITY_LOWER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toLowerCase());
66  
67      private static final byte[] INFINITY_UPPER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toUpperCase());
68  
69      private static final int INFINITY_LENGTH = ByteParser.INFINITY_UPPER.length;
70  
71      private static final int INFINITY_SHORTCUT_LENGTH = 3;
72  
73      private static final byte[] NOT_A_NUMBER_LOWER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toLowerCase());
74  
75      private static final byte[] NOT_A_NUMBER_UPPER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toUpperCase());
76  
77      private static final int NOT_A_NUMBER_LENGTH = ByteParser.NOT_A_NUMBER_UPPER.length;
78  
79      /**
80       * The underlying number base used in this class.
81       */
82      private static final int NUMBER_BASE = 10;
83  
84      /**
85       * The underlying number base used in this class as a double value.
86       */
87      private static final double NUMBER_BASE_DOUBLE = 10.;
88  
89      /**
90       * Did we find a sign last time we checked?
91       */
92      private boolean foundSign;
93  
94      /**
95       * Array being parsed
96       */
97      private byte[] input;
98  
99      /**
100      * Length of last parsed value
101      */
102     private int numberLength;
103 
104     /**
105      * Current offset into input.
106      */
107     private int offset;
108 
109     /**
110      * Construct a parser.
111      *
112      * @param input The byte array to be parsed. Note that the array can be re-used by refilling its contents and
113      *                  resetting the offset.
114      */
115     @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "intended exposure of mutable data")
116     public ByteParser(byte[] input) {
117         this.input = input;
118         offset = 0;
119     }
120 
121     /**
122      * Find the sign for a number . This routine looks for a sign (+/-) at the current location and return +1/-1 if one
123      * is found, or +1 if not. The foundSign boolean is set if a sign is found and offset is incremented.
124      */
125     private int checkSign() {
126 
127         foundSign = false;
128 
129         if (input[offset] == '+') {
130             foundSign = true;
131             offset++;
132             return 1;
133         }
134         if (input[offset] == '-') {
135             foundSign = true;
136             offset++;
137             return -1;
138         }
139 
140         return 1;
141     }
142 
143     /**
144      * Get the integer value starting at the current position. This routine returns a double rather than an int/long to
145      * enable it to read very long integers (with reduced precision) such as 111111111111111111111111111111111111111111.
146      * Note that this routine does set numberLength.
147      *
148      * @param length The maximum number of characters to use.
149      */
150     private double getBareInteger(int length) {
151 
152         int startOffset = offset;
153         double number = 0;
154 
155         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
156 
157             number *= ByteParser.NUMBER_BASE;
158             number += input[offset] - '0';
159             offset++;
160             length--;
161         }
162         numberLength = offset - startOffset;
163         return number;
164     }
165 
166     /**
167      * @return                 a boolean value from the beginning of the buffer.
168      *
169      * @throws FormatException if the double was in an unknown format
170      */
171     public boolean getBoolean() throws FormatException {
172         return getBoolean(input.length - offset);
173     }
174 
175     /**
176      * @return                 a boolean value from a specified region of the buffer
177      *
178      * @param  length          The maximum number of characters used to parse this boolean.
179      *
180      * @throws FormatException if the double was in an unknown format
181      */
182     public boolean getBoolean(int length) throws FormatException {
183 
184         int startOffset = offset;
185         length -= skipWhite(length);
186         if (length == 0) {
187             throw new FormatException("Blank boolean field");
188         }
189 
190         boolean value = false;
191         if (input[offset] == 'T' || input[offset] == 't') {
192             value = true;
193         } else if (input[offset] != 'F' && input[offset] != 'f') {
194             numberLength = 0;
195             offset = startOffset;
196             throw new FormatException("Invalid boolean value");
197         }
198         offset++;
199         numberLength = offset - startOffset;
200         return value;
201     }
202 
203     /**
204      * @return the buffer being used by the parser
205      */
206     @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "intended exposure of mutable data")
207     public byte[] getBuffer() {
208         return input;
209     }
210 
211     /**
212      * Read in the buffer until a double is read. This will read the entire buffer if fillFields is set.
213      *
214      * @return                 The value found.
215      *
216      * @throws FormatException if the double was in an unknown format
217      */
218     public double getDouble() throws FormatException {
219         return getDouble(input.length - offset);
220     }
221 
222     /**
223      * @return                 a parsed double from the buffer. Leading spaces are ignored.
224      *
225      * @param  length          The maximum number of characters used to parse this number. If fillFields is specified
226      *                             then exactly only whitespace may follow a valid double value.
227      *
228      * @throws FormatException if the double was in an unknown format
229      */
230     public double getDouble(int length) throws FormatException {
231         int startOffset = offset;
232         boolean error = true;
233         double number;
234         // Skip initial blanks.
235         length -= skipWhite(length);
236         if (length == 0) {
237             numberLength = offset - startOffset;
238             return 0;
239         }
240         double mantissaSign = checkSign();
241         if (foundSign) {
242             length--;
243         }
244         // Look for the special strings NaN, Inf,
245         if (isCaseInsensitiv(length, ByteParser.NOT_A_NUMBER_LENGTH, ByteParser.NOT_A_NUMBER_LOWER,
246                 ByteParser.NOT_A_NUMBER_UPPER)) {
247             number = Double.NaN;
248             offset += ByteParser.NOT_A_NUMBER_LENGTH;
249             // Look for the longer string first then try the shorter.
250         } else if (isCaseInsensitiv(length, ByteParser.INFINITY_LENGTH, ByteParser.INFINITY_LOWER,
251                 ByteParser.INFINITY_UPPER)) {
252             number = Double.POSITIVE_INFINITY;
253             offset += ByteParser.INFINITY_LENGTH;
254         } else if (isCaseInsensitiv(length, ByteParser.INFINITY_SHORTCUT_LENGTH, ByteParser.INFINITY_LOWER,
255                 ByteParser.INFINITY_UPPER)) {
256             number = Double.POSITIVE_INFINITY;
257             offset += ByteParser.INFINITY_SHORTCUT_LENGTH;
258         } else {
259             number = getBareInteger(length); // This will update offset
260             length -= numberLength; // Set by getBareInteger
261             if (numberLength > 0) {
262                 error = false;
263             }
264             // Check for fractional values after decimal
265             if (length > 0 && input[offset] == '.') {
266                 offset++;
267                 length--;
268                 double numerator = getBareInteger(length);
269                 if (numerator > 0) {
270                     number += numerator / Math.pow(ByteParser.NUMBER_BASE_DOUBLE, numberLength);
271                 }
272                 length -= numberLength;
273                 if (numberLength > 0) {
274                     error = false;
275                 }
276             }
277 
278             if (error) {
279                 offset = startOffset;
280                 numberLength = 0;
281                 throw new FormatException("Invalid real field");
282             }
283 
284             // Look for an exponent ,Our Fortran heritage means that we allow
285             // 'D' for the exponent
286             // indicator.
287             if (length > 0
288                     && (input[offset] == 'e' || input[offset] == 'E' || input[offset] == 'd' || input[offset] == 'D')) {
289                 offset++;
290                 length--;
291                 if (length > 0) {
292                     int sign = checkSign();
293                     if (foundSign) {
294                         length--;
295                     }
296 
297                     int exponent = (int) getBareInteger(length);
298 
299                     // For very small numbers we try to miminize
300                     // effects of denormalization.
301                     if (exponent * sign > ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT) {
302                         number *= Math.pow(ByteParser.NUMBER_BASE_DOUBLE, exponent * sign);
303                     } else {
304                         number = ByteParser.EXPONENT_DENORMALISATION_FACTOR
305                                 * (number * Math.pow(ByteParser.NUMBER_BASE_DOUBLE,
306                                         exponent * sign + ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT * -1));
307                     }
308                 }
309             }
310         }
311         numberLength = offset - startOffset;
312         return mantissaSign * number;
313     }
314 
315     /**
316      * @return                 a floating point value from the buffer. (see getDouble(int())
317      *
318      * @throws FormatException if the float was in an unknown format
319      */
320     public float getFloat() throws FormatException {
321         return (float) getDouble(input.length - offset);
322     }
323 
324     /**
325      * @return                 a floating point value in a region of the buffer
326      *
327      * @param  length          The maximum number of characters used to parse this float.
328      *
329      * @throws FormatException if the float was in an unknown format
330      */
331     public float getFloat(int length) throws FormatException {
332         return (float) getDouble(length);
333     }
334 
335     /**
336      * @return                 an integer at the beginning of the buffer
337      *
338      * @throws FormatException if the integer was in an unknown format
339      */
340     public int getInt() throws FormatException {
341         return getInt(input.length - offset);
342     }
343 
344     /**
345      * @return                 a region of the buffer to an integer
346      *
347      * @param  length          The maximum number of characters used to parse this integer. @throws FormatException if
348      *                             the integer was in an unknown format
349      *
350      * @throws FormatException if the integer was in an unknown format
351      */
352     public int getInt(int length) throws FormatException {
353         int startOffset = offset;
354 
355         length -= skipWhite(length);
356         if (length == 0) {
357             numberLength = offset - startOffset;
358             return 0;
359         }
360 
361         int number = 0;
362         boolean error = true;
363 
364         int sign = checkSign();
365         if (foundSign) {
366             length--;
367         }
368 
369         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
370             number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
371             offset++;
372             length--;
373             error = false;
374         }
375 
376         if (error) {
377             numberLength = 0;
378             offset = startOffset;
379             throw new FormatException("Invalid Integer");
380         }
381         numberLength = offset - startOffset;
382         return sign * number;
383     }
384 
385     /**
386      * @return                 a long in a specified region of the buffer
387      *
388      * @param  length          The maximum number of characters used to parse this long.
389      *
390      * @throws FormatException if the long was in an unknown format
391      */
392     public long getLong(int length) throws FormatException {
393 
394         int startOffset = offset;
395 
396         // Skip white space.
397         length -= skipWhite(length);
398         if (length == 0) {
399             numberLength = offset - startOffset;
400             return 0;
401         }
402 
403         long number = 0;
404         boolean error = true;
405 
406         long sign = checkSign();
407         if (foundSign) {
408             length--;
409         }
410 
411         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
412             number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
413             error = false;
414             offset++;
415             length--;
416         }
417 
418         if (error) {
419             numberLength = 0;
420             offset = startOffset;
421             throw new FormatException("Invalid long number");
422         }
423         numberLength = offset - startOffset;
424         return sign * number;
425     }
426 
427     /**
428      * @return the number of characters used to parse the previous number (or the length of the previous String
429      *             returned).
430      */
431     public int getNumberLength() {
432         return numberLength;
433     }
434 
435     /**
436      * Get the current offset.
437      *
438      * @return The current offset within the buffer.
439      */
440     public int getOffset() {
441         return offset;
442     }
443 
444     /**
445      * @return        a string.
446      *
447      * @param  length The length of the string.
448      */
449     public String getString(int length) {
450 
451         String s = AsciiFuncs.asciiString(input, offset, length);
452         offset += length;
453         numberLength = length;
454         return s;
455     }
456 
457     private boolean isCaseInsensitiv(int length, int constantLength, byte[] lowerConstant, byte[] upperConstant) {
458         if (length < constantLength) {
459             return false;
460         }
461         for (int i = 0; i < constantLength; i++) {
462             if (input[offset + i] != lowerConstant[i] && input[offset + i] != upperConstant[i]) {
463                 return false;
464             }
465         }
466         return true;
467     }
468 
469     /**
470      * Set the buffer for the parser.
471      *
472      * @param buf buffer to set
473      */
474     @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "intended exposure of mutable data")
475     public void setBuffer(byte[] buf) {
476         input = buf;
477         offset = 0;
478     }
479 
480     /**
481      * Set the offset into the array.
482      *
483      * @param offset The desired offset from the beginning of the array.
484      */
485     public void setOffset(int offset) {
486         this.offset = offset;
487     }
488 
489     /**
490      * Skip bytes in the buffer.
491      *
492      * @param nBytes number of bytes to skip
493      */
494     public void skip(int nBytes) {
495         offset += nBytes;
496     }
497 
498     /**
499      * Skip white space. This routine skips with space in the input .
500      *
501      * @return        the number of character skipped. White space is defined as ' ', '\t', '\n' or '\r'
502      *
503      * @param  length The maximum number of characters to skip.
504      */
505     public int skipWhite(int length) {
506         int i;
507         for (i = 0; i < length; i++) {
508             if (input[offset + i] != ' ' && input[offset + i] != '\t' && input[offset + i] != '\n'
509                     && input[offset + i] != '\r') {
510                 break;
511             }
512         }
513         offset += i;
514         return i;
515     }
516 }