View Javadoc
1   package nom.tam.util;
2   
3   /*
4    * #%L
5    * nom.tam FITS library
6    * %%
7    * Copyright (C) 2004 - 2024 nom-tam-fits
8    * %%
9    * This is free and unencumbered software released into the public domain.
10   *
11   * Anyone is free to copy, modify, publish, use, compile, sell, or
12   * distribute this software, either in source code form or as a compiled
13   * binary, for any purpose, commercial or non-commercial, and by any
14   * means.
15   *
16   * In jurisdictions that recognize copyright laws, the author or authors
17   * of this software dedicate any and all copyright interest in the
18   * software to the public domain. We make this dedication for the benefit
19   * of the public at large and to the detriment of our heirs and
20   * successors. We intend this dedication to be an overt act of
21   * relinquishment in perpetuity of all present and future rights to this
22   * software under copyright law.
23   *
24   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27   * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
28   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
29   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
30   * OTHER DEALINGS IN THE SOFTWARE.
31   * #L%
32   */
33  
34  /**
35   * This class provides routines for efficient parsing of data stored in a byte array. This routine is optimized (in
36   * theory at least!) for efficiency rather than accuracy. The values read in for doubles or floats may differ in the
37   * last bit or so from the standard input utilities, especially in the case where a float is specified as a very long
38   * string of digits (substantially longer than the precision of the type).
39   * <p>
40   * The get methods generally are available with or without a length parameter specified. When a length parameter is
41   * specified only the bytes with the specified range from the current offset will be search for the number. If no length
42   * is specified, the entire buffer from the current offset will be searched.
43   * <p>
44   * The getString method returns a string with leading and trailing white space left intact. For all other get calls,
45   * leading white space is ignored. If fillFields is set, then the get methods check that only white space follows valid
46   * data and a FormatException is thrown if that is not the case. If fillFields is not set and valid data is found, then
47   * the methods return having read as much as possible. E.g., for the sequence "T123.258E13", a getBoolean, getInteger
48   * and getFloat call would return true, 123, and 2.58e12 when called in succession.
49   * 
50   * @deprecated This class should not be exposed in the public API and is intended for internal use only in ASCII tables.
51   *                 Also, it may have overlapping functionality with other classes, which should probably be eliminated
52   *                 for simplicity's sake (and thus less chance of nasty bugs).
53   * 
54   * @see        ByteFormatter
55   */
56  @Deprecated
57  public class ByteParser {
58  
59      private static final int EXPONENT_DENORMALISATION_CORR_LIMIT = -300;
60  
61      private static final double EXPONENT_DENORMALISATION_FACTOR = 1.e-300;
62  
63      private static final byte[] INFINITY_LOWER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toLowerCase());
64  
65      private static final byte[] INFINITY_UPPER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toUpperCase());
66  
67      private static final int INFINITY_LENGTH = ByteParser.INFINITY_UPPER.length;
68  
69      private static final int INFINITY_SHORTCUT_LENGTH = 3;
70  
71      private static final byte[] NOT_A_NUMBER_LOWER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toLowerCase());
72  
73      private static final byte[] NOT_A_NUMBER_UPPER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toUpperCase());
74  
75      private static final int NOT_A_NUMBER_LENGTH = ByteParser.NOT_A_NUMBER_UPPER.length;
76  
77      /**
78       * The underlying number base used in this class.
79       */
80      private static final int NUMBER_BASE = 10;
81  
82      /**
83       * The underlying number base used in this class as a double value.
84       */
85      private static final double NUMBER_BASE_DOUBLE = 10.;
86  
87      /**
88       * Did we find a sign last time we checked?
89       */
90      private boolean foundSign;
91  
92      /**
93       * Array being parsed
94       */
95      private byte[] input;
96  
97      /**
98       * Length of last parsed value
99       */
100     private int numberLength;
101 
102     /**
103      * Current offset into input.
104      */
105     private int offset;
106 
107     /**
108      * Construct a parser.
109      *
110      * @param input The byte array to be parsed. Note that the array can be re-used by refilling its contents and
111      *                  resetting the offset.
112      */
113     @Deprecated
114     public ByteParser(byte[] input) {
115         this.input = input;
116         offset = 0;
117     }
118 
119     /**
120      * Find the sign for a number . This routine looks for a sign (+/-) at the current location and return +1/-1 if one
121      * is found, or +1 if not. The foundSign boolean is set if a sign is found and offset is incremented.
122      */
123     private int checkSign() {
124 
125         foundSign = false;
126 
127         if (input[offset] == '+') {
128             foundSign = true;
129             offset++;
130             return 1;
131         }
132         if (input[offset] == '-') {
133             foundSign = true;
134             offset++;
135             return -1;
136         }
137 
138         return 1;
139     }
140 
141     /**
142      * Get the integer value starting at the current position. This routine returns a double rather than an int/long to
143      * enable it to read very long integers (with reduced precision) such as 111111111111111111111111111111111111111111.
144      * Note that this routine does set numberLength.
145      *
146      * @param length The maximum number of characters to use.
147      */
148     private double getBareInteger(int length) {
149 
150         int startOffset = offset;
151         double number = 0;
152 
153         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
154 
155             number *= ByteParser.NUMBER_BASE;
156             number += input[offset] - '0';
157             offset++;
158             length--;
159         }
160         numberLength = offset - startOffset;
161         return number;
162     }
163 
164     /**
165      * Returns the next <code>boolean</code> value from the current parse position.
166      * 
167      * @return                 a boolean value from the beginning of the buffer.
168      *
169      * @throws FormatException if the double was in an unknown format
170      */
171     @Deprecated
172     public boolean getBoolean() throws FormatException {
173         return getBoolean(input.length - offset);
174     }
175 
176     /**
177      * Returns the next <code>double</code> value from the current parse position, consuming at most the specified
178      * number of bytes from the input.
179      * 
180      * @return                 a boolean value from a specified region of the buffer
181      *
182      * @param  length          The maximum number of characters used to parse this boolean.
183      *
184      * @throws FormatException if the double was in an unknown format
185      */
186     @Deprecated
187     public boolean getBoolean(int length) throws FormatException {
188 
189         int startOffset = offset;
190         length -= skipWhite(length);
191         if (length == 0) {
192             throw new FormatException("Blank boolean field");
193         }
194 
195         boolean value = false;
196         if (input[offset] == 'T' || input[offset] == 't') {
197             value = true;
198         } else if (input[offset] != 'F' && input[offset] != 'f') {
199             numberLength = 0;
200             offset = startOffset;
201             throw new FormatException("Invalid boolean value");
202         }
203         offset++;
204         numberLength = offset - startOffset;
205         return value;
206     }
207 
208     /**
209      * Returns the underlying buffer to this parser.
210      * 
211      * @return the buffer being used by the parser
212      */
213     @Deprecated
214     public byte[] getBuffer() {
215         return input;
216     }
217 
218     /**
219      * Returns the next <code>long</code> value from the current parse position, consuming at most the specified number
220      * of bytes from the input. This will read the entire buffer if fillFields is set.
221      *
222      * @return                 The value found.
223      *
224      * @throws FormatException if the double was in an unknown format
225      */
226     @Deprecated
227     public double getDouble() throws FormatException {
228         return getDouble(input.length - offset);
229     }
230 
231     /**
232      * Returns the next <code>double</code> value from the current parse position, consuming at most the specified
233      * number of bytes from the input.
234      * 
235      * @return                 a parsed double from the buffer. Leading spaces are ignored.
236      *
237      * @param  length          The maximum number of characters used to parse this number. If fillFields is specified
238      *                             then exactly only whitespace may follow a valid double value.
239      *
240      * @throws FormatException if the double was in an unknown format
241      */
242     @Deprecated
243     public double getDouble(int length) throws FormatException {
244         int startOffset = offset;
245         boolean error = true;
246         double number;
247         // Skip initial blanks.
248         length -= skipWhite(length);
249         if (length == 0) {
250             numberLength = offset - startOffset;
251             return 0;
252         }
253         double mantissaSign = checkSign();
254         if (foundSign) {
255             length--;
256         }
257         // Look for the special strings NaN, Inf,
258         if (isCaseInsensitiv(length, ByteParser.NOT_A_NUMBER_LENGTH, ByteParser.NOT_A_NUMBER_LOWER,
259                 ByteParser.NOT_A_NUMBER_UPPER)) {
260             number = Double.NaN;
261             offset += ByteParser.NOT_A_NUMBER_LENGTH;
262             // Look for the longer string first then try the shorter.
263         } else if (isCaseInsensitiv(length, ByteParser.INFINITY_LENGTH, ByteParser.INFINITY_LOWER,
264                 ByteParser.INFINITY_UPPER)) {
265             number = Double.POSITIVE_INFINITY;
266             offset += ByteParser.INFINITY_LENGTH;
267         } else if (isCaseInsensitiv(length, ByteParser.INFINITY_SHORTCUT_LENGTH, ByteParser.INFINITY_LOWER,
268                 ByteParser.INFINITY_UPPER)) {
269             number = Double.POSITIVE_INFINITY;
270             offset += ByteParser.INFINITY_SHORTCUT_LENGTH;
271         } else {
272             number = getBareInteger(length); // This will update offset
273             length -= numberLength; // Set by getBareInteger
274             if (numberLength > 0) {
275                 error = false;
276             }
277             // Check for fractional values after decimal
278             if (length > 0 && input[offset] == '.') {
279                 offset++;
280                 length--;
281                 double numerator = getBareInteger(length);
282                 if (numerator > 0) {
283                     number += numerator / Math.pow(ByteParser.NUMBER_BASE_DOUBLE, numberLength);
284                 }
285                 length -= numberLength;
286                 if (numberLength > 0) {
287                     error = false;
288                 }
289             }
290 
291             if (error) {
292                 offset = startOffset;
293                 numberLength = 0;
294                 throw new FormatException("Invalid real field");
295             }
296 
297             // Look for an exponent ,Our Fortran heritage means that we allow
298             // 'D' for the exponent
299             // indicator.
300             if (length > 0
301                     && (input[offset] == 'e' || input[offset] == 'E' || input[offset] == 'd' || input[offset] == 'D')) {
302                 offset++;
303                 length--;
304                 if (length > 0) {
305                     int sign = checkSign();
306                     if (foundSign) {
307                         length--;
308                     }
309 
310                     int exponent = (int) getBareInteger(length);
311 
312                     // For very small numbers we try to miminize
313                     // effects of denormalization.
314                     if (exponent * sign > ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT) {
315                         number *= Math.pow(ByteParser.NUMBER_BASE_DOUBLE, exponent * sign);
316                     } else {
317                         number = ByteParser.EXPONENT_DENORMALISATION_FACTOR
318                                 * (number * Math.pow(ByteParser.NUMBER_BASE_DOUBLE,
319                                         exponent * sign + ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT * -1));
320                     }
321                 }
322             }
323         }
324         numberLength = offset - startOffset;
325         return mantissaSign * number;
326     }
327 
328     /**
329      * Returns the next <code>float</code> value from the current parse position.
330      * 
331      * @return                 a floating point value from the buffer. (see getDouble(int())
332      *
333      * @throws FormatException if the float was in an unknown format
334      */
335     @Deprecated
336     public float getFloat() throws FormatException {
337         return (float) getDouble(input.length - offset);
338     }
339 
340     /**
341      * Returns the next <code>float</code> value from the current parse position, consuming at most the specified number
342      * of bytes from the input.
343      * 
344      * @return                 a floating point value in a region of the buffer
345      *
346      * @param  length          The maximum number of characters used to parse this float.
347      *
348      * @throws FormatException if the float was in an unknown format
349      */
350     @Deprecated
351     public float getFloat(int length) throws FormatException {
352         return (float) getDouble(length);
353     }
354 
355     /**
356      * Returns the next <code>in</code> value from the current parse position.
357      * 
358      * @return                 an integer at the beginning of the buffer
359      *
360      * @throws FormatException if the integer was in an unknown format
361      */
362     @Deprecated
363     public int getInt() throws FormatException {
364         return getInt(input.length - offset);
365     }
366 
367     /**
368      * Returns the next <code>int</code> value from the current parse position, consuming at most the specified number
369      * of bytes from the input.
370      * 
371      * @return                 a region of the buffer to an integer
372      *
373      * @param  length          The maximum number of characters used to parse this integer. @throws FormatException if
374      *                             the integer was in an unknown format
375      *
376      * @throws FormatException if the integer was in an unknown format
377      */
378     @Deprecated
379     public int getInt(int length) throws FormatException {
380         int startOffset = offset;
381 
382         length -= skipWhite(length);
383         if (length == 0) {
384             numberLength = offset - startOffset;
385             return 0;
386         }
387 
388         int number = 0;
389         boolean error = true;
390 
391         int sign = checkSign();
392         if (foundSign) {
393             length--;
394         }
395 
396         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
397             number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
398             offset++;
399             length--;
400             error = false;
401         }
402 
403         if (error) {
404             numberLength = 0;
405             offset = startOffset;
406             throw new FormatException("Invalid Integer");
407         }
408         numberLength = offset - startOffset;
409         return sign * number;
410     }
411 
412     /**
413      * Returns the next <code>long</code> value from the current parse position, consuming at most the specified number
414      * of bytes from the input.
415      * 
416      * @return                 a long in a specified region of the buffer
417      *
418      * @param  length          The maximum number of characters used to parse this long.
419      *
420      * @throws FormatException if the long was in an unknown format
421      */
422     @Deprecated
423     public long getLong(int length) throws FormatException {
424 
425         int startOffset = offset;
426 
427         // Skip white space.
428         length -= skipWhite(length);
429         if (length == 0) {
430             numberLength = offset - startOffset;
431             return 0;
432         }
433 
434         long number = 0;
435         boolean error = true;
436 
437         long sign = checkSign();
438         if (foundSign) {
439             length--;
440         }
441 
442         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
443             number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
444             error = false;
445             offset++;
446             length--;
447         }
448 
449         if (error) {
450             numberLength = 0;
451             offset = startOffset;
452             throw new FormatException("Invalid long number");
453         }
454         numberLength = offset - startOffset;
455         return sign * number;
456     }
457 
458     /**
459      * Returns the length of the previous string returned (that is the number that was parse last).
460      * 
461      * @return the number of characters used to parse the previous number (or the length of the previous String
462      *             returned).
463      */
464     @Deprecated
465     public int getNumberLength() {
466         return numberLength;
467     }
468 
469     /**
470      * Get the current offset.
471      *
472      * @return The current offset within the buffer.
473      */
474     @Deprecated
475     public int getOffset() {
476         return offset;
477     }
478 
479     /**
480      * Returns the specified number of bytes as a string.
481      * 
482      * @return        a string.
483      *
484      * @param  length The length of the string.
485      */
486     @Deprecated
487     public String getString(int length) {
488         String s = AsciiFuncs.asciiString(input, offset, length);
489         offset += length;
490         numberLength = length;
491         return s;
492     }
493 
494     private boolean isCaseInsensitiv(int length, int constantLength, byte[] lowerConstant, byte[] upperConstant) {
495         if (length < constantLength) {
496             return false;
497         }
498         for (int i = 0; i < constantLength; i++) {
499             if (input[offset + i] != lowerConstant[i] && input[offset + i] != upperConstant[i]) {
500                 return false;
501             }
502         }
503         return true;
504     }
505 
506     /**
507      * Set the buffer for the parser.
508      *
509      * @param buf buffer to set
510      */
511     @Deprecated
512     public void setBuffer(byte[] buf) {
513         input = buf;
514         offset = 0;
515     }
516 
517     /**
518      * Set the offset into the array.
519      *
520      * @param offset The desired offset from the beginning of the array.
521      */
522     @Deprecated
523     public void setOffset(int offset) {
524         this.offset = offset;
525     }
526 
527     /**
528      * Skip bytes in the buffer.
529      *
530      * @param nBytes number of bytes to skip
531      */
532     @Deprecated
533     public void skip(int nBytes) {
534         offset += nBytes;
535     }
536 
537     /**
538      * Skip white space. This routine skips with space in the input .
539      *
540      * @return        the number of character skipped. White space is defined as ' ', '\t', '\n' or '\r'
541      *
542      * @param  length The maximum number of characters to skip.
543      */
544     @Deprecated
545     public int skipWhite(int length) {
546         int i;
547         for (i = 0; i < length; i++) {
548             if (input[offset + i] != ' ' && input[offset + i] != '\t' && input[offset + i] != '\n'
549                     && input[offset + i] != '\r') {
550                 break;
551             }
552         }
553         offset += i;
554         return i;
555     }
556 }