package nom.tam.util;

/*
 * #%L
 * nom.tam FITS library
 * %%
 * Copyright (C) 2004 - 2024 nom-tam-fits
 * %%
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 * #L%
 */

34  /**
35   * This class provides routines for efficient parsing of data stored in a byte array. This routine is optimized (in
36   * theory at least!) for efficiency rather than accuracy. The values read in for doubles or floats may differ in the
37   * last bit or so from the standard input utilities, especially in the case where a float is specified as a very long
38   * string of digits (substantially longer than the precision of the type).
39   * <p>
40   * The get methods generally are available with or without a length parameter specified. When a length parameter is
41   * specified only the bytes with the specified range from the current offset will be search for the number. If no length
42   * is specified, the entire buffer from the current offset will be searched.
43   * <p>
44   * The getString method returns a string with leading and trailing white space left intact. For all other get calls,
45   * leading white space is ignored. If fillFields is set, then the get methods check that only white space follows valid
46   * data and a FormatException is thrown if that is not the case. If fillFields is not set and valid data is found, then
47   * the methods return having read as much as possible. E.g., for the sequence "T123.258E13", a getBoolean, getInteger
48   * and getFloat call would return true, 123, and 2.58e12 when called in succession.
49   * 
50   * @deprecated This class should not be exposed in the public API and is intended for internal use only in ASCII tables.
51   *                 Also, it may have overlapping functionality with other classes, which should probably be eliminated
52   *                 for simplicity's sake (and thus less chance of nasty bugs).
53   * 
54   * @see        ByteFormatter
55   */
56  @Deprecated
57  public class ByteParser {
58  
59      private static final int EXPONENT_DENORMALISATION_CORR_LIMIT = -300;
60  
61      private static final double EXPONENT_DENORMALISATION_FACTOR = 1.e-300;
62  
63      private static final byte[] INFINITY_LOWER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toLowerCase());
64  
65      private static final byte[] INFINITY_UPPER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toUpperCase());
66  
67      private static final int INFINITY_LENGTH = ByteParser.INFINITY_UPPER.length;
68  
69      private static final int INFINITY_SHORTCUT_LENGTH = 3;
70  
71      private static final byte[] NOT_A_NUMBER_LOWER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toLowerCase());
72  
73      private static final byte[] NOT_A_NUMBER_UPPER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toUpperCase());
74  
75      private static final int NOT_A_NUMBER_LENGTH = ByteParser.NOT_A_NUMBER_UPPER.length;
76  
77      /**
78       * The underlying number base used in this class.
79       */
80      private static final int NUMBER_BASE = 10;
81  
82      /**
83       * The underlying number base used in this class as a double value.
84       */
85      private static final double NUMBER_BASE_DOUBLE = 10.;
86  
87      /**
88       * Did we find a sign last time we checked?
89       */
90      private boolean foundSign;
91  
92      /**
93       * Array being parsed
94       */
95      private byte[] input;
96  
97      /**
98       * Length of last parsed value
99       */
100     private int numberLength;
101 
102     /**
103      * Current offset into input.
104      */
105     private int offset;
106 
107     /**
108      * Construct a parser.
109      *
110      * @param input The byte array to be parsed. Note that the array can be re-used by refilling its contents and
111      *                  resetting the offset.
112      */
113     public ByteParser(byte[] input) {
114         this.input = input;
115         offset = 0;
116     }
117 
118     /**
119      * Find the sign for a number . This routine looks for a sign (+/-) at the current location and return +1/-1 if one
120      * is found, or +1 if not. The foundSign boolean is set if a sign is found and offset is incremented.
121      */
122     private int checkSign() {
123 
124         foundSign = false;
125 
126         if (input[offset] == '+') {
127             foundSign = true;
128             offset++;
129             return 1;
130         }
131         if (input[offset] == '-') {
132             foundSign = true;
133             offset++;
134             return -1;
135         }
136 
137         return 1;
138     }
139 
140     /**
141      * Get the integer value starting at the current position. This routine returns a double rather than an int/long to
142      * enable it to read very long integers (with reduced precision) such as 111111111111111111111111111111111111111111.
143      * Note that this routine does set numberLength.
144      *
145      * @param length The maximum number of characters to use.
146      */
147     private double getBareInteger(int length) {
148 
149         int startOffset = offset;
150         double number = 0;
151 
152         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
153 
154             number *= ByteParser.NUMBER_BASE;
155             number += input[offset] - '0';
156             offset++;
157             length--;
158         }
159         numberLength = offset - startOffset;
160         return number;
161     }
162 
163     /**
164      * @return                 a boolean value from the beginning of the buffer.
165      *
166      * @throws FormatException if the double was in an unknown format
167      */
168     public boolean getBoolean() throws FormatException {
169         return getBoolean(input.length - offset);
170     }
171 
172     /**
173      * @return                 a boolean value from a specified region of the buffer
174      *
175      * @param  length          The maximum number of characters used to parse this boolean.
176      *
177      * @throws FormatException if the double was in an unknown format
178      */
179     public boolean getBoolean(int length) throws FormatException {
180 
181         int startOffset = offset;
182         length -= skipWhite(length);
183         if (length == 0) {
184             throw new FormatException("Blank boolean field");
185         }
186 
187         boolean value = false;
188         if (input[offset] == 'T' || input[offset] == 't') {
189             value = true;
190         } else if (input[offset] != 'F' && input[offset] != 'f') {
191             numberLength = 0;
192             offset = startOffset;
193             throw new FormatException("Invalid boolean value");
194         }
195         offset++;
196         numberLength = offset - startOffset;
197         return value;
198     }
199 
200     /**
201      * @return the buffer being used by the parser
202      */
203     public byte[] getBuffer() {
204         return input;
205     }
206 
207     /**
208      * Read in the buffer until a double is read. This will read the entire buffer if fillFields is set.
209      *
210      * @return                 The value found.
211      *
212      * @throws FormatException if the double was in an unknown format
213      */
214     public double getDouble() throws FormatException {
215         return getDouble(input.length - offset);
216     }
217 
218     /**
219      * @return                 a parsed double from the buffer. Leading spaces are ignored.
220      *
221      * @param  length          The maximum number of characters used to parse this number. If fillFields is specified
222      *                             then exactly only whitespace may follow a valid double value.
223      *
224      * @throws FormatException if the double was in an unknown format
225      */
226     public double getDouble(int length) throws FormatException {
227         int startOffset = offset;
228         boolean error = true;
229         double number;
230         // Skip initial blanks.
231         length -= skipWhite(length);
232         if (length == 0) {
233             numberLength = offset - startOffset;
234             return 0;
235         }
236         double mantissaSign = checkSign();
237         if (foundSign) {
238             length--;
239         }
240         // Look for the special strings NaN, Inf,
241         if (isCaseInsensitiv(length, ByteParser.NOT_A_NUMBER_LENGTH, ByteParser.NOT_A_NUMBER_LOWER,
242                 ByteParser.NOT_A_NUMBER_UPPER)) {
243             number = Double.NaN;
244             offset += ByteParser.NOT_A_NUMBER_LENGTH;
245             // Look for the longer string first then try the shorter.
246         } else if (isCaseInsensitiv(length, ByteParser.INFINITY_LENGTH, ByteParser.INFINITY_LOWER,
247                 ByteParser.INFINITY_UPPER)) {
248             number = Double.POSITIVE_INFINITY;
249             offset += ByteParser.INFINITY_LENGTH;
250         } else if (isCaseInsensitiv(length, ByteParser.INFINITY_SHORTCUT_LENGTH, ByteParser.INFINITY_LOWER,
251                 ByteParser.INFINITY_UPPER)) {
252             number = Double.POSITIVE_INFINITY;
253             offset += ByteParser.INFINITY_SHORTCUT_LENGTH;
254         } else {
255             number = getBareInteger(length); // This will update offset
256             length -= numberLength; // Set by getBareInteger
257             if (numberLength > 0) {
258                 error = false;
259             }
260             // Check for fractional values after decimal
261             if (length > 0 && input[offset] == '.') {
262                 offset++;
263                 length--;
264                 double numerator = getBareInteger(length);
265                 if (numerator > 0) {
266                     number += numerator / Math.pow(ByteParser.NUMBER_BASE_DOUBLE, numberLength);
267                 }
268                 length -= numberLength;
269                 if (numberLength > 0) {
270                     error = false;
271                 }
272             }
273 
274             if (error) {
275                 offset = startOffset;
276                 numberLength = 0;
277                 throw new FormatException("Invalid real field");
278             }
279 
280             // Look for an exponent ,Our Fortran heritage means that we allow
281             // 'D' for the exponent
282             // indicator.
283             if (length > 0
284                     && (input[offset] == 'e' || input[offset] == 'E' || input[offset] == 'd' || input[offset] == 'D')) {
285                 offset++;
286                 length--;
287                 if (length > 0) {
288                     int sign = checkSign();
289                     if (foundSign) {
290                         length--;
291                     }
292 
293                     int exponent = (int) getBareInteger(length);
294 
295                     // For very small numbers we try to miminize
296                     // effects of denormalization.
297                     if (exponent * sign > ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT) {
298                         number *= Math.pow(ByteParser.NUMBER_BASE_DOUBLE, exponent * sign);
299                     } else {
300                         number = ByteParser.EXPONENT_DENORMALISATION_FACTOR
301                                 * (number * Math.pow(ByteParser.NUMBER_BASE_DOUBLE,
302                                         exponent * sign + ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT * -1));
303                     }
304                 }
305             }
306         }
307         numberLength = offset - startOffset;
308         return mantissaSign * number;
309     }
310 
311     /**
312      * @return                 a floating point value from the buffer. (see getDouble(int())
313      *
314      * @throws FormatException if the float was in an unknown format
315      */
316     public float getFloat() throws FormatException {
317         return (float) getDouble(input.length - offset);
318     }
319 
320     /**
321      * @return                 a floating point value in a region of the buffer
322      *
323      * @param  length          The maximum number of characters used to parse this float.
324      *
325      * @throws FormatException if the float was in an unknown format
326      */
327     public float getFloat(int length) throws FormatException {
328         return (float) getDouble(length);
329     }
330 
331     /**
332      * @return                 an integer at the beginning of the buffer
333      *
334      * @throws FormatException if the integer was in an unknown format
335      */
336     public int getInt() throws FormatException {
337         return getInt(input.length - offset);
338     }
339 
340     /**
341      * @return                 a region of the buffer to an integer
342      *
343      * @param  length          The maximum number of characters used to parse this integer. @throws FormatException if
344      *                             the integer was in an unknown format
345      *
346      * @throws FormatException if the integer was in an unknown format
347      */
348     public int getInt(int length) throws FormatException {
349         int startOffset = offset;
350 
351         length -= skipWhite(length);
352         if (length == 0) {
353             numberLength = offset - startOffset;
354             return 0;
355         }
356 
357         int number = 0;
358         boolean error = true;
359 
360         int sign = checkSign();
361         if (foundSign) {
362             length--;
363         }
364 
365         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
366             number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
367             offset++;
368             length--;
369             error = false;
370         }
371 
372         if (error) {
373             numberLength = 0;
374             offset = startOffset;
375             throw new FormatException("Invalid Integer");
376         }
377         numberLength = offset - startOffset;
378         return sign * number;
379     }
380 
381     /**
382      * @return                 a long in a specified region of the buffer
383      *
384      * @param  length          The maximum number of characters used to parse this long.
385      *
386      * @throws FormatException if the long was in an unknown format
387      */
388     public long getLong(int length) throws FormatException {
389 
390         int startOffset = offset;
391 
392         // Skip white space.
393         length -= skipWhite(length);
394         if (length == 0) {
395             numberLength = offset - startOffset;
396             return 0;
397         }
398 
399         long number = 0;
400         boolean error = true;
401 
402         long sign = checkSign();
403         if (foundSign) {
404             length--;
405         }
406 
407         while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
408             number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
409             error = false;
410             offset++;
411             length--;
412         }
413 
414         if (error) {
415             numberLength = 0;
416             offset = startOffset;
417             throw new FormatException("Invalid long number");
418         }
419         numberLength = offset - startOffset;
420         return sign * number;
421     }
422 
423     /**
424      * @return the number of characters used to parse the previous number (or the length of the previous String
425      *             returned).
426      */
427     public int getNumberLength() {
428         return numberLength;
429     }
430 
431     /**
432      * Get the current offset.
433      *
434      * @return The current offset within the buffer.
435      */
436     public int getOffset() {
437         return offset;
438     }
439 
440     /**
441      * @return        a string.
442      *
443      * @param  length The length of the string.
444      */
445     public String getString(int length) {
446 
447         String s = AsciiFuncs.asciiString(input, offset, length);
448         offset += length;
449         numberLength = length;
450         return s;
451     }
452 
453     private boolean isCaseInsensitiv(int length, int constantLength, byte[] lowerConstant, byte[] upperConstant) {
454         if (length < constantLength) {
455             return false;
456         }
457         for (int i = 0; i < constantLength; i++) {
458             if (input[offset + i] != lowerConstant[i] && input[offset + i] != upperConstant[i]) {
459                 return false;
460             }
461         }
462         return true;
463     }
464 
465     /**
466      * Set the buffer for the parser.
467      *
468      * @param buf buffer to set
469      */
470     public void setBuffer(byte[] buf) {
471         input = buf;
472         offset = 0;
473     }
474 
475     /**
476      * Set the offset into the array.
477      *
478      * @param offset The desired offset from the beginning of the array.
479      */
480     public void setOffset(int offset) {
481         this.offset = offset;
482     }
483 
484     /**
485      * Skip bytes in the buffer.
486      *
487      * @param nBytes number of bytes to skip
488      */
489     public void skip(int nBytes) {
490         offset += nBytes;
491     }
492 
493     /**
494      * Skip white space. This routine skips with space in the input .
495      *
496      * @return        the number of character skipped. White space is defined as ' ', '\t', '\n' or '\r'
497      *
498      * @param  length The maximum number of characters to skip.
499      */
500     public int skipWhite(int length) {
501         int i;
502         for (i = 0; i < length; i++) {
503             if (input[offset + i] != ' ' && input[offset + i] != '\t' && input[offset + i] != '\n'
504                     && input[offset + i] != '\r') {
505                 break;
506             }
507         }
508         offset += i;
509         return i;
510     }
511 }