1 package nom.tam.util;
2
3 /*
4 * #%L
5 * nom.tam FITS library
6 * %%
7 * Copyright (C) 2004 - 2024 nom-tam-fits
8 * %%
9 * This is free and unencumbered software released into the public domain.
10 *
11 * Anyone is free to copy, modify, publish, use, compile, sell, or
12 * distribute this software, either in source code form or as a compiled
13 * binary, for any purpose, commercial or non-commercial, and by any
14 * means.
15 *
16 * In jurisdictions that recognize copyright laws, the author or authors
17 * of this software dedicate any and all copyright interest in the
18 * software to the public domain. We make this dedication for the benefit
19 * of the public at large and to the detriment of our heirs and
20 * successors. We intend this dedication to be an overt act of
21 * relinquishment in perpetuity of all present and future rights to this
22 * software under copyright law.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
28 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
29 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
30 * OTHER DEALINGS IN THE SOFTWARE.
31 * #L%
32 */
33
34 /**
35 * This class provides routines for efficient parsing of data stored in a byte array. This routine is optimized (in
36 * theory at least!) for efficiency rather than accuracy. The values read in for doubles or floats may differ in the
37 * last bit or so from the standard input utilities, especially in the case where a float is specified as a very long
38 * string of digits (substantially longer than the precision of the type).
39 * <p>
40 * The get methods generally are available with or without a length parameter specified. When a length parameter is
41 * specified only the bytes with the specified range from the current offset will be search for the number. If no length
42 * is specified, the entire buffer from the current offset will be searched.
43 * <p>
44 * The getString method returns a string with leading and trailing white space left intact. For all other get calls,
45 * leading white space is ignored. If fillFields is set, then the get methods check that only white space follows valid
46 * data and a FormatException is thrown if that is not the case. If fillFields is not set and valid data is found, then
47 * the methods return having read as much as possible. E.g., for the sequence "T123.258E13", a getBoolean, getInteger
48 * and getFloat call would return true, 123, and 2.58e12 when called in succession.
49 *
50 * @deprecated This class should not be exposed in the public API and is intended for internal use only in ASCII tables.
51 * Also, it may have overlapping functionality with other classes, which should probably be eliminated
52 * for simplicity's sake (and thus less chance of nasty bugs).
53 *
54 * @see ByteFormatter
55 */
56 @Deprecated
57 public class ByteParser {
58
59 private static final int EXPONENT_DENORMALISATION_CORR_LIMIT = -300;
60
61 private static final double EXPONENT_DENORMALISATION_FACTOR = 1.e-300;
62
63 private static final byte[] INFINITY_LOWER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toLowerCase());
64
65 private static final byte[] INFINITY_UPPER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toUpperCase());
66
67 private static final int INFINITY_LENGTH = ByteParser.INFINITY_UPPER.length;
68
69 private static final int INFINITY_SHORTCUT_LENGTH = 3;
70
71 private static final byte[] NOT_A_NUMBER_LOWER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toLowerCase());
72
73 private static final byte[] NOT_A_NUMBER_UPPER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toUpperCase());
74
75 private static final int NOT_A_NUMBER_LENGTH = ByteParser.NOT_A_NUMBER_UPPER.length;
76
77 /**
78 * The underlying number base used in this class.
79 */
80 private static final int NUMBER_BASE = 10;
81
82 /**
83 * The underlying number base used in this class as a double value.
84 */
85 private static final double NUMBER_BASE_DOUBLE = 10.;
86
87 /**
88 * Did we find a sign last time we checked?
89 */
90 private boolean foundSign;
91
92 /**
93 * Array being parsed
94 */
95 private byte[] input;
96
97 /**
98 * Length of last parsed value
99 */
100 private int numberLength;
101
102 /**
103 * Current offset into input.
104 */
105 private int offset;
106
107 /**
108 * Construct a parser.
109 *
110 * @param input The byte array to be parsed. Note that the array can be re-used by refilling its contents and
111 * resetting the offset.
112 */
113 @Deprecated
114 public ByteParser(byte[] input) {
115 this.input = input;
116 offset = 0;
117 }
118
119 /**
120 * Find the sign for a number . This routine looks for a sign (+/-) at the current location and return +1/-1 if one
121 * is found, or +1 if not. The foundSign boolean is set if a sign is found and offset is incremented.
122 */
123 private int checkSign() {
124
125 foundSign = false;
126
127 if (input[offset] == '+') {
128 foundSign = true;
129 offset++;
130 return 1;
131 }
132 if (input[offset] == '-') {
133 foundSign = true;
134 offset++;
135 return -1;
136 }
137
138 return 1;
139 }
140
141 /**
142 * Get the integer value starting at the current position. This routine returns a double rather than an int/long to
143 * enable it to read very long integers (with reduced precision) such as 111111111111111111111111111111111111111111.
144 * Note that this routine does set numberLength.
145 *
146 * @param length The maximum number of characters to use.
147 */
148 private double getBareInteger(int length) {
149
150 int startOffset = offset;
151 double number = 0;
152
153 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
154
155 number *= ByteParser.NUMBER_BASE;
156 number += input[offset] - '0';
157 offset++;
158 length--;
159 }
160 numberLength = offset - startOffset;
161 return number;
162 }
163
164 /**
165 * Returns the next <code>boolean</code> value from the current parse position.
166 *
167 * @return a boolean value from the beginning of the buffer.
168 *
169 * @throws FormatException if the double was in an unknown format
170 */
171 @Deprecated
172 public boolean getBoolean() throws FormatException {
173 return getBoolean(input.length - offset);
174 }
175
176 /**
177 * Returns the next <code>double</code> value from the current parse position, consuming at most the specified
178 * number of bytes from the input.
179 *
180 * @return a boolean value from a specified region of the buffer
181 *
182 * @param length The maximum number of characters used to parse this boolean.
183 *
184 * @throws FormatException if the double was in an unknown format
185 */
186 @Deprecated
187 public boolean getBoolean(int length) throws FormatException {
188
189 int startOffset = offset;
190 length -= skipWhite(length);
191 if (length == 0) {
192 throw new FormatException("Blank boolean field");
193 }
194
195 boolean value = false;
196 if (input[offset] == 'T' || input[offset] == 't') {
197 value = true;
198 } else if (input[offset] != 'F' && input[offset] != 'f') {
199 numberLength = 0;
200 offset = startOffset;
201 throw new FormatException("Invalid boolean value");
202 }
203 offset++;
204 numberLength = offset - startOffset;
205 return value;
206 }
207
208 /**
209 * Returns the underlying buffer to this parser.
210 *
211 * @return the buffer being used by the parser
212 */
213 @Deprecated
214 public byte[] getBuffer() {
215 return input;
216 }
217
218 /**
219 * Returns the next <code>long</code> value from the current parse position, consuming at most the specified number
220 * of bytes from the input. This will read the entire buffer if fillFields is set.
221 *
222 * @return The value found.
223 *
224 * @throws FormatException if the double was in an unknown format
225 */
226 @Deprecated
227 public double getDouble() throws FormatException {
228 return getDouble(input.length - offset);
229 }
230
231 /**
232 * Returns the next <code>double</code> value from the current parse position, consuming at most the specified
233 * number of bytes from the input.
234 *
235 * @return a parsed double from the buffer. Leading spaces are ignored.
236 *
237 * @param length The maximum number of characters used to parse this number. If fillFields is specified
238 * then exactly only whitespace may follow a valid double value.
239 *
240 * @throws FormatException if the double was in an unknown format
241 */
242 @Deprecated
243 public double getDouble(int length) throws FormatException {
244 int startOffset = offset;
245 boolean error = true;
246 double number;
247 // Skip initial blanks.
248 length -= skipWhite(length);
249 if (length == 0) {
250 numberLength = offset - startOffset;
251 return 0;
252 }
253 double mantissaSign = checkSign();
254 if (foundSign) {
255 length--;
256 }
257 // Look for the special strings NaN, Inf,
258 if (isCaseInsensitiv(length, ByteParser.NOT_A_NUMBER_LENGTH, ByteParser.NOT_A_NUMBER_LOWER,
259 ByteParser.NOT_A_NUMBER_UPPER)) {
260 number = Double.NaN;
261 offset += ByteParser.NOT_A_NUMBER_LENGTH;
262 // Look for the longer string first then try the shorter.
263 } else if (isCaseInsensitiv(length, ByteParser.INFINITY_LENGTH, ByteParser.INFINITY_LOWER,
264 ByteParser.INFINITY_UPPER)) {
265 number = Double.POSITIVE_INFINITY;
266 offset += ByteParser.INFINITY_LENGTH;
267 } else if (isCaseInsensitiv(length, ByteParser.INFINITY_SHORTCUT_LENGTH, ByteParser.INFINITY_LOWER,
268 ByteParser.INFINITY_UPPER)) {
269 number = Double.POSITIVE_INFINITY;
270 offset += ByteParser.INFINITY_SHORTCUT_LENGTH;
271 } else {
272 number = getBareInteger(length); // This will update offset
273 length -= numberLength; // Set by getBareInteger
274 if (numberLength > 0) {
275 error = false;
276 }
277 // Check for fractional values after decimal
278 if (length > 0 && input[offset] == '.') {
279 offset++;
280 length--;
281 double numerator = getBareInteger(length);
282 if (numerator > 0) {
283 number += numerator / Math.pow(ByteParser.NUMBER_BASE_DOUBLE, numberLength);
284 }
285 length -= numberLength;
286 if (numberLength > 0) {
287 error = false;
288 }
289 }
290
291 if (error) {
292 offset = startOffset;
293 numberLength = 0;
294 throw new FormatException("Invalid real field");
295 }
296
297 // Look for an exponent ,Our Fortran heritage means that we allow
298 // 'D' for the exponent
299 // indicator.
300 if (length > 0
301 && (input[offset] == 'e' || input[offset] == 'E' || input[offset] == 'd' || input[offset] == 'D')) {
302 offset++;
303 length--;
304 if (length > 0) {
305 int sign = checkSign();
306 if (foundSign) {
307 length--;
308 }
309
310 int exponent = (int) getBareInteger(length);
311
312 // For very small numbers we try to miminize
313 // effects of denormalization.
314 if (exponent * sign > ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT) {
315 number *= Math.pow(ByteParser.NUMBER_BASE_DOUBLE, exponent * sign);
316 } else {
317 number = ByteParser.EXPONENT_DENORMALISATION_FACTOR
318 * (number * Math.pow(ByteParser.NUMBER_BASE_DOUBLE,
319 exponent * sign + ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT * -1));
320 }
321 }
322 }
323 }
324 numberLength = offset - startOffset;
325 return mantissaSign * number;
326 }
327
328 /**
329 * Returns the next <code>float</code> value from the current parse position.
330 *
331 * @return a floating point value from the buffer. (see getDouble(int())
332 *
333 * @throws FormatException if the float was in an unknown format
334 */
335 @Deprecated
336 public float getFloat() throws FormatException {
337 return (float) getDouble(input.length - offset);
338 }
339
340 /**
341 * Returns the next <code>float</code> value from the current parse position, consuming at most the specified number
342 * of bytes from the input.
343 *
344 * @return a floating point value in a region of the buffer
345 *
346 * @param length The maximum number of characters used to parse this float.
347 *
348 * @throws FormatException if the float was in an unknown format
349 */
350 @Deprecated
351 public float getFloat(int length) throws FormatException {
352 return (float) getDouble(length);
353 }
354
355 /**
356 * Returns the next <code>in</code> value from the current parse position.
357 *
358 * @return an integer at the beginning of the buffer
359 *
360 * @throws FormatException if the integer was in an unknown format
361 */
362 @Deprecated
363 public int getInt() throws FormatException {
364 return getInt(input.length - offset);
365 }
366
367 /**
368 * Returns the next <code>int</code> value from the current parse position, consuming at most the specified number
369 * of bytes from the input.
370 *
371 * @return a region of the buffer to an integer
372 *
373 * @param length The maximum number of characters used to parse this integer. @throws FormatException if
374 * the integer was in an unknown format
375 *
376 * @throws FormatException if the integer was in an unknown format
377 */
378 @Deprecated
379 public int getInt(int length) throws FormatException {
380 int startOffset = offset;
381
382 length -= skipWhite(length);
383 if (length == 0) {
384 numberLength = offset - startOffset;
385 return 0;
386 }
387
388 int number = 0;
389 boolean error = true;
390
391 int sign = checkSign();
392 if (foundSign) {
393 length--;
394 }
395
396 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
397 number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
398 offset++;
399 length--;
400 error = false;
401 }
402
403 if (error) {
404 numberLength = 0;
405 offset = startOffset;
406 throw new FormatException("Invalid Integer");
407 }
408 numberLength = offset - startOffset;
409 return sign * number;
410 }
411
412 /**
413 * Returns the next <code>long</code> value from the current parse position, consuming at most the specified number
414 * of bytes from the input.
415 *
416 * @return a long in a specified region of the buffer
417 *
418 * @param length The maximum number of characters used to parse this long.
419 *
420 * @throws FormatException if the long was in an unknown format
421 */
422 @Deprecated
423 public long getLong(int length) throws FormatException {
424
425 int startOffset = offset;
426
427 // Skip white space.
428 length -= skipWhite(length);
429 if (length == 0) {
430 numberLength = offset - startOffset;
431 return 0;
432 }
433
434 long number = 0;
435 boolean error = true;
436
437 long sign = checkSign();
438 if (foundSign) {
439 length--;
440 }
441
442 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
443 number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
444 error = false;
445 offset++;
446 length--;
447 }
448
449 if (error) {
450 numberLength = 0;
451 offset = startOffset;
452 throw new FormatException("Invalid long number");
453 }
454 numberLength = offset - startOffset;
455 return sign * number;
456 }
457
458 /**
459 * Returns the length of the previous string returned (that is the number that was parse last).
460 *
461 * @return the number of characters used to parse the previous number (or the length of the previous String
462 * returned).
463 */
464 @Deprecated
465 public int getNumberLength() {
466 return numberLength;
467 }
468
469 /**
470 * Get the current offset.
471 *
472 * @return The current offset within the buffer.
473 */
474 @Deprecated
475 public int getOffset() {
476 return offset;
477 }
478
479 /**
480 * Returns the specified number of bytes as a string.
481 *
482 * @return a string.
483 *
484 * @param length The length of the string.
485 */
486 @Deprecated
487 public String getString(int length) {
488 String s = AsciiFuncs.asciiString(input, offset, length);
489 offset += length;
490 numberLength = length;
491 return s;
492 }
493
494 private boolean isCaseInsensitiv(int length, int constantLength, byte[] lowerConstant, byte[] upperConstant) {
495 if (length < constantLength) {
496 return false;
497 }
498 for (int i = 0; i < constantLength; i++) {
499 if (input[offset + i] != lowerConstant[i] && input[offset + i] != upperConstant[i]) {
500 return false;
501 }
502 }
503 return true;
504 }
505
506 /**
507 * Set the buffer for the parser.
508 *
509 * @param buf buffer to set
510 */
511 @Deprecated
512 public void setBuffer(byte[] buf) {
513 input = buf;
514 offset = 0;
515 }
516
517 /**
518 * Set the offset into the array.
519 *
520 * @param offset The desired offset from the beginning of the array.
521 */
522 @Deprecated
523 public void setOffset(int offset) {
524 this.offset = offset;
525 }
526
527 /**
528 * Skip bytes in the buffer.
529 *
530 * @param nBytes number of bytes to skip
531 */
532 @Deprecated
533 public void skip(int nBytes) {
534 offset += nBytes;
535 }
536
537 /**
538 * Skip white space. This routine skips with space in the input .
539 *
540 * @return the number of character skipped. White space is defined as ' ', '\t', '\n' or '\r'
541 *
542 * @param length The maximum number of characters to skip.
543 */
544 @Deprecated
545 public int skipWhite(int length) {
546 int i;
547 for (i = 0; i < length; i++) {
548 if (input[offset + i] != ' ' && input[offset + i] != '\t' && input[offset + i] != '\n'
549 && input[offset + i] != '\r') {
550 break;
551 }
552 }
553 offset += i;
554 return i;
555 }
556 }