1 package nom.tam.util;
2
3 /*
4 * #%L
5 * nom.tam FITS library
6 * %%
7 * Copyright (C) 2004 - 2024 nom-tam-fits
8 * %%
9 * This is free and unencumbered software released into the public domain.
10 *
11 * Anyone is free to copy, modify, publish, use, compile, sell, or
12 * distribute this software, either in source code form or as a compiled
13 * binary, for any purpose, commercial or non-commercial, and by any
14 * means.
15 *
16 * In jurisdictions that recognize copyright laws, the author or authors
17 * of this software dedicate any and all copyright interest in the
18 * software to the public domain. We make this dedication for the benefit
19 * of the public at large and to the detriment of our heirs and
20 * successors. We intend this dedication to be an overt act of
21 * relinquishment in perpetuity of all present and future rights to this
22 * software under copyright law.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
28 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
29 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
30 * OTHER DEALINGS IN THE SOFTWARE.
31 * #L%
32 */
33
34 /**
35 * This class provides routines for efficient parsing of data stored in a byte array. This routine is optimized (in
36 * theory at least!) for efficiency rather than accuracy. The values read in for doubles or floats may differ in the
37 * last bit or so from the standard input utilities, especially in the case where a float is specified as a very long
38 * string of digits (substantially longer than the precision of the type).
39 * <p>
40 * The get methods generally are available with or without a length parameter specified. When a length parameter is
41 * specified only the bytes with the specified range from the current offset will be search for the number. If no length
42 * is specified, the entire buffer from the current offset will be searched.
43 * <p>
44 * The getString method returns a string with leading and trailing white space left intact. For all other get calls,
45 * leading white space is ignored. If fillFields is set, then the get methods check that only white space follows valid
46 * data and a FormatException is thrown if that is not the case. If fillFields is not set and valid data is found, then
47 * the methods return having read as much as possible. E.g., for the sequence "T123.258E13", a getBoolean, getInteger
48 * and getFloat call would return true, 123, and 2.58e12 when called in succession.
49 *
50 * @deprecated This class should not be exposed in the public API and is intended for internal use only in ASCII tables.
51 * Also, it may have overlapping functionality with other classes, which should probably be eliminated
52 * for simplicity's sake (and thus less chance of nasty bugs).
53 *
54 * @see ByteFormatter
55 */
56 @Deprecated
57 public class ByteParser {
58
59 private static final int EXPONENT_DENORMALISATION_CORR_LIMIT = -300;
60
61 private static final double EXPONENT_DENORMALISATION_FACTOR = 1.e-300;
62
63 private static final byte[] INFINITY_LOWER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toLowerCase());
64
65 private static final byte[] INFINITY_UPPER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toUpperCase());
66
67 private static final int INFINITY_LENGTH = ByteParser.INFINITY_UPPER.length;
68
69 private static final int INFINITY_SHORTCUT_LENGTH = 3;
70
71 private static final byte[] NOT_A_NUMBER_LOWER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toLowerCase());
72
73 private static final byte[] NOT_A_NUMBER_UPPER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toUpperCase());
74
75 private static final int NOT_A_NUMBER_LENGTH = ByteParser.NOT_A_NUMBER_UPPER.length;
76
77 /**
78 * The underlying number base used in this class.
79 */
80 private static final int NUMBER_BASE = 10;
81
82 /**
83 * The underlying number base used in this class as a double value.
84 */
85 private static final double NUMBER_BASE_DOUBLE = 10.;
86
87 /**
88 * Did we find a sign last time we checked?
89 */
90 private boolean foundSign;
91
92 /**
93 * Array being parsed
94 */
95 private byte[] input;
96
97 /**
98 * Length of last parsed value
99 */
100 private int numberLength;
101
102 /**
103 * Current offset into input.
104 */
105 private int offset;
106
107 /**
108 * Construct a parser.
109 *
110 * @param input The byte array to be parsed. Note that the array can be re-used by refilling its contents and
111 * resetting the offset.
112 */
113 public ByteParser(byte[] input) {
114 this.input = input;
115 offset = 0;
116 }
117
118 /**
119 * Find the sign for a number . This routine looks for a sign (+/-) at the current location and return +1/-1 if one
120 * is found, or +1 if not. The foundSign boolean is set if a sign is found and offset is incremented.
121 */
122 private int checkSign() {
123
124 foundSign = false;
125
126 if (input[offset] == '+') {
127 foundSign = true;
128 offset++;
129 return 1;
130 }
131 if (input[offset] == '-') {
132 foundSign = true;
133 offset++;
134 return -1;
135 }
136
137 return 1;
138 }
139
140 /**
141 * Get the integer value starting at the current position. This routine returns a double rather than an int/long to
142 * enable it to read very long integers (with reduced precision) such as 111111111111111111111111111111111111111111.
143 * Note that this routine does set numberLength.
144 *
145 * @param length The maximum number of characters to use.
146 */
147 private double getBareInteger(int length) {
148
149 int startOffset = offset;
150 double number = 0;
151
152 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
153
154 number *= ByteParser.NUMBER_BASE;
155 number += input[offset] - '0';
156 offset++;
157 length--;
158 }
159 numberLength = offset - startOffset;
160 return number;
161 }
162
163 /**
164 * @return a boolean value from the beginning of the buffer.
165 *
166 * @throws FormatException if the double was in an unknown format
167 */
168 public boolean getBoolean() throws FormatException {
169 return getBoolean(input.length - offset);
170 }
171
172 /**
173 * @return a boolean value from a specified region of the buffer
174 *
175 * @param length The maximum number of characters used to parse this boolean.
176 *
177 * @throws FormatException if the double was in an unknown format
178 */
179 public boolean getBoolean(int length) throws FormatException {
180
181 int startOffset = offset;
182 length -= skipWhite(length);
183 if (length == 0) {
184 throw new FormatException("Blank boolean field");
185 }
186
187 boolean value = false;
188 if (input[offset] == 'T' || input[offset] == 't') {
189 value = true;
190 } else if (input[offset] != 'F' && input[offset] != 'f') {
191 numberLength = 0;
192 offset = startOffset;
193 throw new FormatException("Invalid boolean value");
194 }
195 offset++;
196 numberLength = offset - startOffset;
197 return value;
198 }
199
200 /**
201 * @return the buffer being used by the parser
202 */
203 public byte[] getBuffer() {
204 return input;
205 }
206
207 /**
208 * Read in the buffer until a double is read. This will read the entire buffer if fillFields is set.
209 *
210 * @return The value found.
211 *
212 * @throws FormatException if the double was in an unknown format
213 */
214 public double getDouble() throws FormatException {
215 return getDouble(input.length - offset);
216 }
217
218 /**
219 * @return a parsed double from the buffer. Leading spaces are ignored.
220 *
221 * @param length The maximum number of characters used to parse this number. If fillFields is specified
222 * then exactly only whitespace may follow a valid double value.
223 *
224 * @throws FormatException if the double was in an unknown format
225 */
226 public double getDouble(int length) throws FormatException {
227 int startOffset = offset;
228 boolean error = true;
229 double number;
230 // Skip initial blanks.
231 length -= skipWhite(length);
232 if (length == 0) {
233 numberLength = offset - startOffset;
234 return 0;
235 }
236 double mantissaSign = checkSign();
237 if (foundSign) {
238 length--;
239 }
240 // Look for the special strings NaN, Inf,
241 if (isCaseInsensitiv(length, ByteParser.NOT_A_NUMBER_LENGTH, ByteParser.NOT_A_NUMBER_LOWER,
242 ByteParser.NOT_A_NUMBER_UPPER)) {
243 number = Double.NaN;
244 offset += ByteParser.NOT_A_NUMBER_LENGTH;
245 // Look for the longer string first then try the shorter.
246 } else if (isCaseInsensitiv(length, ByteParser.INFINITY_LENGTH, ByteParser.INFINITY_LOWER,
247 ByteParser.INFINITY_UPPER)) {
248 number = Double.POSITIVE_INFINITY;
249 offset += ByteParser.INFINITY_LENGTH;
250 } else if (isCaseInsensitiv(length, ByteParser.INFINITY_SHORTCUT_LENGTH, ByteParser.INFINITY_LOWER,
251 ByteParser.INFINITY_UPPER)) {
252 number = Double.POSITIVE_INFINITY;
253 offset += ByteParser.INFINITY_SHORTCUT_LENGTH;
254 } else {
255 number = getBareInteger(length); // This will update offset
256 length -= numberLength; // Set by getBareInteger
257 if (numberLength > 0) {
258 error = false;
259 }
260 // Check for fractional values after decimal
261 if (length > 0 && input[offset] == '.') {
262 offset++;
263 length--;
264 double numerator = getBareInteger(length);
265 if (numerator > 0) {
266 number += numerator / Math.pow(ByteParser.NUMBER_BASE_DOUBLE, numberLength);
267 }
268 length -= numberLength;
269 if (numberLength > 0) {
270 error = false;
271 }
272 }
273
274 if (error) {
275 offset = startOffset;
276 numberLength = 0;
277 throw new FormatException("Invalid real field");
278 }
279
280 // Look for an exponent ,Our Fortran heritage means that we allow
281 // 'D' for the exponent
282 // indicator.
283 if (length > 0
284 && (input[offset] == 'e' || input[offset] == 'E' || input[offset] == 'd' || input[offset] == 'D')) {
285 offset++;
286 length--;
287 if (length > 0) {
288 int sign = checkSign();
289 if (foundSign) {
290 length--;
291 }
292
293 int exponent = (int) getBareInteger(length);
294
295 // For very small numbers we try to miminize
296 // effects of denormalization.
297 if (exponent * sign > ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT) {
298 number *= Math.pow(ByteParser.NUMBER_BASE_DOUBLE, exponent * sign);
299 } else {
300 number = ByteParser.EXPONENT_DENORMALISATION_FACTOR
301 * (number * Math.pow(ByteParser.NUMBER_BASE_DOUBLE,
302 exponent * sign + ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT * -1));
303 }
304 }
305 }
306 }
307 numberLength = offset - startOffset;
308 return mantissaSign * number;
309 }
310
311 /**
312 * @return a floating point value from the buffer. (see getDouble(int())
313 *
314 * @throws FormatException if the float was in an unknown format
315 */
316 public float getFloat() throws FormatException {
317 return (float) getDouble(input.length - offset);
318 }
319
320 /**
321 * @return a floating point value in a region of the buffer
322 *
323 * @param length The maximum number of characters used to parse this float.
324 *
325 * @throws FormatException if the float was in an unknown format
326 */
327 public float getFloat(int length) throws FormatException {
328 return (float) getDouble(length);
329 }
330
331 /**
332 * @return an integer at the beginning of the buffer
333 *
334 * @throws FormatException if the integer was in an unknown format
335 */
336 public int getInt() throws FormatException {
337 return getInt(input.length - offset);
338 }
339
340 /**
341 * @return a region of the buffer to an integer
342 *
343 * @param length The maximum number of characters used to parse this integer. @throws FormatException if
344 * the integer was in an unknown format
345 *
346 * @throws FormatException if the integer was in an unknown format
347 */
348 public int getInt(int length) throws FormatException {
349 int startOffset = offset;
350
351 length -= skipWhite(length);
352 if (length == 0) {
353 numberLength = offset - startOffset;
354 return 0;
355 }
356
357 int number = 0;
358 boolean error = true;
359
360 int sign = checkSign();
361 if (foundSign) {
362 length--;
363 }
364
365 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
366 number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
367 offset++;
368 length--;
369 error = false;
370 }
371
372 if (error) {
373 numberLength = 0;
374 offset = startOffset;
375 throw new FormatException("Invalid Integer");
376 }
377 numberLength = offset - startOffset;
378 return sign * number;
379 }
380
381 /**
382 * @return a long in a specified region of the buffer
383 *
384 * @param length The maximum number of characters used to parse this long.
385 *
386 * @throws FormatException if the long was in an unknown format
387 */
388 public long getLong(int length) throws FormatException {
389
390 int startOffset = offset;
391
392 // Skip white space.
393 length -= skipWhite(length);
394 if (length == 0) {
395 numberLength = offset - startOffset;
396 return 0;
397 }
398
399 long number = 0;
400 boolean error = true;
401
402 long sign = checkSign();
403 if (foundSign) {
404 length--;
405 }
406
407 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
408 number = number * ByteParser.NUMBER_BASE + input[offset] - '0';
409 error = false;
410 offset++;
411 length--;
412 }
413
414 if (error) {
415 numberLength = 0;
416 offset = startOffset;
417 throw new FormatException("Invalid long number");
418 }
419 numberLength = offset - startOffset;
420 return sign * number;
421 }
422
423 /**
424 * @return the number of characters used to parse the previous number (or the length of the previous String
425 * returned).
426 */
427 public int getNumberLength() {
428 return numberLength;
429 }
430
431 /**
432 * Get the current offset.
433 *
434 * @return The current offset within the buffer.
435 */
436 public int getOffset() {
437 return offset;
438 }
439
440 /**
441 * @return a string.
442 *
443 * @param length The length of the string.
444 */
445 public String getString(int length) {
446
447 String s = AsciiFuncs.asciiString(input, offset, length);
448 offset += length;
449 numberLength = length;
450 return s;
451 }
452
453 private boolean isCaseInsensitiv(int length, int constantLength, byte[] lowerConstant, byte[] upperConstant) {
454 if (length < constantLength) {
455 return false;
456 }
457 for (int i = 0; i < constantLength; i++) {
458 if (input[offset + i] != lowerConstant[i] && input[offset + i] != upperConstant[i]) {
459 return false;
460 }
461 }
462 return true;
463 }
464
465 /**
466 * Set the buffer for the parser.
467 *
468 * @param buf buffer to set
469 */
470 public void setBuffer(byte[] buf) {
471 input = buf;
472 offset = 0;
473 }
474
475 /**
476 * Set the offset into the array.
477 *
478 * @param offset The desired offset from the beginning of the array.
479 */
480 public void setOffset(int offset) {
481 this.offset = offset;
482 }
483
484 /**
485 * Skip bytes in the buffer.
486 *
487 * @param nBytes number of bytes to skip
488 */
489 public void skip(int nBytes) {
490 offset += nBytes;
491 }
492
493 /**
494 * Skip white space. This routine skips with space in the input .
495 *
496 * @return the number of character skipped. White space is defined as ' ', '\t', '\n' or '\r'
497 *
498 * @param length The maximum number of characters to skip.
499 */
500 public int skipWhite(int length) {
501 int i;
502 for (i = 0; i < length; i++) {
503 if (input[offset + i] != ' ' && input[offset + i] != '\t' && input[offset + i] != '\n'
504 && input[offset + i] != '\r') {
505 break;
506 }
507 }
508 offset += i;
509 return i;
510 }
511 }