1 /*
2 * #%L
3 * nom.tam FITS library
4 * %%
5 * Copyright (C) 2004 - 2024 nom-tam-fits
6 * %%
7 * This is free and unencumbered software released into the public domain.
8 *
9 * Anyone is free to copy, modify, publish, use, compile, sell, or
10 * distribute this software, either in source code form or as a compiled
11 * binary, for any purpose, commercial or non-commercial, and by any
12 * means.
13 *
14 * In jurisdictions that recognize copyright laws, the author or authors
15 * of this software dedicate any and all copyright interest in the
16 * software to the public domain. We make this dedication for the benefit
17 * of the public at large and to the detriment of our heirs and
18 * successors. We intend this dedication to be an overt act of
19 * relinquishment in perpetuity of all present and future rights to this
20 * software under copyright law.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 * #L%
30 */
31
32 package nom.tam.fits;
33
34 import java.math.BigDecimal;
35 import java.math.BigInteger;
36 import java.util.Locale;
37 import java.util.StringTokenizer;
38 import java.util.logging.Level;
39 import java.util.logging.Logger;
40 import java.util.regex.Pattern;
41
42 import nom.tam.fits.header.Standard;
43 import nom.tam.util.ComplexValue;
44 import nom.tam.util.FlexFormat;
45
46 import static nom.tam.fits.header.NonStandard.HIERARCH;
47 import static nom.tam.fits.header.Standard.CONTINUE;
48
49 /**
50 * <p>
51 * Converts a single 80-character wide FITS header record into a header card. See {@link HeaderCard#create(String)} for
52 * a description of the rules that guide parsing.
53 * </p>
54 * <p>
 * When parsing header records that violate FITS standards, the violations can be logged or will throw appropriate
 * exceptions (depending on the severity of the standard violation and whether
 * {@link FitsFactory#setAllowHeaderRepairs(boolean)} is enabled or not). The logging of violations is disabled by
 * default, but may be controlled via {@link Header#setParserWarningsEnabled(boolean)}.
59 * </p>
60 *
61 * @author Attila Kovacs
62 *
63 * @see FitsFactory#setAllowHeaderRepairs(boolean)
64 * @see Header#setParserWarningsEnabled(boolean)
65 */
66 class HeaderCardParser {
67
/** Logger through which non-fatal standard violations found while parsing are reported. */
private static final Logger LOG = Logger.getLogger(HeaderCardParser.class.getName());

static {
    // Parser warnings are silenced by default: only SEVERE messages get through.
    LOG.setLevel(Level.SEVERE);
}

/** Pattern for decimal values: optional sign, digits, optional fraction, optional E/D exponent. */
private static final Pattern DECIMAL_REGEX = Pattern.compile("[+-]?\\d+(\\.\\d*)?([dDeE][+-]?\\d+)?");

/** Pattern for complex values: two decimal values, comma-separated, inside parentheses. */
private static final Pattern COMPLEX_REGEX = Pattern.compile(
        "\\(\\s*" + DECIMAL_REGEX.pattern() + "\\s*,\\s*" + DECIMAL_REGEX.pattern() + "\\s*\\)");

/** Pattern for decimal integers with an optional sign. */
private static final Pattern INT_REGEX = Pattern.compile("[+-]?\\d+");

/** The header record being parsed (usually, but not necessarily, 80 characters wide). */
private String line;

/**
 * The keyword of the card (trimmed, and with dots separating the components of HIERARCH-style keys).
 */
private String key;

/**
 * The value of the card (trimmed), or <code>null</code> if no value was found.
 */
private String value;

/**
 * The comment that follows the value, or <code>null</code> if there was none.
 */
private String comment;

/**
 * The inferred Java type of the value, or <code>null</code> if there is no value or its type is not recognised.
 */
private Class<?> type;

/**
 * The index in the line just after the last character consumed by this parser.
 */
private int parsePos;
112
/**
 * Creates a parser for a single FITS header record and parses it right away into its keyword, value and comment
 * components.
 *
 * @param  line                     the header record to parse, normally exactly 80 characters wide (but it need
 *                                      not be). It is expected to be non-<code>null</code>.
 *
 * @throws UnclosedQuoteException   if a string value has no closing quote and header repairs are not allowed.
 * @throws IllegalArgumentException declared for callers, for records that contain neither a key nor a value.
 *
 * @see                             #getKey()
 * @see                             #getValue()
 * @see                             #getTrimmedComment()
 * @see                             #getUntrimmedComment()
 * @see                             #isString()
 * @see                             FitsFactory#setAllowHeaderRepairs(boolean)
 */
HeaderCardParser(String line) throws UnclosedQuoteException, IllegalArgumentException {
    // HeaderCard never hands us a null record, so no explicit null check is made here.
    this.line = line;

    // Order matters: every stage resumes from the parse position left behind by the previous one.
    parseKey();
    parseValue();
    parseComment();
}
139
140 /**
141 * Returns the keyword component of the parsed header line. If the processing of HIERARCH keywords is enabled, it
142 * may be a `HIERARCH` style long key with the components separated by dots (e.g.
143 * `HIERARCH.ORG.SYSTEM.SUBSYS.ELEMENT`). Otherwise, it will be a standard 0--8 character standard uppercase FITS
144 * keyword (including simply `HIERARCH` if {@link FitsFactory#setUseHierarch(boolean)} was set <code>false</code>).
145 *
146 * @return the FITS header keyword for the line.
147 *
148 * @see FitsFactory#setUseHierarch(boolean)
149 */
150 String getKey() {
151 return key;
152 }
153
154 /**
155 * Returns the value component of the parsed header line.
156 *
157 * @return the value part of the line or <code>null</code> if the line contained no value.
158 *
159 * @see FitsFactory#setUseHierarch(boolean)
160 */
161 String getValue() {
162 return value;
163 }
164
165 /**
166 * Returns the comment component of the parsed header line, with all leading and trailing spaces preserved.
167 *
168 * @return the comment part of the line or <code>null</code> if the line contained no comment.
169 *
170 * @see #getTrimmedComment()
171 */
172 String getUntrimmedComment() {
173 return comment;
174 }
175
176 /**
177 * Returns the comment component of the parsed header line, with both leading and trailing spaces removed
178 *
179 * @return the comment part of the line or <code>null</code> if the line contained no comment.
180 *
181 * @see #getUntrimmedComment()
182 */
183 String getTrimmedComment() {
184 return comment == null ? null : comment.trim();
185 }
186
187 /**
188 * Returns whether the line contained a quoted string value. By default, strings with missing end quotes are no
189 * considered string values, but rather as comments. To allow processing lines with missing quotes as string values,
190 * you must set {@link FitsFactory#setAllowHeaderRepairs(boolean)} to <code>true</code> prior to parsing a header
191 * line with the missing end quote.
192 *
193 * @return true if the value was quoted.
194 *
195 * @see FitsFactory#setAllowHeaderRepairs(boolean)
196 */
197 boolean isString() {
198 if (type == null) {
199 return false;
200 }
201 return String.class.isAssignableFrom(type);
202 }
203
204 /**
205 * <p>
206 * Returns the inferred Java class for the value stored in the header record, such as a {@link String} class, a
207 * {@link Boolean} class, an integer type ({@link Integer}, {@link Long}, or {@link BigInteger}) class, a decimal
208 * type ({@link Float}, {@link Double}, or {@link BigDecimal}) class, a {@link ComplexValue} class, or
209 * <code>null</code>. For number types, it returns the 'smallest' type that can be used to represent the string
210 * value.
211 * </p>
212 * <p>
213 * Its an inferred type as the true underlying type that was used to create the value is lost. For example, the
214 * value <code>42</code> may have been written from any integer type, including <code>byte</code> or
215 * <code>short<code>, but this routine will guess it to be an <code>int</code> ({@link Integer} type. As such, it
216 * may not be equal to {@link HeaderCard#valueType()} from which the record was created, and hence should not be
217 * used for round-trip testing of type equality.
218 * </p>
219 *
220 * @return the inferred type of the stored serialized (string) value, or <code>null</code> if the value does not
221 * seem to match any of the supported value types.
222 *
223 * @see HeaderCard#valueType()
224 */
225 Class<?> getInferredType() {
226 return type;
227 }
228
229 /**
230 * Parses a fits keyword from a card and standardizes it (trim, uppercase, and hierarch with dots).
231 */
232 private void parseKey() {
233 /*
234 * AK: The parsing of headers should never be stricter that the writing, such that any header written by this
235 * library can be parsed back without errors. (And, if anything, the parsing should be more permissive to allow
236 * reading FITS produced by other libraries, which may be less stringent in their rules). The original
237 * implementation strongly enforced the ESO HIERARCH convention when reading, but not at all for writing. Here
238 * is a tolerant hierarch parser that will read back any hierarch key that was written by this library. The
239 * input FITS can use any space or even '.' to separate the hierarchies, and the hierarchical elements may
240 * contain any ASCII characters other than those used for separating. It is more in line with what we do with
241 * standard keys too.
242 */
243
244 // Find the '=' in the line, if any...
245 int iEq = line.indexOf('=');
246
247 // The stem is in the first 8 characters or what precedes an '=' character
248 // before that.
249 int endStem = (iEq >= 0 && iEq <= HeaderCard.MAX_KEYWORD_LENGTH) ? iEq : HeaderCard.MAX_KEYWORD_LENGTH;
250 endStem = Math.min(line.length(), endStem);
251
252 String rawStem = line.substring(0, endStem).trim();
253
254 // Check for space at the start of the keyword...
255 if (endStem > 0 && !rawStem.isEmpty()) {
256 if (Character.isSpaceChar(line.charAt(0))) {
257 LOG.warning("[" + sanitize(rawStem) + "] Non-standard starting with a space (trimming).");
258 }
259 }
260
261 String stem = rawStem.toUpperCase();
262
263 if (!stem.equals(rawStem)) {
264 LOG.warning("[" + sanitize(rawStem) + "] Non-standard lower-case letter(s) in base keyword.");
265 }
266
267 key = stem;
268 parsePos = endStem;
269
270 // If not using HIERARCH, then be very resilient, and return whatever key the first 8 chars make...
271
272 // If the line does not have an '=', can only be a simple key
273 // If it's not a HIERARCH keyword, then return the simple key.
274 if (!FitsFactory.getUseHierarch() || (iEq < 0) || !stem.equals(HIERARCH.key())) {
275 return;
276 }
277
278 // Compose the hierarchical key...
279 StringTokenizer tokens = new StringTokenizer(line.substring(stem.length(), iEq), " \t\r\n.");
280 StringBuilder builder = new StringBuilder(stem);
281
282 while (tokens.hasMoreTokens()) {
283 String token = tokens.nextToken();
284
285 parsePos = line.indexOf(token, parsePos) + token.length();
286
287 // Add a . to separate hierarchies
288 builder.append('.');
289 builder.append(token);
290 }
291
292 key = builder.toString();
293
294 if (HIERARCH.key().equals(key)) {
295 // The key is only HIERARCH, without a hierarchical keyword after it...
296 LOG.warning("HIERARCH base keyword without HIERARCH-style long key after it.");
297 return;
298 }
299
300 if (!FitsFactory.getHierarchFormater().isCaseSensitive()) {
301 key = key.toUpperCase(Locale.US);
302 }
303
304 try {
305 HeaderCard.validateKey(key);
306 } catch (IllegalArgumentException e) {
307 LOG.warning(e.getMessage());
308 }
309 }
310
311 /**
312 * Advances the parse position to skip any spaces at the current parse position, and returns whether there is
313 * anything left in the line after the spaces...
314 *
315 * @return <code>true</code> if there is more non-space characters in the string, otherwise <code>false</code>
316 */
317 private boolean skipSpaces() {
318 for (; parsePos < line.length(); parsePos++) {
319 if (!Character.isSpaceChar(line.charAt(parsePos))) {
320 // Line has non-space characters left to parse...
321 return true;
322 }
323 }
324 // nothing left to parse.
325 return false;
326 }
327
328 /**
329 * Parses the comment components starting from the current parse position. After this call the parse position is set
330 * to the end of the string. The leading '/' (if found) is not included in the comment.
331 */
332 private void parseComment() {
333 if (!skipSpaces()) {
334 // nothing left to parse.
335 return;
336 }
337
338 // if no value, then everything is comment from here on...
339 if (value != null) {
340 if (line.charAt(parsePos) == '/') {
341 // Skip the '/' itself, the comment is whatever is after it.
342 parsePos++;
343 } else {
344 // Junk after a string value -- interpret it as the start of the comment...
345 LOG.warning("[" + sanitize(getKey()) + "] Junk after value (included in the comment).");
346 }
347 }
348
349 comment = line.substring(parsePos);
350 parsePos = line.length();
351
352 try {
353 HeaderCard.validateChars(comment);
354 } catch (IllegalArgumentException e) {
355 LOG.warning("[" + sanitize(getKey()) + "]: " + e.getMessage());
356 }
357 }
358
359 /**
360 * Parses the value component from the current parse position. The parse position is advanced to the first character
361 * after the value specification in the line. If the header line does not contain a value component, then the value
362 * field of this object is set to <code>null</code>.
363 *
364 * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
365 *
366 * @see FitsFactory#setAllowHeaderRepairs(boolean)
367 */
368 private void parseValue() throws UnclosedQuoteException {
369 if (key.isEmpty() || key.equals(Standard.COMMENT.key()) || key.equals(Standard.HISTORY.key())) {
370 return;
371 }
372
373 if (!skipSpaces()) {
374 return; // nothing left to parse.
375 }
376
377 if (CONTINUE.key().equals(key)) {
378 parseValueBody();
379 } else if (line.charAt(parsePos) == '=') {
380
381 if (parsePos < HeaderCard.MAX_KEYWORD_LENGTH) {
382 LOG.warning("[" + sanitize(key) + "] assigmment before byte " + (HeaderCard.MAX_KEYWORD_LENGTH + 1)
383 + " for key '" + sanitize(key) + "'.");
384 }
385 if (parsePos + 1 >= line.length()) {
386 LOG.warning("[" + sanitize(key) + "] Record ends with '='.");
387 } else if (line.charAt(parsePos + 1) != ' ') {
388 LOG.warning("[" + sanitize(key) + "] Missing required standard space after '='.");
389 }
390
391 if (parsePos > HeaderCard.MAX_KEYWORD_LENGTH) {
392 // equal sign = after the 9th char -- only supported with hierarch keys...
393 if (!key.startsWith(HIERARCH.key() + ".")) {
394 LOG.warning("[" + sanitize(key) + "] Possibly misplaced '=' (after byte 9).");
395 // It's not a HIERARCH key
396 return;
397 }
398 }
399
400 parsePos++;
401 parseValueBody();
402 }
403
404 try {
405 HeaderCard.validateChars(value);
406 } catch (IllegalArgumentException e) {
407 LOG.warning("[" + sanitize(getKey()) + "] " + e.getMessage());
408 }
409 }
410
411 /**
412 * Parses the value body from the current parse position. The parse position is advanced to the first character
413 * after the value specification in the line. If the header line does not contain a value component, then the value
414 * field of this object is set to <code>null</code>.
415 *
416 * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
417 *
418 * @see FitsFactory#setAllowHeaderRepairs(boolean)
419 */
420 private void parseValueBody() throws UnclosedQuoteException {
421 if (!skipSpaces()) {
422 // nothing left to parse.
423 return;
424 }
425
426 if (isNextQuote()) {
427 // Parse as a string value, or else throw an exception.
428 parseStringValue();
429 } else {
430 int end = line.indexOf('/', parsePos);
431 if (end < 0) {
432 end = line.length();
433 }
434 value = line.substring(parsePos, end).trim();
435 parsePos = end;
436 type = getInferredValueType(key, value);
437 }
438
439 }
440
441 /**
442 * Checks if the next character, at the current parse position, is a single quote.
443 *
444 * @return <code>true</code> if the next character on the line exists and is a single quote, otherwise
445 * <code>false</code>.
446 */
447 private boolean isNextQuote() {
448 if (parsePos >= line.length()) {
449 // nothing left to parse.
450 return false;
451 }
452 return line.charAt(parsePos) == '\'';
453 }
454
455 /**
456 * Returns the string fom a parsed string value component, with trailing spaces removed. It preserves leading
457 * spaces.
458 *
459 * @param buf the parsed string value.
460 *
461 * @return the string value with trailing spaces removed.
462 */
463 private static String getNoTrailingSpaceString(StringBuilder buf) {
464 int to = buf.length();
465
466 // Remove trailing spaces only!
467 while (--to >= 0) {
468 if (!Character.isSpaceChar(buf.charAt(to))) {
469 break;
470 }
471 }
472
473 return to < 0 ? "" : buf.substring(0, to + 1);
474 }
475
476 /**
477 * Parses a quoted string value starting at the current parse position. If successful, the parse position is updated
478 * to after the string. Otherwise, the parse position is advanced only to skip leading spaces starting from the
479 * input position.
480 *
481 * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
482 *
483 * @see FitsFactory#setAllowHeaderRepairs(boolean)
484 */
485 private void parseStringValue() throws UnclosedQuoteException {
486 type = String.class;
487 StringBuilder buf = new StringBuilder(HeaderCard.MAX_VALUE_LENGTH);
488
489 // Build the string value, up to the end quote and paying attention to double
490 // quotes inside the string, which are translated to single quotes within
491 // the string value itself.
492 for (++parsePos; parsePos < line.length(); parsePos++) {
493 if (isNextQuote()) {
494 parsePos++;
495
496 if (!isNextQuote()) {
497 // Closing single quote;
498 value = getNoTrailingSpaceString(buf);
499 return;
500 }
501 }
502 buf.append(line.charAt(parsePos));
503 }
504
505 // String with missing end quote
506 if (!FitsFactory.isAllowHeaderRepairs()) {
507 throw new UnclosedQuoteException(line);
508 }
509 LOG.warning("[" + sanitize(key) + "] Ignored missing end quote (value parsed to end of record).");
510 value = getNoTrailingSpaceString(buf);
511 }
512
513 /**
514 * Returns the inferred Java class for the specified value. See {@link #getInferredType()} for a more detailed
515 * description.
516 *
517 * @param value the serialized (string) representation of a FITS header value.
518 *
519 * @return the inferred type of the specified serialized (string) value, or <code>null</code> if the value
520 * does not seem to match any of the supported value types. <code>null</code> values default to
521 * <code>Boolean.class</code>.
522 */
523 private static Class<?> getInferredValueType(String key, String value) {
524 // TODO We never call this with null locally, so the following check is dead code here...
525 // if (value == null) {
526 // return Boolean.class;
527 // }
528 if (value.isEmpty()) {
529 LOG.warning("[" + sanitize(key) + "] Null non-string value (defaulted to Boolean.class).");
530 return Boolean.class;
531 }
532
533 String trimmedValue = value.trim().toUpperCase();
534
535 if ("T".equals(trimmedValue) || "F".equals(trimmedValue)) {
536 return Boolean.class;
537 }
538 if (INT_REGEX.matcher(trimmedValue).matches()) {
539 return getIntegerType(trimmedValue);
540 }
541 if (DECIMAL_REGEX.matcher(trimmedValue).matches()) {
542 return getDecimalType(trimmedValue);
543 }
544 if (COMPLEX_REGEX.matcher(trimmedValue).matches()) {
545 return ComplexValue.class;
546 }
547
548 LOG.warning("[" + sanitize(key) + "] Unrecognised non-string value type '" + sanitize(trimmedValue) + "'.");
549
550 return null;
551 }
552
553 /**
554 * Returns the guessed decimal type of a string representation of a decimal value.
555 *
556 * @param value the string representation of a decimal value.
557 *
558 * @return the The Java class ({@link Float}, {@link Double}, or {@link BigDecimal}) that can be used to
559 * represent the value with the precision provided.
560 *
561 * @see #getInferredValueType()
562 * @see #getIntegerType(String)
563 */
564 private static Class<? extends Number> getDecimalType(String value) {
565 value = value.toUpperCase(Locale.US);
566 boolean hasD = (value.indexOf('D') >= 0);
567
568 if (hasD) {
569 // Convert the Double Scientific Notation specified by FITS to pure IEEE.
570 value = value.replace('D', 'E');
571 }
572
573 BigDecimal big = new BigDecimal(value);
574
575 // Check for zero, and deal with it separately...
576 if (big.stripTrailingZeros().equals(BigDecimal.ZERO)) {
577 int decimals = big.scale();
578 if (decimals <= FlexFormat.FLOAT_DECIMALS) {
579 return hasD ? Double.class : Float.class;
580 }
581 if (decimals <= FlexFormat.DOUBLE_DECIMALS) {
582 return Double.class;
583 }
584 return BigDecimal.class;
585 }
586
587 // Now non-zero values...
588 int decimals = big.precision() - 1;
589 float f = big.floatValue();
590 if (decimals <= FlexFormat.FLOAT_DECIMALS && (f != 0.0F) && Float.isFinite(f)) {
591 return hasD ? Double.class : Float.class;
592 }
593
594 double d = big.doubleValue();
595 if (decimals <= FlexFormat.DOUBLE_DECIMALS && (d != 0.0) && Double.isFinite(d)) {
596 return Double.class;
597 }
598 return BigDecimal.class;
599 }
600
601 /**
602 * Returns the guessed integer type of a string representation of a integer value.
603 *
604 * @param value the string representation of an integer value.
605 *
606 * @return the The Java class ({@link Integer}, {@link Long}, or {@link BigInteger}) that can be used to
607 * represent the value with the number of digits provided.
608 *
609 * @see #getInferredValueType()
610 * @see #getDecimalType(String)
611 */
612 private static Class<? extends Number> getIntegerType(String value) {
613 int bits = new BigInteger(value).bitLength();
614 if (bits < Integer.SIZE) {
615 return Integer.class;
616 }
617 if (bits < Long.SIZE) {
618 return Long.class;
619 }
620 return BigInteger.class;
621 }
622
623 private static String sanitize(String text) {
624 return HeaderCard.sanitize(text);
625 }
626
627 static Logger getLogger() {
628 return LOG;
629 }
630
631 }