View Javadoc
1   /*
2    * #%L
3    * nom.tam FITS library
4    * %%
5    * Copyright (C) 2004 - 2024 nom-tam-fits
6    * %%
7    * This is free and unencumbered software released into the public domain.
8    *
9    * Anyone is free to copy, modify, publish, use, compile, sell, or
10   * distribute this software, either in source code form or as a compiled
11   * binary, for any purpose, commercial or non-commercial, and by any
12   * means.
13   *
14   * In jurisdictions that recognize copyright laws, the author or authors
15   * of this software dedicate any and all copyright interest in the
16   * software to the public domain. We make this dedication for the benefit
17   * of the public at large and to the detriment of our heirs and
18   * successors. We intend this dedication to be an overt act of
19   * relinquishment in perpetuity of all present and future rights to this
20   * software under copyright law.
21   *
22   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25   * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28   * OTHER DEALINGS IN THE SOFTWARE.
29   * #L%
30   */
31  
32  package nom.tam.fits;
33  
34  import java.math.BigDecimal;
35  import java.math.BigInteger;
36  import java.util.Locale;
37  import java.util.StringTokenizer;
38  import java.util.logging.Level;
39  import java.util.logging.Logger;
40  import java.util.regex.Pattern;
41  
42  import nom.tam.util.ComplexValue;
43  import nom.tam.util.FlexFormat;
44  
45  import static nom.tam.fits.header.NonStandard.HIERARCH;
46  import static nom.tam.fits.header.Standard.CONTINUE;
47  
48  /**
49   * <p>
50   * Converts a single 80-character wide FITS header record into a header card. See {@link HeaderCard#create(String)} for
51   * a description of the rules that guide parsing.
52   * </p>
53   * <p>
54   * When parsing header records that violate FITS standards, the violations can be logged or will throw appropriate
55   * exceptions (depending on the severity of the standard violation and whether
56   * {@link FitsFactory#setAllowHeaderRepairs(boolean)} is enabled or not). The logging of violations is disabled by
57   * default, but may be controlled via {@link Header#setParserWarningsEnabled(boolean)}.
58   * </p>
59   *
60   * @author Attila Kovacs
61   *
62   * @see    FitsFactory#setAllowHeaderRepairs(boolean)
63   * @see    Header#setParserWarningsEnabled(boolean)
64   */
65  class HeaderCardParser {
66  
67      private static final Logger LOG = Logger.getLogger(HeaderCardParser.class.getName());
68  
69      static {
70          // Do not log warnings by default.
71          LOG.setLevel(Level.SEVERE);
72      }
73  
74      /** regexp for IEEE floats */
75      private static final Pattern DECIMAL_REGEX = Pattern.compile("[+-]?\\d+(\\.\\d*)?([dDeE][+-]?\\d+)?");
76  
77      /** regexp for complex numbers */
78      private static final Pattern COMPLEX_REGEX = Pattern
79              .compile("\\(\\s*" + DECIMAL_REGEX + "\\s*,\\s*" + DECIMAL_REGEX + "\\s*\\)");
80  
81      /** regexp for decimal integers. */
82      private static final Pattern INT_REGEX = Pattern.compile("[+-]?\\d+");
83  
84      /** The header line (usually 80-character width), which to parse. */
85      private String line;
86  
87      /**
88       * the value of the card. (trimmed and standardized with . in HIERARCH)
89       */
90      private String key = null;
91  
92      /**
93       * the value of the card. (trimmed)
94       */
95      private String value = null;
96  
97      /**
98       * the comment specified with the value.
99       */
100     private String comment = null;
101 
102     /**
103      * was the value quoted?
104      */
105     private Class<?> type = null;
106 
107     /**
108      * The position in the string that right after the last character processed by this parser
109      */
110     private int parsePos = 0;
111 
112     /**
113      * Instantiates a new parser for a FITS header line.
114      *
115      * @param  line                     a line in the FITS header, normally exactly 80-characters wide (but need not
116      *                                      be).
117      *
118      * @see                             #getKey()
119      * @see                             #getValue()
120      * @see                             #getComment()
121      * @see                             #isString()
122      *
123      * @throws UnclosedQuoteException   if there is a missing end-quote and header repairs aren't allowed.
124      * @throws IllegalArgumentException if the record contained neither a key or a value.
125      *
126      * @see                             FitsFactory#setAllowHeaderRepairs(boolean)
127      */
128     HeaderCardParser(String line) throws UnclosedQuoteException, IllegalArgumentException {
129         this.line = line;
130         // TODO HeaderCard never calls this with a null argument, so the check below is dead code here...
131         // if (line == null) {
132         // throw new IllegalArgumentException("Cannot parse null string");
133         // }
134         parseKey();
135         parseValue();
136         parseComment();
137     }
138 
139     /**
140      * Returns the keyword component of the parsed header line. If the processing of HIERARCH keywords is enabled, it
141      * may be a `HIERARCH` style long key with the components separated by dots (e.g.
142      * `HIERARCH.ORG.SYSTEM.SUBSYS.ELEMENT`). Otherwise, it will be a standard 0--8 character standard uppercase FITS
143      * keyword (including simply `HIERARCH` if {@link FitsFactory#setUseHierarch(boolean)} was set <code>false</code>).
144      *
145      * @return the FITS header keyword for the line.
146      *
147      * @see    FitsFactory#setUseHierarch(boolean)
148      */
149     String getKey() {
150         return key;
151     }
152 
153     /**
154      * Returns the value component of the parsed header line.
155      *
156      * @return the value part of the line or <code>null</code> if the line contained no value.
157      *
158      * @see    FitsFactory#setUseHierarch(boolean)
159      */
160     String getValue() {
161         return value;
162     }
163 
164     /**
165      * Returns the comment component of the parsed header line, with all leading and trailing spaces preserved.
166      *
167      * @return the comment part of the line or <code>null</code> if the line contained no comment.
168      *
169      * @see    #getTrimmedComment()
170      */
171     String getUntrimmedComment() {
172         return comment;
173     }
174 
175     /**
176      * Returns the comment component of the parsed header line, with both leading and trailing spaces removed
177      *
178      * @return the comment part of the line or <code>null</code> if the line contained no comment.
179      *
180      * @see    #getUntrimmedComment()
181      */
182     String getTrimmedComment() {
183         return comment == null ? null : comment.trim();
184     }
185 
186     /**
187      * Returns whether the line contained a quoted string value. By default, strings with missing end quotes are no
188      * considered string values, but rather as comments. To allow processing lines with missing quotes as string values,
189      * you must set {@link FitsFactory#setAllowHeaderRepairs(boolean)} to <code>true</code> prior to parsing a header
190      * line with the missing end quote.
191      *
192      * @return true if the value was quoted.
193      *
194      * @see    FitsFactory#setAllowHeaderRepairs(boolean)
195      */
196     boolean isString() {
197         if (type == null) {
198             return false;
199         }
200         return String.class.isAssignableFrom(type);
201     }
202 
203     /**
204      * <p>
205      * Returns the inferred Java class for the value stored in the header record, such as a {@link String} class, a
206      * {@link Boolean} class, an integer type ({@link Integer}, {@link Long}, or {@link BigInteger}) class, a decimal
207      * type ({@link Float}, {@link Double}, or {@link BigDecimal}) class, a {@link ComplexValue} class, or
208      * <code>null</code>. For number types, it returns the 'smallest' type that can be used to represent the string
209      * value.
210      * </p>
211      * <p>
212      * Its an inferred type as the true underlying type that was used to create the value is lost. For example, the
213      * value <code>42</code> may have been written from any integer type, including <code>byte</code> or
214      * <code>short<code>, but this routine will guess it to be an <code>int</code> ({@link Integer} type. As such, it
215      * may not be equal to {@link HeaderCard#valueType()} from which the record was created, and hence should not be
216      * used for round-trip testing of type equality.
217      * </p>
218      *
219      * @return the inferred type of the stored serialized (string) value, or <code>null</code> if the value does not
220      *             seem to match any of the supported value types.
221      *
222      * @see    HeaderCard#valueType()
223      */
224     Class<?> getInferredType() {
225         return type;
226     }
227 
228     /**
229      * Parses a fits keyword from a card and standardizes it (trim, uppercase, and hierarch with dots).
230      */
231     private void parseKey() {
232         /*
233          * AK: The parsing of headers should never be stricter that the writing, such that any header written by this
234          * library can be parsed back without errors. (And, if anything, the parsing should be more permissive to allow
235          * reading FITS produced by other libraries, which may be less stringent in their rules). The original
236          * implementation strongly enforced the ESO HIERARCH convention when reading, but not at all for writing. Here
237          * is a tolerant hierarch parser that will read back any hierarch key that was written by this library. The
238          * input FITS can use any space or even '.' to separate the hierarchies, and the hierarchical elements may
239          * contain any ASCII characters other than those used for separating. It is more in line with what we do with
240          * standard keys too.
241          */
242 
243         // Find the '=' in the line, if any...
244         int iEq = line.indexOf('=');
245 
246         // The stem is in the first 8 characters or what precedes an '=' character
247         // before that.
248         int endStem = (iEq >= 0 && iEq <= HeaderCard.MAX_KEYWORD_LENGTH) ? iEq : HeaderCard.MAX_KEYWORD_LENGTH;
249         endStem = Math.min(line.length(), endStem);
250 
251         String rawStem = line.substring(0, endStem).trim();
252 
253         // Check for space at the start of the keyword...
254         if (endStem > 0 && !rawStem.isEmpty()) {
255             if (Character.isSpaceChar(line.charAt(0))) {
256                 LOG.warning("[" + sanitize(rawStem) + "] Non-standard starting with a space (trimming).");
257             }
258         }
259 
260         String stem = rawStem.toUpperCase();
261 
262         if (!stem.equals(rawStem)) {
263             LOG.warning("[" + sanitize(rawStem) + "] Non-standard lower-case letter(s) in base keyword.");
264         }
265 
266         key = stem;
267         parsePos = endStem;
268 
269         // If not using HIERARCH, then be very resilient, and return whatever key the first 8 chars make...
270 
271         // If the line does not have an '=', can only be a simple key
272         // If it's not a HIERARCH keyword, then return the simple key.
273         if (!FitsFactory.getUseHierarch() || (iEq < 0) || !stem.equals(HIERARCH.key())) {
274             return;
275         }
276 
277         // Compose the hierarchical key...
278         StringTokenizer tokens = new StringTokenizer(line.substring(stem.length(), iEq), " \t\r\n.");
279         StringBuilder builder = new StringBuilder(stem);
280 
281         while (tokens.hasMoreTokens()) {
282             String token = tokens.nextToken();
283 
284             parsePos = line.indexOf(token, parsePos) + token.length();
285 
286             // Add a . to separate hierarchies
287             builder.append('.');
288             builder.append(token);
289         }
290 
291         key = builder.toString();
292 
293         if (HIERARCH.key().equals(key)) {
294             // The key is only HIERARCH, without a hierarchical keyword after it...
295             LOG.warning("HIERARCH base keyword without HIERARCH-style long key after it.");
296             return;
297         }
298 
299         if (!FitsFactory.getHierarchFormater().isCaseSensitive()) {
300             key = key.toUpperCase(Locale.US);
301         }
302 
303         try {
304             HeaderCard.validateKey(key);
305         } catch (IllegalArgumentException e) {
306             LOG.warning(e.getMessage());
307         }
308     }
309 
310     /**
311      * Advances the parse position to skip any spaces at the current parse position, and returns whether there is
312      * anything left in the line after the spaces...
313      *
314      * @return <code>true</code> if there is more non-space characters in the string, otherwise <code>false</code>
315      */
316     private boolean skipSpaces() {
317         for (; parsePos < line.length(); parsePos++) {
318             if (!Character.isSpaceChar(line.charAt(parsePos))) {
319                 // Line has non-space characters left to parse...
320                 return true;
321             }
322         }
323         // nothing left to parse.
324         return false;
325     }
326 
327     /**
328      * Parses the comment components starting from the current parse position. After this call the parse position is set
329      * to the end of the string. The leading '/' (if found) is not included in the comment.
330      */
331     private void parseComment() {
332         if (!skipSpaces()) {
333             // nothing left to parse.
334             return;
335         }
336 
337         // if no value, then everything is comment from here on...
338         if (value != null) {
339             if (line.charAt(parsePos) == '/') {
340                 // Skip the '/' itself, the comment is whatever is after it.
341                 parsePos++;
342             } else {
343                 // Junk after a string value -- interpret it as the start of the comment...
344                 LOG.warning("[" + sanitize(getKey()) + "] Junk after value (included in the comment).");
345             }
346         }
347 
348         comment = line.substring(parsePos);
349         parsePos = line.length();
350 
351         try {
352             HeaderCard.validateChars(comment);
353         } catch (IllegalArgumentException e) {
354             LOG.warning("[" + sanitize(getKey()) + "]: " + e.getMessage());
355         }
356     }
357 
358     /**
359      * Parses the value component from the current parse position. The parse position is advanced to the first character
360      * after the value specification in the line. If the header line does not contain a value component, then the value
361      * field of this object is set to <code>null</code>.
362      *
363      * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
364      *
365      * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
366      */
367     private void parseValue() throws UnclosedQuoteException {
368         if (key.isEmpty() || !skipSpaces()) {
369             // nothing left to parse.
370             return;
371         }
372 
373         if (CONTINUE.key().equals(key)) {
374             parseValueBody();
375         } else if (line.charAt(parsePos) == '=') {
376 
377             if (parsePos < HeaderCard.MAX_KEYWORD_LENGTH) {
378                 LOG.warning("[" + sanitize(key) + "] assigmment before byte " + (HeaderCard.MAX_KEYWORD_LENGTH + 1)
379                         + " for key '" + sanitize(key) + "'.");
380             }
381             if (parsePos + 1 >= line.length()) {
382                 LOG.warning("[" + sanitize(key) + "] Record ends with '='.");
383             } else if (line.charAt(parsePos + 1) != ' ') {
384                 LOG.warning("[" + sanitize(key) + "] Missing required standard space after '='.");
385             }
386 
387             if (parsePos > HeaderCard.MAX_KEYWORD_LENGTH) {
388                 // equal sign = after the 9th char -- only supported with hierarch keys...
389                 if (!key.startsWith(HIERARCH.key() + ".")) {
390                     LOG.warning("[" + sanitize(key) + "] Possibly misplaced '=' (after byte 9).");
391                     // It's not a HIERARCH key
392                     return;
393                 }
394             }
395 
396             parsePos++;
397             parseValueBody();
398         }
399 
400         try {
401             HeaderCard.validateChars(value);
402         } catch (IllegalArgumentException e) {
403             LOG.warning("[" + sanitize(getKey()) + "] " + e.getMessage());
404         }
405     }
406 
407     /**
408      * Parses the value body from the current parse position. The parse position is advanced to the first character
409      * after the value specification in the line. If the header line does not contain a value component, then the value
410      * field of this object is set to <code>null</code>.
411      *
412      * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
413      *
414      * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
415      */
416     private void parseValueBody() throws UnclosedQuoteException {
417         if (!skipSpaces()) {
418             // nothing left to parse.
419             return;
420         }
421 
422         if (isNextQuote()) {
423             // Parse as a string value, or else throw an exception.
424             parseStringValue();
425         } else {
426             int end = line.indexOf('/', parsePos);
427             if (end < 0) {
428                 end = line.length();
429             }
430             value = line.substring(parsePos, end).trim();
431             parsePos = end;
432             type = getInferredValueType(key, value);
433         }
434 
435     }
436 
437     /**
438      * Checks if the next character, at the current parse position, is a single quote.
439      *
440      * @return <code>true</code> if the next character on the line exists and is a single quote, otherwise
441      *             <code>false</code>.
442      */
443     private boolean isNextQuote() {
444         if (parsePos >= line.length()) {
445             // nothing left to parse.
446             return false;
447         }
448         return line.charAt(parsePos) == '\'';
449     }
450 
451     /**
452      * Returns the string fom a parsed string value component, with trailing spaces removed. It preserves leading
453      * spaces.
454      *
455      * @param  buf the parsed string value.
456      *
457      * @return     the string value with trailing spaces removed.
458      */
459     private static String getNoTrailingSpaceString(StringBuilder buf) {
460         int to = buf.length();
461 
462         // Remove trailing spaces only!
463         while (--to >= 0) {
464             if (!Character.isSpaceChar(buf.charAt(to))) {
465                 break;
466             }
467         }
468 
469         return to < 0 ? "" : buf.substring(0, to + 1);
470     }
471 
472     /**
473      * Parses a quoted string value starting at the current parse position. If successful, the parse position is updated
474      * to after the string. Otherwise, the parse position is advanced only to skip leading spaces starting from the
475      * input position.
476      *
477      * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
478      *
479      * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
480      */
481     private void parseStringValue() throws UnclosedQuoteException {
482         type = String.class;
483         StringBuilder buf = new StringBuilder(HeaderCard.MAX_VALUE_LENGTH);
484 
485         // Build the string value, up to the end quote and paying attention to double
486         // quotes inside the string, which are translated to single quotes within
487         // the string value itself.
488         for (++parsePos; parsePos < line.length(); parsePos++) {
489             if (isNextQuote()) {
490                 parsePos++;
491 
492                 if (!isNextQuote()) {
493                     // Closing single quote;
494                     value = getNoTrailingSpaceString(buf);
495                     return;
496                 }
497             }
498             buf.append(line.charAt(parsePos));
499         }
500 
501         // String with missing end quote
502         if (!FitsFactory.isAllowHeaderRepairs()) {
503             throw new UnclosedQuoteException(line);
504         }
505         LOG.warning("[" + sanitize(key) + "] Ignored missing end quote (value parsed to end of record).");
506         value = getNoTrailingSpaceString(buf);
507     }
508 
509     /**
510      * Returns the inferred Java class for the specified value. See {@link #getInferredType()} for a more detailed
511      * description.
512      *
513      * @param  value the serialized (string) representation of a FITS header value.
514      *
515      * @return       the inferred type of the specified serialized (string) value, or <code>null</code> if the value
516      *                   does not seem to match any of the supported value types. <code>null</code> values default to
517      *                   <code>Boolean.class</code>.
518      */
519     private static Class<?> getInferredValueType(String key, String value) {
520         // TODO We never call this with null locally, so the following check is dead code here...
521         // if (value == null) {
522         // return Boolean.class;
523         // }
524         if (value.isEmpty()) {
525             LOG.warning("[" + sanitize(key) + "] Null non-string value (defaulted to Boolean.class).");
526             return Boolean.class;
527         }
528 
529         String trimmedValue = value.trim().toUpperCase();
530 
531         if ("T".equals(trimmedValue) || "F".equals(trimmedValue)) {
532             return Boolean.class;
533         }
534         if (INT_REGEX.matcher(trimmedValue).matches()) {
535             return getIntegerType(trimmedValue);
536         }
537         if (DECIMAL_REGEX.matcher(trimmedValue).matches()) {
538             return getDecimalType(trimmedValue);
539         }
540         if (COMPLEX_REGEX.matcher(trimmedValue).matches()) {
541             return ComplexValue.class;
542         }
543 
544         LOG.warning("[" + sanitize(key) + "] Unrecognised non-string value type '" + sanitize(trimmedValue) + "'.");
545 
546         return null;
547     }
548 
549     /**
550      * Returns the guessed decimal type of a string representation of a decimal value.
551      *
552      * @param  value the string representation of a decimal value.
553      *
554      * @return       the The Java class ({@link Float}, {@link Double}, or {@link BigDecimal}) that can be used to
555      *                   represent the value with the precision provided.
556      *
557      * @see          #getInferredValueType()
558      * @see          #getIntegerType(String)
559      */
560     private static Class<? extends Number> getDecimalType(String value) {
561         value = value.toUpperCase(Locale.US);
562         boolean hasD = (value.indexOf('D') >= 0);
563 
564         if (hasD) {
565             // Convert the Double Scientific Notation specified by FITS to pure IEEE.
566             value = value.replace('D', 'E');
567         }
568 
569         BigDecimal big = new BigDecimal(value);
570 
571         // Check for zero, and deal with it separately...
572         if (big.stripTrailingZeros().equals(BigDecimal.ZERO)) {
573             int decimals = big.scale();
574             if (decimals <= FlexFormat.FLOAT_DECIMALS) {
575                 return hasD ? Double.class : Float.class;
576             }
577             if (decimals <= FlexFormat.DOUBLE_DECIMALS) {
578                 return Double.class;
579             }
580             return BigDecimal.class;
581         }
582 
583         // Now non-zero values...
584         int decimals = big.precision() - 1;
585         float f = big.floatValue();
586         if (decimals <= FlexFormat.FLOAT_DECIMALS && (f != 0.0F) && Float.isFinite(f)) {
587             return hasD ? Double.class : Float.class;
588         }
589 
590         double d = big.doubleValue();
591         if (decimals <= FlexFormat.DOUBLE_DECIMALS && (d != 0.0) && Double.isFinite(d)) {
592             return Double.class;
593         }
594         return BigDecimal.class;
595     }
596 
597     /**
598      * Returns the guessed integer type of a string representation of a integer value.
599      *
600      * @param  value the string representation of an integer value.
601      *
602      * @return       the The Java class ({@link Integer}, {@link Long}, or {@link BigInteger}) that can be used to
603      *                   represent the value with the number of digits provided.
604      *
605      * @see          #getInferredValueType()
606      * @see          #getDecimalType(String)
607      */
608     private static Class<? extends Number> getIntegerType(String value) {
609         int bits = new BigInteger(value).bitLength();
610         if (bits < Integer.SIZE) {
611             return Integer.class;
612         }
613         if (bits < Long.SIZE) {
614             return Long.class;
615         }
616         return BigInteger.class;
617     }
618 
619     private static String sanitize(String text) {
620         return HeaderCard.sanitize(text);
621     }
622 
623     static Logger getLogger() {
624         return LOG;
625     }
626 
627 }