View Javadoc
1   /*
2    * #%L
3    * nom.tam FITS library
4    * %%
5    * Copyright (C) 2004 - 2024 nom-tam-fits
6    * %%
7    * This is free and unencumbered software released into the public domain.
8    *
9    * Anyone is free to copy, modify, publish, use, compile, sell, or
10   * distribute this software, either in source code form or as a compiled
11   * binary, for any purpose, commercial or non-commercial, and by any
12   * means.
13   *
14   * In jurisdictions that recognize copyright laws, the author or authors
15   * of this software dedicate any and all copyright interest in the
16   * software to the public domain. We make this dedication for the benefit
17   * of the public at large and to the detriment of our heirs and
18   * successors. We intend this dedication to be an overt act of
19   * relinquishment in perpetuity of all present and future rights to this
20   * software under copyright law.
21   *
22   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25   * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28   * OTHER DEALINGS IN THE SOFTWARE.
29   * #L%
30   */
31  
32  package nom.tam.fits;
33  
34  import java.math.BigDecimal;
35  import java.math.BigInteger;
36  import java.util.Locale;
37  import java.util.StringTokenizer;
38  import java.util.logging.Level;
39  import java.util.logging.Logger;
40  import java.util.regex.Pattern;
41  
42  import nom.tam.fits.header.Standard;
43  import nom.tam.util.ComplexValue;
44  import nom.tam.util.FlexFormat;
45  
46  import static nom.tam.fits.header.NonStandard.HIERARCH;
47  import static nom.tam.fits.header.Standard.CONTINUE;
48  
49  /**
50   * <p>
51   * Converts a single 80-character wide FITS header record into a header card. See {@link HeaderCard#create(String)} for
52   * a description of the rules that guide parsing.
53   * </p>
54   * <p>
 * When parsing header records that violate FITS standards, the violations can be logged or will throw appropriate
 * exceptions (depending on the severity of the standard violation and whether
 * {@link FitsFactory#setAllowHeaderRepairs(boolean)} is enabled or not). The logging of violations is disabled by
58   * default, but may be controlled via {@link Header#setParserWarningsEnabled(boolean)}.
59   * </p>
60   *
61   * @author Attila Kovacs
62   *
63   * @see    FitsFactory#setAllowHeaderRepairs(boolean)
64   * @see    Header#setParserWarningsEnabled(boolean)
65   */
66  class HeaderCardParser {
67  
68      private static final Logger LOG = Logger.getLogger(HeaderCardParser.class.getName());
69  
70      static {
71          // Do not log warnings by default.
72          LOG.setLevel(Level.SEVERE);
73      }
74  
75      /** regexp for IEEE floats */
76      private static final Pattern DECIMAL_REGEX = Pattern.compile("[+-]?\\d+(\\.\\d*)?([dDeE][+-]?\\d+)?");
77  
78      /** regexp for complex numbers */
79      private static final Pattern COMPLEX_REGEX = Pattern
80              .compile("\\(\\s*" + DECIMAL_REGEX + "\\s*,\\s*" + DECIMAL_REGEX + "\\s*\\)");
81  
82      /** regexp for decimal integers. */
83      private static final Pattern INT_REGEX = Pattern.compile("[+-]?\\d+");
84  
85      /** The header line (usually 80-character width), which to parse. */
86      private String line;
87  
88      /**
89       * the value of the card. (trimmed and standardized with . in HIERARCH)
90       */
91      private String key = null;
92  
93      /**
94       * the value of the card. (trimmed)
95       */
96      private String value = null;
97  
98      /**
99       * the comment specified with the value.
100      */
101     private String comment = null;
102 
103     /**
104      * was the value quoted?
105      */
106     private Class<?> type = null;
107 
108     /**
109      * The position in the string that right after the last character processed by this parser
110      */
111     private int parsePos = 0;
112 
113     /**
114      * Instantiates a new parser for a FITS header line.
115      *
116      * @param  line                     a line in the FITS header, normally exactly 80-characters wide (but need not
117      *                                      be).
118      *
119      * @see                             #getKey()
120      * @see                             #getValue()
121      * @see                             #getComment()
122      * @see                             #isString()
123      *
124      * @throws UnclosedQuoteException   if there is a missing end-quote and header repairs aren't allowed.
125      * @throws IllegalArgumentException if the record contained neither a key or a value.
126      *
127      * @see                             FitsFactory#setAllowHeaderRepairs(boolean)
128      */
129     HeaderCardParser(String line) throws UnclosedQuoteException, IllegalArgumentException {
130         this.line = line;
131         // TODO HeaderCard never calls this with a null argument, so the check below is dead code here...
132         // if (line == null) {
133         // throw new IllegalArgumentException("Cannot parse null string");
134         // }
135         parseKey();
136         parseValue();
137         parseComment();
138     }
139 
140     /**
141      * Returns the keyword component of the parsed header line. If the processing of HIERARCH keywords is enabled, it
142      * may be a `HIERARCH` style long key with the components separated by dots (e.g.
143      * `HIERARCH.ORG.SYSTEM.SUBSYS.ELEMENT`). Otherwise, it will be a standard 0--8 character standard uppercase FITS
144      * keyword (including simply `HIERARCH` if {@link FitsFactory#setUseHierarch(boolean)} was set <code>false</code>).
145      *
146      * @return the FITS header keyword for the line.
147      *
148      * @see    FitsFactory#setUseHierarch(boolean)
149      */
150     String getKey() {
151         return key;
152     }
153 
154     /**
155      * Returns the value component of the parsed header line.
156      *
157      * @return the value part of the line or <code>null</code> if the line contained no value.
158      *
159      * @see    FitsFactory#setUseHierarch(boolean)
160      */
161     String getValue() {
162         return value;
163     }
164 
165     /**
166      * Returns the comment component of the parsed header line, with all leading and trailing spaces preserved.
167      *
168      * @return the comment part of the line or <code>null</code> if the line contained no comment.
169      *
170      * @see    #getTrimmedComment()
171      */
172     String getUntrimmedComment() {
173         return comment;
174     }
175 
176     /**
177      * Returns the comment component of the parsed header line, with both leading and trailing spaces removed
178      *
179      * @return the comment part of the line or <code>null</code> if the line contained no comment.
180      *
181      * @see    #getUntrimmedComment()
182      */
183     String getTrimmedComment() {
184         return comment == null ? null : comment.trim();
185     }
186 
187     /**
188      * Returns whether the line contained a quoted string value. By default, strings with missing end quotes are no
189      * considered string values, but rather as comments. To allow processing lines with missing quotes as string values,
190      * you must set {@link FitsFactory#setAllowHeaderRepairs(boolean)} to <code>true</code> prior to parsing a header
191      * line with the missing end quote.
192      *
193      * @return true if the value was quoted.
194      *
195      * @see    FitsFactory#setAllowHeaderRepairs(boolean)
196      */
197     boolean isString() {
198         if (type == null) {
199             return false;
200         }
201         return String.class.isAssignableFrom(type);
202     }
203 
204     /**
205      * <p>
206      * Returns the inferred Java class for the value stored in the header record, such as a {@link String} class, a
207      * {@link Boolean} class, an integer type ({@link Integer}, {@link Long}, or {@link BigInteger}) class, a decimal
208      * type ({@link Float}, {@link Double}, or {@link BigDecimal}) class, a {@link ComplexValue} class, or
209      * <code>null</code>. For number types, it returns the 'smallest' type that can be used to represent the string
210      * value.
211      * </p>
212      * <p>
213      * Its an inferred type as the true underlying type that was used to create the value is lost. For example, the
214      * value <code>42</code> may have been written from any integer type, including <code>byte</code> or
215      * <code>short<code>, but this routine will guess it to be an <code>int</code> ({@link Integer} type. As such, it
216      * may not be equal to {@link HeaderCard#valueType()} from which the record was created, and hence should not be
217      * used for round-trip testing of type equality.
218      * </p>
219      *
220      * @return the inferred type of the stored serialized (string) value, or <code>null</code> if the value does not
221      *             seem to match any of the supported value types.
222      *
223      * @see    HeaderCard#valueType()
224      */
225     Class<?> getInferredType() {
226         return type;
227     }
228 
229     /**
230      * Parses a fits keyword from a card and standardizes it (trim, uppercase, and hierarch with dots).
231      */
232     private void parseKey() {
233         /*
234          * AK: The parsing of headers should never be stricter that the writing, such that any header written by this
235          * library can be parsed back without errors. (And, if anything, the parsing should be more permissive to allow
236          * reading FITS produced by other libraries, which may be less stringent in their rules). The original
237          * implementation strongly enforced the ESO HIERARCH convention when reading, but not at all for writing. Here
238          * is a tolerant hierarch parser that will read back any hierarch key that was written by this library. The
239          * input FITS can use any space or even '.' to separate the hierarchies, and the hierarchical elements may
240          * contain any ASCII characters other than those used for separating. It is more in line with what we do with
241          * standard keys too.
242          */
243 
244         // Find the '=' in the line, if any...
245         int iEq = line.indexOf('=');
246 
247         // The stem is in the first 8 characters or what precedes an '=' character
248         // before that.
249         int endStem = (iEq >= 0 && iEq <= HeaderCard.MAX_KEYWORD_LENGTH) ? iEq : HeaderCard.MAX_KEYWORD_LENGTH;
250         endStem = Math.min(line.length(), endStem);
251 
252         String rawStem = line.substring(0, endStem).trim();
253 
254         // Check for space at the start of the keyword...
255         if (endStem > 0 && !rawStem.isEmpty()) {
256             if (Character.isSpaceChar(line.charAt(0))) {
257                 LOG.warning("[" + sanitize(rawStem) + "] Non-standard starting with a space (trimming).");
258             }
259         }
260 
261         String stem = rawStem.toUpperCase();
262 
263         if (!stem.equals(rawStem)) {
264             LOG.warning("[" + sanitize(rawStem) + "] Non-standard lower-case letter(s) in base keyword.");
265         }
266 
267         key = stem;
268         parsePos = endStem;
269 
270         // If not using HIERARCH, then be very resilient, and return whatever key the first 8 chars make...
271 
272         // If the line does not have an '=', can only be a simple key
273         // If it's not a HIERARCH keyword, then return the simple key.
274         if (!FitsFactory.getUseHierarch() || (iEq < 0) || !stem.equals(HIERARCH.key())) {
275             return;
276         }
277 
278         // Compose the hierarchical key...
279         StringTokenizer tokens = new StringTokenizer(line.substring(stem.length(), iEq), " \t\r\n.");
280         StringBuilder builder = new StringBuilder(stem);
281 
282         while (tokens.hasMoreTokens()) {
283             String token = tokens.nextToken();
284 
285             parsePos = line.indexOf(token, parsePos) + token.length();
286 
287             // Add a . to separate hierarchies
288             builder.append('.');
289             builder.append(token);
290         }
291 
292         key = builder.toString();
293 
294         if (HIERARCH.key().equals(key)) {
295             // The key is only HIERARCH, without a hierarchical keyword after it...
296             LOG.warning("HIERARCH base keyword without HIERARCH-style long key after it.");
297             return;
298         }
299 
300         if (!FitsFactory.getHierarchFormater().isCaseSensitive()) {
301             key = key.toUpperCase(Locale.US);
302         }
303 
304         try {
305             HeaderCard.validateKey(key);
306         } catch (IllegalArgumentException e) {
307             LOG.warning(e.getMessage());
308         }
309     }
310 
311     /**
312      * Advances the parse position to skip any spaces at the current parse position, and returns whether there is
313      * anything left in the line after the spaces...
314      *
315      * @return <code>true</code> if there is more non-space characters in the string, otherwise <code>false</code>
316      */
317     private boolean skipSpaces() {
318         for (; parsePos < line.length(); parsePos++) {
319             if (!Character.isSpaceChar(line.charAt(parsePos))) {
320                 // Line has non-space characters left to parse...
321                 return true;
322             }
323         }
324         // nothing left to parse.
325         return false;
326     }
327 
328     /**
329      * Parses the comment components starting from the current parse position. After this call the parse position is set
330      * to the end of the string. The leading '/' (if found) is not included in the comment.
331      */
332     private void parseComment() {
333         if (!skipSpaces()) {
334             // nothing left to parse.
335             return;
336         }
337 
338         // if no value, then everything is comment from here on...
339         if (value != null) {
340             if (line.charAt(parsePos) == '/') {
341                 // Skip the '/' itself, the comment is whatever is after it.
342                 parsePos++;
343             } else {
344                 // Junk after a string value -- interpret it as the start of the comment...
345                 LOG.warning("[" + sanitize(getKey()) + "] Junk after value (included in the comment).");
346             }
347         }
348 
349         comment = line.substring(parsePos);
350         parsePos = line.length();
351 
352         try {
353             HeaderCard.validateChars(comment);
354         } catch (IllegalArgumentException e) {
355             LOG.warning("[" + sanitize(getKey()) + "]: " + e.getMessage());
356         }
357     }
358 
359     /**
360      * Parses the value component from the current parse position. The parse position is advanced to the first character
361      * after the value specification in the line. If the header line does not contain a value component, then the value
362      * field of this object is set to <code>null</code>.
363      *
364      * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
365      *
366      * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
367      */
368     private void parseValue() throws UnclosedQuoteException {
369         if (key.isEmpty() || key.equals(Standard.COMMENT.key()) || key.equals(Standard.HISTORY.key())) {
370             return;
371         }
372 
373         if (!skipSpaces()) {
374             return; // nothing left to parse.
375         }
376 
377         if (CONTINUE.key().equals(key)) {
378             parseValueBody();
379         } else if (line.charAt(parsePos) == '=') {
380 
381             if (parsePos < HeaderCard.MAX_KEYWORD_LENGTH) {
382                 LOG.warning("[" + sanitize(key) + "] assigmment before byte " + (HeaderCard.MAX_KEYWORD_LENGTH + 1)
383                         + " for key '" + sanitize(key) + "'.");
384             }
385             if (parsePos + 1 >= line.length()) {
386                 LOG.warning("[" + sanitize(key) + "] Record ends with '='.");
387             } else if (line.charAt(parsePos + 1) != ' ') {
388                 LOG.warning("[" + sanitize(key) + "] Missing required standard space after '='.");
389             }
390 
391             if (parsePos > HeaderCard.MAX_KEYWORD_LENGTH) {
392                 // equal sign = after the 9th char -- only supported with hierarch keys...
393                 if (!key.startsWith(HIERARCH.key() + ".")) {
394                     LOG.warning("[" + sanitize(key) + "] Possibly misplaced '=' (after byte 9).");
395                     // It's not a HIERARCH key
396                     return;
397                 }
398             }
399 
400             parsePos++;
401             parseValueBody();
402         }
403 
404         try {
405             HeaderCard.validateChars(value);
406         } catch (IllegalArgumentException e) {
407             LOG.warning("[" + sanitize(getKey()) + "] " + e.getMessage());
408         }
409     }
410 
411     /**
412      * Parses the value body from the current parse position. The parse position is advanced to the first character
413      * after the value specification in the line. If the header line does not contain a value component, then the value
414      * field of this object is set to <code>null</code>.
415      *
416      * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
417      *
418      * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
419      */
420     private void parseValueBody() throws UnclosedQuoteException {
421         if (!skipSpaces()) {
422             // nothing left to parse.
423             return;
424         }
425 
426         if (isNextQuote()) {
427             // Parse as a string value, or else throw an exception.
428             parseStringValue();
429         } else {
430             int end = line.indexOf('/', parsePos);
431             if (end < 0) {
432                 end = line.length();
433             }
434             value = line.substring(parsePos, end).trim();
435             parsePos = end;
436             type = getInferredValueType(key, value);
437         }
438 
439     }
440 
441     /**
442      * Checks if the next character, at the current parse position, is a single quote.
443      *
444      * @return <code>true</code> if the next character on the line exists and is a single quote, otherwise
445      *             <code>false</code>.
446      */
447     private boolean isNextQuote() {
448         if (parsePos >= line.length()) {
449             // nothing left to parse.
450             return false;
451         }
452         return line.charAt(parsePos) == '\'';
453     }
454 
455     /**
456      * Returns the string fom a parsed string value component, with trailing spaces removed. It preserves leading
457      * spaces.
458      *
459      * @param  buf the parsed string value.
460      *
461      * @return     the string value with trailing spaces removed.
462      */
463     private static String getNoTrailingSpaceString(StringBuilder buf) {
464         int to = buf.length();
465 
466         // Remove trailing spaces only!
467         while (--to >= 0) {
468             if (!Character.isSpaceChar(buf.charAt(to))) {
469                 break;
470             }
471         }
472 
473         return to < 0 ? "" : buf.substring(0, to + 1);
474     }
475 
476     /**
477      * Parses a quoted string value starting at the current parse position. If successful, the parse position is updated
478      * to after the string. Otherwise, the parse position is advanced only to skip leading spaces starting from the
479      * input position.
480      *
481      * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
482      *
483      * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
484      */
485     private void parseStringValue() throws UnclosedQuoteException {
486         type = String.class;
487         StringBuilder buf = new StringBuilder(HeaderCard.MAX_VALUE_LENGTH);
488 
489         // Build the string value, up to the end quote and paying attention to double
490         // quotes inside the string, which are translated to single quotes within
491         // the string value itself.
492         for (++parsePos; parsePos < line.length(); parsePos++) {
493             if (isNextQuote()) {
494                 parsePos++;
495 
496                 if (!isNextQuote()) {
497                     // Closing single quote;
498                     value = getNoTrailingSpaceString(buf);
499                     return;
500                 }
501             }
502             buf.append(line.charAt(parsePos));
503         }
504 
505         // String with missing end quote
506         if (!FitsFactory.isAllowHeaderRepairs()) {
507             throw new UnclosedQuoteException(line);
508         }
509         LOG.warning("[" + sanitize(key) + "] Ignored missing end quote (value parsed to end of record).");
510         value = getNoTrailingSpaceString(buf);
511     }
512 
513     /**
514      * Returns the inferred Java class for the specified value. See {@link #getInferredType()} for a more detailed
515      * description.
516      *
517      * @param  value the serialized (string) representation of a FITS header value.
518      *
519      * @return       the inferred type of the specified serialized (string) value, or <code>null</code> if the value
520      *                   does not seem to match any of the supported value types. <code>null</code> values default to
521      *                   <code>Boolean.class</code>.
522      */
523     private static Class<?> getInferredValueType(String key, String value) {
524         // TODO We never call this with null locally, so the following check is dead code here...
525         // if (value == null) {
526         // return Boolean.class;
527         // }
528         if (value.isEmpty()) {
529             LOG.warning("[" + sanitize(key) + "] Null non-string value (defaulted to Boolean.class).");
530             return Boolean.class;
531         }
532 
533         String trimmedValue = value.trim().toUpperCase();
534 
535         if ("T".equals(trimmedValue) || "F".equals(trimmedValue)) {
536             return Boolean.class;
537         }
538         if (INT_REGEX.matcher(trimmedValue).matches()) {
539             return getIntegerType(trimmedValue);
540         }
541         if (DECIMAL_REGEX.matcher(trimmedValue).matches()) {
542             return getDecimalType(trimmedValue);
543         }
544         if (COMPLEX_REGEX.matcher(trimmedValue).matches()) {
545             return ComplexValue.class;
546         }
547 
548         LOG.warning("[" + sanitize(key) + "] Unrecognised non-string value type '" + sanitize(trimmedValue) + "'.");
549 
550         return null;
551     }
552 
553     /**
554      * Returns the guessed decimal type of a string representation of a decimal value.
555      *
556      * @param  value the string representation of a decimal value.
557      *
558      * @return       the The Java class ({@link Float}, {@link Double}, or {@link BigDecimal}) that can be used to
559      *                   represent the value with the precision provided.
560      *
561      * @see          #getInferredValueType()
562      * @see          #getIntegerType(String)
563      */
564     private static Class<? extends Number> getDecimalType(String value) {
565         value = value.toUpperCase(Locale.US);
566         boolean hasD = (value.indexOf('D') >= 0);
567 
568         if (hasD) {
569             // Convert the Double Scientific Notation specified by FITS to pure IEEE.
570             value = value.replace('D', 'E');
571         }
572 
573         BigDecimal big = new BigDecimal(value);
574 
575         // Check for zero, and deal with it separately...
576         if (big.stripTrailingZeros().equals(BigDecimal.ZERO)) {
577             int decimals = big.scale();
578             if (decimals <= FlexFormat.FLOAT_DECIMALS) {
579                 return hasD ? Double.class : Float.class;
580             }
581             if (decimals <= FlexFormat.DOUBLE_DECIMALS) {
582                 return Double.class;
583             }
584             return BigDecimal.class;
585         }
586 
587         // Now non-zero values...
588         int decimals = big.precision() - 1;
589         float f = big.floatValue();
590         if (decimals <= FlexFormat.FLOAT_DECIMALS && (f != 0.0F) && Float.isFinite(f)) {
591             return hasD ? Double.class : Float.class;
592         }
593 
594         double d = big.doubleValue();
595         if (decimals <= FlexFormat.DOUBLE_DECIMALS && (d != 0.0) && Double.isFinite(d)) {
596             return Double.class;
597         }
598         return BigDecimal.class;
599     }
600 
601     /**
602      * Returns the guessed integer type of a string representation of a integer value.
603      *
604      * @param  value the string representation of an integer value.
605      *
606      * @return       the The Java class ({@link Integer}, {@link Long}, or {@link BigInteger}) that can be used to
607      *                   represent the value with the number of digits provided.
608      *
609      * @see          #getInferredValueType()
610      * @see          #getDecimalType(String)
611      */
612     private static Class<? extends Number> getIntegerType(String value) {
613         int bits = new BigInteger(value).bitLength();
614         if (bits < Integer.SIZE) {
615             return Integer.class;
616         }
617         if (bits < Long.SIZE) {
618             return Long.class;
619         }
620         return BigInteger.class;
621     }
622 
623     private static String sanitize(String text) {
624         return HeaderCard.sanitize(text);
625     }
626 
627     static Logger getLogger() {
628         return LOG;
629     }
630 
631 }