1 /* 2 * #%L 3 * nom.tam FITS library 4 * %% 5 * Copyright (C) 2004 - 2024 nom-tam-fits 6 * %% 7 * This is free and unencumbered software released into the public domain. 8 * 9 * Anyone is free to copy, modify, publish, use, compile, sell, or 10 * distribute this software, either in source code form or as a compiled 11 * binary, for any purpose, commercial or non-commercial, and by any 12 * means. 13 * 14 * In jurisdictions that recognize copyright laws, the author or authors 15 * of this software dedicate any and all copyright interest in the 16 * software to the public domain. We make this dedication for the benefit 17 * of the public at large and to the detriment of our heirs and 18 * successors. We intend this dedication to be an overt act of 19 * relinquishment in perpetuity of all present and future rights to this 20 * software under copyright law. 21 * 22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 25 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 26 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 27 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 28 * OTHER DEALINGS IN THE SOFTWARE. 29 * #L% 30 */ 31 32 package nom.tam.fits; 33 34 import java.math.BigDecimal; 35 import java.math.BigInteger; 36 import java.util.Locale; 37 import java.util.StringTokenizer; 38 import java.util.logging.Level; 39 import java.util.logging.Logger; 40 import java.util.regex.Pattern; 41 42 import nom.tam.util.ComplexValue; 43 import nom.tam.util.FlexFormat; 44 45 import static nom.tam.fits.header.NonStandard.HIERARCH; 46 import static nom.tam.fits.header.Standard.CONTINUE; 47 48 /** 49 * <p> 50 * Converts a single 80-character wide FITS header record into a header card. See {@link HeaderCard#create(String)} for 51 * a description of the rules that guide parsing. 52 * </p> 53 * <p> 54 * When parsing header records that violate FITS standards, the violations can be logged or will throw appropriate 55 * excpetions (depending on the severity of the standard violation and whether 56 * {@link FitsFactory#setAllowHeaderRepairs(boolean)} is enabled or not. The logging of violations is disabled by 57 * default, but may be controlled via {@link Header#setParserWarningsEnabled(boolean)}. 58 * </p> 59 * 60 * @author Attila Kovacs 61 * 62 * @see FitsFactory#setAllowHeaderRepairs(boolean) 63 * @see Header#setParserWarningsEnabled(boolean) 64 */ 65 class HeaderCardParser { 66 67 private static final Logger LOG = Logger.getLogger(HeaderCardParser.class.getName()); 68 69 static { 70 // Do not log warnings by default. 71 LOG.setLevel(Level.SEVERE); 72 } 73 74 /** regexp for IEEE floats */ 75 private static final Pattern DECIMAL_REGEX = Pattern.compile("[+-]?\\d+(\\.\\d*)?([dDeE][+-]?\\d+)?"); 76 77 /** regexp for complex numbers */ 78 private static final Pattern COMPLEX_REGEX = Pattern 79 .compile("\\(\\s*" + DECIMAL_REGEX + "\\s*,\\s*" + DECIMAL_REGEX + "\\s*\\)"); 80 81 /** regexp for decimal integers. */ 82 private static final Pattern INT_REGEX = Pattern.compile("[+-]?\\d+"); 83 84 /** The header line (usually 80-character width), which to parse. */ 85 private String line; 86 87 /** 88 * the value of the card. (trimmed and standardized with . in HIERARCH) 89 */ 90 private String key = null; 91 92 /** 93 * the value of the card. (trimmed) 94 */ 95 private String value = null; 96 97 /** 98 * the comment specified with the value. 99 */ 100 private String comment = null; 101 102 /** 103 * was the value quoted? 104 */ 105 private Class<?> type = null; 106 107 /** 108 * The position in the string that right after the last character processed by this parser 109 */ 110 private int parsePos = 0; 111 112 /** 113 * Instantiates a new parser for a FITS header line. 114 * 115 * @param line a line in the FITS header, normally exactly 80-characters wide (but need not 116 * be). 117 * 118 * @see #getKey() 119 * @see #getValue() 120 * @see #getComment() 121 * @see #isString() 122 * 123 * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. 124 * @throws IllegalArgumentException if the record contained neither a key or a value. 125 * 126 * @see FitsFactory#setAllowHeaderRepairs(boolean) 127 */ 128 HeaderCardParser(String line) throws UnclosedQuoteException, IllegalArgumentException { 129 this.line = line; 130 // TODO HeaderCard never calls this with a null argument, so the check below is dead code here... 131 // if (line == null) { 132 // throw new IllegalArgumentException("Cannot parse null string"); 133 // } 134 parseKey(); 135 parseValue(); 136 parseComment(); 137 } 138 139 /** 140 * Returns the keyword component of the parsed header line. If the processing of HIERARCH keywords is enabled, it 141 * may be a `HIERARCH` style long key with the components separated by dots (e.g. 142 * `HIERARCH.ORG.SYSTEM.SUBSYS.ELEMENT`). Otherwise, it will be a standard 0--8 character standard uppercase FITS 143 * keyword (including simply `HIERARCH` if {@link FitsFactory#setUseHierarch(boolean)} was set <code>false</code>). 144 * 145 * @return the FITS header keyword for the line. 146 * 147 * @see FitsFactory#setUseHierarch(boolean) 148 */ 149 String getKey() { 150 return key; 151 } 152 153 /** 154 * Returns the value component of the parsed header line. 155 * 156 * @return the value part of the line or <code>null</code> if the line contained no value. 157 * 158 * @see FitsFactory#setUseHierarch(boolean) 159 */ 160 String getValue() { 161 return value; 162 } 163 164 /** 165 * Returns the comment component of the parsed header line, with all leading and trailing spaces preserved. 166 * 167 * @return the comment part of the line or <code>null</code> if the line contained no comment. 168 * 169 * @see #getTrimmedComment() 170 */ 171 String getUntrimmedComment() { 172 return comment; 173 } 174 175 /** 176 * Returns the comment component of the parsed header line, with both leading and trailing spaces removed 177 * 178 * @return the comment part of the line or <code>null</code> if the line contained no comment. 179 * 180 * @see #getUntrimmedComment() 181 */ 182 String getTrimmedComment() { 183 return comment == null ? null : comment.trim(); 184 } 185 186 /** 187 * Returns whether the line contained a quoted string value. By default, strings with missing end quotes are no 188 * considered string values, but rather as comments. To allow processing lines with missing quotes as string values, 189 * you must set {@link FitsFactory#setAllowHeaderRepairs(boolean)} to <code>true</code> prior to parsing a header 190 * line with the missing end quote. 191 * 192 * @return true if the value was quoted. 193 * 194 * @see FitsFactory#setAllowHeaderRepairs(boolean) 195 */ 196 boolean isString() { 197 if (type == null) { 198 return false; 199 } 200 return String.class.isAssignableFrom(type); 201 } 202 203 /** 204 * <p> 205 * Returns the inferred Java class for the value stored in the header record, such as a {@link String} class, a 206 * {@link Boolean} class, an integer type ({@link Integer}, {@link Long}, or {@link BigInteger}) class, a decimal 207 * type ({@link Float}, {@link Double}, or {@link BigDecimal}) class, a {@link ComplexValue} class, or 208 * <code>null</code>. For number types, it returns the 'smallest' type that can be used to represent the string 209 * value. 210 * </p> 211 * <p> 212 * Its an inferred type as the true underlying type that was used to create the value is lost. For example, the 213 * value <code>42</code> may have been written from any integer type, including <code>byte</code> or 214 * <code>short<code>, but this routine will guess it to be an <code>int</code> ({@link Integer} type. As such, it 215 * may not be equal to {@link HeaderCard#valueType()} from which the record was created, and hence should not be 216 * used for round-trip testing of type equality. 217 * </p> 218 * 219 * @return the inferred type of the stored serialized (string) value, or <code>null</code> if the value does not 220 * seem to match any of the supported value types. 221 * 222 * @see HeaderCard#valueType() 223 */ 224 Class<?> getInferredType() { 225 return type; 226 } 227 228 /** 229 * Parses a fits keyword from a card and standardizes it (trim, uppercase, and hierarch with dots). 230 */ 231 private void parseKey() { 232 /* 233 * AK: The parsing of headers should never be stricter that the writing, such that any header written by this 234 * library can be parsed back without errors. (And, if anything, the parsing should be more permissive to allow 235 * reading FITS produced by other libraries, which may be less stringent in their rules). The original 236 * implementation strongly enforced the ESO HIERARCH convention when reading, but not at all for writing. Here 237 * is a tolerant hierarch parser that will read back any hierarch key that was written by this library. The 238 * input FITS can use any space or even '.' to separate the hierarchies, and the hierarchical elements may 239 * contain any ASCII characters other than those used for separating. It is more in line with what we do with 240 * standard keys too. 241 */ 242 243 // Find the '=' in the line, if any... 244 int iEq = line.indexOf('='); 245 246 // The stem is in the first 8 characters or what precedes an '=' character 247 // before that. 248 int endStem = (iEq >= 0 && iEq <= HeaderCard.MAX_KEYWORD_LENGTH) ? iEq : HeaderCard.MAX_KEYWORD_LENGTH; 249 endStem = Math.min(line.length(), endStem); 250 251 String rawStem = line.substring(0, endStem).trim(); 252 253 // Check for space at the start of the keyword... 254 if (endStem > 0 && !rawStem.isEmpty()) { 255 if (Character.isSpaceChar(line.charAt(0))) { 256 LOG.warning("[" + sanitize(rawStem) + "] Non-standard starting with a space (trimming)."); 257 } 258 } 259 260 String stem = rawStem.toUpperCase(); 261 262 if (!stem.equals(rawStem)) { 263 LOG.warning("[" + sanitize(rawStem) + "] Non-standard lower-case letter(s) in base keyword."); 264 } 265 266 key = stem; 267 parsePos = endStem; 268 269 // If not using HIERARCH, then be very resilient, and return whatever key the first 8 chars make... 270 271 // If the line does not have an '=', can only be a simple key 272 // If it's not a HIERARCH keyword, then return the simple key. 273 if (!FitsFactory.getUseHierarch() || (iEq < 0) || !stem.equals(HIERARCH.key())) { 274 return; 275 } 276 277 // Compose the hierarchical key... 278 StringTokenizer tokens = new StringTokenizer(line.substring(stem.length(), iEq), " \t\r\n."); 279 StringBuilder builder = new StringBuilder(stem); 280 281 while (tokens.hasMoreTokens()) { 282 String token = tokens.nextToken(); 283 284 parsePos = line.indexOf(token, parsePos) + token.length(); 285 286 // Add a . to separate hierarchies 287 builder.append('.'); 288 builder.append(token); 289 } 290 291 key = builder.toString(); 292 293 if (HIERARCH.key().equals(key)) { 294 // The key is only HIERARCH, without a hierarchical keyword after it... 295 LOG.warning("HIERARCH base keyword without HIERARCH-style long key after it."); 296 return; 297 } 298 299 if (!FitsFactory.getHierarchFormater().isCaseSensitive()) { 300 key = key.toUpperCase(Locale.US); 301 } 302 303 try { 304 HeaderCard.validateKey(key); 305 } catch (IllegalArgumentException e) { 306 LOG.warning(e.getMessage()); 307 } 308 } 309 310 /** 311 * Advances the parse position to skip any spaces at the current parse position, and returns whether there is 312 * anything left in the line after the spaces... 313 * 314 * @return <code>true</code> if there is more non-space characters in the string, otherwise <code>false</code> 315 */ 316 private boolean skipSpaces() { 317 for (; parsePos < line.length(); parsePos++) { 318 if (!Character.isSpaceChar(line.charAt(parsePos))) { 319 // Line has non-space characters left to parse... 320 return true; 321 } 322 } 323 // nothing left to parse. 324 return false; 325 } 326 327 /** 328 * Parses the comment components starting from the current parse position. After this call the parse position is set 329 * to the end of the string. The leading '/' (if found) is not included in the comment. 330 */ 331 private void parseComment() { 332 if (!skipSpaces()) { 333 // nothing left to parse. 334 return; 335 } 336 337 // if no value, then everything is comment from here on... 338 if (value != null) { 339 if (line.charAt(parsePos) == '/') { 340 // Skip the '/' itself, the comment is whatever is after it. 341 parsePos++; 342 } else { 343 // Junk after a string value -- interpret it as the start of the comment... 344 LOG.warning("[" + sanitize(getKey()) + "] Junk after value (included in the comment)."); 345 } 346 } 347 348 comment = line.substring(parsePos); 349 parsePos = line.length(); 350 351 try { 352 HeaderCard.validateChars(comment); 353 } catch (IllegalArgumentException e) { 354 LOG.warning("[" + sanitize(getKey()) + "]: " + e.getMessage()); 355 } 356 } 357 358 /** 359 * Parses the value component from the current parse position. The parse position is advanced to the first character 360 * after the value specification in the line. If the header line does not contain a value component, then the value 361 * field of this object is set to <code>null</code>. 362 * 363 * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. 364 * 365 * @see FitsFactory#setAllowHeaderRepairs(boolean) 366 */ 367 private void parseValue() throws UnclosedQuoteException { 368 if (key.isEmpty() || !skipSpaces()) { 369 // nothing left to parse. 370 return; 371 } 372 373 if (CONTINUE.key().equals(key)) { 374 parseValueBody(); 375 } else if (line.charAt(parsePos) == '=') { 376 377 if (parsePos < HeaderCard.MAX_KEYWORD_LENGTH) { 378 LOG.warning("[" + sanitize(key) + "] assigmment before byte " + (HeaderCard.MAX_KEYWORD_LENGTH + 1) 379 + " for key '" + sanitize(key) + "'."); 380 } 381 if (parsePos + 1 >= line.length()) { 382 LOG.warning("[" + sanitize(key) + "] Record ends with '='."); 383 } else if (line.charAt(parsePos + 1) != ' ') { 384 LOG.warning("[" + sanitize(key) + "] Missing required standard space after '='."); 385 } 386 387 if (parsePos > HeaderCard.MAX_KEYWORD_LENGTH) { 388 // equal sign = after the 9th char -- only supported with hierarch keys... 389 if (!key.startsWith(HIERARCH.key() + ".")) { 390 LOG.warning("[" + sanitize(key) + "] Possibly misplaced '=' (after byte 9)."); 391 // It's not a HIERARCH key 392 return; 393 } 394 } 395 396 parsePos++; 397 parseValueBody(); 398 } 399 400 try { 401 HeaderCard.validateChars(value); 402 } catch (IllegalArgumentException e) { 403 LOG.warning("[" + sanitize(getKey()) + "] " + e.getMessage()); 404 } 405 } 406 407 /** 408 * Parses the value body from the current parse position. The parse position is advanced to the first character 409 * after the value specification in the line. If the header line does not contain a value component, then the value 410 * field of this object is set to <code>null</code>. 411 * 412 * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. 413 * 414 * @see FitsFactory#setAllowHeaderRepairs(boolean) 415 */ 416 private void parseValueBody() throws UnclosedQuoteException { 417 if (!skipSpaces()) { 418 // nothing left to parse. 419 return; 420 } 421 422 if (isNextQuote()) { 423 // Parse as a string value, or else throw an exception. 424 parseStringValue(); 425 } else { 426 int end = line.indexOf('/', parsePos); 427 if (end < 0) { 428 end = line.length(); 429 } 430 value = line.substring(parsePos, end).trim(); 431 parsePos = end; 432 type = getInferredValueType(key, value); 433 } 434 435 } 436 437 /** 438 * Checks if the next character, at the current parse position, is a single quote. 439 * 440 * @return <code>true</code> if the next character on the line exists and is a single quote, otherwise 441 * <code>false</code>. 442 */ 443 private boolean isNextQuote() { 444 if (parsePos >= line.length()) { 445 // nothing left to parse. 446 return false; 447 } 448 return line.charAt(parsePos) == '\''; 449 } 450 451 /** 452 * Returns the string fom a parsed string value component, with trailing spaces removed. It preserves leading 453 * spaces. 454 * 455 * @param buf the parsed string value. 456 * 457 * @return the string value with trailing spaces removed. 458 */ 459 private static String getNoTrailingSpaceString(StringBuilder buf) { 460 int to = buf.length(); 461 462 // Remove trailing spaces only! 463 while (--to >= 0) { 464 if (!Character.isSpaceChar(buf.charAt(to))) { 465 break; 466 } 467 } 468 469 return to < 0 ? "" : buf.substring(0, to + 1); 470 } 471 472 /** 473 * Parses a quoted string value starting at the current parse position. If successful, the parse position is updated 474 * to after the string. Otherwise, the parse position is advanced only to skip leading spaces starting from the 475 * input position. 476 * 477 * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. 478 * 479 * @see FitsFactory#setAllowHeaderRepairs(boolean) 480 */ 481 private void parseStringValue() throws UnclosedQuoteException { 482 type = String.class; 483 StringBuilder buf = new StringBuilder(HeaderCard.MAX_VALUE_LENGTH); 484 485 // Build the string value, up to the end quote and paying attention to double 486 // quotes inside the string, which are translated to single quotes within 487 // the string value itself. 488 for (++parsePos; parsePos < line.length(); parsePos++) { 489 if (isNextQuote()) { 490 parsePos++; 491 492 if (!isNextQuote()) { 493 // Closing single quote; 494 value = getNoTrailingSpaceString(buf); 495 return; 496 } 497 } 498 buf.append(line.charAt(parsePos)); 499 } 500 501 // String with missing end quote 502 if (!FitsFactory.isAllowHeaderRepairs()) { 503 throw new UnclosedQuoteException(line); 504 } 505 LOG.warning("[" + sanitize(key) + "] Ignored missing end quote (value parsed to end of record)."); 506 value = getNoTrailingSpaceString(buf); 507 } 508 509 /** 510 * Returns the inferred Java class for the specified value. See {@link #getInferredType()} for a more detailed 511 * description. 512 * 513 * @param value the serialized (string) representation of a FITS header value. 514 * 515 * @return the inferred type of the specified serialized (string) value, or <code>null</code> if the value 516 * does not seem to match any of the supported value types. <code>null</code> values default to 517 * <code>Boolean.class</code>. 518 */ 519 private static Class<?> getInferredValueType(String key, String value) { 520 // TODO We never call this with null locally, so the following check is dead code here... 521 // if (value == null) { 522 // return Boolean.class; 523 // } 524 if (value.isEmpty()) { 525 LOG.warning("[" + sanitize(key) + "] Null non-string value (defaulted to Boolean.class)."); 526 return Boolean.class; 527 } 528 529 String trimmedValue = value.trim().toUpperCase(); 530 531 if ("T".equals(trimmedValue) || "F".equals(trimmedValue)) { 532 return Boolean.class; 533 } 534 if (INT_REGEX.matcher(trimmedValue).matches()) { 535 return getIntegerType(trimmedValue); 536 } 537 if (DECIMAL_REGEX.matcher(trimmedValue).matches()) { 538 return getDecimalType(trimmedValue); 539 } 540 if (COMPLEX_REGEX.matcher(trimmedValue).matches()) { 541 return ComplexValue.class; 542 } 543 544 LOG.warning("[" + sanitize(key) + "] Unrecognised non-string value type '" + sanitize(trimmedValue) + "'."); 545 546 return null; 547 } 548 549 /** 550 * Returns the guessed decimal type of a string representation of a decimal value. 551 * 552 * @param value the string representation of a decimal value. 553 * 554 * @return the The Java class ({@link Float}, {@link Double}, or {@link BigDecimal}) that can be used to 555 * represent the value with the precision provided. 556 * 557 * @see #getInferredValueType() 558 * @see #getIntegerType(String) 559 */ 560 private static Class<? extends Number> getDecimalType(String value) { 561 value = value.toUpperCase(Locale.US); 562 boolean hasD = (value.indexOf('D') >= 0); 563 564 if (hasD) { 565 // Convert the Double Scientific Notation specified by FITS to pure IEEE. 566 value = value.replace('D', 'E'); 567 } 568 569 BigDecimal big = new BigDecimal(value); 570 571 // Check for zero, and deal with it separately... 572 if (big.stripTrailingZeros().equals(BigDecimal.ZERO)) { 573 int decimals = big.scale(); 574 if (decimals <= FlexFormat.FLOAT_DECIMALS) { 575 return hasD ? Double.class : Float.class; 576 } 577 if (decimals <= FlexFormat.DOUBLE_DECIMALS) { 578 return Double.class; 579 } 580 return BigDecimal.class; 581 } 582 583 // Now non-zero values... 584 int decimals = big.precision() - 1; 585 float f = big.floatValue(); 586 if (decimals <= FlexFormat.FLOAT_DECIMALS && (f != 0.0F) && Float.isFinite(f)) { 587 return hasD ? Double.class : Float.class; 588 } 589 590 double d = big.doubleValue(); 591 if (decimals <= FlexFormat.DOUBLE_DECIMALS && (d != 0.0) && Double.isFinite(d)) { 592 return Double.class; 593 } 594 return BigDecimal.class; 595 } 596 597 /** 598 * Returns the guessed integer type of a string representation of a integer value. 599 * 600 * @param value the string representation of an integer value. 601 * 602 * @return the The Java class ({@link Integer}, {@link Long}, or {@link BigInteger}) that can be used to 603 * represent the value with the number of digits provided. 604 * 605 * @see #getInferredValueType() 606 * @see #getDecimalType(String) 607 */ 608 private static Class<? extends Number> getIntegerType(String value) { 609 int bits = new BigInteger(value).bitLength(); 610 if (bits < Integer.SIZE) { 611 return Integer.class; 612 } 613 if (bits < Long.SIZE) { 614 return Long.class; 615 } 616 return BigInteger.class; 617 } 618 619 private static String sanitize(String text) { 620 return HeaderCard.sanitize(text); 621 } 622 623 static Logger getLogger() { 624 return LOG; 625 } 626 627 }