/*
 * #%L
 * nom.tam FITS library
 * %%
 * Copyright (C) 2004 - 2024 nom-tam-fits
 * %%
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 * #L%
 */

package nom.tam.fits;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import nom.tam.fits.header.Standard;
import nom.tam.util.ComplexValue;
import nom.tam.util.FlexFormat;

import static nom.tam.fits.header.NonStandard.HIERARCH;
import static nom.tam.fits.header.Standard.CONTINUE;

/**
 * <p>
 * Converts a single 80-character wide FITS header record into a header card. See {@link HeaderCard#create(String)} for
 * a description of the rules that guide parsing.
 * </p>
 * <p>
 * When parsing header records that violate FITS standards, the violations can be logged or will throw appropriate
 * exceptions (depending on the severity of the standard violation and whether
 * {@link FitsFactory#setAllowHeaderRepairs(boolean)} is enabled or not). The logging of violations is disabled by
 * default, but may be controlled via {@link Header#setParserWarningsEnabled(boolean)}.
 * </p>
 *
 * @author Attila Kovacs
 *
 * @see    FitsFactory#setAllowHeaderRepairs(boolean)
 * @see    Header#setParserWarningsEnabled(boolean)
 */
class HeaderCardParser {

    private static final Logger LOG = Logger.getLogger(HeaderCardParser.class.getName());

    static {
        // Do not log warnings by default.
        LOG.setLevel(Level.SEVERE);
    }

    /** regexp for IEEE floats */
    private static final Pattern DECIMAL_REGEX = Pattern.compile("[+-]?\\d+(\\.\\d*)?([dDeE][+-]?\\d+)?");

    /** regexp for complex numbers */
    private static final Pattern COMPLEX_REGEX = Pattern
            .compile("\\(\\s*" + DECIMAL_REGEX + "\\s*,\\s*" + DECIMAL_REGEX + "\\s*\\)");

    /** regexp for decimal integers. */
    private static final Pattern INT_REGEX = Pattern.compile("[+-]?\\d+");

    /** The header line (usually 80-character width), which to parse. */
    private final String line;

    /**
     * The keyword of the card (trimmed, with an upper-case stem, and with '.' separating the components of a HIERARCH
     * key).
     */
    private String key;

    /**
     * The value of the card (trimmed), or <code>null</code> if the line contained no value.
     */
    private String value;

    /**
     * The comment specified with the value (untrimmed), or <code>null</code> if the line contained no comment.
     */
    private String comment;

    /**
     * The inferred Java type of the value: {@link String} for quoted values, the result of
     * {@link #getInferredValueType(String, String)} for unquoted ones, or <code>null</code> if no value was parsed or
     * its type was not recognised.
     */
    private Class<?> type;

    /**
     * The position in the line right after the last character processed by this parser.
     */
    private int parsePos = 0;

    /**
     * Instantiates a new parser for a FITS header line, and parses the key, value, and comment components of it, in
     * that order.
     *
     * @param  line                     a line in the FITS header, normally exactly 80-characters wide (but need not
     *                                      be).
     *
     * @see                             #getKey()
     * @see                             #getValue()
     * @see                             #getUntrimmedComment()
     * @see                             #getTrimmedComment()
     * @see                             #isString()
     *
     * @throws UnclosedQuoteException   if there is a missing end-quote and header repairs aren't allowed.
     * @throws IllegalArgumentException if the record contained neither a key or a value.
     *
     * @see                             FitsFactory#setAllowHeaderRepairs(boolean)
     */
    HeaderCardParser(String line) throws UnclosedQuoteException, IllegalArgumentException {
        // NOTE: HeaderCard never calls this with a null argument, so no explicit null check is made here
        // (it would be dead code). A null line would fail in parseKey() below.
        this.line = line;
        parseKey();
        parseValue();
        parseComment();
    }

    /**
     * Returns the keyword component of the parsed header line. If the processing of HIERARCH keywords is enabled, it
     * may be a `HIERARCH` style long key with the components separated by dots (e.g.
     * `HIERARCH.ORG.SYSTEM.SUBSYS.ELEMENT`). Otherwise, it will be a standard 0--8 character standard uppercase FITS
     * keyword (including simply `HIERARCH` if {@link FitsFactory#setUseHierarch(boolean)} was set <code>false</code>).
     *
     * @return the FITS header keyword for the line.
     *
     * @see    FitsFactory#setUseHierarch(boolean)
     */
    String getKey() {
        return key;
    }

    /**
     * Returns the value component of the parsed header line.
     *
     * @return the value part of the line or <code>null</code> if the line contained no value.
     *
     * @see    FitsFactory#setUseHierarch(boolean)
     */
    String getValue() {
        return value;
    }

    /**
     * Returns the comment component of the parsed header line, with all leading and trailing spaces preserved.
     *
     * @return the comment part of the line or <code>null</code> if the line contained no comment.
     *
     * @see    #getTrimmedComment()
     */
    String getUntrimmedComment() {
        return comment;
    }

    /**
     * Returns the comment component of the parsed header line, with both leading and trailing spaces removed.
     *
     * @return the comment part of the line or <code>null</code> if the line contained no comment.
     *
     * @see    #getUntrimmedComment()
     */
    String getTrimmedComment() {
        return comment == null ? null : comment.trim();
    }

    /**
     * Returns whether the line contained a quoted string value. By default, strings with missing end quotes are not
     * considered string values, but rather as comments. To allow processing lines with missing quotes as string values,
     * you must set {@link FitsFactory#setAllowHeaderRepairs(boolean)} to <code>true</code> prior to parsing a header
     * line with the missing end quote.
     *
     * @return true if the value was quoted.
     *
     * @see    FitsFactory#setAllowHeaderRepairs(boolean)
     */
    boolean isString() {
        if (type == null) {
            return false;
        }
        return String.class.isAssignableFrom(type);
    }

    /**
     * <p>
     * Returns the inferred Java class for the value stored in the header record, such as a {@link String} class, a
     * {@link Boolean} class, an integer type ({@link Integer}, {@link Long}, or {@link BigInteger}) class, a decimal
     * type ({@link Float}, {@link Double}, or {@link BigDecimal}) class, a {@link ComplexValue} class, or
     * <code>null</code>. For number types, it returns the 'smallest' type that can be used to represent the string
     * value.
     * </p>
     * <p>
     * It is an inferred type as the true underlying type that was used to create the value is lost. For example, the
     * value <code>42</code> may have been written from any integer type, including <code>byte</code> or
     * <code>short</code>, but this routine will guess it to be an <code>int</code> ({@link Integer} type). As such, it
     * may not be equal to {@link HeaderCard#valueType()} from which the record was created, and hence should not be
     * used for round-trip testing of type equality.
     * </p>
     *
     * @return the inferred type of the stored serialized (string) value, or <code>null</code> if the value does not
     *             seem to match any of the supported value types.
     *
     * @see    HeaderCard#valueType()
     */
    Class<?> getInferredType() {
        return type;
    }

    /**
     * Parses a FITS keyword from a card and standardizes it (trim, uppercase, and hierarch with dots). After this call
     * the parse position is set right after the keyword.
     */
    private void parseKey() {
        /*
         * AK: The parsing of headers should never be stricter that the writing, such that any header written by this
         * library can be parsed back without errors. (And, if anything, the parsing should be more permissive to allow
         * reading FITS produced by other libraries, which may be less stringent in their rules). The original
         * implementation strongly enforced the ESO HIERARCH convention when reading, but not at all for writing. Here
         * is a tolerant hierarch parser that will read back any hierarch key that was written by this library. The
         * input FITS can use any space or even '.' to separate the hierarchies, and the hierarchical elements may
         * contain any ASCII characters other than those used for separating. It is more in line with what we do with
         * standard keys too.
         */

        // Find the '=' in the line, if any...
        int iEq = line.indexOf('=');

        // The stem is in the first 8 characters or what precedes an '=' character
        // before that.
        int endStem = (iEq >= 0 && iEq <= HeaderCard.MAX_KEYWORD_LENGTH) ? iEq : HeaderCard.MAX_KEYWORD_LENGTH;
        endStem = Math.min(line.length(), endStem);

        String rawStem = line.substring(0, endStem).trim();

        // Check for space at the start of the keyword (a non-empty stem implies the line is not empty)...
        if (!rawStem.isEmpty() && Character.isSpaceChar(line.charAt(0))) {
            LOG.warning("[" + sanitize(rawStem) + "] Non-standard starting with a space (trimming).");
        }

        // Use a fixed locale, so that the result does not depend on the default locale of the JVM. (E.g. with a
        // Turkish default locale 'hierarch' would otherwise become 'HİERARCH', and would not be recognised below.)
        String stem = rawStem.toUpperCase(Locale.US);

        if (!stem.equals(rawStem)) {
            LOG.warning("[" + sanitize(rawStem) + "] Non-standard lower-case letter(s) in base keyword.");
        }

        key = stem;
        parsePos = endStem;

        // If not using HIERARCH, then be very resilient, and return whatever key the first 8 chars make...

        // If the line does not have an '=', can only be a simple key
        // If it's not a HIERARCH keyword, then return the simple key.
        if (!FitsFactory.getUseHierarch() || (iEq < 0) || !stem.equals(HIERARCH.key())) {
            return;
        }

        // Compose the hierarchical key from whatever is between the stem and the '='...
        StringTokenizer tokens = new StringTokenizer(line.substring(stem.length(), iEq), " \t\r\n.");
        StringBuilder builder = new StringBuilder(stem);

        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();

            // Move the parse position to right after this component in the line.
            parsePos = line.indexOf(token, parsePos) + token.length();

            // Add a . to separate hierarchies
            builder.append('.');
            builder.append(token);
        }

        key = builder.toString();

        if (HIERARCH.key().equals(key)) {
            // The key is only HIERARCH, without a hierarchical keyword after it...
            LOG.warning("HIERARCH base keyword without HIERARCH-style long key after it.");
            return;
        }

        if (!FitsFactory.getHierarchFormater().isCaseSensitive()) {
            key = key.toUpperCase(Locale.US);
        }

        try {
            HeaderCard.validateKey(key);
        } catch (IllegalArgumentException e) {
            // Be tolerant when reading: an invalid key is only reported, never rejected.
            LOG.warning(e.getMessage());
        }
    }

    /**
     * Advances the parse position to skip any spaces at the current parse position, and returns whether there is
     * anything left in the line after the spaces...
     *
     * @return <code>true</code> if there is more non-space characters in the string, otherwise <code>false</code>
     */
    private boolean skipSpaces() {
        for (; parsePos < line.length(); parsePos++) {
            if (!Character.isSpaceChar(line.charAt(parsePos))) {
                // Line has non-space characters left to parse...
                return true;
            }
        }
        // nothing left to parse.
        return false;
    }

    /**
     * Parses the comment components starting from the current parse position. After this call the parse position is set
     * to the end of the string. The leading '/' (if found) is not included in the comment.
     */
    private void parseComment() {
        if (!skipSpaces()) {
            // nothing left to parse.
            return;
        }

        // if no value, then everything is comment from here on...
        if (value != null) {
            if (line.charAt(parsePos) == '/') {
                // Skip the '/' itself, the comment is whatever is after it.
                parsePos++;
            } else {
                // Junk after a string value -- interpret it as the start of the comment...
                LOG.warning("[" + sanitize(getKey()) + "] Junk after value (included in the comment).");
            }
        }

        comment = line.substring(parsePos);
        parsePos = line.length();

        try {
            HeaderCard.validateChars(comment);
        } catch (IllegalArgumentException e) {
            // Illegal characters in the comment are only reported, the comment is kept as is.
            LOG.warning("[" + sanitize(getKey()) + "]: " + e.getMessage());
        }
    }

    /**
     * Parses the value component from the current parse position. The parse position is advanced to the first character
     * after the value specification in the line. If the header line does not contain a value component, then the value
     * field of this object is set to <code>null</code>. Cards with an empty key, and <code>COMMENT</code> or
     * <code>HISTORY</code> cards never have a value.
     *
     * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
     *
     * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
     */
    private void parseValue() throws UnclosedQuoteException {
        if (key.isEmpty() || key.equals(Standard.COMMENT.key()) || key.equals(Standard.HISTORY.key())) {
            return;
        }

        if (!skipSpaces()) {
            return; // nothing left to parse.
        }

        if (CONTINUE.key().equals(key)) {
            // CONTINUE records carry their value without an '=' assignment.
            parseValueBody();
        } else if (line.charAt(parsePos) == '=') {

            if (parsePos < HeaderCard.MAX_KEYWORD_LENGTH) {
                LOG.warning("[" + sanitize(key) + "] assigmment before byte " + (HeaderCard.MAX_KEYWORD_LENGTH + 1)
                        + " for key '" + sanitize(key) + "'.");
            }
            if (parsePos + 1 >= line.length()) {
                LOG.warning("[" + sanitize(key) + "] Record ends with '='.");
            } else if (line.charAt(parsePos + 1) != ' ') {
                LOG.warning("[" + sanitize(key) + "] Missing required standard space after '='.");
            }

            if (parsePos > HeaderCard.MAX_KEYWORD_LENGTH) {
                // equal sign = after the 9th char -- only supported with hierarch keys...
                if (!key.startsWith(HIERARCH.key() + ".")) {
                    LOG.warning("[" + sanitize(key) + "] Possibly misplaced '=' (after byte 9).");
                    // It's not a HIERARCH key
                    return;
                }
            }

            // Skip the '=' itself, the value is whatever comes after it.
            parsePos++;
            parseValueBody();
        }

        try {
            HeaderCard.validateChars(value);
        } catch (IllegalArgumentException e) {
            // Illegal characters in the value are only reported, the value is kept as is.
            LOG.warning("[" + sanitize(getKey()) + "] " + e.getMessage());
        }
    }

    /**
     * Parses the value body from the current parse position. The parse position is advanced to the first character
     * after the value specification in the line. If the header line does not contain a value component, then the value
     * field of this object is set to <code>null</code>. Quoted values are parsed as strings, while unquoted values
     * extend to the next '/' (or to the end of the line), and have their type inferred from their content.
     *
     * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
     *
     * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
     */
    private void parseValueBody() throws UnclosedQuoteException {
        if (!skipSpaces()) {
            // nothing left to parse.
            return;
        }

        if (isNextQuote()) {
            // Parse as a string value, or else throw an exception.
            parseStringValue();
        } else {
            int end = line.indexOf('/', parsePos);
            if (end < 0) {
                end = line.length();
            }
            value = line.substring(parsePos, end).trim();
            parsePos = end;
            type = getInferredValueType(key, value);
        }

    }

    /**
     * Checks if the next character, at the current parse position, is a single quote.
     *
     * @return <code>true</code> if the next character on the line exists and is a single quote, otherwise
     *             <code>false</code>.
     */
    private boolean isNextQuote() {
        if (parsePos >= line.length()) {
            // nothing left to parse.
            return false;
        }
        return line.charAt(parsePos) == '\'';
    }

    /**
     * Returns the string from a parsed string value component, with trailing spaces removed. It preserves leading
     * spaces.
     *
     * @param  buf the parsed string value.
     *
     * @return     the string value with trailing spaces removed.
     */
    private static String getNoTrailingSpaceString(StringBuilder buf) {
        int to = buf.length();

        // Remove trailing spaces only!
        while (--to >= 0) {
            if (!Character.isSpaceChar(buf.charAt(to))) {
                break;
            }
        }

        return to < 0 ? "" : buf.substring(0, to + 1);
    }

    /**
     * Parses a quoted string value starting at the opening quote at the current parse position. If the closing quote is
     * found, the parse position is updated to right after it. If the closing quote is missing and header repairs are
     * allowed, then the value is parsed to the end of the record.
     *
     * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
     *
     * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
     */
    private void parseStringValue() throws UnclosedQuoteException {
        type = String.class;
        StringBuilder buf = new StringBuilder(HeaderCard.MAX_VALUE_LENGTH);

        // Build the string value, up to the end quote and paying attention to double
        // quotes inside the string, which are translated to single quotes within
        // the string value itself.
        for (++parsePos; parsePos < line.length(); parsePos++) {
            if (isNextQuote()) {
                parsePos++;

                if (!isNextQuote()) {
                    // Closing single quote;
                    value = getNoTrailingSpaceString(buf);
                    return;
                }
            }
            buf.append(line.charAt(parsePos));
        }

        // String with missing end quote
        if (!FitsFactory.isAllowHeaderRepairs()) {
            throw new UnclosedQuoteException(line);
        }
        LOG.warning("[" + sanitize(key) + "] Ignored missing end quote (value parsed to end of record).");
        value = getNoTrailingSpaceString(buf);
    }

    /**
     * Returns the inferred Java class for the specified value. See {@link #getInferredType()} for a more detailed
     * description.
     *
     * @param  key   the keyword of the card the value belongs to, used only for reporting warnings.
     * @param  value the serialized (string) representation of a FITS header value. It must not be
     *                   <code>null</code>.
     *
     * @return       the inferred type of the specified serialized (string) value, or <code>null</code> if the value
     *                   does not seem to match any of the supported value types. Empty values default to
     *                   <code>Boolean.class</code>.
     */
    private static Class<?> getInferredValueType(String key, String value) {
        // NOTE: This is never called with a null value locally, so no explicit null check is made here
        // (it would be dead code).
        if (value.isEmpty()) {
            LOG.warning("[" + sanitize(key) + "] Null non-string value (defaulted to Boolean.class).");
            return Boolean.class;
        }

        // Use a fixed locale, so that the result does not depend on the default locale of the JVM.
        String trimmedValue = value.trim().toUpperCase(Locale.US);

        if ("T".equals(trimmedValue) || "F".equals(trimmedValue)) {
            return Boolean.class;
        }
        if (INT_REGEX.matcher(trimmedValue).matches()) {
            return getIntegerType(trimmedValue);
        }
        if (DECIMAL_REGEX.matcher(trimmedValue).matches()) {
            return getDecimalType(trimmedValue);
        }
        if (COMPLEX_REGEX.matcher(trimmedValue).matches()) {
            return ComplexValue.class;
        }

        LOG.warning("[" + sanitize(key) + "] Unrecognised non-string value type '" + sanitize(trimmedValue) + "'.");

        return null;
    }

    /**
     * Returns the guessed decimal type of a string representation of a decimal value.
     *
     * @param  value the string representation of a decimal value.
     *
     * @return       the Java class ({@link Float}, {@link Double}, or {@link BigDecimal}) that can be used to
     *                   represent the value with the precision provided.
     *
     * @see          #getInferredValueType(String, String)
     * @see          #getIntegerType(String)
     */
    private static Class<? extends Number> getDecimalType(String value) {
        value = value.toUpperCase(Locale.US);
        boolean hasD = (value.indexOf('D') >= 0);

        if (hasD) {
            // Convert the Double Scientific Notation specified by FITS to pure IEEE.
            value = value.replace('D', 'E');
        }

        BigDecimal big = new BigDecimal(value);

        // Check for zero, and deal with it separately. (The sign test does not depend on the scale of the
        // value, unlike an equals() comparison to BigDecimal.ZERO.)
        if (big.signum() == 0) {
            // For zeroes the number of decimal places given decides the type.
            int decimals = big.scale();
            if (decimals <= FlexFormat.FLOAT_DECIMALS) {
                return hasD ? Double.class : Float.class;
            }
            if (decimals <= FlexFormat.DOUBLE_DECIMALS) {
                return Double.class;
            }
            return BigDecimal.class;
        }

        // Now non-zero values: the number of digits after the leading one decides the type, provided that the
        // value neither underflows to zero, nor overflows to infinity in that type.
        int decimals = big.precision() - 1;
        float f = big.floatValue();
        if (decimals <= FlexFormat.FLOAT_DECIMALS && (f != 0.0F) && Float.isFinite(f)) {
            return hasD ? Double.class : Float.class;
        }

        double d = big.doubleValue();
        if (decimals <= FlexFormat.DOUBLE_DECIMALS && (d != 0.0) && Double.isFinite(d)) {
            return Double.class;
        }
        return BigDecimal.class;
    }

    /**
     * Returns the guessed integer type of a string representation of a integer value.
     *
     * @param  value the string representation of an integer value.
     *
     * @return       the Java class ({@link Integer}, {@link Long}, or {@link BigInteger}) that can be used to
     *                   represent the value with the number of digits provided.
     *
     * @see          #getInferredValueType(String, String)
     * @see          #getDecimalType(String)
     */
    private static Class<? extends Number> getIntegerType(String value) {
        // The bit length excludes the sign bit, hence the strict comparisons to the type sizes.
        int bits = new BigInteger(value).bitLength();
        if (bits < Integer.SIZE) {
            return Integer.class;
        }
        if (bits < Long.SIZE) {
            return Long.class;
        }
        return BigInteger.class;
    }

    /**
     * Returns the text in the form in which it is included in log messages, as provided by
     * {@link HeaderCard#sanitize(String)}.
     *
     * @param  text the text to include in a log message.
     *
     * @return      the result of {@link HeaderCard#sanitize(String)} for the text.
     */
    private static String sanitize(String text) {
        return HeaderCard.sanitize(text);
    }

    /**
     * Returns the logger that is used for reporting the standard violations found when parsing header records.
     *
     * @return the logger of the parser.
     */
    static Logger getLogger() {
        return LOG;
    }

}