1 /******************************************************************************* 2 * 3 * A fast JSON parser implementing RFC 7159. 4 * 5 * The most prominent change compared to the initial revision is the allowance 6 * of all data types as root values, not just objects and arrays. 7 * 8 * Usage_Hints: 9 * $(UL 10 * $(LI This parser only supports UTF-8 without BOM.) 11 * $(LI When a JSON object has duplicate keys, the last one in the set will 12 * determine the value of associative-array entries or struct fields.) 13 * $(LI `BigInt` and large number parsing are not implemented currently, but 14 * all integral types as well as minimal exact representations of many 15 * `double` values are supported.) 16 * ) 17 * 18 * Authors: 19 * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 20 * 21 * Copyright: 22 * © 2015 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 23 * 24 * License: 25 * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 26 * 27 **************************************/ 28 module fast.json; 29 30 import core.bitop; 31 import core.simd; 32 import core.stdc.stdlib; 33 import core.stdc.string; 34 35 version (GNU) import gcc.attribute; 36 version (GNU) import gcc.builtins; 37 version (LDC) import ldc.gccbuiltins_x86; 38 39 import std.algorithm; 40 import std.ascii; 41 import std.conv; 42 import std.datetime; 43 import std.exception; 44 import std.file; 45 import std.json; 46 import std.range; 47 import std.stdio; 48 import std.string; 49 import std.traits; 50 import std.uni; 51 52 import fast.buffer; 53 import fast.cstring; 54 import fast.helpers; 55 import fast.parsing; 56 57 58 /******************************************************************************* 59 * 60 * Loads a file as JSON text and validates the used parts. This includes a UTF-8 61 * validation on strings. 62 * 63 * Params: 64 * fname = The file name to load. 65 * 66 * Returns: 67 * A JSON file object exposing the `Json` API. 
**************************************/
auto parseJSONFile(uint vl = validateUsed)(in char[] fname)
{
    // Forward the caller's validation level explicitly. Without `!vl` the
    // `ubyte[]` overload would silently fall back to its default level.
    return parseJSONFile!vl(fname.representation);
}

/// ditto
auto parseJSONFile(uint vl = validateUsed)(in ubyte[] fname)
{
    return Json!vl.File(fname);
}


/*******************************************************************************
 *
 * Loads a JSON string and validates the used parts. This includes a UTF-8
 * validation on strings.
 *
 * Params:
 *   text = The string to load.
 *
 * Returns:
 *   A `Json` struct.
 *
 **************************************/
auto parseJSON(uint vl = validateUsed, T : const(char)[])(T text) nothrow
{
    return parseJSONTextImpl!vl(text);
}


/*******************************************************************************
 *
 * Load a file as JSON text that is considered 100% correct. No checks will be
 * performed, not even if you try to read a number as a string.
 *
 * Params:
 *   fname = The file name to load.
 *
 * Returns:
 *   A JSON file object exposing the `Json` API.
 *
 **************************************/
auto parseTrustedJSONFile(uint vl = trustedSource)(in char[] fname)
{
    return parseTrustedJSONFile!vl(fname.representation);
}

/// ditto
auto parseTrustedJSONFile(uint vl = trustedSource)(in ubyte[] fname)
{
    return Json!vl.File(fname);
}


/*******************************************************************************
 *
 * Load a JSON string that is considered 100% correct. No checks will be
 * performed, not even if you try to read a number as a string.
 *
 * Params:
 *   text = The string to load.
 *
 * Returns:
 *   A `Json` struct.
**************************************/
auto parseTrustedJSON(uint vl = trustedSource, T : const(char)[])(T text) nothrow
{
    return parseJSONTextImpl!vl(text);
}


// Shared back end of `parseJSON` and `parseTrustedJSON`: pads the text and
// wraps it in a `Json!(vl, false)` parser.
private auto parseJSONTextImpl(uint vl, T : const(char)[])(T text)
{
    // We need to append 16 zero bytes for SSE to work, and if that reallocates the char[]
    // we can declare it unique/immutable and don't need to allocate when returning JSON strings.
    auto ptrBeforeAppend = text.ptr;
    text ~= "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

    static if (!is(T == string))
    {
        // The append moved the data, so nobody else holds a reference to it.
        if (ptrBeforeAppend !is text.ptr)
            return Json!(vl, false)(assumeUnique(text));
    }
    return Json!(vl, false)(text);
}


/*******************************************************************************
 *
 * Validates a JSON text file.
 *
 * Params:
 *   fname = The file name to load.
 *
 * Throws:
 *   JSONException on validation errors.
 *
 **************************************/
void validateJSONFile(in char[] fname)
{
    validateJSONFile(fname.representation);
}

/// ditto
void validateJSONFile(in ubyte[] fname)
{
    Json!(validateAll, true).File(fname).skipValue();
}


/// JSON data types returned by `peek`.
enum DataType : ubyte
{
    string,
    number,
    object,
    array,
    boolean,
    null_
}


/// Validation strength of JSON parser
enum
{
    trustedSource,  /// Assume 100% correct JSON and speed up parsing.
    validateUsed,   /// Ignore errors in skipped portions.
    validateAll,    /// Do a complete validation of the JSON data.
}


/// A UDA used to remap enum members or struct field names to JSON strings.
struct JsonMapping
{
    string[string] map;
}


/*******************************************************************************
 *
 * This is a forward JSON parser for picking off items of interest on the go.
 * It neither produces a node structure, nor does it produce events.
* Instead you can peek at the value type that lies ahead and/or directly
 * consume a JSON value from the parser. Objects and arrays can be iterated over
 * via `foreach`, while you can also directly ask for one or multiple keys of an
 * object.
 *
 * Params:
 *   vl = Validation level. Any of `trustedSource`, `validateUsed` or
 *     `validateAll`.
 *   validateUtf8 = If validation is enabled, this also checks UTF-8 encoding
 *     of JSON strings.
 *
 **************************************/
struct Json(uint vl = validateUsed, bool validateUtf8 = vl > trustedSource)
    if (vl > trustedSource || !validateUtf8)
{
private:

    // Compile-time switches derived from the validation level.
    enum isTrusted     = vl == trustedSource;
    enum skipAllInter  = vl == trustedSource;
    enum isValidating  = vl >= validateUsed;
    enum isValidateAll = vl == validateAll;

    const(char)*   m_text = void;       // current read position
    const(char*)   m_start = void;      // start of the JSON text
    size_t         m_nesting = 0;       // depth of currently open objects/arrays
    RaiiArray!char m_mem;               // scratch buffer for strings with escapes
    bool           m_isString = false;  // set by the `string` constructor, see there


public:

    @disable this();
    @disable this(this);


    /*******************************************************************************
     *
     * Constructor taking a `string` for fast slicing.
     *
     * JSON strings without escape sequences can be returned as slices.
     *
     * Params:
     *   text = The JSON text to parse.
     *
     **************************************/
    nothrow
    this(string text)
    {
        import core.memory;

        // Remember whether the GC reports a block for this pointer.
        alias BlockInfo = ReturnType!(GC.query);
        auto info = GC.query(text.ptr);
        m_isString = info !is BlockInfo.init;

        this(cast(const char[]) text);
    }


    /*******************************************************************************
     *
     * Constructor taking a `const char[]`.
     *
     * JSON strings allocate on the GC heap when returned.
     *
     * Params:
     *   text = The JSON text to parse.
**************************************/
    @nogc pure nothrow
    this(const char[] text)
    {
        m_start = m_text = text.ptr;
        skipWhitespace!false();
    }


    /+
     ╔══════════════════════════════════════════════════════════════════════════════
     ║ ⚑ String
     ╚══════════════════════════════════════════════════════════════════════════════
     +/

    /*******************************************************************************
     *
     * Reads a string off the JSON text.
     *
     * Params:
     *   allowNull = Allow `null` as a valid option for the string.
     *
     * Returns:
     *   A GC managed string.
     *
     **************************************/
    string read(T)(bool allowNull = true) if (is(T == string))
    {
        if (!allowNull || peek == DataType.string)
        {
            auto borrowed = borrowString();

            // `borrowString` returns either a slice of the JSON text or, when the
            // string contained escapes, a slice of the scratch buffer `m_mem`.
            // The scratch buffer is overwritten by the next string, so it must
            // never be handed out as an immutable `string` without a copy.
            const bool inScratch =
                cast(const(void)*) borrowed.ptr is cast(const(void)*) m_mem.ptr;

            if (m_isString && !inScratch)
                return borrowed.assumeUnique();
            return borrowed.idup;
        }
        return readNull();
    }


    /*******************************************************************************
     *
     * Reads an enumeration off the JSON text.
     *
     * The JSON string is compared against the remap table built for `T`.
     *
     * Throws:
     *   JSONException if validation is enabled and no member matches.
     *
     **************************************/
    T read(T)() if (is(T == enum))
    {
        enum mapping = buildRemapTable!T;
        auto oldPos = m_text;
        auto text = borrowString();
        foreach (m; mapping)
        {
            if (text.length == m.json.length && memcmp(text.ptr, m.json.ptr, m.json.length) == 0)
                return m.d;
        }

        // Rewind so the error position points at the offending string.
        m_text = oldPos;
        static if (isValidating)
            handleError(format("Could not find enum member `%s` in `%s`", text, T.stringof));
        assert(0);
    }


    /*******************************************************************************
     *
     * Reads a string off the JSON text with limited lifetime.
     *
     * The reference to this slice is not guaranteed to be valid after the JSON
     * parser has been destroyed or another object key or string value has been
     * parsed. So make a copy before you continue parsing.
*
     * Returns:
     *   If the string had no escape sequences in it, the returned array is a
     *   slice of the JSON text buffer, otherwise a temporary copy held in the
     *   parser's scratch buffer.
     *
     **************************************/
    const(char)[] borrowString()
    {
        expect('"', "at start of string");
        auto escFreeStart = m_text;

        if (scanString!validateUtf8())
        {
            // Fast path here is to return a slice of the JSON if it doesn't contain escapes.
            size_t length = m_text - escFreeStart;
            skipOnePlusWhitespace!skipAllInter();
            return escFreeStart[0 .. length];
        }
        else
        {
            // Otherwise we copy to a separate memory area managed by this parser instance.
            size_t length = 0;
            bool eos = false;
            // The first escape-free run has already been scanned; copy it before
            // entering the decode loop.
            goto CopyToBuffer;
            do
            {
                do
                {
                    // A decoded escape occupies at most 4 bytes of UTF-8.
                    m_mem.capacityNeeded( length + 4 );
                    uint decoded = decodeEscape( &m_mem[length] );
                    length += decoded;
                }
                while (*m_text == '\\');

                escFreeStart = m_text;
                eos = scanString!validateUtf8();
            CopyToBuffer:
                size_t escFreeLength = m_text - escFreeStart;
                m_mem.capacityNeeded( length + escFreeLength );
                memcpy( m_mem.ptr + length, escFreeStart, escFreeLength );
                length += escFreeLength;
            }
            while (!eos);
            skipOnePlusWhitespace!skipAllInter();
            return m_mem[0 .. length];
        }
    }


    // Advances `m_text` to the next `"` or `\`. Returns `true` when it stopped
    // at the closing quote and `false` when it stopped at an escape sequence.
    // With `validate` set, control characters and malformed UTF-8 are reported
    // through `expectNot`.
    private bool scanString(bool validate)()
    {
        static if (validate)
        {
            import core.bitop : bswap;

            while (true)
            {
                // Stop for control-characters, \, " and anything non-ASCII.
                m_text.seekToRanges!"\0\x1F\"\"\\\\\x7F\xFF";

                // Handle printable ASCII range
                if (*m_text == '"')
                    return true;
                if (*m_text == '\\')
                    return false;

                // Anything else better be UTF-8
                uint u = *cast(uint*) m_text;
                // The bit masks below expect the lead byte in the most significant
                // byte. A little-endian load puts it in the least significant
                // byte, so the word has to be swapped there.
                version (LittleEndian) u = bswap(u);

                // Filter overlong ASCII and missing follow byte.
                if (
                    (u & 0b111_00000_11_000000_00000000_00000000) == 0b110_00000_10_000000_00000000_00000000 &&
                    (u > 0b110_00001_10_111111_11111111_11111111))
                    m_text += 2;
                // Handle overlong representation, UTF-16 surrogate pairs and missing follow bytes.
                else if (
                    (u & 0b1111_0000_11_000000_11_000000_00000000) == 0b1110_0000_10_000000_10_000000_00000000 &&
                    (u & 0b0000_1111_00_100000_00_000000_00000000) != 0b0000_1101_00_100000_00_000000_00000000 &&
                    (u > 0b1110_0000_10_011111_10_111111_11111111))
                    m_text += 3;
                // Handle missing follow bytes, Handle overlong representation and out of valid range (max. 0x10FFFF)
                else if (
                    (u & 0b11111_000_11_000000_11_000000_11_000000) == 0b11110_000_10_000000_10_000000_10_000000 &&
                    (u > 0b11110_000_10_001111_10_111111_10_111111) && (u < 0b11110_100_10_010000_10_000000_10_000000))
                    m_text += 4;
                // Handle invalid code units.
                else if (*m_text < ' ' || *m_text == 0x7F)
                    expectNot("is a disallowed control character in strings");
                else if (*m_text >= 0x80 && *m_text <= 0xBF)
                    expectNot("is a UTF-8 follow byte and cannot start a sequence");
                else
                    expectNot("forms invalid UTF-8 sequence in string");
            }
        }
        else
        {
            m_text.seekToAnyOf!("\\\"\0");
            return *m_text == '"';
        }
    }


    // Compares the upcoming JSON string against the compile-time `key`.
    // NOTE(review): the return convention is that of `fixedTermStrCmp`, which
    // is defined outside this view - confirm there.
    private int matchString(string key)()
    {
        return m_text.fixedTermStrCmp!(char, key, "\"\0", "\\")(&stringCompareCallback);
    }


    // Callback handed to `fixedTermStrCmp` by `matchString`. It decodes escape
    // sequences from the JSON text and compares the decoded bytes with `key`.
    // NOTE(review): presumably called when the comparison meets a backslash,
    // and `str` presumably aliases `m_text`; verify against `fixedTermStrCmp`.
    private bool stringCompareCallback(ref immutable(char)* key, ref const(char)* str)
    {
        do
        {
            auto key4 = cast(char[4]*) key;
            char[4] buf = *key4;
            // `decodeEscape` overwrites the first `bytes` bytes of `buf`. The
            // buffer still equals the key only if the decoded bytes match it.
            uint bytes = decodeEscape(buf.ptr);
            if (buf !is *key4)
                return false;
            key += bytes;
        }
        while (str[0] == '\\');
        return true;
    }


    // Maps the character after a backslash to the character it stands for.
    // A '\0' entry marks an invalid escape.
    private static immutable escapes = {
        char[256] result = '\0';
        result['"'] = '"';
        result['\\'] = '\\';
        result['/'] = '/';
        result['b'] = '\b';
        result['f'] = '\f';
        result['n'] = '\n';
        result['r'] = '\r';
        result['t'] = '\t';
        return result;
    }();


    // Skips one escape sequence without decoding it. Only the validate-all
    // build checks that the sequence is well formed.
    private void skipEscape()
    {
        static if (isValidateAll)
        {
            if (m_text[1] != 'u')
            {
                // Normal escape sequence. 2 bytes removed.
                if (!escapes[*++m_text])
                    expectNot("in escape sequence");
                m_text++;
            }
            else
            {
                // UTF-16
                m_text += 2;
                decodeUtf16HexToCodepoint();
            }
        }
        else m_text += 2;
    }


    // Decodes the escape sequence at `m_text` into UTF-8.
    //
    // Params:
    //   dst = Destination with room for at least 4 bytes.
    //
    // Returns:
    //   The number of bytes written to `dst` (1 to 4).
    private uint decodeEscape(scope char* dst)
    {
        if (m_text[1] != 'u')
        {
            // Normal escape sequence. 2 bytes removed.
            dst[0] = escapes[m_text[1]];
            static if (isValidating)
                if (!dst[0])
                    handleError("Invalid escape sequence");
            m_text += 2;
            return 1;
        }
        else
        {
            // UTF-16
            m_text += 2;
            uint cp = decodeUtf16HexToCodepoint();

            // `cp` is the final code point; surrogate pairs have already been
            // combined. Supplementary-plane code points need four bytes.
            if (cp >= 0x10000)
            {
                dst[0] = cast(char)(0b11110_000 | cp >> 18);
                dst[1] = cast(char)(0b10_000000 | cp >> 12 & 0b00_111111);
                dst[2] = cast(char)(0b10_000000 | cp >> 6 & 0b00_111111);
                dst[3] = cast(char)(0b10_000000 | cp & 0b00_111111);
                return 4;
            }
            else if (cp >= 0x800)
            {
                dst[0] = cast(char)(0b1110_0000 | cp >> 12);
                dst[1] = cast(char)(0b10_000000 | cp >> 6 & 0b00_111111);
                dst[2] = cast(char)(0b10_000000 | cp & 0b00_111111);
                return 3;
            }
            else if (cp >= 0x80)
            {
                dst[0] = cast(char)(0b110_00000 | cp >> 6);
                dst[1] = cast(char)(0b10_000000 | cp & 0b00_111111);
                return 2;
            }
            else
            {
                dst[0] = cast(char)(cp);
                return 1;
            }
        }
    }


    // Reads the four hex digits of a `\u` escape (the caller has already
    // stepped over `\u`) and, for a high surrogate, the low surrogate that
    // follows. Returns the resulting code point.
    // NOTE(review): nothing in this function steps over the `\u` prefix of the
    // second escape in a surrogate pair - confirm that `hexDecode4` does.
    private dchar decodeUtf16HexToCodepoint()
    {
        import std.typecons;

        uint cp, hi;
        foreach (i; staticIota!(0, 2))
        {
            static if (isValidating)
            {
                if (auto badByte = hexDecode4(m_text, cp))
                {
                    m_text = badByte;
                    expectNot("is not a hex digit");
                }
            }
            else
            {
                cp = hexDecode4(m_text);
            }

            static if (i == 0)
            {
                // Is this a high surrogate (followed by a low surrogate) or not ?
                if (cp < 0xD800 || cp > 0xDBFF)
                    break;
                // `+` and `-` bind tighter than `<<`, so this is
                // (cp - 0xD800 + 0x40) << 10: the upper ten bits plus the
                // 0x10000 supplementary-plane offset.
                hi = cp - 0xD800 + 0x40 << 10;
            }
            else static if (i == 1)
            {
                static if (isValidating)
                {
                    if (cp < 0xDC00 || cp > 0xDFFF)
                        handleError("The UTF-16 escape produced an invalid code point.");
                }
                // Remove the low-surrogate tag in every build, not only in
                // validating ones. Otherwise its bits would be OR-ed into the
                // high part and corrupt the code point.
                cp -= 0xDC00;
                cp |= hi;
            }
        }

        static if (isValidating)
            if (cp > 0x10FFFF || cp >= 0xD800 && cp <= 0xDFFF)
                handleError("The UTF-16 escape produced an invalid code point.");

        return cp;
    }


    // Skips a whole string value, starting at its opening quote.
    private void skipString(bool skipInter)()
    {
        m_text++;
        skipRestOfString!skipInter();
    }


    // Skips to the end of a string whose opening quote was already consumed,
    // then skips the closing quote and the whitespace after it.
    private void skipRestOfString(bool skipInter)()
    {
        while (!scanString!isValidateAll())
            skipEscape();
        skipOnePlusWhitespace!skipInter();
    }


    /+
     ╔══════════════════════════════════════════════════════════════════════════════
     ║ ⚑ Number
     ╚══════════════════════════════════════════════════════════════════════════════
     +/

    /*******************************************************************************
     *
     * Reads a number off the JSON text.
     *
     * If you ask for an unsigned value, no minus sign will be accepted in the JSON,
     * otherwise all features of JSON numbers will be available. In particular large
     * integers can be given in scientific notation.
     *
     * Params:
     *   N = Built-in numerical type that should be returned.
     *
     * Returns:
     *   The parsed number.
     *
     * Throws:
     *   JSONException, on invalid JSON or integer overflow.
**************************************/
    N read(N)() if (isNumeric!N && !is(N == enum))
    {
        N n = void;
        // Only signed and floating point targets accept a leading minus sign.
        static if (isUnsigned!N)
            enum NumberOptions opt = {};
        else
            enum NumberOptions opt = { minus:true };
        if (parseNumber!opt(m_text, n))
            skipWhitespace!skipAllInter();
        else static if (isValidating)
            handleError(format("Could not convert JSON number to `%s`", N.stringof));
        return n;
    }


    // Skips a number value. The validate-all build checks the number grammar;
    // the other builds skip every character that may appear in a number.
    private void skipNumber(bool skipInter)()
    {
        static if (isValidateAll)
        {
            if (*m_text == '-')
                m_text++;
            if (*m_text == '0')
                m_text++;
            else
                trySkipDigits();
            if (*m_text == '.')
            {
                m_text++;
                trySkipDigits();
            }
            if ((*m_text | 0x20) == 'e')
            {
                m_text++;
                if (*m_text == '+' || *m_text == '-')
                    m_text++;
                trySkipDigits();
            }
            skipWhitespace!false();
        }
        else
        {
            // Inclusive character ranges given as pairs:
            // \t-\n, \r-\r, ' '-' ', '+'-'+', '-'-'.', '0'-'9', 'E'-'E', 'e'-'e'.
            // The space range needs both of its end points, otherwise the pair
            // list has an odd length and every following range is misaligned.
            m_text.skipCharRanges!"\t\n\r\r  ++-.09EEee";
            static if (skipInter)
                m_text.skipAllOf!"\t\n\r ,";
        }
    }


    static if (isValidateAll)
    {
        // Requires at least one decimal digit at the read position and skips
        // the whole run of digits.
        private void trySkipDigits()
        {
            // `char - char` is a signed `int` in D. Compare as unsigned so that
            // bytes below '0' are rejected too, not only bytes above '9'.
            if (cast(uint) (*m_text - '0') > 9)
                expectNot("in number literal");
            m_text.skipAllOf!"0123456789";
        }
    }


    /+
     ╔══════════════════════════════════════════════════════════════════════════════
     ║ ⚑ Object
     ╚══════════════════════════════════════════════════════════════════════════════
     +/

    /*******************************************************************************
     *
     * Reads a plain old data struct off the JSON text.
     *
     * Params:
     *   T = Type of struct that should be returned.
     *
     * Returns:
     *   A struct of type `T`.
**************************************/
    T read(T)() if (is(T == struct) && __traits(isPOD, T))
    {
        // Table of (JSON key, D field name) pairs for `T`.
        enum mapping = buildRemapTable!T;

        nest('{', "on start of object");

        T result;
        if (*m_text != '}')
        {
            while (true)
            {
                auto key = borrowString();
                static if (!skipAllInter)
                {
                    expect(':', "between key and value");
                    skipWhitespace!false();
                }

                // Assign the value to the matching field; unknown keys are skipped.
                foreach (m; mapping)
                {
                    if (key.length == m.json.length && memcmp(key.ptr, m.json.ptr, m.json.length) == 0)
                    {
                        mixin("alias FieldT = typeof(T." ~ m.d ~ ");");
                        mixin("result." ~ m.d ~ " = read!FieldT;");
                        goto NextPair;
                    }
                }
                skipValue();

            NextPair:
                if (*m_text == '}')
                    break;

                static if (!skipAllInter)
                {
                    expect(',', "between key-value pairs");
                    skipWhitespace!false();
                }
            }
        }

        unnest();
        return result;
    }


    /*******************************************************************************
     *
     * Reads a plain old data struct or `null` off the JSON text.
     *
     * Params:
     *   T = Type of struct pointer that should be returned.
     *
     * Returns:
     *   A pointer to a newly filled struct of type `T` on the GC heap, or `null`
     *   if the JSON value is `null`.
     *
     **************************************/
    T read(T)() if (is(PointerTarget!T == struct) && __traits(isPOD, PointerTarget!T))
    {
        alias Target = PointerTarget!T;

        if (peek == DataType.null_)
            return readNull();

        T tp = new Target;
        *tp = read!Target();
        return tp;
    }


    /*******************************************************************************
     *
     * Reads an associative-array off a JSON text.
     *
     * The key type must be `string`, the value type can be any type otherwise
     * supported by the parser.
     *
     * Params:
     *   T = The type of AA to return.
     *
     * Returns:
     *   A newly filled associative array.
**************************************/
    T read(T)() if (is(KeyType!T == string))
    {
        T aa;
        foreach (key; byKey)
        {
            // `key` is only borrowed: it points into the JSON text or into the
            // scratch buffer `m_mem`, which reading the value may overwrite.
            // Take ownership before the value is read. The slice may be aliased
            // without a copy only when it points into text that arrived through
            // the `string` constructor.
            const bool inScratch =
                cast(const(void)*) key.ptr is cast(const(void)*) m_mem.ptr;
            string ownedKey = (m_isString && !inScratch) ? cast(string) key : key.idup;

            aa[ownedKey] = read!(ValueType!T)();
        }
        return aa;
    }


    /*******************************************************************************
     *
     * An alias to the `singleKey` method. Instead of `json.singleKey!"something"`
     * you can write `json.something`. Read the notes on `singleKey`.
     *
     **************************************/
    alias opDispatch = singleKey;


    /*******************************************************************************
     *
     * Skips all keys of an object except the first occurrence with the given key
     * name.
     *
     * Params:
     *   name = the key name of interest
     *
     * Returns:
     *   A temporary struct, a proxy to the parser, that will automatically seek to
     *   the end of the current JSON object on destruction.
     *
     * Throws:
     *   JSONException when the key is not found in the object or parsing errors
     *   occur.
     *
     * Note:
     *   Since this is an on the fly parser, you can only get one key from an
     *   object with this method. Use `keySwitch` or `foreach (key; json.byKey)`
     *   to get values from multiple keys.
*
     * See_Also:
     *   keySwitch
     *
     **************************************/
    @property SingleKey singleKey(string name)()
    {
        nest('{', "on start of object");

        if (*m_text != '}') while (true)
        {
            auto key = borrowString();
            static if (!skipAllInter)
            {
                expect(':', "between key and value");
                skipWhitespace!false();
            }

            // Found it: hand out a proxy positioned at the value.
            if (key.length == name.length && memcmp(key.ptr, name.ptr, name.length) == 0)
                return SingleKey(this);

            skipValueImpl!skipAllInter();

            if (*m_text == '}')
                break;

            static if (!skipAllInter)
            {
                expect(',', "between key-value pairs");
                skipWhitespace!false();
            }
        }

        unnest();
        static if (isValidating)
            handleError("Key not found.");
        assert(0);
    }


    /*******************************************************************************
     *
     * Selects from a set of given keys in an object and calls the corresponding
     * delegate. The difference to `singleKey` when invoked with a single key is
     * that `keySwitch` will not error out if the key is non-existent and may
     * trigger the delegate multiple times, if the JSON object has duplicate keys.
     *
     * A delegate is expected to consume the value of its key. If it returns
     * without reading anything, the value is skipped.
     *
     * Params:
     *   Args = the names of the keys
     *   dlg = the delegates corresponding to the keys
     *
     * Throws:
     *   JSONException when parsing errors occur. A missing key is not an error.
     *
     **************************************/
    void keySwitch(Args...)(scope void delegate()[Args.length] dlg...)
    {
        nest('{', "on start of object");

        if (*m_text != '}') while (true)
        {
            auto key = borrowString();
            static if (!skipAllInter)
            {
                expect(':', "between key and value");
                skipWhitespace!false();
            }

            auto oldPos = m_text;
            foreach (i, arg; Args)
            {
                if (key.length == arg.length && memcmp(key.ptr, arg.ptr, arg.length) == 0)
                {
                    dlg[i]();
                    // Same rule as in `iterationGuts`: a value the callback did
                    // not consume is skipped here, so it cannot be mistaken for
                    // the next key.
                    if (oldPos is m_text)
                        skipValue();
                    goto Next;
                }
            }
            skipValue();

        Next:
            if (*m_text == '}')
                break;

            static if (!skipAllInter) if (oldPos !is m_text)
            {
                expect(',', "after key-value pair");
                skipWhitespace!false();
            }
        }

        unnest();
    }


    // Implementation behind `byKey`: calls `foreachBody` once per key of the
    // object at the read position. Skipping unread values and handling an early
    // `break` are delegated to `iterationGuts`.
    private int byKeyImpl(scope int delegate(ref const char[]) foreachBody)
    {
        nest('{', "at start of foreach over object");

        int result = 0;
        if (*m_text != '}') while (true)
        {
            auto key = borrowString();
            static if (!skipAllInter)
            {
                expect(':', "between key and value");
                skipWhitespace!false;
            }

            if (iterationGuts!"{}"(result, key, foreachBody, "after key-value pair"))
                break;
        }

        unnest();
        return result;
    }


    /*******************************************************************************
     *
     * Iterate the keys of a JSON object with `foreach`.
     *
     * Notes:
     *   $(UL
     *     $(LI If you want to store the key, you need to duplicate it.)
*   )
     *
     * Example:
     * ---
     * uint id;
     * foreach (key; json.byKey)
     *     if (key == "id")
     *         id = json.read!uint;
     * ---
     **************************************/
    @safe @nogc pure nothrow
    @property int delegate(scope int delegate(ref const char[])) byKey()
    {
        return &byKeyImpl;
    }


    /+
     ╔══════════════════════════════════════════════════════════════════════════════
     ║ ⚑ Array handling
     ╚══════════════════════════════════════════════════════════════════════════════
     +/

    /*******************************************************************************
     *
     * Reads a dynamic array off the JSON text.
     *
     * Each JSON array element is read as the element type of `T`.
     *
     **************************************/
    T read(T)() if (isDynamicArray!T && !isSomeString!T)
    {
        import std.array;
        Appender!T app;
        foreach (i; this)
            app.put(read!(typeof(T.init[0])));
        return app.data;
    }


    /*******************************************************************************
     *
     * Reads a static array off the JSON text.
     *
     * When validation is enabled, it is an error if the length of the JSON array
     * does not match the length of `T`. Otherwise surplus JSON elements are
     * skipped and unset array elements receive their initial value.
     *
     **************************************/
    T read(T)() if (isStaticArray!T)
    {
        alias E = typeof(T.init[0]);

        T sa = void;
        size_t cnt;
        foreach (i; this)
        {
            // Elements beyond the end of `sa` are not read here; the iteration
            // skips them.
            if (i < T.length)
                sa[i] = read!E;
            cnt = i + 1;
        }
        static if (isValidating)
        {
            if (cnt != T.length)
                handleError(format("Static array size mismatch. Expected %s, got %s", T.length, cnt));
        }
        else
        {
            // Fill the remainder with the element's initial value. `T.init` is
            // the whole array and cannot be assigned to a single element.
            foreach (i; cnt .. T.length)
                sa[i] = E.init;
        }
        return sa;
    }


    /*******************************************************************************
     *
     * Iterate over a JSON array via `foreach`.
**************************************/
    int opApply(scope int delegate(const size_t) foreachBody)
    {
        nest('[', "at start of foreach over array");

        int result = 0;
        if (*m_text != ']')
        {
            // `iterationGuts` returns true once the closing bracket was reached
            // or the loop body asked to stop.
            size_t idx = 0;
            while (!iterationGuts!"[]"(result, idx, foreachBody, "after array element"))
                idx++;
        }

        unnest();
        return result;
    }


    /+
     ╔══════════════════════════════════════════════════════════════════════════════
     ║ ⚑ Boolean
     ╚══════════════════════════════════════════════════════════════════════════════
     +/

    /*******************************************************************************
     *
     * Reads a boolean value off the JSON text.
     *
     **************************************/
    bool read(T)() if (is(T == bool))
    {
        return skipBoolean!(skipAllInter, isValidating)();
    }


    // Steps over `true` or `false` and returns the value that was skipped.
    // With `validate` set, the spelling of the keyword is checked.
    private bool skipBoolean(bool skipInter, bool validate = isValidateAll)()
    {
        // For `false` the leading 'f' is stepped over, so both keywords can be
        // compared as four bytes.
        static immutable char[4][2] keywords = [ "true", "alse" ];

        const bool isFalse = (*m_text == 'f');
        static if (validate)
        {
            auto word = cast(char[4]*) &m_text[isFalse];
            if (*word != keywords[isFalse])
                handleError("`true` or `false` expected.");
        }

        if (isFalse)
            m_text += 5;
        else
            m_text += 4;
        skipWhitespace!skipInter();
        return !isFalse;
    }


    /+
     ╔══════════════════════════════════════════════════════════════════════════════
     ║ ⚑ Null
     ╚══════════════════════════════════════════════════════════════════════════════
     +/

    /*******************************************************************************
     *
     * Reads `null` off the JSON text.
**************************************/
    typeof(null) readNull()
    {
        // Use `skipAllInter` like the other readers. In trusted mode the ',' or
        // ':' after the value has to be consumed here, because `expect` no
        // longer checks what it steps over. In the validating modes
        // `skipAllInter` is false, so nothing changes there.
        skipNull!(skipAllInter, isValidating)();
        return null;
    }


    // Steps over `null`. With `validate` set, the keyword is checked by
    // comparing its four bytes as one `uint`.
    private void skipNull(bool skipInter, bool validate = isValidateAll)()
    {
        static if (validate)
            if (*cast(const uint*) m_text != *cast(const uint*) "null".ptr)
                handleError("`null` expected.");
        m_text += 4;
        skipWhitespace!skipInter();
    }


    /+
     ╔══════════════════════════════════════════════════════════════════════════════
     ║ ⚑ Helpers and Error Handling
     ╚══════════════════════════════════════════════════════════════════════════════
     +/

    /*******************************************************************************
     *
     * Skips the next JSON value if you are not interested.
     *
     **************************************/
    void skipValue()
    {
        skipValueImpl!skipAllInter();
    }


    // Skips one value of any type. Objects and arrays are walked element by
    // element in the validate-all build; otherwise the parser seeks straight
    // to the matching closing brace or bracket.
    private void skipValueImpl(bool skipInter)()
    {
        with (DataType) final switch (peek)
        {
            case string:
                skipString!skipInter();
                break;
            case number:
                skipNumber!skipInter();
                break;
            case object:
                static if (isValidateAll)
                {
                    // Breaking out of the loop leaves the rest of the aggregate
                    // to the iteration code.
                    foreach (_; this.byKey)
                        break;
                }
                else
                {
                    m_text++;
                    seekObjectEnd();
                    skipOnePlusWhitespace!skipInter();
                }
                break;
            case array:
                static if (isValidateAll)
                {
                    foreach (_; this)
                        break;
                }
                else
                {
                    m_text++;
                    seekArrayEnd();
                    skipOnePlusWhitespace!skipInter();
                }
                break;
            case boolean:
                skipBoolean!skipInter();
                break;
            case null_:
                skipNull!skipInter();
                break;
        }
    }


    /*******************************************************************************
     *
     * Returns the type of data that is up next in the JSON text.
**************************************/
    @property DataType peek()
    {
        // Lookup table from the first byte of a value to its data type.
        // Every byte that cannot start a value maps to `ubyte.max`.
        static immutable trans = {
            DataType[256] result = cast(DataType) ubyte.max;
            result['{'] = DataType.object;
            result['['] = DataType.array;
            result['-'] = DataType.number;
            foreach (i; '0' .. '9'+1)
                result[i] = DataType.number;
            result['"'] = DataType.string;
            result['t'] = DataType.boolean;
            result['f'] = DataType.boolean;
            result['n'] = DataType.null_;
            return result;
        }();

        DataType vt = trans[*m_text];
        static if (isValidating)
            if (vt == ubyte.max)
                expectNot("while peeking at next value type");
        return vt;
    }


    // Enters an object or array: consumes the opening character `c` and the
    // whitespace after it, and records the new nesting depth.
    private void nest(char c, string msg)
    {
        expect(c, msg);
        skipWhitespace!false();
        m_nesting++;
    }


    // Leaves an object or array: consumes the closing character. After the
    // outermost aggregate, validating builds require the end of the text.
    private void unnest()
    in { assert(m_nesting > 0); }
    body
    {
        if (--m_nesting == 0)
        {
            skipOnePlusWhitespace!false();
            static if (isValidating)
                if (*m_text != '\0')
                    handleError("Expected end of JSON.");
        }
        else skipOnePlusWhitespace!skipAllInter();
    }


    // One step of a `foreach` over an object or array: runs the loop body for
    // the current element, skips the value if the body did not read it, and
    // handles the separator.
    //
    // Returns:
    //   `true` when the iteration is over, either because the closing brace was
    //   reached or because the loop body asked to stop.
    private bool iterationGuts(char[2] braces, T, D)(ref int result, T idx, scope D dlg,
        string missingCommaMsg)
    {
        auto oldPos = m_text;
        static if (isValidateAll)
        {
            // After a `break` the remaining elements are still validated, but
            // the loop body is no longer called.
            if (result)
            {
                skipValueImpl!(!isValidateAll)();
                goto PastValue;
            }
        }
        result = dlg(idx);
        if (oldPos is m_text)
            skipValueImpl!(!isValidateAll)();

    PastValue:
        if (*m_text == braces[1])
            return true;

        static if (!isValidateAll) if (result)
        {
            seekAggregateEnd!braces();
            return true;
        }

        static if (!skipAllInter) if (oldPos !is m_text)
        {
            expect(',', missingCommaMsg);
            skipWhitespace!false();
        }
        return false;
    }


    static if (!isValidateAll)
    {
        // Seeks to the '}' that closes the current object.
        private void seekObjectEnd()
        {
            seekAggregateEnd!"{}"();
        }


        // Seeks to the ']' that closes the current array.
        private void seekArrayEnd()
        {
            seekAggregateEnd!"[]"();
        }


        // Seeks to the closing character that matches the already consumed
        // opening character, tracking nesting and stepping over strings.
        // NOTE(review): '\0' is part of the seek set but has no `case` below,
        // so a truncated text ends up in the `final switch` without a match -
        // confirm whether that is intended.
        private void seekAggregateEnd(immutable char[2] parenthesis)()
        {
            size_t nesting = 1;
            while (true)
            {
                m_text.seekToAnyOf!(parenthesis ~ "\"\0");
                final switch (*m_text)
                {
                    case parenthesis[0]:
                        m_text++;
                        nesting++;
                        break;
                    case parenthesis[1]:
                        if (--nesting == 0)
                            return;
                        m_text++;
                        break;
                    case '"':
                        // Could skip ':' or ',' here by passing `true`, but we skip it above anyways.
                        skipString!false();
                }
            }
        }
    }


    /// This also increments the JSON read pointer.
    private void expect(char c, string msg)
    {
        static if (isValidating)
            if (*m_text != c)
                expectImpl(c, msg);
        m_text++;
    }


    // Reports an error if the current character is `c`. Does not advance.
    private void expectNot(char c, string msg)
    {
        static if (isValidating)
            if (*m_text == c)
                expectNot(msg);
    }


    static if (isValidating)
    {
        // Raises an error that names the current character (or byte) and
        // appends `msg`.
        @noinline
        private void expectNot(string msg)
        {
            string tmpl = isPrintable(*m_text)
                ? "Character '%s' %s."
                : "Byte 0x%02x %s.";
            handleError(format(tmpl, *m_text, msg));
        }


        // Raises an error that names the expected character `c` and what was
        // found instead.
        @noinline
        private void expectImpl(char c, string msg)
        {
            string tmpl = isPrintable(*m_text)
                ? "Expected '%s', but found '%s' %s."
                : "Expected '%s', but found byte 0x%02x %s.";
            handleError(format(tmpl, c, *m_text, msg));
        }


        // Computes line and column of the read position and throws a
        // `JSONException` carrying `msg`.
        @noinline
        private void handleError(string msg)
        {
            // Walk line by line from the start of the text until the read
            // position has been passed; `last` then points at its line.
            size_t line;
            const(char)* p = m_start;
            const(char)* last;
            do
            {
                last = p;
                p.skipToNextLine();
                line++;
            }
            while (p <= m_text);

            // The column is counted in graphemes.
            size_t pos;
            pos += last[0 .. m_text - last].byGrapheme.walkLength;

            throw new JSONException(msg, line.to!int, pos.to!int);
        }
    }


    // Steps over one character, then over whitespace (see `skipWhitespace`).
    @forceinline @nogc pure nothrow
    private void skipOnePlusWhitespace(bool skipInter)()
    {
        m_text++;
        skipWhitespace!skipInter();
    }


    // Skips whitespace. With `skipInter` set, the separators ',' and ':' are
    // skipped as well.
    @forceinline @nogc pure nothrow
    private void skipWhitespace(bool skipInter)()
    {
        static if (skipInter)
            m_text.skipAllOf!"\t\n\r ,:";
        else
            m_text.skipAsciiWhitespace();
    }


    // Proxy returned by `singleKey`. It forwards to the parser and, on
    // destruction, moves the parser to the end of the enclosing object.
    private static struct SingleKey
    {
        alias json this;

        private Json*        m_pjson;
        private const(char*) m_oldPos;  // read position at the selected value

        @safe @nogc pure nothrow
        @property ref Json json()
        {
            return *m_pjson;
        }

        this(ref Json json)
        {
            m_pjson = &json;
            m_oldPos = json.m_text;
        }

        ~this()
        {
            static if (isValidateAll)
            {
                // If the selected value was never read, validate and skip it
                // first, so it is not mistaken for the next key below.
                if (m_oldPos is json.m_text)
                    json.skipValueImpl!false();

                if (*json.m_text != '}')
                {
                    json.expect(',', "after key-value pair");
                    json.skipWhitespace!false();

                    // Validate the remaining key-value pairs of the object.
                    while (true)
                    {
                        json.skipString!false();
                        json.expect(':', "between key and value");
                        json.skipWhitespace!false();
                        json.skipValueImpl!false();

                        if (*json.m_text == '}')
                            break;

                        json.expect(',', "after key-value pair");
                        json.skipWhitespace!false();
                    }
                }
            }
            else
            {
                json.seekObjectEnd();
            }
            json.unnest();
        }
    }


    private static struct File
    {
        alias m_json this;

        private size_t m_len;
        Json m_json;

        @disable this();
        @disable this(this);

        this(const(ubyte)[] fname)
        {
            version (Posix)
            {
                import core.sys.posix.fcntl;
                import core.sys.posix.sys.mman;
                import core.sys.posix.unistd;

                version (linux)
                    enum O_CLOEXEC = octal!2000000;
                else version (OSX) // Requires at least OS X 10.7 Lion
                    enum O_CLOEXEC = 0x1000000;
else static assert(0, "Not implemented"); 1417 1418 int fd = { return open(charPtr!fname, O_RDONLY | O_NOCTTY | O_CLOEXEC); }(); 1419 assert(fcntl(fd, F_GETFD) & FD_CLOEXEC, "Could not set O_CLOEXEC."); 1420 1421 if (fd == -1) 1422 throw new ErrnoException("Could not open JSON file for reading."); 1423 scope(exit) close(fd); 1424 1425 // Get the file size 1426 stat_t info; 1427 if (fstat(fd, &info) == -1) 1428 throw new ErrnoException("Could not get JSON file size."); 1429 1430 // Ensure we have 16 extra bytes 1431 size_t pagesize = sysconf(_SC_PAGESIZE); 1432 ulong fsize = ulong(info.st_size + pagesize - 1) / pagesize * pagesize; 1433 bool zeroPage = fsize < info.st_size + 16; 1434 if (zeroPage) 1435 fsize += pagesize; 1436 if (fsize > size_t.max) 1437 throw new FileException("JSON file too large to be mapped in RAM."); 1438 m_len = cast(size_t) fsize; 1439 1440 // Map the file 1441 void* mapping = mmap(null, m_len, PROT_READ, MAP_PRIVATE, fd, 0); 1442 if (mapping == MAP_FAILED) 1443 throw new ErrnoException("Could not map JSON file."); 1444 scope(failure) 1445 munmap(mapping, m_len); 1446 1447 // Get a zero-page up behind the JSON text 1448 if (zeroPage) 1449 { 1450 void* offs = mapping + m_len - pagesize; 1451 if (mmap(offs, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) 1452 throw new ErrnoException("Could not map zero-page behind JSON text."); 1453 } 1454 1455 // Initialize the parser on the JSON text 1456 m_json = (cast(char*) mapping)[0 .. 
cast(size_t) info.st_size]; 1457 } 1458 else static assert(0, "Not implemented"); 1459 } 1460 1461 1462 this(const(char)[] fname) 1463 { 1464 this(fname.representation); 1465 } 1466 1467 1468 nothrow 1469 ~this() 1470 { 1471 version (Posix) 1472 { 1473 import core.sys.posix.sys.mman; 1474 munmap(cast(void*) m_json.m_start, m_len); 1475 } 1476 } 1477 } 1478 } 1479 1480 1481 private template buildRemapTable(T) 1482 { 1483 import std.typetuple; 1484 1485 static if (is(T == enum)) 1486 { 1487 struct Remap { T d; string json; } 1488 enum members = EnumMembers!T; 1489 } 1490 else 1491 { 1492 struct Remap { string d; string json; } 1493 enum members = FieldNameTuple!T; 1494 } 1495 enum mapping = getUDA!(T, JsonMapping).map; 1496 1497 template Impl(size_t a, size_t b) 1498 { 1499 static if (b - a > 1) 1500 { 1501 alias Impl = TypeTuple!(Impl!(a, (b + a) / 2), Impl!((b + a) / 2, b)); 1502 } 1503 else static if (b - a == 1) 1504 { 1505 static if (is(T == enum)) 1506 enum key = members[a].to!string; 1507 else 1508 alias key = members[a]; 1509 static if ((key in mapping) !is null) 1510 enum mapped = mapping[key]; 1511 else 1512 alias mapped = key; 1513 alias Impl = TypeTuple!(Remap(members[a], mapped)); 1514 } 1515 else alias Impl = TypeTuple!(); 1516 } 1517 1518 alias buildRemapTable = Impl!(0, members.length); 1519 } 1520 1521 1522 unittest 1523 { 1524 struct Counter 1525 { 1526 size_t array, object, key, string, number, boolean, null_; 1527 } 1528 1529 void valueHandler(ref Json!validateAll.File json, ref Counter ctr) 1530 { 1531 with (DataType) final switch (json.peek) 1532 { 1533 case array: 1534 ctr.array++; 1535 foreach (_; json) 1536 valueHandler(json, ctr); 1537 break; 1538 case object: 1539 ctr.object++; 1540 foreach(key; json.byKey) 1541 { 1542 ctr.key++; 1543 valueHandler(json, ctr); 1544 } 1545 break; 1546 case string: 1547 ctr..string++; 1548 json.skipValue(); 1549 break; 1550 case number: 1551 ctr.number++; 1552 json.skipValue(); 1553 break; 1554 case boolean: 
1555 ctr.boolean++; 1556 json.skipValue(); 1557 break; 1558 case null_: 1559 ctr.null_++; 1560 json.skipValue(); 1561 break; 1562 } 1563 } 1564 1565 void passFile(string fname, Counter valid) 1566 { 1567 auto json = Json!validateAll.File(fname); 1568 Counter ctr; 1569 valueHandler(json, ctr); 1570 assert(ctr == valid, fname); 1571 } 1572 1573 void failFile(string fname) 1574 { 1575 auto json = Json!validateAll.File(fname); 1576 Counter ctr; 1577 assertThrown!JSONException(valueHandler(json, ctr), fname); 1578 } 1579 1580 // Tests that need to pass according to RFC 7159 1581 passFile("test/pass1.json", Counter( 6, 4, 33, 21, 32, 4, 2)); 1582 passFile("test/pass2.json", Counter(19, 0, 0, 1, 0, 0, 0)); 1583 passFile("test/pass3.json", Counter( 0, 2, 3, 2, 0, 0, 0)); 1584 passFile("test/fail1.json", Counter( 0, 0, 0, 1, 0, 0, 0)); 1585 passFile("test/fail18.json", Counter(20, 0, 0, 1, 0, 0, 0)); 1586 1587 // Tests that need to fail 1588 foreach (i; chain(iota(2, 18), iota(19, 34))) 1589 failFile("test/fail" ~ i.to!string ~ ".json"); 1590 1591 // Deserialization 1592 struct Test 1593 { 1594 string text1; 1595 string text2; 1596 string text3; 1597 double dbl = 0; 1598 float flt = 0; 1599 ulong ul; 1600 uint ui; 1601 ushort us; 1602 ubyte ub; 1603 long lm, lp; 1604 int im, ip; 1605 short sm, sp; 1606 byte bm, bp; 1607 bool t, f; 1608 Test* tp1, tp2; 1609 int[2] sa; 1610 int[] da; 1611 Test[string] aa; 1612 SearchPolicy e; 1613 } 1614 1615 Test t1 = { 1616 text1 : "abcde", 1617 text2 : "", 1618 text3 : null, 1619 dbl : 1.1, 1620 flt : -1.1, 1621 ul : ulong.max, 1622 ui : uint.max, 1623 us : ushort.max, 1624 ub : ubyte.max, 1625 lm : long.min, 1626 lp : long.max, 1627 im : int.min, 1628 ip : int.max, 1629 sm : short.min, 1630 sp : short.max, 1631 bm : byte.min, 1632 bp : byte.max, 1633 t : true, 1634 f : false, 1635 tp1 : null, 1636 tp2 : new Test("This is", "a", "test."), 1637 sa : [ 33, 44 ], 1638 da : [ 5, 6, 7 ], 1639 aa : [ "hash" : Test("x", "y", "z") ], 1640 e : 
SearchPolicy.linear 1641 }; 1642 Test t2 = parseJSON(`{ 1643 "text1" : "abcde", 1644 "text2" : "", 1645 "text3" : null, 1646 "dbl" : 1.1, 1647 "flt" : -1.1, 1648 "ul" : ` ~ ulong.max.to!string ~ `, 1649 "ui" : ` ~ uint.max.to!string ~ `, 1650 "us" : ` ~ ushort.max.to!string ~ `, 1651 "ub" : ` ~ ubyte.max.to!string ~ `, 1652 "lm" : ` ~ long.min.to!string ~ `, 1653 "lp" : ` ~ long.max.to!string ~ `, 1654 "im" : ` ~ int.min.to!string ~ `, 1655 "ip" : ` ~ int.max.to!string ~ `, 1656 "sm" : ` ~ short.min.to!string ~ `, 1657 "sp" : ` ~ short.max.to!string ~ `, 1658 "bm" : ` ~ byte.min.to!string ~ `, 1659 "bp" : ` ~ byte.max.to!string ~ `, 1660 "t" : true, 1661 "f" : false, 1662 "tp1" : null, 1663 "tp2" : { "text1": "This is", "text2": "a", "text3": "test." }, 1664 "sa" : [ 33, 44 ], 1665 "da" : [ 5, 6, 7 ], 1666 "aa" : { "hash" : { "text1":"x", "text2":"y", "text3":"z" } }, 1667 "e" : "linear" 1668 }`).read!Test; 1669 1670 assert(t2.tp2 && *t1.tp2 == *t2.tp2); 1671 assert(t1.da == t2.da); 1672 assert(t1.aa == t2.aa); 1673 t2.tp2 = t1.tp2; 1674 t2.da = t1.da; 1675 t2.aa = t1.aa; 1676 assert(t1 == t2); 1677 }