1 /** 2 * Fast, non-allocating string functions. 3 * 4 * Authors: 5 * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 6 * 7 * Copyright: 8 * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 9 * 10 * License: 11 * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 12 */ 13 module fast..string; 14 15 import core.bitop; 16 import core.simd; 17 import core.stdc.stdlib; 18 19 version (GNU) import gcc.attribute; 20 21 import std.algorithm; 22 import std.range; 23 import std.stdio; 24 import std..string; 25 import std.traits; 26 27 import fast.buffer; 28 29 30 /** 31 * Splits a string in two around one or more compile-time known code units. 32 * 33 * Params: 34 * match = An expression that matches all characters around which a split should occur. 35 * str = The string to scan. 36 * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here. 37 * after = The part after the split is stored here. If no character in $(D match) is found, $(D null) is returned here. 38 * splitter = If not $(D null), this pointer will receive a copy of the splitting char. 39 * 40 * Returns: 41 * $(D true), iff a split occured. 42 */ 43 bool split(string match)(scope inout(char[]) str, ref inout(char)[] before, ref inout(char)[] after, char* splitter = null) 44 { 45 immutable pos = min(str.length, SimdMatcher!match.find(str.ptr, str.ptr + str.length)); 46 before = str[0 .. pos]; 47 if (pos < str.length) { 48 after = str[pos+1 .. $]; 49 if (splitter) *splitter = str[pos]; 50 return true; 51 } 52 after = null; 53 return false; 54 } 55 56 /** 57 * Similar to the overload for strings, this function works a little faster as it lacks boundary checks. 58 * It assumes that one of the characters in $(D match) is actually contained in the string. 59 * 60 * Params: 61 * match = An expression that matches all characters around which a split should occur. 62 * ptr = The string to scan. 63 * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here. 64 * after = The pointer to the part after the split is stored here. 65 * 66 * Returns: 67 * The char that caused the split. (From $(D match).) 68 */ 69 char split(string match)(scope inout(char*) ptr, ref inout(char)[] before, ref inout(char)* after) 70 { 71 immutable pos = SimdMatcher!match.find(str.ptr); 72 before = ptr[0 .. pos]; 73 after = ptr + pos + 1; 74 return ptr[pos]; 75 } 76 77 78 /******************************************************************************* 79 * 80 * Finds the first occurrence of a set of compile-time known code units in a 81 * string. While the algorithm is `O(n)` in relation to the count of given code 82 * units, the overhead when using it on short strings weights more for only 1 or 83 * 2 code units. 84 * 85 * Params: 86 * match = An expression that matches all characters around which a split 87 * should occur. 88 * str = The string to search for a code unit. 89 * 90 * Returns: 91 * If a match is found, the index into the string is returned. 92 * Otherwise an invalid index is returned. Check with 93 * `if (result < str.length)`. 94 * 95 * See_Also: 96 * split, 97 * $(LINK2 http://mischasan.wordpress.com/2011/11/09/the-generic-sse2-loop/, 98 * The Generic SSE2 Loop) 99 * 100 * Example: 101 * --- 102 * // Check if there is a '/' or '\' in the string 103 * auto pos = str.find!(`or(=/,=\)`); 104 * if (pos < str.length) { } 105 * --- 106 **************************************/ 107 size_t find(string match)(in char[] str) pure nothrow 108 { 109 return SimdMatcher!match.find(str.ptr, str.ptr + str.length); 110 } 111 112 /******************************************************************************* 113 * 114 * Same as the overload for strings, but with only a char*, making it faster as 115 * it cannot do a boundary check. 116 * 117 * Sometimes when looking for a character it is helpful to append it as a 118 * sentinel to the char buffer and then use this function instead of the slower 119 * one that checks the boundary constantly. 120 * 121 * Example: 122 * --- 123 * // Find a ']' in a buffer of 1024 bytes using an additional sentinel. 124 * size_t length = 1024; 125 * char[] buffer = new char[](length+1); 126 * buffer[length] = ']'; 127 * auto pos = buffer.ptr.find!("=]"); 128 * if (pos < length) { // was an actual find before the sentinel } 129 * --- 130 **************************************/ 131 inout(char)* find(string match)(inout(char*) ptr) pure nothrow 132 { 133 return SimdMatcher!match.find(ptr); 134 } 135 136 137 bool keyword1(string key)(in char[] str, 138 scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null) 139 { 140 auto strPtr = str.ptr; 141 auto keyPtr = key.ptr; 142 auto keyEnd = keyPtr + key.length; 143 144 while (keyPtr !is keyEnd) 145 { 146 while (*strPtr == '\\') 147 if (!mismatcher(keyPtr, strPtr)) 148 return false; 149 150 if (*strPtr == '"' || *strPtr != *keyPtr) 151 return false; 152 153 strPtr++; 154 keyPtr++; 155 } 156 return true; 157 } 158 159 160 bool keyword2(string key)(in char[] str, 161 scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null) 162 { 163 version (LDC) import ldc.gccbuiltins_x86; 164 165 /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when 166 * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD 167 * register. 168 * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after, 169 * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we 170 * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties: 171 * - It consists of as many SIMD words as the key is long. 172 * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using 173 * disjunct prime factors for the key length and the SIMD word size. 174 * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor 175 * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that 176 * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift). 177 */ 178 179 // 'key' length rounded up to next odd value is the number of SIMD words we need. 180 enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ? 181 align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation 182 .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array; 183 align(16) static immutable char[Word.sizeof] dquote = '"'; 184 align(16) static immutable char[Word.sizeof] bslash = '\\'; 185 enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }(); 186 187 const(char)* strPtr = str.ptr; 188 immutable(char)* keyPtr = keyData.ptr; 189 auto bsWord = *cast(immutable Word*) &bslash; 190 auto dqWord = *cast(immutable Word*) &dquote; 191 192 do 193 { 194 // writeln("enter loop"); 195 // Calculate SSE word boundary before 'str' 196 size_t strOff = cast(size_t) strPtr % Word.sizeof; 197 Word strWord = *cast(Word*) (strPtr - strOff); 198 size_t keyPos = keyPtr - keyData.ptr; 199 size_t keyOff = (strOff - keyPos) % Word.sizeof; 200 Word keyWord = (cast(Word*) keyData.ptr)[keyOff * mul % keyLenOdd + (keyOff + keyPos) / Word.sizeof]; 201 202 // Escape seqences have priority. 'key' may contain backslashes as part of the text, but in 'str' a backslash 203 // at the same position is actually the begin of the escape sequence "\\". 204 Word bsMask = strWord.maskEqual(bsWord); 205 // If after processing backslashes there is a double-quote in 'str' we must not match it with a double-quote in 206 // 'key', since it is the delimiter of 'str'. 207 Word dqMask = strWord.maskEqual(dqWord); 208 // How many bytes of 'key' and 'str' match in our 'Word' ? 209 Word missMask = strWord.maskNotEqual(keyWord); 210 // Merge mismatch, backslash and double-quote masks and move them into a non-SSE register. 211 Word allMasks = or(missMask, or(bsMask, dqMask)); 212 uint skip = bsf((__builtin_ia32_pmovmskb128(allMasks) | 1 << Word.sizeof) >> strOff); 213 // writeln(keyPtr[0 .. 5]); 214 // writeln(strPtr[0 .. 5]); 215 // writeln(skip); 216 strPtr += skip; 217 keyPtr += skip; 218 219 // Have we matched enough bytes to reach the end of 'key' ? 220 if (keyPtr - keyData.ptr >= key.length) 221 return true; 222 223 // When we find a mismatch between 'key' and 'str', we try to call a provided helper function. 224 // It may decode escape sequences in 'str' and recover from the state. 225 // If that fails we accept the mismatch and return 'false'. 226 // writefln("Key: %s, Str %s", *keyPtr, *strPtr); 227 // const(char*) strPtrOld = strPtr; 228 // immutable(char*) keyPtrOld = keyPtr; 229 if (strOff + skip < Word.sizeof && !(mismatcher && mismatcher(keyPtr, strPtr))) 230 { 231 // writefln("Key: %s, Str %s", *keyPtr, *strPtr); 232 return false; 233 } 234 // writefln("Key: %s, Str %s", *keyPtr, *strPtr); 235 } 236 while (keyPtr - keyData.ptr < key.length); 237 238 return true; 239 } 240 241 242 bool keyword3(string key)(in char[] str, bool function(ref immutable(char)*, ref const(char)*) mismatcher = null) 243 { 244 version (LDC) import ldc.gccbuiltins_x86; 245 version (GNU) import gcc.builtins; 246 247 /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when 248 * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD 249 * register. 250 * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after, 251 * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we 252 * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties: 253 * - It consists of as many SIMD words as the key is long. 254 * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using 255 * disjunct prime factors for the key length and the SIMD word size. 256 * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor 257 * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that 258 * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift). 259 */ 260 261 // 'key' length rounded up to next odd value is the number of SIMD words we need. 262 enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ? 263 align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation 264 .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array; 265 align(16) static immutable char[Word.sizeof] dqbs = `\"""""""""""""""`; 266 enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }(); 267 268 // Calculate SSE word boundary before 'str' 269 uint off = cast(uint) str.ptr % Word.sizeof; 270 // SSE aligned pointer <= 'str.ptr'. 271 auto strPtr = cast(const(Word)*) (str.ptr - off); 272 auto keyPtr = cast(immutable(Word)*) keyData.ptr + off * mul % keyLenOdd; 273 auto keyStart = cast(immutable(char)*) keyPtr + off; 274 Word strWord = *strPtr; 275 276 LoadKey: 277 auto keyEnd = keyStart + key.length; 278 279 Compare: 280 // Get bitmask of special characters in 'str'. 281 uint escMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*cast(Word*) &dqbs, strWord, 0b_0_00_00_00)); 282 // writeln("Called a"); 283 // Get bitmask of characters from 'key' and 'str' that don't match. 284 uint missMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*keyPtr, strWord, 0b_0_01_10_00)); 285 // writeln("Called b"); 286 // Create a merged mask for both and an additional bit at position 16, serving as a delimiter for 'bsf'. 287 uint mask = (escMask | missMask) & (uint.max << off); 288 289 // No bit set means all 16 bytes are equal and there are no escape characters. That's as good as it gets. 290 if (!mask) 291 { 292 // Jump forward by a word size and see if we successfully compared all bytes to the end of our 'key'. 293 keyPtr += 16; 294 if (cast(immutable(char)*) keyPtr >= keyEnd) 295 return true; 296 // Otherwise continue with the next set of 16 bytes. 297 strPtr += 16; 298 off = 0; 299 goto Compare; 300 } 301 302 // One of two cases ... 303 off = bsf(mask); 304 305 // 1) Did the mismatch occur past the end of 'key' ? Then we compared succesfully. 306 if (cast(immutable(char)*) keyPtr + off >= keyEnd) 307 return true; 308 309 // 2) It must be a special character or actual mismatch, let 'mismatcher' decide. 310 // writefln("Skipping: %s", (cast(const(char)*) strPtr)[0 .. off]); 311 auto strChP = cast(const(char)*) strPtr + off; 312 auto strChPOld = strChP; 313 auto keyChP = cast(immutable(char)*) keyPtr + off; 314 bool goodToGo = mismatcher(keyChP, strChP); 315 316 // writefln("Mismatcher used %s key chars, %s str chars and returned: %s", keyAdd, strAdd, goodToGo); 317 if (keyChP >= keyEnd) 318 return true; 319 if (!goodToGo) 320 return false; 321 322 // Arriving here we just decoded an escape sequence and have to adjust our pointers. 323 auto keyPos = keyChP - keyStart; 324 off += strChP - strChPOld; 325 if (off >= 16) 326 { 327 strPtr += off / 16; 328 strWord = *strPtr; 329 off %= 16; 330 } 331 auto baseOff = (off - keyPos) & 15; 332 keyPtr = cast(immutable(Word)*) keyData.ptr + baseOff * mul % keyLenOdd; 333 keyStart = cast(immutable(char)*) keyPtr + baseOff; 334 keyPtr += (baseOff + keyPos) / 16; 335 goto LoadKey; 336 } 337 338 339 size_t equalLength(scope inout(char[]) a, scope inout(char[]) b) 340 { 341 return 0; 342 } 343 344 345 /******************************************************************************* 346 * 347 * Concatenates a series of strings. 348 * 349 * Params: 350 * Strs = a series of string symbols or literals to be concatenated 351 * buffer = optional buffer, implicitly allocated 352 * 353 * Returns: 354 * A $(D TempBuffer!char) containing the concatenated string. It is kept alive 355 * for as long as it is in scope. 356 * 357 **************************************/ 358 nothrow @nogc 359 template concat(Strs...) 360 { 361 import core.stdc..string : memcpy; 362 import fast.internal.helpers; 363 364 enum allocExpr = ctfeJoin!(Strs.length)("Strs[%s].length", "+") ~ "+1"; 365 366 auto concat(void* buffer = (mixin(allocExpr) <= allocaLimit) ? alloca(mixin(allocExpr)) : null) 367 { 368 immutable length = mixin(allocExpr); 369 auto result = TempBuffer!char( 370 (cast(char*) (buffer is null ? malloc(length) : buffer))[0 .. length - 1], 371 buffer is null); 372 373 char* p = result.ptr; 374 foreach (const(char[]) str; Strs) 375 { 376 memcpy (p, str.ptr, str.length); 377 p += str.length; 378 } 379 *p = '\0'; 380 381 return result; 382 } 383 } 384 385 386 387 private: 388 389 template SimdMatcher(string match) 390 { 391 import core.simd; 392 import std..string; 393 import fast.internal.sysdef; 394 395 static if (match != strip(match)) { 396 // Reinstanciate the template with any whitespace stripped from the match string. 397 alias SimdMatcher = SimdMatcher!(strip(match)); 398 } else { 399 /* For SSE in DMD I am blocked by: 400 * https://d.puremagic.com/issues/show_bug.cgi?id=8047 401 * https://d.puremagic.com/issues/show_bug.cgi?id=11585 402 */ 403 enum isUsingSSE = hasSSE2 && (isLDC || isGDC); 404 enum isSingleChar = match.length == 2 && match[0] == '='; 405 static if (isSingleChar) enum singleChar = match[1]; 406 static if (isUsingSSE) { 407 // Using MOVMSKB we get one boolean per bit in a 16-bit value. 408 alias Word = ubyte16; 409 alias Mask = uint; 410 enum sparseness = 1; 411 } else { 412 // The fallback is to work with machine words and tricky bit-twiddling algorithms. 413 // As a result we get machine words where matching bytes have the high bit set. 414 alias Word = size_t; 415 alias Mask = size_t; 416 enum sparseness = 8; 417 } 418 enum matchCode = genMatchCode!isUsingSSE("*wp"); 419 // Used in generic comparison code 420 enum lows = size_t.max / 0xFF; 421 enum highs = lows * 0x80; 422 423 enum betterUseTables = (isDMD && matchCode.complexity >= 4) 424 || (isGDC && matchCode.complexity >= 18) 425 || (isLDC && matchCode.complexity >= 18); 426 427 static if (betterUseTables) 428 { 429 immutable matchTable = genMatchTable(); 430 431 size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow @nogc 432 { 433 import core.stdc..string; 434 import fast.internal.helpers; 435 436 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 437 static if (isSingleChar) { 438 return memchr(b, singleChar, e - b) - b; 439 } else { 440 if (b >= e) return 0; 441 442 size_t off = cast(size_t) b % ushort.sizeof; 443 ushort* wp = cast(ushort*) (b - off); 444 ushort* we = cast(ushort*) alignPtrNext(e, ushort.sizeof); 445 if (off) { 446 // Throw away bytes from before start of the string 447 if (auto mask = matchTable[*wp] >> off) 448 return bsf(mask); 449 if (++wp is we) return size_t.max; 450 } 451 452 do { 453 if (auto mask = matchTable[*wp]) 454 return bsf(mask) + (cast(char*) wp - b); 455 } while (++wp !is we); 456 return size_t.max; 457 } 458 } 459 460 inout(char)* find(scope inout(char*) b) pure nothrow @nogc 461 { 462 import core.stdc..string; 463 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 464 static if (isSingleChar && singleChar == '\0') { 465 return strlen(b) + b; 466 } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code. 467 return memchr(b, singleChar, e - b) - b; 468 } else { 469 size_t off = cast(size_t) b % ushort.sizeof; 470 ushort* wp = cast(ushort*) (b - off); 471 if (off) { 472 // Throw away bytes from before start of the string 473 if (auto mask = matchTable[*wp] >> off) 474 return b + bsf(mask); 475 } 476 477 do { 478 if (auto mask = matchTable[*wp]) 479 return cast(inout(char)*) wp + bsf(mask); 480 } while (true); 481 } 482 } 483 } 484 else 485 { 486 import core.stdc..string, core.simd; 487 import std.simd; 488 import fast.internal.helpers; 489 490 version (LDC) { 491 import ldc.gccbuiltins_x86; 492 } else version (GNU) { 493 import gcc.builtins; 494 } 495 496 size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow 497 { 498 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 499 static if (isSingleChar) { 500 return memchr(b, singleChar, e - b) - b; 501 } else { 502 if (b >= e) return 0; 503 504 size_t off = cast(size_t) b % Word.sizeof; 505 Word* wp = cast(Word*) (b - off); 506 Word* we = cast(Word*) alignPtrNext(e, Word.sizeof); 507 if (off) { 508 // Throw away bytes from before start of the string 509 if (auto mask = (mixin(matchCode.code)) >> (off * sparseness)) 510 return bsf(mask) / sparseness; 511 if (++wp is we) return size_t.max; 512 } 513 514 do { 515 if (auto mask = mixin(matchCode.code)) 516 return bsf(mask) / sparseness + (cast(char*) wp - b); 517 } while (++wp !is we); 518 return size_t.max; 519 } 520 } 521 522 inout(char)* find(scope inout(char*) b) pure nothrow 523 { 524 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 525 static if (isSingleChar && singleChar == '\0') { 526 return strlen(b) + b; 527 } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code. 528 return cast(inout(char*)) memchr(b, singleChar, size_t.max); 529 } else { 530 size_t off = cast(size_t) b % Word.sizeof; 531 Word* wp = cast(Word*) (b - off); 532 if (off) { 533 // Throw away bytes from before start of the string 534 if (auto mask = (mixin(matchCode.code)) >> (off * sparseness)) 535 return b + bsf(mask) / sparseness; 536 ++wp; 537 } 538 539 do { 540 if (auto mask = mixin(matchCode.code)) 541 return cast(inout(char)*) wp + bsf(mask) / sparseness; 542 ++wp; 543 } while (true); 544 } 545 } 546 } 547 548 enum genMatchCode(bool sse)(string var) 549 { 550 import std.ascii, std.exception; 551 552 struct Code { 553 string code; 554 size_t complexity = 1; 555 } 556 Code result; 557 string[] nesting; 558 559 with (result) { 560 for (size_t i = 0; i < match.length;) { 561 string handleChar() { 562 char c = match[i+1]; 563 switch (c) { 564 case 0: 565 return `'\0'`; 566 case '\\': 567 return `'\\'`; 568 case "'"[0]: 569 return `'\''`; 570 case '\t': 571 return `'\t'`; 572 case '\r': 573 return `'\r'`; 574 case '\n': 575 return `'\n'`; 576 default: 577 return `'` ~ c ~ `'`; 578 } 579 } 580 581 if (match[i] == '=') { 582 static if (sse) { 583 code ~= "maskEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 584 } else if (match[i+1] == 0) { 585 code ~= "" ~ var ~ " - lows & ~" ~ var; 586 } else { 587 code ~= "(" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows & ~(" ~ var ~ " ^ lows * " ~ handleChar() ~ ")"; 588 } 589 i += 2; 590 } else if (match[i] == '!') { 591 static if (sse) { 592 code ~= "maskNotEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 593 } else if (match[i+1] == 0) { 594 code ~= "(~(" ~ var ~ " - lows) | " ~ var ~ ")"; 595 } else { 596 code ~= "(~((" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows) | (" ~ var ~ " ^ lows * " ~ handleChar() ~ "))"; 597 } 598 i += 2; 599 } else if (match[i] == '<') { 600 static if (sse) 601 code ~= "maskGreater(SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "), " ~ var ~ ")"; 602 else 603 code ~= "maskLessGeneric!" ~ handleChar() ~ "(" ~ var ~ ")"; 604 i += 2; 605 } else if (match[i] == '>') { 606 static if (sse) 607 code ~= "maskGreater(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 608 else 609 code ~= "maskGreaterGeneric!" ~ handleChar() ~ "(" ~ var ~ ")"; 610 i += 2; 611 } else if (match[i .. $].startsWith("or(")) { 612 static if (sse) { 613 nesting ~= ", "; 614 code ~= "or("; 615 } else { 616 nesting ~= " | "; 617 } 618 complexity++; 619 i += 3; 620 } else if (match[i .. $].startsWith("and(")) { 621 static if (sse) { 622 nesting ~= ", "; 623 code ~= "and("; 624 } else { 625 nesting ~= " & "; 626 } 627 complexity++; 628 i += 4; 629 } else if (match[i] == ',') { 630 enforce(nesting.length, "',' on top level"); 631 code ~= nesting[$-1]; 632 i++; 633 } else if (match[i] == ')') { 634 enforce(nesting.length, "Unbalanced closing parenthesis"); 635 nesting.length--; 636 static if (sse) { 637 code ~= ")"; 638 } 639 i++; 640 } else if (match[i].isWhite) { 641 i++; 642 } else { 643 throw new Exception(format("Unexpected character at index %s: 0x%02x", i, match[i])); 644 } 645 } 646 static if (sse) { 647 code = "__builtin_ia32_pmovmskb128(" ~ code ~ ")"; 648 } else { 649 code = "(" ~ code ~ ") & highs"; 650 } 651 } 652 return result; 653 } 654 655 enum genMatchTable() 656 { 657 ubyte[1 << 16] table; 658 ubyte[256] lut; 659 foreach (uint i; 0 .. 256) { 660 lut[i] = (mixin(genMatchCode!false("i").code) >> 7) & 1; 661 } 662 foreach (i; 0 .. 256) foreach (k; 0 .. 256) { 663 table[i * 256 + k] = cast(ubyte) (lut[i] << 1 | lut[k]); 664 } 665 return table; 666 } 667 } 668 } 669 670 /** 671 * Template for searching a fixed value in a word sized memory block (i.e. 1, 2, 4 or 8 bytes). 672 * 673 * Params: 674 * value = The value you are looking for. 675 * word = The data word to search for the value. 676 * 677 * Returns: 678 * non-zero, iff the value is contained in the data word. 679 * Specifically it returns 0x80 for every byte of the word that was a match and 0x00 for others. 680 * 681 * See_Also: 682 * http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord 683 */ 684 T maskEqualGeneric(ubyte value, T)(T word) @safe pure nothrow 685 if (isUnsigned!T) 686 { 687 // This value results in 0x01 for each byte of a T value. 688 enum lows = T.max / 0xFF; 689 static if (value == 0) { 690 enum highs = lows * 0x80; 691 return (word - lows) & ~word & highs; 692 } else { 693 enum xor = lows * value; 694 return maskEqualGeneric!0(word ^ xor); 695 } 696 } 697 698 T maskLessGeneric(ubyte value, T)(T word) @safe pure nothrow 699 if (isUnsigned!T && value <= 128) 700 { 701 enum lows = T.max / 0xFF; 702 enum highs = lows * 0x80; 703 return (word - lows * value) & ~word & highs; 704 } 705 706 T maskGreaterGeneric(ubyte value, T)(T word) @safe pure nothrow 707 if (isUnsigned!T && value <= 127) 708 { 709 enum lows = T.max / 0xFF; 710 enum highs = lows * 0x80; 711 return (word + lows * (127 - value) | word) & highs; 712 } 713 714 T orGeneric(T)(T a, T b) @safe pure nothrow 715 if (isUnsigned!T) 716 { 717 return a | b; 718 }