1 /**
2 * Fast, non-allocating string functions.
3 *
4 * Authors:
5 * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
6 *
7 * Copyright:
8 * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
9 *
10 * License:
11 * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
12 */
13 module fast..string;
14
15 import core.bitop;
16 import core.simd;
17 import core.stdc.stdlib;
18
19 version (GNU) import gcc.attribute;
20
21 import std.algorithm;
22 import std.range;
23 import std.stdio;
24 import std..string;
25 import std.traits;
26
27 import fast.buffer;
28
29
30 /**
31 * Splits a string in two around one or more compile-time known code units.
32 *
33 * Params:
34 * match = An expression that matches all characters around which a split should occur.
35 * str = The string to scan.
36 * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here.
37 * after = The part after the split is stored here. If no character in $(D match) is found, $(D null) is returned here.
38 * splitter = If not $(D null), this pointer will receive a copy of the splitting char.
39 *
40 * Returns:
41 * $(D true), iff a split occured.
42 */
43 bool split(string match)(scope inout(char[]) str, ref inout(char)[] before, ref inout(char)[] after, char* splitter = null)
44 {
45 immutable pos = min(str.length, SimdMatcher!match.find(str.ptr, str.ptr + str.length));
46 before = str[0 .. pos];
47 if (pos < str.length) {
48 after = str[pos+1 .. $];
49 if (splitter) *splitter = str[pos];
50 return true;
51 }
52 after = null;
53 return false;
54 }
55
56 /**
57 * Similar to the overload for strings, this function works a little faster as it lacks boundary checks.
58 * It assumes that one of the characters in $(D match) is actually contained in the string.
59 *
60 * Params:
61 * match = An expression that matches all characters around which a split should occur.
62 * ptr = The string to scan.
63 * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here.
64 * after = The pointer to the part after the split is stored here.
65 *
66 * Returns:
67 * The char that caused the split. (From $(D match).)
68 */
69 char split(string match)(scope inout(char*) ptr, ref inout(char)[] before, ref inout(char)* after)
70 {
71 immutable pos = SimdMatcher!match.find(str.ptr);
72 before = ptr[0 .. pos];
73 after = ptr + pos + 1;
74 return ptr[pos];
75 }
76
77
78 /*******************************************************************************
79 *
80 * Finds the first occurrence of a set of compile-time known code units in a
81 * string. While the algorithm is `O(n)` in relation to the count of given code
82 * units, the overhead when using it on short strings weights more for only 1 or
83 * 2 code units.
84 *
85 * Params:
86 * match = An expression that matches all characters around which a split
87 * should occur.
88 * str = The string to search for a code unit.
89 *
90 * Returns:
91 * If a match is found, the index into the string is returned.
92 * Otherwise an invalid index is returned. Check with
93 * `if (result < str.length)`.
94 *
95 * See_Also:
96 * split,
97 * $(LINK2 http://mischasan.wordpress.com/2011/11/09/the-generic-sse2-loop/,
98 * The Generic SSE2 Loop)
99 *
100 * Example:
101 * ---
102 * // Check if there is a '/' or '\' in the string
103 * auto pos = str.find!(`or(=/,=\)`);
104 * if (pos < str.length) { }
105 * ---
106 **************************************/
107 size_t find(string match)(in char[] str) pure nothrow
108 {
109 return SimdMatcher!match.find(str.ptr, str.ptr + str.length);
110 }
111
112 /*******************************************************************************
113 *
114 * Same as the overload for strings, but with only a char*, making it faster as
115 * it cannot do a boundary check.
116 *
117 * Sometimes when looking for a character it is helpful to append it as a
118 * sentinel to the char buffer and then use this function instead of the slower
119 * one that checks the boundary constantly.
120 *
121 * Example:
122 * ---
123 * // Find a ']' in a buffer of 1024 bytes using an additional sentinel.
124 * size_t length = 1024;
125 * char[] buffer = new char[](length+1);
126 * buffer[length] = ']';
127 * auto pos = buffer.ptr.find!("=]");
128 * if (pos < length) { // was an actual find before the sentinel }
129 * ---
130 **************************************/
131 inout(char)* find(string match)(inout(char*) ptr) pure nothrow
132 {
133 return SimdMatcher!match.find(ptr);
134 }
135
136
137 bool keyword1(string key)(in char[] str,
138 scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null)
139 {
140 auto strPtr = str.ptr;
141 auto keyPtr = key.ptr;
142 auto keyEnd = keyPtr + key.length;
143
144 while (keyPtr !is keyEnd)
145 {
146 while (*strPtr == '\\')
147 if (!mismatcher(keyPtr, strPtr))
148 return false;
149
150 if (*strPtr == '"' || *strPtr != *keyPtr)
151 return false;
152
153 strPtr++;
154 keyPtr++;
155 }
156 return true;
157 }
158
159
160 bool keyword2(string key)(in char[] str,
161 scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null)
162 {
163 version (LDC) import ldc.gccbuiltins_x86;
164
165 /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when
166 * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD
167 * register.
168 * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after,
169 * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we
170 * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties:
171 * - It consists of as many SIMD words as the key is long.
172 * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using
173 * disjunct prime factors for the key length and the SIMD word size.
174 * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor
175 * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that
176 * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift).
177 */
178
179 // 'key' length rounded up to next odd value is the number of SIMD words we need.
180 enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ?
181 align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation
182 .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array;
183 align(16) static immutable char[Word.sizeof] dquote = '"';
184 align(16) static immutable char[Word.sizeof] bslash = '\\';
185 enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }();
186
187 const(char)* strPtr = str.ptr;
188 immutable(char)* keyPtr = keyData.ptr;
189 auto bsWord = *cast(immutable Word*) &bslash;
190 auto dqWord = *cast(immutable Word*) &dquote;
191
192 do
193 {
194 // writeln("enter loop");
195 // Calculate SSE word boundary before 'str'
196 size_t strOff = cast(size_t) strPtr % Word.sizeof;
197 Word strWord = *cast(Word*) (strPtr - strOff);
198 size_t keyPos = keyPtr - keyData.ptr;
199 size_t keyOff = (strOff - keyPos) % Word.sizeof;
200 Word keyWord = (cast(Word*) keyData.ptr)[keyOff * mul % keyLenOdd + (keyOff + keyPos) / Word.sizeof];
201
202 // Escape seqences have priority. 'key' may contain backslashes as part of the text, but in 'str' a backslash
203 // at the same position is actually the begin of the escape sequence "\\".
204 Word bsMask = strWord.maskEqual(bsWord);
205 // If after processing backslashes there is a double-quote in 'str' we must not match it with a double-quote in
206 // 'key', since it is the delimiter of 'str'.
207 Word dqMask = strWord.maskEqual(dqWord);
208 // How many bytes of 'key' and 'str' match in our 'Word' ?
209 Word missMask = strWord.maskNotEqual(keyWord);
210 // Merge mismatch, backslash and double-quote masks and move them into a non-SSE register.
211 Word allMasks = or(missMask, or(bsMask, dqMask));
212 uint skip = bsf((__builtin_ia32_pmovmskb128(allMasks) | 1 << Word.sizeof) >> strOff);
213 // writeln(keyPtr[0 .. 5]);
214 // writeln(strPtr[0 .. 5]);
215 // writeln(skip);
216 strPtr += skip;
217 keyPtr += skip;
218
219 // Have we matched enough bytes to reach the end of 'key' ?
220 if (keyPtr - keyData.ptr >= key.length)
221 return true;
222
223 // When we find a mismatch between 'key' and 'str', we try to call a provided helper function.
224 // It may decode escape sequences in 'str' and recover from the state.
225 // If that fails we accept the mismatch and return 'false'.
226 // writefln("Key: %s, Str %s", *keyPtr, *strPtr);
227 // const(char*) strPtrOld = strPtr;
228 // immutable(char*) keyPtrOld = keyPtr;
229 if (strOff + skip < Word.sizeof && !(mismatcher && mismatcher(keyPtr, strPtr)))
230 {
231 // writefln("Key: %s, Str %s", *keyPtr, *strPtr);
232 return false;
233 }
234 // writefln("Key: %s, Str %s", *keyPtr, *strPtr);
235 }
236 while (keyPtr - keyData.ptr < key.length);
237
238 return true;
239 }
240
241
242 bool keyword3(string key)(in char[] str, bool function(ref immutable(char)*, ref const(char)*) mismatcher = null)
243 {
244 version (LDC) import ldc.gccbuiltins_x86;
245 version (GNU) import gcc.builtins;
246
247 /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when
248 * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD
249 * register.
250 * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after,
251 * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we
252 * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties:
253 * - It consists of as many SIMD words as the key is long.
254 * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using
255 * disjunct prime factors for the key length and the SIMD word size.
256 * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor
257 * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that
258 * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift).
259 */
260
261 // 'key' length rounded up to next odd value is the number of SIMD words we need.
262 enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ?
263 align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation
264 .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array;
265 align(16) static immutable char[Word.sizeof] dqbs = `\"""""""""""""""`;
266 enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }();
267
268 // Calculate SSE word boundary before 'str'
269 uint off = cast(uint) str.ptr % Word.sizeof;
270 // SSE aligned pointer <= 'str.ptr'.
271 auto strPtr = cast(const(Word)*) (str.ptr - off);
272 auto keyPtr = cast(immutable(Word)*) keyData.ptr + off * mul % keyLenOdd;
273 auto keyStart = cast(immutable(char)*) keyPtr + off;
274 Word strWord = *strPtr;
275
276 LoadKey:
277 auto keyEnd = keyStart + key.length;
278
279 Compare:
280 // Get bitmask of special characters in 'str'.
281 uint escMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*cast(Word*) &dqbs, strWord, 0b_0_00_00_00));
282 // writeln("Called a");
283 // Get bitmask of characters from 'key' and 'str' that don't match.
284 uint missMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*keyPtr, strWord, 0b_0_01_10_00));
285 // writeln("Called b");
286 // Create a merged mask for both and an additional bit at position 16, serving as a delimiter for 'bsf'.
287 uint mask = (escMask | missMask) & (uint.max << off);
288
289 // No bit set means all 16 bytes are equal and there are no escape characters. That's as good as it gets.
290 if (!mask)
291 {
292 // Jump forward by a word size and see if we successfully compared all bytes to the end of our 'key'.
293 keyPtr += 16;
294 if (cast(immutable(char)*) keyPtr >= keyEnd)
295 return true;
296 // Otherwise continue with the next set of 16 bytes.
297 strPtr += 16;
298 off = 0;
299 goto Compare;
300 }
301
302 // One of two cases ...
303 off = bsf(mask);
304
305 // 1) Did the mismatch occur past the end of 'key' ? Then we compared succesfully.
306 if (cast(immutable(char)*) keyPtr + off >= keyEnd)
307 return true;
308
309 // 2) It must be a special character or actual mismatch, let 'mismatcher' decide.
310 // writefln("Skipping: %s", (cast(const(char)*) strPtr)[0 .. off]);
311 auto strChP = cast(const(char)*) strPtr + off;
312 auto strChPOld = strChP;
313 auto keyChP = cast(immutable(char)*) keyPtr + off;
314 bool goodToGo = mismatcher(keyChP, strChP);
315
316 // writefln("Mismatcher used %s key chars, %s str chars and returned: %s", keyAdd, strAdd, goodToGo);
317 if (keyChP >= keyEnd)
318 return true;
319 if (!goodToGo)
320 return false;
321
322 // Arriving here we just decoded an escape sequence and have to adjust our pointers.
323 auto keyPos = keyChP - keyStart;
324 off += strChP - strChPOld;
325 if (off >= 16)
326 {
327 strPtr += off / 16;
328 strWord = *strPtr;
329 off %= 16;
330 }
331 auto baseOff = (off - keyPos) & 15;
332 keyPtr = cast(immutable(Word)*) keyData.ptr + baseOff * mul % keyLenOdd;
333 keyStart = cast(immutable(char)*) keyPtr + baseOff;
334 keyPtr += (baseOff + keyPos) / 16;
335 goto LoadKey;
336 }
337
338
339 size_t equalLength(scope inout(char[]) a, scope inout(char[]) b)
340 {
341 return 0;
342 }
343
344
345 /*******************************************************************************
346 *
347 * Concatenates a series of strings.
348 *
349 * Params:
350 * Strs = a series of string symbols or literals to be concatenated
351 * buffer = optional buffer, implicitly allocated
352 *
353 * Returns:
354 * A $(D TempBuffer!char) containing the concatenated string. It is kept alive
355 * for as long as it is in scope.
356 *
357 **************************************/
358 nothrow @nogc
359 template concat(Strs...)
360 {
361 import core.stdc..string : memcpy;
362 import fast.internal.helpers;
363
364 enum allocExpr = ctfeJoin!(Strs.length)("Strs[%s].length", "+") ~ "+1";
365
366 auto concat(void* buffer = (mixin(allocExpr) <= allocaLimit) ? alloca(mixin(allocExpr)) : null)
367 {
368 immutable length = mixin(allocExpr);
369 auto result = TempBuffer!char(
370 (cast(char*) (buffer is null ? malloc(length) : buffer))[0 .. length - 1],
371 buffer is null);
372
373 char* p = result.ptr;
374 foreach (const(char[]) str; Strs)
375 {
376 memcpy (p, str.ptr, str.length);
377 p += str.length;
378 }
379 *p = '\0';
380
381 return result;
382 }
383 }
384
385
386
387 private:
388
389 template SimdMatcher(string match)
390 {
391 import core.simd;
392 import std..string;
393 import fast.internal.sysdef;
394
395 static if (match != strip(match)) {
396 // Reinstanciate the template with any whitespace stripped from the match string.
397 alias SimdMatcher = SimdMatcher!(strip(match));
398 } else {
399 /* For SSE in DMD I am blocked by:
400 * https://d.puremagic.com/issues/show_bug.cgi?id=8047
401 * https://d.puremagic.com/issues/show_bug.cgi?id=11585
402 */
403 enum isUsingSSE = hasSSE2 && (isLDC || isGDC);
404 enum isSingleChar = match.length == 2 && match[0] == '=';
405 static if (isSingleChar) enum singleChar = match[1];
406 static if (isUsingSSE) {
407 // Using MOVMSKB we get one boolean per bit in a 16-bit value.
408 alias Word = ubyte16;
409 alias Mask = uint;
410 enum sparseness = 1;
411 } else {
412 // The fallback is to work with machine words and tricky bit-twiddling algorithms.
413 // As a result we get machine words where matching bytes have the high bit set.
414 alias Word = size_t;
415 alias Mask = size_t;
416 enum sparseness = 8;
417 }
418 enum matchCode = genMatchCode!isUsingSSE("*wp");
419 // Used in generic comparison code
420 enum lows = size_t.max / 0xFF;
421 enum highs = lows * 0x80;
422
423 enum betterUseTables = (isDMD && matchCode.complexity >= 4)
424 || (isGDC && matchCode.complexity >= 18)
425 || (isLDC && matchCode.complexity >= 18);
426
427 static if (betterUseTables)
428 {
429 immutable matchTable = genMatchTable();
430
431 size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow @nogc
432 {
433 import core.stdc..string;
434 import fast.internal.helpers;
435
436 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
437 static if (isSingleChar) {
438 return memchr(b, singleChar, e - b) - b;
439 } else {
440 if (b >= e) return 0;
441
442 size_t off = cast(size_t) b % ushort.sizeof;
443 ushort* wp = cast(ushort*) (b - off);
444 ushort* we = cast(ushort*) alignPtrNext(e, ushort.sizeof);
445 if (off) {
446 // Throw away bytes from before start of the string
447 if (auto mask = matchTable[*wp] >> off)
448 return bsf(mask);
449 if (++wp is we) return size_t.max;
450 }
451
452 do {
453 if (auto mask = matchTable[*wp])
454 return bsf(mask) + (cast(char*) wp - b);
455 } while (++wp !is we);
456 return size_t.max;
457 }
458 }
459
460 inout(char)* find(scope inout(char*) b) pure nothrow @nogc
461 {
462 import core.stdc..string;
463 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
464 static if (isSingleChar && singleChar == '\0') {
465 return strlen(b) + b;
466 } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code.
467 return memchr(b, singleChar, e - b) - b;
468 } else {
469 size_t off = cast(size_t) b % ushort.sizeof;
470 ushort* wp = cast(ushort*) (b - off);
471 if (off) {
472 // Throw away bytes from before start of the string
473 if (auto mask = matchTable[*wp] >> off)
474 return b + bsf(mask);
475 }
476
477 do {
478 if (auto mask = matchTable[*wp])
479 return cast(inout(char)*) wp + bsf(mask);
480 } while (true);
481 }
482 }
483 }
484 else
485 {
486 import core.stdc..string, core.simd;
487 import std.simd;
488 import fast.internal.helpers;
489
490 version (LDC) {
491 import ldc.gccbuiltins_x86;
492 } else version (GNU) {
493 import gcc.builtins;
494 }
495
496 size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow
497 {
498 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
499 static if (isSingleChar) {
500 return memchr(b, singleChar, e - b) - b;
501 } else {
502 if (b >= e) return 0;
503
504 size_t off = cast(size_t) b % Word.sizeof;
505 Word* wp = cast(Word*) (b - off);
506 Word* we = cast(Word*) alignPtrNext(e, Word.sizeof);
507 if (off) {
508 // Throw away bytes from before start of the string
509 if (auto mask = (mixin(matchCode.code)) >> (off * sparseness))
510 return bsf(mask) / sparseness;
511 if (++wp is we) return size_t.max;
512 }
513
514 do {
515 if (auto mask = mixin(matchCode.code))
516 return bsf(mask) / sparseness + (cast(char*) wp - b);
517 } while (++wp !is we);
518 return size_t.max;
519 }
520 }
521
522 inout(char)* find(scope inout(char*) b) pure nothrow
523 {
524 // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
525 static if (isSingleChar && singleChar == '\0') {
526 return strlen(b) + b;
527 } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code.
528 return cast(inout(char*)) memchr(b, singleChar, size_t.max);
529 } else {
530 size_t off = cast(size_t) b % Word.sizeof;
531 Word* wp = cast(Word*) (b - off);
532 if (off) {
533 // Throw away bytes from before start of the string
534 if (auto mask = (mixin(matchCode.code)) >> (off * sparseness))
535 return b + bsf(mask) / sparseness;
536 ++wp;
537 }
538
539 do {
540 if (auto mask = mixin(matchCode.code))
541 return cast(inout(char)*) wp + bsf(mask) / sparseness;
542 ++wp;
543 } while (true);
544 }
545 }
546 }
547
548 enum genMatchCode(bool sse)(string var)
549 {
550 import std.ascii, std.exception;
551
552 struct Code {
553 string code;
554 size_t complexity = 1;
555 }
556 Code result;
557 string[] nesting;
558
559 with (result) {
560 for (size_t i = 0; i < match.length;) {
561 string handleChar() {
562 char c = match[i+1];
563 switch (c) {
564 case 0:
565 return `'\0'`;
566 case '\\':
567 return `'\\'`;
568 case "'"[0]:
569 return `'\''`;
570 case '\t':
571 return `'\t'`;
572 case '\r':
573 return `'\r'`;
574 case '\n':
575 return `'\n'`;
576 default:
577 return `'` ~ c ~ `'`;
578 }
579 }
580
581 if (match[i] == '=') {
582 static if (sse) {
583 code ~= "maskEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
584 } else if (match[i+1] == 0) {
585 code ~= "" ~ var ~ " - lows & ~" ~ var;
586 } else {
587 code ~= "(" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows & ~(" ~ var ~ " ^ lows * " ~ handleChar() ~ ")";
588 }
589 i += 2;
590 } else if (match[i] == '!') {
591 static if (sse) {
592 code ~= "maskNotEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
593 } else if (match[i+1] == 0) {
594 code ~= "(~(" ~ var ~ " - lows) | " ~ var ~ ")";
595 } else {
596 code ~= "(~((" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows) | (" ~ var ~ " ^ lows * " ~ handleChar() ~ "))";
597 }
598 i += 2;
599 } else if (match[i] == '<') {
600 static if (sse)
601 code ~= "maskGreater(SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "), " ~ var ~ ")";
602 else
603 code ~= "maskLessGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
604 i += 2;
605 } else if (match[i] == '>') {
606 static if (sse)
607 code ~= "maskGreater(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
608 else
609 code ~= "maskGreaterGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
610 i += 2;
611 } else if (match[i .. $].startsWith("or(")) {
612 static if (sse) {
613 nesting ~= ", ";
614 code ~= "or(";
615 } else {
616 nesting ~= " | ";
617 }
618 complexity++;
619 i += 3;
620 } else if (match[i .. $].startsWith("and(")) {
621 static if (sse) {
622 nesting ~= ", ";
623 code ~= "and(";
624 } else {
625 nesting ~= " & ";
626 }
627 complexity++;
628 i += 4;
629 } else if (match[i] == ',') {
630 enforce(nesting.length, "',' on top level");
631 code ~= nesting[$-1];
632 i++;
633 } else if (match[i] == ')') {
634 enforce(nesting.length, "Unbalanced closing parenthesis");
635 nesting.length--;
636 static if (sse) {
637 code ~= ")";
638 }
639 i++;
640 } else if (match[i].isWhite) {
641 i++;
642 } else {
643 throw new Exception(format("Unexpected character at index %s: 0x%02x", i, match[i]));
644 }
645 }
646 static if (sse) {
647 code = "__builtin_ia32_pmovmskb128(" ~ code ~ ")";
648 } else {
649 code = "(" ~ code ~ ") & highs";
650 }
651 }
652 return result;
653 }
654
655 enum genMatchTable()
656 {
657 ubyte[1 << 16] table;
658 ubyte[256] lut;
659 foreach (uint i; 0 .. 256) {
660 lut[i] = (mixin(genMatchCode!false("i").code) >> 7) & 1;
661 }
662 foreach (i; 0 .. 256) foreach (k; 0 .. 256) {
663 table[i * 256 + k] = cast(ubyte) (lut[i] << 1 | lut[k]);
664 }
665 return table;
666 }
667 }
668 }
669
670 /**
671 * Template for searching a fixed value in a word sized memory block (i.e. 1, 2, 4 or 8 bytes).
672 *
673 * Params:
674 * value = The value you are looking for.
675 * word = The data word to search for the value.
676 *
677 * Returns:
678 * non-zero, iff the value is contained in the data word.
679 * Specifically it returns 0x80 for every byte of the word that was a match and 0x00 for others.
680 *
681 * See_Also:
682 * http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
683 */
684 T maskEqualGeneric(ubyte value, T)(T word) @safe pure nothrow
685 if (isUnsigned!T)
686 {
687 // This value results in 0x01 for each byte of a T value.
688 enum lows = T.max / 0xFF;
689 static if (value == 0) {
690 enum highs = lows * 0x80;
691 return (word - lows) & ~word & highs;
692 } else {
693 enum xor = lows * value;
694 return maskEqualGeneric!0(word ^ xor);
695 }
696 }
697
698 T maskLessGeneric(ubyte value, T)(T word) @safe pure nothrow
699 if (isUnsigned!T && value <= 128)
700 {
701 enum lows = T.max / 0xFF;
702 enum highs = lows * 0x80;
703 return (word - lows * value) & ~word & highs;
704 }
705
706 T maskGreaterGeneric(ubyte value, T)(T word) @safe pure nothrow
707 if (isUnsigned!T && value <= 127)
708 {
709 enum lows = T.max / 0xFF;
710 enum highs = lows * 0x80;
711 return (word + lows * (127 - value) | word) & highs;
712 }
713
714 T orGeneric(T)(T a, T b) @safe pure nothrow
715 if (isUnsigned!T)
716 {
717 return a | b;
718 }