fast.parsing source code

1 /***************************************************************************************************
2  * 
3  * Text parsing functionality.
4  * 
5  * Authors:
6  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7  * 
8  * Copyright:
9  *   © 2015 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10  * 
11  * License:
12  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13  * 
14  **************************************************************************************************/
15 module fast.parsing;
16 
17 import std.traits;
18 import fast.helpers;
19 
20 
21 /+
22  ╔══════════════════════════════════════════════════════════════════════════════
23  ║ ⚑ Hexadecimal
24  ╚══════════════════════════════════════════════════════════════════════════════
25  +/
26 
27 /*******************************************************************************
28  * 
29  * Decodes a single hexadecimal character.
30  *
31  * Params:
32  *   c = The hexadecimal digit.
33  *
34  * Returns:
35  *   `c` converted to an integer.
36  *
37  **************************************/
38 @safe @nogc pure nothrow
39 uint hexDecode(char c)
40 {
41 	return c + 9 * (c >> 6) & 15;
42 }
43 
44 
45 @nogc pure nothrow
46 uint hexDecode4(ref const(char)* hex)
47 {
48 	uint x = *cast(uint*) &hex;
49 	hex += 4;
50 	x = (x & 0x0F0F0F0F) + 9 * (x >> 6 & 0x01010101);
51 	version (LittleEndian)
52 	{
53 		return x >> 24 | x >> 12 & 0xF0 | x & 0xF00 | x << 12 & 0xF000;
54 	}
55 	else
56 	{
57 		x = (x | x >> 4) & 0x00FF00FF;
58 		return (x | x >> 8) & 0x0000FFFF;
59 	}
60 }
61 
62 
63 @nogc pure nothrow
64 inout(char)* hexDecode4(ref inout(char)* hex, out uint result)
65 {
66 	foreach (i; 0 .. 4)
67 	{
68 		char ch = hex[i];
69 		result *= 16;
70 		if (ch >= '0' && ch <= '9')
71 		{
72 			result += ch - '0';
73 		}
74 		else
75 		{
76 			ch |= 0x20;
77 			if (ch >= 'a' && ch <= 'f')
78 				result += ch - 'a' + 10;
79 			else
80 				return hex + i;
81 		}
82 	}
83 	hex += 4;
84 	return null;
85 }
86 
87 
88 /+
89  ╔══════════════════════════════════════════════════════════════════════════════
90  ║ ⚑ Numbers
91  ╚══════════════════════════════════════════════════════════════════════════════
92  +/
93 
94 
95 /// Options for `parseNumber`.
96 struct NumberOptions
97 {
98 	/// Allows the minus sign as the first character and thus negative numbers.
99 	bool minus;
100 }
101 
102 
103 /*******************************************************************************
104  * 
105  * Parse a number from a character read pointer.
106  * 
107  * On success, the read pointer is set behind the number.
108  *
109  * Params:
110  *   opt = Selects features for the implementation. Less features make the
111  *         parser faster.
112  *   str = The read pointer.
113  *   n = A reference to a number to be overwritten with the result.
114  *
115  * Returns:
116  *   An indication of success. Typically the function fails when a number cannot
117  *   be stored in an integer of the given size or invalid characters are
118  *   encountered.
119  *
120  **************************************/
121 @nogc pure nothrow
122 bool parseNumber(NumberOptions opt, N)(ref const(char)* str, ref N n) if (isNumeric!N)
123 {
124 	import core.bitop;
125 	import std.range;
126 	import fast.helpers;
127 
128 	// Integer types larger than the mantissa of N.
129 	static if (N.sizeof <= size_t.sizeof)
130 	{
131 		alias U = size_t;
132 		alias I = ptrdiff_t;
133 	}
134 	else
135 	{
136 		alias U = ulong;
137 		alias I = long;
138 	}
139 	
140 	// Largest value of type U that can be multiplied by 10 and have a digit added without overflow.
141 	enum canHoldOneMoreDigit = (U.max - 9) / 10;
142 	static if (isFloatingPoint!N)
143 	{
144 		enum significandRightShift = 8 * U.sizeof - N.mant_dig + 1;
145 		enum lastSignificandBit = U(2) << 8 * U.sizeof - N.mant_dig;
146 		enum firstFractionBit   = U(1) << 8 * U.sizeof - N.mant_dig;
147 		enum remainderBits = U.max - N.mant_dig + 1;
148 		enum expShift = N.mant_dig - 1;
149 		enum expBias = N.max_exp - 1;
150 	}
151 	
152 	static if (isFloatingPoint!N)
153 	{
154 		alias pow5Max = PowData!(U, 5).powMax;
155 		alias pow5    = PowData!(U, 5).pows;
156 
157 		// Largest power of 10 that fits into a float of type N. The factor 5 here is correct, as the 2s
158 		// go in as an increment in the exponent, that is neglectable here.
159 		enum pow10MaxF = {
160 			U v = 1; uint exp;
161 			while (v <= ((U(1) << N.mant_dig) - 1) / 5) { v *= 5; exp++; }
162 			return exp;
163 		}();
164 
165 		static immutable N[pow10MaxF] pow10F = N(10).recurrence!((a, n) => 10 * a[n-1]).take(pow10MaxF).array;
166 	}
167 	else
168 	{
169 		alias pow10Max = PowData!(U, 10).powMax;
170 		alias pow10    = PowData!(U, 10).pows;
171 	}
172 
173 	const(char)* p = str;
174 	const(char)* point = null;
175 	U significand = 0;
176 	size_t exponent = 0;
177 	size_t expAdjust = void;
178 	bool expSign = void;
179 	static if (isFloatingPoint!N)
180 	{
181 		U exp2 = void;
182 		bool roundUp = false;
183 	}
184 	
185 	/////////////////// SIGN BIT HANDLING ///////////////////
186 	
187 	// Check for the sign.
188 	static if (opt.minus)
189 	{
190 		bool sign = (*p == '-');
191 		if (sign)
192 			p++;
193 	}
194 	
195 	/////////////////// INTEGRAL PART OF SIGNIFICAND ///////////////////
196 	
197 	uint digit = *p - '0';
198 	if (digit == 0)
199 	{
200 		// We have a single zero.
201 		p++;
202 	}
203 	else if (digit <= 9)
204 	{
205 		// Regular case of one or more digits.
206 		do
207 		{
208 			if (significand > canHoldOneMoreDigit)
209 				goto BigMantissa;
210 		BigMantissaNotSoMuch:
211 			significand = 10 * significand + digit;
212 			digit = *++p - '0';
213 		}
214 		while (digit <= 9);
215 	}
216 	else return false;
217 	
218 	/////////////////// FRACTIONAL PART OF SIGNIFICAND ///////////////////
219 	
220 	if (*p == '.')
221 	{
222 		point = ++p;
223 		digit = *p - '0';
224 		if (digit > 9)
225 			return false;
226 		do
227 		{
228 			if (significand > canHoldOneMoreDigit)
229 				goto BigMantissa;
230 			significand = 10 * significand + digit;
231 			digit = *++p - '0';
232 		}
233 		while (digit <= 9);
234 	}
235 	
236 	/////////////////// EXPONENT HANDLING ///////////////////
237 
238 	expAdjust = (point is null) ? 0 : p - point;
239 	if ((*p | 0x20) == 'e')
240 	{
241 		p++;
242 		expSign = (*p == '-');
243 		if (expSign || *p == '+')
244 			p++;
245 		digit = *p - '0';
246 		if (digit > 9)
247 			return false;
248 		do
249 		{
250 			if (exponent > canHoldOneMoreDigit)
251 				goto BigExponent;
252 			exponent = 10 * exponent + digit;
253 			digit = *++p - '0';
254 		}
255 		while (digit <= 9);
256 	}
257 	
258 	if (expAdjust)
259 	{
260 		if (expSign)
261 		{
262 			if (exponent > size_t.max - expAdjust)
263 				goto BigExponentAdjustForDecimalPoint;
264 			exponent += expAdjust;
265 		}
266 		else if (exponent >= expAdjust)
267 		{
268 			exponent -= expAdjust;
269 		}
270 		else
271 		{
272 			// Amount of fraction digits turns exponent from positive to negative.
273 			expAdjust -= exponent;
274 			exponent = expAdjust;
275 			expSign = true;
276 		}
277 	}
278 
279 	/////////////////// RESULT ASSEMBLY ///////////////////
280 
281 	static if (isFloatingPoint!N)
282 	{
283 		if (significand == 0 || exponent == 0)
284 		{
285 			// The significand is the unsigned result.
286 			static if (opt.minus)
287 				if (sign)
288 					n = -N(significand);
289 			n = +N(significand);
290 			str = p;
291 			return true;
292 		}
293 
294 		// Try the floating-point fast path: The significand's bits, as well as the 10^x exponent can be expressed
295 		// accurately as a float of type N. We just need to divide or multiply them based on the signedness of the
296 		// exponent.
297 		exp2 = bsr(significand);
298 		if (exp2 - bsf(significand) < N.mant_dig && exponent <= pow10MaxF)
299 		{
300 			N b = pow10F[exponent - 1];
301 			static if (opt.minus)
302 				if (sign)
303 					b = -b;
304 			n = expSign ? significand / b : significand * b;
305 			str = p;
306 			return true;
307 		}
308 		else if (exponent <= pow5Max)
309 		{
310 			// Special case, mostly to handle the little bit of extra precision that comes from
311 			// converting a double to its string representation. The last base-10 digit doesn't quite
312 			// fit back into a double, but we don't need to resort to arbitrary precision math just yet.
313 			if (expSign)
314 			{
315 				U divisor = pow5[exponent - 1];
316 				static if (isAMD64 && (isLDC || isGDC))
317 				{
318 					// AMD64 can divide 128-bit numbers by 64-bit numbers directly.
319 					ubyte expDivisor = clz(divisor);
320 					divisor <<= expDivisor;
321 					exp2 = expDivisor - exponent - bigDiv(significand, divisor);
322 					significand <<= 1;
323 				}
324 				else
325 				{
326 					// We perform an iterative division.
327 					U dividend = significand << 8 * U.sizeof - 1 - exp2;
328 					U quotient = dividend / divisor;
329 					dividend %= divisor;
330 
331 					ubyte lzs = clz(quotient);
332 					exp2 -= exponent + lzs;
333 					significand = quotient << ++lzs;
334 					size_t accuracy = 8 * U.sizeof - lzs;
335 					while (accuracy < N.mant_dig)
336 					{
337 						lzs = clz(dividend);
338 						dividend <<= lzs;
339 						quotient = dividend / divisor;
340 						dividend %= divisor;
341 						significand |= quotient << (8 * U.sizeof - lzs) >> accuracy;
342 						accuracy += lzs;
343 					}
344 				}
345 
346 				// Assemble floating point value from bits.
347 				roundUp = (significand & firstFractionBit) != 0;
348 				significand >>= significandRightShift;
349 				if (roundUp)
350 				{
351 					significand++;
352 					significand &= ~(U(1) << N.mant_dig - 1);
353 					if (significand == 0)
354 						++exp2;
355 				}
356 
357 				U* result = cast(U*) &n;
358 				*result = exp2 + expBias << expShift | significand;
359 				static if (opt.minus)
360 					*result |= U(sign) << U.sizeof * 8 - 1;
361 				str = p;
362 				return true;
363 			}
364 			else assert(0, "Not implemented");
365 		}
366 		else assert(0, "Not implemented");
367 	}
368 	else
369 	{
370 		import fast.intmath;
371 
372 		if (exponent && significand)
373 		{
374 			// We need to account for the exponent.
375 			U pow = pow10[exponent - 1];
376 			if (expSign)
377 			{
378 				// Negative exponent, if we get a fractional result, abort.
379 				if (significand % pow)
380 					return false;
381 				significand /= pow;
382 			}
383 			else static if (U.sizeof < ulong.sizeof)
384 			{
385 				// Multiply using a bigger result type
386 				ulong prod = ulong(significand) * pow;
387 				if (prod > U.max)
388 					return false;
389 				significand = cast(U) prod;
390 			}
391 			else
392 			{
393 				// If the multiply will overflow, abort.
394 				bool overflowed;
395 				significand = mulu(significand, pow, overflowed);
396 				if (overflowed)
397 					return false;
398 			}
399 		}
400 
401 		n = cast(N) significand;
402 		static if (isSigned!N && opt.minus)
403 		{
404 			if (significand > U(N.max) + sign)
405 				return false;
406 			if (sign)
407 				n = -n;
408 		}
409 		else if (significand > N.max)
410 			return false;
411 		str = p;
412 		return true;
413 	}
414 
415 BigMantissa:
416 	if (significand <= (significand.max - digit) / 10)
417 		goto BigMantissaNotSoMuch;
418 //	assert(0, "Not implemented");
419 
420 BigExponent:
421 //	assert(0, "Not implemented");
422 
423 BigExponentAdjustForDecimalPoint:
424 //	assert(0, "Not implemented");
425 	return false;
426 }
427 
428 
429 private template PowData(U, U base)
430 {
431 	import std.range;
432 
433 	// Largest power of `base` that fits into an integer of type U.
434 	enum powMax = { U v = 1; uint exp; while (v <= U.max / base) { v *= base; exp++; } return exp; }();
435 	
436 	// Table of powers of `base`. (We skip base^0)
437 	static immutable U[powMax] pows = base.recurrence!((a, n) => base * a[n-1]).take(powMax).array;
438 }
439 
440 
441 static if (isAMD64 && (isLDC || isGDC))
442 {
443 	@nogc pure nothrow
444 	private ubyte bigDiv(ref size_t a, size_t b)
445 	in
446 	{
447 		assert(b > size_t.max / 2, "High bit of divisor must be set.");
448 	}
449 	body
450 	{
451 		// Make sure that the division will yield exactly 32 or 64 significant bits.
452 		ubyte lza = clz(a);
453 		version (LDC)
454 		{
455 			import ldc.llvmasm;
456 			a <<= lza;
457 			if (a >= b) { a >>= 1; lza--; }
458 			a = __asm!ulong("
459 				xor %rax, %rax
460 				divq $2
461 				", "={rax},{rdx},rm", a, b);
462 		}
463 		else version (GNU)
464 		{
465 			size_t dividend = a << lza;
466 			if (dividend >= b) { dividend >>= 1; lza--; }
467 			asm { "
468 				xor %%rax, %%rax
469 				divq %3
470 				" : "=&a" a, "=d" dividend : "d" dividend, "rm" b; }
471 		}
472 		return ++lza;
473 	}
474 	
475 	unittest
476 	{
477 		size_t a = size_t.max / 11;
478 		size_t b = size_t.max / 5;
479 		version (X86_64)
480 		{
481 			int exp = clz(b);   // Positive base-2 exponent
482 			b <<= exp;
483 			exp -= bigDiv(a, b);
484 			assert(a == 0xE8BA2E8BA2E8BA2AUL);
485 			assert(exp == -1);
486 		}
487 	}
488 }
489 
490 
491 /+
492  ╔══════════════════════════════════════════════════════════════════════════════
493  ║ ⚑ String Scanning and Comparison
494  ╚══════════════════════════════════════════════════════════════════════════════
495  +/
496 
497 /*******************************************************************************
498  * 
499  * Compares a string of unknown length against a statically known key.
500  * 
501  * This function also handles escapes and requires one or more terminator chars.
502  *
503  * Params:
504  *   C = Character with.
505  *   key = The static key string.
506  *   terminators = A list of code units that terminate the string.
507  *   special = A list of code units that are handled by the user callback. Use
508  *             this for escape string handling. Default is `null`.
509  *   p_str = Pointer to the string for the comparison.
510  *   callback = User callback to handle special escape characters if `special`
511  *              is non-empty.
512  *
513  * Returns:
514  *   A code with following meanings: -1 = not equal, terminator character hit,
515  *   0 = not equal, but string not exhausted, 1 = string equals key.
516  *
517  **************************************/
518 int fixedTermStrCmp(C, immutable C[] key, immutable C[] terminators, immutable C[] special = null)
519 	(ref const(C)* p_str, scope bool delegate(ref immutable(char)*, ref const(char)*) callback = null)
520 in
521 {
522 	assert(special.length == 0 || callback !is null);
523 }
524 body
525 {
526 	import std.algorithm, std.range;
527 	
528 	static immutable byte[256] classify =
529 		iota(256).map!(c => terminators.canFind(c) ? byte(-1) : special.canFind(c) ? 1 : 0).array;
530 	
531 	immutable(C)* p_key = key.ptr;
532 	immutable C* e_key = p_key + key.length;
533 	
534 	while (p_key !is e_key)
535 	{
536 		int clazz = *p_str <= 0xFF ? classify[*p_str] : 0;
537 		
538 		if (clazz < 0)
539 		{
540 			return clazz;
541 		}
542 		else if (clazz == 0)
543 		{
544 			if (*p_str != *p_key)
545 				return clazz;
546 			
547 			p_str++;
548 			p_key++;
549 		}
550 		else if (clazz > 0)
551 		{
552 			if (!callback(p_key, p_str))
553 				return 0;
554 		}
555 	}
556 	
557 	return classify[*p_str & 0xFF] < 0;
558 }
559 
560 
561 /*
562 @nogc nothrow
563 void fixedStringCompareSSE4()
564 {
565 	enum words     = key.length / 16;
566 	enum remainder = key.length % 16;
567 	enum contains0 = key.canFind('\0');     // For SSE4.2 string search.
568 	static assert(!contains0, "Not implemented");
569 
570 	size_t remaining = e - b;
571 	auto p = b;
572 
573 	foreach (i; staticIota!(0, words))
574 	{
575 		auto backup = p;
576 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
577 		p = backup;
578 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
579 	}
580 }
581 */
582 
583 
584 @forceinline @nogc nothrow pure
585 void seekToAnyOf(string cs)(ref const(char)* p)
586 {
587 	p.vpcmpistri!(char, sanitizeChars(cs), Operation.equalAnyElem);
588 }
589 
590 
591 @forceinline @nogc nothrow pure
592 void seekToRanges(string cs)(ref const(char)* p)
593 {
594 	p.vpcmpistri!(char, sanitizeRanges(cs), Operation.inRanges);
595 }
596 
597 
598 /*******************************************************************************
599  * 
600  * Searches for a specific character known to appear in the stream and skips the
601  * read pointer over it.
602  *
603  * Params:
604  *   c = the character
605  *   p = the read pointer
606  *
607  **************************************/
608 @forceinline @nogc nothrow pure
609 void seekPast(char c)(ref const(char)* p)
610 {
611 	p.vpcmpistri!(char, c.repeat(16).to!string, Operation.equalElem);
612 	p++;
613 }
614 
615 
616 /*******************************************************************************
617  * 
618  * Skips the read pointer over characters that fall into any of up to 8 ranges
619  * of characters. The first character in `cs` is the start of the first range,
620  * the second character is the end. This is repeated for any other character
621  * pair. A character falls into a range from `a` to `b` if `a <= *p <= b`.
622  *
623  * Params:
624  *   cs = the character ranges
625  *   p = the read pointer
626  *
627  **************************************/
628 @forceinline @nogc nothrow pure
629 void skipCharRanges(string cs)(ref const(char)* p)
630 {
631 	p.vpcmpistri!(char, cs, Operation.inRanges, Polarity.negate);
632 }
633 
634 
635 /*******************************************************************************
636  * 
637  * Skips the read pointer over all and any of the given characters.
638  *
639  * Params:
640  *   cs = the characters to skip over
641  *   p = the read pointer
642  *
643  **************************************/
644 @forceinline @nogc nothrow pure
645 void skipAllOf(string cs)(ref const(char)* p)
646 { 
647 	p.vpcmpistri!(char, cs, Operation.equalAnyElem, Polarity.negate);
648 }
649 
650 
651 /*******************************************************************************
652  * 
653  * Skips the read pointer over ASCII white-space including comprising '\t',
654  * '\r', '\n' and ' '.
655  *
656  * Params:
657  *   p = the read pointer
658  *
659  **************************************/
660 @forceinline @nogc nothrow pure
661 void skipAsciiWhitespace(ref const(char)* p)
662 {
663 	if (*p == ' ')
664 		p++;
665 	if (*p > ' ')
666 		return;
667 	p.skipAllOf!" \t\r\n";
668 }
669 
670 
671 /*******************************************************************************
672  * 
673  * Sets the read pointer to the start of the next line.
674  *
675  * Params:
676  *   p = the read pointer
677  *
678  **************************************/
679 @forceinline @nogc nothrow pure
680 void skipToNextLine(ref const(char)* p)
681 {
682 	p.vpcmpistri!(char, "\r\n", Operation.equalAnyElem);
683 	if (p[0] == '\r' && p[1] == '\n')
684 		p++;
685 	p++;
686 }
687 
688 
689 private enum sanitizeChars(string cs)
690 {
691 	import std.exception;
692 
693 	bool has0 = false;
694 	foreach (c; cs) if (!c) { has0 = true; break; }
695 	assert(has0, "Parsers are required to also check for \0 when looking for specific chars.");
696 	
697 	char[] result;
698 	foreach (i; 1 .. 256) foreach (c; cs) if (i == c)
699 	result ~= c;
700 	return result.assumeUnique;
701 }
702 
703 
704 private enum sanitizeRanges(string cs)
705 {
706 	import std.exception;
707 
708 	bool has0 = false;
709 	foreach (i; 0 .. cs.length / 2) if (!cs[2*i]) { has0 = true; break; }
710 	assert(has0, "Parsers are required to also check for \0 when looking for specific chars.");
711 	
712 	char[] result;
713 	foreach (i; 0 .. cs.length / 2)
714 	{
715 		if (cs[2*i])
716 			result ~= cs[2*i .. 2*i+2];
717 		else if (cs[2*i+1])
718 			result ~= ['\x01', cs[2*i+1]];
719 	}
720 	return result.assumeUnique;
721 }
722 
723 
724 private enum Operation
725 {
726 	equalAnyElem = 0b0_00_00_00,
727 	inRanges     = 0b0_00_01_00,
728 	equalElem    = 0b0_00_10_00,
729 	substrPos    = 0b0_00_11_00,
730 }
731 
732 
733 private enum Polarity
734 {
735 	keep        = 0b0_00_00_00,
736 	negate      = 0b0_01_00_00,
737 	negateValid = 0b0_11_00_00,
738 }
739 
740 
741 @forceinline @nogc nothrow pure
742 private void vpcmpistri(C, immutable(C[]) cs, Operation op, Polarity pol = Polarity.keep, bool lastIndex = false)
743 	(ref const(char)* p)
744 		if (is(C == char) || is(C == ubyte) || is(C == wchar) || is(C == ushort) || is(C == byte) || is(C == short))
745 {
746 	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53712
747 	static if (is(C == char) || is(C == ubyte))
748 		enum ct = 0b00;
749 	else static if (is(C == wchar) || is(C == ushort))
750 		enum ct = 0b01;
751 	else static if (is(C == byte))
752 		enum ct = 0b10;
753 	else
754 		enum ct = 0b11;
755 	
756 	enum mode = ct | op | pol | (!!lastIndex << 6);
757 	
758 	version (X86_64)
759 		enum creg = "rcx";
760 	else version (X86)
761 		enum creg = "ecx";
762 	else static assert(0, "Not implemented");
763 	
764 	version (LDC)
765 	{
766 		import ldc.llvmasm;
767 		
768 		p = __asm!(const(char*))("
769 			1:
770 			pcmpistri $2, ($1), $3
771 			add       $$16, $1
772 			cmp       $$16, %ecx
773 			je        1b
774 			sub       $$16, $1
775 			add       %" ~ creg ~ ", $1
776 			", "=r,0,K,x,~{ecx}", p, mode, SIMDFromString!cs);
777 	}
778 	else version (GNU)
779 	{
780 		asm { "
781 			1:
782 			pcmpistri %2, (%1), %3
783 			add       $16, %1
784 			cmp       $16, %%ecx
785 			je        1b
786 			sub       $16, %1
787 			add       %%" ~ creg ~ ", %1
788 			" : "=r" p : "0" p, "K" mode, "x" SIMDFromString!cs : "ecx"; }
789 	}
790 	else
791 	{
792 		alias csXMM = SIMDFromString!cs;
793 		version (D_InlineAsm_X86_64)
794 		{
795 			version (Posix)
796 			{
797 				asm @nogc pure nothrow
798 				{
799 					naked;
800 					movdqa      XMM0, csXMM;
801 					mov         RAX, [RDI];
802 				L1:
803 					vpcmpistri  XMM0, [RAX], mode;
804 					add         RAX, 16;
805 					cmp         ECX, 16;
806 					je          L1;
807 					sub         RAX, 16;
808 					add         RAX, RCX;
809 					mov         [RDI], RAX;
810 					ret;
811 				}
812 			}
813 			else static assert(0, "Not implemented");
814 		}
815 		else version (D_InlineAsm_X86)
816 		{
817 			version (Posix)
818 			{
819 				asm @nogc pure nothrow
820 				{
821 					naked;
822 					movdqa      XMM0, csXMM;
823 					mov         EDX, [EAX];
824 				L1:
825 					vpcmpistri  XMM0, [EDX], mode;
826 					add         EDX, 16;
827 					cmp         ECX, 16;
828 					je          L1;
829 					sub         EDX, 16;
830 					add         EDX, ECX;
831 					mov         [EAX], EDX;
832 					ret;
833 				}
834 			}
835 			else static assert(0, "Not implemented");
836 		}
837 		else static assert(0, "Not implemented");
838 	}
839 }