1 /***************************************************************************************************
2  * 
3  * Text parsing functionality.
4  * 
5  * Authors:
6  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7  * 
8  * Copyright:
9  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10  * 
11  * License:
12  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13  * 
14  **************************************************************************************************/
15 module fast.parsing;
16 
17 import std.traits;
18 import fast.internal.sysdef;
19 
20 
21 /+
22  ╔══════════════════════════════════════════════════════════════════════════════
23  ║ ⚑ Hexadecimal
24  ╚══════════════════════════════════════════════════════════════════════════════
25  +/
26 
/*******************************************************************************
 * 
 * Converts one hexadecimal digit character into its numeric value.
 *
 * The input is not validated. The result is only meaningful for the characters
 * `'0'`-`'9'`, `'a'`-`'f'` and `'A'`-`'F'`.
 *
 * Params:
 *   c = The hexadecimal digit.
 *
 * Returns:
 *   The value of `c` in the range 0 to 15.
 *
 **************************************/
@safe @nogc pure nothrow
uint hexDecode(char c)
{
	// Bit 6 is set for the ASCII letters and clear for the ASCII digits. Adding 9 for letters moves
	// 'a'/'A' (low nibble 1) to 10, so that the low nibble of the sum is the digit's value.
	const uint letterBit = c >> 6;
	return (c + 9 * letterBit) & 0xF;
}
43 
44 
/*******************************************************************************
 * 
 * Decodes four consecutive hexadecimal digits in one go and advances the read
 * pointer past them.
 *
 * The input is not validated; use the overload with an `out` result if the
 * digits may be malformed. All four characters are loaded with a single 32-bit
 * read that is not necessarily aligned.
 *
 * Params:
 *   hex = Read pointer to the first of four hexadecimal digits. It is
 *         incremented by 4.
 *
 * Returns:
 *   The 16-bit value of the four digits, the first digit being the most
 *   significant nibble.
 *
 **************************************/
@nogc pure nothrow
uint hexDecode4(ref const(char)* hex)
{
	// Load the four characters that `hex` points at. (Taking `&hex` here would read the bits of the
	// pointer variable itself instead of the text.)
	uint x = *cast(const(uint)*) hex;
	hex += 4;
	// Per byte: low nibble plus 9 if bit 6 (set for letters only) is set.
	x = (x & 0x0F0F0F0F) + 9 * (x >> 6 & 0x01010101);
	version (LittleEndian)
	{
		// The first character sits in the lowest byte, but is the most significant nibble.
		return x >> 24 | x >> 12 & 0xF0 | x & 0xF00 | x << 12 & 0xF000;
	}
	else
	{
		// Nibbles are already in order; squeeze out the empty high nibble of each byte.
		x = (x | x >> 4) & 0x00FF00FF;
		return (x | x >> 8) & 0x0000FFFF;
	}
}
61 
62 
/*******************************************************************************
 * 
 * Decodes four hexadecimal digits with validation.
 *
 * Params:
 *   hex = Read pointer to the first digit. It is advanced by 4 only when all
 *         four characters are valid hexadecimal digits.
 *   result = Receives the decoded value. When decoding fails, it holds the
 *            digits accepted so far, already shifted for the failing position.
 *
 * Returns:
 *   `null` on success, otherwise a pointer to the first character that is not
 *   a hexadecimal digit.
 *
 **************************************/
@nogc pure nothrow
inout(char)* hexDecode4(ref inout(char)* hex, out uint result)
{
	for (size_t pos = 0; pos < 4; pos++)
	{
		result <<= 4;

		// Anything below '0' wraps around to a large value and fails both range checks.
		const ubyte offset = cast(ubyte) (hex[pos] - '0');
		if (offset < 10)
		{
			result |= offset;
			continue;
		}

		// Fold upper case onto lower case, then measure the distance from 'a'.
		const ubyte letter = cast(ubyte) ((offset | 0x20) - ('a' - '0'));
		if (letter > 5)
			return hex + pos;
		result |= letter + 10;
	}
	hex += 4;
	return null;
}
unittest
{
	immutable text = "aF09";
	const(char)* cursor = text.ptr;
	uint value;
	hexDecode4(cursor, value);
	assert(value == 0xAF09);
}
94 
95 
96 /+
97  ╔══════════════════════════════════════════════════════════════════════════════
98  ║ ⚑ Numbers
99  ╚══════════════════════════════════════════════════════════════════════════════
100  +/
101 
102 
/// Compile-time feature switches for `parseNumber`.
struct NumberOptions
{
	/// When set, a leading minus sign is accepted, which enables negative numbers.
	bool minus = false;
}
109 
110 
/*******************************************************************************
 * 
 * Parse a number from a character read pointer.
 * 
 * The accepted text is an optional `-` (only with `opt.minus`), followed by
 * either a single `0` or a run of decimal digits, an optional fraction
 * (`.` and at least one digit) and an optional exponent (`e` or `E`, an
 * optional sign and at least one digit). Reading stops at the first character
 * that does not continue the number, so the text must be followed by such a
 * character (e.g. `\0`).
 *
 * On success, the read pointer is set behind the number. On failure, both the
 * read pointer and `n` are left untouched.
 *
 * Params:
 *   opt = Selects features for the implementation. Fewer features make the
 *         parser faster.
 *   str = The read pointer.
 *   n = A reference to a number to be overwritten with the result.
 *
 * Returns:
 *   An indication of success. Typically the function fails when a number cannot
 *   be stored in an integer of the given size or invalid characters are
 *   encountered. A negative non-zero number never fits into an unsigned type.
 *
 * Bugs:
 *   Floating-point numbers that need more precision than the two fast paths
 *   offer currently end in `assert(0, "Not implemented")`.
 *
 **************************************/
@nogc pure nothrow
bool parseNumber(NumberOptions opt, N)(ref const(char)* str, ref N n) if (isNumeric!N)
{
	import fast.internal.helpers;
	import std.range;

	// Integer type at least as large as the mantissa of N.
	static if (N.sizeof <= size_t.sizeof)
		alias U = size_t;
	else
		alias U = ulong;
	
	// Largest value of type U that can be multiplied by 10 and have a digit added without overflow.
	enum canHoldOneMoreDigit = (U.max - 9) / 10;
	// The same limit for the exponent accumulator, which is a `size_t` and may be narrower than U.
	enum expCanHoldOneMoreDigit = (size_t.max - 9) / 10;
	static if (isFloatingPoint!N)
	{
		enum significandRightShift = 8 * U.sizeof - N.mant_dig + 1;
		enum firstFractionBit   = U(1) << 8 * U.sizeof - N.mant_dig;
		enum expShift = N.mant_dig - 1;
		enum expBias = N.max_exp - 1;
	}
	
	static if (isFloatingPoint!N)
	{
		alias pow5Max = PowData!(U, 5).powMax;
		alias pow5    = PowData!(U, 5).pows;

		// Largest power of 10 that fits into a float of type N. The factor 5 here is correct, as the 2s
		// go in as an increment in the exponent, that is negligible here.
		enum pow10MaxF = {
			U v = 1; uint exp;
			while (v <= ((U(1) << N.mant_dig) - 1) / 5) { v *= 5; exp++; }
			return exp;
		}();

		static immutable N[pow10MaxF] pow10F = N(10).recurrence!((a, n) => 10 * a[n-1]).take(pow10MaxF).array;
	}
	else
	{
		alias pow10Max = PowData!(U, 10).powMax;
		alias pow10    = PowData!(U, 10).pows;
	}

	const(char)* p = str;
	const(char)* point = null;
	U significand = 0;
	size_t exponent = 0;
	size_t expAdjust = void;
	// Must start out as `false`: it is read below whenever a fraction is present, even if the text
	// has no exponent part that would assign it.
	bool expSign = false;
	static if (isFloatingPoint!N)
	{
		U exp2 = void;
		bool roundUp = false;
	}
	
	/////////////////// SIGN BIT HANDLING ///////////////////
	
	// Check for the sign.
	static if (opt.minus)
	{
		bool sign = (*p == '-');
		if (sign)
			p++;
	}
	
	/////////////////// INTEGRAL PART OF SIGNIFICAND ///////////////////
	
	// Characters below '0' wrap around to large values, so `digit <= 9` is a complete digit check.
	uint digit = *p - '0';
	if (digit == 0)
	{
		// We have a single zero.
		p++;
	}
	else if (digit <= 9)
	{
		// Regular case of one or more digits.
		do
		{
			if (significand > canHoldOneMoreDigit)
				goto BigMantissa;
		BigMantissaNotSoMuch:
			significand = 10 * significand + digit;
			digit = *++p - '0';
		}
		while (digit <= 9);
	}
	else return false;
	
	/////////////////// FRACTIONAL PART OF SIGNIFICAND ///////////////////
	
	if (*p == '.')
	{
		point = ++p;
		digit = *p - '0';
		if (digit > 9)
			return false;
		do
		{
			// NOTE(review): When the retry at `BigMantissa` succeeds, parsing resumes inside the
			// integral loop above. `point` stays set, so the digits are still counted as fraction
			// digits, but a second '.' directly after them is then examined again — confirm whether
			// that is acceptable.
			if (significand > canHoldOneMoreDigit)
				goto BigMantissa;
			significand = 10 * significand + digit;
			digit = *++p - '0';
		}
		while (digit <= 9);
	}
	
	/////////////////// EXPONENT HANDLING ///////////////////

	// Number of fraction digits that were folded into the significand.
	expAdjust = (point is null) ? 0 : p - point;
	if ((*p | 0x20) == 'e')
	{
		p++;
		expSign = (*p == '-');
		if (expSign || *p == '+')
			p++;
		digit = *p - '0';
		if (digit > 9)
			return false;
		do
		{
			if (exponent > expCanHoldOneMoreDigit)
				goto BigExponent;
			exponent = 10 * exponent + digit;
			digit = *++p - '0';
		}
		while (digit <= 9);
	}
	
	if (expAdjust)
	{
		if (expSign)
		{
			if (exponent > size_t.max - expAdjust)
				goto BigExponentAdjustForDecimalPoint;
			exponent += expAdjust;
		}
		else if (exponent >= expAdjust)
		{
			exponent -= expAdjust;
		}
		else
		{
			// Amount of fraction digits turns exponent from positive to negative.
			expAdjust -= exponent;
			exponent = expAdjust;
			expSign = true;
		}
	}

	/////////////////// RESULT ASSEMBLY ///////////////////

	static if (isFloatingPoint!N)
	{
		if (significand == 0 || exponent == 0)
		{
			// The significand is the unsigned result; apply the sign exactly once.
			static if (opt.minus)
				n = sign ? -N(significand) : +N(significand);
			else
				n = +N(significand);
			str = p;
			return true;
		}

		// Try the floating-point fast path: The significand's bits, as well as the 10^x exponent can be expressed
		// accurately as a float of type N. We just need to divide or multiply them based on the signedness of the
		// exponent.
		exp2 = bsr(significand);
		if (exp2 - bsf(significand) < N.mant_dig && exponent <= pow10MaxF)
		{
			N b = pow10F[exponent - 1];
			static if (opt.minus)
				if (sign)
					b = -b;
			n = expSign ? significand / b : significand * b;
			str = p;
			return true;
		}
		else if (exponent <= pow5Max)
		{
			// Special case, mostly to handle the little bit of extra precision that comes from
			// converting a double to its string representation. The last base-10 digit doesn't quite
			// fit back into a double, but we don't need to resort to arbitrary precision math just yet.
			if (expSign)
			{
				U divisor = pow5[exponent - 1];
				static if (isAMD64 && (isLDC || isGDC))
				{
					// AMD64 can divide 128-bit numbers by 64-bit numbers directly.
					size_t expDivisor = clz(divisor);
					divisor <<= expDivisor;
					exp2 = expDivisor - exponent - bigDiv(significand, divisor);
					significand <<= 1;
				}
				else
				{
					// We perform an iterative division.
					U dividend = significand << 8 * U.sizeof - 1 - exp2;
					U quotient = dividend / divisor;
					dividend %= divisor;

					size_t lzs = clz(quotient);
					exp2 -= exponent + lzs;
					significand = quotient << ++lzs;
					size_t accuracy = 8 * U.sizeof - lzs;
					while (accuracy < N.mant_dig)
					{
						lzs = clz(dividend);
						dividend <<= lzs;
						quotient = dividend / divisor;
						dividend %= divisor;
						significand |= quotient << (8 * U.sizeof - lzs) >> accuracy;
						accuracy += lzs;
					}
				}

				// Assemble floating point value from bits.
				roundUp = (significand & firstFractionBit) != 0;
				significand >>= significandRightShift;
				if (roundUp)
				{
					significand++;
					significand &= ~(U(1) << N.mant_dig - 1);
					if (significand == 0)
						++exp2;
				}

				// Store through an integer of the same width as N where one is known. U can be wider
				// than a `float`; writing a whole U would then run past `n` and misplace the sign bit.
				static if (N.sizeof == uint.sizeof)
					alias Bits = uint;
				else
					alias Bits = U;
				Bits* result = cast(Bits*) &n;
				*result = cast(Bits) (exp2 + expBias << expShift | significand);
				static if (opt.minus)
					*result |= Bits(sign) << Bits.sizeof * 8 - 1;
				str = p;
				return true;
			}
			else assert(0, "Not implemented");
		}
		else assert(0, "Not implemented");
	}
	else
	{
		import fast.intmath;

		if (exponent && significand)
		{
			// Powers of ten beyond the table do not fit into U. Dividing a non-zero significand by
			// one leaves a fraction and multiplying by one overflows, so both directions fail.
			if (exponent > pow10Max)
				return false;

			// We need to account for the exponent.
			U pow = pow10[exponent - 1];
			if (expSign)
			{
				// Negative exponent, if we get a fractional result, abort.
				if (significand % pow)
					return false;
				significand /= pow;
			}
			else static if (U.sizeof < ulong.sizeof)
			{
				// Multiply using a bigger result type
				ulong prod = ulong(significand) * pow;
				if (prod > U.max)
					return false;
				significand = cast(U) prod;
			}
			else
			{
				// If the multiply will overflow, abort.
				bool overflowed;
				significand = mulu(significand, pow, overflowed);
				if (overflowed)
					return false;
			}
		}

		// `n` is only written once all range checks have passed.
		static if (isSigned!N && opt.minus)
		{
			// The negative range reaches one further than the positive range.
			if (significand > U(N.max) + sign)
				return false;
			n = cast(N) significand;
			if (sign)
				n = -n;
		}
		else
		{
			if (significand > N.max)
				return false;
			// An unsigned target cannot hold a negative value; only "-0" is acceptable.
			static if (opt.minus)
				if (sign && significand != 0)
					return false;
			n = cast(N) significand;
		}
		str = p;
		return true;
	}

BigMantissa:
	// The generic limit is pessimistic (it assumes a digit of 9); retry with the actual digit.
	if (significand <= (significand.max - digit) / 10)
		goto BigMantissaNotSoMuch;
//	assert(0, "Not implemented");

BigExponent:
//	assert(0, "Not implemented");

BigExponentAdjustForDecimalPoint:
//	assert(0, "Not implemented");
	return false;
}
434 
435 
/// Compile-time table of the powers of `base` that fit into the unsigned integer type `U`.
private template PowData(U, U base)
{
	// Largest exponent for which `base` raised to it still fits into an integer of type U.
	enum powMax = {
		uint count = 0;
		for (U value = 1; value <= U.max / base; value *= base)
			count++;
		return count;
	}();
	
	// Table of powers of `base`: pows[i] is base^(i+1), base^0 is not stored.
	static immutable U[powMax] pows = {
		U[powMax] table;
		U power = 1;
		foreach (ref slot; table)
		{
			power *= base;
			slot = power;
		}
		return table;
	}();
}
446 
447 
// The helper below relies on GCC/LLVM style inline assembly for AMD64 and is only compiled for
// that combination of architecture and compiler.
448 static if (isAMD64 && (isLDC || isGDC))
449 {
	/*
	 * Divides `a`, normalized and placed in the upper half of a double-width dividend, by `b`.
	 *
	 * `a` is shifted left by its number of leading zero bits as reported by `clz` (one bit less
	 * if it would otherwise not be smaller than `b`). The assembly clears RAX and executes `divq`
	 * with that value in RDX; the quotient replaces `a`.
	 *
	 * Params:
	 *   a = On entry the dividend, on return the quotient delivered by `divq`.
	 *   b = The divisor. Its highest bit must be set (checked by the `in` contract).
	 *
	 * Returns:
	 *   The number of bits `a` was shifted left before the division, plus one.
	 *
	 * NOTE(review): `clz` comes from `fast.internal.helpers`; it is presumably a
	 * count-leading-zeros helper — confirm its result for `a == 0`.
	 */
450 	@nogc pure nothrow
451 	private size_t bigDiv(ref size_t a, size_t b)
452 	in
453 	{
454 		assert(b > size_t.max / 2, "High bit of divisor must be set.");
455 	}
456 	body
457 	{
458 		// Make sure that the division will yield exactly 32 or 64 significant bits.
459 		import fast.internal.helpers;
460 		size_t lza = clz(a);
461 		version (LDC)
462 		{
463 			import ldc.llvmasm;
464 			a <<= lza;
			// Keep the dividend's upper half below the divisor; the shift count is corrected to match.
465 			if (a >= b) { a >>= 1; lza--; }
			// Constraints: result in RAX, `a` passed in RDX, `b` in a register or memory.
466 			a = __asm!ulong("
467 				xor %rax, %rax
468 				divq $2
469 				", "={rax},{rdx},rm", a, b);
470 		}
471 		else version (GNU)
472 		{
473 			size_t dividend = a << lza;
			// Keep the dividend's upper half below the divisor; the shift count is corrected to match.
474 			if (dividend >= b) { dividend >>= 1; lza--; }
			// Operands: RAX receives the new `a`, RDX carries `dividend` in and out, `b` is operand 3.
475 			asm { "
476 				xor %%rax, %%rax
477 				divq %3
478 				" : "=&a" a, "=d" dividend : "d" dividend, "rm" b; }
479 		}
480 		return ++lza;
481 	}
482 	
	// Divides size_t.max / 11 by size_t.max / 5 and checks the quotient bits and the resulting
	// base-2 exponent.
483 	unittest
484 	{
485 		size_t a = size_t.max / 11;
486 		size_t b = size_t.max / 5;
487 		version (X86_64)
488 		{
489 			import fast.internal.helpers;
490 			long exp = clz(b);   // Positive base-2 exponent
			// Normalize the divisor so that its highest bit is set, as `bigDiv` requires.
491 			b <<= exp;
492 			exp -= bigDiv(a, b);
493 			assert(a == 0xE8BA2E8BA2E8BA2AUL);
494 			assert(exp == -2);
495 		}
496 	}
497 }
498 
499 
500 /+
501  ╔══════════════════════════════════════════════════════════════════════════════
502  ║ ⚑ String Scanning and Comparison
503  ╚══════════════════════════════════════════════════════════════════════════════
504  +/
505 
/*******************************************************************************
 * 
 * Compares a string of unknown length against a statically known key.
 * 
 * This function also handles escapes and requires one or more terminator chars.
 *
 * Params:
 *   C = Character type of the string and the key.
 *   key = The static key string.
 *   terminators = A list of code units that terminate the string. Only code
 *                 units up to 0xFF are recognized.
 *   special = A list of code units that are handled by the user callback. Use
 *             this for escape string handling. Default is `null`. Only code
 *             units up to 0xFF are recognized and terminators take precedence.
 *   p_str = Pointer to the string for the comparison. After the function call
 *           it will be behind the last matching character.
 *   callback = User callback to handle special escape characters if `special`
 *              is non-empty. It receives the key and string read pointers by
 *              reference and is expected to advance them; returning `false`
 *              signals a mismatch.
 *
 * Returns:
 *   A code with following meanings: -1 = not equal, a terminator was hit before
 *   the key was exhausted, 0 = not equal, either because of a mismatching
 *   character or because the string continues after the key, 1 = string equals
 *   key, i.e. the key is directly followed by a terminator.
 *
 **************************************/
int fixedTermStrCmp(C, immutable C[] key, immutable C[] terminators, immutable C[] special = null)
	(ref const(C)* p_str, scope bool delegate(ref immutable(C)*, ref const(C)*) callback = null)
in
{
	assert(special.length == 0 || callback !is null);
}
body
{
	// Class of each code unit up to 0xFF: -1 = terminator, 1 = special, 0 = ordinary character.
	static immutable byte[256] classify = {
		byte[256] table;
		foreach (c; special)
			if (c <= 0xFF)
				table[c] = 1;
		// Written last, so that terminators win over special characters.
		foreach (c; terminators)
			if (c <= 0xFF)
				table[c] = -1;
		return table;
	}();
	
	immutable(C)* p_key = key.ptr;
	immutable C* e_key = p_key + key.length;
	
	while (p_key !is e_key)
	{
		// Code units beyond the table are ordinary characters.
		int clazz = *p_str <= 0xFF ? classify[*p_str] : 0;
		
		if (clazz < 0)
		{
			return clazz;
		}
		else if (clazz == 0)
		{
			if (*p_str != *p_key)
				return clazz;
			
			p_str++;
			p_key++;
		}
		else
		{
			if (!callback(p_key, p_str))
				return 0;
		}
	}
	
	// The key is exhausted; the strings are equal only if the input ends here as well. A wide code
	// unit must not be masked down to 8 bits, or e.g. U+0122 would pass for the terminator '"'.
	return (*p_str <= 0xFF && classify[*p_str] < 0) ? 1 : 0;
}
569 
570 
571 /*
572 @nogc nothrow
573 void fixedStringCompareSSE4()
574 {
575 	enum words     = key.length / 16;
576 	enum remainder = key.length % 16;
577 	enum contains0 = key.canFind('\0');     // For SSE4.2 string search.
578 	static assert(!contains0, "Not implemented");
579 
580 	size_t remaining = e - b;
581 	auto p = b;
582 
583 	foreach (i; staticIota!(0, words))
584 	{
585 		auto backup = p;
586 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
587 		p = backup;
588 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
589 	}
590 }
591 */
592 
593 
/*******************************************************************************
 * 
 * Moves the read pointer forward to where `vpcmpistri` reports a code unit
 * that equals any of the given characters.
 *
 * Params:
 *   cs = the characters to look for; must contain `\0`, which `sanitizeChars`
 *        verifies and then strips before the search
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekToAnyOf(string cs)(ref const(char)* p)
{
	enum needles = sanitizeChars(cs);
	vpcmpistri!(char, needles, Operation.equalAnyElem)(p);
}
599 
600 
/*******************************************************************************
 * 
 * Moves the read pointer forward to where `vpcmpistri` reports a code unit
 * that falls into any of the given character ranges.
 *
 * Params:
 *   cs = pairs of range start and range end characters; at least one range
 *        must start at `\0`, which `sanitizeRanges` verifies before it removes
 *        `\0` from the ranges
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekToRanges(string cs)(ref const(char)* p)
{
	enum ranges = sanitizeRanges(cs);
	vpcmpistri!(char, ranges, Operation.inRanges)(p);
}
606 
607 
/*******************************************************************************
 * 
 * Looks for a specific character that is known to appear in the stream and
 * places the read pointer right behind it.
 *
 * Params:
 *   c = the character
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekPast(char c)(ref const(char)* p)
{
	// The element-wise comparison needs the character at each of the 16 positions.
	enum pattern = c.repeat(16).to!string;
	vpcmpistri!(char, pattern, Operation.equalElem)(p);
	// Step over the character itself.
	++p;
}
624 
625 
/*******************************************************************************
 * 
 * Advances the read pointer over characters that lie within any of up to 8
 * character ranges. `cs` is a sequence of character pairs: the first character
 * of a pair is the start of a range and the second character is its end. A
 * character belongs to a range from `a` to `b` if `a <= *p <= b`.
 *
 * Params:
 *   cs = the character ranges
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipCharRanges(string cs)(ref const(char)* p)
{
	// Negated polarity: stop at the first character outside of all ranges.
	vpcmpistri!(char, cs, Operation.inRanges, Polarity.negate)(p);
}
643 
644 
/*******************************************************************************
 * 
 * Advances the read pointer over any run of the given characters.
 *
 * Params:
 *   cs = the characters to skip over
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAllOf(string cs)(ref const(char)* p)
{
	// Negated polarity: stop at the first character that is not listed in `cs`.
	vpcmpistri!(char, cs, Operation.equalAnyElem, Polarity.negate)(p);
}
659 
660 
/*******************************************************************************
 * 
 * Advances the read pointer over ASCII white-space, which comprises '\t',
 * '\r', '\n' and ' '.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAsciiWhitespace(ref const(char)* p)
{
	// Cheap checks first: step over one blank, then only start the general scan if the next
	// character is not above ' ' and so might be white-space.
	if (*p == ' ')
		++p;
	if (*p <= ' ')
		skipAllOf!" \t\r\n"(p);
}
679 
680 
/*******************************************************************************
 * 
 * Moves the read pointer to the beginning of the next line.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipToNextLine(ref const(char)* p)
{
	// All code units except \0, \n and \r, written as three ranges.
	enum nonBreakRanges = "\x01\x09\x0B\x0C\x0E\xFF";

	// Stop at next \r, \n or \0.
	vpcmpistri!(char, nonBreakRanges, Operation.inRanges, Polarity.negate)(p);

	// Consume one line break: "\r", "\n" or "\r\n".
	if (*p == '\r')
		++p;
	if (*p == '\n')
		++p;
}
697 
698 
/// Checks that the character set `cs` contains `\0` and returns its remaining characters in
/// ascending order. Repeated characters are kept.
private enum sanitizeChars(string cs)
{
	import std.exception;

	bool sawNul = false;
	foreach (c; cs)
	{
		if (c == '\0')
		{
			sawNul = true;
			break;
		}
	}
	assert(sawNul, "Parsers are required to also check for \0 when looking for specific chars.");
	
	// Collect the characters by ascending code; starting at 1 leaves out \0.
	char[] ordered;
	for (int code = 1; code < 256; code++)
	{
		foreach (c; cs)
		{
			if (c == code)
				ordered ~= c;
		}
	}
	return ordered.assumeUnique;
}
712 
713 
/// Checks that at least one range in `cs` (pairs of start and end character) starts at `\0` and
/// returns the ranges with `\0` excluded: A range starting at `\0` starts at `\x01` instead, or is
/// dropped if it also ends at `\0`. An unpaired trailing character is ignored.
private enum sanitizeRanges(string cs)
{
	import std.exception;

	immutable pairs = cs.length / 2;

	bool sawNul = false;
	for (size_t k = 0; k < pairs && !sawNul; k++)
		sawNul = cs[2 * k] == '\0';
	assert(sawNul, "Parsers are required to also check for \0 when looking for specific chars.");
	
	char[] cleaned;
	for (size_t k = 0; k < pairs; k++)
	{
		immutable lo = cs[2 * k];
		immutable hi = cs[2 * k + 1];
		if (lo != '\0')
		{
			cleaned ~= lo;
			cleaned ~= hi;
		}
		else if (hi != '\0')
		{
			cleaned ~= '\x01';
			cleaned ~= hi;
		}
	}
	return cleaned.assumeUnique;
}
732 
733 
/// Comparison operation of `vpcmpistri`. The values are OR'ed into the instruction's control byte
/// and occupy its bits 2 and 3.
private enum Operation
{
	equalAnyElem = 0 << 2,
	inRanges     = 1 << 2,
	equalElem    = 2 << 2,
	substrPos    = 3 << 2,
}
741 
742 
/// Post-processing of the comparison result of `vpcmpistri`. The values are OR'ed into the
/// instruction's control byte and occupy its bits 4 and 5.
private enum Polarity
{
	keep        = 0 << 4,
	negate      = 1 << 4,
	negateValid = 3 << 4,
}
749 
750 
/*******************************************************************************
 * 
 * Scans memory in steps of 16 bytes with the `pcmpistri` instruction and moves
 * the read pointer to the position the instruction reports.
 *
 * Every implementation below runs the same loop: compare `cs` with the 16 bytes
 * at the read pointer, advance the pointer by 16 and repeat for as long as the
 * instruction leaves 16 in ECX. After the loop the last advance is undone and
 * the value of ECX/RCX is added to the pointer instead.
 *
 * Params:
 *   C = Element type; selects the two data format bits of the control byte.
 *   cs = The characters or ranges to compare against, loaded via
 *        `SIMDFromString`.
 *   op = The comparison operation.
 *   pol = The polarity applied to the comparison result.
 *   lastIndex = Sets bit 6 of the control byte.
 *   p = The read pointer.
 *
 * NOTE(review): The loop condition and the pointer arithmetic assume 16 one-byte
 * elements per step — confirm the behavior for 16-bit element types before
 * instantiating with them.
 *
 **************************************/
751 @forceinline @nogc nothrow pure
752 private void vpcmpistri(C, immutable(C[]) cs, Operation op, Polarity pol = Polarity.keep, bool lastIndex = false)
753 	(ref const(char)* p)
754 		if (is(C == char) || is(C == ubyte) || is(C == wchar) || is(C == ushort) || is(C == byte) || is(C == short))
755 {
756 	import fast.internal.helpers;
757 
	// Data format bits of the control byte, chosen by element type.
758 	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53712
759 	static if (is(C == char) || is(C == ubyte))
760 		enum ct = 0b00;
761 	else static if (is(C == wchar) || is(C == ushort))
762 		enum ct = 0b01;
763 	else static if (is(C == byte))
764 		enum ct = 0b10;
765 	else
766 		enum ct = 0b11;
767 	
	// The complete control byte that is passed to the instruction as an immediate.
768 	enum mode = ct | op | pol | (!!lastIndex << 6);
769 	
	// Name of the pointer-sized counter register that is added to `p` after the loop.
770 	version (X86_64)
771 		enum creg = "rcx";
772 	else version (X86)
773 		enum creg = "ecx";
774 	else static assert(0, "Not implemented");
775 	
776 	version (LDC)
777 	{
778 		import ldc.llvmasm;
779 		
		// Operands: $0/$1 = `p` (output tied to the input), $2 = `mode`, $3 = `cs`; ECX is clobbered.
780 		p = __asm!(const(char*))("
781 			1:
782 			pcmpistri $2, ($1), $3
783 			add       $$16, $1
784 			cmp       $$16, %ecx
785 			je        1b
786 			sub       $$16, $1
787 			add       %" ~ creg ~ ", $1
788 			", "=r,0,K,x,~{ecx}", p, mode, SIMDFromString!cs);
789 	}
790 	else version (GNU)
791 	{
		// Operands: %0/%1 = `p` (output tied to the input), %2 = `mode`, %3 = `cs`; ECX is clobbered.
792 		asm { "
793 			1:
794 			pcmpistri %2, (%1), %3
795 			add       $16, %1
796 			cmp       $16, %%ecx
797 			je        1b
798 			sub       $16, %1
799 			add       %%" ~ creg ~ ", %1
800 			" : "=r" p : "0" p, "K" mode, "x" SIMDFromString!cs : "ecx"; }
801 	}
802 	else
803 	{
		// Remaining compilers: `naked` functions written in DMD style inline assembly.
804 		alias csXMM = SIMDFromString!cs;
805 		version (D_InlineAsm_X86_64)
806 		{
807 			version (Posix)
808 			{
				// The reference to `p` is read from RDI and the new pointer is written back through it.
809 				version (D_PIC) asm @nogc pure nothrow
810 				{
811 					naked;
812 					lea         RAX, csXMM;
813 					mov         RAX, [RAX];
814 					movdqu      XMM0, [RAX];
815 					mov         RAX, [RDI];
816 				L1:
817 					vpcmpistri  XMM0, [RAX], mode;
818 					add         RAX, 16;
819 					cmp         ECX, 16;
820 					je          L1;
821 					sub         RAX, 16;
822 					add         RAX, RCX;
823 					mov         [RDI], RAX;
824 					ret;
825 				}
826 				else asm @nogc pure nothrow
827 				{
828 					naked;
829 					movdqa      XMM0, csXMM;
830 					mov         RAX, [RDI];
831 				L1:
832 					vpcmpistri  XMM0, [RAX], mode;
833 					add         RAX, 16;
834 					cmp         ECX, 16;
835 					je          L1;
836 					sub         RAX, 16;
837 					add         RAX, RCX;
838 					mov         [RDI], RAX;
839 					ret;
840 				}
841 			}
842 			else static assert(0, "Not implemented");
843 		}
844 		else version (D_InlineAsm_X86)
845 		{
846 			version (Posix)
847 			{
				// The reference to `p` is read from EAX and the new pointer is written back through it.
848 				version (D_PIC) asm @nogc pure nothrow
849 				{
850 					naked;
851 					mov         EDX, CS:csXMM[EBX];
852 					movdqu      XMM0, [EDX];
853 					mov         EDX, [EAX];
854 				L1:
855 					vpcmpistri  XMM0, [EDX], mode;
856 					add         EDX, 16;
857 					cmp         ECX, 16;
858 					je          L1;
859 					sub         EDX, 16;
860 					add         EDX, ECX;
861 					mov         [EAX], EDX;
862 					ret;
863 				}
864 				else asm @nogc pure nothrow
865 				{
866 					naked;
867 					movdqa      XMM0, csXMM;
868 					mov         EDX, [EAX];
869 				L1:
870 					vpcmpistri  XMM0, [EDX], mode;
871 					add         EDX, 16;
872 					cmp         ECX, 16;
873 					je          L1;
874 					sub         EDX, 16;
875 					add         EDX, ECX;
876 					mov         [EAX], EDX;
877 					ret;
878 				}
879 			}
880 			else static assert(0, "Not implemented");
881 		}
882 		else static assert(0, "Not implemented");
883 	}
884 }