Use JIT intrinsic for UTF8 encoding in Utf8.TryWrite's AppendLiteral #89376
Use JIT intrinsic for UTF8 encoding in Utf8.TryWrite's AppendLiteral #89376 — stephentoub merged 2 commits into dotnet:main from
Conversation
|
Tagging subscribers to this area: @dotnet/area-system-text-encoding
Issue Details
@EgorBo, the right things don't seem to be happening here, or else I'm misunderstanding how it's supposed to work. I still see a call to ReadUtf8 in the asm.
|
|
@stephentoub do you have a repro? I cloned your branch locally and checked codegen for: [MethodImpl(MethodImplOptions.NoInlining)]
internal static void Egor(ref TryWriteInterpolatedStringHandler h)
{
h.AppendLiteral("hello world some long string bla bla bla dewwqedewfwef");
}
and it looks OK - no ReadUtf8 calls, and copying is done using AVX2 |
private byte[] _buffer = new byte[1000];
private uint _id;
[Benchmark]
public bool TryWrite() => Utf8.TryWrite(_buffer, $"Id: {_id}", out _); |
Interesting. Checked codegen for the TryWrite benchmark:
; Assembly listing for method Program:TryWrite():bool:this (FullOpts)
G_M42047_IG01: ;; offset=0000H
push rdi
push rsi
push rbx
sub rsp, 64
vxorps xmm4, xmm4, xmm4
vmovdqa xmmword ptr [rsp+20H], xmm4
vmovdqa xmmword ptr [rsp+30H], xmm4
mov rbx, rcx
G_M42047_IG02: ;; offset=001AH
mov rcx, gword ptr [rbx+08H]
test rcx, rcx
jne SHORT G_M42047_IG04
G_M42047_IG03: ;; offset=0023H
xor rdx, rdx
xor eax, eax
jmp SHORT G_M42047_IG05
G_M42047_IG04: ;; offset=0029H
lea rdx, bword ptr [rcx+10H]
mov eax, dword ptr [rcx+08H]
G_M42047_IG05: ;; offset=0030H
xor ecx, ecx
cmp eax, 4
setge cl
xor r8, r8
mov gword ptr [rsp+20H], r8
G_M42047_IG06: ;; offset=0040H
mov dword ptr [rsp+28H], r8d
mov byte ptr [rsp+2CH], cl
mov byte ptr [rsp+2DH], 0
mov bword ptr [rsp+30H], rdx
mov dword ptr [rsp+38H], eax
test ecx, ecx
je SHORT G_M42047_IG12
G_M42047_IG07: ;; offset=005BH
mov ecx, dword ptr [rsp+28H]
mov esi, dword ptr [rsp+38H]
cmp ecx, esi
ja SHORT G_M42047_IG14
mov edi, ecx
add rdi, bword ptr [rsp+30H]
sub esi, ecx
jns SHORT G_M42047_IG08
mov rdx, 0x1DF80300008 ; ''
mov rcx, rdx
call [System.Diagnostics.Debug:Fail(System.String,System.String)]
G_M42047_IG08: ;; offset=0085H
mov edx, -1
cmp esi, 4
jl SHORT G_M42047_IG09
mov dword ptr [rdi], 0x203A6449 ;;; <----- "Id: " in utf8 (little-endian bytes 49 64 3A 20)
mov edx, 4
G_M42047_IG09: ;; offset=009AH
test edx, edx
jl SHORT G_M42047_IG10
add edx, dword ptr [rsp+28H]
mov dword ptr [rsp+28H], edx
jmp SHORT G_M42047_IG11
G_M42047_IG10: ;; offset=00A8H
mov byte ptr [rsp+2CH], 0
jmp SHORT G_M42047_IG12
G_M42047_IG11: ;; offset=00AFH
mov edx, dword ptr [rbx+10H]
lea rcx, [rsp+20H]
call [System.Text.Unicode.Utf8+TryWriteInterpolatedStringHandler:AppendFormatted[uint](uint):bool:this]
G_M42047_IG12: ;; offset=00BDH
xor eax, eax
cmp byte ptr [rsp+2CH], 0
setne al
G_M42047_IG13: ;; offset=00C7H
add rsp, 64
pop rbx
pop rsi
pop rdi
ret
G_M42047_IG14: ;; offset=00CFH
call [System.ThrowHelper:ThrowArgumentOutOfRangeException()]
int3
; Total bytes of code 214 |
|
Ok, reproduces for me when I run your benchmark in Release using BDN - checking now |
|
Interesting, when I run the benchmark with |
|
Ah, stop. There is a general problem with PGO 😢
|
src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs
Outdated
Show resolved
Hide resolved
|
I can work around it here by removing the IsKnownConstant, but that seems like something we need to fix in general for .NET 8 :-) |

@EgorBo, the right things don't seem to be happening here, or else I'm misunderstanding how it's supposed to work. I still see a call to ReadUtf8 in the asm.