Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ zerocopy-derive = { version = "=0.8.47", path = "zerocopy-derive" }
elain = "0.3.0"
itertools = "0.11"
rand = { version = "0.8.5", default-features = false, features = ["small_rng"] }
regex = "1.0"
rustversion = "1.0"
static_assertions = "1.1"
testutil = { path = "testutil" }
Expand Down
9 changes: 9 additions & 0 deletions benches/extend_vec_zeroed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use zerocopy::*;

#[path = "formats/coco_static_size.rs"]
mod format;

/// Codegen benchmark entry point wrapping `FromZeros::extend_vec_zeroed`.
///
/// Forwards `v` and `additional` unchanged to `FromZeros::extend_vec_zeroed`
/// for the element type `format::LocoPacket` (the `format` module is loaded
/// from `formats/coco_static_size.rs`).
///
/// Returns `Some(())` when the call succeeds and `None` when it returns an
/// error; the error value itself is discarded by `.ok()`.
///
/// `#[unsafe(no_mangle)]` keeps the symbol name literally
/// `bench_extend_vec_zeroed` — presumably so the generated assembly can be
/// located by name and compared against the checked-in snapshot; confirm
/// against the benchmark harness.
#[unsafe(no_mangle)]
fn bench_extend_vec_zeroed(v: &mut Vec<format::LocoPacket>, additional: usize) -> Option<()> {
// Kept as a single forwarding expression so the emitted code reflects the
// library call rather than wrapper logic.
FromZeros::extend_vec_zeroed(v, additional).ok()
}
60 changes: 60 additions & 0 deletions benches/extend_vec_zeroed.x86-64
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
bench_extend_vec_zeroed:
push r15
push r14
push r13
push r12
push rbx
sub rsp, 32
mov rbx, rdi
mov rax, qword ptr [rdi]
mov r12, qword ptr [rdi + 16]
mov rcx, rax
sub rcx, r12
cmp rsi, rcx
jbe .LBB6_3
mov r15, r12
add r15, rsi
jae .LBB6_6
.LBB6_2:
xor eax, eax
jmp .LBB6_5
.LBB6_3:
mov rax, qword ptr [rbx + 8]
lea r15, [r12 + rsi]
.LBB6_4:
lea rcx, [r12 + 2*r12]
lea rdi, [rax + 2*rcx]
add rsi, rsi
lea rdx, [rsi + 2*rsi]
xor esi, esi
call qword ptr [rip + memset@GOTPCREL]
mov qword ptr [rbx + 16], r15
mov al, 1
.LBB6_5:
add rsp, 32
pop rbx
pop r12
pop r13
pop r14
pop r15
ret
.LBB6_6:
mov r13, rsi
lea rcx, [rax + rax]
cmp r15, rcx
cmova rcx, r15
cmp rcx, 5
mov r14d, 4
cmovae r14, rcx
mov rdx, qword ptr [rbx + 8]
lea rdi, [rsp + 8]
mov rsi, rax
mov rcx, r14
call <alloc::raw_vec::RawVecInner>::finish_grow
cmp dword ptr [rsp + 8], 1
je .LBB6_2
mov rax, qword ptr [rsp + 16]
mov qword ptr [rbx + 8], rax
mov qword ptr [rbx], r14
mov rsi, r13
jmp .LBB6_4
147 changes: 147 additions & 0 deletions benches/extend_vec_zeroed.x86-64.mca
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
Iterations: 100
Instructions: 5400
Total Cycles: 6595
Total uOps: 6800

Dispatch Width: 4
uOps Per Cycle: 1.03
IPC: 0.82
Block RThroughput: 17.0


Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)

[1] [2] [3] [4] [5] [6] Instructions:
2 5 1.00 * push r15
2 5 1.00 * push r14
2 5 1.00 * push r13
2 5 1.00 * push r12
2 5 1.00 * push rbx
1 1 0.33 sub rsp, 32
1 1 0.33 mov rbx, rdi
1 5 0.50 * mov rax, qword ptr [rdi]
1 5 0.50 * mov r12, qword ptr [rdi + 16]
1 1 0.33 mov rcx, rax
1 1 0.33 sub rcx, r12
1 1 0.33 cmp rsi, rcx
1 1 1.00 jbe .LBB6_3
1 1 0.33 mov r15, r12
1 1 0.33 add r15, rsi
1 1 1.00 jae .LBB6_6
1 0 0.25 xor eax, eax
1 1 1.00 jmp .LBB6_5
1 5 0.50 * mov rax, qword ptr [rbx + 8]
1 1 0.50 lea r15, [r12 + rsi]
1 1 0.50 lea rcx, [r12 + 2*r12]
1 1 0.50 lea rdi, [rax + 2*rcx]
1 1 0.33 add rsi, rsi
1 1 0.50 lea rdx, [rsi + 2*rsi]
1 0 0.25 xor esi, esi
4 7 1.00 * call qword ptr [rip + memset@GOTPCREL]
1 1 1.00 * mov qword ptr [rbx + 16], r15
1 1 0.33 mov al, 1
1 1 0.33 add rsp, 32
1 6 0.50 * pop rbx
1 6 0.50 * pop r12
1 6 0.50 * pop r13
1 6 0.50 * pop r14
1 6 0.50 * pop r15
1 1 1.00 U ret
1 1 0.33 mov r13, rsi
1 1 0.50 lea rcx, [rax + rax]
1 1 0.33 cmp r15, rcx
3 3 1.00 cmova rcx, r15
1 1 0.33 cmp rcx, 5
1 1 0.33 mov r14d, 4
2 2 0.67 cmovae r14, rcx
1 5 0.50 * mov rdx, qword ptr [rbx + 8]
1 1 0.50 lea rdi, [rsp + 8]
1 1 0.33 mov rsi, rax
1 1 0.33 mov rcx, r14
3 5 1.00 call <alloc::raw_vec::RawVecInner>::finish_grow
2 6 0.50 * cmp dword ptr [rsp + 8], 1
1 1 1.00 je .LBB6_2
1 5 0.50 * mov rax, qword ptr [rsp + 16]
1 1 1.00 * mov qword ptr [rbx + 8], rax
1 1 1.00 * mov qword ptr [rbx], r14
1 1 0.33 mov rsi, r13
1 1 1.00 jmp .LBB6_4


Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23


Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 12.00 12.00 10.00 13.00 11.00 11.00

Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - 0.49 0.51 push r15
- - - - 1.00 - 0.51 0.49 push r14
- - - - 1.00 - 0.50 0.50 push r13
- - - - 1.00 - 0.50 0.50 push r12
- - - - 1.00 - 0.50 0.50 push rbx
- - 0.01 0.99 - - - - sub rsp, 32
- - - - - 1.00 - - mov rbx, rdi
- - - - - - 0.50 0.50 mov rax, qword ptr [rdi]
- - - - - - 0.50 0.50 mov r12, qword ptr [rdi + 16]
- - - 1.00 - - - - mov rcx, rax
- - - 0.99 - 0.01 - - sub rcx, r12
- - - - - 1.00 - - cmp rsi, rcx
- - - - - 1.00 - - jbe .LBB6_3
- - 0.01 0.98 - 0.01 - - mov r15, r12
- - 0.99 0.01 - - - - add r15, rsi
- - - - - 1.00 - - jae .LBB6_6
- - - - - - - - xor eax, eax
- - - - - 1.00 - - jmp .LBB6_5
- - - - - - 0.50 0.50 mov rax, qword ptr [rbx + 8]
- - 1.00 - - - - - lea r15, [r12 + rsi]
- - 0.98 0.02 - - - - lea rcx, [r12 + 2*r12]
- - 0.99 0.01 - - - - lea rdi, [rax + 2*rcx]
- - - 1.00 - - - - add rsi, rsi
- - 0.99 0.01 - - - - lea rdx, [rsi + 2*rsi]
- - - - - - - - xor esi, esi
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + memset@GOTPCREL]
- - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 16], r15
- - 0.01 0.99 - - - - mov al, 1
- - 1.00 - - - - - add rsp, 32
- - - - - - 0.50 0.50 pop rbx
- - - - - - 0.50 0.50 pop r12
- - - - - - 0.50 0.50 pop r13
- - - - - - 0.50 0.50 pop r14
- - - - - - 0.50 0.50 pop r15
- - - - - 1.00 - - ret
- - 1.00 - - - - - mov r13, rsi
- - 0.01 0.99 - - - - lea rcx, [rax + rax]
- - 0.99 0.01 - - - - cmp r15, rcx
- - 2.00 0.01 - 0.99 - - cmova rcx, r15
- - 0.01 0.99 - - - - cmp rcx, 5
- - 0.01 0.99 - - - - mov r14d, 4
- - 1.00 0.01 - 0.99 - - cmovae r14, rcx
- - - - - - 0.50 0.50 mov rdx, qword ptr [rbx + 8]
- - 0.01 0.99 - - - - lea rdi, [rsp + 8]
- - - 1.00 - - - - mov rsi, rax
- - - 0.01 - 0.99 - - mov rcx, r14
- - - - 1.00 1.00 0.50 0.50 call <alloc::raw_vec::RawVecInner>::finish_grow
- - - 0.99 - 0.01 0.50 0.50 cmp dword ptr [rsp + 8], 1
- - - - - 1.00 - - je .LBB6_2
- - - - - - 0.50 0.50 mov rax, qword ptr [rsp + 16]
- - - - 1.00 - 0.49 0.51 mov qword ptr [rbx + 8], rax
- - - - 1.00 - 0.51 0.49 mov qword ptr [rbx], r14
- - 0.99 0.01 - - - - mov rsi, r13
- - - - - 1.00 - - jmp .LBB6_4
13 changes: 13 additions & 0 deletions benches/insert_vec_zeroed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use zerocopy::*;

#[path = "formats/coco_static_size.rs"]
mod format;

/// Codegen benchmark entry point wrapping `FromZeros::insert_vec_zeroed`.
///
/// Forwards `v`, `position` and `additional` unchanged to
/// `FromZeros::insert_vec_zeroed` for the element type `format::LocoPacket`
/// (the `format` module is loaded from `formats/coco_static_size.rs`).
///
/// Returns `Some(())` when the call succeeds and `None` when it returns an
/// error; the error value itself is discarded by `.ok()`.
///
/// NOTE(review): the checked-in x86-64 listing for this function contains a
/// branch to `core::panicking::panic` that is taken when `position` is
/// greater than the vector's current length, so callers should presumably
/// keep `position <= v.len()` — confirm against the zerocopy documentation.
///
/// `#[unsafe(no_mangle)]` keeps the symbol name literally
/// `bench_insert_vec_zeroed` — presumably so the generated assembly can be
/// located by name and compared against the checked-in snapshot; confirm
/// against the benchmark harness.
#[unsafe(no_mangle)]
fn bench_insert_vec_zeroed(
v: &mut Vec<format::LocoPacket>,
position: usize,
additional: usize,
) -> Option<()> {
// Kept as a single forwarding expression so the emitted code reflects the
// library call rather than wrapper logic.
FromZeros::insert_vec_zeroed(v, position, additional).ok()
}
79 changes: 79 additions & 0 deletions benches/insert_vec_zeroed.x86-64
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
bench_insert_vec_zeroed:
push rbp
push r15
push r14
push r13
push r12
push rbx
sub rsp, 24
mov r12, qword ptr [rdi + 16]
mov r13, r12
sub r13, rsi
jb .LBB6_10
mov rbx, rdi
mov rax, qword ptr [rdi]
mov rcx, rax
sub rcx, r12
cmp rdx, rcx
jbe .LBB6_4
add r12, rdx
jae .LBB6_7
.LBB6_3:
xor eax, eax
jmp .LBB6_6
.LBB6_4:
mov rax, qword ptr [rbx + 8]
add r12, rdx
.LBB6_5:
lea rcx, [rsi + 2*rsi]
lea r14, [rax + 2*rcx]
add rdx, rdx
lea r15, [rdx + 2*rdx]
lea rdi, [r14 + r15]
add r13, r13
lea rdx, [2*r13]
add rdx, r13
mov rsi, r14
call qword ptr [rip + memmove@GOTPCREL]
mov rdi, r14
xor esi, esi
mov rdx, r15
call qword ptr [rip + memset@GOTPCREL]
mov qword ptr [rbx + 16], r12
mov al, 1
.LBB6_6:
add rsp, 24
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
.LBB6_7:
mov r15, rsi
mov rbp, rdx
lea rcx, [rax + rax]
cmp r12, rcx
cmova rcx, r12
cmp rcx, 5
mov r14d, 4
cmovae r14, rcx
mov rdx, qword ptr [rbx + 8]
mov rdi, rsp
mov rsi, rax
mov rcx, r14
call <alloc::raw_vec::RawVecInner>::finish_grow
cmp dword ptr [rsp], 1
je .LBB6_3
mov rax, qword ptr [rsp + 8]
mov qword ptr [rbx + 8], rax
mov qword ptr [rbx], r14
mov rdx, rbp
mov rsi, r15
jmp .LBB6_5
.LBB6_10:
lea rdi, [rip + .Lanon.HASH.1]
lea rdx, [rip + .Lanon.HASH.3]
mov esi, 37
call qword ptr [rip + core::panicking::panic@GOTPCREL]
Loading
Loading