diff --git a/benches/as_bytes_dynamic_size.rs b/benches/as_bytes_dynamic_size.rs new file mode 100644 index 0000000000..68cd1d6f41 --- /dev/null +++ b/benches/as_bytes_dynamic_size.rs @@ -0,0 +1,9 @@ +use zerocopy::*; + +#[path = "formats/coco_dynamic_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_as_bytes_dynamic_size(source: &format::CocoPacket) -> &[u8] { + source.as_bytes() +} diff --git a/benches/as_bytes_dynamic_size.x86-64 b/benches/as_bytes_dynamic_size.x86-64 new file mode 100644 index 0000000000..f68bad6126 --- /dev/null +++ b/benches/as_bytes_dynamic_size.x86-64 @@ -0,0 +1,5 @@ +bench_as_bytes_dynamic_size: + mov rax, rdi + lea rdx, [2*rsi + 5] + and rdx, -2 + ret diff --git a/benches/as_bytes_dynamic_size.x86-64.mca b/benches/as_bytes_dynamic_size.x86-64.mca new file mode 100644 index 0000000000..c3b92a9a95 --- /dev/null +++ b/benches/as_bytes_dynamic_size.x86-64.mca @@ -0,0 +1,47 @@ +Iterations: 100 +Instructions: 400 +Total Cycles: 137 +Total uOps: 400 + +Dispatch Width: 4 +uOps Per Cycle: 2.92 +IPC: 2.92 +Block RThroughput: 1.0 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rax, rdi + 1 1 0.50 lea rdx, [2*rsi + 5] + 1 1 0.33 and rdx, -2 + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 1.33 1.33 - 1.34 - - + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - - 0.66 - 0.34 - - mov rax, rdi + - - 0.33 0.67 - - - - lea rdx, [2*rsi + 5] + - - 1.00 - - - - - and rdx, -2 + - - - - - 1.00 - - ret diff --git a/benches/as_bytes_static_size.rs b/benches/as_bytes_static_size.rs new file mode 100644 index 0000000000..2ad738e954 --- /dev/null +++ b/benches/as_bytes_static_size.rs @@ -0,0 +1,9 
@@ +use zerocopy::*; + +#[path = "formats/coco_static_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_as_bytes_static_size(source: &format::CocoPacket) -> &[u8] { + source.as_bytes() +} diff --git a/benches/as_bytes_static_size.x86-64 b/benches/as_bytes_static_size.x86-64 new file mode 100644 index 0000000000..213e74ab54 --- /dev/null +++ b/benches/as_bytes_static_size.x86-64 @@ -0,0 +1,4 @@ +bench_as_bytes_static_size: + mov rax, rdi + mov edx, 6 + ret diff --git a/benches/as_bytes_static_size.x86-64.mca b/benches/as_bytes_static_size.x86-64.mca new file mode 100644 index 0000000000..ae04a6ba90 --- /dev/null +++ b/benches/as_bytes_static_size.x86-64.mca @@ -0,0 +1,45 @@ +Iterations: 100 +Instructions: 300 +Total Cycles: 104 +Total uOps: 300 + +Dispatch Width: 4 +uOps Per Cycle: 2.88 +IPC: 2.88 +Block RThroughput: 1.0 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rax, rdi + 1 1 0.33 mov edx, 6 + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 0.99 1.00 - 1.01 - - + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - 0.99 - - 0.01 - - mov rax, rdi + - - - 1.00 - - - - mov edx, 6 + - - - - - 1.00 - - ret diff --git a/benches/formats/coco_dynamic_size.rs b/benches/formats/coco_dynamic_size.rs index f4626cf949..fe6d7ad89a 100644 --- a/benches/formats/coco_dynamic_size.rs +++ b/benches/formats/coco_dynamic_size.rs @@ -1,23 +1,27 @@ use zerocopy_derive::*; // The only valid value of this type are the bytes `0xC0C0`. 
-#[derive(TryFromBytes, KnownLayout, Immutable)] +#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)] #[repr(u16)] pub enum C0C0 { _XC0C0 = 0xC0C0, } -#[derive(FromBytes, KnownLayout, Immutable)] -#[repr(C, align(2))] -pub struct Packet<Magic> { - magic_number: Magic, - mug_size: u8, - temperature: u8, - marshmallows: [[u8; 2]], +macro_rules! define_packet { + ($name: ident, $trait: ident, $leading_field: ty) => { + #[derive($trait, KnownLayout, Immutable, IntoBytes)] + #[repr(C, align(2))] + pub struct $name { + magic_number: $leading_field, + mug_size: u8, + temperature: u8, + marshmallows: [[u8; 2]], + } + }; } -/// A packet begining with the magic number `0xC0C0`. -pub type CocoPacket = Packet<C0C0>; +// Packet begins with bytes 0xC0C0. +define_packet!(CocoPacket, TryFromBytes, C0C0); -/// A packet beginning with any two initialized bytes. -pub type LocoPacket = Packet<[u8; 2]>; +// Packet begins with any two bytes. +define_packet!(LocoPacket, FromBytes, [u8; 2]); diff --git a/benches/formats/coco_static_size.rs b/benches/formats/coco_static_size.rs index 6d37cfd798..0839497e17 100644 --- a/benches/formats/coco_static_size.rs +++ b/benches/formats/coco_static_size.rs @@ -1,23 +1,27 @@ use zerocopy_derive::*; // The only valid value of this type are the bytes `0xC0C0`. -#[derive(TryFromBytes, KnownLayout, Immutable)] +#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)] #[repr(u16)] pub enum C0C0 { _XC0C0 = 0xC0C0, } -#[derive(FromBytes, KnownLayout, Immutable)] -#[repr(C, align(2))] -pub struct Packet<Magic> { - magic_number: Magic, - mug_size: u8, - temperature: u8, - marshmallows: [u8; 2], +macro_rules! define_packet { + ($name: ident, $trait: ident, $leading_field: ty) => { + #[derive($trait, KnownLayout, Immutable, IntoBytes)] + #[repr(C, align(2))] + pub struct $name { + magic_number: $leading_field, + mug_size: u8, + temperature: u8, + marshmallows: [u8; 2], + } + }; } -/// A packet begining with the magic number `0xC0C0`. 
-pub type CocoPacket = Packet<C0C0>; +// Packet begins with bytes 0xC0C0. +define_packet!(CocoPacket, TryFromBytes, C0C0); -/// A packet beginning with any two initialized bytes. -pub type LocoPacket = Packet<[u8; 2]>; +// Packet begins with any two bytes. +define_packet!(LocoPacket, FromBytes, [u8; 2]); diff --git a/benches/write_to_dynamic_size.rs b/benches/write_to_dynamic_size.rs new file mode 100644 index 0000000000..c126a1468c --- /dev/null +++ b/benches/write_to_dynamic_size.rs @@ -0,0 +1,9 @@ +use zerocopy::*; + +#[path = "formats/coco_dynamic_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_write_to_dynamic_size(source: &format::CocoPacket, destination: &mut [u8]) -> Option<()> { + source.write_to(destination).ok() +} diff --git a/benches/write_to_dynamic_size.x86-64 b/benches/write_to_dynamic_size.x86-64 new file mode 100644 index 0000000000..c5abb17f7e --- /dev/null +++ b/benches/write_to_dynamic_size.x86-64 @@ -0,0 +1,21 @@ +bench_write_to_dynamic_size: + push r14 + push rbx + push rax + mov rbx, rcx + lea r14, [2*rsi + 5] + and r14, -2 + cmp rcx, r14 + jne .LBB5_2 + mov rax, rdi + mov rdi, rdx + mov rsi, rax + mov rdx, rbx + call qword ptr [rip + memcpy@GOTPCREL] +.LBB5_2: + cmp rbx, r14 + sete al + add rsp, 8 + pop rbx + pop r14 + ret diff --git a/benches/write_to_dynamic_size.x86-64.mca b/benches/write_to_dynamic_size.x86-64.mca new file mode 100644 index 0000000000..5b2c08a31a --- /dev/null +++ b/benches/write_to_dynamic_size.x86-64.mca @@ -0,0 +1,77 @@ +Iterations: 100 +Instructions: 1900 +Total Cycles: 2890 +Total uOps: 2500 + +Dispatch Width: 4 +uOps Per Cycle: 0.87 +IPC: 0.66 +Block RThroughput: 6.3 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 2 5 1.00 * push r14 + 2 5 1.00 * push rbx + 2 5 1.00 * push rax + 1 1 0.33 mov rbx, rcx + 1 1 0.50 lea r14, [2*rsi + 5] + 1 1 0.33 and r14, -2 + 1 1 0.33 cmp rcx, r14 + 1 1 1.00 jne 
.LBB5_2 + 1 1 0.33 mov rax, rdi + 1 1 0.33 mov rdi, rdx + 1 1 0.33 mov rsi, rax + 1 1 0.33 mov rdx, rbx + 4 7 1.00 * call qword ptr [rip + memcpy@GOTPCREL] + 1 1 0.33 cmp rbx, r14 + 1 1 0.50 sete al + 1 1 0.33 add rsp, 8 + 1 6 0.50 * pop rbx + 1 6 0.50 * pop r14 + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 4.66 4.64 4.00 4.70 4.00 3.00 + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - - - 1.00 - - 1.00 push r14 + - - - - 1.00 - 1.00 - push rbx + - - - - 1.00 - - 1.00 push rax + - - 0.02 0.97 - 0.01 - - mov rbx, rcx + - - 0.97 0.03 - - - - lea r14, [2*rsi + 5] + - - 0.63 0.35 - 0.02 - - and r14, -2 + - - 0.31 0.34 - 0.35 - - cmp rcx, r14 + - - - - - 1.00 - - jne .LBB5_2 + - - 0.33 0.33 - 0.34 - - mov rax, rdi + - - 0.36 0.31 - 0.33 - - mov rdi, rdx + - - 0.33 0.35 - 0.32 - - mov rsi, rax + - - 0.35 0.63 - 0.02 - - mov rdx, rbx + - - - - 1.00 1.00 2.00 - call qword ptr [rip + memcpy@GOTPCREL] + - - 0.65 0.35 - - - - cmp rbx, r14 + - - 0.69 - - 0.31 - - sete al + - - 0.02 0.98 - - - - add rsp, 8 + - - - - - - - 1.00 pop rbx + - - - - - - 1.00 - pop r14 + - - - - - 1.00 - - ret diff --git a/benches/write_to_prefix_dynamic_size.rs b/benches/write_to_prefix_dynamic_size.rs new file mode 100644 index 0000000000..a54d327731 --- /dev/null +++ b/benches/write_to_prefix_dynamic_size.rs @@ -0,0 +1,12 @@ +use zerocopy::*; + +#[path = "formats/coco_dynamic_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_write_to_prefix_dynamic_size( + source: &format::CocoPacket, + destination: &mut [u8], +) -> Option<()> { + source.write_to_prefix(destination).ok() +} diff --git a/benches/write_to_prefix_dynamic_size.x86-64 b/benches/write_to_prefix_dynamic_size.x86-64 new file mode 100644 index 0000000000..d7779c6c91 --- /dev/null +++ 
b/benches/write_to_prefix_dynamic_size.x86-64 @@ -0,0 +1,21 @@ +bench_write_to_prefix_dynamic_size: + push r14 + push rbx + push rax + mov rbx, rcx + lea r14, [2*rsi + 5] + and r14, -2 + cmp r14, rcx + ja .LBB5_2 + mov rax, rdi + mov rdi, rdx + mov rsi, rax + mov rdx, r14 + call qword ptr [rip + memcpy@GOTPCREL] +.LBB5_2: + cmp r14, rbx + setbe al + add rsp, 8 + pop rbx + pop r14 + ret diff --git a/benches/write_to_prefix_dynamic_size.x86-64.mca b/benches/write_to_prefix_dynamic_size.x86-64.mca new file mode 100644 index 0000000000..4cebe24d4f --- /dev/null +++ b/benches/write_to_prefix_dynamic_size.x86-64.mca @@ -0,0 +1,77 @@ +Iterations: 100 +Instructions: 1900 +Total Cycles: 2890 +Total uOps: 2600 + +Dispatch Width: 4 +uOps Per Cycle: 0.90 +IPC: 0.66 +Block RThroughput: 6.5 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 2 5 1.00 * push r14 + 2 5 1.00 * push rbx + 2 5 1.00 * push rax + 1 1 0.33 mov rbx, rcx + 1 1 0.50 lea r14, [2*rsi + 5] + 1 1 0.33 and r14, -2 + 1 1 0.33 cmp r14, rcx + 1 1 1.00 ja .LBB5_2 + 1 1 0.33 mov rax, rdi + 1 1 0.33 mov rdi, rdx + 1 1 0.33 mov rsi, rax + 1 1 0.33 mov rdx, r14 + 4 7 1.00 * call qword ptr [rip + memcpy@GOTPCREL] + 1 1 0.33 cmp r14, rbx + 2 2 1.00 setbe al + 1 1 0.33 add rsp, 8 + 1 6 0.50 * pop rbx + 1 6 0.50 * pop r14 + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 5.47 4.49 4.00 5.04 4.00 3.00 + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - - - 1.00 - - 1.00 push r14 + - - - - 1.00 - 1.00 - push rbx + - - - - 1.00 - - 1.00 push rax + - - 0.48 0.51 - 0.01 - - mov rbx, rcx + - - 0.51 0.49 - - - - lea r14, [2*rsi + 5] + - - 0.48 0.05 - 0.47 - - and r14, -2 + - - 0.48 0.49 
- 0.03 - - cmp r14, rcx + - - - - - 1.00 - - ja .LBB5_2 + - - 0.04 0.47 - 0.49 - - mov rax, rdi + - - 0.49 0.03 - 0.48 - - mov rdi, rdx + - - 0.03 0.48 - 0.49 - - mov rsi, rax + - - 0.48 0.51 - 0.01 - - mov rdx, r14 + - - - - 1.00 1.00 2.00 - call qword ptr [rip + memcpy@GOTPCREL] + - - 0.51 0.49 - - - - cmp r14, rbx + - - 1.94 - - 0.06 - - setbe al + - - 0.03 0.97 - - - - add rsp, 8 + - - - - - - - 1.00 pop rbx + - - - - - - 1.00 - pop r14 + - - - - - 1.00 - - ret diff --git a/benches/write_to_prefix_static_size.rs b/benches/write_to_prefix_static_size.rs new file mode 100644 index 0000000000..826222c129 --- /dev/null +++ b/benches/write_to_prefix_static_size.rs @@ -0,0 +1,12 @@ +use zerocopy::*; + +#[path = "formats/coco_static_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_write_to_prefix_static_size( + source: &format::CocoPacket, + destination: &mut [u8], +) -> Option<()> { + source.write_to_prefix(destination).ok() +} diff --git a/benches/write_to_prefix_static_size.x86-64 b/benches/write_to_prefix_static_size.x86-64 new file mode 100644 index 0000000000..9cf0662953 --- /dev/null +++ b/benches/write_to_prefix_static_size.x86-64 @@ -0,0 +1,11 @@ +bench_write_to_prefix_static_size: + cmp rdx, 6 + jb .LBB5_2 + movzx eax, word ptr [rdi + 4] + mov word ptr [rsi + 4], ax + mov eax, dword ptr [rdi] + mov dword ptr [rsi], eax +.LBB5_2: + cmp rdx, 6 + setae al + ret diff --git a/benches/write_to_prefix_static_size.x86-64.mca b/benches/write_to_prefix_static_size.x86-64.mca new file mode 100644 index 0000000000..5d17200abd --- /dev/null +++ b/benches/write_to_prefix_static_size.x86-64.mca @@ -0,0 +1,57 @@ +Iterations: 100 +Instructions: 900 +Total Cycles: 233 +Total uOps: 900 + +Dispatch Width: 4 +uOps Per Cycle: 3.86 +IPC: 3.86 +Block RThroughput: 2.3 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 cmp rdx, 6 + 1 1 1.00 jb .LBB5_2 + 1 
5 0.50 * movzx eax, word ptr [rdi + 4] + 1 1 1.00 * mov word ptr [rsi + 4], ax + 1 5 0.50 * mov eax, dword ptr [rdi] + 1 1 1.00 * mov dword ptr [rsi], eax + 1 1 0.33 cmp rdx, 6 + 1 1 0.50 setae al + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 1.50 1.49 2.00 2.01 2.00 2.00 + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - 0.25 0.74 - 0.01 - - cmp rdx, 6 + - - - - - 1.00 - - jb .LBB5_2 + - - - - - - 0.50 0.50 movzx eax, word ptr [rdi + 4] + - - - - 1.00 - 0.48 0.52 mov word ptr [rsi + 4], ax + - - - - - - 0.52 0.48 mov eax, dword ptr [rdi] + - - - - 1.00 - 0.50 0.50 mov dword ptr [rsi], eax + - - 0.25 0.75 - - - - cmp rdx, 6 + - - 1.00 - - - - - setae al + - - - - - 1.00 - - ret diff --git a/benches/write_to_static_size.rs b/benches/write_to_static_size.rs new file mode 100644 index 0000000000..3bb9435c5a --- /dev/null +++ b/benches/write_to_static_size.rs @@ -0,0 +1,9 @@ +use zerocopy::*; + +#[path = "formats/coco_static_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_write_to_static_size(source: &format::CocoPacket, destination: &mut [u8]) -> Option<()> { + source.write_to(destination).ok() +} diff --git a/benches/write_to_static_size.x86-64 b/benches/write_to_static_size.x86-64 new file mode 100644 index 0000000000..d6413e0fd6 --- /dev/null +++ b/benches/write_to_static_size.x86-64 @@ -0,0 +1,11 @@ +bench_write_to_static_size: + cmp rdx, 6 + jne .LBB5_2 + movzx eax, word ptr [rdi + 4] + mov word ptr [rsi + 4], ax + mov eax, dword ptr [rdi] + mov dword ptr [rsi], eax +.LBB5_2: + cmp rdx, 6 + sete al + ret diff --git a/benches/write_to_static_size.x86-64.mca b/benches/write_to_static_size.x86-64.mca new file mode 100644 index 0000000000..cc5bb1d26f --- /dev/null +++ b/benches/write_to_static_size.x86-64.mca @@ -0,0 
+1,57 @@ +Iterations: 100 +Instructions: 900 +Total Cycles: 233 +Total uOps: 900 + +Dispatch Width: 4 +uOps Per Cycle: 3.86 +IPC: 3.86 +Block RThroughput: 2.3 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 cmp rdx, 6 + 1 1 1.00 jne .LBB5_2 + 1 5 0.50 * movzx eax, word ptr [rdi + 4] + 1 1 1.00 * mov word ptr [rsi + 4], ax + 1 5 0.50 * mov eax, dword ptr [rdi] + 1 1 1.00 * mov dword ptr [rsi], eax + 1 1 0.33 cmp rdx, 6 + 1 1 0.50 sete al + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 1.50 1.49 2.00 2.01 2.00 2.00 + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - 0.25 0.74 - 0.01 - - cmp rdx, 6 + - - - - - 1.00 - - jne .LBB5_2 + - - - - - - 0.50 0.50 movzx eax, word ptr [rdi + 4] + - - - - 1.00 - 0.48 0.52 mov word ptr [rsi + 4], ax + - - - - - - 0.52 0.48 mov eax, dword ptr [rdi] + - - - - 1.00 - 0.50 0.50 mov dword ptr [rsi], eax + - - 0.25 0.75 - - - - cmp rdx, 6 + - - 1.00 - - - - - sete al + - - - - - 1.00 - - ret diff --git a/benches/write_to_suffix_dynamic_size.rs b/benches/write_to_suffix_dynamic_size.rs new file mode 100644 index 0000000000..9fa6b91cda --- /dev/null +++ b/benches/write_to_suffix_dynamic_size.rs @@ -0,0 +1,12 @@ +use zerocopy::*; + +#[path = "formats/coco_dynamic_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_write_to_suffix_dynamic_size( + source: &format::CocoPacket, + destination: &mut [u8], +) -> Option<()> { + source.write_to_suffix(destination).ok() +} diff --git a/benches/write_to_suffix_dynamic_size.x86-64 b/benches/write_to_suffix_dynamic_size.x86-64 new file mode 100644 index 0000000000..75f349562d --- /dev/null +++ b/benches/write_to_suffix_dynamic_size.x86-64 
@@ -0,0 +1,22 @@ +bench_write_to_suffix_dynamic_size: + push r14 + push rbx + push rax + mov rbx, rcx + lea r14, [2*rsi + 5] + and r14, -2 + sub rcx, r14 + jb .LBB5_2 + mov rax, rdi + add rdx, rcx + mov rdi, rdx + mov rsi, rax + mov rdx, r14 + call qword ptr [rip + memcpy@GOTPCREL] +.LBB5_2: + cmp rbx, r14 + setae al + add rsp, 8 + pop rbx + pop r14 + ret diff --git a/benches/write_to_suffix_dynamic_size.x86-64.mca b/benches/write_to_suffix_dynamic_size.x86-64.mca new file mode 100644 index 0000000000..95cb9dfe2e --- /dev/null +++ b/benches/write_to_suffix_dynamic_size.x86-64.mca @@ -0,0 +1,79 @@ +Iterations: 100 +Instructions: 2000 +Total Cycles: 2890 +Total uOps: 2600 + +Dispatch Width: 4 +uOps Per Cycle: 0.90 +IPC: 0.69 +Block RThroughput: 6.5 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 2 5 1.00 * push r14 + 2 5 1.00 * push rbx + 2 5 1.00 * push rax + 1 1 0.33 mov rbx, rcx + 1 1 0.50 lea r14, [2*rsi + 5] + 1 1 0.33 and r14, -2 + 1 1 0.33 sub rcx, r14 + 1 1 1.00 jb .LBB5_2 + 1 1 0.33 mov rax, rdi + 1 1 0.33 add rdx, rcx + 1 1 0.33 mov rdi, rdx + 1 1 0.33 mov rsi, rax + 1 1 0.33 mov rdx, r14 + 4 7 1.00 * call qword ptr [rip + memcpy@GOTPCREL] + 1 1 0.33 cmp rbx, r14 + 1 1 0.50 setae al + 1 1 0.33 add rsp, 8 + 1 6 0.50 * pop rbx + 1 6 0.50 * pop r14 + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 4.98 4.98 4.00 5.04 4.00 3.00 + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - - - 1.00 - - 1.00 push r14 + - - - - 1.00 - 1.00 - push rbx + - - - - 1.00 - - 1.00 push rax + - - 0.94 0.05 - 0.01 - - mov rbx, rcx + - - 0.06 0.94 - - - - lea r14, [2*rsi + 5] + - - 0.93 0.02 - 0.05 - - and r14, -2 + - - 0.05 0.94 - 0.01 
- - sub rcx, r14 + - - - - - 1.00 - - jb .LBB5_2 + - - 0.02 0.04 - 0.94 - - mov rax, rdi + - - 0.03 0.97 - - - - add rdx, rcx + - - 0.95 0.05 - - - - mov rdi, rdx + - - 0.94 0.03 - 0.03 - - mov rsi, rax + - - 0.01 0.03 - 0.96 - - mov rdx, r14 + - - - - 1.00 1.00 2.00 - call qword ptr [rip + memcpy@GOTPCREL] + - - 0.05 0.94 - 0.01 - - cmp rbx, r14 + - - 0.97 - - 0.03 - - setae al + - - 0.03 0.97 - - - - add rsp, 8 + - - - - - - - 1.00 pop rbx + - - - - - - 1.00 - pop r14 + - - - - - 1.00 - - ret diff --git a/benches/write_to_suffix_static_size.rs b/benches/write_to_suffix_static_size.rs new file mode 100644 index 0000000000..1c95aba4b1 --- /dev/null +++ b/benches/write_to_suffix_static_size.rs @@ -0,0 +1,12 @@ +use zerocopy::*; + +#[path = "formats/coco_static_size.rs"] +mod format; + +#[unsafe(no_mangle)] +fn bench_write_to_suffix_static_size( + source: &format::CocoPacket, + destination: &mut [u8], +) -> Option<()> { + source.write_to_suffix(destination).ok() +} diff --git a/benches/write_to_suffix_static_size.x86-64 b/benches/write_to_suffix_static_size.x86-64 new file mode 100644 index 0000000000..934aa370d4 --- /dev/null +++ b/benches/write_to_suffix_static_size.x86-64 @@ -0,0 +1,11 @@ +bench_write_to_suffix_static_size: + cmp rdx, 6 + jb .LBB5_2 + movzx eax, word ptr [rdi + 4] + mov word ptr [rsi + rdx - 2], ax + mov eax, dword ptr [rdi] + mov dword ptr [rsi + rdx - 6], eax +.LBB5_2: + cmp rdx, 6 + setae al + ret diff --git a/benches/write_to_suffix_static_size.x86-64.mca b/benches/write_to_suffix_static_size.x86-64.mca new file mode 100644 index 0000000000..6b18e4a445 --- /dev/null +++ b/benches/write_to_suffix_static_size.x86-64.mca @@ -0,0 +1,57 @@ +Iterations: 100 +Instructions: 900 +Total Cycles: 233 +Total uOps: 900 + +Dispatch Width: 4 +uOps Per Cycle: 3.86 +IPC: 3.86 +Block RThroughput: 2.3 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 
1 1 0.33 cmp rdx, 6 + 1 1 1.00 jb .LBB5_2 + 1 5 0.50 * movzx eax, word ptr [rdi + 4] + 1 1 1.00 * mov word ptr [rsi + rdx - 2], ax + 1 5 0.50 * mov eax, dword ptr [rdi] + 1 1 1.00 * mov dword ptr [rsi + rdx - 6], eax + 1 1 0.33 cmp rdx, 6 + 1 1 0.50 setae al + 1 1 1.00 U ret + + +Resources: +[0] - SBDivider +[1] - SBFPDivider +[2] - SBPort0 +[3] - SBPort1 +[4] - SBPort4 +[5] - SBPort5 +[6.0] - SBPort23 +[6.1] - SBPort23 + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] + - - 1.50 1.49 2.00 2.01 2.00 2.00 + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: + - - 0.25 0.74 - 0.01 - - cmp rdx, 6 + - - - - - 1.00 - - jb .LBB5_2 + - - - - - - 0.50 0.50 movzx eax, word ptr [rdi + 4] + - - - - 1.00 - 0.48 0.52 mov word ptr [rsi + rdx - 2], ax + - - - - - - 0.52 0.48 mov eax, dword ptr [rdi] + - - - - 1.00 - 0.50 0.50 mov dword ptr [rsi + rdx - 6], eax + - - 0.25 0.75 - - - - cmp rdx, 6 + - - 1.00 - - - - - setae al + - - - - - 1.00 - - ret diff --git a/src/lib.rs b/src/lib.rs index f85a67b45b..c7a038d583 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5563,6 +5563,24 @@ pub unsafe trait IntoBytes { /// /// assert_eq!(bytes, [0, 1, 2, 3, 4, 5, 6, 7]); /// ``` + /// + #[doc = codegen_section!( + header = "h5", + bench = "as_bytes", + format = "coco", + arity = 2, + [ + open + @index 1 + @title "Sized" + @variant "static_size" + ], + [ + @index 2 + @title "Unsized" + @variant "dynamic_size" + ] + )] #[must_use = "has no side effects"] #[inline(always)] fn as_bytes(&self) -> &[u8] @@ -5635,6 +5653,10 @@ pub unsafe trait IntoBytes { /// checksum: [1, 0], /// }); /// ``` + /// + #[doc = codegen_header!("h5", "as_mut_bytes")] + /// + /// See [`IntoBytes::as_bytes`](#method.as_bytes.codegen). 
#[must_use = "has no side effects"] #[inline(always)] fn as_mut_bytes(&mut self) -> &mut [u8] @@ -5715,6 +5737,24 @@ pub unsafe trait IntoBytes { /// assert!(write_result.is_err()); /// assert_eq!(excessive_bytes, [0u8; 128]); /// ``` + /// + #[doc = codegen_section!( + header = "h5", + bench = "write_to", + format = "coco", + arity = 2, + [ + open + @index 1 + @title "Sized" + @variant "static_size" + ], + [ + @index 2 + @title "Unsized" + @variant "dynamic_size" + ] + )] #[must_use = "callers should check the return value to see if the operation succeeded"] #[inline] #[allow(clippy::mut_from_ref)] // False positive: `&self -> &mut [u8]` @@ -5782,6 +5822,24 @@ pub unsafe trait IntoBytes { /// assert!(write_result.is_err()); /// assert_eq!(insufficient_bytes, [0, 0]); /// ``` + /// + #[doc = codegen_section!( + header = "h5", + bench = "write_to_prefix", + format = "coco", + arity = 2, + [ + open + @index 1 + @title "Sized" + @variant "static_size" + ], + [ + @index 2 + @title "Unsized" + @variant "dynamic_size" + ] + )] #[must_use = "callers should check the return value to see if the operation succeeded"] #[inline] #[allow(clippy::mut_from_ref)] // False positive: `&self -> &mut [u8]` @@ -5858,6 +5916,24 @@ pub unsafe trait IntoBytes { /// assert!(write_result.is_err()); /// assert_eq!(insufficient_bytes, [0, 0]); /// ``` + /// + #[doc = codegen_section!( + header = "h5", + bench = "write_to_suffix", + format = "coco", + arity = 2, + [ + open + @index 1 + @title "Sized" + @variant "static_size" + ], + [ + @index 2 + @title "Unsized" + @variant "dynamic_size" + ] + )] #[must_use = "callers should check the return value to see if the operation succeeded"] #[inline] #[allow(clippy::mut_from_ref)] // False positive: `&self -> &mut [u8]` diff --git a/tests/codegen.rs b/tests/codegen.rs index 5c4f1086f3..4a9dfd6a1d 100644 --- a/tests/codegen.rs +++ b/tests/codegen.rs @@ -38,8 +38,9 @@ fn run_codegen_test(bench_name: &str, target_cpu: &str, bless: bool) { let target_dir 
= env!("CARGO_TARGET_DIR"); let cargo_asm = |directive: &Directive| { - Command::new("cargo") + Command::new("./cargo.sh") .args([ + "+nightly", "asm", "--quiet", "-p",