Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Teensy40 benchmark #98

Closed
wants to merge 14 commits into from
6 changes: 6 additions & 0 deletions examples/benchmark_teensy40/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[target.thumbv7em-none-eabihf]
runner = "python run.py"
rustflags = ["-C", "link-arg=-Tt4link.x"]

[build]
target = "thumbv7em-none-eabihf" # Teensy 4
1 change: 1 addition & 0 deletions examples/benchmark_teensy40/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target/
13 changes: 13 additions & 0 deletions examples/benchmark_teensy40/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"rust-analyzer.cargo.target": "thumbv7em-none-eabihf",
// override the default setting (`cargo check --all-targets`) which produces the following error
// "can't find crate for `test`" when the default compilation target is a no_std target
// with these changes RA will call `cargo check --bins` on save
"rust-analyzer.checkOnSave.allTargets": false,
"rust-analyzer.checkOnSave.extraArgs": [
"--bins"
],
"rust-analyzer.linkedProjects": [
"Cargo.toml",
],
}
43 changes: 43 additions & 0 deletions examples/benchmark_teensy40/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
[package]
authors = ["Finomnis <[email protected]>"]
name = "micromath_benchmark_teensy40"
edition = "2021"
version = "0.1.0"

[dependencies]
# Base dependencies
cortex-m = "0.7.7"
embedded-hal = "0.2.7"
teensy4-panic = "0.2.2"

# Board support package
teensy4-bsp = { version = "0.4.2", features = ["rt"] }

# Packages necessary for board usage
nb = "1.1.0" # Async
fugit = "0.3.6" # Time

# Project dependencies
git-version = "0.3.5"
micromath = { path = "../../" }
libm = "0.2.7"


# cargo build/run
[profile.dev]
codegen-units = 1
debug = 2
debug-assertions = true # <-
incremental = false
opt-level = 'z' # <-
overflow-checks = true # <-

# cargo build/run --release
[profile.release]
codegen-units = 1
debug = 2
debug-assertions = false # <-
incremental = false
lto = 'fat'
opt-level = 3 # <-
overflow-checks = false # <-
53 changes: 53 additions & 0 deletions examples/benchmark_teensy40/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Micromath Benchmark Suite

*- intended for the [Teensy 4.0](https://www.pjrc.com/store/teensy40.html) board -*

## Prerequisites

The following hardware is required for the benchmark:
- A [Teensy 4.0](https://www.pjrc.com/store/teensy40.html) development board
- A way to read the Teensy's UART, like a USB-UART-converter

The following software tools have to be installed:
- Python3 (as `python`, or modify `run.py` to use the `python3` binary)
- [`llvm-objcopy`](https://github.com/rust-lang/rust/issues/85658)
- Install via `rustup component add llvm-tools-preview`
- [`teensy-loader-cli`](https://www.pjrc.com/teensy/loader_cli.html)


## Run

- Connect the Teensy to PC via USB cable.
- Run `cargo run --release`.
- Read the output of the benchmark on the Teensy's UART (LPUART2: pin 14 is TX, pin 15 is RX, 115200 baud).

## Results

```none
===== Micromath Benchmark =====
Git Version: deb8988

All values in ns/iter.

            micromath       libm intrinsics
abs              15.0       15.0       10.0
acos            173.3      120.0
asin            103.3      136.7
atan             80.0       78.3
atan_norm        75.0
ceil             43.3       41.7
cos              98.3     1655.0
exp             231.7      115.0
floor            36.7       38.3
fract            20.0
inv              15.0
invsqrt          15.0
ln              158.3      145.0
log2            161.7      155.0
log10           161.7      160.0
round            30.0
sin             103.3     1703.3
sqrt             46.7      470.0       31.7
tan             153.3     2531.3
trunc            21.7       43.3
```
28 changes: 28 additions & 0 deletions examples/benchmark_teensy40/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python

from pathlib import Path
from tempfile import TemporaryDirectory

import subprocess
import sys


def main():
    """Convert a firmware binary to Intel HEX and flash it onto a Teensy 4.0.

    The path of the binary is taken from the first command line argument
    (this script is configured as the cargo `runner`, which passes it).

    Exits with status 1 and a message on stderr if no binary was given.
    Raises `subprocess.CalledProcessError` if `llvm-objcopy` or
    `teensy_loader_cli` returns a non-zero exit status.
    """
    if len(sys.argv) < 2:
        # Diagnostics belong on stderr; `sys.exit` is used instead of the
        # `exit` builtin because the latter only exists when the `site`
        # module has been loaded.
        print("Please provide the binary as first argument!", file=sys.stderr)
        sys.exit(1)

    binary = sys.argv[1]

    # The hex file is only an intermediate artifact, so it lives in a
    # temporary directory that is removed once flashing is done.
    with TemporaryDirectory() as tmpdir:
        hexfile = Path(tmpdir) / "firmware.hex"

        subprocess.run(["llvm-objcopy", "-O", "ihex", binary, hexfile], check=True)
        subprocess.run(["teensy_loader_cli", "--mcu=TEENSY40", "-wsv", hexfile], check=True)

    print("Teensy40 successfully flashed. Read the results of the benchmark from its UART2.")

if __name__ == "__main__":
main()
102 changes: 102 additions & 0 deletions examples/benchmark_teensy40/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
//! Benchmarks `micromath` against `libm` and the `core` float intrinsics
//! on a Teensy 4.0.
//!
//! The results are written to LPUART2, with the following pinout:
//!
//! - Pin 14 is TX.
//! - Pin 15 is RX.
//!
//! Baud rate is 115200bps.
//!
//! After the benchmark has finished, the LED blinks with a cycle length
//! of 2 seconds, so that the timer speed can be verified by eye.

#![no_std]
#![no_main]
#![feature(core_intrinsics)]

use teensy4_bsp as bsp;
use teensy4_panic as _;

use bsp::board;
use bsp::hal;

mod uart_writer;
use uart_writer::UartWriter;

mod run_bench;
use run_bench::run_bench;

// Firmware entry point.
//
// Sets up the LED, the UART writer and a free-running GPT timer, prints the
// table header, emits one `run_bench!` row per benchmarked function and then
// blinks the LED forever. Never returns.
#[bsp::rt::entry]
fn main() -> ! {
    // Take only the peripherals this benchmark needs; `gpt1` is renamed to
    // `us_timer` because it is used as the microsecond time base below.
    let board::Resources {
        pins,
        mut gpio2,
        lpuart2,
        gpt1: mut us_timer,
        ..
    } = board::t40(board::instances());

    // Initialize LED
    let led = board::led(&mut gpio2, pins.p13);
    led.set();

    // Initialize UART
    // LPUART2 on pins 14/15 at 115200 baud; start the output on a fresh line.
    let mut uart = UartWriter::new(board::lpuart(lpuart2, pins.p14, pins.p15, 115200));
    writeln!(uart);

    // Initialize timer
    // Is a 32-bit timer with us precision.
    // Overflows every 71.58 minutes, which is sufficient for our benchmark.
    // The benchmark treats one timer tick as one microsecond, so refuse to
    // run if the peripheral clock is not exactly 1 MHz.
    assert_eq!(board::PERCLK_FREQUENCY, 1_000_000);
    us_timer.set_clock_source(hal::gpt::ClockSource::PeripheralClock);
    us_timer.set_divider(1);
    us_timer.set_mode(hal::gpt::Mode::FreeRunning);
    us_timer.enable();
    // The closure takes ownership of the timer and is the only time source
    // handed to `run_bench!` and to the blink loop.
    let time_us = move || us_timer.count();

    // Write welcome message
    writeln!(uart, "===== Micromath Benchmark =====");
    writeln!(uart, "Git Version: {}", git_version::git_version!());
    writeln!(uart);

    // Table header: one column per implementation that `run_bench!` can time.
    writeln!(uart, "All values in ns/iter.");
    writeln!(uart);
    write!(uart, " ");
    write!(uart, " micromath");
    write!(uart, " libm");
    write!(uart, " intrinsics");
    writeln!(uart);

    // Run benchmarks
    // Arguments after `uart`: the micromath method, then (optionally) the
    // matching `libm` function, then (optionally) the `core::intrinsics` one.
    run_bench!(time_us, uart, abs, fabsf, fabsf32);
    run_bench!(time_us, uart, acos, acosf);
    run_bench!(time_us, uart, asin, asinf);
    run_bench!(time_us, uart, atan, atanf);
    run_bench!(time_us, uart, atan_norm);
    run_bench!(time_us, uart, ceil, ceilf);
    run_bench!(time_us, uart, cos, cosf);
    run_bench!(time_us, uart, exp, expf);
    run_bench!(time_us, uart, floor, floorf);
    run_bench!(time_us, uart, fract);
    run_bench!(time_us, uart, inv);
    run_bench!(time_us, uart, invsqrt);
    run_bench!(time_us, uart, ln, logf);
    run_bench!(time_us, uart, log2, log2f);
    run_bench!(time_us, uart, log10, log10f);
    run_bench!(time_us, uart, round);
    run_bench!(time_us, uart, sin, sinf);
    run_bench!(time_us, uart, sqrt, sqrtf, sqrtf32);
    run_bench!(time_us, uart, tan, tanf);
    run_bench!(time_us, uart, trunc, truncf);

    // Blink with a cycle length of 2 seconds, to make it verifiable that
    // our timer runs at the correct speed.
    loop {
        // Whole seconds since the timer was enabled; `set` is called during
        // even seconds and `clear` during odd seconds.
        let time_s = time_us() / 1_000_000;
        if time_s % 2 == 0 {
            led.set();
        } else {
            led.clear();
        }
    }
}
117 changes: 117 additions & 0 deletions examples/benchmark_teensy40/src/run_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/// Benchmarks one `micromath::F32Ext` method and prints one result row.
///
/// Public forms:
///
/// - `run_bench!(time_us, uart, f)`: times `micromath::F32Ext::f` only.
/// - `run_bench!(time_us, uart, f, flibm)`: additionally times `libm::flibm`.
/// - `run_bench!(time_us, uart, f, flibm, fintr)`: additionally times
///   `core::intrinsics::fintr`.
///
/// `time_us` must be callable without arguments and return the current value
/// of a free-running unsigned microsecond counter (a `u32` in this example).
/// `uart` must be usable as the target of `write!`/`writeln!`.
///
/// Every measurement is printed as nanoseconds per iteration with one decimal
/// digit, after subtracting the cost of an equally long loop that only moves
/// the value through `black_box`.
macro_rules! run_bench {
    // micromath only.
    ($time_us:expr, $uart:expr, $f:ident) => {{
        write!($uart, "{:<10}", core::stringify!($f));

        run_bench!(@run_bench $time_us, $uart, |num| <f32 as micromath::F32Ext>::$f(num));

        writeln!($uart);
    }};

    // micromath and libm.
    ($time_us:expr, $uart:expr, $f:ident, $flibm:ident) => {{
        write!($uart, "{:<10}", core::stringify!($f));

        run_bench!(@run_bench $time_us, $uart, |num| <f32 as micromath::F32Ext>::$f(num));
        run_bench!(@run_bench $time_us, $uart, |num| libm::$flibm(num));

        writeln!($uart);
    }};

    // micromath, libm and the compiler intrinsic.
    ($time_us:expr, $uart:expr, $f:ident, $flibm:ident, $fintr:ident) => {{
        write!($uart, "{:<10}", core::stringify!($f));

        run_bench!(@run_bench $time_us, $uart, |num| <f32 as micromath::F32Ext>::$f(num));
        run_bench!(@run_bench $time_us, $uart, |num| libm::$flibm(num));
        // SAFETY: the intrinsics passed by the callers in this example
        // (`fabsf32`, `sqrtf32`) are plain float operations that are defined
        // for every `f32` input.
        run_bench!(@run_bench $time_us, $uart, |num| unsafe { core::intrinsics::$fintr(num) });

        writeln!($uart);
    }};

    // Internal: time `$f` and write the result as `{:>9}.{}` (ns/iter).
    (@run_bench $time_us:expr, $uart:expr, $f:expr) => {{
        const COUNT: u64 = 4096 * 64;

        const VALUE: f32 = 0.12345;

        // Sanity check: a function that cannot handle the test value would
        // produce a meaningless timing.
        if !$f(VALUE).is_finite() {
            writeln!($uart, " ERROR: {} did not produce a finite value!", core::stringify!($f));
        }

        // Warmup
        for _ in 0..COUNT {
            run_bench!(@iteration VALUE, $f);
        }

        let t_start = $time_us();
        for _ in 0..COUNT {
            run_bench!(@iteration VALUE, $f);
        }
        let t_end = $time_us();

        // Warmup of the reference loop, which only passes the value through.
        for _ in 0..COUNT {
            run_bench!(@iteration VALUE, |val| val);
        }

        let t_empty_start = $time_us();
        for _ in 0..COUNT {
            run_bench!(@iteration VALUE, |val| val);
        }
        let t_empty_end = $time_us();

        // The counter is free-running, so the difference has to be taken
        // modulo the counter width to stay correct across a wrap-around.
        let duration_us = u64::from(t_end.wrapping_sub(t_start));
        let duration_us_empty = u64::from(t_empty_end.wrapping_sub(t_empty_start));

        // Timer granularity can make the reference loop appear slower than a
        // very cheap function; clamp to zero instead of underflowing.
        let duration_ps = duration_us.saturating_sub(duration_us_empty) * 1_000_000;
        let ps_per_iter = duration_ps / COUNT;

        // Adding 50 ps rounds to the nearest tenth of a nanosecond.
        let rounding_corrected_ps_per_iter = ps_per_iter + 50;
        let ns_per_iter_full = rounding_corrected_ps_per_iter / 1000;
        let ns_per_iter_rest = (rounding_corrected_ps_per_iter % 1000) / 100;

        write!($uart, "{:>9}.{}", ns_per_iter_full, ns_per_iter_rest);
    }};

    // Internal: one benchmark iteration. Input and output both pass through
    // `black_box` so the call can neither be constant-folded nor removed.
    (@iteration $val:expr, $f:expr) => {{
        const NUM_CONST: f32 = $val;
        let num = core::hint::black_box(NUM_CONST);
        let result = $f(num);
        core::hint::black_box(result);
    }};
}

pub(crate) use run_bench;
Loading