diff --git a/.gitignore b/.gitignore
index 12a5e72e..c0f7f579 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,9 @@
 metals.sbt
 .idea
 out
+.vscode
+.metals
+
 # Eclipse
 bin/
 .classpath
diff --git a/README.md b/README.md
index e630dcc9..b8804f40 100644
--- a/README.md
+++ b/README.md
@@ -1,123 +1,24 @@
 # VexiiRiscv
 
-VexiiRiscv is a from scratch second iteration of VexRiscv. Here are the targets :
-
-- RISCV 32/64 bits IMAFDC
-- Could start around as small as VexRiscv, but could scale further in performance
-- Optional late-alu
-- Optional multi issue
-- Optional multi threading
-- Cleaning implementation, especially the frontend
-- ...
-
-There is an online documentation :
+VexiiRiscv (Vex2Risc5) is the successor of VexRiscv. Here are its currently implemented features :
+
+- RV32/64 I[M][A][C][S][U]
+- Up to 4.66 CoreMark/MHz and 2.24 Dhrystone/MHz (WIP)
+- In-order execution
+- Early ALU, with an optional late ALU
+- Single/dual issue (can be asymmetric)
+- BTB, GShare, RAS branch prediction
+- Cacheless fetch/load/store, I$, D$ (WIP)
+- Optional SV32/SV39 MMU
+- Can run Linux / Buildroot
+- Pipeline visualisation in simulation via Konata
+- Lock-step simulation via RVLS and Spike
+
+Here is the online documentation :
 
 - https://spinalhdl.github.io/VexiiRiscv-RTD/master/VexiiRiscv/Introduction/#
+- https://spinalhdl.github.io/VexiiRiscv-RTD/master/VexiiRiscv/HowToUse/index.html
 
-# Dependencies
-
-```shell
-# JAVA JDK
-sudo add-apt-repository -y ppa:openjdk-r/ppa
-sudo apt-get update
-sudo apt-get install openjdk-19-jdk -y # You don't exactly need that version
-sudo update-alternatives --config java
-sudo update-alternatives --config javac
-
-# Install SBT - https://www.scala-sbt.org/
-echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
-echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list
-curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add
-sudo apt-get update
-sudo apt-get install sbt
-
-# Verilator (optional, for simulations)
-sudo apt-get install git make autoconf g++ flex bison
-git clone http://git.veripool.org/git/verilator # Only first time
-unsetenv VERILATOR_ROOT # For csh; ignore error if on bash
-unset VERILATOR_ROOT # For bash
-cd verilator
-git pull # Make sure we're up-to-date
-git checkout v4.216 # You don't exactly need that version
-autoconf # Create ./configure script
-./configure
-make
-sudo make install
-
-# Getting a RISC-V toolchain (optional)
-version=riscv64-unknown-elf-gcc-8.3.0-2019.08.0-x86_64-linux-ubuntu14
-wget -O riscv64-unknown-elf-gcc.tar.gz riscv https://static.dev.sifive.com/dev-tools/$version.tar.gz
-tar -xzvf riscv64-unknown-elf-gcc.tar.gz
-sudo mv $version /opt/riscv
-echo 'export PATH=/opt/riscv/bin:$PATH' >> ~/.bashrc
-
-# RVLS / Spike dependencies
-sudo apt-get install device-tree-compiler libboost-all-dev
-# Install ELFIO, used to load elf file in the sim
-git clone https://github.com/serge1/ELFIO.git
-cd ELFIO
-git checkout d251da09a07dff40af0b63b8f6c8ae71d2d1938d # Avoid C++17
-sudo cp -R elfio /usr/include
-cd .. && rm -rf ELFIO
-```
-
-# Setup
-
-```shell
-git clone --recursive https://github.com/SpinalHDL/VexiiRiscv.git
-cd VexiiRiscv
-
-# (optional) Compile riscv-isa-sim (spike), used as a golden model during the sim to check the dut behaviour (lock-step)
-cd ext/riscv-isa-sim
-mkdir build
-cd build
-../configure --prefix=$RISCV --enable-commitlog --without-boost --without-boost-asio --without-boost-regex
-make -j$(nproc)
-cd ../../..
-
-# (optional) Compile RVLS, (need riscv-isa-sim (spike)
-cd ext/rvls
-make -j$(nproc)
-cd ../..
-```
-
-# Generate Verilog
-
-It's currently very very early, but you can run the generation via :
-
-```shell
-sbt "Test/runMain vexiiriscv.Generate"
-```
-
-You can get a list of the supported parameters via :
-
-```shell
-sbt "Test/runMain vexiiriscv.Generate --help"
-```
-
-
-# Run a simulation
-
-Note that Vexiiriscv use mostly an opt-in configuration. So, most performance related configuration are disabled by default.
-
-```shell
-sbt
-compile
-Test/runMain vexiiriscv.tester.TestBench --load-elf ext/NaxSoftware/baremetal/dhrystone/build/rv32ima/dhrystone.elf --trace-all
-```
-
-This will generate a simWorkspace/VexiiRiscv/test folder which contains :
-- test.fst : A wave file which can be open with gtkwave. It shows all the CPU signals
-- konata.log : A wave file which can be open with https://github.com/shioyadan/Konata, it shows the pipeline behaviour of the CPU
-- spike.log : The execution logs of Spike (golden model)
-- tracer.log : The execution logs of VexRiscv (Simulation model)
-
-# Navigating the code
-
-Here are a few key / typical code examples :
+A roadmap is available here :
 
-- The CPU toplevel src/main/scala/vexiiriscv/VexiiRiscv.scala
-- A cpu configuration generator : dev/src/main/scala/vexiiriscv/Param.scala
-- Some globaly shared definitions : src/main/scala/vexiiriscv/Global.scala
-- Integer ALU plugin ; src/main/scala/vexiiriscv/execute/IntAluPlugin.scala
-- A plugin which probe the CPU at different points for simulation purposes : src/main/scala/vexiiriscv/misc/WhiteboxerPlugin.scala
+- https://github.com/SpinalHDL/VexiiRiscv/issues/1
diff --git a/ext/NaxSoftware b/ext/NaxSoftware
index ca3c4f16..bf144d53 160000
--- a/ext/NaxSoftware
+++ b/ext/NaxSoftware
@@ -1 +1 @@
-Subproject commit ca3c4f167219cce4762f63d8581f5ae3dd0cce1f
+Subproject commit bf144d53b88a8c72b93b783ff4266ce589beb18d
diff --git a/ext/SpinalHDL b/ext/SpinalHDL
index 9fc386df..38937100 160000
--- a/ext/SpinalHDL
+++ b/ext/SpinalHDL
@@ -1 +1 @@
-Subproject commit 9fc386dfbfb3a944540c1bed33601b2419fe5bf3
+Subproject commit 38937100ae882a8503c357ab6399e54d0df8776f
diff --git a/ext/rvls b/ext/rvls
index 0cc9365b..60ae4b99 160000
--- a/ext/rvls
+++ b/ext/rvls
@@ -1 +1 @@
-Subproject commit 0cc9365bbc5764379a59caeca98f8bff379afd56
+Subproject commit 60ae4b99725de96b6b7d173feff2408e34a9a712
diff --git a/log b/log
new file mode 100644
index 00000000..f8f36089
--- /dev/null
+++ b/log
@@ -0,0 +1,1216 @@
+[Progress] Start VexiiRiscv_rv64imsu_d1_l1_rfa_bp0_btb_ras_gshare_la_m_d_a buildroot simulation with seed 2
+
+OpenSBI v0.8
+ ____ _____ ____ _____
+ / __ \ / ____| _ \_ _|
+ | | | |_ __ ___ _ __ | (___ | |_) || |
+ | | | | '_ \ / _ \ '_ \ \___ \| _ < | |
+ | |__| | |_) | __/ | | |____) | |_) || |_
+ \____/| .__/ \___|_| |_|_____/|____/_____|
+ | |
+ |_|
+
+Platform Name : NaxRiscv
+Platform Features : timer,mfdeleg
+Platform HART Count : 1
+Boot HART ID : 0
+Boot HART ISA : rv64imasu
+BOOT HART Features : scounteren,mcounteren
+BOOT HART PMP Count : 0
+Firmware Base : 
0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. +[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000116] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.003317] Console: colour dummy device 80x25 +[ 0.004434] printk: console [hvc0] enabled +[ 0.004434] printk: console [hvc0] enabled +[ 0.006377] printk: bootconsole [sbi0] disabled +[ 0.006377] printk: bootconsole [sbi0] disabled +[ 0.008569] Calibrating delay loop (skipped), value calculated using timer frequency.. 
200.00 BogoMIPS (lpj=400000) +[ 0.011119] pid_max: default: 32768 minimum: 301 +[ 0.013887] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.015746] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=470655550 +INTEGER WRITE MISSMATCH DUT=c REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) +spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) 
+scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d1_l1_rfa_bp0_btb_ras_gshare_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. 
+[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000115] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.003314] Console: colour dummy device 80x25 +[ 0.004432] printk: console [hvc0] enabled +[ 0.004432] printk: console [hvc0] enabled +[ 0.006370] printk: bootconsole [sbi0] disabled +[ 0.006370] printk: bootconsole [sbi0] disabled +[ 0.008565] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000) +[ 0.011115] pid_max: default: 32768 minimum: 301 +[ 0.013902] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.015760] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=471451830 +INTEGER WRITE MISSMATCH DUT=39 REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) +spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) 
+scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) +scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d1_l1_rfa_btb_ras_gshare_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. 
Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. +[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000123] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.003501] Console: colour dummy device 80x25 +[ 0.004675] printk: console [hvc0] enabled +[ 0.004675] printk: console [hvc0] enabled +[ 0.006706] printk: bootconsole [sbi0] disabled +[ 0.006706] printk: bootconsole [sbi0] disabled +[ 0.008998] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000) +[ 0.011684] pid_max: default: 32768 minimum: 301 +[ 0.014658] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.016612] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=485859680 +INTEGER WRITE MISSMATCH DUT=33 REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) 
+spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) +scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d1_l1_rfs_bp0_btb_ras_gshare_la_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 
0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. +[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000116] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.003340] Console: colour dummy device 80x25 +[ 0.004465] printk: console [hvc0] enabled +[ 0.004465] printk: console [hvc0] enabled +[ 0.006420] printk: bootconsole [sbi0] disabled +[ 0.006420] printk: bootconsole [sbi0] disabled +[ 0.008632] Calibrating delay loop (skipped), value calculated using timer frequency.. 
200.00 BogoMIPS (lpj=400000) +[ 0.011212] pid_max: default: 32768 minimum: 301 +[ 0.014045] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.015923] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=469183040 +INTEGER WRITE MISSMATCH DUT=4e REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) +spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) 
+scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d1_l1_rfs_bp0_btb_ras_gshare_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. 
+[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000118] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.003343] Console: colour dummy device 80x25 +[ 0.004471] printk: console [hvc0] enabled +[ 0.004471] printk: console [hvc0] enabled +[ 0.006433] printk: bootconsole [sbi0] disabled +[ 0.006433] printk: bootconsole [sbi0] disabled +[ 0.008641] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000) +[ 0.011226] pid_max: default: 32768 minimum: 301 +[ 0.014062] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.015944] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=470147600 +INTEGER WRITE MISSMATCH DUT=ffffffffffffff8b REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) +spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) 
+scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) +scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d1_l1_rfs_btb_ras_gshare_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. 
Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. +[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000132] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.003708] Console: colour dummy device 80x25 +[ 0.004939] printk: console [hvc0] enabled +[ 0.004939] printk: console [hvc0] enabled +[ 0.007075] printk: bootconsole [sbi0] disabled +[ 0.007075] printk: bootconsole [sbi0] disabled +[ 0.009489] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000) +[ 0.012309] pid_max: default: 32768 minimum: 301 +[ 0.015509] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.017555] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=499244700 +INTEGER WRITE MISSMATCH DUT=ffffffffffffffd6 REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) 
+spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) +scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d2_l2_rfa_bp0_btb_ras_gshare_la_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[Progress] Start 
VexiiRiscv_rv64imsu_d2_l2_rfa_bp0_btb_ras_gshare_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. 
+[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000078] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.002272] Console: colour dummy device 80x25 +[ 0.003053] printk: console [hvc0] enabled +[ 0.003053] printk: console [hvc0] enabled +[ 0.004338] printk: bootconsole [sbi0] disabled +[ 0.004338] printk: bootconsole [sbi0] disabled +[ 0.005950] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000) +[ 0.007653] pid_max: default: 32768 minimum: 301 +[ 0.009589] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.010888] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=360996460 +INTEGER WRITE MISSMATCH DUT=6a REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) +spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) 
+scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) +scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d2_l2_rfs_bp0_btb_ras_gshare_la_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. 
Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. +[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000080] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.002272] Console: colour dummy device 80x25 +[ 0.003054] printk: console [hvc0] enabled +[ 0.003054] printk: console [hvc0] enabled +[ 0.004343] printk: bootconsole [sbi0] disabled +[ 0.004343] printk: bootconsole [sbi0] disabled +[ 0.005968] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000) +[ 0.007674] pid_max: default: 32768 minimum: 301 +[ 0.009631] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.010939] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=358286940 +INTEGER WRITE MISSMATCH DUT=ffffffffffffff97 REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) 
+spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) +scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d2_l2_rfs_bp0_btb_ras_gshare_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 
0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. +[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000082] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.002304] Console: colour dummy device 80x25 +[ 0.003096] printk: console [hvc0] enabled +[ 0.003096] printk: console [hvc0] enabled +[ 0.004400] printk: bootconsole [sbi0] disabled +[ 0.004400] printk: bootconsole [sbi0] disabled +[ 0.006043] Calibrating delay loop (skipped), value calculated using timer frequency.. 
200.00 BogoMIPS (lpj=400000) +[ 0.007783] pid_max: default: 32768 minimum: 301 +[ 0.009779] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.011103] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=361517370 +INTEGER WRITE MISSMATCH DUT=ffffffffffffff97 REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) +spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) 
+scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) +[Progress] Start VexiiRiscv_rv64imsu_d2_l2_rfs_btb_ras_gshare_m_d_a buildroot simulation with seed 2 + +OpenSBI v0.8 + ____ _____ ____ _____ + / __ \ / ____| _ \_ _| + | | | |_ __ ___ _ __ | (___ | |_) || | + | | | | '_ \ / _ \ '_ \ \___ \| _ < | | + | |__| | |_) | __/ | | |____) | |_) || |_ + \____/| .__/ \___|_| |_|_____/|____/_____| + | | + |_| + +Platform Name : NaxRiscv +Platform Features : timer,mfdeleg +Platform HART Count : 1 +Boot HART ID : 0 +Boot HART ISA : rv64imasu +BOOT HART Features : scounteren,mcounteren +BOOT HART PMP Count : 0 +Firmware Base : 0x80000000 +Firmware Size : 72 KB +Runtime SBI Version : 0.2 + +MIDELEG : 0x0000000000000222 +MEDELEG : 0x000000000000b109 +[ 0.000000] Linux version 5.10.1 (rawrr@rawrr) (riscv64-buildroot-linux-uclibc-gcc.br_real (Buildroot 2020.11-rc3-8-g9ef54b7d0b) 10.2.0, GNU ld (GNU Binutils) 2.34) #8 SMP Mon Mar 14 10:26:33 CET 2022 +[ 0.000000] earlycon: sbi0 at I/O port 0x0 (options '') +[ 0.000000] printk: bootconsole [sbi0] enabled +[ 0.000000] Initial ramdisk at: 0x(____ptrval____) (8388608 bytes) +[ 0.000000] Zone ranges: +[ 0.000000] DMA32 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Normal empty +[ 0.000000] Movable zone start for each node +[ 0.000000] Early memory node ranges +[ 0.000000] node 0: [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] Initmem setup node 0 [mem 0x0000000080400000-0x000000008fffffff] +[ 0.000000] software IO TLB: mapped [mem 0x000000008bc8c000-0x000000008fc8c000] (64MB) +[ 0.000000] SBI specification v0.2 detected +[ 0.000000] SBI implementation ID=0x1 Version=0x8 +[ 0.000000] SBI v0.2 TIME extension detected +[ 0.000000] SBI v0.2 IPI extension detected +[ 0.000000] SBI v0.2 RFENCE extension detected +[ 0.000000] SBI v0.2 HSM extension detected +[ 0.000000] riscv: ISA extensions aim +[ 0.000000] riscv: ELF capabilities aim +[ 0.000000] percpu: Embedded 14 pages/cpu s25560 r0 d31784 u57344 +[ 0.000000] Built 1 zonelists, mobility grouping on. Total pages: 63630 +[ 0.000000] Kernel command line: rootwait console=hvc0 earlycon=sbi root=/dev/ram0 init=/sbin/init +[ 0.000000] Dentry cache hash table entries: 32768 (order: 6, 262144 bytes, linear) +[ 0.000000] Inode-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) +[ 0.000000] Sorting __ex_table... +[ 0.000000] mem auto-init: stack:off, heap alloc:off, heap free:off +[ 0.000000] Memory: 173364K/258048K available (4703K kernel code, 679K rwdata, 718K rodata, 176K init, 262K bss, 84684K reserved, 0K cma-reserved) +[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 +[ 0.000000] rcu: Hierarchical RCU implementation. +[ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=1. +[ 0.000000] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. 
+[ 0.000000] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 +[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0 +[ 0.000000] riscv-intc: 64 local interrupts mapped +[ 0.000000] random: get_random_bytes called from start_kernel+0x364/0x4f8 with crng_init=0 +[ 0.000000] riscv_timer_init_dt: Registering clocksource cpuid [0] hartid [0] +[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns +[ 0.000111] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns +[ 0.003075] Console: colour dummy device 80x25 +[ 0.004085] printk: console [hvc0] enabled +[ 0.004085] printk: console [hvc0] enabled +[ 0.005718] printk: bootconsole [sbi0] disabled +[ 0.005718] printk: bootconsole [sbi0] disabled +[ 0.007790] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000) +[ 0.009967] pid_max: default: 32768 minimum: 301 +[ 0.012796] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[ 0.014346] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear) +[Error] Simulation failed at time=423770540 +INTEGER WRITE MISSMATCH DUT=ffffffffffffffb8 REF=0 +rvls.spinal.RvlsBackend.commit(Tracer.scala:196) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1$$anonfun$apply$11.apply(VexiiRiscvProbe.scala:530) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:530) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$checkCommits$1.apply(VexiiRiscvProbe.scala:486) +scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) +scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) +vexiiriscv.test.VexiiRiscvProbe.checkCommits(VexiiRiscvProbe.scala:486) +vexiiriscv.test.VexiiRiscvProbe$$anonfun$3.apply$mcV$sp(VexiiRiscvProbe.scala:553) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1$$anonfun$apply$mcV$sp$1.apply(package.scala:971) +scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) +scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) +spinal.core.sim.package$SimClockDomainPimper$$anonfun$onSamplings$1.apply$mcV$sp(package.scala:971) +spinal.core.sim.package$$anon$1.update(package.scala:196) +spinal.sim.SimManager.runWhile(SimManager.scala:324) +spinal.sim.SimManager.runAll(SimManager.scala:246) +spinal.core.sim.SimCompiled.doSimApi(SimBootstraps.scala:608) +spinal.core.sim.SimCompiled.doSimUntilVoid(SimBootstraps.scala:581) +vexiiriscv.tester.TestOptions.test(TestBench.scala:155) +vexiiriscv.tester.RegressionSingle$$anonfun$18$$anon$4$$anonfun$$lessinit$greater$1.apply$mcV$sp(Regression.scala:225) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply$mcV$sp(MultithreadedTester.scala:25) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(MultithreadedTester.scala:24) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) 
+scala.Console$.withErr(Console.scala:92) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +spinal.lib.misc.test.AsyncJob$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(MultithreadedTester.scala:23) +scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) +scala.Console$.withOut(Console.scala:65) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply$mcV$sp(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +spinal.lib.misc.test.AsyncJob$$anonfun$1.apply(MultithreadedTester.scala:22) +scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) +scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) +scala.concurrent.impl.ExecutionContextImpl$AdaptedForkJoinTask.exec(ExecutionContextImpl.scala:121) +scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) +scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) +scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) +scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) diff --git a/project/plugins.sbt b/project/plugins.sbt index 9e495ef1..64221c5a 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -3,3 +3,4 @@ // SPDX-License-Identifier: MIT addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10") +addSbtPlugin("com.github.tkawachi" % "sbt-repeat" % "0.1.0") \ No newline at end of file diff --git a/src/main/scala/spinal/core/HardMap.scala b/src/main/scala/spinal/core/HardMap.scala deleted file mode 100644 index 8b923102..00000000 --- a/src/main/scala/spinal/core/HardMap.scala +++ /dev/null @@ -1,71 +0,0 @@ -package spinal.core - -import spinal.idslplugin.Location - -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -object HardMap{ - def apply(content : Seq[NamedType[_ <: Data]]) : HardMap = { - val ret = new HardMap() - content.foreach(e => ret.add(e)) - ret - } -} - -class HardMap extends MultiData { - val storage = mutable.LinkedHashMap[NamedType[Data], Data]() - var elementsCache : ArrayBuffer[(String, Data)] = null - - def keyToName(key : Any) = key match { - case n: Nameable if n.isNamed => n.getName() - } - - def update[T <: Data](key : NamedType[T], value : T): Unit = { - assert(elementsCache == null) - assert(!storage.contains(key.asInstanceOf[NamedType[Data]])) - storage(key.asInstanceOf[NamedType[Data]]) = value - if(OwnableRef.proposal(value, this)) value.setPartialName(keyToName(key), Nameable.DATAMODEL_WEAK) - } - - def add[T <: Data](key: NamedType[T]) : Unit = { - this(key) = key() - } - - def apply[T <: Data](key: NamedType[T]): T = { - storage(key.asInstanceOf[NamedType[Data]]).asInstanceOf[T] - } - - override def elements: ArrayBuffer[(String, Data)] = { - if(elementsCache == null) { - elementsCache = ArrayBuffer[(String, Data)]() - for ((k, d) <- storage) { - val name = keyToName(k) - elementsCache += name -> d - } - } - elementsCache - } - - def hardMapAssign(that: HardMap)(f: (Data, Data) => Unit): Unit = { - for ((name, element) <- elements) { - val other = that.find(name) - if (other == null) { - LocatedPendingError(s"Bundle assignment is not complete. 
$this need '$name' but $that doesn't provide it.") - } - else { - f(element, other) - } - } - } - - protected override def assignFromImpl(that: AnyRef, target: AnyRef, kind: AnyRef)(implicit loc: Location): Unit = { - that match { - case that: HardMap => - if (!this.getClass.isAssignableFrom(that.getClass)) SpinalError("HardMap must have the same final class to" + - " be assigned. Either use assignByName or assignSomeByName at \n" + ScalaLocated.long) - hardMapAssign(that)((to, from) => to.compositAssignFrom(from, to, kind)) - case _ => throw new Exception("Undefined assignment") - } - } -} diff --git a/src/main/scala/spinal/lib/misc/Elf.scala b/src/main/scala/spinal/lib/misc/Elf.scala index ec45f8a4..ee2066a2 100644 --- a/src/main/scala/spinal/lib/misc/Elf.scala +++ b/src/main/scala/spinal/lib/misc/Elf.scala @@ -1,6 +1,7 @@ package spinal.lib.misc import net.fornwall.jelf.{ElfFile, ElfSection, ElfSectionHeader, ElfSymbol, ElfSymbolTableSection} +import spinal.core._ import spinal.lib.sim.SparseMemory import java.io.File @@ -41,6 +42,42 @@ class Elf(val f : File, addressWidth : Int){ } } + + def getMemInit[T <: Data](ram: Mem[T],offset: BigInt, allowOverflow: Boolean = false) = { + val wordSize = ram.wordType.getBitsWidth / 8 + val initContent = Array.fill[BigInt](ram.wordCount)(0) + foreachSection { section => + if ((section.header.sh_flags & ElfSectionHeader.FLAG_ALLOC) != 0) { + val data = getData(section) + val memoryAddress = (section.header.sh_addr - offset) & ((BigInt(1) << addressWidth) - 1).toLong + for((byte, i) <- data.zipWithIndex){ + val addressWithoutOffset = memoryAddress+i + val addressWord = addressWithoutOffset / wordSize + if (addressWord < 0 || addressWord >= initContent.size) { + assert(allowOverflow) + } else { + initContent(addressWord.toInt) |= BigInt(byte.toInt & 0xFF) << ((addressWithoutOffset.toInt % wordSize) * 8) + } + } + } + } + initContent + } + + def init[T <: Data](ram: Mem[T], offset: BigInt, allowOverflow: Boolean = false): Unit = { + val initContent = getMemInit(ram, offset, allowOverflow) + ram.initBigInt(initContent) + } + + def load[T <: Data](ram: Mem[T], offset: BigInt, allowOverflow: Boolean = false): Unit = { + val initContent = getMemInit(ram, offset, allowOverflow) + import spinal.core.sim._ + for((e, i) <- initContent.zipWithIndex){ + ram.setBigInt(i, e) + } + } + + def getSymbolAddress(name : String): Long ={ val s = getELFSymbol(name) s.st_value diff --git a/src/main/scala/vexiiriscv/Generate.scala b/src/main/scala/vexiiriscv/Generate.scala index 52e97a8a..1a01a246 100644 --- a/src/main/scala/vexiiriscv/Generate.scala +++ b/src/main/scala/vexiiriscv/Generate.scala @@ -2,9 +2,16 @@ package vexiiriscv import spinal.core._ import spinal.lib.LatencyAnalysis +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.bus.tilelink.{M2sTransfers, SizeRange} import spinal.lib.misc.PathTracer +import spinal.lib.system.tag.{PmaRegion, PmaRegionImpl} import vexiiriscv.compat.MultiPortWritesSymplifier -import vexiiriscv.execute.{LsuCachelessPlugin, SrcPlugin} +import vexiiriscv.execute.SrcPlugin +import vexiiriscv.execute.lsu._ +import vexiiriscv.fetch._ + +import scala.collection.mutable.ArrayBuffer object Generate extends App { val param = new ParamSimple() @@ -17,7 +24,9 @@ object Generate extends App { sc.addTransformationPhase(new MultiPortWritesSymplifier) val report = sc.generateVerilog { - VexiiRiscv(param.plugins()) + val plugins = param.plugins() + ParamSimple.setPma(plugins) + VexiiRiscv(plugins) } } diff --git 
a/src/main/scala/vexiiriscv/Param.scala b/src/main/scala/vexiiriscv/Param.scala index 8f2fc7df..7c9e243e 100644 --- a/src/main/scala/vexiiriscv/Param.scala +++ b/src/main/scala/vexiiriscv/Param.scala @@ -2,10 +2,15 @@ package vexiiriscv import spinal.core._ import spinal.lib._ +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.bus.tilelink.{M2sTransfers, SizeRange} import spinal.lib.misc.plugin.Hostable +import spinal.lib.system.tag.{PmaRegion, PmaRegionImpl} import vexiiriscv._ import vexiiriscv.decode.DecoderPlugin import vexiiriscv.execute._ +import vexiiriscv.execute.lsu._ +import vexiiriscv.fetch.{FetchCachelessPlugin, FetchL1Plugin} import vexiiriscv.memory.{MmuPortParameter, MmuSpec, MmuStorageLevel, MmuStorageParameter} import vexiiriscv.misc._ import vexiiriscv.prediction.{LearnCmd, LearnPlugin} @@ -15,6 +20,40 @@ import vexiiriscv.test.WhiteboxerPlugin import scala.collection.mutable.ArrayBuffer +object ParamSimple{ + def setPma(plugins : Seq[Hostable]) = { + val regions = ArrayBuffer[PmaRegion]( + new PmaRegionImpl( + mapping = SizeMapping(0x80000000l, 0x80000000l), + isMain = true, + isExecutable = true, + transfers = M2sTransfers( + get = SizeRange.all, + putFull = SizeRange.all, + ) + ), + new PmaRegionImpl( + mapping = SizeMapping(0x10000000l, 0x10000000l), + isMain = false, + isExecutable = true, + transfers = M2sTransfers( + get = SizeRange.all, + putFull = SizeRange.all, + ) + ) + ) + plugins.foreach { + case p: FetchCachelessPlugin => p.regions.load(regions) + case p: LsuCachelessPlugin => p.regions.load(regions) + case p: FetchL1Plugin => p.regions.load(regions) + case p: LsuPlugin => p.ioRegions.load(regions) + case p: LsuL1Plugin => p.regions.load(regions) + case _ => + } + plugins + } +} + class ParamSimple(){ var xlen = 32 var withRvc = false @@ -25,8 +64,6 @@ class ParamSimple(){ var decoders = 1 var lanes = 1 var regFileSync = true - var ioRange : UInt => Bool = a => a(31 downto 28) === 0x1 - var fetchRange : UInt => Bool = a => a(31 downto 28) =/= 0x1 var withGShare = false var withBtb = false var withRas = false @@ -41,6 +78,13 @@ class ParamSimple(){ var allowBypassFrom = 100 //100 => disabled var performanceCounters = 0 var withFetchL1 = false + var withLsuL1 = false + var fetchL1Sets = 64 + var fetchL1Ways = 1 + var lsuL1Sets = 64 + var lsuL1Ways = 1 + var withLsuBypass = false + var withIterativeShift = false // Debug modifiers val debugParam = sys.env.getOrElse("VEXIIRISCV_DEBUG_PARAM", "0").toInt.toBoolean @@ -66,7 +110,13 @@ class ParamSimple(){ withRvc = false withAlignerBuffer = withRvc withFetchL1 = false + withLsuL1 = false xlen = 32 + fetchL1Sets = 64 + fetchL1Ways = 4 + lsuL1Sets = 64 + lsuL1Ways = 4 + withLsuBypass = true } @@ -83,7 +133,8 @@ class ParamSimple(){ r += s"d${decoders}" r += s"l${lanes}" r += regFileSync.mux("rfs","rfa") - if (withFetchL1) r += "fl1" + if (withFetchL1) r += s"fl1xW${lsuL1Ways}xS${lsuL1Sets}" + if (withLsuL1) r += s"lsul1xW${lsuL1Ways}xS${lsuL1Sets}${withLsuBypass.mux("xBp","")}" if(allowBypassFrom < 100) r += s"bp$allowBypassFrom" if (withBtb) r += "btb" if (withRas) r += "ras" @@ -94,6 +145,7 @@ class ParamSimple(){ if (relaxedShift) r += "rsft" if (relaxedSrc) r += "rsrc" if(performanceCounters != 0) r += s"pc$performanceCounters" + if (withIterativeShift) r += "isft" r.mkString("_") } @@ -124,6 +176,13 @@ class ParamSimple(){ opt[Int]("allow-bypass-from") action { (v, c) => allowBypassFrom = v } opt[Int]("performance-counters") action { (v, c) => performanceCounters = v } opt[Unit]("with-fetch-l1") action { 
(v, c) => withFetchL1 = true } + opt[Unit]("with-lsu-l1") action { (v, c) => withLsuL1 = true } + opt[Int]("fetch-l1-sets") action { (v, c) => fetchL1Sets = v } + opt[Int]("fetch-l1-ways") action { (v, c) => fetchL1Ways = v } + opt[Int]("lsu-l1-sets") action { (v, c) => lsuL1Sets = v } + opt[Int]("lsu-l1-ways") action { (v, c) => lsuL1Ways = v } + opt[Unit]("with-lsu-bypass") action { (v, c) => withLsuBypass = true } + opt[Unit]("with-iterative-shift") action { (v, c) => withIterativeShift = true } } def plugins() = pluginsArea.plugins @@ -133,11 +192,9 @@ class ParamSimple(){ plugins += new riscv.RiscvPlugin(xlen, hartCount, rvc = withRvc) withMmu match { - case false => plugins += new memory.StaticTranslationPlugin(32, ioRange, fetchRange) + case false => plugins += new memory.StaticTranslationPlugin(32) case true => plugins += new memory.MmuPlugin( spec = if (xlen == 32) MmuSpec.sv32 else MmuSpec.sv39, - ioRange = ioRange, - fetchRange = fetchRange, physicalWidth = 32 ) } @@ -171,6 +228,10 @@ class ParamSimple(){ ) plugins += new prediction.HistoryPlugin() } + def shifter(layer: LaneLayer, shiftAt: Int = 0, formatAt: Int = 0) = withIterativeShift match { + case false => new BarrelShifterPlugin(layer, shiftAt, formatAt) + case true => new IterativeShifterPlugin(layer, shiftAt, formatAt) + } plugins += new fetch.PcPlugin(resetVector) @@ -205,7 +266,8 @@ class ParamSimple(){ } ) if(withFetchL1) plugins += new fetch.FetchL1Plugin( - cacheSize = 16*1024, + lineSize = 64, + setCount = 64, wayCount = 4, fetchDataWidth = 32*decoders, memDataWidth = 32*decoders, @@ -276,10 +338,10 @@ class ParamSimple(){ // plugins += new RedoPlugin("lane0") plugins += new SrcPlugin(early0, executeAt = 0, relaxedRs = relaxedSrc) plugins += new IntAluPlugin(early0, formatAt = 0) - plugins += new BarrelShifterPlugin(early0, formatAt = relaxedShift.toInt) + plugins += shifter(early0, formatAt = relaxedShift.toInt) plugins += new IntFormatPlugin("lane0") plugins += new BranchPlugin(layer=early0, aluAt=0, jumpAt=relaxedBranch.toInt, wbAt=0) - plugins += new LsuCachelessPlugin( + if(!withLsuL1) plugins += new LsuCachelessPlugin( layer = early0, withAmo = withRva, withSpeculativeLoadFlush = true, @@ -312,6 +374,46 @@ class ParamSimple(){ ) } ) + if(withLsuL1){ + plugins += new LsuPlugin( + layer = early0, + withRva = withRva, + translationStorageParameter = MmuStorageParameter( + levels = List( + MmuStorageLevel( + id = 0, + ways = 4, + depth = 32 + ), + MmuStorageLevel( + id = 1, + ways = 2, + depth = 32 + ) + ), + priority = 1 + ), + translationPortParameter = withMmu match { + case false => null + case true => MmuPortParameter( + readAt = 0, + hitsAt = 1, + ctrlAt = 1, + rspAt = 1 + ) + } + ) + plugins += new LsuL1Plugin( + lane = lane0, + memDataWidth = xlen, + cpuDataWidth = xlen, + refillCount = 1, + writebackCount = 1, + setCount = lsuL1Sets, + wayCount = lsuL1Ways, + withBypass = withLsuBypass + ) + } if(withMul) { plugins += new MulPlugin(early0) @@ -332,13 +434,12 @@ class ParamSimple(){ val late0 = new LaneLayer("late0", lane0, priority = -5) plugins += new SrcPlugin(late0, executeAt = 2, relaxedRs = relaxedSrc) plugins += new IntAluPlugin(late0, aluAt = 2, formatAt = 2) - plugins += new BarrelShifterPlugin(late0, shiftAt = 2, formatAt = 2) + plugins += shifter(late0, shiftAt = 2, formatAt = 2) plugins += new BranchPlugin(late0, aluAt = 2, jumpAt = 2/*+relaxedBranch.toInt*/, wbAt = 2) } plugins += new WriteBackPlugin("lane0", IntRegFile, writeAt = 2, allowBypassFrom = allowBypassFrom) - if(lanes >= 2) { val 
lane1 = newExecuteLanePlugin("lane1") val early1 = new LaneLayer("early1", lane1, priority = 10) @@ -346,7 +447,7 @@ class ParamSimple(){ plugins += new SrcPlugin(early1, executeAt = 0, relaxedRs = relaxedSrc) plugins += new IntAluPlugin(early1, formatAt = 0) - plugins += new BarrelShifterPlugin(early1, formatAt = relaxedShift.toInt) + plugins += shifter(early1, formatAt = relaxedShift.toInt) plugins += new IntFormatPlugin("lane1") plugins += new BranchPlugin(early1, aluAt = 0, jumpAt = relaxedBranch.toInt, wbAt = 0) @@ -354,7 +455,7 @@ class ParamSimple(){ val late1 = new LaneLayer("late1", lane1, priority = -3) plugins += new SrcPlugin(late1, executeAt = 2, relaxedRs = relaxedSrc) plugins += new IntAluPlugin(late1, aluAt = 2, formatAt = 2) - plugins += new BarrelShifterPlugin(late1, shiftAt = 2, formatAt = 2) + plugins += shifter(late1, shiftAt = 2, formatAt = 2) plugins += new BranchPlugin(late1, aluAt = 2, jumpAt = 2/*+relaxedBranch.toInt*/, wbAt = 2) } diff --git a/src/main/scala/vexiiriscv/VexiiRiscv.scala b/src/main/scala/vexiiriscv/VexiiRiscv.scala index aec6c29e..204e9fb4 100644 --- a/src/main/scala/vexiiriscv/VexiiRiscv.scala +++ b/src/main/scala/vexiiriscv/VexiiRiscv.scala @@ -18,3 +18,5 @@ class VexiiRiscv extends Component{ val host = database on (new PluginHost) } + + diff --git a/src/main/scala/vexiiriscv/decode/AlignerPlugin.scala b/src/main/scala/vexiiriscv/decode/AlignerPlugin.scala index 820d013d..7659e375 100644 --- a/src/main/scala/vexiiriscv/decode/AlignerPlugin.scala +++ b/src/main/scala/vexiiriscv/decode/AlignerPlugin.scala @@ -171,17 +171,17 @@ class AlignerPlugin(fetchAt : Int, lane.up(Decode.INSTRUCTION) := extractor.ctx.instruction lane.up(Decode.DECOMPRESSION_FAULT) := False lane.up(Decode.INSTRUCTION_RAW) := extractor.ctx.instruction + val isRvc = extractor.ctx.instruction(1 downto 0) =/= 3 val withRvc = Riscv.RVC.get generate new Area { - val isRvc = extractor.ctx.instruction(1 downto 0) =/= 3 val dec = RvcDecompressor(extractor.ctx.instruction, rvf = Riscv.RVF, rvd = Riscv.RVD, Riscv.XLEN) when(isRvc) { lane.up(Decode.INSTRUCTION) := dec.inst lane.up(Decode.DECOMPRESSION_FAULT) := dec.illegal - when(dec.illegal){ - lane.up(Decode.INSTRUCTION_RAW)(Decode.INSTRUCTION_WIDTH-1 downto 16) := 0 //To match spike - } } } + when(isRvc) { + lane.up(Decode.INSTRUCTION_RAW)(Decode.INSTRUCTION_WIDTH - 1 downto 16) := 0 //To match spike + } lane.up(Decode.INSTRUCTION_SLICE_COUNT) := OHToUInt(OHMasking.lastV2(extractor.localMask)) lane.up(Global.PC) := extractor.ctx.pc diff --git a/src/main/scala/vexiiriscv/decode/Decode.scala b/src/main/scala/vexiiriscv/decode/Decode.scala index 0fc79376..0750bdb8 100644 --- a/src/main/scala/vexiiriscv/decode/Decode.scala +++ b/src/main/scala/vexiiriscv/decode/Decode.scala @@ -34,7 +34,7 @@ object Decode extends AreaObject { val INSTRUCTION_SLICE_COUNT = Payload(UInt(INSTRUCTION_SLICE_COUNT_WIDTH bits)) // minus one => RVC => 0, normal => 1 } -case class AccessKeys(physWidth : Int, rfMapping : Seq[RegfileSpec]) extends Area{ +case class AccessKeys(rfa : RfAccess, physWidth : Int, rfMapping : Seq[RegfileSpec]) extends Area{ val rfIdWidth = log2Up(rfMapping.size) def is(rfs: RegfileSpec, that: UInt) = that === idOf(rfs) def idOf(rfs: RegfileSpec) = rfMapping.indexOf(rfs) diff --git a/src/main/scala/vexiiriscv/decode/DecoderPlugin.scala b/src/main/scala/vexiiriscv/decode/DecoderPlugin.scala index 84dc9786..6b31c6b3 100644 --- a/src/main/scala/vexiiriscv/decode/DecoderPlugin.scala +++ b/src/main/scala/vexiiriscv/decode/DecoderPlugin.scala @@ 
-67,7 +67,7 @@ class DecoderPlugin(var decodeAt : Int) extends FiberPlugin with DecoderService for(rfa <- rfAccesses){ val physWidth = 5 val rfMapping = resources.collect{case r : RfResource if r.access == rfa => r.rf }.toList - val ak = AccessKeys(physWidth, rfMapping) + val ak = AccessKeys(rfa, physWidth, rfMapping) ak.setCompositeName(rfa) rfaKeys(rfa) = ak } diff --git a/src/main/scala/vexiiriscv/execute/BarrelShifterPlugin.scala b/src/main/scala/vexiiriscv/execute/BarrelShifterPlugin.scala index 05c58acf..2e0e5298 100644 --- a/src/main/scala/vexiiriscv/execute/BarrelShifterPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/BarrelShifterPlugin.scala @@ -28,7 +28,6 @@ class BarrelShifterPlugin(val layer : LaneLayer, val wb = newWriteback(ifp, formatAt) - //TODO why using SRC1 ? why not directly RS1 => less combinatorial path, also not sure about SRC2 is realy wort it (for only 5/ 6 bits) add(Rvi.SLL).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True, SIGNED -> False) add(Rvi.SRL).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, SIGNED -> False) add(Rvi.SRA).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, SIGNED -> True) diff --git a/src/main/scala/vexiiriscv/execute/BranchPlugin.scala b/src/main/scala/vexiiriscv/execute/BranchPlugin.scala index 767f3831..eaa621c5 100644 --- a/src/main/scala/vexiiriscv/execute/BranchPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/BranchPlugin.scala @@ -199,6 +199,7 @@ class BranchPlugin(val layer : LaneLayer, trapPort.code := CSR.MCAUSE_ENUM.FETCH_MISSALIGNED trapPort.tval := B(alu.PC_TRUE) trapPort.arg := 0 + trapPort.laneAge := Execute.LANE_AGE when(doIt && MISSALIGNED){ trapPort.valid := True diff --git a/src/main/scala/vexiiriscv/execute/CsrAccessPlugin.scala b/src/main/scala/vexiiriscv/execute/CsrAccessPlugin.scala index 4a36e56a..f26f474b 100644 --- a/src/main/scala/vexiiriscv/execute/CsrAccessPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/CsrAccessPlugin.scala @@ -265,6 +265,7 @@ class CsrAccessPlugin(layer : LaneLayer, trapPort.code := CSR.MCAUSE_ENUM.ILLEGAL_INSTRUCTION trapPort.tval := UOP.resized trapPort.arg := 0 + trapPort.laneAge := Execute.LANE_AGE val flushReg = RegInit(False) setWhen(flushPort.valid) clearWhen(!elp.isFreezed()) when(flushReg) { diff --git a/src/main/scala/vexiiriscv/execute/CsrRamPlugin.scala b/src/main/scala/vexiiriscv/execute/CsrRamPlugin.scala index bd9d9d7d..12724fdc 100644 --- a/src/main/scala/vexiiriscv/execute/CsrRamPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/CsrRamPlugin.scala @@ -19,8 +19,10 @@ class CsrRamPlugin extends FiberPlugin with CsrRamService with InitService { override def holdCsrRead(): Unit = api.holdRead := True + override def holdCsrWrite(): Unit = api.holdWrite := True val api = during build new Area{ val holdRead = False + val holdWrite = False } val csrMapper = during setup new Area{ @@ -44,18 +46,20 @@ class CsrRamPlugin extends FiberPlugin with CsrRamService with InitService { val addressDecoder = new DecodingSpec(RAM_ADDRESS) val selDecoder = ArrayBuffer[Int]() switch(cas.onDecodeAddress) { - for (e <- csrMappings) e.csrFilter match { - case filter: CsrListFilter => for (csrId <- filter.mapping) { - val mask = Masked(csrId, 0xFFF) - addressDecoder.addNeeds(mask, Masked(e.alloc.at + e.offset, ramAddressMask)) - selDecoder += csrId - } - case csrId: Int => { - is(csrId) { + for (e <- csrMappings) { + e.csrFilter match { + case filter: CsrListFilter => for (csrId <- filter.mapping) { val mask = Masked(csrId, 0xFFF) addressDecoder.addNeeds(mask, Masked(e.alloc.at + e.offset, 
ramAddressMask)) selDecoder += csrId } + case csrId: Int => { + is(csrId) { + val mask = Masked(csrId, 0xFFF) + addressDecoder.addNeeds(mask, Masked(e.alloc.at + e.offset, ramAddressMask)) + selDecoder += csrId + } + } } } } @@ -78,10 +82,10 @@ class CsrRamPlugin extends FiberPlugin with CsrRamService with InitService { val doWrite = False cas.onWrite(selFilter, false)(doWrite := True) val fired = RegInit(False) setWhen (write.fire) clearWhen (cas.onWriteMovingOff) - write.valid := doWrite && !fired + write.valid := doWrite && !fired && !api.holdWrite write.address := ramAddress write.data := cas.onWriteBits - when (write.valid && !write.ready){ + when ((doWrite && !fired) && !write.ready){ cas.onWriteHalt() } diff --git a/src/main/scala/vexiiriscv/execute/CsrService.scala b/src/main/scala/vexiiriscv/execute/CsrService.scala index 16e85711..6927d97a 100644 --- a/src/main/scala/vexiiriscv/execute/CsrService.scala +++ b/src/main/scala/vexiiriscv/execute/CsrService.scala @@ -267,8 +267,7 @@ trait CsrRamService extends Area{ } def portAddressWidth : Int - def awaitMapping() : Unit - - def holdCsrRead() : Unit + def holdCsrRead(): Unit + def holdCsrWrite(): Unit } diff --git a/src/main/scala/vexiiriscv/execute/EnvPlugin.scala b/src/main/scala/vexiiriscv/execute/EnvPlugin.scala index 400ebe93..c501c2de 100644 --- a/src/main/scala/vexiiriscv/execute/EnvPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/EnvPlugin.scala @@ -63,6 +63,7 @@ class EnvPlugin(layer : LaneLayer, trapPort.tval := B(PC).andMask(OP === EnvPluginOp.EBREAK) //That's what spike do trapPort.code.assignDontCare() trapPort.arg.assignDontCare() + trapPort.laneAge := Execute.LANE_AGE val privilege = ps.getPrivilege(HART_ID) val xretPriv = Decode.UOP(29 downto 28).asUInt diff --git a/src/main/scala/vexiiriscv/execute/ExecuteLanePlugin.scala b/src/main/scala/vexiiriscv/execute/ExecuteLanePlugin.scala index 829602e5..30401555 100644 --- a/src/main/scala/vexiiriscv/execute/ExecuteLanePlugin.scala +++ b/src/main/scala/vexiiriscv/execute/ExecuteLanePlugin.scala @@ -196,7 +196,7 @@ class ExecuteLanePlugin(override val laneName : String, node.isValid && node(rfaRd.ENABLE) && node(rfaRd.PHYS) === on(rfa.PHYS) && node(rfaRd.RFID) === on(rfa.RFID) }.asBits - on.bypass(apply(spec)) := OHMux.or(Cat(hits, !hits.orR), on.up(apply(spec)) +: filtred.map(f => f.eu.ctrl(f.nodeId)(f.payload)), true) + on.bypass(apply(spec)) := OHMux.or(Cat(hits, !hits.orR), on.up(apply(spec)) +: filtred.map(f => f.eu.ctrl(f.nodeId)(f.payload)), true) } } } @@ -252,7 +252,7 @@ class ExecuteLanePlugin(override val laneName : String, val c = idToCtrl(ctrlId) if(ctrlId != 0) c.up(c.LANE_SEL).setAsReg().init(False) - val age = getAge(ctrlId) + val age = getCtrlAge(ctrlId) val doIt = rp.isFlushedAt(age, c(Global.HART_ID), c(Execute.LANE_AGE)) c.downIsCancel := False doIt match { @@ -273,6 +273,7 @@ class ExecuteLanePlugin(override val laneName : String, buildBefore.release() } + def freezeIt()(implicit loc: Location) = eupp.freezeIt() def freezeWhen(cond: Bool)(implicit loc: Location) = eupp.freezeWhen(cond) def isFreezed(): Bool = eupp.isFreezed() override def atRiskOfFlush(executeId: Int): Bool = { diff --git a/src/main/scala/vexiiriscv/execute/ExecutePipelinePlugin.scala b/src/main/scala/vexiiriscv/execute/ExecutePipelinePlugin.scala index 781ac505..2615d032 100644 --- a/src/main/scala/vexiiriscv/execute/ExecutePipelinePlugin.scala +++ b/src/main/scala/vexiiriscv/execute/ExecutePipelinePlugin.scala @@ -24,6 +24,7 @@ class ExecutePipelinePlugin() extends FiberPlugin with 
PipelineService{ val pipelineLock = Retainer() def freezeWhen(cond: Bool)(implicit loc: Location) = freeze.requests += cond + def freezeIt()(implicit loc: Location) = freezeWhen(ConditionalContext.isTrue()) def isFreezed(): Bool = freeze.valid override def getLinks(): Seq[Link] = logic.connectors diff --git a/src/main/scala/vexiiriscv/execute/IterativeShiftPlugin.scala b/src/main/scala/vexiiriscv/execute/IterativeShiftPlugin.scala new file mode 100644 index 00000000..246bca7d --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/IterativeShiftPlugin.scala @@ -0,0 +1,189 @@ +// SPDX-FileCopyrightText: 2023 "Everybody" +// +// SPDX-License-Identifier: MIT + +package vexiiriscv.execute + +import spinal.core._ +import spinal.lib.{MuxOH, OHMasking} +import spinal.lib.misc.pipeline._ +import vexiiriscv.decode +import vexiiriscv.riscv._ + +import scala.collection.mutable + +object IterativeShifterPlugin extends AreaObject { + val ARITHMETIC = Payload(Bool()) + val LEFT = Payload(Bool()) + val IS_W = Payload(Bool()) + val IS_W_RIGHT = Payload(Bool()) +} + +/** Iterative Shifter + * + * The default parameters are optimized for 6-input LUT devices, since they map to + * 4:1 muxes (load / shift by 1 / shift by 8 / flip); additional shift distances can be + * added to increase performance. + * Only 2**n shift distances are well supported; other distances lead + * to suboptimal shift sequences if they are needed multiple times. + * + * lateResult can be used to make the done logic slightly smaller, which should not be + * needed for the default configuration. Enabling it costs one extra cycle on all shifts. + */ +class IterativeShifterPlugin(val layer: LaneLayer, + val shiftAt: Int = 0, + val formatAt: Int = 0, + val leftShifts: Seq[Int] = Seq(), + val rightShifts: Seq[Int] = Seq(1, 8), + val lateResult: Boolean = false) extends ExecutionUnitElementSimple(layer) { + def isPowerTwo(i: Int) = i > 0 && (i & (i - 1)) == 0 + assert(leftShifts.isEmpty || leftShifts.contains(1), "If left shifts are used, left shift by 1 must be enabled") + assert(rightShifts.contains(1), "At least right shift by 1 must be enabled") + assert(leftShifts.forall(isPowerTwo) && rightShifts.forall(isPowerTwo), "shift distances must be powers of 2") + + import IterativeShifterPlugin._ + val SHIFT_RESULT = Payload(Bits(Riscv.XLEN bits)) + + val logic = during setup new Logic { + awaitBuild() + import SrcKeys._ + + val wb = newWriteback(ifp, formatAt) + + // we use RS1 directly, keep SRC1.RF source here for hazard detection + add(Rvi.SLL).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True, ARITHMETIC -> False) + add(Rvi.SRL).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> False) + add(Rvi.SRA).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> True) + add(Rvi.SLLI).srcs(SRC1.RF, SRC2.I).decode(LEFT -> True, ARITHMETIC -> False) + add(Rvi.SRLI).srcs(SRC1.RF, SRC2.I).decode(LEFT -> False, ARITHMETIC -> False) + add(Rvi.SRAI).srcs(SRC1.RF, SRC2.I).decode(LEFT -> False, ARITHMETIC -> True) + + if (Riscv.XLEN.get == 64) { + for (op <- List(Rvi.SLL, Rvi.SRL, Rvi.SRA, Rvi.SLLI, Rvi.SRLI, Rvi.SRAI)) { + layer(op).addDecoding(IS_W -> False, IS_W_RIGHT -> False) + } + add(Rvi.SLLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True , ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> False) + add(Rvi.SRLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> True ) + add(Rvi.SRAW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> True , IS_W -> True, IS_W_RIGHT -> True ) + add(Rvi.SLLIW).srcs(SRC1.RF, SRC2.I
).decode(LEFT -> True , ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> False) + add(Rvi.SRLIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> False, ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> True ) + add(Rvi.SRAIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> False, ARITHMETIC -> True , IS_W -> True, IS_W_RIGHT -> True ) + for (op <- List(Rvi.SLLW, Rvi.SRLW, Rvi.SRAW, Rvi.SLLIW, Rvi.SRLIW, Rvi.SRAIW)) { + ifp.signExtend(wb, layer(op), 32) + } + } + + uopRetainer.release() + + val shift = new el.Execute(shiftAt) { + val unscheduleRequest = RegNext(isCancel).clearWhen(isReady).init(False) + val selected = isValid && SEL + + val amplitudeWidth = if(Riscv.XLEN.get == 64) 6 else 5 + val shamt = srcp.SRC2.resize(amplitudeWidth).asUInt + val rs1 = el(IntRegFile, RS1).asBits + + val busy = RegInit(False) + val flipped = Reg(Bool()) + val amplitude = Reg(UInt(amplitudeWidth bits)) + val shiftReg = Reg(Bits(Riscv.XLEN bits)) + + val dataIn = CombInit(rs1) + if (Riscv.XLEN.get == 64) { + when(IS_W) { + shamt(5) := False + } + when(IS_W_RIGHT) { + dataIn(63 downto 32) := (default -> (ARITHMETIC & rs1(31))) + } + } + + val done = if(lateResult) { + amplitude === 0 & !flipped & busy + } else { + val rightShiftDone = !LEFT & busy & rightShifts.map(amplitude === _).reduce(_ | _) + val leftShiftDone = if(leftShifts.isEmpty) { + // we need to fully shift since we still need to flip back + busy & amplitude === 0 + } else { + busy & leftShifts.map(amplitude === _).reduce(_ | _) + } + + // done comes one cycle "early", in the cycle we do the last action (shift / flip) + shamt === 0 | leftShiftDone | rightShiftDone + } + + val muxInputs = mutable.ArrayBuffer[(Bool, Bits, UInt)]() + rightShifts.sorted.foreach(n => { + val doIt = if(n > 1) amplitude >= n else True + val input = (((ARITHMETIC && srcp.SRC1.msb) #* n) ## shiftReg) >> n + val ampl = amplitude - n + muxInputs.append((doIt, input, ampl)) + }) + + leftShifts.sorted.foreach(n => { + val doIt = LEFT & (if(n > 1) amplitude >= n else True) + val input = shiftReg |<< n + val ampl = amplitude - n + muxInputs.append((doIt, input, ampl)) + }) + + // we only need a flip if there is no left shift + // keep track of flip indices so that we can skip them when needed + val flipIdxs = mutable.ArrayBuffer[Int]() + if (leftShifts.isEmpty) { + val doFlip = LEFT & ((busy & !flipped) | (if (lateResult) (busy & amplitude === 0) else done)) + + flipIdxs.append(muxInputs.size) + muxInputs.append((doFlip, shiftReg.reversed, amplitude)) + if (Riscv.XLEN.get == 64) { + flipIdxs.append(muxInputs.size) + muxInputs.append((IS_W & doFlip, False #* 32 ## shiftReg(31 downto 0).reversed, amplitude)) + } + } + muxInputs.append((selected & !busy, dataIn, shamt)) + + val selector = OHMasking.last(Cat(muxInputs.map(_._1))) + val muxed = MuxOH(selector, muxInputs.map(_._2)) + shiftReg := muxed + + // if we use flip(s), then we don't need them in the amplitude MUX; we simply do not + // enable the amplitude register in that case, saving a few gates + val updatedAmplitude = MuxOH( + Cat(selector.asBools.zipWithIndex.flatMap{case (b, n) => if(flipIdxs.contains(n)) None else Some(b)}), + muxInputs.zipWithIndex.flatMap{case(i, n) => if(flipIdxs.contains(n)) None else Some(i._3)} + ) + + val anyFlip = flipIdxs.foldLeft(False){case (r, n) => r | muxInputs(n)._1} + if(flipIdxs.nonEmpty) { + when(anyFlip) { + flipped := !flipped + } + } + when(!anyFlip) { + amplitude := updatedAmplitude + } + + // if we do the load, initialize some other parts of the state as well + when(selector.msb) { + flipped := False +
busy := (if(!lateResult) shamt =/= 0 else True) + } + + when((busy && done) || unscheduleRequest) { + busy := False + } + + val freezeIt = selected && !done && !unscheduleRequest + el.freezeWhen(freezeIt) + + SHIFT_RESULT := (if (lateResult) shiftReg else muxed) + } + + val format = new el.Execute(formatAt) { + // sign extends for 32bit ops on 64bit core are done by the ifp plugin + wb.valid := SEL + wb.payload := SHIFT_RESULT + } + } +} diff --git a/src/main/scala/vexiiriscv/execute/RedoPlugin.scala b/src/main/scala/vexiiriscv/execute/RedoPlugin.scala index 01b77a87..3ed85b03 100644 --- a/src/main/scala/vexiiriscv/execute/RedoPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/RedoPlugin.scala @@ -31,7 +31,7 @@ class RedoPlugin(val laneName : String) extends FiberPlugin { val groups = specs.groupBy(_.ctrlAt) val groupsLogic = for((ctrlAt, specs) <- groups) yield new elp.Ctrl(ctrlAt){ - val age = elp.getAge(ctrlAt) + val age = elp.getCtrlAge(ctrlAt) val pcPort = pcs.newJumpInterface(age, Execute.LANE_AGE_WIDTH, aggregationPriority = 0) val flushPort = sp.newFlushPort(age, laneAgeWidth = Execute.LANE_AGE_WIDTH, withUopId = true) val historyPort = hp.map(_.newPort(age, Execute.LANE_AGE_WIDTH)) diff --git a/src/main/scala/vexiiriscv/execute/Service.scala b/src/main/scala/vexiiriscv/execute/Service.scala index 2e415655..af264b24 100644 --- a/src/main/scala/vexiiriscv/execute/Service.scala +++ b/src/main/scala/vexiiriscv/execute/Service.scala @@ -127,9 +127,10 @@ trait ExecuteLaneService extends Area{ class Execute(id: Int) extends CtrlLaneMirror(execute(id)) class Ctrl(id: Int) extends CtrlLaneMirror(ctrl(id)) - def getAge(at: Int): Int = Ages.EU + at * Ages.STAGE - def getExecuteAge(at : Int) = getAge(at + executeAt) + def getCtrlAge(at: Int): Int = Ages.EU + at * Ages.STAGE + def getExecuteAge(at : Int) = getCtrlAge(at + executeAt) + def freezeIt()(implicit loc: Location) def freezeWhen(cond: Bool)(implicit loc: Location) def isFreezed(): Bool def atRiskOfFlush(executeId : Int) : Bool diff --git a/src/main/scala/vexiiriscv/execute/Agu.scala b/src/main/scala/vexiiriscv/execute/lsu/Agu.scala similarity index 66% rename from src/main/scala/vexiiriscv/execute/Agu.scala rename to src/main/scala/vexiiriscv/execute/lsu/Agu.scala index 38cc1cea..effc0044 100644 --- a/src/main/scala/vexiiriscv/execute/Agu.scala +++ b/src/main/scala/vexiiriscv/execute/lsu/Agu.scala @@ -1,4 +1,4 @@ -package vexiiriscv.execute +package vexiiriscv.execute.lsu import spinal.core._ import spinal.lib.misc.pipeline._ @@ -7,15 +7,18 @@ import vexiiriscv.Global import vexiiriscv.decode.Decode import vexiiriscv.riscv.{Const, MicroOp, Rvfd, Rvi} import vexiiriscv.riscv.Riscv._ +import vexiiriscv.execute._ import scala.collection.mutable.ArrayBuffer object AguPlugin extends AreaObject{ val SEL = Payload(Bool()) - val AMO = Payload(Bool()) - val SC = Payload(Bool()) - val LR = Payload(Bool()) - val LOAD = Payload(Bool()) +// val AMO = Payload(Bool()) +// val SC = Payload(Bool()) +// val LR = Payload(Bool()) + val LOAD = Payload(Bool()) + val STORE = Payload(Bool()) + val ATOMIC = Payload(Bool()) val SIZE = Payload(UInt(2 bits)) val FLOAT = Payload(Bool()) } @@ -37,15 +40,15 @@ class AguFrontend( if (XLEN.get == 64) writingRf ++= List(Rvi.LD, Rvi.LWU) if (RVF) writingRf ++= List(Rvfd.FLW) if (RVD) writingRf ++= List(Rvfd.FLD) - for (op <- writingRf) add(op).srcs(sk.Op.ADD, sk.SRC1.RF, sk.SRC2.I).decode(LR -> False, LOAD -> True) + for (op <- writingRf) add(op).srcs(sk.Op.ADD, sk.SRC1.RF, sk.SRC2.I).decode(LOAD -> True, STORE -> 
False, ATOMIC -> False, FLOAT -> False) // Store stuff val storeOps = List(sk.Op.ADD, sk.SRC1.RF, sk.SRC2.S) val writingMem = ArrayBuffer[MicroOp](Rvi.SB, Rvi.SH, Rvi.SW) if (XLEN.get == 64) writingMem ++= List(Rvi.SD) - for (store <- writingMem) add(store).srcs(storeOps).decode(AMO -> False, SC -> False, LOAD -> False, FLOAT -> False) - if (RVF) add(Rvfd.FSW).srcs(storeOps).decode(AMO -> False, SC -> False, LOAD -> False, FLOAT -> True) - if (RVD) add(Rvfd.FSD).srcs(storeOps).decode(AMO -> False, SC -> False, LOAD -> False, FLOAT -> True) + for (store <- writingMem) add(store).srcs(storeOps).decode(LOAD -> False, STORE -> True, ATOMIC -> False, FLOAT -> False) + if (RVF) add(Rvfd.FSW).srcs(storeOps).decode(LOAD -> False, STORE -> True, ATOMIC -> False, FLOAT -> True) + if (RVD) add(Rvfd.FSD).srcs(storeOps).decode(LOAD -> False, STORE -> True, ATOMIC -> False, FLOAT -> True) // Atomic stuff val amos = RVA.get generate new Area { @@ -58,15 +61,14 @@ class AguFrontend( Rvi.AMOSWAPD, Rvi.AMOADDD, Rvi.AMOXORD, Rvi.AMOANDD, Rvi.AMOORD, Rvi.AMOMIND, Rvi.AMOMAXD, Rvi.AMOMINUD, Rvi.AMOMAXUD ) - for (amo <- uops) add(amo).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(AMO -> True, SC -> False, LOAD -> False, FLOAT -> False) - writingMem += add(Rvi.SCW).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(AMO -> False, SC -> True, LOAD -> False, FLOAT -> False).uop + for (amo <- uops) add(amo).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(LOAD -> True, STORE -> True, ATOMIC -> True, FLOAT -> False) + writingMem += add(Rvi.SCW).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(LOAD -> False, STORE -> True, ATOMIC -> True, FLOAT -> False).uop writingRf += Rvi.SCW - writingRf += add(Rvi.LRW).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(LR -> True, LOAD -> True).uop + writingRf += add(Rvi.LRW).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(LOAD -> True, STORE -> False, ATOMIC -> True, FLOAT -> False).uop if(XLEN.get == 64){ - writingMem += add(Rvi.SCD).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(AMO -> False, SC -> True, LOAD -> False, FLOAT -> False).uop + writingMem += add(Rvi.SCD).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(LOAD -> False, STORE -> True, ATOMIC -> True, FLOAT -> False).uop writingRf += Rvi.SCD - writingRf += add(Rvi.LRD).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(LR -> True, LOAD -> True).uop + writingRf += add(Rvi.LRD).srcs(sk.Op.SRC1, sk.SRC1.RF).decode(LOAD -> True, STORE -> False, ATOMIC -> True, FLOAT -> False).uop } -// assert(false, "Rvi.LR and atomic may need reformat info, CachelessPlugin may use loads list for it, need to add to loads. 
Also store completion need to be handled") } } \ No newline at end of file diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessBridge.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessBridge.scala new file mode 100644 index 00000000..9e0449d3 --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessBridge.scala @@ -0,0 +1,92 @@ +package vexiiriscv.execute.lsu + +import spinal.core._ +import spinal.lib._ +import spinal.lib.misc.plugin.FiberPlugin +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.{DebugId, S2mSupport} +import spinal.lib.misc.pipeline._ + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +class LsuCachelessBusToTilelink(up : LsuCachelessBus, hashWidth : Int) extends Area{ + assert(!up.p.withAmo) + + val m2sParam = up.p.toTilelinkM2s(this) + val down = tilelink.Bus(m2sParam) + + val cmdHash = up.cmd.address(log2Up(up.p.dataWidth / 8), hashWidth bits) + + val pendings = List.fill(up.p.pendingMax)(new Area{ //TODO could be one less + val valid = RegInit(False) + val hash = Reg(UInt(hashWidth bits)) + val mask = Reg(Bits(up.p.dataWidth/8 bits)) + val io = Reg(Bool()) + val hazard = valid && (hash === cmdHash && (mask & up.cmd.mask).orR || io && up.cmd.io) + }) + val hazard = pendings.map(_.hazard).orR + + when(down.d.fire) { + pendings.onSel(down.d.source) { e => + e.valid := False + } + } + + when(down.a.fire) { + pendings.onSel(up.cmd.id) { e => + e.valid := True + e.hash := cmdHash + e.mask := up.cmd.mask + e.io := up.cmd.io + } + } + + down.a.arbitrationFrom(up.cmd.haltWhen(hazard)) + down.a.opcode := up.cmd.write.mux(tilelink.Opcode.A.PUT_FULL_DATA, tilelink.Opcode.A.GET) + down.a.param := 0 + down.a.source := up.cmd.id + down.a.address := up.cmd.address + down.a.size := log2Up(up.p.dataWidth/8) + down.a.debugId := DebugId.withPostfix(up.cmd.id) + down.a.mask := up.cmd.mask + down.a.data := up.cmd.data + down.a.corrupt := False + + down.d.ready := True + up.rsp.valid := down.d.valid + up.rsp.id := down.d.source + up.rsp.error := down.d.denied + up.rsp.data := down.d.data +} + + +class LsuCachelessTileLinkPlugin(node : bus.tilelink.fabric.Node, hashWidth : Int = 8) extends FiberPlugin { + val logic = during build new Area{ + val lsucp = host[LsuCachelessPlugin] + lsucp.logic.bus.setAsDirectionLess() + + val bridge = new LsuCachelessBusToTilelink(lsucp.logic.bus, hashWidth) + master(bridge.down) + + node.m2s.forceParameters(bridge.m2sParam) + node.s2m.supported.load(S2mSupport.none()) + node.bus.component.rework(node.bus << bridge.down) + } +} + + + +class LsuTileLinkPlugin(node : bus.tilelink.fabric.Node, hashWidth : Int = 8) extends FiberPlugin { + val logic = during build new Area{ + val lsucp = host[LsuPlugin] + lsucp.logic.bus.setAsDirectionLess() + + val bridge = new LsuCachelessBusToTilelink(lsucp.logic.bus, hashWidth) + master(bridge.down) + + node.m2s.forceParameters(bridge.m2sParam) + node.s2m.supported.load(S2mSupport.none()) + node.bus.component.rework(node.bus << bridge.down) + } +} diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessBus.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessBus.scala new file mode 100644 index 00000000..256c9364 --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessBus.scala @@ -0,0 +1,93 @@ +package vexiiriscv.execute.lsu + +import spinal.core._ +import spinal.lib._ +import spinal.lib.misc.plugin.FiberPlugin +import vexiiriscv.{Global, riscv} +import vexiiriscv.riscv.{CSR, Const, IntRegFile, MicroOp, RS1, RS2, Riscv, Rvi} 
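+// These definitions were previously part of LsuCachelessPlugin.scala (as CachelessBusAmo,
+// CachelessBusParam, CachelessCmd, CachelessRsp and CachelessBus); they now live in their own
+// file, and the TileLink bridge in LsuCachelessBridge.scala builds directly on them.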
+import AguPlugin._ +import spinal.core.fiber.Retainer +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.DebugId +import vexiiriscv.decode.Decode +import vexiiriscv.fetch.FetchPipelinePlugin +import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, DBusAccessService} +import vexiiriscv.misc.{AddressToMask, TrapArg, TrapReason, TrapService} +import vexiiriscv.riscv.Riscv.{LSLEN, XLEN} +import spinal.lib.misc.pipeline._ +import vexiiriscv.decode.Decode.{INSTRUCTION_SLICE_COUNT_WIDTH, UOP} +import vexiiriscv.schedule.{ReschedulePlugin, ScheduleService} + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +object LsuCachelessBusAmo{ + val LR = 0x02 + val SC = 0x03 + val AMOSWAP = 0x01 + val AMOADD = 0x00 + val AMOXOR = 0x04 + val AMOAND = 0x0C + val AMOOR = 0x08 + val AMOMIN = 0x10 + val AMOMAX = 0x14 + val AMOMINU = 0x18 + val AMOMAXU = 0x1c +} + +case class LsuCachelessBusParam(addressWidth : Int, dataWidth : Int, hartIdWidth : Int, uopIdWidth : Int, withAmo : Boolean, pendingMax : Int){ + def toTilelinkM2s(name: Nameable) = { + assert(!withAmo) + new tilelink.M2sParameters( + addressWidth = addressWidth, + dataWidth = dataWidth, + masters = List( + new tilelink.M2sAgent( + name = name, + mapping = List( + new tilelink.M2sSource( + id = SizeMapping(0, pendingMax), + emits = tilelink.M2sTransfers( + get = tilelink.SizeRange(1, dataWidth / 8), + putFull = tilelink.SizeRange(1, dataWidth / 8) + ) + ) + ) + ) + ) + ) + } +} + +case class LsuCachelessCmd(p : LsuCachelessBusParam) extends Bundle{ + val id = UInt(log2Up(p.pendingMax) bits) + val write = Bool() + val address = UInt(p.addressWidth bits) + val data = Bits(p.dataWidth bit) + val size = UInt(log2Up(log2Up(p.dataWidth / 8) + 1) bits) + val mask = Bits(p.dataWidth / 8 bits) + val io = Bool() //This is for verification purposes, allowing RVLS to track stuff + val fromHart = Bool() //This is for verification purposes, allowing RVLS to track stuff + val hartId = UInt(p.hartIdWidth bits) + val uopId = UInt(p.uopIdWidth bits) + val amoEnable = p.withAmo generate Bool() + val amoOp = p.withAmo generate Bits(5 bits) +} + +case class LsuCachelessRsp(p : LsuCachelessBusParam, withId : Boolean = true) extends Bundle{ + val id = withId generate UInt(log2Up(p.pendingMax) bits) + val error = Bool() + val data = Bits(p.dataWidth bits) + val scMiss = p.withAmo generate Bool() +} + +case class LsuCachelessBus(p : LsuCachelessBusParam) extends Bundle with IMasterSlave { + var cmd = Stream(LsuCachelessCmd(p)) + var rsp = Flow(LsuCachelessRsp(p)) + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } +} diff --git a/src/main/scala/vexiiriscv/execute/LsuCachelessPlugin.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessPlugin.scala similarity index 64% rename from src/main/scala/vexiiriscv/execute/LsuCachelessPlugin.scala rename to src/main/scala/vexiiriscv/execute/lsu/LsuCachelessPlugin.scala index 16dcf93d..967756ac 100644 --- a/src/main/scala/vexiiriscv/execute/LsuCachelessPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessPlugin.scala @@ -1,4 +1,4 @@ -package vexiiriscv.execute +package vexiiriscv.execute.lsu import spinal.core._ import spinal.lib._ @@ -6,67 +6,22 @@ import spinal.lib.misc.plugin.FiberPlugin import vexiiriscv.{Global, riscv} import vexiiriscv.riscv.{CSR, Const, IntRegFile, MicroOp, RS1, RS2, Riscv, Rvi} import AguPlugin._ -import spinal.core.fiber.Retainer +import spinal.core.fiber.{Handle, 
Retainer} +import spinal.core.sim.SimDataPimper import vexiiriscv.decode.Decode import vexiiriscv.fetch.FetchPipelinePlugin -import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, DBusAccessService} +import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, DBusAccessService, PmaLoad, PmaLogic, PmaPort, PmaStore} import vexiiriscv.misc.{AddressToMask, TrapArg, TrapReason, TrapService} import vexiiriscv.riscv.Riscv.{LSLEN, XLEN} import spinal.lib.misc.pipeline._ +import spinal.lib.system.tag.PmaRegion import vexiiriscv.decode.Decode.{INSTRUCTION_SLICE_COUNT_WIDTH, UOP} import vexiiriscv.schedule.{ReschedulePlugin, ScheduleService} +import vexiiriscv.execute._ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -object CachelessBusAmo{ - val LR = 0x02 - val SC = 0x03 - val AMOSWAP = 0x01 - val AMOADD = 0x00 - val AMOXOR = 0x04 - val AMOAND = 0x0C - val AMOOR = 0x08 - val AMOMIN = 0x10 - val AMOMAX = 0x14 - val AMOMINU = 0x18 - val AMOMAXU = 0x1c -} - -case class CachelessBusParam(addressWidth : Int, dataWidth : Int, hartIdWidth : Int, uopIdWidth : Int, withAmo : Boolean){ - -} - -case class CachelessCmd(p : CachelessBusParam) extends Bundle{ - val write = Bool() - val address = UInt(p.addressWidth bits) - val data = Bits(p.dataWidth bit) - val size = UInt(log2Up(log2Up(p.dataWidth / 8) + 1) bits) - val mask = Bits(p.dataWidth / 8 bits) - val io = Bool() //This is for verification purposes, allowing RVLS to track stuff - val fromHart = Bool() //This is for verification purposes, allowing RVLS to track stuff - val hartId = UInt(p.hartIdWidth bits) - val uopId = UInt(p.uopIdWidth bits) - val amoEnable = p.withAmo generate Bool() - val amoOp = p.withAmo generate Bits(5 bits) -} - -case class CachelessRsp(p : CachelessBusParam) extends Bundle{ - val error = Bool() - val data = Bits(p.dataWidth bits) - val scMiss = p.withAmo generate Bool() -} - -case class CachelessBus(p : CachelessBusParam) extends Bundle with IMasterSlave { - var cmd = Stream(CachelessCmd(p)) - var rsp = Flow(CachelessRsp(p)) - - override def asMaster(): Unit = { - master(cmd) - slave(rsp) - } -} - class LsuCachelessPlugin(var layer : LaneLayer, var withAmo : Boolean, var withSpeculativeLoadFlush : Boolean, //WARNING, the fork cmd may be flushed out of existance before firing @@ -75,11 +30,12 @@ class LsuCachelessPlugin(var layer : LaneLayer, var addressAt: Int = 0, var forkAt: Int = 0, var joinAt: Int = 1, - var wbAt: Int = 2) extends FiberPlugin with DBusAccessService{ + var wbAt: Int = 2) extends FiberPlugin with DBusAccessService with LsuCachelessBusProvider{ - val WITH_RSP = Payload(Bool()) + val WITH_RSP, WITH_ACCESS, FENCE = Payload(Bool()) override def accessRefillCount: Int = 0 override def accessWake: Bits = B(0) + override def getLsuCachelessBus(): LsuCachelessBus = logic.bus val logic = during setup new Area{ val elp = host.find[ExecuteLanePlugin](_.laneName == layer.laneName) @@ -97,7 +53,7 @@ class LsuCachelessPlugin(var layer : LaneLayer, val translationStorage = ats.newStorage(translationStorageParameter) atsStorageLock.release() - val trapPort = ts.newTrap(layer.el.getAge(forkAt), Execute.LANE_AGE_WIDTH) + val trapPort = ts.newTrap(layer.el.getExecuteAge(forkAt), Execute.LANE_AGE_WIDTH) val flushPort = ss.newFlushPort(layer.el.getExecuteAge(addressAt), laneAgeWidth = Execute.LANE_AGE_WIDTH, withUopId = true) val frontend = new AguFrontend(layer, host) @@ -126,8 +82,9 @@ class LsuCachelessPlugin(var layer : LaneLayer, 
op.dontFlushFrom(forkAt+1) } - layer.add(Rvi.FENCE) //TODO - layer(Rvi.FENCE).setCompletion(joinAt) + elp.setDecodingDefault(FENCE, False) + layer.add(Rvi.FENCE).addDecoding(FENCE -> True) + layer(Rvi.FENCE).setCompletion(forkAt) for(uop <- frontend.writingMem if layer(uop).completion.isEmpty) layer(uop).setCompletion(joinAt) @@ -143,15 +100,17 @@ class LsuCachelessPlugin(var layer : LaneLayer, val forkCtrl = elp.execute(forkAt) val joinCtrl = elp.execute(joinAt) val wbCtrl = elp.execute(wbAt) + val bufferSize = joinAt-forkAt+1 - val busParam = CachelessBusParam( + val busParam = LsuCachelessBusParam( addressWidth = Global.PHYSICAL_WIDTH, dataWidth = Riscv.LSLEN, hartIdWidth = Global.HART_ID_WIDTH, uopIdWidth = Decode.UOP_ID_WIDTH, - withAmo = withAmo + withAmo = withAmo, + pendingMax = bufferSize ) - val bus = master(CachelessBus(busParam)) + val bus = master(LsuCachelessBus(busParam)).simPublic() accessRetainer.await() @@ -161,23 +120,37 @@ class LsuCachelessPlugin(var layer : LaneLayer, val translationPort = ats.newTranslationPort( nodes = Seq(forkCtrl.down), rawAddress = RAW_ADDRESS, - allowRefill = insert(True), + forcePhysical = insert(False), usage = AddressTranslationPortUsage.LOAD_STORE, portSpec = translationPortParameter, storageSpec = translationStorage ) } + val cmdInflights = Bool() + val onFork = new forkCtrl.Area{ val tpk = onAddress.translationPort.keys val MISS_ALIGNED = insert((1 to log2Up(LSLEN / 8)).map(i => SIZE === i && onAddress.RAW_ADDRESS(i - 1 downto 0) =/= 0).orR) //TODO remove from speculLoad and handle it with trap val RS2 = elp(IntRegFile, riscv.RS2) + val pmaPort = new PmaPort(Global.PHYSICAL_WIDTH, (0 to log2Up(Riscv.LSLEN/8)).map(1 << _), List(PmaLoad, PmaStore)) + pmaPort.cmd.address := tpk.TRANSLATED + pmaPort.cmd.size := SIZE.asBits + pmaPort.cmd.op(0) := STORE + val PMA_RSP = insert(pmaPort.rsp) + val skip = False + val askFenceReg = RegNextWhen(isValid && SEL && ATOMIC, !elp.isFreezed()) init(False) //Implement atomic fencing (pessimistic) + val askFence = isValid && (FENCE || SEL && ATOMIC || askFenceReg) + val doFence = askFence && cmdInflights //Not ideal, because if the first cycle is freezed, then it will also consider the send cmd as something to fence + + val cmdCounter = Counter(bufferSize, bus.cmd.fire) val cmdSent = RegInit(False) setWhen(bus.cmd.fire) clearWhen(!elp.isFreezed()) - bus.cmd.valid := isValid && SEL && !cmdSent && !isCancel && !skip - bus.cmd.write := !LOAD + bus.cmd.valid := isValid && SEL && !cmdSent && !isCancel && !skip && !doFence + bus.cmd.id := cmdCounter + bus.cmd.write := STORE bus.cmd.address := tpk.TRANSLATED //TODO Overflow on TRANSLATED itself ? 
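+        // `mapping` below presumably replicates the store data across the LSLEN-wide bus word for
+        // each access size, so that whichever byte lanes AddressToMask() enables further down carry
+        // the bytes to be written, independently of the address alignment.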
val mapping = (0 to log2Up(Riscv.LSLEN / 8)).map{size => val w = (1 << size) * 8 @@ -186,17 +159,18 @@ class LsuCachelessPlugin(var layer : LaneLayer, bus.cmd.data := bus.cmd.size.muxListDc(mapping) bus.cmd.size := SIZE.resized bus.cmd.mask := AddressToMask(bus.cmd.address, bus.cmd.size, Riscv.LSLEN/8) - bus.cmd.io := tpk.IO + bus.cmd.io := pmaPort.rsp.io bus.cmd.fromHart := True bus.cmd.hartId := Global.HART_ID bus.cmd.uopId := Decode.UOP_ID if(withAmo) { - bus.cmd.amoEnable := LOAD.mux[Bool](LR, SC || AMO) + bus.cmd.amoEnable := ATOMIC bus.cmd.amoOp := UOP(31 downto 27) } //TODO amo AQ/RL - elp.freezeWhen(bus.cmd.isStall) + val freezeIt = bus.cmd.isStall || doFence + elp.freezeWhen(freezeIt) flushPort.valid := False flushPort.hartId := Global.HART_ID @@ -213,27 +187,34 @@ class LsuCachelessPlugin(var layer : LaneLayer, trapPort.code.assignDontCare() trapPort.arg.allowOverride() := 0 - if(withSpeculativeLoadFlush) when(LOAD && tpk.IO && elp.atRiskOfFlush(forkAt)){ + if(withSpeculativeLoadFlush) when(LOAD && pmaPort.rsp.io && elp.atRiskOfFlush(forkAt)){ skip := True trapPort.exception := False trapPort.code := TrapReason.REDO } - when(tpk.PAGE_FAULT || LOAD.mux(!tpk.ALLOW_READ, !tpk.ALLOW_WRITE)) { + when(tpk.ACCESS_FAULT || pmaPort.rsp.fault) { + skip := True + trapPort.exception := True + trapPort.code := CSR.MCAUSE_ENUM.LOAD_ACCESS_FAULT + trapPort.code(1) setWhen (STORE) + } + + when(tpk.PAGE_FAULT || STORE.mux( !tpk.ALLOW_WRITE, !tpk.ALLOW_READ)) { skip := True trapPort.exception := True trapPort.code := CSR.MCAUSE_ENUM.LOAD_PAGE_FAULT - trapPort.code(1) setWhen(!LOAD) + trapPort.code(1) setWhen(STORE) } when(tpk.ACCESS_FAULT) { skip := True trapPort.exception := True trapPort.code := CSR.MCAUSE_ENUM.LOAD_ACCESS_FAULT - trapPort.code(1) setWhen (!LOAD) + trapPort.code(1) setWhen (STORE) } - trapPort.arg(0, 2 bits) := LOAD.mux(B(TrapArg.LOAD, 2 bits), B(TrapArg.STORE, 2 bits)) + trapPort.arg(0, 2 bits) := STORE.mux(B(TrapArg.STORE, 2 bits), B(TrapArg.LOAD, 2 bits)) trapPort.arg(2, ats.getStorageIdWidth() bits) := ats.getStorageId(translationStorage) when(tpk.REDO) { skip := True @@ -244,7 +225,7 @@ class LsuCachelessPlugin(var layer : LaneLayer, when(MISS_ALIGNED){ skip := True trapPort.exception := True - trapPort.code := LOAD.mux[Bits](CSR.MCAUSE_ENUM.LOAD_MISALIGNED, CSR.MCAUSE_ENUM.STORE_MISALIGNED).andMask(MISS_ALIGNED).resized + trapPort.code := STORE.mux[Bits](CSR.MCAUSE_ENUM.STORE_MISALIGNED, CSR.MCAUSE_ENUM.LOAD_MISALIGNED).andMask(MISS_ALIGNED).resized } when(isValid && SEL && skip){ @@ -260,6 +241,9 @@ class LsuCachelessPlugin(var layer : LaneLayer, val allowIt = !(isValid && SEL) && !cmdSent val cmd = dbusAccesses.head.cmd cmd.ready := allowIt && !elp.isFreezed() + + val accessSent = RegInit(False) setWhen(cmd.fire) clearWhen(!elp.isFreezed()) + WITH_ACCESS := accessSent || cmd.fire when(allowIt){ bus.cmd.valid := cmd.valid bus.cmd.write := False @@ -273,26 +257,60 @@ class LsuCachelessPlugin(var layer : LaneLayer, } val onJoin = new joinCtrl.Area{ - val buffer = bus.rsp.toStream.queueLowLatency(joinAt-forkAt+1).combStage - val SC_MISS = insert(withAmo.mux(buffer.scMiss, False)) - val READ_DATA = insert(buffer.data) - elp.freezeWhen(WITH_RSP && !buffer.valid) - buffer.ready := WITH_RSP && isReady - assert(!(isValid && isCancel && SEL && !LOAD && !up(Global.TRAP)), "LsuCachelessPlugin saw unexpected select && !LOAD && cancel request") //TODO add tpk.IO and along the way)) //TODO add tpk.IO and along the way + val buffers = List.fill(bufferSize)(new Area{ + val valid = 
RegInit(False) + val inflight = RegInit(False) + val payload = Reg(LsuCachelessRsp(bus.p, false)) + }) + cmdInflights := buffers.map(_.inflight).orR + + val busRspWithoutId = LsuCachelessRsp(bus.p, false) + busRspWithoutId.assignSomeByName(bus.rsp.payload) + when(bus.cmd.fire) { + buffers.onSel(bus.cmd.id) { b => + b.inflight := True + } + } + when(bus.rsp.valid){ + buffers.onSel(bus.rsp.id){b => + b.valid := True + b.inflight := False + b.payload := busRspWithoutId + } + } + val pop = WITH_RSP && !elp.isFreezed() + val rspCounter = Counter(bufferSize, pop) + val reader = buffers.reader(rspCounter) + val readerValid = reader(_.valid) + when(pop){ + buffers.onSel(rspCounter)(_.valid := False) + } + + val busRspHit = bus.rsp.valid && bus.rsp.id === rspCounter + val rspValid = readerValid || busRspHit + val rspPayload = readerValid.mux(CombInit(reader(_.payload)), busRspWithoutId) + + val SC_MISS = insert(withAmo.mux(rspPayload.scMiss, False)) + val READ_DATA = insert(rspPayload.data) + elp.freezeWhen(WITH_RSP && !rspValid) + assert(!(isValid && isCancel && SEL && STORE && !up(Global.TRAP)), "LsuCachelessPlugin saw unexpected select && STORE && cancel request") //TODO add tpk.IO and along the way)) //TODO add tpk.IO and along the way val access = dbusAccesses.nonEmpty generate new Area { assert(dbusAccesses.size == 1) val rsp = dbusAccesses.head.rsp - rsp.valid := !(isValid && SEL) && WITH_RSP && buffer.valid - rsp.data := buffer.data - rsp.error := buffer.error + rsp.valid := WITH_ACCESS && pop + rsp.data := rspPayload.data + rsp.error := rspPayload.error rsp.redo := False rsp.waitAny := False } } - for(eid <- forkAt + 1 to joinAt) elp.execute(eid).up(WITH_RSP).setAsReg().init(False) + for(eid <- forkAt + 1 to joinAt) { + elp.execute(eid).up(WITH_RSP).setAsReg().init(False) + elp.execute(eid).up(WITH_ACCESS).setAsReg().init(False) + } - val onWb = new wbCtrl.Area{ + val onWb = new wbCtrl.Area { val rspSplits = onJoin.READ_DATA.subdivideIn(8 bits) val rspShifted = Bits(LSLEN bits) val wordBytes = LSLEN/8 @@ -310,7 +328,7 @@ class LsuCachelessPlugin(var layer : LaneLayer, iwb.valid := SEL iwb.payload := rspShifted - if (withAmo) when(!LOAD && SC) { + if (withAmo) when(ATOMIC && !LOAD) { iwb.payload(0) := onJoin.SC_MISS iwb.payload(7 downto 1) := 0 } @@ -318,4 +336,7 @@ class LsuCachelessPlugin(var layer : LaneLayer, buildBefore.release() } + + val regions = Handle[ArrayBuffer[PmaRegion]]() + val pmaBuilder = during build new PmaLogic(logic.onFork.pmaPort, regions) } diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuL1Bridge.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuL1Bridge.scala new file mode 100644 index 00000000..68cce196 --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuL1Bridge.scala @@ -0,0 +1,29 @@ +package vexiiriscv.execute.lsu + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.{DebugId, S2mSupport} +import spinal.lib.misc.plugin.FiberPlugin +import vexiiriscv.execute.LsuL1Bus + + + +class LsuL1TileLinkPlugin(node : bus.tilelink.fabric.Node) extends FiberPlugin { + val logic = during setup new Area{ + val lsucp = host[LsuL1Plugin] + lsucp.probeIdWidth = 0 + lsucp.ackIdWidth = 0 + assert(!lsucp.withCoherency) + + awaitBuild() + lsucp.logic.bus.setAsDirectionLess() + + val down = lsucp.logic.bus.toTilelink() + master(down) + + node.m2s.forceParameters(down.p.node.m) + node.s2m.supported.load(S2mSupport.none()) + node.bus.component.rework(node.bus << down) + } +} diff --git 
a/src/main/scala/vexiiriscv/execute/lsu/LsuL1Bus.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuL1Bus.scala new file mode 100644 index 00000000..7d9e636a --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuL1Bus.scala @@ -0,0 +1,557 @@ +package vexiiriscv.execute + +import spinal.core._ +import spinal.lib.bus.amba4.axi.{Axi4, Axi4Config} +import spinal.lib.bus.bmb.{Bmb, BmbAccessParameter, BmbParameter, BmbSourceParameter} +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink._ +import spinal.lib._ + +case class LsuL1BusParameter( addressWidth: Int, + dataWidth: Int, + readIdCount : Int, + writeIdCount : Int, + probeIdWidth : Int, + ackIdWidth : Int, + lineSize: Int, + withReducedBandwidth : Boolean, + withCoherency : Boolean){ + + val readIdWidth = log2Up(readIdCount) + val writeIdWidth = log2Up(writeIdCount) + + def toTileLinkM2sParameters(name : Nameable) = { + val masters = withCoherency match { + case false => List( + M2sAgent( + name = name, + M2sSource( + id = SizeMapping(0, writeIdCount), + emits = tilelink.M2sTransfers( + putFull = SizeRange(lineSize) + ) + ) + ), + M2sAgent( + name = name, + M2sSource( + id = SizeMapping(1 << log2Up(readIdCount max writeIdCount), readIdCount), + emits = tilelink.M2sTransfers( + get = SizeRange(lineSize) + ) + ) + ) + ) + case true => List( + M2sAgent( + name = name, + M2sSource( + id = SizeMapping(0, readIdCount), + emits = tilelink.M2sTransfers( + acquireB = SizeRange(lineSize), + acquireT = SizeRange(lineSize) + ) + ) + ) + ) + } + + tilelink.M2sParameters( + addressWidth = addressWidth, + dataWidth = dataWidth, + masters = masters + ) + } +} + +case class LsuL1ReadCmd(p : LsuL1BusParameter) extends Bundle { + val id = UInt(p.readIdWidth bits) + val address = UInt(p.addressWidth bits) + val unique = p.withCoherency generate Bool() + val data = p.withCoherency generate Bool() +} + +case class LsuL1ReadRsp(p : LsuL1BusParameter) extends Bundle { + val id = UInt(p.readIdWidth bits) + val data = Bits(p.dataWidth bits) + val error = Bool() + val unique = p.withCoherency generate Bool() + val ackId = p.withCoherency generate UInt(p.ackIdWidth bits) + val withData = p.withCoherency generate Bool() +} + +case class LsuL1ReadAck(p : LsuL1BusParameter) extends Bundle { + val ackId = UInt(p.ackIdWidth bits) +} + +case class LsuL1ReadBus(p : LsuL1BusParameter) extends Bundle with IMasterSlave { + val cmd = Stream(LsuL1ReadCmd(p)) + val rsp = Stream(LsuL1ReadRsp(p)) + val ack = p.withCoherency generate Stream(LsuL1ReadAck(p)) + + override def asMaster() = { + master(cmd, ack) + slave(rsp) + } + + def <<(m : LsuL1ReadBus): Unit ={ + m.cmd >> this.cmd + m.rsp << this.rsp + } + + def resizer(newDataWidth : Int) : LsuL1ReadBus = new Composite(this, "resizer"){ + val ret = LsuL1ReadBus( + p = p.copy( + dataWidth = newDataWidth, + withReducedBandwidth = p.withReducedBandwidth || newDataWidth > p.dataWidth + ) + ) + + ret.cmd << self.cmd + + val rspOutputStream = Stream(Bits(p.dataWidth bits)) + StreamWidthAdapter(ret.rsp.translateWith(ret.rsp.data), rspOutputStream) + + rsp.valid := rspOutputStream.valid + rsp.data := rspOutputStream.payload + rsp.id := ret.rsp.id + rsp.error := ret.rsp.error + rspOutputStream.ready := (if(p.withReducedBandwidth) rspOutputStream.ready else True) + }.ret + + def toBmb(): Bmb = new Composite(this, "toBmb"){ + val bmbConfig = BmbAccessParameter( + addressWidth = p.addressWidth, + dataWidth = p.dataWidth + ).addSources(p.readIdCount, BmbSourceParameter( + contextWidth = 0, 
+ lengthWidth = log2Up(p.lineSize), + alignment = BmbParameter.BurstAlignement.LENGTH, + canWrite = false, + withCachedRead = true + )) + + val bmb = Bmb(bmbConfig) + bmb.cmd.arbitrationFrom(cmd) + bmb.cmd.setRead() + bmb.cmd.address := cmd.address + bmb.cmd.length := p.lineSize-1 + bmb.cmd.source := cmd.id + bmb.cmd.last := True + + rsp.arbitrationFrom(bmb.rsp) + rsp.id := bmb.rsp.source + rsp.data := bmb.rsp.data + rsp.error := bmb.rsp.isError + }.bmb + +} + +case class LsuL1WriteCmd(p : LsuL1BusParameter) extends Bundle { + val address = UInt(p.addressWidth bits) + val data = Bits(p.dataWidth bits) + val id = UInt(p.writeIdWidth bits) + val coherent = p.withCoherency generate new Bundle{ + val release = Bool() //else from probe + val dirty = Bool() //Meaning with data + val fromUnique = Bool() + val toShared = Bool() + val probeId = UInt(p.probeIdWidth bits) + } +} + +case class LsuL1WriteRsp(p : LsuL1BusParameter) extends Bundle { + val error = Bool() + val id = UInt(p.writeIdWidth bits) +} + +case class LsuL1WriteBus(p : LsuL1BusParameter) extends Bundle with IMasterSlave { + val cmd = Stream(Fragment(LsuL1WriteCmd(p))) + val rsp = Flow(LsuL1WriteRsp(p)) + + override def asMaster() = { + master(cmd) + slave(rsp) + } + + + def <<(m : LsuL1WriteBus): Unit ={ + m.cmd >> this.cmd + m.rsp << this.rsp + } + + def resizer(newDataWidth : Int) : LsuL1WriteBus = new Composite(this, "resizer"){ + val ret = LsuL1WriteBus( + p = p.copy( + dataWidth = newDataWidth, + withReducedBandwidth = p.withReducedBandwidth || newDataWidth > p.dataWidth + ) + ) + + val cmdOutputStream = Stream(Fragment(Bits(newDataWidth bits))) + StreamFragmentWidthAdapter(cmd.translateWith(cmd.data).addFragmentLast(cmd.last), cmdOutputStream) + + ret.cmd.arbitrationFrom(cmdOutputStream) + ret.cmd.id := self.cmd.id + ret.cmd.address := self.cmd.address + ret.cmd.data := cmdOutputStream.fragment + ret.cmd.last := cmdOutputStream.last + + self.rsp << ret.rsp + }.ret + + + def toBmb(): Bmb = new Composite(this, "toBmb"){ + val bmbConfig = BmbAccessParameter( + addressWidth = p.addressWidth, + dataWidth = p.dataWidth + ).addSources(p.readIdCount, BmbSourceParameter( + contextWidth = 0, + lengthWidth = log2Up(p.lineSize), + alignment = BmbParameter.BurstAlignement.LENGTH, + canRead = false, + withCachedRead = true + )) + + val bmb = Bmb(bmbConfig) + bmb.cmd.arbitrationFrom(cmd) + bmb.cmd.setWrite() + bmb.cmd.address := cmd.address + bmb.cmd.length := p.lineSize-1 + bmb.cmd.source := cmd.id + bmb.cmd.data := cmd.data + bmb.cmd.last := cmd.last + bmb.cmd.mask.setAll() + + + bmb.rsp.ready := True + rsp.valid := bmb.rsp.valid + rsp.id := bmb.rsp.source + rsp.error := bmb.rsp.isError + }.bmb +} + + +case class LsuL1ProbeCmd(p : LsuL1BusParameter) extends Bundle { + val address = UInt(p.addressWidth bits) + val id = UInt(p.probeIdWidth bits) + val allowUnique = Bool() + val allowShared = Bool() + val getDirtyData = Bool() +} + +case class LsuL1ProbeRsp(p : LsuL1BusParameter, fromProbe : Boolean) extends Bundle { + val id = UInt(p.probeIdWidth bits) + val address = UInt(p.addressWidth bits) + val fromUnique, fromShared = Bool() + val toShared, toUnique = Bool() + val allowShared, allowUnique, getDirtyData = Bool() //Used for redo + val redo = fromProbe generate Bool() + val writeback = fromProbe generate Bool() + + def assignTilelinkC(c : ChannelC) = { + c.opcode := tilelink.Opcode.C.PROBE_ACK() + c.param := Param.report( + fromUnique, + fromShared, + toUnique, + toShared + ) + c.source := id + c.address := address + c.size := 
log2Up(p.lineSize) + c.data.assignDontCare() + c.corrupt.assignDontCare() + } +} + + +case class LsuL1ProbeBus(p : LsuL1BusParameter) extends Bundle with IMasterSlave { + val cmd = Flow(LsuL1ProbeCmd(p)) + val rsp = Flow(LsuL1ProbeRsp(p, true)) + override def asMaster() = { + master(cmd) + slave(rsp) + } + + + def <<(m : LsuL1ProbeBus): Unit ={ + m.cmd >> this.cmd + m.rsp << this.rsp + } +} + + + +case class LsuL1Bus(p : LsuL1BusParameter) extends Bundle with IMasterSlave { + val read = LsuL1ReadBus(p) + val write = LsuL1WriteBus(p) + val probe = p.withCoherency generate LsuL1ProbeBus(p) + + override def asMaster() = { + master(read, write) + slave(probe) + } + + def resizer(newDataWidth : Int) : LsuL1Bus = new Composite(this, "resizer") { + val ret = LsuL1Bus( + p = p.copy( + dataWidth = newDataWidth, + withReducedBandwidth = p.withReducedBandwidth || newDataWidth > p.dataWidth + ) + ) + + ret.read << read.resizer(newDataWidth) + ret.write << write.resizer(newDataWidth) + + }.ret + + + def toAxi4(): Axi4 = new Composite(this, "toAxi4"){ + assert(!p.withCoherency) + val idWidth = p.readIdWidth max p.writeIdWidth + + val axiConfig = Axi4Config( + addressWidth = p.addressWidth, + dataWidth = p.dataWidth, + idWidth = idWidth, + useId = true, + useRegion = false, + useBurst = true, + useLock = false, + useCache = false, + useSize = true, + useQos = false, + useLen = true, + useLast = true, + useResp = true, + useProt = true, + useStrb = true + ) + + val axi = Axi4(axiConfig) + + //READ + axi.ar.valid := read.cmd.valid + axi.ar.addr := read.cmd.address + axi.ar.id := read.cmd.id + axi.ar.prot := B"010" + axi.ar.len := p.lineSize*8/p.dataWidth-1 + axi.ar.size := log2Up(p.dataWidth/8) + axi.ar.setBurstINCR() + read.cmd.ready := axi.ar.ready + + read.rsp.valid := axi.r.valid + read.rsp.data := axi.r.data + read.rsp.id := axi.r.id + read.rsp.error := !axi.r.isOKAY() + axi.r.ready := (if(p.withReducedBandwidth) read.rsp.ready else True) + + //WRITE + val (awRaw, wRaw) = StreamFork2(write.cmd) + val awFiltred = awRaw.throwWhen(!awRaw.first) + val aw = awFiltred.stage() + axi.aw.valid := aw.valid + axi.aw.addr := aw.address + axi.aw.id := aw.id + axi.aw.prot := B"010" + axi.aw.len := p.lineSize*8/p.dataWidth-1 + axi.aw.size := log2Up(p.dataWidth/8) + axi.aw.setBurstINCR() + aw.ready := axi.aw.ready + + val w = wRaw.haltWhen(awFiltred.valid) + axi.w.valid := w.valid + axi.w.data := w.data + axi.w.strb.setAll() + axi.w.last := w.last + w.ready := axi.w.ready + + write.rsp.valid := axi.b.valid + write.rsp.id := axi.b.id + write.rsp.error := !axi.b.isOKAY() + axi.b.ready := True + }.axi + + + + def toTilelink(): tilelink.Bus = new Composite(this, "toTilelink"){ + val m2s = p.toTileLinkM2sParameters(null) + val s2m = new S2mParameters(List( +// new S2mAgent() //Dummy + )) + val bus = tilelink.Bus( + BusParameter( + addressWidth = m2s.addressWidth, + dataWidth = m2s.dataWidth, + sizeBytes = m2s.sizeBytes, + sourceWidth = m2s.sourceWidth, + sinkWidth = p.ackIdWidth, + withBCE = p.withCoherency, + withDataA = true, + withDataB = false, + withDataD = true, + withDataC = true, + node = new NodeParameters( + m2s + ) + ) + ) + + val nonCoherent = !p.withCoherency generate new Area{ + val onA = new Area { + val lock = RegInit(False) setWhen (bus.a.valid) clearWhen (bus.a.fire && bus.a.isLast()) + val selReg = Reg(Bool()) + val sel = lock.mux(selReg, read.cmd.valid) + selReg := sel + + bus.a.param := 0 + bus.a.size := log2Up(p.lineSize) + bus.a.mask.setAll() + bus.a.data := write.cmd.data + bus.a.corrupt := False + 
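+        // Refill reads and writeback writes share TileLink channel A : `sel` picks which side
+        // drives the channel (reads take priority when no burst is in flight, and `lock` holds the
+        // choice for the whole burst). The MSB of `source` records that choice so that onD can
+        // route channel D responses back to the proper side.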
+ when(sel) { + bus.a.valid := read.cmd.valid + bus.a.opcode := tilelink.Opcode.A.GET() + bus.a.source := read.cmd.id.resized + bus.a.address := read.cmd.address + } otherwise { + bus.a.valid := write.cmd.valid + bus.a.opcode := tilelink.Opcode.A.PUT_FULL_DATA() + bus.a.source := write.cmd.id.resized + bus.a.address := write.cmd.address + } + + val beat = bus.a.beatCounter() + bus.a.address(log2Up(p.dataWidth/8), widthOf(beat) bits) := beat + bus.a.source.allowOverride() + bus.a.source.msb := sel + + write.cmd.ready := !sel && bus.a.ready + read.cmd.ready := sel && bus.a.ready + } + + val onD = new Area{ + val sel = bus.d.source.msb + + read.rsp.valid := bus.d.valid && sel + read.rsp.data := bus.d.data + read.rsp.error := bus.d.denied || bus.d.corrupt + read.rsp.id := bus.d.source.resized + + write.rsp.valid := bus.d.valid && !sel + write.rsp.error := bus.d.denied || bus.d.corrupt + write.rsp.id := bus.d.source.resized + + bus.d.ready := sel.mux(read.rsp.ready, True) + } + } + + + val coherent = p.withCoherency generate new Area{ + val onA = new Area{ + bus.a.arbitrationFrom(read.cmd) + bus.a.opcode := tilelink.Opcode.A.ACQUIRE_BLOCK + bus.a.param := tilelink.Param.Grow(read.cmd.data, read.cmd.unique) + bus.a.source := read.cmd.id.resized + bus.a.address := read.cmd.address + bus.a.size := log2Up(p.lineSize) + } + + val onB = new Area{ + assert(!(bus.b.valid && bus.b.opcode === Opcode.B.PROBE_PERM)) + probe.cmd.valid := bus.b.valid + probe.cmd.address := bus.b.address + probe.cmd.id := bus.b.source + probe.cmd.allowUnique := bus.b.param === Param.Cap.toT + probe.cmd.allowShared := bus.b.param =/= Param.Cap.toN + probe.cmd.getDirtyData := bus.b.opcode === Opcode.B.PROBE_BLOCK + bus.b.ready := True + } + + val onC = new Area{ + val rsp = probe.rsp.throwWhen(!probe.rsp.redo && probe.rsp.writeback) + when(rsp.valid && rsp.redo){ + probe.cmd.valid := True + probe.cmd.address := probe.rsp.address + probe.cmd.id := probe.rsp.id + probe.cmd.allowUnique := probe.rsp.allowUnique + probe.cmd.allowShared := probe.rsp.allowShared + probe.cmd.getDirtyData := probe.rsp.getDirtyData + bus.b.ready := False + } + + val rspStream = rsp.takeWhen(!rsp.redo).toStream + assert(!rspStream.isStall) + val rspFifo = StreamFifo(rsp.payloadType, 16, latency = 1) + val rspFifoAlmostFull = RegNext(rspFifo.io.occupancy(log2Up(rspFifo.depth/2))) init(False) + when(rspFifoAlmostFull){ + probe.cmd.valid := False + bus.b.ready := False + } + rspFifo.io.push << rspStream + + val arbiter = StreamArbiterFactory().lambdaLock[ChannelC](_.isLast()).roundRobin.build(bus.c.payloadType, 2) + val i0 = arbiter.io.inputs(0) + i0.arbitrationFrom(write.cmd) + i0.opcode := write.cmd.coherent.release mux( + write.cmd.coherent.dirty mux( + tilelink.Opcode.C.RELEASE_DATA(), + tilelink.Opcode.C.RELEASE() + ), + tilelink.Opcode.C.PROBE_ACK_DATA() + ) + i0.param := Param.report( + write.cmd.coherent.fromUnique, + !write.cmd.coherent.fromUnique, + False, + write.cmd.coherent.toShared + ) + i0.source := write.cmd.coherent.release.mux( + write.cmd.id, + write.cmd.coherent.probeId + ) + + i0.address := write.cmd.address + i0.size := log2Up(p.lineSize) + i0.data := write.cmd.data + i0.corrupt := False + + val i1 = arbiter.io.inputs(1) + i1.arbitrationFrom(rspFifo.io.pop) + rspFifo.io.pop.assignTilelinkC(i1) + + val beat = bus.c.beatCounter() + bus.c << arbiter.io.output + bus.c.address(log2Up(p.dataWidth/8), widthOf(beat) bits) := beat + } + + + val onD = new Area{ + val sel = tilelink.Opcode.D.fromA(bus.d.opcode) + + read.rsp.valid := bus.d.valid && 
sel + read.rsp.data := bus.d.data + read.rsp.error := bus.d.denied || bus.d.corrupt + read.rsp.id := bus.d.source.resized + read.rsp.unique := bus.d.param === Param.Cap.toT + read.rsp.ackId := bus.d.sink + read.rsp.withData := bus.d.opcode === Opcode.D.GRANT_DATA + + write.rsp.valid := bus.d.valid && !sel + write.rsp.error := bus.d.denied || bus.d.corrupt + write.rsp.id := bus.d.source.resized + + bus.d.ready := sel.mux(read.rsp.ready, True) + } + + val onE = new Area{ + bus.e.arbitrationFrom(read.ack) + bus.e.sink := read.ack.ackId + } + } + }.bus + +} diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuL1Plugin.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuL1Plugin.scala new file mode 100644 index 00000000..e9b84acb --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuL1Plugin.scala @@ -0,0 +1,978 @@ +package vexiiriscv.execute.lsu + +import spinal.core._ +import spinal.core.fiber.Handle +import spinal.core.sim.SimDataPimper +import spinal.lib._ +import spinal.lib.misc.Plru +import spinal.lib.misc.database.Database.blocking +import spinal.lib.misc.pipeline._ +import spinal.lib.misc.plugin.FiberPlugin +import spinal.lib.system.tag.PmaRegion +import vexiiriscv.Global +import vexiiriscv.misc.Reservation +import vexiiriscv.riscv.{AtomicAlu, Riscv} +import vexiiriscv.execute._ +import vexiiriscv.fetch.{InitService, LsuL1Service} +import vexiiriscv.riscv.Riscv.{RVA, RVC} + +import scala.collection.mutable.ArrayBuffer + +object LsuL1 extends AreaObject{ + // -> L1 + val ABORD, SKIP_WRITE = Payload(Bool()) + val SEL = Payload(Bool()) + val LOAD, STORE, ATOMIC, FLUSH = Payload(Bool()) + val MIXED_ADDRESS = Payload(Global.MIXED_ADDRESS) + val PHYSICAL_ADDRESS = Payload(Global.PHYSICAL_ADDRESS) + val WRITE_DATA = Payload(Bits(Riscv.LSLEN bits)) + val MASK = Payload(Bits(Riscv.LSLEN / 8 bits)) //Also needed for loads + val SIZE = Payload(UInt(log2Up(log2Up(Riscv.LSLEN / 8+1)) bits)) //Also needed for loads + + // L1 -> + val READ_DATA = Payload(Bits(Riscv.LSLEN bits)) + val HAZARD, MISS, MISS_UNIQUE, FAULT = Payload(Bool()) + val FLUSH_HIT = Payload(Bool()) + + val SETS = blocking[Int] + val WAYS = blocking[Int] + val LINE_BYTES = blocking[Int] + val WRITEBACK_BUSY = blocking[Bits] +} + +/* +List of hazard to take care of : +- store to load + - withBypass = false => redo when detected + - withBypass = true => data bypass +- dirty update + - bypass +- writeback/refill conflicting + - redo when detected + */ +class LsuL1Plugin(val lane : ExecuteLaneService, + var memDataWidth: Int, + var cpuDataWidth: Int, + var refillCount: Int, + var writebackCount: Int, + var setCount: Int, + var wayCount: Int, + var lineSize: Int = 64, + var bankReadAt: Int = 0, + var wayReadAt: Int = 0, + var hitsAt: Int = 1, + var hitAt: Int = 1, + var bankMuxesAt: Int = 1, + var bankMuxAt: Int = 2, + var ctrlAt: Int = 2, + var hazardCheckWidth : Int = 12, + var hitsWithTranslationWays: Boolean = false, + var reducedBankWidth: Boolean = false, + var tagsReadAsync: Boolean = false, + var withCoherency: Boolean = false, + var withBypass: Boolean = false, + var probeIdWidth: Int = -1, + var ackIdWidth: Int = -1) extends FiberPlugin with InitService{ + + override def initHold(): Bool = !logic.initializer.done + val regions = Handle[ArrayBuffer[PmaRegion]]() + + def memParameter = LsuL1BusParameter( + addressWidth = Global.PHYSICAL_WIDTH, + dataWidth = memDataWidth, + readIdCount = refillCount, + writeIdCount = writebackCount, + probeIdWidth = probeIdWidth, + ackIdWidth = ackIdWidth, + lineSize = lineSize, + 
withReducedBandwidth = false, + withCoherency = withCoherency + ) + + val logic = during setup new Area{ + import LsuL1._ + + awaitBuild() + + SETS.set(setCount) + WAYS.set(wayCount) + LINE_BYTES.set(lineSize) + + val postTranslationWidth = Global.PHYSICAL_WIDTH + + val cacheSize = wayCount*setCount*lineSize + val cpuWordWidth = cpuDataWidth + val bytePerMemWord = memDataWidth / 8 + val bytePerFetchWord = cpuDataWidth / 8 + val waySize = cacheSize / wayCount + val linePerWay = waySize / lineSize + val memDataPerWay = waySize / bytePerMemWord + val memData = HardType(Bits(memDataWidth bits)) + val memWordPerLine = lineSize / bytePerMemWord + val tagWidth = postTranslationWidth - log2Up(waySize) + + val tagRange = postTranslationWidth - 1 downto log2Up(linePerWay * lineSize) + val lineRange = tagRange.low - 1 downto log2Up(lineSize) + val refillRange = tagRange.high downto lineRange.low + val hazardCheckRange = hazardCheckWidth+lineRange.low-1 downto lineRange.low + val notWordRange = tagRange.high downto log2Up(cpuDataWidth/8) + + val bankCount = wayCount + val bankWidth = if (!reducedBankWidth) memDataWidth else Math.max(cpuWordWidth, memDataWidth / wayCount) + val bankByteSize = cacheSize / bankCount + val bankWordCount = bankByteSize * 8 / bankWidth + val bankWordToCpuWordRange = log2Up(bankWidth / 8) - 1 downto log2Up(bytePerFetchWord) + val memToBankRatio = bankWidth * bankCount / memDataWidth + val bankWord = HardType(Bits(bankWidth bits)) + val bankWordPerLine = lineSize * 8 / bankWidth + + assert(bankWidth <= memDataWidth) + + val bus = master(LsuL1Bus(memParameter)).simPublic() + + val WAYS_HAZARD = Payload(Bits(wayCount bits)) + val BANK_BUSY = Payload(Bits(bankCount bits)) + val BANK_BUSY_REMAPPED = Payload(Bits(bankCount bits)) + val REFILL_HITS_EARLY = Payload(Bits(refillCount bits)) + val REFILL_HITS = Payload(Bits(refillCount bits)) + + + case class Tag() extends Bundle { + val loaded = Bool() + val address = UInt(tagWidth bits) + val fault = Bool() + val unique = withCoherency generate Bool() + val dirty = Bool() + } + + +// val STATUS = Payload(Vec.fill(wayCount)(Status())) + val BANKS_WORDS = Payload(Vec.fill(bankCount)(bankWord())) + val MUXED_DATA, BYPASSED_DATA = Payload(Bits(cpuDataWidth bits)) + val WAYS_TAGS = Payload(Vec.fill(wayCount)(Tag())) + val WAYS_HITS = Payload(Bits(wayCount bits)) + val WAYS_HIT = Payload(Bool()) + val NEED_UNIQUE = Payload(Bool()) + val DIRTY_BYPASS = Payload(Bits(wayCount bits)) + val PROBE = Payload(Bool()) + val ALLOW_UNIQUE = Payload(Bool()) + val ALLOW_SHARED = Payload(Bool()) + val ALLOW_PROBE_DATA = Payload(Bool()) + val PROBE_ID = Payload(UInt(probeIdWidth bits)) + val WRITE_TO_READ_HAZARDS = Payload(Bits(ctrlAt - bankReadAt bits)) + val EVENT_WRITE_VALID = Payload(Bool()) + val EVENT_WRITE_ADDRESS = Payload(PHYSICAL_ADDRESS) + val EVENT_WRITE_DATA = Payload(WRITE_DATA) + val EVENT_WRITE_MASK = Payload(MASK) + val BANKS_MUXES = Payload(Vec.fill(bankCount)(Bits(cpuWordWidth bits))) + + val tagsWriteArbiter = new Reservation() + val bankWriteArbiter = new Reservation() //TODO + val bankReadArbiter = new Reservation() + + val refillCompletions = Bits(refillCount bits) + val writebackBusy = Bool() + + val banks = for (id <- 0 until bankCount) yield new Area { + val mem = Mem(Bits(bankWidth bits), bankWordCount) + val write = mem.writePortWithMask(mem.getWidth / 8) + val read = new Area{ + val cmd = Flow(mem.addressType).setIdle() + val rsp = mem.readSync(cmd.payload, cmd.valid) + KeepAttribute(rsp) //Ensure that it will not use 2 cycle 
latency ram block + } + } + + val waysWrite = new Area { + val mask = B(0, wayCount bits) + val address = UInt(log2Up(linePerWay) bits).assignDontCare() + val tag = Tag().assignDontCare() + + //Used for hazard tracking in a pipelined way + val maskLast = RegNext(mask) + val addressLast = RegNext(address) + } + + + val ways = for (id <- 0 until wayCount) yield new Area { + val mem = Mem.fill(linePerWay)(Tag()) + mem.write(waysWrite.address, waysWrite.tag, waysWrite.mask(id)) + val lsuRead = new Area { + val cmd = Flow(mem.addressType) + val rsp = if (tagsReadAsync) mem.readAsync(cmd.payload) else mem.readSync(cmd.payload, cmd.valid) + KeepAttribute(rsp) //Ensure that it will not use 2 cycle latency ram block + } + } + + val PLRU = Payload(Plru.State(wayCount)) + val plru = new Area { + val mem = Mem.fill(linePerWay)(Plru.State(wayCount)) + val write = mem.writePort + val read = new Area { + val cmd = Flow(mem.addressType) + val rsp = if (tagsReadAsync) mem.readAsync(cmd.payload) else mem.readSync(cmd.payload, cmd.valid) + KeepAttribute(rsp) //Ensure that it will not use 2 cycle latency ram block + } + } + + val initializer = new Area { + val counter = Reg(UInt(log2Up(linePerWay) + 1 bits)) init (0) + val done = counter.msb + when(!done) { + counter := counter + 1 + waysWrite.mask.setAll() + waysWrite.address := counter.resized + waysWrite.tag.loaded := False + } + plru.write.valid := !done + plru.write.address := counter.resized + plru.write.data.clearAll() + } + + class PriorityArea(slots: Seq[(Bool, Bits)]) extends Area { + val slotsWithId = slots.zipWithIndex.map(e => (e._1._1, e._1._2, e._2)) + val hits = B(slots.map(_._1)) + val hit = hits.orR + val oh = hits & B(slotsWithId.map(slot => (B(slotsWithId.filter(_ != slot).map(other => hits(other._3))) & slot._2) === 0)) + val sel = OHToUInt(oh) + val lock = RegNext(oh) init (0) + when(lock.orR) { + oh := lock + } + } + + + val refill = new Area { + val slots = for (refillId <- 0 until refillCount) yield new Area { + val id = refillId + val valid = RegInit(False) + val dirty = Reg(Bool()) + val address = Reg(UInt(postTranslationWidth bits)) + val way = Reg(UInt(log2Up(wayCount) bits)) + val cmdSent = Reg(Bool()) + val priority = Reg(Bits(refillCount - 1 bits)) //TODO Check it + val unique = withCoherency generate Reg(Bool()) + val data = withCoherency generate Reg(Bool()) + val ackId = withCoherency generate Reg(UInt(ackIdWidth bits)) + val ackValid = withCoherency generate RegInit(False) + + // This counter ensure that load/store which started before the end of the refill memory transfer but ended after the end + // of the memory transfer do see that there was a refill ongoing and that they need to retry + val loaded = Reg(Bool()) + val loadedCounterMax = ctrlAt - Math.min(wayReadAt, bankReadAt)-1 + val loadedCounter = Reg(UInt(log2Up(loadedCounterMax + 1) bits)) + val loadedDone = loadedCounter === loadedCounterMax + loadedCounter := loadedCounter + U(loaded && !loadedDone && !lane.isFreezed()).resized + valid clearWhen (loadedDone && withCoherency.mux(!ackValid, True)) + + val free = !valid + + val victim = Reg(Bits(writebackCount bits)) + val writebackHazards = Reg(Bits(writebackCount bits)) //TODO Check it + } + + //Ignore the way, allowing coherent BtoT to detect ongoing NtoB + def isLineBusy(address: UInt) = slots.map(s => s.valid && s.address(hazardCheckRange) === address(hazardCheckRange)).orR + + val free = B(OHMasking.first(slots.map(_.free))) + val full = slots.map(!_.free).andR + + val push = Flow(new Bundle { + val address = 
UInt(postTranslationWidth bits) + val way = UInt(log2Up(wayCount) bits) + val victim = Bits(writebackCount bits) + val dirty = Bool() + val unique = Bool() + val data = Bool() + }) + + import spinal.core.sim._ + + val pushCounter = Reg(UInt(32 bits)) init (0) simPublic() + when(push.valid) { + pushCounter := pushCounter + 1 + } + + for (slot <- slots) when(push.valid) { + when(free(slot.id)) { + slot.valid := True + slot.address := push.address + slot.way := push.way + slot.cmdSent := False + slot.priority.setAll() + slot.loaded := False + slot.loadedCounter := 0 + slot.victim := push.victim + slot.dirty := push.dirty + slot.writebackHazards := 0 + if (withCoherency) { + slot.unique := push.unique + slot.data := push.data + } + } otherwise { + val freeFiltred = free.asBools.patch(slot.id, Nil, 1) + (slot.priority.asBools, freeFiltred).zipped.foreach(_ clearWhen (_)) + } + } + + val read = new Area { + val arbiter = new PriorityArea(slots.map(s => (s.valid && !s.cmdSent && s.victim === 0 && s.writebackHazards === 0, s.priority))) + + val writebackHazards = Bits(writebackCount bits) + val writebackHazard = writebackHazards.orR + when(bus.read.cmd.fire || writebackHazard) { + arbiter.lock := 0 + } + + val cmdAddress = slots.map(_.address(tagRange.high downto lineRange.low)).read(arbiter.sel) @@ U(0, lineRange.low bit) + bus.read.cmd.valid := arbiter.hit && !writebackHazard + bus.read.cmd.id := arbiter.sel + bus.read.cmd.address := cmdAddress + if (withCoherency) { + bus.read.cmd.unique := slots.map(_.unique).read(arbiter.sel) + bus.read.cmd.data := slots.map(_.data).read(arbiter.sel) + } + slots.onMask(arbiter.oh) { slot => + slot.writebackHazards := writebackHazards + slot.cmdSent setWhen (bus.read.cmd.ready && !writebackHazard) + } + + val rspAddress = slots.map(_.address).read(bus.read.rsp.id) + val dirty = slots.map(_.dirty).read(bus.read.rsp.id) + val way = slots.map(_.way).read(bus.read.rsp.id) + val wordIndex = KeepAttribute(Reg(UInt(log2Up(memWordPerLine) bits)) init (0)) + val rspWithData = withCoherency.mux(bus.read.rsp.withData, True) + if (withCoherency) assert(!(bus.read.rsp.valid && !rspWithData && slots.map(_.data).read(bus.read.rsp.id)), "Data cache asked for data but didn't recieved any :(") + + val bankWriteNotif = Bits(bankCount bits) + val writeReservation = bankWriteArbiter.create(0) + when(bus.read.rsp.valid) { + writeReservation.takeIt() + assert(writeReservation.win) + } + for ((bank, bankId) <- banks.zipWithIndex) { + if (!reducedBankWidth) { + bankWriteNotif(bankId) := bus.read.rsp.valid && rspWithData && way === bankId + bank.write.valid := bankWriteNotif(bankId) + bank.write.address := rspAddress(lineRange) @@ wordIndex + bank.write.data := bus.read.rsp.data + } else { + val sel = U(bankId) - way + val groupSel = way(log2Up(bankCount) - 1 downto log2Up(bankCount / memToBankRatio)) + val subSel = sel(log2Up(bankCount / memToBankRatio) - 1 downto 0) + bankWriteNotif(bankId) := bus.read.rsp.valid && rspWithData && groupSel === (bankId >> log2Up(bankCount / memToBankRatio)) + bank.write.valid := bankWriteNotif(bankId) + bank.write.address := rspAddress(lineRange) @@ wordIndex @@ (subSel) + bank.write.data := bus.read.rsp.data.subdivideIn(bankCount / memToBankRatio slices)(subSel) + } + banks(bankId).write.mask := (default -> true) + } + + val hadError = RegInit(False) setWhen (bus.read.rsp.valid && bus.read.rsp.error) + val fire = False + val reservation = tagsWriteArbiter.create(0) + val faulty = hadError || bus.read.rsp.error + + refillCompletions := 0 + 
bus.read.rsp.ready := True + when(bus.read.rsp.valid) { + assert(reservation.win) + when(rspWithData) { + wordIndex := wordIndex + 1 + } + when(wordIndex === wordIndex.maxValue || !rspWithData) { + hadError := False + fire := True + if (!withCoherency) refillCompletions(bus.read.rsp.id) := True + reservation.takeIt() + waysWrite.mask(way) := True + waysWrite.address := rspAddress(lineRange) + waysWrite.tag.fault := faulty + waysWrite.tag.address := rspAddress(tagRange) + waysWrite.tag.loaded := True + waysWrite.tag.dirty := dirty + if (withCoherency) { + waysWrite.tag.unique := bus.read.rsp.unique + } + slots.onSel(bus.read.rsp.id) { s => + s.loaded := True + if (withCoherency) { + s.ackValid := True + s.ackId := bus.read.rsp.ackId + } + } + } + } + } + + val ackSender = withCoherency generate new Area { + val ack = cloneOf(bus.read.ack) + val requests = slots.map(_.ackValid) + val oh = OHMasking.first(requests) + ack.valid := requests.orR + ack.ackId := OhMux.or(oh, slots.map(_.ackId)) + when(ack.ready) { + refillCompletions.asBools.onMask(oh)(_ := True) + slots.onMask(oh)(_.ackValid := False) + } + + val buffer = ack.m2sPipe() + val counter = Reg(UInt(2 bits)) init (0) + when(buffer.valid) { + counter := counter + 1 + } + bus.read.ack << buffer.haltWhen(counter =/= 3) //Give some time for the CPU to do forward progress + } + } + + val writeback = new Area { + val slots = for (writebackId <- 0 until writebackCount) yield new Area { + val id = writebackId + val fire = False + val valid = RegInit(False) clearWhen (fire) + val busy = RegInit(False) clearWhen(fire) + val address = Reg(UInt(postTranslationWidth bits)) + val way = Reg(UInt(log2Up(wayCount) bits)) + val priority = Reg(Bits(writebackCount - 1 bits)) //TODO Check it + val readCmdDone = Reg(Bool()) + val victimBufferReady = Reg(Bool()) + val readRspDone = Reg(Bool()) + val writeCmdDone = Reg(Bool()) + + val coherency = withCoherency generate new Area { + val release = Reg(Bool()) + val dirty = Reg(Bool()) + val fromUnique = Reg(Bool()) + val toShared = Reg(Bool()) + val probeId = Reg(UInt(probeIdWidth bits)) + } + + //Ensure that valid stay high at least as long as the pipeline latency to ensure visibility + val timer = new Area { + val counterMax = ctrlAt - Math.min(wayReadAt, bankReadAt) - 1 + val counter = Reg(UInt(log2Up(counterMax + 1) bits)) + val done = counter === counterMax + counter := counter + U(!done && !lane.isFreezed()).resized + valid clearWhen (this.done && (fire || !busy)) + } + + val free = !valid + + refill.read.writebackHazards(id) := valid && address(refillRange) === refill.read.cmdAddress(refillRange) + when(fire) { + refill.slots.foreach(_.writebackHazards(id) := False) + } + } + + WRITEBACK_BUSY.set(B(slots.map(_.valid))) + writebackBusy := slots.map(_.valid).orR + + def isLineBusy(address: UInt) = slots.map(s => s.valid && s.address(hazardCheckRange) === address(hazardCheckRange)).orR + + val free = B(OHMasking.first(slots.map(_.free))) + val full = slots.map(!_.free).andR + + val push = Flow(new Bundle { + val address = UInt(postTranslationWidth bits) + val way = UInt(log2Up(wayCount) bits) + + //TtoB TtoN BtoN + val dirty = withCoherency generate Bool() + val fromUnique = withCoherency generate Bool() + val toShared = withCoherency generate Bool() + val release = withCoherency generate Bool() + val probeId = withCoherency generate UInt(probeIdWidth bits) + }).setIdle() + + for (slot <- slots) when(push.valid) { + when(free(slot.id)) { + slot.valid := True + slot.busy := True + slot.address := 
push.address + slot.way := push.way + slot.timer.counter := 0 + + slot.writeCmdDone := False + slot.priority.setAll() + if (withCoherency) { + slot.coherency.release := push.release + slot.coherency.dirty := push.dirty + slot.coherency.fromUnique := push.fromUnique + slot.coherency.toShared := push.toShared + slot.coherency.probeId := push.probeId + slot.readCmdDone := !push.dirty + slot.readRspDone := !push.dirty + slot.victimBufferReady := !push.dirty + } else { + slot.readCmdDone := False + slot.readRspDone := False + slot.victimBufferReady := False + } + } otherwise { + val freeFiltred = free.asBools.patch(slot.id, Nil, 1) + (slot.priority.asBools, freeFiltred).zipped.foreach(_ clearWhen (_)) + } + } + + val victimBuffer = Mem.fill(writebackCount * memWordPerLine)(Bits(memDataWidth bits)) + val read = new Area { + val arbiter = new PriorityArea(slots.map(s => (s.valid && !s.readCmdDone, s.priority))) + + val address = slots.map(_.address).read(arbiter.sel) + val way = slots.map(_.way).read(arbiter.sel) + val wordIndex = KeepAttribute(Reg(UInt(log2Up(memWordPerLine) bits)) init (0)) + + val slotRead = Flow(new Bundle { + val id = UInt(log2Up(writebackCount) bits) + val last = Bool() + val wordIndex = UInt(log2Up(memWordPerLine) bits) + val way = UInt(log2Up(wayCount) bits) + }) + slotRead.valid := arbiter.hit + slotRead.id := arbiter.sel + slotRead.wordIndex := wordIndex + slotRead.way := way + slotRead.last := wordIndex === wordIndex.maxValue + wordIndex := wordIndex + U(slotRead.valid) + when(slotRead.valid && slotRead.last) { + slots.onMask(arbiter.oh) { + _.readCmdDone := True + } + arbiter.lock := 0 + } + when(slotRead.fire) { + for (slot <- refill.slots) slot.victim(slotRead.id) := False + } + + val brr = bankReadArbiter.create(0) + when(slotRead.valid) { + brr.takeIt() + assert(brr.win) + } + for ((bank, bankId) <- banks.zipWithIndex) { + if (!reducedBankWidth) { + when(slotRead.valid && way === bankId) { + bank.read.cmd.valid := True + bank.read.cmd.payload := address(lineRange) @@ wordIndex + } + } else { + val sel = U(bankId) - way + val groupSel = way(log2Up(bankCount) - 1 downto log2Up(bankCount / memToBankRatio)) + val subSel = sel(log2Up(bankCount / memToBankRatio) - 1 downto 0) + when(arbiter.hit && groupSel === (bankId >> log2Up(bankCount / memToBankRatio))) { + bank.read.cmd.valid := True + bank.read.cmd.payload := address(lineRange) @@ wordIndex @@ (subSel) + } + } + } + + val slotReadLast = slotRead.stage() + val readedData = Bits(memDataWidth bits) + + if (!reducedBankWidth) { + readedData := banks.map(_.read.rsp).read(slotReadLast.way) + } else { + for ((slice, sliceId) <- readedData.subdivideIn(bankWidth bits).zipWithIndex) { + ??? 
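+              // Reassembling the victim line out of the narrower banks is not implemented yet :
+              // with reducedBankWidth enabled, elaboration of this writeback read path stops on the
+              // ??? above.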
+ } + } + + + when(slotReadLast.valid) { + victimBuffer.write(slotReadLast.id @@ slotReadLast.wordIndex, readedData) + whenIndexed(slots, slotReadLast.id) { + _.victimBufferReady := True + } + when(slotReadLast.last) { + whenIndexed(slots, slotReadLast.id) { + _.readRspDone := True + } + } + } + } + + val write = new Area { + val arbiter = new PriorityArea(slots.map(s => (s.valid && s.victimBufferReady && !s.writeCmdDone, s.priority))) + val wordIndex = KeepAttribute(Reg(UInt(log2Up(memWordPerLine) bits)) init (0)) + val last = wordIndex === wordIndex.maxValue + + val bufferRead = Stream(new Bundle { + val id = UInt(log2Up(writebackCount) bits) + val address = UInt(postTranslationWidth bits) + val last = Bool() + val coherency = withCoherency generate new Bundle { + val release = Bool() + val dirty = Bool() + val fromUnique = Bool() + val toShared = Bool() + val probeId = UInt(probeIdWidth bits) + } + }) + bufferRead.valid := arbiter.hit + bufferRead.id := arbiter.sel + bufferRead.last := last + bufferRead.address := slots.map(_.address).read(arbiter.sel) + val c = withCoherency generate new Area { + last setWhen (!bufferRead.coherency.dirty) + bufferRead.coherency.release := slots.map(_.coherency.release).read(arbiter.sel) + bufferRead.coherency.dirty := slots.map(_.coherency.dirty).read(arbiter.sel) + bufferRead.coherency.fromUnique := slots.map(_.coherency.fromUnique).read(arbiter.sel) + bufferRead.coherency.toShared := slots.map(_.coherency.toShared).read(arbiter.sel) + bufferRead.coherency.probeId := slots.map(_.coherency.probeId).read(arbiter.sel) + } + wordIndex := wordIndex + U(bufferRead.fire && withCoherency.mux(bufferRead.coherency.dirty, True)) + when(bufferRead.fire && last) { + slots.onMask(arbiter.oh)(_.writeCmdDone := True) + arbiter.lock := 0 + } + + val cmd = bufferRead.stage() + val word = victimBuffer.readSync(bufferRead.id @@ wordIndex, bufferRead.ready) + bus.write.cmd.arbitrationFrom(cmd) + bus.write.cmd.address := cmd.address + bus.write.cmd.data := word + bus.write.cmd.id := cmd.id + bus.write.cmd.last := cmd.last + if (withCoherency) { + bus.write.cmd.coherent.release := cmd.coherency.release + bus.write.cmd.coherent.dirty := cmd.coherency.dirty + bus.write.cmd.coherent.fromUnique := cmd.coherency.fromUnique + bus.write.cmd.coherent.toShared := cmd.coherency.toShared + bus.write.cmd.coherent.probeId := cmd.coherency.probeId + when(cmd.fire && cmd.last && !cmd.coherency.release) { + slots.onSel(cmd.id) { s => + s.fire := True + } + } + } + + when(bus.write.rsp.valid) { + whenIndexed(slots, bus.write.rsp.id) { s => + s.fire := True + } + } + } + } + + +// def waysHazard(stages: Seq[Stage], address: Stageable[UInt]): Unit = { +// for (s <- stages) { +// s.overloaded(WAYS_HAZARD) := s(WAYS_HAZARD) | waysWrite.maskLast.andMask(waysWrite.addressLast === s(address)(lineRange)) +// } +// } + + val ls = new Area { + val rb0 = new lane.Execute(bankReadAt){ + val readAddress = MIXED_ADDRESS(lineRange.high downto log2Up(bankWidth / 8)) + val reservation = bankReadArbiter.create(1) + val freezeIt = False + lane.freezeWhen(freezeIt) + + assert(Global.HART_COUNT.get == 1) + freezeIt setWhen(SEL && refill.slots.map(s => s.valid && !s.loaded).orR) //TODO PREFETCH not friendly + + for ((bank, bankId) <- banks.zipWithIndex) { + BANK_BUSY(bankId) := bank.write.valid && bank.write.address === readAddress //Write to read hazard + when(SEL){ + when(reservation.win){ + reservation.takeIt() + bank.read.cmd.valid := !lane.isFreezed() + bank.read.cmd.payload := readAddress + } otherwise { + 
freezeIt := True + } + } + } + } + + val rb1 = new lane.Execute(bankReadAt+1){ + def wayToBank(way: Int): UInt = { + val wayId = U(way, log2Up(wayCount) bits) + if (!reducedBankWidth) return wayId + (wayId >> log2Up(bankCount / memToBankRatio)) @@ ((wayId + (MIXED_ADDRESS(log2Up(bankWidth / 8), log2Up(bankCount) bits))).resize(log2Up(bankCount / memToBankRatio))) + } + + for ((bank, bankId) <- banks.zipWithIndex) { + BANKS_WORDS(bankId) := banks(bankId).read.rsp + BANK_BUSY_REMAPPED(bankId) := BANK_BUSY(wayToBank(bankId)) + } + } + + val bm = new lane.Execute(bankMuxesAt){ + for ((bank, bankId) <- banks.zipWithIndex) { + BANKS_MUXES(bankId) := BANKS_WORDS(bankId).subdivideIn(cpuWordWidth bits).read(MIXED_ADDRESS(bankWordToCpuWordRange)) + } + } + + val bankMuxStd = !reducedBankWidth generate new lane.Execute(bankMuxAt){ + MUXED_DATA := OhMux.or(WAYS_HITS, BANKS_MUXES) + } + + val bankMuxReduced = reducedBankWidth generate new lane.Execute(bankMuxAt){ + val wayId = OHToUInt(WAYS_HITS) + val bankId = (wayId >> log2Up(bankCount / memToBankRatio)) @@ ((wayId + (MIXED_ADDRESS(log2Up(bankWidth / 8), log2Up(bankCount) bits))).resize(log2Up(bankCount / memToBankRatio))) + MUXED_DATA := BANKS_MUXES.read(bankId) //MuxOH(WAYS_HITS, BANKS_MUXES) + } + + + val weh = new Area { + val dst = lane.execute(ctrlAt-1) + for(id <- 0 until widthOf(WRITE_TO_READ_HAZARDS)) { + val src = lane.execute(bankReadAt+id) + dst(WRITE_TO_READ_HAZARDS)(id) := src(EVENT_WRITE_VALID) && src(EVENT_WRITE_ADDRESS)(notWordRange) === dst(PHYSICAL_ADDRESS)(notWordRange) && withBypass.mux(True, (src(EVENT_WRITE_MASK) & dst(MASK)).orR) + } + } + + val rt0 = new lane.Execute(wayReadAt){ + plru.read.cmd.valid := !lane.isFreezed() + plru.read.cmd.payload := MIXED_ADDRESS(lineRange) + val PLRU_BYPASS_VALID = insert(plru.write.valid && plru.write.address === plru.read.cmd.payload) + val PLRU_BYPASS_VALUE = insert(plru.write.data) + + for ((way, wayId) <- ways.zipWithIndex){ + way.lsuRead.cmd.valid := !lane.isFreezed() + way.lsuRead.cmd.payload := MIXED_ADDRESS(lineRange) + } + } + + val rt1 = new lane.Execute(wayReadAt + 1 - tagsReadAsync.toInt){ + this(PLRU) := plru.read.rsp + val plruBypass = tagsReadAsync generate new Area{ + when(rt0.PLRU_BYPASS_VALID){ + this(PLRU) := rt0.PLRU_BYPASS_VALUE + } + } + + for ((way, wayId) <- ways.zipWithIndex) { + WAYS_TAGS(wayId) := ways(wayId).lsuRead.rsp + } + } + val hs = new lane.Execute(hitsAt){ + for ((way, wayId) <- ways.zipWithIndex) { + WAYS_HITS(wayId) := WAYS_TAGS(wayId).loaded && WAYS_TAGS(wayId).address === PHYSICAL_ADDRESS(tagRange) + } + } + val h = new lane.Execute(hitAt) { + WAYS_HIT := B(WAYS_HITS).orR + } + + assert(Global.HART_COUNT.get == 1) + //TODO Store AMO SC need to be sure the cache line didn't just start being written back when theiy reach ctrl stage / warning prefetch + val preCtrl = new lane.Execute(ctrlAt){ + NEED_UNIQUE := STORE || ATOMIC + WAYS_HAZARD := 0 //TODO + } + +// val rcl = new lane.Execute(ctrlAt){ +// REFILL_HITS := B(refill.slots.map(r => r.valid && r.address(hazardCheckRange) === PHYSICAL_ADDRESS(hazardCheckRange))) +// } + + val ctrl = new lane.Execute(ctrlAt) { + val plruLogic = new Area { + val core = new Plru(wayCount, false) + core.io.context.state := PLRU + core.io.update.id.assignDontCare() + when(SEL) { + plru.write.address := MIXED_ADDRESS(lineRange) + plru.write.data := core.io.update.state + } + } + + val reservation = tagsWriteArbiter.create(2) + val bankWriteReservation = bankWriteArbiter.create(2) + val refillWayWithoutUpdate = 
CombInit(plruLogic.core.io.evict.id) +// val refillWayWithoutUpdate = CombInit(wayRandom.value) + val refillWayNeedWriteback = WAYS_TAGS.map(w => w.loaded && withCoherency.mux(True, w.dirty)).read(refillWayWithoutUpdate) +// val refillHit = REFILL_HITS.orR +// val refillLoaded = (B(refill.slots.map(_.loaded)) & REFILL_HITS).orR + + //Warning, those two signals aren't stable when lane.isFreezed + val refillHazard = refill.isLineBusy(PHYSICAL_ADDRESS) + val writebackHazard = writeback.isLineBusy(PHYSICAL_ADDRESS) + +// val bankBusy = (BANK_BUSY_REMAPPED & WAYS_HITS) =/= 0 // Not needed anymore as the cpu freeze early +// val waysHitHazard = (WAYS_HITS & WAYS_HAZARD).orR + val waysHazard = WAYS_HAZARD.orR +// val hitUnique = withCoherency.mux((WAYS_HITS & WAYS_TAGS.map(_.unique).asBits).orR, True) +// val uniqueMiss = NEED_UNIQUE && !hitUnique + val wasDirty = (B(WAYS_TAGS.map(_.dirty)) & WAYS_HITS).orR + val refillWayWasDirty = WAYS_TAGS.map(w => w.loaded && w.dirty).read(refillWayWithoutUpdate) + val loadBankHazard = withBypass.mux(False, LOAD && WRITE_TO_READ_HAZARDS.orR) + + //WARNING, when lane.isFreezed, nothing should change. If a hazard was detected, is has to stay + val spawn = RegNext(!lane.isFreezed()) init(True) + val hazardReg = RegNextWhen(this(HAZARD), spawn) init(False) + //TODO writeBackHazard hit performance and isn't required in most case ? + HAZARD := spawn.mux(waysHazard || loadBankHazard || refillHazard || writebackHazard, hazardReg) //TODO Line busy can likely be removed if single hart with no prefetch + MISS := !HAZARD && !WAYS_HIT && !FLUSH + FAULT := !HAZARD && WAYS_HIT && (WAYS_HITS & WAYS_TAGS.map(_.fault).asBits).orR && !FLUSH + MISS_UNIQUE := !HAZARD && WAYS_HIT && NEED_UNIQUE && withCoherency.mux((WAYS_HITS & WAYS_TAGS.map(e => !e.unique && !e.fault).asBits).orR, False) + + val canRefill = reservation.win && !(refillWayNeedWriteback && writeback.full) && !refill.full + val canFlush = reservation.win && !writeback.full && !refill.slots.map(_.valid).orR + val canDirty = reservation.win + val needFlushs = B(WAYS_TAGS.map(w => w.loaded && w.dirty)) + val needFlushOh = OHMasking.firstV2(needFlushs) + val needFlushSel = OHToUInt(needFlushOh) + + val askRefill = MISS && canRefill + val askUpgrade = MISS_UNIQUE && canRefill + val askFlush = FLUSH && !HAZARD && canFlush && needFlushs.orR + + val doRefill = SEL && askRefill + val doUpgrade = SEL && askUpgrade + val doFlush = SEL && askFlush + val doWrite = SEL && !HAZARD && STORE && WAYS_HIT && this(WAYS_TAGS).reader(WAYS_HITS)(w => withCoherency.mux(w.unique, True) && !w.fault) && !SKIP_WRITE + val doDirty = doWrite && !wasDirty && canDirty + + val wayId = OHToUInt(WAYS_HITS) + val bankHitId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (PHYSICAL_ADDRESS(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio))) + + val targetWay = (askUpgrade || doDirty).mux(wayId, refillWayWithoutUpdate) + val allowSideEffects = !ABORD && !lane.isFreezed() + + when(SEL) { + assert(CountOne(WAYS_HITS) <= 1, "Multiple way hit ???") + } + +// assert(!startFlush) + + val freezeIt = SEL && STORE && (!bankWriteReservation.win || !reservation.win) + lane.freezeWhen(freezeIt) + + //TODO preset dirty if it come from a store + refill.push.valid := allowSideEffects && (doRefill || doUpgrade) + refill.push.address := PHYSICAL_ADDRESS + refill.push.unique := NEED_UNIQUE + refill.push.data := askRefill + refill.push.way := targetWay + refill.push.victim := 
writeback.free.andMask(refillWayNeedWriteback && refillWayWasDirty) + refill.push.dirty := STORE + when(askUpgrade) { + refill.push.way := wayId + refill.push.victim := 0 + } + + + assert(!doUpgrade) + assert(CountOne(Cat(askRefill, doUpgrade, doDirty, doFlush)) < 2) + + when(doRefill || doUpgrade || doDirty) { + reservation.takeIt() + waysWrite.mask(targetWay) := allowSideEffects + waysWrite.address := MIXED_ADDRESS(lineRange) + } + when(doDirty){ + waysWrite.tag.loaded := True + waysWrite.tag.address := PHYSICAL_ADDRESS(tagRange) + waysWrite.tag.fault := FAULT + waysWrite.tag.dirty := True + if(withCoherency) waysWrite.tag.unique := True + } + when(doRefill || doUpgrade){ + waysWrite.tag.loaded := False + } + + val dirtyBypasser = new Area { + val mask = WAYS_HITS.andMask(doDirty) + val on = for(eid <- wayReadAt until ctrlAt) yield new Area { + val dst = lane.execute(eid) + val first = eid == wayReadAt + val hit = dst(MIXED_ADDRESS)(lineRange) === MIXED_ADDRESS(lineRange) + val masked = mask.andMask(hit) + first match { + case true => dst(DIRTY_BYPASS) := masked + case false => dst.bypass(DIRTY_BYPASS) := dst.up(DIRTY_BYPASS) | masked + } + } + bypass(WAYS_TAGS) := up(WAYS_TAGS) + for(w <- 0 until wayCount) bypass(WAYS_TAGS)(w).dirty setWhen(DIRTY_BYPASS(w)) + } + + when(doWrite) { + for ((bank, bankId) <- banks.zipWithIndex) when(WAYS_HITS(bankId)) { + bank.write.valid := bankId === bankHitId && allowSideEffects + bank.write.address := PHYSICAL_ADDRESS(lineRange.high downto log2Up(bankWidth / 8)) + bank.write.data.subdivideIn(cpuWordWidth bits).foreach(_ := WRITE_DATA) + bank.write.mask := 0 + bank.write.mask.subdivideIn(cpuWordWidth / 8 bits)(PHYSICAL_ADDRESS(bankWordToCpuWordRange)) := MASK + } + } + + FLUSH_HIT := needFlushs.orR + when(doFlush) { + reservation.takeIt() + + + val reader = this (WAYS_TAGS).reader(needFlushSel) + val tag = reader(_.address) + waysWrite.mask := needFlushOh + waysWrite.address := MIXED_ADDRESS(lineRange) + waysWrite.tag.loaded := True + waysWrite.tag.address := tag + waysWrite.tag.fault := reader(_.fault) + waysWrite.tag.dirty := False + if (withCoherency) ??? + + writeback.push.valid := allowSideEffects + writeback.push.address := (tag @@ MIXED_ADDRESS(lineRange)) << lineRange.low + writeback.push.way := needFlushSel + if (withCoherency) { + ??? 
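// Note: the flush path does not handle hardware coherency yet; when withCoherency is set,
// elaboration hits `???` here (and in the waysWrite update just above) and aborts.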
+ } + } + + val brs = lane.execute(bankReadAt) + brs(EVENT_WRITE_VALID) := doWrite + brs(EVENT_WRITE_ADDRESS) := PHYSICAL_ADDRESS + brs(EVENT_WRITE_DATA) := WRITE_DATA + brs(EVENT_WRITE_MASK) := MASK + + + + when(doRefill) { + writeback.push.valid := refillWayNeedWriteback && allowSideEffects + writeback.push.address := (WAYS_TAGS(targetWay).address @@ MIXED_ADDRESS(lineRange)) << lineRange.low + writeback.push.way := targetWay + if (withCoherency) { + writeback.push.dirty := wasDirty + writeback.push.fromUnique := WAYS_TAGS(targetWay).unique + writeback.push.toShared := False + writeback.push.release := True + } + + plru.write.valid := allowSideEffects + plruLogic.core.io.update.id := targetWay + } + + when(SEL && !HAZARD && !MISS) { + plru.write.valid := allowSideEffects + plruLogic.core.io.update.id := wayId + } + + BYPASSED_DATA := MUXED_DATA + val bypasser = if(withBypass) new Area { + for (b <- widthOf(WRITE_TO_READ_HAZARDS) - 1 downto 0) { + when(WRITE_TO_READ_HAZARDS(b)) { + for (i <- 0 until cpuDataWidth / 8) { + val range = i * 8 + 7 downto i * 8 + val src = lane.execute(bankReadAt+1+b) + when(src(EVENT_WRITE_MASK)(i)) { + BYPASSED_DATA(range) := src(EVENT_WRITE_DATA)(range) + } + } + } + } + } + READ_DATA := BYPASSED_DATA + +// REFILL_SLOT_FULL := MISS && !refillHit && refill.full +// REFILL_SLOT := REFILL_HITS.andMask(!refillLoaded) | refill.free.andMask(askRefill) + } + } + + tagsWriteArbiter.build() + bankWriteArbiter.build() + bankReadArbiter.build() + } +} diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuL1TlPlugin.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuL1TlPlugin.scala new file mode 100644 index 00000000..60eaeaf6 --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuL1TlPlugin.scala @@ -0,0 +1,12 @@ +package vexiiriscv.execute.lsu + +import spinal.lib._ +import spinal.lib.misc.plugin.FiberPlugin + +class LsuL1TlPlugin extends FiberPlugin{ + val bus = during build { + val l1 = host[LsuL1Plugin] + l1.logic.bus.setAsDirectionLess() + master(l1.logic.bus.toTilelink()) + } +} diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala new file mode 100644 index 00000000..e715356a --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala @@ -0,0 +1,459 @@ +package vexiiriscv.execute.lsu + +import spinal.core._ +import spinal.core.fiber.Handle +import spinal.core.sim.SimDataPimper +import spinal.lib._ +import spinal.lib.bus.tilelink.M2sTransfers +import spinal.lib.fsm.{State, StateMachine} +import spinal.lib.misc.pipeline._ +import spinal.lib.misc.plugin.FiberPlugin +import spinal.lib.system.tag.PmaRegion +import vexiiriscv.decode.Decode +import vexiiriscv.decode.Decode.UOP +import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, DBusAccessService, PmaLoad, PmaLogic, PmaPort, PmaStore} +import vexiiriscv.misc.{AddressToMask, TrapArg, TrapReason, TrapService} +import vexiiriscv.riscv.Riscv.LSLEN +import vexiiriscv.riscv._ +import vexiiriscv.schedule.ScheduleService +import vexiiriscv.{Global, riscv} +import vexiiriscv.execute._ +import vexiiriscv.execute.lsu.AguPlugin._ +import vexiiriscv.fetch.LsuL1Service + +import scala.collection.mutable.ArrayBuffer + + +class LsuPlugin(var layer : LaneLayer, + var withRva : Boolean, + var translationStorageParameter: Any, + var translationPortParameter: Any, + var addressAt: Int = 0, + var ctrlAt: Int = 2, + var wbAt : Int = 2) extends FiberPlugin with DBusAccessService with LsuCachelessBusProvider with LsuL1Service{ 
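// Stage overview (a summary of the areas implemented below):
// - addressAt : AGU / DBus-access / flush requests are arbitrated into LsuL1, and the MMU
//               translation port is bound on addressAt / addressAt+1
// - ctrlAt    : PMA and MMU results are checked, IO accesses are issued, AMOs are computed,
//               and traps / redo / L1 aborts are decided
// - wbAt      : the shifted load data (or the SC result for RVA) is written back to the
//               integer register file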
+ + override def accessRefillCount: Int = 0 + override def accessWake: Bits = B(0) + + override def getLsuCachelessBus(): LsuCachelessBus = logic.bus + + val logic = during setup new Area{ + val elp = host.find[ExecuteLanePlugin](_.laneName == layer.laneName) + val ifp = host.find[IntFormatPlugin](_.laneName == layer.laneName) + val srcp = host.find[SrcPlugin](_.layer == layer) + val ats = host[AddressTranslationService] + val ts = host[TrapService] + val ss = host[ScheduleService] + val buildBefore = retains(elp.pipelineLock, ats.portsLock) + val atsStorageLock = retains(ats.storageLock) + val retainer = retains(elp.uopLock, srcp.elaborationLock, ifp.elaborationLock, ts.trapLock, ss.elaborationLock) + awaitBuild() + Riscv.RVA.set(withRva) + + val translationStorage = ats.newStorage(translationStorageParameter) + atsStorageLock.release() + + val trapPort = ts.newTrap(layer.el.getExecuteAge(ctrlAt), Execute.LANE_AGE_WIDTH) + val flushPort = ss.newFlushPort(layer.el.getExecuteAge(ctrlAt), laneAgeWidth = Execute.LANE_AGE_WIDTH, withUopId = true) + val frontend = new AguFrontend(layer, host) + + // IntFormatPlugin specification + val iwb = ifp.access(wbAt) + val amos = Riscv.RVA.get.option(frontend.amos.uops).toList.flatten + for(load <- frontend.writingRf ++ amos){ + val spec = Rvi.loadSpec(load) + val op = layer(load) + ifp.addMicroOp(iwb, op) + spec.signed match { + case false => ifp.zeroExtend(iwb, op, spec.width) + case true => ifp.signExtend(iwb, op, spec.width) + } + op.mayFlushUpTo(ctrlAt) // page fault / trap + op.dontFlushFrom(ctrlAt) + } + + for(store <- frontend.writingMem ++ amos){ + val op = layer(store) + op.mayFlushUpTo(ctrlAt) + op.dontFlushFrom(ctrlAt) + op.addRsSpec(RS2, 0) //TODO ! for now the executeLanePlugin store bypass bypass its own value XD, need fix to only bypass from point which are solved + } + + layer.add(Rvi.FENCE) //TODO + layer(Rvi.FENCE).setCompletion(ctrlAt) + + for(uop <- frontend.writingMem if layer(uop).completion.isEmpty) layer(uop).setCompletion(ctrlAt) + + retainer.release() + + val injectCtrl = elp.ctrl(0) + val inject = new injectCtrl.Area { + SIZE := Decode.UOP(13 downto 12).asUInt + } + + val busParam = LsuCachelessBusParam( + addressWidth = Global.PHYSICAL_WIDTH, + dataWidth = Riscv.LSLEN, + hartIdWidth = Global.HART_ID_WIDTH, + uopIdWidth = Decode.UOP_ID_WIDTH, + withAmo = withRva, + pendingMax = 1 + ) + val bus = master(LsuCachelessBus(busParam)).simPublic() + + accessRetainer.await() + val l1 = LsuL1 + val FROM_ACCESS = Payload(Bool()) + val FROM_LSU = Payload(Bool()) + + + invalidationRetainer.await() + val flusher = new StateMachine { + val IDLE = makeInstantEntry() + val CMD, COMPLETION = new State() + val arbiter = StreamArbiterFactory().transactionLock.lowerFirst.buildOn(invalidationPorts.map(_.cmd)) + val cmdCounter = Reg(UInt(log2Up(l1.SETS) + 1 bits)) + val inflight = (addressAt+1 to ctrlAt).map(elp.execute).map(e => e(l1.SEL) && e(l1.FLUSH)).orR + + val waiter = Reg(l1.WRITEBACK_BUSY.get) + + IDLE.whenIsActive{ + cmdCounter := 0 + when(arbiter.io.output.valid) { + goto(CMD) + } + } + CMD.whenIsActive{ + when(cmdCounter.msb && !inflight) { + waiter := l1.WRITEBACK_BUSY + goto(COMPLETION) + } + } + arbiter.io.output.ready := False + COMPLETION.whenIsActive{ + waiter := waiter & l1.WRITEBACK_BUSY + when(!waiter.orR){ + arbiter.io.output.ready := True + } + } + } + + val onAddress0 = new elp.Execute(addressAt){ + val translationPort = ats.newTranslationPort( + nodes = Seq(elp.execute(addressAt).down, elp.execute(addressAt+1).down), + 
rawAddress = l1.MIXED_ADDRESS, + forcePhysical = FROM_ACCESS, + usage = AddressTranslationPortUsage.LOAD_STORE, + portSpec = translationPortParameter, + storageSpec = translationStorage + ) + + case class Cmd() extends Bundle { + val address = l1.MIXED_ADDRESS() + val size = SIZE() + val load, store, atomic = Bool() + val fromFlush = Bool() + val fromAccess = Bool() + } + + val ports = ArrayBuffer[Stream[Cmd]]() + + val ls = new Area { + val port = ports.addRet(Stream(Cmd())) + port.valid := isValid && SEL + port.address := srcp.ADD_SUB.asUInt.resized //TODO Overflow ? + port.size := SIZE + port.load := LOAD + port.store := STORE + port.atomic := ATOMIC + port.fromFlush := False + port.fromAccess := False + } + + val access = dbusAccesses.nonEmpty generate new Area { + assert(dbusAccesses.size == 1) + val cmd = dbusAccesses.head.cmd + val port = ports.addRet(Stream(Cmd())) + port.arbitrationFrom(cmd) + port.address := cmd.address.resized + port.size := cmd.size + port.load := True + port.store := False + port.atomic := False + port.fromFlush := False + port.fromAccess := True + } + + val flush = new Area { + val port = ports.addRet(Stream(Cmd())) + port.valid := flusher.isActive(flusher.CMD) && !flusher.cmdCounter.msb + port.address := (flusher.cmdCounter << log2Up(l1.LINE_BYTES)).resized + port.size := 0 + port.load := False + port.store := False + port.atomic := False + port.fromFlush := True + port.fromAccess := False + when(port.fire) { + flusher.cmdCounter := flusher.cmdCounter + 1 + } + } + + val arbiter = StreamArbiterFactory().noLock.lowerFirst.buildOn(ports) + arbiter.io.output.ready := !elp.isFreezed() + l1.SEL := arbiter.io.output.valid + l1.MIXED_ADDRESS := arbiter.io.output.address + l1.MASK := AddressToMask(arbiter.io.output.address, arbiter.io.output.size, Riscv.LSLEN / 8) + l1.SIZE := arbiter.io.output.size + l1.LOAD := arbiter.io.output.load + l1.ATOMIC := arbiter.io.output.atomic + l1.STORE := arbiter.io.output.store + l1.FLUSH := arbiter.io.output.fromFlush + FROM_ACCESS := arbiter.io.output.fromAccess + FROM_LSU := !(arbiter.io.output.fromFlush || arbiter.io.output.fromAccess) + } + + val tpk = onAddress0.translationPort.keys + + + + val onAddress1 = new elp.Execute(addressAt+1) { + l1.PHYSICAL_ADDRESS := tpk.TRANSLATED + } + + + for(eid <- addressAt + 1 to ctrlAt) { + val e = elp.execute(eid) + e.up(l1.SEL).setAsReg().init(False) + when(e(FROM_LSU) && !e.isValid) { + e.bypass(l1.SEL) := False + } + } + + val onCtrl = new elp.Execute(ctrlAt) { + val MISS_ALIGNED = insert((1 to log2Up(LSLEN / 8)).map(i => l1.SIZE === i && l1.MIXED_ADDRESS(i - 1 downto 0) =/= 0).orR) + val mmuPageFault = tpk.PAGE_FAULT || STORE.mux(!tpk.ALLOW_WRITE, !tpk.ALLOW_READ) + + val pmaL1 = new PmaPort(Global.PHYSICAL_WIDTH, List(l1.LINE_BYTES), List(PmaLoad, PmaStore)) + val pmaIo = new PmaPort(Global.PHYSICAL_WIDTH, (0 to log2Up(Riscv.LSLEN / 8)).map(1 << _), List(PmaLoad, PmaStore)) + pmaL1.cmd.address := tpk.TRANSLATED + pmaL1.cmd.op(0) := l1.STORE + pmaIo.cmd.address := tpk.TRANSLATED + pmaIo.cmd.size := l1.SIZE.asBits + pmaIo.cmd.op(0) := l1.STORE + + val IO = insert(pmaL1.rsp.fault && !pmaIo.rsp.fault) + + val writeData = CombInit[Bits](elp(IntRegFile, riscv.RS2)) + val scMiss = Bool() + + val io = new Area { + val allowed = CombInit[Bool](IO) + val doIt = isValid && l1.SEL && allowed + + val cmdSent = RegInit(False) setWhen (bus.cmd.fire) clearWhen (!elp.isFreezed()) + bus.cmd.valid := doIt && !cmdSent + bus.cmd.write := l1.STORE + bus.cmd.address := l1.PHYSICAL_ADDRESS //TODO Overflow on 
TRANSLATED itself ? + bus.cmd.data := l1.WRITE_DATA + bus.cmd.size := l1.SIZE + bus.cmd.mask := l1.MASK + bus.cmd.io := True + bus.cmd.fromHart := True + bus.cmd.hartId := Global.HART_ID + bus.cmd.uopId := Decode.UOP_ID + if (withRva) { + bus.cmd.amoEnable := l1.ATOMIC + bus.cmd.amoOp := UOP(31 downto 27) + } + + val rsp = bus.rsp.toStream.halfPipe() + rsp.ready := !elp.isFreezed() + + val freezeIt = doIt && !rsp.valid + elp.freezeWhen(freezeIt) + } + + + val rspData = io.doIt.mux[Bits](io.rsp.data, l1.READ_DATA) + val rspSplits = rspData.subdivideIn(8 bits) + val rspShifted = Bits(LSLEN bits) + val wordBytes = LSLEN / 8 + + //Generate minimal mux to move from a wide aligned memory read to the register file shifter representation + for (i <- 0 until wordBytes) { + val srcSize = 1 << (log2Up(wordBytes) - log2Up(i + 1)) + val srcZipped = rspSplits.zipWithIndex.filter { case (v, b) => b % (wordBytes / srcSize) == i } + val src = srcZipped.map(_._1) + val range = log2Up(wordBytes) - 1 downto log2Up(wordBytes) - log2Up(srcSize) + val sel = srcp.ADD_SUB(range).asUInt + rspShifted(i * 8, 8 bits) := src.read(sel) + } + + val READ_SHIFTED = insert(rspShifted) + val SC_MISS = insert(withRva.mux(io.doIt.mux[Bool](io.rsp.scMiss, scMiss), False)) + + + if (!Riscv.RVA.get) { + scMiss := False + } + val rva = Riscv.RVA.get generate new Area { + val srcBuffer = RegNext[Bits](READ_SHIFTED) + val alu = new AtomicAlu( + op = UOP(29, 3 bits), + swap = UOP(27), + mem = srcBuffer, + rf = elp(IntRegFile, riscv.RS2), + isWord = l1.SIZE === 2 + ) + val aluBuffer = RegNext(alu.result) + val isAmo = l1.ATOMIC && l1.STORE && l1.LOAD + when(isAmo) { + writeData := aluBuffer + } + + val delay = History(!elp.isFreezed(), 1 to 2) + val freezeIt = isValid && SEL && isAmo && delay.orR + elp.freezeWhen(freezeIt) //Note that if the refill is faster than 2 cycle, it may create issues + + assert(Global.HART_COUNT.get == 1) + val nc = new Area { + val reserved = RegInit(False) + when(!elp.isFreezed() && l1.SEL && !l1.ABORD) { + reserved setWhen (l1.ATOMIC && !l1.STORE) + reserved clearWhen (l1.STORE) + } + scMiss := !reserved + } + } + + val mapping = (0 to log2Up(Riscv.LSLEN / 8)).map { size => + val w = (1 << size) * 8 + size -> writeData(0, w bits).#*(Riscv.LSLEN / w) + } + l1.WRITE_DATA := l1.SIZE.muxListDc(mapping) + + + flushPort.valid := False + flushPort.hartId := Global.HART_ID + flushPort.uopId := Decode.UOP_ID + flushPort.laneAge := Execute.LANE_AGE + flushPort.self := False + + //TODO handle case were address isn't in the range of the virtual address ? 
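// The when() blocks below all drive lsuTrap/trapPort; since SpinalHDL keeps the last active
// assignment, their source order encodes the trap priority, from lowest to highest:
// L1/IO access fault < L1 redo < PMA fault < MMU page fault < MMU access fault
// < MMU refill (tpk.REDO) < misaligned address.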
+ trapPort.valid := False + trapPort.hartId := Global.HART_ID + trapPort.laneAge := Execute.LANE_AGE + trapPort.tval := l1.MIXED_ADDRESS.asBits.resized //PC RESIZED + trapPort.exception.assignDontCare() + trapPort.code.assignDontCare() + trapPort.arg.allowOverride() := 0 + + val lsuTrap = False + when((!pmaIo.rsp.fault).mux[Bool](io.rsp.valid && io.rsp.error, l1.FAULT)) { + lsuTrap := True + trapPort.exception := True + trapPort.code := CSR.MCAUSE_ENUM.LOAD_ACCESS_FAULT + trapPort.code(1) setWhen (STORE) + } + + val l1Redo = !pmaL1.rsp.fault && (l1.HAZARD || l1.MISS || l1.MISS_UNIQUE) + when(l1Redo){ + lsuTrap := True + trapPort.exception := False + trapPort.code := TrapReason.REDO + } + + val pmaFault = pmaL1.rsp.fault && pmaIo.rsp.fault + when(pmaFault) { + lsuTrap := True; io.allowed := False + trapPort.exception := True + trapPort.code := CSR.MCAUSE_ENUM.LOAD_ACCESS_FAULT + trapPort.code(1) setWhen (STORE) + } + + when(mmuPageFault) { + lsuTrap := True; io.allowed := False + trapPort.exception := True + trapPort.code := CSR.MCAUSE_ENUM.LOAD_PAGE_FAULT + trapPort.code(1) setWhen (STORE) + } + + when(tpk.ACCESS_FAULT) { + lsuTrap := True; io.allowed := False + trapPort.exception := True + trapPort.code := CSR.MCAUSE_ENUM.LOAD_ACCESS_FAULT + trapPort.code(1) setWhen (STORE) + } + + trapPort.arg(0, 2 bits) := STORE.mux(B(TrapArg.STORE, 2 bits), B(TrapArg.LOAD, 2 bits)) + trapPort.arg(2, ats.getStorageIdWidth() bits) := ats.getStorageId(translationStorage) + when(tpk.REDO) { + lsuTrap := True; io.allowed := False + trapPort.exception := False + trapPort.code := TrapReason.MMU_REFILL + } + + when(MISS_ALIGNED) { + lsuTrap := True; io.allowed := False + trapPort.exception := True + trapPort.code := STORE.mux[Bits](CSR.MCAUSE_ENUM.STORE_MISALIGNED, CSR.MCAUSE_ENUM.LOAD_MISALIGNED).andMask(MISS_ALIGNED).resized + } + + when(isValid && SEL && lsuTrap) { + trapPort.valid := True + flushPort.valid := True + bypass(Global.TRAP) := True + bypass(Global.COMMIT) := False + } + + l1.ABORD := FROM_LSU && (!isValid || isCancel || pmaL1.rsp.fault || l1.FAULT || mmuPageFault || tpk.ACCESS_FAULT || tpk.REDO || MISS_ALIGNED || pmaFault) + l1.SKIP_WRITE := l1.ATOMIC && !l1.LOAD && scMiss + + when(l1.SEL && l1.FLUSH && (l1.FLUSH_HIT || l1.HAZARD)){ + flusher.cmdCounter := l1.MIXED_ADDRESS(log2Up(l1.LINE_BYTES), log2Up(l1.SETS) bits).resized + } + + val access = dbusAccesses.nonEmpty generate new Area { + assert(dbusAccesses.size == 1) + val rsp = dbusAccesses.head.rsp + rsp.valid := l1.SEL && FROM_ACCESS && !elp.isFreezed() + rsp.data := l1.READ_DATA + rsp.error := l1.FAULT + rsp.redo := l1Redo + rsp.waitSlot := 0 + rsp.waitAny := False //TODO + when(pmaFault){ + rsp.error := True + rsp.redo := False + } + } + } + + val onWb = new elp.Execute(wbAt){ + iwb.valid := SEL + iwb.payload := onCtrl.READ_SHIFTED + + if (withRva) when(l1.ATOMIC && !l1.LOAD) { + iwb.payload(0) := onCtrl.SC_MISS + iwb.payload(7 downto 1) := 0 + } + } + + buildBefore.release() + } + + + val ioRegions = Handle[ArrayBuffer[PmaRegion]]() + val pmaBuilder = during build new Area{ + val l1Regions = ArrayBuffer[PmaRegion]() + for(r <- host[LsuL1Plugin].regions if r.isMain){ + r.transfers match { + case t: M2sTransfers if t.get.contains(LsuL1.LINE_BYTES) && (t.putFull.contains(LsuL1.LINE_BYTES) || t.putFull.none) => + l1Regions += r + } + } + val l1 = new PmaLogic(logic.onCtrl.pmaL1, l1Regions) + val io = new PmaLogic(logic.onCtrl.pmaIo, ioRegions) + } +} diff --git a/src/main/scala/vexiiriscv/execute/lsu/Service.scala 
b/src/main/scala/vexiiriscv/execute/lsu/Service.scala new file mode 100644 index 00000000..f34bbf20 --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/lsu/Service.scala @@ -0,0 +1,5 @@ +package vexiiriscv.execute.lsu + +trait LsuCachelessBusProvider { + def getLsuCachelessBus() : LsuCachelessBus +} diff --git a/src/main/scala/vexiiriscv/fetch/FetchCachelessBridge.scala b/src/main/scala/vexiiriscv/fetch/FetchCachelessBridge.scala new file mode 100644 index 00000000..527e1aa4 --- /dev/null +++ b/src/main/scala/vexiiriscv/fetch/FetchCachelessBridge.scala @@ -0,0 +1,39 @@ +package vexiiriscv.fetch + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.{DebugId, S2mSupport} +import spinal.lib.misc.plugin.FiberPlugin + +class CachelessBusToTilelink(up : CachelessBus) extends Area{ + val m2sParam = up.p.toTilelinkM2s(this) + val down = tilelink.Bus(m2sParam) + down.a.arbitrationFrom(up.cmd) + down.a.opcode := tilelink.Opcode.A.GET + down.a.param := 0 + down.a.source := up.cmd.id + down.a.address := up.cmd.address + down.a.size := log2Up(up.p.dataWidth/8) + down.a.debugId := DebugId.withPostfix(up.cmd.id) + + down.d.ready := True + up.rsp.valid := down.d.valid + up.rsp.id := down.d.source + up.rsp.error := down.d.denied + up.rsp.word := down.d.data +} + +class FetchCachelessTileLinkPlugin(node : bus.tilelink.fabric.Node) extends FiberPlugin { + val logic = during build new Area{ + val fcp = host[FetchCachelessPlugin] + fcp.logic.bus.setAsDirectionLess() + + val bridge = new CachelessBusToTilelink(fcp.logic.bus) + master(bridge.down) + + node.m2s.forceParameters(bridge.m2sParam) + node.s2m.supported.load(S2mSupport.none()) + node.bus.component.rework(node.bus << bridge.down) + } +} diff --git a/src/main/scala/vexiiriscv/fetch/FetchCachelessBus.scala b/src/main/scala/vexiiriscv/fetch/FetchCachelessBus.scala new file mode 100644 index 00000000..91ff8e52 --- /dev/null +++ b/src/main/scala/vexiiriscv/fetch/FetchCachelessBus.scala @@ -0,0 +1,54 @@ +package vexiiriscv.fetch + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.misc.plugin.FiberPlugin +import spinal.lib.misc.database.Database._ +import spinal.lib.misc.pipeline._ +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.DebugId + +case class CachelessBusParam(addressWidth : Int, dataWidth : Int, idCount : Int, cmdPersistence : Boolean){ + val idWidth = log2Up(idCount) + + def toTilelinkM2s(name : Nameable) = new tilelink.M2sParameters( + addressWidth = addressWidth, + dataWidth = dataWidth, + masters = List( + new tilelink.M2sAgent( + name = name, + mapping = List( + new tilelink.M2sSource( + id = SizeMapping(0, idCount), + emits = tilelink.M2sTransfers( + get = tilelink.SizeRange(dataWidth/8) + ) + ) + ) + ) + ) + ) +} + +case class CachelessCmd(p : CachelessBusParam) extends Bundle{ + val id = UInt(p.idWidth bits) + val address = UInt(p.addressWidth bits) +} + +case class CachelessRsp(p : CachelessBusParam, withId : Boolean = true) extends Bundle{ + val id = withId generate UInt(p.idWidth bits) + val error = Bool() + val word = Bits(p.dataWidth bits) +} + +case class CachelessBus(p : CachelessBusParam) extends Bundle with IMasterSlave { + var cmd = Stream(CachelessCmd(p)) + var rsp = Flow(CachelessRsp(p)) + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } +} + diff --git a/src/main/scala/vexiiriscv/fetch/FetchCachelessPlugin.scala b/src/main/scala/vexiiriscv/fetch/FetchCachelessPlugin.scala index 
da566060..480dc221 100644 --- a/src/main/scala/vexiiriscv/fetch/FetchCachelessPlugin.scala +++ b/src/main/scala/vexiiriscv/fetch/FetchCachelessPlugin.scala @@ -1,46 +1,29 @@ package vexiiriscv.fetch import spinal.core._ +import spinal.core.fiber.Handle import spinal.lib._ +import spinal.lib.bus.misc.SizeMapping import spinal.lib.misc.plugin.FiberPlugin import spinal.lib.misc.database.Database._ import spinal.lib.misc.pipeline._ +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.DebugId +import spinal.lib.system.tag.{MappedTransfers, PmaRegion} import vexiiriscv._ import vexiiriscv.Global._ -import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService} +import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, PmaLoad, PmaLogic, PmaPort} import vexiiriscv.misc.{TrapArg, TrapReason, TrapService} import vexiiriscv.riscv.CSR -case class CachelessBusParam(addressWidth : Int, dataWidth : Int, idCount : Int, cmdPersistence : Boolean){ - val idWidth = log2Up(idCount) -} - -case class CachelessCmd(p : CachelessBusParam) extends Bundle{ - val id = UInt(p.idWidth bits) - val address = UInt(p.addressWidth bits) -} - -case class CachelessRsp(p : CachelessBusParam, withId : Boolean = true) extends Bundle{ - val id = withId generate UInt(p.idWidth bits) - val error = Bool() - val word = Bits(p.dataWidth bits) -} - -case class CachelessBus(p : CachelessBusParam) extends Bundle with IMasterSlave { - var cmd = Stream(CachelessCmd(p)) - var rsp = Flow(CachelessRsp(p)) - - override def asMaster(): Unit = { - master(cmd) - slave(rsp) - } -} +import scala.collection.mutable.ArrayBuffer object FetchCachelessPlugin{ val ID_WIDTH = blocking[Int] val ID = blocking[Int] } +//TODO avoid cmd fork on unmapped memory space class FetchCachelessPlugin(var wordWidth : Int, var translationStorageParameter: Any, var translationPortParameter: Any, @@ -48,6 +31,7 @@ class FetchCachelessPlugin(var wordWidth : Int, var forkAt : Int = 0, var joinAt : Int = 1, var cmdForkPersistence : Boolean = true) extends FiberPlugin{ + val regions = Handle[ArrayBuffer[PmaRegion]]() val logic = during setup new Area{ val pp = host[FetchPipelinePlugin] @@ -99,7 +83,7 @@ class FetchCachelessPlugin(var wordWidth : Int, val translationPort = ats.newTranslationPort( nodes = Seq(down), rawAddress = Fetch.WORD_PC, - allowRefill = insert(True), + forcePhysical = insert(False), usage = AddressTranslationPortUsage.FETCH, portSpec = translationPortParameter, storageSpec = translationStorage @@ -108,6 +92,9 @@ class FetchCachelessPlugin(var wordWidth : Int, val tpk = onAddress.translationPort.keys val fork = new pp.Fetch(forkAt){ + val pmaPort = new PmaPort(Global.PHYSICAL_WIDTH, List(Fetch.WORD_WIDTH/8), List(PmaLoad)) + pmaPort.cmd.address := tpk.TRANSLATED + val fresh = (forkAt == 0).option(host[PcPlugin].forcedSpawn()) val cmdFork = forkStream(fresh) bus.cmd.arbitrationFrom(cmdFork.haltWhen(buffer.full)) @@ -117,7 +104,8 @@ class FetchCachelessPlugin(var wordWidth : Int, BUFFER_ID := buffer.reserveId - when(tpk.REDO) { + val PMA_FAULT = insert(pmaPort.rsp.fault) + when(tpk.REDO || PMA_FAULT) { bus.cmd.valid := False }otherwise { when(up.isMoving) { @@ -149,7 +137,7 @@ class FetchCachelessPlugin(var wordWidth : Int, trapPort.code.assignDontCare() trapPort.arg.allowOverride() := 0 - when(rsp.error){ + when(rsp.error || fork.PMA_FAULT){ TRAP := True trapPort.exception := True trapPort.code := CSR.MCAUSE_ENUM.INSTRUCTION_ACCESS_FAULT @@ -181,4 +169,6 @@ class FetchCachelessPlugin(var wordWidth : Int, 
} buildBefore.release() } + + val pmaBuilder = during build new PmaLogic(logic.fork.pmaPort, regions.filter(_.isExecutable)) } diff --git a/src/main/scala/vexiiriscv/fetch/FetchL1Bridge.scala b/src/main/scala/vexiiriscv/fetch/FetchL1Bridge.scala new file mode 100644 index 00000000..c7d63f3a --- /dev/null +++ b/src/main/scala/vexiiriscv/fetch/FetchL1Bridge.scala @@ -0,0 +1,20 @@ +package vexiiriscv.fetch + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.{DebugId, S2mSupport} +import spinal.lib.misc.plugin.FiberPlugin + + +class FetchFetchL1TileLinkPlugin(node : bus.tilelink.fabric.Node) extends FiberPlugin { + val logic = during build new Area{ + val fcp = host[FetchL1Plugin] + fcp.logic.bus.setAsDirectionLess() + val down = fcp.logic.bus.toTilelink() + master(down) + node.m2s.forceParameters(down.p.node.m) + node.s2m.supported.load(S2mSupport.none()) + node.bus.component.rework(node.bus << down) + } +} diff --git a/src/main/scala/vexiiriscv/fetch/FetchL1Plugin.scala b/src/main/scala/vexiiriscv/fetch/FetchL1Plugin.scala index a76f0e74..a40ab966 100644 --- a/src/main/scala/vexiiriscv/fetch/FetchL1Plugin.scala +++ b/src/main/scala/vexiiriscv/fetch/FetchL1Plugin.scala @@ -10,17 +10,22 @@ import spinal.lib.bus.amba4.axilite.{AxiLite4Config, AxiLite4ReadOnly} import spinal.lib.bus.bmb.{Bmb, BmbAccessParameter, BmbParameter, BmbSourceParameter} import spinal.lib.bus.tilelink.{M2sSupport, SizeRange} import spinal.lib.misc.Plru -import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService} +import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, PmaLoad, PmaLogic, PmaPort} import vexiiriscv.misc._ import vexiiriscv._ import vexiiriscv.Global._ import Fetch._ -import spinal.core.fiber.Retainer +import spinal.core.fiber.{Handle, Retainer} +import spinal.lib.system.tag.PmaRegion import vexiiriscv.riscv.CSR import vexiiriscv.schedule.ReschedulePlugin import scala.collection.mutable.ArrayBuffer +case class FetchL1InvalidationCmd() extends Bundle //Empty for now +case class FetchL1InvalidationBus() extends Bundle { + val cmd = Stream(FetchL1InvalidationCmd()) +} trait FetchL1Service{ val invalidationRetainer = Retainer() @@ -28,18 +33,25 @@ trait FetchL1Service{ def newInvalidationPort() = invalidationPorts.addRet(FetchL1InvalidationBus()) } -case class FetchL1InvalidationCmd() extends Bundle //Empty for now -case class FetchL1InvalidationBus() extends Bundle { - val cmd = Stream(FetchL1InvalidationCmd()) +case class LsuL1InvalidationCmd() extends Bundle //Empty for now +case class LsuL1InvalidationBus() extends Bundle { + val cmd = Stream(LsuL1InvalidationCmd()) } +trait LsuL1Service{ + val invalidationRetainer = Retainer() + val invalidationPorts = ArrayBuffer[LsuL1InvalidationBus]() + def newInvalidationPort() = invalidationPorts.addRet(LsuL1InvalidationBus()) +} + + class FetchL1Plugin(var translationStorageParameter: Any, var translationPortParameter: Any, - var cacheSize : Int, - var wayCount : Int, var memDataWidth : Int, var fetchDataWidth : Int, + var setCount: Int, + var wayCount: Int, var lineSize: Int = 64, var readAt: Int = 0, var hitsAt: Int = 1, @@ -49,7 +61,7 @@ class FetchL1Plugin(var translationStorageParameter: Any, var ctrlAt: Int = 2, var hitsWithTranslationWays: Boolean = false, var reducedBankWidth: Boolean = false, - var tagsReadAsync: Boolean = false) extends FiberPlugin with FetchL1Service { + var tagsReadAsync: Boolean = false) extends FiberPlugin with FetchL1Service with 
InitService { def getBusParameter() = FetchL1BusParam( physicalWidth = PHYSICAL_WIDTH, @@ -59,6 +71,8 @@ class FetchL1Plugin(var translationStorageParameter: Any, ) + override def initHold(): Bool = logic.invalidate.firstEver + val logic = during setup new Area{ val pp = host[FetchPipelinePlugin] val pcp = host[PcService] @@ -86,6 +100,7 @@ class FetchL1Plugin(var translationStorageParameter: Any, val holdPorts = (0 until HART_COUNT).map(pcp.newHoldPort) setupLock.release() + val cacheSize = wayCount*setCount*lineSize val cpuWordWidth = fetchDataWidth val bytePerMemWord = memDataWidth / 8 val bytePerFetchWord = cpuWordWidth / 8 @@ -284,7 +299,7 @@ class FetchL1Plugin(var translationStorageParameter: Any, val translationPort = ats.newTranslationPort( nodes = Seq(pp.fetch(readAt).down, pp.fetch(readAt+1).down), rawAddress = Fetch.WORD_PC, - allowRefill = pp.fetch(readAt).insert(True), + forcePhysical = pp.fetch(readAt).insert(False), usage = AddressTranslationPortUsage.FETCH, portSpec = translationPortParameter, storageSpec = translationStorage @@ -360,10 +375,12 @@ class FetchL1Plugin(var translationStorageParameter: Any, } val ctrl = new pp.Fetch(ctrlAt){ + val pmaPort = new PmaPort(Global.PHYSICAL_WIDTH, List(lineSize), List(PmaLoad)) + pmaPort.cmd.address := tpk.TRANSLATED + val plruLogic = new Area { val core = new Plru(wayCount, false) core.io.context.state := PLRU_BYPASSED -// core.io.context.state.clearAll() core.io.update.id := OHToUInt(WAYS_HITS) plru.write.valid := False @@ -388,7 +405,7 @@ class FetchL1Plugin(var translationStorageParameter: Any, trapPort.code.assignDontCare() trapPort.arg.allowOverride() := 0 - when(dataAccessFault) { + when(dataAccessFault || pmaPort.rsp.fault) { TRAP := True trapPort.exception := True trapPort.code := CSR.MCAUSE_ENUM.INSTRUCTION_ACCESS_FAULT @@ -428,7 +445,7 @@ class FetchL1Plugin(var translationStorageParameter: Any, refill.start.valid := redoIt && !HAZARD refill.start.address := tpk.TRANSLATED refill.start.hartId := HART_ID - refill.start.isIo := tpk.IO + refill.start.isIo := pmaPort.rsp.io when(redoIt){ pp.fetch(0).haltIt() //"optional" @@ -449,5 +466,8 @@ class FetchL1Plugin(var translationStorageParameter: Any, buildBefore.release() } + + val regions = Handle[ArrayBuffer[PmaRegion]]() + val pmaBuilder = during build new PmaLogic(logic.ctrl.pmaPort, regions.filter(_.isExecutable)) } diff --git a/src/main/scala/vexiiriscv/memory/MmuPlugin.scala b/src/main/scala/vexiiriscv/memory/MmuPlugin.scala index e88ca775..e43ee1a3 100644 --- a/src/main/scala/vexiiriscv/memory/MmuPlugin.scala +++ b/src/main/scala/vexiiriscv/memory/MmuPlugin.scala @@ -77,16 +77,14 @@ object MmuSpec{ } class MmuPlugin(var spec : MmuSpec, - var physicalWidth : Int, - var ioRange : UInt => Bool, - var fetchRange : UInt => Bool) extends FiberPlugin with AddressTranslationService{ + var physicalWidth : Int) extends FiberPlugin with AddressTranslationService{ override def mayNeedRedo: Boolean = true case class PortSpec(stages: Seq[NodeBaseApi], preAddress: Payload[UInt], - allowRefill : Payload[Bool], + forcePhysical : Payload[Bool], usage : AddressTranslationPortUsage, pp: MmuPortParameter, ss : StorageSpec, @@ -114,7 +112,7 @@ class MmuPlugin(var spec : MmuSpec, override def newTranslationPort(stages: Seq[NodeBaseApi], preAddress: Payload[UInt], - allowRefill : Payload[Bool], + forcePhysical : Payload[Bool], usage : AddressTranslationPortUsage, portSpec: Any, storageSpec: Any) = { @@ -124,7 +122,7 @@ class MmuPlugin(var spec : MmuSpec, new PortSpec( stages = stages, 
preAddress = preAddress, - allowRefill = allowRefill, + forcePhysical = forcePhysical, usage = usage, pp = pp, ss = ss, @@ -283,9 +281,9 @@ class MmuPlugin(var spec : MmuSpec, requireMmuLockup := False } } + requireMmuLockup clearWhen(ps.forcePhysical) import ps.rsp.keys._ - IO := ioRange(TRANSLATED) when(requireMmuLockup) { REDO := !hit TRANSLATED := lineTranslated @@ -304,7 +302,6 @@ class MmuPlugin(var spec : MmuSpec, ACCESS_FAULT := ps.preAddress.drop(physicalWidth) =/= 0 } - ALLOW_EXECUTE clearWhen(!fetchRange(TRANSLATED)) BYPASS_TRANSLATION := !requireMmuLockup WAYS_OH := oh diff --git a/src/main/scala/vexiiriscv/memory/PmaPlugin.scala b/src/main/scala/vexiiriscv/memory/PmaPlugin.scala new file mode 100644 index 00000000..36f7a441 --- /dev/null +++ b/src/main/scala/vexiiriscv/memory/PmaPlugin.scala @@ -0,0 +1,87 @@ +package vexiiriscv.memory + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.misc.AddressMapping +import spinal.lib.bus.tilelink.M2sTransfers +import spinal.lib.logic.{DecodingSpec, Masked, Symplify} +import spinal.lib.misc.plugin.FiberPlugin +import spinal.lib.system.tag.{MappedTransfers, PmaRegion} +import vexiiriscv.Global + +import scala.collection.mutable.ArrayBuffer + + +trait PmaOp +object PmaLoad extends PmaOp +object PmaStore extends PmaOp + +class PmaCmd(addressWidth : Int, sizes : Seq[Int], ops : Seq[PmaOp]) extends Bundle{ + val address = UInt(addressWidth bits) + val size = Bits(log2Up(sizes.size) bits) + val op = Bits(log2Up(ops.size) bits) +} + +class PmaRsp() extends Bundle{ + val fault = Bool() + val io = Bool() +} + +case class PmaPort(addressWidth : Int, sizes : Seq[Int], ops : Seq[PmaOp]) extends Bundle{ + val cmd = new PmaCmd(addressWidth, sizes, ops) + val rsp = new PmaRsp() +} + + +class PmaLogic(port : PmaPort, regions : Seq[PmaRegion]) extends Area{ + import port._ + val hitsTerms = ArrayBuffer[Masked]() + val mainSpec = new DecodingSpec(Bool()).setDefault(Masked.zero) + val executableSpec = new DecodingSpec(Bool()).setDefault(Masked.zero) + + val addressBits = cmd.address.asBits + val argsBits = cmd.size ## cmd.op + val argsWidth = widthOf(argsBits) + val argsMask = BigInt((1 << argsWidth)-1) + def opMask(opId: Int, sizeId: Int) = Masked(opId | (sizeId << widthOf(cmd.op)), argsMask) + + + val onRegion = for (region <- regions) yield new Area { + val regionTerms = AddressMapping.terms(region.mapping, addressWidth) + hitsTerms ++= regionTerms + if (region.isMain) mainSpec.addNeeds(regionTerms, Masked.one) + if(region.isExecutable) executableSpec.addNeeds(regionTerms, Masked.one) + } + + val byTransfers = regions.groupBy(_.transfers) + val onTransfers = for ((transfer, regions) <- byTransfers) yield new Area{ + val terms = ArrayBuffer[Masked]() + val addressSpec = new DecodingSpec(Bool()).setDefault(Masked.zero) + for (region <- regions) terms ++= AddressMapping.terms(region.mapping, addressWidth) + addressSpec.addNeeds(terms, Masked.one) + val addressHit = addressSpec.build(addressBits, hitsTerms) + + val argsOk, argsKo = ArrayBuffer[Masked]() + for((size, sizeId) <- sizes.zipWithIndex){ + for((op, opId) <- ops.zipWithIndex){ + val mask = opMask(opId, sizeId) + val ok = op match { + case PmaLoad => transfer match { + case t : M2sTransfers => t.get.contains(size) + } + case PmaStore => transfer match { + case t: M2sTransfers => t.putFull.contains(size) + } + } + if(ok) argsOk += mask else argsKo += mask + } + } + val argsHit = Symplify(argsBits, argsOk, argsKo) + + val hit = argsHit && addressHit + } + + + port.rsp.fault := 
!(Symplify(addressBits, hitsTerms) && onTransfers.map(_.hit).orR) + port.rsp.io := !mainSpec.build(addressBits, hitsTerms) +} diff --git a/src/main/scala/vexiiriscv/memory/Service.scala b/src/main/scala/vexiiriscv/memory/Service.scala index ed9fcb9d..1d277fec 100644 --- a/src/main/scala/vexiiriscv/memory/Service.scala +++ b/src/main/scala/vexiiriscv/memory/Service.scala @@ -47,9 +47,11 @@ trait AddressTranslationService extends Area { def getStorageId(s : Any) : Int def getStorageIdWidth() : Int + val regionRetainer = Retainer() + def newTranslationPort(nodes: Seq[NodeBaseApi], rawAddress: Payload[UInt], - allowRefill: Payload[Bool], + forcePhysical: Payload[Bool], usage: AddressTranslationPortUsage, portSpec: Any, storageSpec: Any): AddressTranslationRsp @@ -65,7 +67,6 @@ class AddressTranslationRsp(s : AddressTranslationService, val wayCount : Int) e val keys = new Area { setName("MMU") val TRANSLATED = Payload(PHYSICAL_ADDRESS) - val IO = Payload(Bool()) val REDO = Payload(Bool()) val ALLOW_READ, ALLOW_WRITE, ALLOW_EXECUTE = Payload(Bool()) val PAGE_FAULT = Payload(Bool()) diff --git a/src/main/scala/vexiiriscv/memory/StaticTranslationPlugin.scala b/src/main/scala/vexiiriscv/memory/StaticTranslationPlugin.scala index 4fb55950..1ba60bf5 100644 --- a/src/main/scala/vexiiriscv/memory/StaticTranslationPlugin.scala +++ b/src/main/scala/vexiiriscv/memory/StaticTranslationPlugin.scala @@ -8,44 +8,40 @@ import spinal.lib.misc.plugin._ import vexiiriscv.Global._ import vexiiriscv.riscv.Riscv -class StaticTranslationPlugin(var physicalWidth: Int, - var ioRange: UInt => Bool, - var fetchRange: UInt => Bool) extends FiberPlugin with AddressTranslationService { +import scala.collection.mutable.ArrayBuffer + +class StaticTranslationPlugin(var physicalWidth: Int) extends FiberPlugin with AddressTranslationService { override def mayNeedRedo: Boolean = false override def newStorage(pAny: Any): Any = { } override def getStorageId(s: Any): Int = 0 override def getStorageIdWidth(): Int = 0 - override def newTranslationPort(nodes: Seq[NodeBaseApi], + case class PortSpec(stages: Seq[NodeBaseApi], + preAddress: Payload[UInt], + forcePhysical: Payload[Bool], + usage: AddressTranslationPortUsage, + rsp: AddressTranslationRsp) + + val portSpecs = ArrayBuffer[PortSpec]() + override def newTranslationPort(stages: Seq[NodeBaseApi], rawAddress: Payload[UInt], - allowRefill: Payload[Bool], + forcePhysical: Payload[Bool], usage: AddressTranslationPortUsage, portSpec: Any, storageSpec: Any): AddressTranslationRsp = { - new AddressTranslationRsp(this, wayCount = 0) { - val node = nodes.last - import node._ - import keys._ - - REDO := False - TRANSLATED := rawAddress.resized //PC RESIZED - IO := ioRange(TRANSLATED) - ALLOW_EXECUTE := True - ALLOW_READ := True - ALLOW_WRITE := True - PAGE_FAULT := False - ACCESS_FAULT := False - ALLOW_EXECUTE clearWhen (!fetchRange(TRANSLATED)) - } + portSpecs.addRet( + new PortSpec( + stages = stages, + preAddress = rawAddress, + forcePhysical = forcePhysical, + usage = usage, + rsp = new AddressTranslationRsp(this, 0) + ) + ).rsp } val logic = during build new Area { -// PHYSICAL_WIDTH.set(physicalWidth) -// VIRTUAL_WIDTH.set(physicalWidth min Riscv.XLEN.get) -// MIXED_WIDTH.set(VIRTUAL_WIDTH.get + (VIRTUAL_WIDTH < Riscv.XLEN).toInt) -// PC_WIDTH.set(MIXED_WIDTH) -// TVAL_WIDTH.set(MIXED_WIDTH) PHYSICAL_WIDTH.set(physicalWidth) VIRTUAL_WIDTH.set(physicalWidth) MIXED_WIDTH.set(physicalWidth) @@ -54,5 +50,21 @@ class StaticTranslationPlugin(var physicalWidth: Int, portsLock.await() 
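// Note: the ioRange/fetchRange checks this plugin used to perform are now handled by the
// dedicated PMA logic (PmaPort/PmaLogic), so only the identity translation and the
// always-allowed permissions remain below.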
assert(refillPorts.isEmpty) + + regionRetainer.await() + val ports = for(spec <- portSpecs) yield new Area{ + val node = spec.stages.last + + import node._ + import spec.rsp.keys._ + + REDO := False + TRANSLATED := spec.preAddress.resized //PC RESIZED + ALLOW_EXECUTE := True + ALLOW_READ := True + ALLOW_WRITE := True + PAGE_FAULT := False + ACCESS_FAULT := False + } } } \ No newline at end of file diff --git a/src/main/scala/vexiiriscv/misc/PerformanceCounterPlugin.scala b/src/main/scala/vexiiriscv/misc/PerformanceCounterPlugin.scala index 968856be..7524a484 100644 --- a/src/main/scala/vexiiriscv/misc/PerformanceCounterPlugin.scala +++ b/src/main/scala/vexiiriscv/misc/PerformanceCounterPlugin.scala @@ -144,9 +144,16 @@ class PerformanceCounterPlugin(var additionalCounterCount : Int, } val idleCsrAddress = csrReadCmd.valid.mux(csrReadCmd.address, csrWriteCmd.address) + val holdCsrWrite = True + when(holdCsrWrite){ + ram.holdCsrWrite() + } IDLE whenIsActive{ + holdCsrWrite := False cmd.oh := B(for (c <- counters.list) yield idleCsrAddress === c.counterId) - when(flusherCmd.valid){ + when(csrWriteCmd.valid) { + goto(CSR_WRITE) + }elsewhen(flusherCmd.valid){ cmd.flusher := True cmd.oh := flusherCmd.oh flusherCmd.ready := True @@ -155,13 +162,12 @@ class PerformanceCounterPlugin(var additionalCounterCount : Int, cmd.flusher := False csrReadCmd.ready := True goto(READ_LOW) - } elsewhen(csrWriteCmd.valid){ - goto(CSR_WRITE) } carry := False } CSR_WRITE whenIsActive { + holdCsrWrite := False when(csr.onWriteAddress(7) === False) { counters.list.onMask(cmd.oh) { c => c.value := csr.onWriteBits.asUInt.resized diff --git a/src/main/scala/vexiiriscv/misc/PrivilegedPlugin.scala b/src/main/scala/vexiiriscv/misc/PrivilegedPlugin.scala index 8f27d9ae..96457b50 100644 --- a/src/main/scala/vexiiriscv/misc/PrivilegedPlugin.scala +++ b/src/main/scala/vexiiriscv/misc/PrivilegedPlugin.scala @@ -45,7 +45,7 @@ case class PrivilegedParam(var withSupervisor : Boolean, } } -class PrivilegedPlugin(val p : PrivilegedParam, hartIds : Seq[Int]) extends FiberPlugin with CommitService{ +class PrivilegedPlugin(val p : PrivilegedParam, val hartIds : Seq[Int]) extends FiberPlugin with CommitService{ def implementSupervisor = p.withSupervisor def implementUser = p.withUser def implementUserTrap = p.withUserTrap diff --git a/src/main/scala/vexiiriscv/misc/TrapPlugin.scala b/src/main/scala/vexiiriscv/misc/TrapPlugin.scala index 37a515fa..486e6ac3 100644 --- a/src/main/scala/vexiiriscv/misc/TrapPlugin.scala +++ b/src/main/scala/vexiiriscv/misc/TrapPlugin.scala @@ -1,6 +1,6 @@ package vexiiriscv.misc -import spinal.core.{Bool, _} +import spinal.core._ import spinal.core.fiber.Retainer import spinal.lib._ import spinal.lib.fsm._ @@ -12,8 +12,9 @@ import vexiiriscv.riscv.Riscv._ import vexiiriscv._ import vexiiriscv.decode.Decode import vexiiriscv.decode.Decode.{INSTRUCTION_SLICE_COUNT, INSTRUCTION_SLICE_COUNT_WIDTH, INSTRUCTION_WIDTH} -import vexiiriscv.fetch.{Fetch, FetchL1Service, InitService, PcService} +import vexiiriscv.fetch.{Fetch, FetchL1Service, InitService, LsuL1Service, PcService} import vexiiriscv.memory.AddressTranslationService +import vexiiriscv.prediction.{HistoryPlugin, Prediction} import vexiiriscv.schedule.Ages import scala.collection.mutable @@ -97,19 +98,21 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { val cap = host[CsrAccessPlugin] val pp = host[PipelineBuilderPlugin] val fl1p = host.get[FetchL1Service] + val lsul1p = host.get[LsuL1Service] val pcs = host[PcService] + val hp = 
host.get[HistoryPlugin] val ats = host[AddressTranslationService] val withRam = host.get[CsrRamService].nonEmpty val crs = withRam generate host[CsrRamService] - val fl1pLock = fl1p.map(_.invalidationRetainer()) - val buildBefore = retains(List(pp.elaborationLock, pcs.elaborationLock, cap.csrLock, ats.portsLock)) + val invalidationLocks = retains(fl1p.map(_.invalidationRetainer).toList ++ lsul1p.map(_.invalidationRetainer)) + val buildBefore = retains(List(pp.elaborationLock, pcs.elaborationLock, cap.csrLock, ats.portsLock) ++ hp.map(_.elaborationLock)) val ramPortRetainers = withRam generate crs.portLock() awaitBuild() - val fetchL1Invalidate = fl1p.nonEmpty generate new Area{ - val ports = (0 until HART_COUNT).map(hartId => fl1p.get.newInvalidationPort()) - fl1pLock.get.release() - } + val fetchL1Invalidate = fl1p.nonEmpty generate (0 until HART_COUNT).map(hartId => fl1p.get.newInvalidationPort()) + val lsuL1Invalidate = lsul1p.nonEmpty generate (0 until HART_COUNT).map(hartId => lsul1p.get.newInvalidationPort()) + + invalidationLocks.release() val trapArgWidths = ArrayBuffer[Int](2) if(ats.mayNeedRedo) trapArgWidths += 2+ats.getStorageIdWidth() @@ -201,10 +204,11 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { val trap = new Area { val pending = new Area { - val requests = traps.map(e => new AgedArbiterUp(e.bus.valid && e.bus.hartId === hartId, e.bus.payload.toRaw(), e.age, e.age)) + val requests = traps.map(e => new AgedArbiterUp(e.bus.valid && e.bus.hartId === hartId, e.bus.payload.toRaw(), e.age, e.bus.laneAge)) val arbiter = new AgedArbiter(requests) val state = arbiter.down.toReg val pc = Reg(PC) + val history = hp.nonEmpty generate Reg(Prediction.BRANCH_HISTORY) val slices = Reg(UInt(INSTRUCTION_SLICE_COUNT_WIDTH+1 bits)) val xret = new Area { @@ -247,11 +251,11 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { csr.hasInflight := (for (self <- lanes; ctrlId <- 1 to self.executeAt + trapAt; sn = self.ctrl(ctrlId).up) yield sn.isValid && sn(HART_ID) === hartId).orR val oh = B(for (self <- lanes; sn = self.execute(trapAt).down) yield sn.isFiring && sn(TRAP)) val valid = oh.orR - val pc = OHMux.or(oh, lanes.map(_.execute(trapAt).down(PC)), true) - val slices = OHMux.or(oh, lanes.map(_.execute(trapAt).down(INSTRUCTION_SLICE_COUNT)), true) + val reader = lanes.map(_.execute(trapAt).down).reader(oh, true) when(valid) { - pending.pc := pc - pending.slices := slices.resize(INSTRUCTION_SLICE_COUNT_WIDTH+1)+1 + pending.pc := reader(_(PC)) + if(hp.nonEmpty) pending.history := reader(_(Prediction.BRANCH_HISTORY)) + pending.slices := reader(_(INSTRUCTION_SLICE_COUNT)).resize(INSTRUCTION_SLICE_COUNT_WIDTH+1)+1 } } @@ -261,6 +265,12 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { val code = Global.CODE().assignDontCare() } + val historyPort = hp.nonEmpty generate hp.get.newPort(Integer.MAX_VALUE, 0) + if(hp.nonEmpty) { + historyPort.valid := False + historyPort.history := pending.history + } + val pcPort = pcs.newJumpInterface(Ages.TRAP, 0, 0) pcPort.valid := False pcPort.hartId := hartId @@ -272,6 +282,8 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { val XRET_EPC, XRET_APPLY = new State() val ATS_RSP = ats.mayNeedRedo generate new State() val JUMP = new State() + val LSU_FLUSH = lsul1p.nonEmpty generate new State() + val FETCH_FLUSH = fl1p.nonEmpty generate new State() val inflightTrap = trapPendings.map(_(hartId)).orR val holdPort = pcs.newHoldPort(hartId) @@ -320,7 +332,6 @@ class 
TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { } } - RUNNING.whenIsActive { when(trigger.valid) { buffer.sampleIt := True @@ -340,8 +351,10 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { ).map(pending.state.code === _).orR ) - if(fl1p.nonEmpty) fetchL1Invalidate.ports(hartId).cmd.valid := False + if (fl1p.nonEmpty) fetchL1Invalidate(hartId).cmd.valid := False + if (lsul1p.nonEmpty) lsuL1Invalidate(hartId).cmd.valid := False PROCESS.whenIsActive{ + if(hp.nonEmpty) historyPort.valid := True when(pending.state.exception || buffer.trap.interrupt) { goto(TRAP_TVAL) } otherwise { @@ -356,16 +369,13 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { goto(XRET_EPC) } is(TrapReason.FENCE_I) { - fl1p.isEmpty match { - case true => goto(JUMP) - case false => { - fetchL1Invalidate.ports(hartId).cmd.valid := True - when(fetchL1Invalidate.ports(hartId).cmd.ready) { - goto(JUMP) - } + lsul1p.nonEmpty match { + case true => goto(LSU_FLUSH) + case false => fl1p.nonEmpty match { + case true => goto(FETCH_FLUSH) + case false => goto(JUMP) } } - } is(TrapReason.REDO) { goto(JUMP) @@ -402,6 +412,23 @@ class TrapPlugin(trapAt : Int) extends FiberPlugin with TrapService { } } + if(lsul1p.nonEmpty) LSU_FLUSH.whenIsActive{ + lsuL1Invalidate(hartId).cmd.valid := True + when(lsuL1Invalidate(hartId).cmd.ready) { + fl1p.nonEmpty match { + case true => goto(FETCH_FLUSH) + case false => goto(JUMP) + } + } + } + + if (fl1p.nonEmpty) FETCH_FLUSH.whenIsActive { + fetchL1Invalidate(hartId).cmd.valid := True + when(fetchL1Invalidate(hartId).cmd.ready) { + goto(JUMP) + } + } + if(ats.mayNeedRedo) ATS_RSP.whenIsActive{ when(atsPorts.refill.rsp.valid){ goto(JUMP) //TODO shave one cycle diff --git a/src/main/scala/vexiiriscv/misc/Utils.scala b/src/main/scala/vexiiriscv/misc/Utils.scala index 56479e4a..d966552d 100644 --- a/src/main/scala/vexiiriscv/misc/Utils.scala +++ b/src/main/scala/vexiiriscv/misc/Utils.scala @@ -12,6 +12,27 @@ object AddressToMask{ } } +class Reservation{ + class Entry(val priority : Int) extends Area{ + val win = Bool() + val take = False + + def takeIt() = take := True + } + val model = ArrayBuffer[Entry]() + def create(priority : Int) : Entry = { + val e = new Entry( priority) + model += e + e + } + + def build(){ + for(e <- model){ + e.win := !model.filter(_.priority < e.priority).map(_.take).orR + } + } +} + /** * MulSpliter is a tool which will cut a multiplication into multiple smaller multiplications * Those smaller multiplication results would need to be summed together. 
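The `Reservation` helper added to `vexiiriscv.misc.Utils` above implements a small priority network: each `Entry` created via `create(priority)` sees `win` high only when no entry with a strictly smaller priority value has called `takeIt()` in the same cycle, and `build()` wires that up once every entry has been created. Below is a minimal usage sketch, assuming `Reservation` is reachable as `vexiiriscv.misc.Reservation` as the patched `Utils.scala` suggests; the component, signal names and priority values are illustrative only and are not part of this patch.

```scala
import spinal.core._
import vexiiriscv.misc.Reservation

// Hypothetical demo (names invented): a refill engine and a flush engine share
// one resource; refill (priority 0) silently preempts flush (priority 1).
class ReservationDemo extends Component {
  val io = new Bundle {
    val refillRequest = in Bool()
    val flushRequest  = in Bool()
    val refillGranted = out Bool()
    val flushGranted  = out Bool()
  }

  val reservation = new Reservation()
  val refillSlot  = reservation.create(priority = 0) // lower value = higher precedence
  val flushSlot   = reservation.create(priority = 1)

  // A requester may only take the resource when it "wins", i.e. no entry with a
  // smaller priority value has taken it this cycle.
  io.refillGranted := False
  when(io.refillRequest && refillSlot.win) {
    refillSlot.takeIt()
    io.refillGranted := True
  }

  io.flushGranted := False
  when(io.flushRequest && flushSlot.win) {
    flushSlot.takeIt()
    io.flushGranted := True
  }

  // Elaborates the win/take priority network once all entries exist.
  reservation.build()
}
```

Note that, as written, entries sharing the same priority value do not mask each other, so callers are expected to pick distinct priorities whenever exclusivity matters.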
diff --git a/src/main/scala/vexiiriscv/prediction/BtbPlugin.scala b/src/main/scala/vexiiriscv/prediction/BtbPlugin.scala index b495597c..8b43feed 100644 --- a/src/main/scala/vexiiriscv/prediction/BtbPlugin.scala +++ b/src/main/scala/vexiiriscv/prediction/BtbPlugin.scala @@ -105,7 +105,7 @@ class BtbPlugin(var sets : Int, val sliceLow = SLICE_LOW() val pcTarget = PC_TARGET() val isBranch, isPush, isPop = Bool() - val taken = Bool() //TODO remove + val taken = !withCondPrediction generate Bool() //TODO remove } // This memory could be implemented as a single port ram, as that ram is only updated on miss predicted stuff @@ -121,7 +121,7 @@ class BtbPlugin(var sets : Int, val hash = getHash(cmd.pcOnLastSlice) val port = mem.writePortWithMask(chunks) - port.valid := cmd.valid && withCondPrediction.mux(cmd.badPredictedTarget && cmd.wasWrong, cmd.wasWrong) + port.valid := cmd.valid && withCondPrediction.mux(cmd.badPredictedTarget, cmd.wasWrong || cmd.badPredictedTarget) port.address := (cmd.pcOnLastSlice >> wordBytesWidth).resized port.mask := UIntToOh(cmd.pcOnLastSlice(SLICE_HIGH_RANGE)) for(data <- port.data) { @@ -131,7 +131,7 @@ class BtbPlugin(var sets : Int, data.isBranch := cmd.isBranch data.isPush := cmd.isPush data.isPop := cmd.isPop - data.taken := cmd.taken + if(!withCondPrediction) data.taken := cmd.taken } } @@ -150,12 +150,12 @@ class BtbPlugin(var sets : Int, data.isBranch := False data.isPush := False data.isPop := False - data.taken := False + if(!withCondPrediction) data.taken := False } } } - val readPort = mem.readSyncPort() //TODO , readUnderWrite = readFirst + val readPort = mem.readSyncPort() //TODO , readUnderWrite = readFirst would save area/ipc/fmax on FPGA which support it, same for gshare val readCmd = new fpp.Fetch(readAt){ readPort.cmd.valid := isReady readPort.cmd.payload := (WORD_PC >> wordBytesWidth).resize(mem.addressWidth) diff --git a/src/main/scala/vexiiriscv/prediction/GSharePlugin.scala b/src/main/scala/vexiiriscv/prediction/GSharePlugin.scala index ac7a661a..bffea854 100644 --- a/src/main/scala/vexiiriscv/prediction/GSharePlugin.scala +++ b/src/main/scala/vexiiriscv/prediction/GSharePlugin.scala @@ -54,7 +54,7 @@ class GSharePlugin(var historyWidth : Int, def hashWidth = log2Up(words) def gshareHash(address : UInt, history : Bits) = address(SLICE_RANGE.get.high + 1, hashWidth bits).reversed ^ U(history).resized - val mem = new Area{ //TODO bypass read durring write ? 
+ val mem = new Area{ val counter = Mem.fill(words)(GSHARE_COUNTER) val write = counter.writePort if (GenerationFlags.simulation) { diff --git a/src/main/scala/vexiiriscv/prediction/HistoryPlugin.scala b/src/main/scala/vexiiriscv/prediction/HistoryPlugin.scala index a4dcd7c2..c889a95e 100644 --- a/src/main/scala/vexiiriscv/prediction/HistoryPlugin.scala +++ b/src/main/scala/vexiiriscv/prediction/HistoryPlugin.scala @@ -15,6 +15,7 @@ import scala.collection.mutable case class HistoryJump(laneAgeWidth : Int) extends Bundle{ val history = BRANCH_HISTORY() val age = UInt(laneAgeWidth bits) + //TODO HART ID } //TODO a few history port may be removed to save area, as they are corner case : DecodePredictionPlugin, and eventualy Lsu io missprediction diff --git a/src/main/scala/vexiiriscv/sandbox/Play.scala b/src/main/scala/vexiiriscv/sandbox/Play.scala index 2f6c30ca..741ff085 100644 --- a/src/main/scala/vexiiriscv/sandbox/Play.scala +++ b/src/main/scala/vexiiriscv/sandbox/Play.scala @@ -37,10 +37,10 @@ object Miaouuuu9 extends App{ import scala.collection.mutable.ArrayBuffer class EventCounterPlugin extends FiberPlugin{ - val lock = Retainer() // Will allow other plugins to block the elaboration of "logic" thread + val hostLockX = Retainer() // Will allow other plugins to block the elaboration of "logic" thread val events = ArrayBuffer[Bool]() // Will allow other plugins to add event sources val logic = during build new Area{ - lock.await() // Active blocking + hostLockX.await() // Active blocking val counter = Reg(UInt(32 bits)) init(0) counter := counter + CountOne(events) } @@ -56,7 +56,7 @@ object Miaouuuu9 extends App{ val ecp = host[EventCounterPlugin] // Search for the single instance of EventCounterPlugin in the plugin pool // Generate a lock to prevent the EventCounterPlugin elaboration until we release it. 
// this will allow us to add our localEvent to the ecp.events list - val ecpLocker = ecp.lock() + val ecpLocker = ecp.hostLockX() // Wait for the build phase before generating any hardware awaitBuild() diff --git a/src/main/scala/vexiiriscv/schedule/DispatchPlugin.scala b/src/main/scala/vexiiriscv/schedule/DispatchPlugin.scala index 55bf3185..ead1a9f1 100644 --- a/src/main/scala/vexiiriscv/schedule/DispatchPlugin.scala +++ b/src/main/scala/vexiiriscv/schedule/DispatchPlugin.scala @@ -3,7 +3,7 @@ package vexiiriscv.schedule import spinal.core._ import spinal.core.fiber.Retainer import spinal.lib._ -import spinal.lib.logic.{DecodingSpec, Masked} +import spinal.lib.logic.{DecodingSpec, Masked, Symplify} import spinal.lib.misc.pipeline.{CtrlApi, CtrlLaneApi, CtrlLink, NodeApi, Payload} import spinal.lib.misc.plugin.FiberPlugin import vexiiriscv.Global @@ -96,7 +96,7 @@ class DispatchPlugin(var dispatchAt : Int, var trapLayer : LaneLayer) extends Fi for (uop <- mayFlushUops) dp.addMicroOpDecoding(uop, MAY_FLUSH, True) for (uop <- dontFlushUops) dp.addMicroOpDecoding(uop, DONT_FLUSH, True) for (uop <- dontFlushFromLanesUops) dp.addMicroOpDecoding(uop, DONT_FLUSH_FROM_LANES, True) - for(uop <- fenceOlderOps) dp.addMicroOpDecoding(uop, FENCE_OLDER, True) + for (uop <- fenceOlderOps) dp.addMicroOpDecoding(uop, FENCE_OLDER, True) // Generate upstream up dontFlush precise decoding case class DontFlushSpec(at: Int, value: Payload[Bool]) @@ -170,9 +170,25 @@ class DispatchPlugin(var dispatchAt : Int, var trapLayer : LaneLayer) extends Fi case RfResource(_, e) => true case _ => false }).values + val hazardUntilMax = eus.map(_.getRdBroadcastedFromMax()).max val onRs = for (rs <- readAccess) yield new Area { val hazards = ArrayBuffer[Bool]() + val decodeSpec = ArrayBuffer[(Masked, Masked)]() + for(uop <- ll.uops.values){ + uop.rs.get(rs.rfa.asInstanceOf[RfRead]).foreach{v => + decodeSpec += Masked(uop.uop.key) -> (v.from >= hazardUntilMax).mux(Masked.one, Masked.zero) + } + } + val skip = Symplify(c.ctx.uop, decodeSpec, 1).as(Bool()) //TODO verify with LsuPlugin store usage at execute id 2 + + for (spec <- bypassedSpecs.values) yield new Area { + for (l <- spec.el.getLayers(); uop <- l.uops.values) { + uop.rd.foreach { rd => + uop.addDecoding(spec.value -> Bool(rd.broadcastedFrom <= spec.at)) + } + } + } for(writeEu <- eus) { val hazardFrom = ll.el.rfReadHazardFrom(ll.getRsUseAtMin()) // This is a pessimistic aproach val hazardUntil = writeEu.getRdBroadcastedFromMax() @@ -182,7 +198,7 @@ class DispatchPlugin(var dispatchAt : Int, var trapLayer : LaneLayer) extends Fi hazards += node(rdKeys.ENABLE) && node(rdKeys.PHYS) === c.ctx.hm(rs.PHYS) && !node(getBypassed(writeEu, id)) } } - val hazard = c.ctx.hm(rs.ENABLE) && hazards.orR + val hazard = c.ctx.hm(rs.ENABLE) && hazards.orR && !skip } c.rsHazards(llId) := onRs.map(_.hazard).orR } diff --git a/src/main/scala/vexiiriscv/soc/TilelinkVexiiRiscvFiber.scala b/src/main/scala/vexiiriscv/soc/TilelinkVexiiRiscvFiber.scala new file mode 100644 index 00000000..898c6ae2 --- /dev/null +++ b/src/main/scala/vexiiriscv/soc/TilelinkVexiiRiscvFiber.scala @@ -0,0 +1,105 @@ +package vexiiriscv.soc + +import net.fornwall.jelf.{ElfFile, ElfSection, ElfSectionHeader} +import spinal.core +import spinal.core._ +import spinal.core.fiber._ +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.bus.tilelink.fabric._ +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink.coherent.{Hub, HubFiber} +import spinal.lib.bus.tilelink.sim.{Checker, Endpoint, MemoryAgent, 
Monitor, MonitorSubscriber, SlaveDriver, TransactionA, TransactionC, TransactionD} +import spinal.lib.bus.tilelink.{M2sSupport, M2sTransfers, Opcode, S2mSupport, SizeRange, fabric} +import spinal.lib.cpu.riscv.RiscvHart +import spinal.lib.cpu.riscv.debug.DebugHartBus +import spinal.lib.misc.plic.InterruptCtrlFiber +import spinal.lib.misc.plugin.Hostable +import spinal.lib.misc.{ClintPort, Elf, InterruptCtrl, InterruptNode, TilelinkClintFiber} +import spinal.lib.sim.SparseMemory +import spinal.lib.system.tag.{MemoryConnection, PMA, PmaRegion} +import spinal.sim.{Signal, SimManagerContext} +import vexiiriscv.{ParamSimple, VexiiRiscv} +import vexiiriscv.execute.lsu.{LsuCachelessPlugin, LsuCachelessTileLinkPlugin, LsuL1Plugin, LsuL1TileLinkPlugin, LsuPlugin, LsuTileLinkPlugin} +import vexiiriscv.fetch.{FetchCachelessPlugin, FetchCachelessTileLinkPlugin, FetchFetchL1TileLinkPlugin, FetchL1Plugin} +import vexiiriscv.memory.AddressTranslationService +import vexiiriscv.misc.PrivilegedPlugin + +import java.io.{BufferedWriter, File, FileWriter} +import java.nio.file.Files +import scala.collection.mutable.ArrayBuffer + +class TilelinkVexiiRiscvFiber(plugins : ArrayBuffer[Hostable]) extends Area{ + val iBus = Node.down() + val dBus = Node.down() + val lsuL1Bus = plugins.exists(_.isInstanceOf[LsuL1Plugin]) generate Node.down() + + def buses = List(iBus, dBus) ++ lsuL1Bus.nullOption + + val priv = plugins.collectFirst { + case p: PrivilegedPlugin => new Area { + val plugin = p + val mti, msi, mei = InterruptNode.slave() + val sei = p.p.withSupervisor generate InterruptNode.slave() + } + } + + def bind(ctrl: InterruptCtrlFiber) = priv match { + case Some(priv) => new Area { + val pp = priv.plugin + val intIdBase = pp.hartIds(0) * (1 + pp.p.withSupervisor.toInt) + ctrl.mapDownInterrupt(intIdBase, priv.mei) + if(pp.p.withSupervisor) ctrl.mapDownInterrupt(intIdBase + 1, priv.sei) + } + } + + var clint = Option.empty[TilelinkClintFiber] + def bind(clint: TilelinkClintFiber): Unit = priv match { + case Some(priv) => new Area { + val pp = priv.plugin + val up = clint.createPort(pp.hartIds(0)) + priv.mti << up.mti + priv.msi << up.msi + TilelinkVexiiRiscvFiber.this.clint = Some(clint) + } + } + + + // Add the plugins to bridge the CPU toward Tilelink + plugins.foreach { + case p: FetchCachelessPlugin => plugins += new FetchCachelessTileLinkPlugin(iBus) + case p: FetchL1Plugin => plugins += new FetchFetchL1TileLinkPlugin(iBus) + case p: LsuCachelessPlugin => plugins += new LsuCachelessTileLinkPlugin(dBus) + case p: LsuPlugin => plugins += new LsuTileLinkPlugin(dBus) + case p: LsuL1Plugin => plugins += new LsuL1TileLinkPlugin(lsuL1Bus) + case _ => + } + + + val logic = Fiber setup new Area{ + val core = VexiiRiscv(plugins) + Fiber.awaitBuild() + + def getRegion(node : Node) = MemoryConnection.getMemoryTransfers(node).asInstanceOf[ArrayBuffer[PmaRegion]] + plugins.foreach { + case p: FetchCachelessPlugin => p.regions.load(getRegion(iBus)) + case p: FetchL1Plugin => p.regions.load(getRegion(iBus)) + case p: LsuCachelessPlugin => p.regions.load(getRegion(dBus)) + case p: LsuPlugin => p.ioRegions.load(getRegion(dBus)) + case p: LsuL1Plugin => p.regions.load(getRegion(lsuL1Bus)) + case _ => + } + + //Connect stuff + plugins.foreach { + case p: PrivilegedPlugin => { + val hart = p.logic.harts(0) + hart.int.m.timer := priv.get.mti.flag + hart.int.m.software := priv.get.msi.flag + hart.int.m.external := priv.get.mei.flag + if (p.p.withSupervisor) hart.int.s.external := priv.get.sei.flag + if (p.p.withRdTime) 
p.logic.rdtime := clint.get.thread.core.io.time + } + case _ => + } + } +} \ No newline at end of file diff --git a/src/main/scala/vexiiriscv/soc/demo/MicroSoc.scala b/src/main/scala/vexiiriscv/soc/demo/MicroSoc.scala new file mode 100644 index 00000000..d50abba6 --- /dev/null +++ b/src/main/scala/vexiiriscv/soc/demo/MicroSoc.scala @@ -0,0 +1,133 @@ +package vexiiriscv.soc.demo + +import rvls.spinal.RvlsBackend +import spinal.core._ +import spinal.core.sim._ +import spinal.core.fiber._ +import spinal.lib.{ResetCtrlFiber, StreamPipe} +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.bus.tilelink +import spinal.lib.bus.tilelink._ +import spinal.lib.bus.tilelink.fabric.Node +import spinal.lib.com.uart.TilelinkUartFiber +import spinal.lib.com.uart.sim.{UartDecoder, UartEncoder} +import spinal.lib.eda.bench.Rtl +import spinal.lib.misc.{Elf, TilelinkClintFiber} +import spinal.lib.misc.plic.TilelinkPlicFiber +import spinal.lib.system.tag.PMA +import vexiiriscv.ParamSimple +import vexiiriscv.soc.TilelinkVexiiRiscvFiber +import vexiiriscv.test.VexiiRiscvProbe + +import java.io.File +import scala.collection.mutable.ArrayBuffer + +class MicroSoc() extends Component { + val asyncReset = in Bool() + val cd100 = ClockDomain.external("cd100", withReset = false, frequency = FixedFrequency(100 MHz)) + + val debugResetCtrl = cd100 on new ResetCtrlFiber().addAsyncReset(asyncReset, HIGH) + val mainResetCtrl = cd100 on new ResetCtrlFiber().addReset(debugResetCtrl) + + val main = mainResetCtrl.cd on new Area { + val sharedBus = tilelink.fabric.Node() + + val param = new ParamSimple() + param.withMul = false + param.withDiv = false + param.relaxedBranch = true + + val plugins = param.plugins() + val cpu = new TilelinkVexiiRiscvFiber(plugins) + sharedBus << cpu.buses + + val ram = new tilelink.fabric.RamFiber(16 KiB) + ram.up at 0x80000000l of sharedBus + + // Handle all the IO / Peripheral things + val peripheral = new Area { + val busXlen = Node().forceDataWidth(param.xlen) + busXlen << sharedBus + + val bus32 = Node().forceDataWidth(32) + bus32 << sharedBus + + val clint = new TilelinkClintFiber() + clint.node at 0x10010000 of busXlen + + val plic = new TilelinkPlicFiber() + plic.node at 0x10C00000 of bus32 + + val uart = new TilelinkUartFiber() + uart.node at 0x10001000 of bus32 + plic.mapUpInterrupt(1, uart.interrupt) + + val cpuClint = cpu.bind(clint) + val cpuPlic = cpu.bind(plic) + } + } +} + +object MicroSocGen extends App{ + SpinalVerilog(new MicroSoc()) +} + +object MicroSocSynt extends App{ + import spinal.lib.eda.bench._ + val rtls = ArrayBuffer[Rtl]() + rtls += Rtl(SpinalVerilog(new MicroSoc())) + + val targets = XilinxStdTargets().take(2) + Bench(rtls, targets) +} + + +object MicroSocSim extends App{ + var traceKonata = false + var withRvlsCheck = false + var elf: Elf = null + val sim = SimConfig + sim.withTimeSpec(1 ns, 1 ps) + + assert(new scopt.OptionParser[Unit]("VexiiRiscv") { + help("help").text("prints this usage text") + opt[String]("load-elf") action { (v, c) => elf = new Elf(new File(v), 32) } + opt[Unit]("trace-konata") action { (v, c) => traceKonata = true } + opt[Unit]("check-rvls") action { (v, c) => withRvlsCheck = true } + sim.addOptions(this) + }.parse(args, Unit).nonEmpty) + + + sim.compile(new MicroSoc()).doSimUntilVoid("test", seed = 42){dut => + dut.cd100.forkStimulus() + dut.asyncReset #= true + delayed(100 ns)(dut.asyncReset #= false) + + val uartBaudPeriod = hzToLong(115200 Hz) + val uartTx = UartDecoder( + uartPin = dut.main.peripheral.uart.logic.uart.txd, + 
baudPeriod = uartBaudPeriod + ) + val uartRx = UartEncoder( + uartPin = dut.main.peripheral.uart.logic.uart.rxd, + baudPeriod = uartBaudPeriod + ) + + val konata = traceKonata.option( + new vexiiriscv.test.konata.Backend(new File(currentTestPath, "konata.log")).spinalSimFlusher(hzToLong(1000 Hz)) + ) + val probe = new VexiiRiscvProbe( + cpu = dut.main.cpu.logic.core, + kb = konata + ) + + if (withRvlsCheck) probe.add(new RvlsBackend(new File(currentTestPath)).spinalSimFlusher(hzToLong(1000 Hz))) + + probe.autoRegions() + + if(elf != null) { + elf.load(dut.main.ram.thread.logic.mem, 0x80000000l) + probe.backends.foreach(_.loadElf(0, elf.f)) + } + } +} diff --git a/src/main/scala/vexiiriscv/test/VexiiRiscvProbe.scala b/src/main/scala/vexiiriscv/test/VexiiRiscvProbe.scala index ca92f588..2018671a 100644 --- a/src/main/scala/vexiiriscv/test/VexiiRiscvProbe.scala +++ b/src/main/scala/vexiiriscv/test/VexiiRiscvProbe.scala @@ -3,11 +3,12 @@ package vexiiriscv.test import rvls.spinal.{TraceBackend, TraceIo} import spinal.core._ import spinal.core.sim._ +import spinal.lib.bus.misc.SizeMapping import spinal.lib.misc.database.Element import vexiiriscv.Global.PC_WIDTH import vexiiriscv._ import vexiiriscv.decode.Decode -import vexiiriscv.execute.LsuCachelessPlugin +import vexiiriscv.execute.lsu._ import vexiiriscv.fetch.FetchPipelinePlugin import vexiiriscv.misc.PrivilegedPlugin //import vexiiriscv.execute.LsuCachelessPlugin @@ -18,7 +19,7 @@ import vexiiriscv.test.konata.{Comment, Flush, Retire, Spawn, Stage} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], withRvls : Boolean){ +class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], var withRvls : Boolean = true){ var enabled = true var trace = true var backends = ArrayBuffer[TraceBackend]() @@ -35,7 +36,12 @@ class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], withRvls : val microOpIdMask = (1 << microOpIdWidth)-1 val withFetch = true //cpu.host[FetchPipelinePlugin].idToFetch.keys.max > 1 - val disass = withRvls generate rvls.jni.Frontend.newDisassemble(xlen) + val disass = try { + if(!withRvls) 0 else rvls.jni.Frontend.newDisassemble(xlen) + } catch { + case e : Throwable => withRvls = false; 0 + } + val harts = hartsIds.map(new HartCtx(_)).toArray val wbp = cpu.host[WhiteboxerPlugin].logic.get val proxies = new wbp.Proxies(){ @@ -51,6 +57,20 @@ class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], withRvls : this } + + def autoRegions(): Unit = { + cpu.host.services.foreach { + case p: LsuCachelessPlugin => p.regions.foreach { region => + backends.foreach { b => + region.mapping match { + case SizeMapping(base, size) => b.addRegion(0, region.isIo.toInt, base.toLong, size.toLong) + } + } + } + case _ => + } + } + def flush(): Unit = { } @@ -142,7 +162,7 @@ class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], withRvls : if (get(Riscv.RVF)) isa += "F" if (get(Riscv.RVD)) isa += "D" if (get(Riscv.RVC)) isa += "C" - tracer.newCpuMemoryView(hartId, 16, 16) //TODO readIds writeIds + tracer.newCpuMemoryView(hartId, 16, 16) tracer.newCpu(hartId, isa, csrp, 63, hartId) val pc = if(xlen == 32) 0x80000000l else 0x80000000l tracer.setPc(hartId, pc) @@ -279,7 +299,7 @@ class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], withRvls : val sizeMask = Array(0xFFl, 0xFFFFl, 0xFFFFFFFFl, -1l) - val lsuClpb = cpu.host.get[LsuCachelessPlugin].map(_.logic.get.bus) + val lsuClpb = 
cpu.host.get[LsuCachelessBusProvider].map(_.getLsuCachelessBus()) val pendingIo = mutable.Queue[ProbeTraceIo]() class ProbeTraceIo extends TraceIo { @@ -435,6 +455,7 @@ class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], withRvls : } val offset = trace.address.toInt & (trace.size - 1) trace.data = (trace.data >> offset*8) & sizeMask(trace.sizel2) + trace.error = bus.rsp.error.toBoolean backends.foreach(_.ioAccess(trace.hartId, trace)) } } @@ -495,9 +516,9 @@ class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], withRvls : if(((wfi >> hart.hartId) & 1) != 0){ hart.lastCommitAt = cycle } - if (hart.lastCommitAt + 400l < cycle) { + if (hart.lastCommitAt + 4000l < cycle) { val status = if (hart.microOpAllocPtr != hart.microOpRetirePtr) f"waiting on uop 0x${hart.microOpRetirePtr}%X" else f"last uop id 0x${hart.lastUopId}%X" - simFailure(f"Vexii didn't commited anything since too long, $status") + simFailure(f"Vexii hasn't commited anything for too long, $status") } while (hart.microOpRetirePtr != hart.microOpAllocPtr && hart.microOp(hart.microOpRetirePtr).done) { diff --git a/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala b/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala index 45c00777..fbf51aec 100644 --- a/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala +++ b/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala @@ -8,6 +8,7 @@ import vexiiriscv.Global import vexiiriscv.Global.{HART_COUNT, TRAP} import vexiiriscv.decode.{Decode, DecodePipelinePlugin, DecoderPlugin} import vexiiriscv.execute._ +import vexiiriscv.execute.lsu._ import vexiiriscv.fetch.{Fetch, FetchPipelinePlugin} import vexiiriscv.misc.{PipelineBuilderPlugin, PrivilegedPlugin, TrapPlugin} import vexiiriscv.prediction.{BtbPlugin, LearnCmd, LearnPlugin} @@ -69,7 +70,7 @@ class WhiteboxerPlugin extends FiberPlugin{ val executes = for (eu <- host.list[ExecuteLaneService]) yield new Area { val c = eu.ctrl(eu.executeAt) - val fire = wrap(c.down.transactionSpawn) + val fire = wrap(c.down.transactionSpawn && c.down(Global.COMMIT)) val hartId = wrap(c(Global.HART_ID)) val microOpId = wrap(c(Decode.UOP_ID)) } @@ -133,13 +134,24 @@ class WhiteboxerPlugin extends FiberPlugin{ val lcp = host.get[LsuCachelessPlugin] map (p => new Area { val c = p.logic.wbCtrl - fire := c.down.isFiring && c(AguPlugin.SEL) && (c(AguPlugin.LOAD) || c(AguPlugin.AMO)) && !c(TRAP) && !c(p.logic.onAddress.translationPort.keys.IO) + fire := c.down.isFiring && c(AguPlugin.SEL) && c(AguPlugin.LOAD) && !c(TRAP) && !c(p.logic.onFork.PMA_RSP).io hartId := c(Global.HART_ID) uopId := c(Decode.UOP_ID) size := c(AguPlugin.SIZE).resized address := c(p.logic.onFork.tpk.TRANSLATED) data := host.find[IntFormatPlugin](_.laneName == p.layer.laneName).logic.stages.find(_.ctrlLink == c.ctrlLink).get.wb.payload }) + + + val lp = host.get[LsuPlugin] map (p => new Area { + val c = p.logic.onWb + fire := c.down.isFiring && c(AguPlugin.SEL) && c(AguPlugin.LOAD) && !c(TRAP) && !c(p.logic.onCtrl.IO) + hartId := c(Global.HART_ID) + uopId := c(Decode.UOP_ID) + size := c(AguPlugin.SIZE).resized + address := c(LsuL1.PHYSICAL_ADDRESS) + data := host.find[IntFormatPlugin](_.laneName == p.layer.laneName).logic.stages.find(_.ctrlLink == c.ctrlLink).get.wb.payload + }) } val storeCommit = new Area { @@ -161,6 +173,16 @@ class WhiteboxerPlugin extends FiberPlugin{ address := bus.cmd.address data := bus.cmd.data }) + + val lp = host.get[LsuPlugin] map (p => new Area { + val c = p.logic.onWb + fire := c.down.isFiring && c(AguPlugin.SEL) && 
c(AguPlugin.STORE) && !c(p.logic.onCtrl.IO) + hartId := c(Global.HART_ID) + uopId := c(Decode.UOP_ID) + size := c(AguPlugin.SIZE) + address := c(p.logic.tpk.TRANSLATED) + data := c(LsuL1.WRITE_DATA) + }) } val storeConditional = new Area { @@ -173,11 +195,18 @@ class WhiteboxerPlugin extends FiberPlugin{ val lcp = host.get[LsuCachelessPlugin] map (p => new Area { val c = p.logic.wbCtrl - fire := c.down.isFiring && c(AguPlugin.SEL) && (c(AguPlugin.SC)) && !c(TRAP) + fire := c.down.isFiring && c(AguPlugin.SEL) && (c(AguPlugin.ATOMIC) && !c(AguPlugin.LOAD)) && !c(TRAP) hartId := c(Global.HART_ID) uopId := c(Decode.UOP_ID) miss := c(p.logic.onJoin.SC_MISS) }) + val lp = host.get[LsuPlugin] map (p => new Area { + val c = p.logic.onWb + fire := c.down.isFiring && c(AguPlugin.SEL) && (c(AguPlugin.ATOMIC) && !c(AguPlugin.LOAD)) && !c(TRAP) + hartId := c(Global.HART_ID) + uopId := c(Decode.UOP_ID) + miss := c(p.logic.onCtrl.SC_MISS) + }) } val storeBroadcast = new Area { @@ -192,6 +221,12 @@ class WhiteboxerPlugin extends FiberPlugin{ hartId := c(Global.HART_ID) uopId := c(Decode.UOP_ID) }) + val lp = host.get[LsuPlugin] map (p => new Area { + val c = p.logic.onWb + fire := c.down.isFiring && c(AguPlugin.SEL) && !c(AguPlugin.LOAD) + hartId := c(Global.HART_ID) + uopId := c(Decode.UOP_ID) + }) } val wfi = wrap(host[TrapPlugin].logic.harts.map(_.trap.fsm.wfi).asBits) diff --git a/src/main/scala/vexiiriscv/test/konata/Backend.scala b/src/main/scala/vexiiriscv/test/konata/Backend.scala index a7d18ded..2ff88db7 100644 --- a/src/main/scala/vexiiriscv/test/konata/Backend.scala +++ b/src/main/scala/vexiiriscv/test/konata/Backend.scala @@ -81,10 +81,14 @@ class Backend(f : File) { def refresh(): Unit = { val cycleEnd = threads.map(_.cycleLock).min + var skips = 0 while(cycle != cycleEnd && pendings.nonEmpty){ + skips += 1 pendings.get(cycle) match { case Some(instrs) => { + bf.write(s"C\t$skips\n") + skips = 0 for(instr <- instrs){ if(instr.first) { instr.id = idAlloc @@ -100,9 +104,9 @@ class Backend(f : File) { } case None => } - bf.write("C\t1\n") cycle += 1 } + if (skips != 0) bf.write(s"C\t$skips\n") } @@ -116,10 +120,11 @@ class Backend(f : File) { bf.close() } - def spinalSimFlusher(period: Long): Unit = { + def spinalSimFlusher(period: Long): this.type = { periodicaly(period){ flush() } - onSimEnd(close()) + delayed(1)(onSimEnd(close())) + this } } diff --git a/src/main/scala/vexiiriscv/tester/TestBench.scala b/src/main/scala/vexiiriscv/tester/TestBench.scala index 85aee7ed..6b51d994 100644 --- a/src/main/scala/vexiiriscv/tester/TestBench.scala +++ b/src/main/scala/vexiiriscv/tester/TestBench.scala @@ -3,11 +3,17 @@ package vexiiriscv.tester import rvls.spinal.{FileBackend, RvlsBackend} import spinal.core._ import spinal.core.sim._ +import spinal.lib.bus.misc.{AddressMapping, SizeMapping} +import spinal.lib.bus.tilelink.{M2sTransfers, SizeRange} +import spinal.lib.bus.tilelink.sim.{Checker, MemoryAgent, TransactionA} import spinal.lib.misc.Elf +import spinal.lib.misc.plugin.Hostable import spinal.lib.misc.test.DualSimTracer import spinal.lib.sim.{FlowDriver, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import spinal.lib.system.tag.{MemoryTransfers, PmaRegion} import vexiiriscv._ -import vexiiriscv.fetch.PcService +import vexiiriscv.execute.lsu.{LsuCachelessPlugin, LsuL1, LsuL1Plugin, LsuL1TlPlugin, LsuPlugin} +import vexiiriscv.fetch.{FetchCachelessPlugin, FetchL1Plugin, PcService} import vexiiriscv.misc.PrivilegedPlugin import vexiiriscv.riscv.Riscv import 
vexiiriscv.test.konata.Backend @@ -109,6 +115,7 @@ class TestOptions{ val fsmTasks = mutable.Queue[FsmTask]() var ibusReadyFactor = 1.01f var dbusReadyFactor = 1.01f + var seed = 2 def getTestName() = testName.getOrElse("test") @@ -147,12 +154,14 @@ class TestOptions{ opt[String]("fsm-getc") unbounded() action { (v, c) => fsmTasks += new FsmGetc(v) } opt[Long]("fsm-sleep") unbounded() action { (v, c) => fsmTasks += new FsmSleep(v) } opt[Unit]("fsm-success") unbounded() action { (v, c) => fsmTasks += new FsmSuccess() } + opt[Int]("seed") action { (v, c) => seed = v } + opt[Unit]("rand-seed") action { (v, c) => seed = scala.util.Random.nextInt() } } def test(compiled : SimCompiled[VexiiRiscv]): Unit = { dualSim match { - case true => DualSimTracer.withCb(compiled, window = 50000 * 10, seed = 2)(test) - case false => compiled.doSimUntilVoid(name = getTestName(), seed = 2) { dut => disableSimWave(); test(dut, f => f) } + case true => DualSimTracer.withCb(compiled, window = 500000 * 10, seed=seed)(test) + case false => compiled.doSimUntilVoid(name = getTestName(), seed=seed) { dut => disableSimWave(); test(dut, f => f) } } } @@ -161,8 +170,8 @@ class TestOptions{ cd.forkStimulus(10) simSpeedPrinter.foreach(cd.forkSimSpeedPrinter) - failAfter.map(delayed(_)(simFailure("Reached Timeout"))) - passAfter.map(delayed(_)(simSuccess())) + failAfter.foreach(delayed(_)(simFailure("Reached Timeout"))) + passAfter.foreach(delayed(_)(simSuccess())) // fork{ // while(true){ @@ -176,13 +185,13 @@ class TestOptions{ val xlen = dut.database(Riscv.XLEN) // Rvls will check that the CPUs are doing things right - val rvls = withRvlsCheck generate new RvlsBackend(new File(currentTestPath)) + val rvls = withRvlsCheck generate new RvlsBackend(new File(currentTestPath())) if (withRvlsCheck) { rvls.spinalSimFlusher(10 * 10000) rvls.spinalSimTime(10000) } - val konataBackend = traceKonata.option(new Backend(new File(currentTestPath, "konata.log"))) + val konataBackend = traceKonata.option(new Backend(new File(currentTestPath(), "konata.log"))) delayed(1)(konataBackend.foreach(_.spinalSimFlusher(10 * 10000))) // Delayed to ensure this is registred last // Collect traces from the CPUs behaviour @@ -192,11 +201,11 @@ class TestOptions{ probe.trace = false // Things to enable when we want to collect traces - val tracerFile = traceRvlsLog.option(new FileBackend(new File(currentTestPath, "tracer.log"))) + val tracerFile = traceRvlsLog.option(new FileBackend(new File(currentTestPath(), "tracer.log"))) onTrace { - if(traceWave) enableSimWave() + if (traceWave) enableSimWave() if (withRvlsCheck && traceSpikeLog) rvls.debug() - if(traceKonata) probe.trace = true + if (traceKonata) probe.trace = true tracerFile.foreach{f => f.spinalSimFlusher(10 * 10000) @@ -219,7 +228,7 @@ class TestOptions{ for ((offset, file) <- bins) { mem.loadBin(offset, file) if (withRvlsCheck) rvls.loadBin(offset, file) - tracerFile.foreach(_.loadBin(0, file)) + tracerFile.foreach(_.loadBin(offset, file)) } // load elfs @@ -246,7 +255,7 @@ class TestOptions{ val failSymbol = if(withFail) trunkPc(elf.getSymbolAddress("fail")) else -1 probe.commitsCallbacks += { (hartId, pc) => if (pc == passSymbol) delayed(1)(simSuccess()) - if (pc == failSymbol) delayed(1)(simFailure("Software reach the fail symbole :(")) + if (pc == failSymbol) delayed(1)(simFailure("Software reached the fail symbol :(")) } } } @@ -275,7 +284,7 @@ class TestOptions{ val cmd = pending.randomPop() p.word #= mem.readBytes(cmd.address, p.p.dataWidth / 8) p.id #= cmd.id - p.error #= cmd.address 
< 0x10000000 + p.error #= cmd.address < 0x20000000 } doIt } @@ -311,13 +320,13 @@ class TestOptions{ rspDriver.setFactor(ibusReadyFactor) } - val lsclp = dut.host.get[execute.LsuCachelessPlugin].map { p => - val bus = p.logic.bus + val lsclp = dut.host.get[execute.lsu.LsuCachelessBusProvider].map { p => + val bus = p.getLsuCachelessBus() val cmdReady = StreamReadyRandomizer(bus.cmd, cd) bus.cmd.ready #= true var reserved = false - case class Access(write : Boolean, address: Long, data : Array[Byte], bytes : Int, io : Boolean, hartId : Int, uopId : Int, amoEnable : Boolean, amoOp : Int) + case class Access(id : Int, write : Boolean, address: Long, data : Array[Byte], bytes : Int, io : Boolean, hartId : Int, uopId : Int, amoEnable : Boolean, amoOp : Int) val pending = mutable.Queue[Access]() val cmdMonitor = StreamMonitor(bus.cmd, cd) { p => @@ -326,6 +335,7 @@ class TestOptions{ val offset = address.toInt & (bytes-1) pending.enqueue( Access( + p.id.toInt, p.write.toBoolean, address, p.data.toBytes.drop(offset).take(bytes), @@ -375,7 +385,7 @@ class TestOptions{ error = read(bytes, cmd.address.toInt & (p.p.dataWidth / 8 - 1)) } } else { - import vexiiriscv.execute.CachelessBusAmo._ + import vexiiriscv.execute.lsu.LsuCachelessBusAmo._ cmd.amoOp match { case LR => { error = read(bytes, cmd.address.toInt & (p.p.dataWidth / 8 - 1)) @@ -418,6 +428,7 @@ class TestOptions{ } p.data #= bytes p.error #= error + p.id #= cmd.id if(p.scMiss != null) p.scMiss #= scMiss if(cmd.address < 0x10000000) p.error #= true } @@ -440,6 +451,22 @@ class TestOptions{ peripheral.putcListeners += (c => if (fsmTasks.nonEmpty) fsmTasks.head.getc(hal, c)) } + + val lsul1 = dut.host.get[LsuL1TlPlugin] map (p => new Area{ + val ma = new MemoryAgent(p.bus, cd, seed = 0, randomProberFactor = if(dbusReadyFactor < 1.0) 0.2f else 0.0f, memArg = Some(mem))(null) { + driver.driver.setFactor(dbusReadyFactor) + val checker = if (monitor.bus.p.withBCE) Checker(monitor) + override def checkAddress(address: Long) = address >= 0x20000000 + override def delayOnA(a: TransactionA) = { +// if(a.address == 0x81820000l){ +// println(f"miaou ${mem.readByteAsInt(0x817FFFF3l)}%x") +// println(s"\n!! 
$simTime ${a.opcode.getName} ${a.data}") +// } + if(dbusReadyFactor < 1.0) super.delayOnA(a) + } + } + }) + if(printStats) onSimEnd{ println(probe.getStats()) } @@ -449,6 +476,55 @@ class TestOptions{ object TestBench extends App{ doIt() + def paramToPlugins(param : ParamSimple): ArrayBuffer[Hostable] = { + val ret = param.plugins() + ret.collectFirst{case p : LsuL1Plugin => p}.foreach{p => + p.ackIdWidth = 8 + p.probeIdWidth = 4 + ret += new LsuL1TlPlugin + } + val regions = ArrayBuffer( + new PmaRegion{ + override def mapping: AddressMapping = SizeMapping(0x80000000l, 0x80000000l) + override def transfers: MemoryTransfers = M2sTransfers( + get = SizeRange.all, + putFull = SizeRange.all, + ) + override def isMain: Boolean = true + override def isExecutable: Boolean = true + }, + new PmaRegion{ + override def mapping: AddressMapping = SizeMapping(0x10000000l, 0x10000000l) + override def transfers: MemoryTransfers = M2sTransfers( + get = SizeRange.all, + putFull = SizeRange.all, + ) + override def isMain: Boolean = false + override def isExecutable: Boolean = true + }, + new PmaRegion{ + override def mapping: AddressMapping = SizeMapping(0x1000, 0x1000) + override def transfers: MemoryTransfers = M2sTransfers( + get = SizeRange.all, + putFull = SizeRange.all, + ) + override def isMain: Boolean = true + override def isExecutable: Boolean = true + } + + ) + ret.foreach{ + case p: FetchCachelessPlugin => p.regions.load(regions) + case p: LsuCachelessPlugin => p.regions.load(regions) + case p: FetchL1Plugin => p.regions.load(regions) + case p: LsuPlugin => p.ioRegions.load(regions) + case p: LsuL1Plugin => p.regions.load(regions) + case _ => + } + + ret + } + def doIt(param : ParamSimple = new ParamSimple()) { val testOpt = new TestOptions() @@ -468,7 +544,7 @@ object TestBench extends App{ println(s"With Vexiiriscv parm :\n - ${param.getName()}") val compiled = TestBench.synchronized { // To avoid to many calls at the same time - simConfig.compile(VexiiRiscv(param.plugins())) + simConfig.compile(VexiiRiscv(paramToPlugins(param))) } testOpt.test(compiled) Thread.sleep(10) diff --git a/src/test/scala/vexiiriscv/scratchpad/Play.scala b/src/test/scala/vexiiriscv/scratchpad/Play.scala index a5009e6e..91a5b55b 100644 --- a/src/test/scala/vexiiriscv/scratchpad/Play.scala +++ b/src/test/scala/vexiiriscv/scratchpad/Play.scala @@ -6,7 +6,7 @@ import spinal.lib.misc.PathTracer import vexiiriscv._ import vexiiriscv.VexiiRiscv import vexiiriscv.compat.MultiPortWritesSymplifier -import vexiiriscv.execute.{LsuCachelessPlugin, SrcPlugin} +import vexiiriscv.execute.{SrcPlugin} object Play1 extends App { val sc = SpinalConfig() diff --git a/src/test/scala/vexiiriscv/scratchpad/Synt.scala b/src/test/scala/vexiiriscv/scratchpad/Synt.scala index d41d318e..3e7d95d7 100644 --- a/src/test/scala/vexiiriscv/scratchpad/Synt.scala +++ b/src/test/scala/vexiiriscv/scratchpad/Synt.scala @@ -17,7 +17,7 @@ object IntegrationSynthBench extends App{ def add(param : ParamSimple, name : String) = { rtls += Rtl(sc.generateVerilog { - Rtl.ffIo(VexiiRiscv(param.plugins()).setDefinitionName(if(name.isEmpty) param.getName() else name)) + Rtl.ffIo(VexiiRiscv(ParamSimple.setPma(param.plugins())).setDefinitionName(if(name.isEmpty) param.getName() else name)) }) } @@ -27,10 +27,80 @@ object IntegrationSynthBench extends App{ add(p, postfix) } -// add(""){ p => + add(""){ p => + p.regFileSync = false + p.withMul = false + p.withDiv = false + } + add("") { p => + p.regFileSync = false + p.withMul = false + p.withDiv = false + 
p.withIterativeShift = true + } +// add("") { p => // p.regFileSync = false // p.withMul = false // p.withDiv = false +// p.withLsuL1 = true +// } +// add("") { p => +// import p._ +// decoders = 1 +// lanes = 1 +// regFileSync = false +// withGShare = false +// withBtb = false +// withRas = false +// withMul = false +// withDiv = false +// withLateAlu = false +// allowBypassFrom = 0 +// relaxedBranch = true +// relaxedShift = false +// relaxedSrc = true +// performanceCounters = 0 +// privParam.withSupervisor = true +// privParam.withUser = true +// withMmu = false +// withRva = true +// withRvc = false +// withAlignerBuffer = withRvc +// withFetchL1 = true +// withLsuL1 = true +// xlen = 32 +// lsuL1Sets = 64 +// lsuL1Ways = 1 +// withLsuBypass = false +// } +// add("") { p => +// import p._ +// decoders = 1 +// lanes = 1 +// regFileSync = false +// withGShare = true +// withBtb = true +// withRas = true +// withMul = false +// withDiv = false +// withLateAlu = false +// allowBypassFrom = 0 +// relaxedBranch = true +// relaxedShift = false +// relaxedSrc = true +// performanceCounters = 0 +// privParam.withSupervisor = true +// privParam.withUser = true +// withMmu = false +// withRva = true +// withRvc = false +// withAlignerBuffer = withRvc +// withFetchL1 = true +// withLsuL1 = true +// xlen = 32 +// lsuL1Sets = 64 +// lsuL1Ways = 1 +// withLsuBypass = false // } // add("") { p => // p.regFileSync = false @@ -38,23 +108,32 @@ object IntegrationSynthBench extends App{ // p.withDiv = false // p.allowBypassFrom = 0 // } - add("") { p => - p.regFileSync = false - p.withMul = false - p.withDiv = false - p.withGShare = true - p.withBtb = true - p.withRas = true - } - add("") { p => - p.regFileSync = false - p.withMul = false - p.withDiv = false - p.withGShare = true - p.withBtb = true - p.withRas = true - p.relaxedBranch = true - } +// add("") { p => +// p.regFileSync = false +// p.withMul = false +// p.withDiv = false +// p.allowBypassFrom = 0 +// p.withLateAlu = true +// } + +// add("") { p => +// p.regFileSync = false +// p.withMul = false +// p.withDiv = false +// p.withGShare = true +// p.withBtb = true +// p.withRas = true +// } +// add("") { p => +// p.regFileSync = false +// p.withMul = false +// p.withDiv = false +// p.withGShare = true +// p.withBtb = true +// p.withRas = true +// p.relaxedBranch = true +// } + // add("") { p => // p.regFileSync = false // p.withMul = false @@ -195,4 +274,24 @@ rv32i_d1_l1_rfa_btb_ras_gshare_rbra_rsrc -> Artix 7 -> 90 Mhz 1456 LUT 1123 FF Artix 7 -> 164 Mhz 1573 LUT 1123 FF +rv32i_d1_l1_rfa_rsrc -> +Artix 7 -> 90 Mhz 1172 LUT 870 FF +Artix 7 -> 212 Mhz 1255 LUT 870 FF +rv32i_d1_l1_rfa_lsul1_rsrc -> +Artix 7 -> 90 Mhz 1327 LUT 1198 FF +Artix 7 -> 200 Mhz 1439 LUT 1200 FF + +rv32iasu_d1_l1_rfa_fl1_lsul1xW1xS64_bp0_rsrc -> +Artix 7 -> 90 Mhz 1959 LUT 1574 FF +Artix 7 -> 177 Mhz 2091 LUT 1590 FF +rv32iasu_d1_l1_rfa_fl1_lsul1xW1xS64_bp0_btb_ras_gshare_rsrc -> +Artix 7 -> 87 Mhz 2169 LUT 1810 FF +Artix 7 -> 119 Mhz 2333 LUT 1824 FF + +rv32i_d1_l1_rfa_rsrc -> +Artix 7 -> 90 Mhz 1153 LUT 933 FF +Artix 7 -> 193 Mhz 1236 LUT 935 FF +rv32i_d1_l1_rfa_rsrc_isft -> +Artix 7 -> 90 Mhz 1108 LUT 972 FF +Artix 7 -> 187 Mhz 1218 LUT 974 FF */ \ No newline at end of file diff --git a/src/test/scala/vexiiriscv/scratchpad/doc/Miaouuuu.scala b/src/test/scala/vexiiriscv/scratchpad/doc/Miaouuuu.scala index e0ac9083..58623bc1 100644 --- a/src/test/scala/vexiiriscv/scratchpad/doc/Miaouuuu.scala +++ b/src/test/scala/vexiiriscv/scratchpad/doc/Miaouuuu.scala @@ -43,10 +43,10 
@@ object Miaouuuu2 extends App{ import scala.collection.mutable.ArrayBuffer class EventCounterPlugin extends FiberPlugin{ - val lock = Retainer() // Will allow other plugins to block the elaboration of "logic" thread + val hostLockX = Retainer() // Will allow other plugins to block the elaboration of "logic" thread val events = ArrayBuffer[Bool]() // Will allow other plugins to add event sources val logic = during build new Area{ - lock.await() // Active blocking + hostLockX.await() // Active blocking val counter = Reg(UInt(32 bits)) init(0) counter := counter + CountOne(events) } @@ -62,7 +62,7 @@ object Miaouuuu2 extends App{ val ecp = host[EventCounterPlugin] // Search for the single instance of EventCounterPlugin in the plugin pool // Generate a lock to prevent the EventCounterPlugin elaboration until we release it. // this will allow us to add our localEvent to the ecp.events list - val ecpLocker = ecp.lock() + val ecpLocker = ecp.hostLockX() // Wait for the build phase before generating any hardware awaitBuild() diff --git a/src/test/scala/vexiiriscv/tester/Regression.scala b/src/test/scala/vexiiriscv/tester/Regression.scala index 85b43a9a..03764221 100644 --- a/src/test/scala/vexiiriscv/tester/Regression.scala +++ b/src/test/scala/vexiiriscv/tester/Regression.scala @@ -1,7 +1,6 @@ package vexiiriscv.tester import org.apache.commons.io.FileUtils -import org.scalatest.funsuite.AnyFunSuite import spinal.core._ import spinal.core.sim._ import spinal.lib.misc.plugin.Hostable @@ -151,7 +150,8 @@ class RegressionSingle(compiled : SimCompiled[VexiiRiscv], val regulars = ArrayBuffer("dhrystone", "coremark_vexii", "machine_vexii") - priv.filter(_.p.withSupervisor).foreach(_ => regulars ++= List("supervisor", s"mmu_sv${if(xlen == 32) 32 else 39}")) + priv.filter(_.p.withSupervisor).foreach(_ => regulars ++= List("supervisor")) + if(mmu.nonEmpty) regulars ++= List(s"mmu_sv${if(xlen == 32) 32 else 39}") for(name <- regulars){ val args = newArgs() args.loadElf(new File(nsf, s"baremetal/$name/build/$arch/$name.elf")) @@ -182,7 +182,7 @@ class RegressionSingle(compiled : SimCompiled[VexiiRiscv], args.name(s"freertos/$name") } - if(withBuildroot && rvm && rva) priv.filter(_.p.withSupervisor).foreach{ _ => + if(withBuildroot && rvm && rva && mmu.nonEmpty) priv.filter(_.p.withSupervisor).foreach{ _ => val path = s"ext/NaxSoftware/buildroot/images/$archLinux" val args = newArgs() args.failAfter(10000000000l) @@ -234,6 +234,8 @@ class RegressionSingle(compiled : SimCompiled[VexiiRiscv], val testPath = new File(compiled.simConfig.getTestPath(t.testName.get)) val passFile = new File(testPath, "PASS") val failFile = new File(testPath, "FAIL") + FileUtils.deleteQuietly(passFile) + FileUtils.deleteQuietly(failFile) val testName = t.testName.get if(!passFile.exists()){ @@ -245,7 +247,6 @@ class RegressionSingle(compiled : SimCompiled[VexiiRiscv], argsFile.close() t.test(compiled) - FileUtils.deleteQuietly(failFile) val bf = new BufferedWriter(new FileWriter(passFile)) bf.flush() bf.close() @@ -273,8 +274,9 @@ object RegressionSingle extends App{ val regression = new RegressionSingle(compiled, dutArgs) println("*" * 80) val fails = regression.jobs.filter(_.failed) - if (fails.size == 0) { - println("PASS"); return + if (fails.isEmpty) { + println("PASS") + return } println(s"FAILED ${fails.size}/${regression.jobs.size}") for (fail <- fails) { @@ -285,7 +287,7 @@ object RegressionSingle extends App{ } def test(ps : ParamSimple, dutArgs : Seq[String] = Nil): Unit = { - test(ps.getName(), ps.plugins(), dutArgs) 
+ test(ps.getName(), TestBench.paramToPlugins(ps), dutArgs) } def test(args : String) : Unit = test(args.split(" ")) @@ -342,7 +344,30 @@ class Regression extends MultithreadedFunSuite(sys.env.getOrElse("VEXIIRISCV_REG addDim("rva", List("", "--with-mul --with-div --with-rva")) addDim("rvc", List("", "--with-mul --with-div --with-rvc")) addDim("late-alu", List("", "--with-late-alu")) - addDim("fetch", List("", "--with-fetch-l1")) + addDim("fetch", { + val p = ArrayBuffer[String]("") + for (bytes <- List(1 << 10, 1 << 12, 1 << 14); + sets <- List(16, 32, 64)) { + if (bytes / sets >= 64) { + val ways = bytes / sets / 64 + p += s"--with-fetch-l1 --fetch-l1-sets=$sets --fetch-l1-ways=$ways" + } + } + p + }) + addDim("lsu", { + val p = ArrayBuffer[String]("") + for(bytes <- List(1 << 10, 1 << 12, 1 << 14); + sets <- List(16 , 32, 64)){ + if(bytes / sets >= 64) { + val ways = bytes / sets / 64 + p += s"--with-lsu-l1 --lsu-l1-sets=$sets --lsu-l1-ways=$ways" + } + } + p + }) + addDim("lsu bypass", List("", "--with-lsu-bypass")) + addDim("ishift", List("", "--with-iterative-shift")) val default = "--with-mul --with-div --performance-counters 4"