Skip to content

Commit

Permalink
Fix bitmanip, add UW shift to shifters
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasWallner committed Apr 2, 2024
1 parent 38e1f88 commit 08d3765
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 81 deletions.
4 changes: 2 additions & 2 deletions src/main/scala/vexiiriscv/Param.scala
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,8 @@ class ParamSimple(){
plugins += new prediction.HistoryPlugin()
}
/** Builds the shifter plugin for `layer`, chosen by `withIterativeShift`:
  * a BarrelShifterPlugin when it is false, an IterativeShifterPlugin when it is true.
  *
  * NOTE(review): this text is a diff view whose header reports "2 additions & 2 deletions",
  * so the two `case` lines with positional arguments appear to be the removed lines and the
  * two with named arguments their replacements - confirm against the committed file.
  *
  * @param layer    lane layer handed to the plugin constructor
  * @param shiftAt  forwarded to the plugin's `shiftAt` parameter
  * @param formatAt forwarded to the plugin's `formatAt` parameter
  */
def shifter(layer: LaneLayer, shiftAt: Int = 0, formatAt: Int = 0) = withIterativeShift match {
// Positional-argument form (no with_slli_uw argument).
case false => new BarrelShifterPlugin(layer, shiftAt, formatAt)
case true => new IterativeShifterPlugin(layer, shiftAt, formatAt)
// Named-argument form: additionally passes withRvZb as with_slli_uw, which gates the
// SLLI_UW decoding in both shifter plugins.
case false => new BarrelShifterPlugin(layer, with_slli_uw=withRvZb, shiftAt=shiftAt, formatAt=formatAt)
case true => new IterativeShifterPlugin(layer, with_slli_uw=withRvZb, shiftAt=shiftAt, formatAt=formatAt)
}


Expand Down
15 changes: 15 additions & 0 deletions src/main/scala/vexiiriscv/execute/BarrelShifterPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ object BarrelShifterPlugin extends AreaObject {
val LEFT = Payload(Bool())
val IS_W = Payload(Bool())
val IS_W_RIGHT = Payload(Bool())
val IS_UW = Payload(Bool())
}

class BarrelShifterPlugin(val layer : LaneLayer,
var with_slli_uw: Boolean = false,
var shiftAt : Int = 0,
var formatAt : Int = 0) extends ExecutionUnitElementSimple(layer) {
import BarrelShifterPlugin._
Expand All @@ -38,6 +40,8 @@ class BarrelShifterPlugin(val layer : LaneLayer,
if (Riscv.XLEN.get == 64) {
for (op <- List(Rvi.SLL, Rvi.SRL, Rvi.SRA, Rvi.SLLI, Rvi.SRLI, Rvi.SRAI)) {
layer(op).addDecoding(IS_W -> False, IS_W_RIGHT -> False)
if (with_slli_uw)
layer(op).addDecoding(IS_UW -> False)
}
add(Rvi.SLLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True , SIGNED -> False, IS_W -> True, IS_W_RIGHT -> False )
add(Rvi.SRLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, SIGNED -> False, IS_W -> True, IS_W_RIGHT -> True )
Expand All @@ -47,6 +51,11 @@ class BarrelShifterPlugin(val layer : LaneLayer,
add(Rvi.SRAIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> False, SIGNED -> True , IS_W -> True, IS_W_RIGHT -> True )
for (op <- List(Rvi.SLLW, Rvi.SRLW, Rvi.SRAW, Rvi.SLLIW, Rvi.SRLIW, Rvi.SRAIW)) {
ifp.signExtend(wb, layer(op), 32)
if (with_slli_uw)
layer(op).addDecoding(IS_UW -> False)
}
if(with_slli_uw) {
add(RvZbx.SLLI_UW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> True , SIGNED -> False, IS_W -> False, IS_W_RIGHT -> False, IS_UW -> True)
}
}

Expand All @@ -66,6 +75,12 @@ class BarrelShifterPlugin(val layer : LaneLayer,
when(IS_W) {
amplitude(5) := False
}
if(with_slli_uw) {
when(IS_UW) {
// remove lower bits since we are post-shift
reversed(31 downto 0) := 0
}
}
}

SHIFT_RESULT := B(patched)
Expand Down
130 changes: 87 additions & 43 deletions src/main/scala/vexiiriscv/execute/Bitmanip.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,46 +14,67 @@ object ZbPlugin {
/** Creates the sequence of Zb* bit-manipulation plugins for `layer`.
  *
  * `executeAt` and `formatAt` are forwarded to every plugin constructor that accepts them;
  * ZbbByteReversePlugin and ZbbExtendPlugin only receive `formatAt`.
  *
  * NOTE(review): this text is a diff view; the bare `Seq(...)` expression appears to be the
  * removed version and `val plugins = Seq(...)` its replacement - confirm against the
  * committed file.
  *
  * @return the plugins, in the order listed below
  */
def make(layer: LaneLayer,
executeAt: Int = 0,
formatAt: Int = 0) = {
// Bare Seq expression: its value is not bound to a name.
Seq(
new ZbaPlugin(layer, executeAt, formatAt),
new ZbbLogicPlugin(layer, executeAt, formatAt),
new ZbbCountPlugin(layer, executeAt, formatAt),
new ZbbMinMaxPlugin(layer, executeAt, formatAt),
new ZbbRotatePlugin(layer, executeAt, formatAt),
new ZbbOrPlugin(layer, executeAt, formatAt),
new ZbbByteReversePlugin(layer, formatAt),
new ZbbExtendPlugin(layer, formatAt),
new ZbcPlugin(layer, executeAt, formatAt),
new ZbsPlugin(layer, executeAt, executeAt, formatAt)
)
// Same plugin list, bound to `plugins` so a setup hook can be attached before returning it.
val plugins = Seq(
new ZbaPlugin(layer, executeAt, formatAt),
new ZbbLogicPlugin(layer, executeAt, formatAt),
new ZbbCountPlugin(layer, executeAt, formatAt),
new ZbbMinMaxPlugin(layer, executeAt, formatAt),
new ZbbRotatePlugin(layer, executeAt, formatAt),
new ZbbOrPlugin(layer, executeAt, formatAt),
new ZbbByteReversePlugin(layer, formatAt),
new ZbbExtendPlugin(layer, formatAt),
new ZbcPlugin(layer, executeAt, formatAt),
// NOTE(review): executeAt is passed twice here; the meaning of ZbsPlugin's second and
// third parameters is not visible in this view - confirm.
new ZbsPlugin(layer, executeAt, executeAt, formatAt)
)
// Sets Riscv.RVZbb to true during the setup phase of the first plugin in the list (the
// ZbaPlugin instance). Presumably done here because the Zbb instructions are spread over
// several plugins, so no single Zbb plugin owns the flag - TODO confirm.
plugins.head.during setup {
Riscv.RVZbb.set(true)
}
plugins
}
}

// Decode payloads used by the ZbaPlugin class.
// NOTE(review): this text is a diff view; the 3-bit MUX line appears to be the removed
// definition and the 4-bit MUX line its replacement - confirm against the committed file.
object ZbaPlugin {
// 3-bit selector, matching the B"001" / B"010" / B"100" decodings of SH1ADD/SH2ADD/SH3ADD.
val MUX = Payload(Bits(3 bit))
// 4-bit selector fed to MuxOH(MUX, Seq(rs1, sh1, sh2, sh3)) in ZbaPlugin's execute stage:
// bit 0 selects unshifted rs1 (decoded as B"0001" for ADD_UW), bits 1..3 select rs1 shifted
// left by 1, 2 and 3 (SH1ADD/SH2ADD/SH3ADD and their _UW variants).
val MUX = Payload(Bits(4 bit))
// Set for the *_UW instructions; when XLEN is 64, ZbaPlugin's execute stage zeroes
// rs1(63 downto 32) while UW is true.
val UW = Payload(Bool())
}

// TODO check whether MUX bit 0 (used only by ADD_UW, which is registered for XLEN == 64 only) is optimized out on 32-bit platforms
class ZbaPlugin(val layer: LaneLayer,
val executeAt: Int = 0,
val formatAt: Int = 0) extends ExecutionUnitElementSimple(layer) {
val RESULT = Payload(SInt(Riscv.XLEN bits))

val logic = during setup new Logic {
Riscv.RVZba.set(true)
awaitBuild()
import SrcKeys._
import ZbaPlugin._

val wb = newWriteback(ifp, formatAt)
add(RvZbx.SH1ADD).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"001")
add(RvZbx.SH2ADD).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"010")
add(RvZbx.SH3ADD).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"100")
add(RvZbx.SH1ADD).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"0010", UW -> False)
add(RvZbx.SH2ADD).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"0100", UW -> False)
add(RvZbx.SH3ADD).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"1000", UW -> False)

if(Riscv.XLEN.get == 64) {
add(RvZbx.SH1ADD_UW).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"0010", UW -> True)
add(RvZbx.SH2ADD_UW).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"0100", UW -> True)
add(RvZbx.SH3ADD_UW).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"1000", UW -> True)
add(RvZbx.ADD_UW).srcs(SRC1.RF, SRC2.RF).decode(MUX -> B"0001", UW -> True)
}
uopRetainer.release()

val execute = new el.Execute(executeAt) {
val sh1 = srcp.SRC1 |<< 1
val sh2 = srcp.SRC1 |<< 2
val sh3 = srcp.SRC1 |<< 3
val sh = MuxOH(MUX, Seq(sh1, sh2, sh3))
val rs1 = el(IntRegFile, RS1).asSInt
if (Riscv.XLEN.get == 64) {
when(UW) {
rs1(63 downto 32) := 0
}
}

val sh1 = rs1 |<< 1
val sh2 = rs1 |<< 2
val sh3 = rs1 |<< 3
val sh = MuxOH(MUX, Seq(rs1, sh1, sh2, sh3))
RESULT := el(IntRegFile, RS2).asSInt + sh
}

Expand Down Expand Up @@ -107,6 +128,7 @@ object ZbbCountPlugin extends AreaObject {
val INVERT = Payload(Bool())
val MASK = Payload(Bool())
val WORD = Payload(Bool())
val OR = Payload(Bool())
}

class ZbbCountPlugin(val layer: LaneLayer,
Expand All @@ -122,19 +144,28 @@ class ZbbCountPlugin(val layer: LaneLayer,

// TODO use ifp for getting word instead of mask
val wb = newWriteback(ifp, formatAt)
add(RvZbx.CLZ).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> False, WORD -> False)
add(RvZbx.CTZ).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> True, WORD -> False)
add(RvZbx.CPOP).srcs(SRC1.RF).decode(MASK -> False, INVERT -> False, WORD -> False)
add(RvZbx.CLZW).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> False, WORD -> True)
add(RvZbx.CTZW).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> True, WORD -> True)
add(RvZbx.CPOPW).srcs(SRC1.RF).decode(MASK -> True, INVERT -> False, WORD -> True)
add(RvZbx.CLZ).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> False, WORD -> False, OR -> False)
add(RvZbx.CTZ).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> True, WORD -> False, OR -> False)
add(RvZbx.CPOP).srcs(SRC1.RF).decode(MASK -> False, INVERT -> False, WORD -> False, OR -> False)
if(Riscv.XLEN.get == 64) {
add(RvZbx.CLZW).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> False, WORD -> True, OR -> True)
add(RvZbx.CTZW).srcs(SRC1.RF).decode(MASK -> True, INVERT -> True, FLIP -> True, WORD -> True, OR -> False)
add(RvZbx.CPOPW).srcs(SRC1.RF).decode(MASK -> False, INVERT -> False, WORD -> True, OR -> False)
}

uopRetainer.release()

val count = new el.Execute(executeAt) {
// TODO optimize by merging stuff
val rs1 = el(IntRegFile, RS1).asBits
val inverted = rs1 ^ (apply(INVERT) #* Riscv.XLEN.get)
// TODO explicitly build tree instead of the long combinatorial path...
val rs1 = CombInit(this(el(IntRegFile, RS1)))
if (Riscv.XLEN.get == 64) {
when(WORD) {
rs1(63 downto 32) := 0
}
}
val invertMask = B(Riscv.XLEN.get bit, (31 downto 0) -> this(INVERT), default -> (this(INVERT) & !this(WORD)))
val orMask = B(Riscv.XLEN.get bit, (31 downto 0) -> False, default -> this(OR))
val inverted = (rs1 ^ invertMask) | orMask
val flipped = FLIP ? inverted.reversed | inverted
val masked = Vec(Bool(), Riscv.XLEN.get)
masked(masked.size - 1) := flipped.msb
Expand All @@ -145,8 +176,15 @@ class ZbbCountPlugin(val layer: LaneLayer,
}

val format = new el.Execute(formatAt) {
val ones = CountOne(MASKED).asBits.resized
wb.valid := SEL
wb.payload := CountOne(MASKED).asBits.resized
wb.payload := ones
when(OR) {
if(Riscv.XLEN.get == 64) {
wb.payload(5) := ones(6)
wb.payload(6) := False
}
}
}
}
}
Expand Down Expand Up @@ -188,6 +226,7 @@ object ZbbRotatePlugin {
val IS_W = Payload(Bool())
}

// TODO add option to also generate shift instructions? most of the hardware is a duplicate
class ZbbRotatePlugin(val layer: LaneLayer,
val executeAt: Int = 0,
val formatAt: Int = 0) extends ExecutionUnitElementSimple(layer) {
Expand All @@ -200,32 +239,36 @@ class ZbbRotatePlugin(val layer: LaneLayer,

val wb = newWriteback(ifp, formatAt)
add(RvZbx.ROL).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True, IS_W -> False)
add(RvZbx.ROLW).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True, IS_W -> False)
add(RvZbx.ROR).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, IS_W -> False)
add(RvZbx.RORI).srcs(SRC1.RF, SRC2.I).decode(LEFT -> False, IS_W -> False)
if(Riscv.XLEN.get == 64) {
add(RvZbx.ROLW).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True, IS_W -> True)
add(RvZbx.RORIW).srcs(SRC1.RF, SRC2.I).decode(LEFT -> False, IS_W -> True)
add(RvZbx.RORW).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, IS_W -> True)
for(op <- Seq(RvZbx.ROLW, RvZbx.RORIW, RvZbx.RORW)) {
ifp.signExtend(wb, layer(op), 32)
}
}
uopRetainer.release()

val execute = new el.Execute(executeAt) {
val src1 = if(Riscv.XLEN.get == 64)
el(IntRegFile, RS1) & B(64 bits, (63 downto 32) -> !IS_W, default -> True)
else
el(IntRegFile, RS1).asBits
val src1 = this(srcp.SRC1).asBits
if(Riscv.XLEN.get == 64)
when(IS_W) {
src1(63 downto 32) := 0
}

val amplitude = srcp.SRC2(log2Up(Riscv.XLEN.get) - 1 downto 0).asUInt
val reversed = LEFT ? src1.reversed | src1
val shifted = reversed.rotateRight(amplitude)
val patched = LEFT ? shifted.reversed | shifted

RESULT := (if (Riscv.XLEN.get == 64) {
val wordResult = patched(31 downto 0) | patched(63 downto 32)
IS_W ? wordResult.asSInt.resize(64).asBits | patched
} else {
patched
})
RESULT := patched
if (Riscv.XLEN.get == 64) {
when(IS_W) {
RESULT(31 downto 0) := patched(31 downto 0) | patched(63 downto 32)
}
}
}

val format = new el.Execute(formatAt) {
Expand Down Expand Up @@ -297,7 +340,7 @@ class ZbbExtendPlugin(val layer: LaneLayer,

val format = new el.Execute(formatAt) {
wb.valid := SEL
wb.payload := srcp.SRC1.asBits
wb.payload := el(IntRegFile, RS1)
}
}
}
Expand All @@ -319,6 +362,7 @@ class ZbcPlugin(val layer: LaneLayer,
val RESULT = Payload(Bits(Riscv.XLEN.get bit))

val logic = during setup new Logic {
Riscv.RVZbc.set(true)
awaitBuild()
import SrcKeys._

Expand Down Expand Up @@ -366,7 +410,7 @@ class ZbsPlugin(val layer: LaneLayer,
val RESULT = Payload(Bits(Riscv.XLEN bits))

val logic = during setup new Logic {
Riscv.RVZb.set(true)
Riscv.RVZbs.set(true)
awaitBuild()
import SrcKeys._

Expand Down
28 changes: 22 additions & 6 deletions src/main/scala/vexiiriscv/execute/IterativeShiftPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ object IterativeShifterPlugin extends AreaObject {
val LEFT = Payload(Bool())
val IS_W = Payload(Bool())
val IS_W_RIGHT = Payload(Bool())
val IS_UW = Payload(Bool())
}

/** Iterative Shifter
Expand All @@ -31,6 +32,7 @@ object IterativeShifterPlugin extends AreaObject {
* needed for the default configuration. Enabling it costs 1 cycle for all shifts.
*/
class IterativeShifterPlugin(val layer: LaneLayer,
val with_slli_uw: Boolean = false,
val shiftAt: Int = 0,
val formatAt: Int = 0,
val leftShifts: Seq[Int] = Seq(),
Expand Down Expand Up @@ -61,15 +63,24 @@ class IterativeShifterPlugin(val layer: LaneLayer,
if (Riscv.XLEN.get == 64) {
for (op <- List(Rvi.SLL, Rvi.SRL, Rvi.SRA, Rvi.SLLI, Rvi.SRLI, Rvi.SRAI)) {
layer(op).addDecoding(IS_W -> False, IS_W_RIGHT -> False)
if(with_slli_uw)
layer(op).addDecoding(IS_UW -> False)
}
add(Rvi.SLLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True , ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> False)
add(Rvi.SRLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> True )
add(Rvi.SRAW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> True , IS_W -> True, IS_W_RIGHT -> True )
add(Rvi.SLLIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> True , ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> False)
add(Rvi.SRLIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> False, ARITHMETIC -> False, IS_W -> True, IS_W_RIGHT -> True )
add(Rvi.SRAIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> False, ARITHMETIC -> True , IS_W -> True, IS_W_RIGHT -> True )
add(Rvi.SLLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> True , ARITHMETIC -> False, IS_W_RIGHT -> False)
add(Rvi.SRLW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> False, IS_W_RIGHT -> True )
add(Rvi.SRAW ).srcs(SRC1.RF, SRC2.RF).decode(LEFT -> False, ARITHMETIC -> True , IS_W_RIGHT -> True )
add(Rvi.SLLIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> True , ARITHMETIC -> False, IS_W_RIGHT -> False)
add(Rvi.SRLIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> False, ARITHMETIC -> False, IS_W_RIGHT -> True )
add(Rvi.SRAIW).srcs(SRC1.RF, SRC2.I ).decode(LEFT -> False, ARITHMETIC -> True , IS_W_RIGHT -> True )
for (op <- List(Rvi.SLLW, Rvi.SRLW, Rvi.SRAW, Rvi.SLLIW, Rvi.SRLIW, Rvi.SRAIW)) {
layer(op).addDecoding(IS_W -> True)
ifp.signExtend(wb, layer(op), 32)

if(with_slli_uw)
layer(op).addDecoding(IS_UW -> False)
}
if (with_slli_uw) {
add(RvZbx.SLLI_UW).srcs(SRC1.RF, SRC2.I).decode(LEFT -> True, ARITHMETIC -> False, IS_W -> False, IS_W_RIGHT -> False, IS_UW -> True)
}
}

Expand All @@ -96,6 +107,11 @@ class IterativeShifterPlugin(val layer: LaneLayer,
when(IS_W_RIGHT) {
dataIn(63 downto 32) := (default -> (ARITHMETIC & rs1(31)))
}
if (with_slli_uw) {
when(IS_UW) {
dataIn(63 downto 32) := 0
}
}
}

val done = if(lateResult) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/vexiiriscv/riscv/Misc.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ object Riscv extends AreaObject {
val XLEN = blocking[Int]
val FLEN = blocking[Int]
val LSLEN = blocking[Int]
val RVC, RVM, RVD, RVF, RVA, RVZb = blocking[Boolean]
val RVC, RVM, RVD, RVF, RVA, RVZba, RVZbb, RVZbc, RVZbs = blocking[Boolean]
}
5 changes: 4 additions & 1 deletion src/main/scala/vexiiriscv/riscv/RiscvPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ class RiscvPlugin(var xlen : Int,
if(Riscv.RVM.isEmpty) Riscv.RVM.set(false)
if(Riscv.RVF.isEmpty) Riscv.RVF.set(false)
if(Riscv.RVD.isEmpty) Riscv.RVD.set(false)
if(Riscv.RVZb.isEmpty) Riscv.RVZb.set(false)
if(Riscv.RVZba.isEmpty) Riscv.RVZba.set(false)
if(Riscv.RVZbb.isEmpty) Riscv.RVZbb.set(false)
if(Riscv.RVZbc.isEmpty) Riscv.RVZbc.set(false)
if(Riscv.RVZbs.isEmpty) Riscv.RVZbs.set(false)
Riscv.XLEN.set(xlen)
Riscv.FLEN.set(List(Riscv.RVF.get.toInt*32, Riscv.RVD.get.toInt*64).max)
Riscv.LSLEN.set(List(Riscv.XLEN.get, Riscv.FLEN.get).max)
Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/vexiiriscv/test/VexiiRiscvProbe.scala
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,14 @@ class VexiiRiscvProbe(cpu : VexiiRiscv, kb : Option[konata.Backend], var withRvl
if (get(Riscv.RVF)) isa += "F"
if (get(Riscv.RVD)) isa += "D"
if (get(Riscv.RVC)) isa += "C"
if (get(Riscv.RVZb)) isa += "_zba_zbb_zbc_zbs"
if (get(Riscv.RVZba)) isa += "_zba"
if (get(Riscv.RVZbb)) isa += "_zbb"
if (get(Riscv.RVZbc)) isa += "_zbc"
if (get(Riscv.RVZbs)) isa += "_zbs"
tracer.newCpuMemoryView(hartId, 16, 16)
tracer.newCpu(hartId, isa, csrp, 63, hartId)
val pc = if(xlen == 32) 0x80000000l else 0x80000000l
tracer.setPc(hartId, pc)
this
}
}

Expand Down
Loading

0 comments on commit 08d3765

Please sign in to comment.