Richard Allen, Nathan Wooster, Andrew Yee, James Hillman, Sec. 102

Appendix


Block Summaries of selected components:

The Reorder Buffer

The entries in the reorder buffer are added when the instruction is in the execute stage, and the data is written to the buffer when the instruction either finishes execution (as in the case of an add), or when the fetch from memory is finished. Commitment to the register file occurs in order, which allows for precise exceptions, and takes care of write after write hazards. Also, we take care of forwarding out of the buffer in case the instruction going into the execute stage depends on something that hasn’t been committed. We may have to forward as many as two values from the buffer.


The MSHR

When a load reaches the memory stage and misses, we look for a free spot in the MSHR, stalling if it is full. We decided not to use F/E bits in the register file, but rather to have the control look at the MSHR to determine which registers are waiting for something from memory. The MSHR deals with read after write hazards -- this might happen if we get an arithmetic instruction depending on the result of a previous load which missed, and is now in the MSHR. In this case, we stall the execution of the add until the value comes out of memory.


The Victim Cache

This cache holds 8 words of data and sits between the data cache and the DRAM module. When we have a memory request, we search in parallel the data cache and the victim cache. If it is in the victim cache, it is moved to the proper spot in the data cache, and the piece of data in the data cache that was replaced is moved to the victim cache.

Schematics


The first seven pages are the top level schematic. Zoom in for a better view.

Sorry if they are a bit fuzzy. We used the Windows “screen capture” function.







And now for the victim cache:


Test programs

We used the “mystery” programs from the previous labs.

VHDL

And now for the VHDL files… (Almost 100 pages)

-- modernlw_fwd_ctrl.vhd

-- takes care of forwarding the values for the MSHR loads

library pack1076;

use pack1076.pack1076.all;

-- Interface of the MSHR load-forwarding control.
-- fwd_delay : delay applied to both outputs.
-- lreg      : 5-bit register number compared against the rs/rt fields of
--             dec_instr; the architecture does nothing while bit 0 is unknown.
-- dec_instr : 32-bit instruction word whose opcode, funct, rs and rt fields
--             are decoded.
-- fwd_rs    : '1' when the instruction reads rs, rs is not register 0 and
--             rs equals lreg.
-- fwd_rt    : same test for the rt field.
entity modernlw_forwardctrl is

generic (fwd_delay : TIME := 5 ns);

port (

lreg : in vlbit_1d(4 downto 0);

dec_instr : in vlbit_1d(31 downto 0);

fwd_rs : out vlbit;

fwd_rt : out vlbit);

end modernlw_forwardctrl;

------

-- purpose: determine when to forward

architecture behavior of modernlw_forwardctrl is
begin  -- behavior

  -- Re-evaluated whenever lreg or dec_instr changes.  Raises fwd_rs / fwd_rt
  -- when the decoded instruction reads a non-zero register equal to lreg.
  modernlw_fwd_process : process (lreg, dec_instr)
    -- opcodes referenced by the decode below
    constant RTYPE_op  : integer := 0;
    constant j_op      : integer := 2;
    constant jal_op    : integer := 3;
    constant beq_op    : integer := 4;
    constant bne_op    : integer := 5;
    constant lui_op    : integer := 15;
    constant sw_op     : integer := 43;
    -- R-type function codes referenced by the decode below
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct  : integer := 8;

    variable op_field    : integer;
    variable funct_field : integer;
    variable is_shift    : boolean;
    variable uses_rs     : boolean;
    variable uses_rt     : boolean;
    variable hit_rs      : vlbit;
    variable hit_rt      : vlbit;
  begin  -- process
    -- default: no forwarding
    hit_rs := '0';
    hit_rt := '0';

    -- only decode while lreg carries a known value (bit 0 is the probe)
    if not bitunknown(lreg(0)) then
      op_field    := v1d2int(dec_instr(31 downto 26));
      funct_field := v1d2int(dec_instr(5 downto 0));

      -- sll/srl/sra are the R-type instructions that do not read rs
      is_shift := (op_field = RTYPE_op) and
                  (funct_field = sll_funct or
                   funct_field = srl_funct or
                   funct_field = sra_funct);

      -- everything except j, jal, lui and the shifts reads rs
      uses_rs := not (op_field = j_op or op_field = jal_op or
                      op_field = lui_op or is_shift);

      -- beq, bne, sw and every R-type except jr read rt
      uses_rt := (op_field = beq_op) or (op_field = bne_op) or
                 (op_field = sw_op) or
                 ((op_field = RTYPE_op) and not (funct_field = jr_funct));

      -- register 0 is never forwarded
      if uses_rs and
         not (dec_instr(25 downto 21) = ("00000")) and
         (dec_instr(25 downto 21) = lreg) then
        hit_rs := '1';
      end if;

      if uses_rt and
         not (dec_instr(20 downto 16) = ("00000")) and
         (dec_instr(20 downto 16) = lreg) then
        hit_rt := '1';
      end if;
    end if;

    fwd_rs <= hit_rs after fwd_delay;
    fwd_rt <= hit_rt after fwd_delay;
  end process;

end behavior;

------

-- The MSHR (keeps track of 2 outstanding loads)

--

library pack1076;

use pack1076.pack1076.all;

-- Interface of the MSHR model (the architecture tracks up to two pending
-- load misses).
--   clk            : the architecture's process wakes on every change.
--   dwait          : from the memory system; sampled as '0' = data ready,
--                    '1' = still waiting.
--   stall_mshr     : stall request driven by the MSHR.
--   dr_w_in/out    : request direction; the architecture treats '1' as a
--                    store and '0' as a load.
--   drequest_in/out, daddr_in/out, ddin/ddout :
--                    request strobe, 10-bit address and store data, passed
--                    from the pipeline side to the memory side.
--   memdata_in/out, memdata_valid :
--                    load data returned from memory and its valid strobe.
--   reg_out1/2     : destination registers of the two pending entries.
--   inst_m         : instruction word; bits 20..16 are used as the load
--                    destination register.
--   lreg           : destination register that accompanies memdata_out.
entity mshr is

generic (delay : TIME := 5 ns);

port (

signal clk : in vlbit;

signal dwait : in vlbit; -- tells us to not to start any mem requests

-- this signal is from the memory system

signal stall_mshr : out vlbit;

signal dr_w_in : in vlbit; -- tells us if we are doing a load or store

signal dr_w_out : out vlbit;

signal drequest_in : in vlbit;

signal drequest_out : out vlbit;

signal daddr_in : in vlbit_1d(9 downto 0);

signal daddr_out : out vlbit_1d(9 downto 0);

signal ddin : in vlbit_1d(31 downto 0);

signal ddout : out vlbit_1d(31 downto 0);

signal memdata_in : in vlbit_1d(31 downto 0);

signal memdata_out : out vlbit_1d(31 downto 0);

signal memdata_valid : out vlbit;

signal reg_out1 : out vlbit_1d(4 downto 0); -- control looks at these

signal reg_out2 : out vlbit_1d(4 downto 0); -- two output ports

signal inst_m : in vlbit_1d(31 downto 0);

signal lreg : out vlbit_1d(4 downto 0));

-- might need an output port coupled with the load data telling the dest reg

end mshr;

------

-- Behavioral model of the MSHR.
-- A single process wakes on every change of clk and keeps up to two pending
-- load misses in variables:
--   entry 1 : valid1, reg_wr1 (destination register), addr_miss1 (address)
--   entry 2 : valid2, reg_wr2, addr_miss2
-- The body is a chain of cases selected by (valid1, valid2) and by whether
-- the waking edge was a falling edge.  Statement order and the embedded
-- wait statements are part of the behavior.
architecture behavior of mshr is

begin

mshr_process : process -- (clk)

-- variable state : integer := 0;

variable valid1 : vlbit := '0';-- all the entries in the table start

variable valid2 : vlbit := '0';-- off as invalid

variable reg_wr1 : vlbit_1d(4 downto 0) := ("XXXXX");

variable reg_wr2 : vlbit_1d(4 downto 0) := ("XXXXX");

variable addr_miss1 : vlbit_1d(9 downto 0);

variable addr_miss2 : vlbit_1d(9 downto 0);

-- true when the clk change that woke the process was a falling edge
variable fallingedge : boolean := false;

-- "no register" marker used for reg_wr1/reg_wr2 and lreg
constant allx : vlbit_1d(4 downto 0) := ("XXXXX");

--constant allz : vlbit_1d(31 downto 0) := ("ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ");

--variable sw_coming : boolean := false;

--variable sw_fwd_value : vlbid_1d(31 downto 0) := ("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");

begin -- needed

wait until (pchanging(clk));

fallingedge := pfalling(clk);

-- on every non-falling clk change, drop the load-data strobe and mark the
-- delivered register as unknown
if not fallingedge then

memdata_valid <= '0';

lreg <= allx;

end if;

wait for 2 ns; -- wait for drequest to come out of pipeline reg : used to wait for 5 ns;

-- CASE: no pending misses, new request from the pipeline
if (valid1 = '0' and valid2 = '0' and drequest_in = '1') then

-- send it to mem, and then check for a hit 1/2 a cycle later

-- ON HITS: send it out

-- ON MISSES:

-- if it's a "sw", then we pass on dwait to stall_mshr

-- if it's a "lw", then we latch things in and set the valid bits, etc...

-- falling edge: forward the request to the memory side unchanged
if fallingedge then

-- put("valid1 = 0, valid2 = 0, pfalling(clk), drequest_in = 1");

drequest_out <= drequest_in;

dr_w_out <= dr_w_in;

daddr_out <= daddr_in;

--if sw_coming then

--ddout <= sw_fwd_value;

--sw_coming := false;

--else

ddout <= ddin;

--end if;

end if;

-- now check to see if it was a hit in the cache

if (not fallingedge) then

wait for 0 ns; -- used to be 11

-- check dwait

if (dwait = '0') then -- hit

stall_mshr <= '0';

memdata_out <= memdata_in;

-- dr_w_in = '0' is a load: report the destination register (rt field)
if (dr_w_in = '0') then

lreg <= inst_m(20 downto 16);

memdata_valid <= '1';-- need to figure out when to deassert

end if;

else

-- put("valid1 = 0, valid2 = 0, not(pfalling(clk)), dwait = 1, drequest_in = 1");

if (dr_w_in = '1') then-- store word instruction

-- store while memory is busy: hold the stall until dwait drops
stall_mshr <= '1';

-- watch the dwait line

wait until dwait = '0';

stall_mshr <= '0';

drequest_out <= '0';

-- might need to check the previous line

else

-- load miss: record it in entry 1, no stall
stall_mshr <= '0';

valid1 := '1';

addr_miss1 := daddr_in;

reg_wr1 := inst_m(20 downto 16); -- rt

end if;

end if;

end if;

-- this is where we left off

-- CASE: nothing pending and no request -> idle outputs
elsif (valid1 = '0' and valid2 = '0' and drequest_in = '0') then

stall_mshr <= '0';

drequest_out <= '0';

dr_w_out <= '0';

-- CASE: one pending miss, non-falling edge
elsif (valid1 = '1' and valid2 = '0' and not(fallingedge)) then

wait for 0 ns; -- used to be 11

-- check dwait

if (dwait = '0') then -- hit

-- put("valid1 = 1, valid2 = 0, not(pfalling(clk)), dwait = 0");

-- data arrived: deliver it for reg_wr1 and free entry 1
memdata_out <= memdata_in;

memdata_valid <= '1';

lreg <= reg_wr1;

drequest_out <= '0';

valid1 := '0';

reg_wr1 := allx;

end if;

-- CASE: two pending misses, non-falling edge
elsif (valid1 = '1' and valid2 = '1' and not(fallingedge)) then

wait for 0 ns; -- used to be 11

-- check dwait

-- if (dwait = '1' and drequest_in = '1') then

--stall_mshr <= '1';

-- end if;

if (dwait = '0' and drequest_in = '0') then

-- output value and set valid2 to 0, valid1 to 1, lower drequest_out, etc...

-- make the next request

stall_mshr <= '0' after delay; -- added

memdata_out <= memdata_in;

memdata_valid <= '1';

lreg <= reg_wr1;

-- copy the "2" values to "1" values

reg_wr1 := reg_wr2;

reg_wr2 := allx;

addr_miss1 := addr_miss2;

valid2 := '0';

valid1 := '1'; -- just in case

-- the request for the promoted entry is issued after the next clk change
wait until pchanging(clk);

drequest_out <= '1';

dr_w_out <= '0'; -- load

daddr_out <= addr_miss1;

end if;

-- NOTE(review): this is a separate "if", not an "elsif".  When the block
-- above ran, this condition is evaluated after its wait, with dwait and
-- drequest_in read at that later time -- confirm that is intended.
if (dwait = '0' and drequest_in = '1') then

stall_mshr <= '0' after delay; -- added

memdata_out <= memdata_in;

memdata_valid <= '1';

lreg <= reg_wr1;

addr_miss1 := addr_miss2;

reg_wr1 := reg_wr2;

if dr_w_in = '1' then -- store

stall_mshr <= '1';

valid2 := '0';

reg_wr2 := allx;

else

-- incoming load takes over entry 2
valid2 := '1';

addr_miss2 := daddr_in;

reg_wr2 := inst_m(20 downto 16);

end if;

wait until pchanging(clk);

-- now make the request

daddr_out <= addr_miss1;

dr_w_out <= '0';

drequest_out <= '1';

end if;

-- CASE: one pending miss, falling edge
elsif (valid1 = '1' and valid2 = '0' and fallingedge) then

wait for 11 ns;

if (dwait = '1' and drequest_in = '1') then

-- put("valid1 = 1, valid2 = 0, pfalling(clk), dwait = 1, drequest_in = 1");

-- latch in the request, make sure that it's a load

-- if it's a store, we stall the pipe and finish the pending load

if dr_w_in = '1' then

stall_mshr <= '1';

else

reg_wr2 := inst_m(20 downto 16);

addr_miss2 := daddr_in;

valid2 := '1';

end if;

end if;

if (dwait = '0' and drequest_in = '0') then

-- put("valid1 = 1, valid2 = 0, pfalling(clk), dwait = 0, drequest_in = 0");

-- output value and set valid1 to 0, lower drequest_out, etc...

memdata_out <= memdata_in;

memdata_valid <= '1';

lreg <= reg_wr1;

drequest_out <= '0';

valid1 := '0';

reg_wr1 := allx;

end if;

if (dwait = '0' and drequest_in = '1') then

-- put("valid1 = 1, valid2 = 0, pfalling(clk), dwait = 0, drequest_in = 1");

-- output the value and latch in the request if it's a load

-- if it was a store, stall

memdata_out <= memdata_in;

memdata_valid <= '1';

lreg <= reg_wr1;

drequest_out <= '1';

valid1 := '0';

reg_wr1 := allx;

-- the new request is passed straight through to the memory side
dr_w_out <= dr_w_in;

daddr_out <= daddr_in;

ddout <= ddin;

end if;

-- CASE: two pending misses, falling edge
elsif (valid1 = '1' and valid2 = '1' and fallingedge) then

-- wait for 11 ns;

-- both entries in use and memory still busy: a further request must stall
if (dwait = '1' and drequest_in = '1') then

stall_mshr <= '1';

end if;

if (dwait = '0' and drequest_in = '0') then

-- output value and set valid2 to 0, valid1 to 1, lower drequest_out, etc...

-- make the next request

stall_mshr <= '0' after delay; -- added

memdata_out <= memdata_in;

memdata_valid <= '1';

lreg <= reg_wr1;

-- copy the "2" values to "1" values

reg_wr1 := reg_wr2;

reg_wr2 := allx;

addr_miss1 := addr_miss2;

drequest_out <= '1';

dr_w_out <= '0'; -- load

daddr_out <= addr_miss1;

valid2 := '0';

valid1 := '1'; -- just in case

end if;

if (dwait = '0' and drequest_in = '1') then

stall_mshr <= '0' after delay; -- added

memdata_out <= memdata_in;

memdata_valid <= '1';

lreg <= reg_wr1;

-- request the address of the entry being promoted from slot 2 to slot 1
daddr_out <= addr_miss2;

dr_w_out <= '0';

drequest_out <= '1';

addr_miss1 := addr_miss2;

reg_wr1 := reg_wr2;

if dr_w_in = '1' then -- store

stall_mshr <= '1';

valid2 := '0';

reg_wr2 := allx;

else

valid2 := '1';

addr_miss2 := daddr_in;

reg_wr2 := inst_m(20 downto 16);

end if;

-- check for a store instruction, if it is, stall the pipe

-- otherwise, it's a load, so output the data and latch in the appropriate stuff

-- make another mem request

end if;

end if;

-- publish the destination registers of the pending entries
reg_out1 <= reg_wr1 after delay;

reg_out2 <= reg_wr2 after delay;

-- PROBABLY IGNORE THIS:

-- also need to check that the current instruction going into

-- the execute stage does not depend on one of the outstanding loads

-- (assume that the stall and forwarding control takes care of this)

-- also need to worry about stores after loads:

-- lw $2, 0

-- lw $3, 4

-- sw $4, 8 <-- we need to stall until the outstanding load finishes

-- this hazard needs to be detected in the memory stage

-- in the next portion of code, make the request to the dcache controller

-- make sure there is no store instruction in the way. The store should be

-- done first, making sure that the store doesn't depend on any of the loads

end process;

end behavior;

------

library pack1076;

use pack1076.pack1076.all;

-- Interface of the 8-entry reorder buffer.
--   delay                : delay applied to the forwarding outputs.
--   value / reg / valid  : result of a non-load instruction; when valid = '1'
--                          the architecture looks for a non-load entry whose
--                          destination equals reg.
--   lvalue / lreg / lvalid : same for a load result.
--   stall                : the architecture allocates a new entry only while
--                          stall = '1' -- NOTE(review): the polarity looks
--                          inverted relative to the comment that follows this
--                          entity; confirm against the driver.
--   full                 : '1' when the buffer has no free entry.
--   regwr / dout / writereg : register number, data and write strobe of the
--                          entry being committed.
--   ctrl_e               : selects the destination field of inst:
--                          "00" -> bits 20..16, "01" -> bits 15..11,
--                          otherwise register 31.
entity reorder is

generic (delay : TIME := 3 ns);

port (clk : in vlbit;

inst : in vlbit_1d(31 downto 0); -- this is the instruction to be added (in the exec stage)

-- now for the ports having to do with forwarding:

dec_instr : in vlbit_1d(31 downto 0); -- the instruction in the decode stage, for forwarding

reorder_rs_val : out vlbit_1d(31 downto 0);

fwd_rs_reorder : out vlbit;

reorder_rt_val : out vlbit_1d(31 downto 0);

fwd_rt_reorder : out vlbit;

-- end forwarding related ports

value : in vlbit_1d(31 downto 0);

reg : in vlbit_1d(4 downto 0);

valid : in vlbit;

lvalue : in vlbit_1d(31 downto 0);

lreg : in vlbit_1d(4 downto 0);

lvalid : in vlbit;

stall : in vlbit;

full : out vlbit;

regwr : out vlbit_1d(4 downto 0);

dout : out vlbit_1d(31 downto 0);

writereg : out vlbit;

ctrl_e : in vlbit_1d(1 downto 0));

end reorder;

-- if there's an instruction stall, then don't load inst into the reorder buffer!!

-- Behavioral model of an 8-entry circular reorder buffer.
--
-- Entries live in the arrays data/dest/done/lw and are delimited by
-- bufferstart (oldest entry, next to commit) and bufferend (next free slot).
--   * non-falling clk change : 6 ns later, allocate an entry for inst when
--                              stall = '1'.
--   * falling clk edge       : 17 ns later, record incoming results
--                              (valid/lvalid), then commit the oldest entry
--                              if it is done.
--
-- Fixes relative to the previous revision:
--   * When no entry matched an incoming result, the search fell through at
--     i = 8 and wrote the result into slot (bufferstart + 8) mod 8, i.e. the
--     head entry, marking it done.  An unmatched result is now ignored.
--   * Committing an entry cleared lw(bufferend) instead of lw(bufferstart),
--     leaving a stale load flag on the retired slot.
architecture behavior of reorder is

begin  -- behavior

  reorder_process : process

    -- opcodes / function codes used by the (currently disabled) forwarding
    -- search and by the load detection
    constant RTYPE_op  : integer := 0;
    constant j_op      : integer := 2;
    constant jal_op    : integer := 3;
    constant beq_op    : integer := 4;
    constant bne_op    : integer := 5;
    constant lui_op    : integer := 15;
    constant lw_op     : integer := 35;
    constant sw_op     : integer := 43;
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct  : integer := 8;

    type thirtytwowide_array is array (0 to 7) of vlbit_1d(31 downto 0);
    type fivewide_array is array (0 to 7) of vlbit_1d(4 downto 0);
    type bool_array is array (0 to 7) of boolean;

    constant allx  : vlbit_1d(31 downto 0) := ("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
    constant noreg : vlbit_1d(4 downto 0) := ("XXXXX");

    -- forwarding state (only written inside the disabled block below)
    variable reorder_rs_val_tmp : vlbit_1d(31 downto 0);
    variable reorder_rt_val_tmp : vlbit_1d(31 downto 0);
    variable rs_read : boolean;
    variable rt_read : boolean;
    variable rs_tmp : vlbit;
    variable rt_tmp : vlbit;
    variable opcode : integer;
    variable fcode : integer;

    -- buffer storage
    variable data : thirtytwowide_array;   -- result value of each entry
    variable dest : fivewide_array;        -- destination register of each entry
    variable done : bool_array;            -- result has been recorded
    variable lw : bool_array;              -- entry was allocated for a load
    variable bufferstart : integer := 0;   -- oldest entry
    variable bufferend : integer := 0;     -- next free slot
    variable fulltemp : boolean := false;

    variable i : integer := 0;
    variable slot : integer := 0;
    variable keepgoing : boolean := true;

  begin  -- process reorder

    wait until pchanging(clk);

    if (not pfalling(clk)) then         -- synchronous stuff

      wait for 6 ns;

      -- add new instruction to buffer
      if stall = '1' then
        lw(bufferend) := v1d2int(inst(31 downto 26)) = lw_op;
        done(bufferend) := false;

        -- ctrl_e selects which instruction field names the destination
        if (ctrl_e = ("00")) then
          dest(bufferend) := inst(20 downto 16);
        elsif (ctrl_e = ("01")) then
          dest(bufferend) := inst(15 downto 11);
        else
          dest(bufferend) := ("11111");
        end if;

        bufferend := (bufferend + 1) mod 8;
        -- after an insertion, start = end means every slot is occupied
        fulltemp := ((bufferend - bufferstart) mod 8) = 0;
      end if;

    end if;

    -- now on to the stuff having to do with forwarding
    if pfalling(clk) then

      -- Forwarding out of the buffer is switched off by this constant
      -- condition; the code is kept for reference.
      if false then

        rs_tmp := '0';
        rt_tmp := '0';

        opcode := v1d2int(dec_instr(31 downto 26));
        fcode := v1d2int(dec_instr(5 downto 0));

        rs_read := NOT(opcode = j_op or opcode = jal_op or opcode = lui_op or
                       (opcode = RTYPE_op and (fcode = sll_funct or
                                               fcode = srl_funct or
                                               fcode = sra_funct)));

        rt_read := (opcode = beq_op or opcode = bne_op or opcode = sw_op or
                    (opcode = RTYPE_op and NOT(fcode = jr_funct)));

        if rs_read then
          if NOT(dec_instr(25 downto 21) = ("00000")) then
            -- should search through the reorder buffer, looking for dependencies
            i := 0;
            keepgoing := true;
            while keepgoing loop
              if (i = 8) then
                keepgoing := false;     -- didn't find a dependency
              elsif (dec_instr(25 downto 21) = dest(i)) and done(i) then
                put ("found rs dependency -- forwarding the value!");
                reorder_rs_val_tmp := data(i);
                rs_tmp := '1';
                keepgoing := false;
              end if;
              i := i + 1;
            end loop;
          end if;
        end if;

        if rt_read then
          if NOT(dec_instr(20 downto 16) = ("00000")) then
            -- should search through the reorder buffer, looking for dependencies
            i := 0;
            keepgoing := true;
            while keepgoing loop
              if (i = 8) then
                keepgoing := false;     -- didn't find a dependency
              elsif (dec_instr(20 downto 16) = dest(i)) and done(i) then
                put ("found rt dependency -- forwarding the value!");
                reorder_rt_val_tmp := data(i);
                rt_tmp := '1';
                keepgoing := false;
              end if;
              i := i + 1;
            end loop;
          end if;
        end if;

      end if;

      wait for 17 ns;

      -- default: nothing is committed this cycle
      dout <= allx;
      regwr <= noreg;
      writereg <= '0';

      -- record the result of a regular (non-load) instruction: oldest
      -- non-load entry whose destination matches reg.  If nothing matches,
      -- the buffer is left untouched.
      if (valid = '1') then
        for k in 0 to 7 loop
          slot := (bufferstart + k) mod 8;
          if (not lw(slot)) and (dest(slot) = reg) then
            data(slot) := value;
            done(slot) := true;
            exit;
          end if;
        end loop;
      end if;

      -- record a load result: oldest entry whose destination matches lreg.
      -- If nothing matches, the buffer is left untouched.
      if lvalid = '1' then
        for k in 0 to 7 loop
          slot := (bufferstart + k) mod 8;
          if (dest(slot) = lreg) then
            data(slot) := lvalue;
            done(slot) := true;
            exit;
          end if;
        end loop;
      end if;

      -- commit the oldest entry, in order, once its result is present
      if done(bufferstart) then
        writereg <= '1';
        regwr <= dest(bufferstart);
        dout <= data(bufferstart);

        -- scrub the retired slot
        dest(bufferstart) := noreg;
        data(bufferstart) := allx;
        done(bufferstart) := false;
        lw(bufferstart) := false;

        bufferstart := (bufferstart + 1) mod 8;

        if fulltemp then
          fulltemp := ((bufferend - bufferstart) mod 8) = 0;
        end if;
      end if;

      full <= boo2vlb(fulltemp);

    end if;  -- matches "if pfalling(clk) then"

    fwd_rs_reorder <= rs_tmp after delay;
    fwd_rt_reorder <= rt_tmp after delay;
    reorder_rs_val <= reorder_rs_val_tmp after delay;
    reorder_rt_val <= reorder_rt_val_tmp after delay;

  end process;

end behavior;

------

-- reorderstallcontrol --

------

-- reorderstallcontrol.vhd

library pack1076;

use pack1076.pack1076.all;

-- Interface of the stall control that compares the decode-stage instruction
-- with the registers pending in the MSHR.
--   reorder_stall_delay : delay applied to stallmodern.
--   ex_instr            : not read by the architecture in this file.
--   dec_instr           : instruction whose rs/rt fields are checked.
--   clk                 : the architecture re-evaluates after every change.
--   mshr_reg1/2         : pending destination registers; a register takes
--                         part in the comparison only while its bit 0 is
--                         known.
--   stallmodern         : '1' when dec_instr reads one of the pending
--                         registers.
entity reorderstallcontrol is

generic (reorder_stall_delay : TIME := 0 ns);

port (ex_instr : in vlbit_1d(31 downto 0);

dec_instr :in vlbit_1d(31 downto 0);

clk : in vlbit;

mshr_reg1 : in vlbit_1d(4 downto 0);

mshr_reg2 : in vlbit_1d(4 downto 0);

stallmodern : out vlbit);

end reorderstallcontrol;

------

-- purpose: determine when to stall

architecture behavior of reorderstallcontrol is
begin  -- behavior

  -- 20 ns after every clk change, decide whether the instruction in decode
  -- reads a register that is still pending in the MSHR.
  reorder_stall_process : process
    -- opcodes referenced by the decode below
    constant RTYPE_op  : integer := 0;
    constant j_op      : integer := 2;
    constant jal_op    : integer := 3;
    constant beq_op    : integer := 4;
    constant bne_op    : integer := 5;
    constant lui_op    : integer := 15;
    -- R-type function codes referenced by the decode below
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct  : integer := 8;

    variable op_field    : integer;
    variable funct_field : integer;
    variable is_shift    : boolean;
    variable reads_rs    : boolean;
    variable reads_rt    : boolean;
    variable reg1_known  : boolean;
    variable reg2_known  : boolean;
    variable stall_v     : vlbit;
  begin  -- process
    wait until pchanging(clk);

    if pchanging(clk) then
      wait for 20 ns;

      stall_v := '0';                   -- default: no stall

      funct_field := v1d2int(dec_instr(5 downto 0));
      op_field    := v1d2int(dec_instr(31 downto 26));

      -- sll/srl/sra are the R-type instructions that do not read rs
      is_shift := (op_field = RTYPE_op) and
                  (funct_field = sll_funct or
                   funct_field = srl_funct or
                   funct_field = sra_funct);

      reads_rs := not (op_field = j_op or op_field = jal_op or
                       op_field = lui_op or is_shift);

      -- beq, bne and every R-type except jr are treated as reading rt
      reads_rt := (op_field = beq_op) or (op_field = bne_op) or
                  ((op_field = RTYPE_op) and not (funct_field = jr_funct));

      -- an MSHR register only counts while it holds a known value
      reg1_known := not bitunknown(mshr_reg1(0));
      reg2_known := not bitunknown(mshr_reg2(0));

      if reads_rs and
         (((dec_instr(25 downto 21) = mshr_reg1) and reg1_known) or
          ((dec_instr(25 downto 21) = mshr_reg2) and reg2_known)) then
        stall_v := '1';
      end if;

      if reads_rt and
         (((dec_instr(20 downto 16) = mshr_reg1) and reg1_known) or
          ((dec_instr(20 downto 16) = mshr_reg2) and reg2_known)) then
        stall_v := '1';
      end if;

      stallmodern <= stall_v after reorder_stall_delay;
    end if;
  end process;

end behavior;

------

-- modernlwsw.vhd

-- takes care of stores after loads with the MSHR

library pack1076;

use pack1076.pack1076.all;

-- Interface of the store-after-load forwarding check.
--   modernlwsw_delay : delay applied to fwd.
--   ex_instr         : instruction whose opcode (bits 31..26) and rt field
--                      (bits 20..16) are examined.
--   lreg             : register number compared against the rt field.
--   clk              : the architecture re-evaluates on every change of clk.
--   fwd              : '1' when ex_instr is a store word (opcode 43) whose rt
--                      field equals lreg.
entity modernlwsw is

generic (modernlwsw_delay : TIME := 0 ns);

port (ex_instr : in vlbit_1d(31 downto 0);

lreg : in vlbit_1d(4 downto 0);

clk : in vlbit;

fwd : out vlbit);

end modernlwsw;

------

-- purpose: determine when to forward

architecture behavior of modernlwsw is
begin  -- behavior

  -- Evaluated once at start-up and then on every change of clk: forward when
  -- the instruction is a store word whose rt field equals lreg.
  modernlwsw_process : process
    constant sw_op : integer := 43;
    variable is_store : boolean;
    variable same_reg : boolean;
  begin
    is_store := v1d2int(ex_instr(31 downto 26)) = sw_op;
    same_reg := (lreg = ex_instr(20 downto 16));

    fwd <= boo2vlb(is_store and same_reg) after modernlwsw_delay;

    -- equivalent to a sensitivity list of (clk)
    wait on clk;
  end process;

end behavior;

------

-- moderncontrol --

------

-- modernstallcontrol.vhd

library pack1076;

use pack1076.pack1076.all;

-- Interface of the stall control that compares the decode-stage instruction
-- with the registers pending in the MSHR.
--   modern_stall_delay : delay applied to stallmodern.
--   ex_instr           : not read by the architecture in this file.
--   dec_instr          : instruction whose rs/rt fields are checked.
--   clk                : the architecture re-evaluates after every change.
--   mshr_reg1/2        : pending destination registers; a register takes
--                        part in the comparison only while its bit 0 is known.
--   stallmodern        : '1' when dec_instr reads one of the pending
--                        registers.
entity modernstallcontrol is

generic (modern_stall_delay : TIME := 0 ns);

port (ex_instr : in vlbit_1d(31 downto 0);

dec_instr :in vlbit_1d(31 downto 0);

clk : in vlbit;

mshr_reg1 : in vlbit_1d(4 downto 0);

mshr_reg2 : in vlbit_1d(4 downto 0);

stallmodern : out vlbit);

end modernstallcontrol;

------

-- purpose: determine when to stall

architecture behavior of modernstallcontrol is
begin  -- behavior

  -- 2 ns after every clk change, decide whether the instruction in decode
  -- reads a register that is still pending in the MSHR.
  modern_stall_process : process
    -- opcodes referenced by the decode below
    constant RTYPE_op  : integer := 0;
    constant j_op      : integer := 2;
    constant jal_op    : integer := 3;
    constant beq_op    : integer := 4;
    constant bne_op    : integer := 5;
    constant lui_op    : integer := 15;
    -- R-type function codes referenced by the decode below
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct  : integer := 8;

    variable op_field    : integer;
    variable funct_field : integer;
    variable is_shift    : boolean;
    variable reads_rs    : boolean;
    variable reads_rt    : boolean;
    variable reg1_known  : boolean;
    variable reg2_known  : boolean;
    variable stall_v     : vlbit;
  begin  -- process
    wait until pchanging(clk);

    if pchanging(clk) then
      wait for 2 ns;

      stall_v := '0';                   -- default: no stall

      funct_field := v1d2int(dec_instr(5 downto 0));
      op_field    := v1d2int(dec_instr(31 downto 26));

      -- sll/srl/sra are the R-type instructions that do not read rs
      is_shift := (op_field = RTYPE_op) and
                  (funct_field = sll_funct or
                   funct_field = srl_funct or
                   funct_field = sra_funct);

      reads_rs := not (op_field = j_op or op_field = jal_op or
                       op_field = lui_op or is_shift);

      -- beq, bne and every R-type except jr are treated as reading rt
      reads_rt := (op_field = beq_op) or (op_field = bne_op) or
                  ((op_field = RTYPE_op) and not (funct_field = jr_funct));

      -- an MSHR register only counts while it holds a known value
      reg1_known := not bitunknown(mshr_reg1(0));
      reg2_known := not bitunknown(mshr_reg2(0));

      if reads_rs and
         (((dec_instr(25 downto 21) = mshr_reg1) and reg1_known) or
          ((dec_instr(25 downto 21) = mshr_reg2) and reg2_known)) then
        stall_v := '1';
      end if;

      if reads_rt and
         (((dec_instr(20 downto 16) = mshr_reg1) and reg1_known) or
          ((dec_instr(20 downto 16) = mshr_reg2) and reg2_known)) then
        stall_v := '1';
      end if;

      stallmodern <= stall_v after modern_stall_delay;
    end if;
  end process;

end behavior;

------

--

-- addr2addr.vhd

-- Breaks an address into its index bits

--

library pack1076;

use pack1076.pack1076.all;

-- Interface of the address splitter.
--   addr2addr_delay : delay applied to all outputs.
--   address         : 10-bit address to split.
--   tag             : address bits 9..3.
--   index           : address bits 2..1.
--   a_block         : address bit 0.
entity addr2addr is

generic (addr2addr_delay : TIME := 0 ns);

port (address : in vlbit_1d(9 downto 0);

tag : out vlbit_1d(6 downto 0);

index : out vlbit_1d(1 downto 0);

a_block : out vlbit);

end addr2addr;

------

-- purpose: split the address into its tag, index and block bits

architecture behavior of addr2addr is
begin

  -- Splits the 10-bit address into its three fields; runs once at start-up
  -- and again on every change of address.
  addr2addr_process : process
  begin
    tag     <= address(9 downto 3) after addr2addr_delay;  -- upper 7 bits
    index   <= address(2 downto 1) after addr2addr_delay;  -- next 2 bits
    a_block <= address(0) after addr2addr_delay;           -- lowest bit

    -- equivalent to a sensitivity list of (address)
    wait on address;
  end process;

end behavior;

------

------

--

-- addr2status

-- gets the tag from the address, and assigns a valid bit

--

library pack1076;

use pack1076.pack1076.all;

-- Interface of the address-to-status converter.
--   addr2status_delay : delay applied to the outputs.
--   address           : 10-bit address.
--   tag               : 7-bit tag output.
--   valid, dirty      : status bits produced alongside the tag.
entity addr2status is

generic (addr2status_delay : TIME := 0 ns);

port (address : in vlbit_1d(9 downto 0);

tag : out vlbit_1d(6 downto 0);

valid : out vlbit;

dirty : out vlbit);

end addr2status;

------

-- purpose: convert address to status bits

architecture behavior of addr2status is

begin

addr2status_process : process (address)

begin -- process process

tag(6 downto 0) <= address(9 downto 3) after addr2status_delay;

--if (address(0) = '1') then

valid <= '1' after addr2status_delay;

--else

--valid <= '0' after addr2status_delay;

--end if;

dirty <= '0' after addr2status_delay;