Richard Allen, Nathan Wooster, Andrew Yee, James Hillman, Sec. 102
Appendix
Block Summaries of selected components:
The Reorder Buffer
The entries in the reorder buffer are added when the instruction is in the execute stage, and the data is written to the buffer when the instruction either finishes execution (as in the case of an add), or when the fetch from memory is finished. Commitment to the register file occurs in order, which allows for precise exceptions, and takes care of write after write hazards. Also, we take care of forwarding out of the buffer in case the instruction going into the execute stage depends on something that hasn’t been committed. We may have to forward as many as two values from the buffer.
The MSHR
When a load reaches the memory stage and misses, we look for a free spot in the MSHR, stalling if it is full. We decided not to use F/E bits in the register file, but rather to have the control look at the MSHR to determine which registers are waiting for something from memory. The MSHR deals with read after write hazards -- this might happen if we get an arithmetic instruction depending on the result of a previous load which missed, and is now in the MSHR. In this case, we stall the execution of the add until the value comes out of memory.
The Victim Cache
This cache holds 8 words of data and sits between the data cache and the DRAM module. When we have a memory request, we search the data cache and the victim cache in parallel. If the requested data is found in the victim cache, it is moved to the proper spot in the data cache, and the piece of data in the data cache that it replaces is moved to the victim cache.
Schematics
The first seven pages are the top level schematic. Zoom in for a better view.
Sorry if they are a bit fuzzy. We used Windows’ “screen capture” function.
And now for the victim cache:
Test programs
We used the “mystery” programs from the previous labs.
VHDL
And now for the VHDL files… (Almost 100 pages)
-- modernlw_fwd_ctrl.vhd
-- takes care of forwarding the values for the MSHR loads
library pack1076;
use pack1076.pack1076.all;
entity modernlw_forwardctrl is
  generic (fwd_delay : TIME := 5 ns);
  port (
    lreg      : in  vlbit_1d(4 downto 0);   -- register number compared against rs/rt
    dec_instr : in  vlbit_1d(31 downto 0);  -- instruction word whose rs/rt fields are checked
    fwd_rs    : out vlbit;                  -- '1' when rs is read, non-zero and equal to lreg
    fwd_rt    : out vlbit);                 -- '1' when rt is read, non-zero and equal to lreg
end modernlw_forwardctrl;

------
-- Forwarding decision for load data: raise fwd_rs / fwd_rt when the
-- instruction on dec_instr reads a source register equal to lreg.
-- Both outputs are '0' while bit 0 of lreg is unknown.
architecture behavior of modernlw_forwardctrl is
begin
  modernlw_fwd_process : process (lreg, dec_instr)
    -- opcodes (dec_instr(31 downto 26)) that matter for source-register use
    constant RTYPE_op  : integer := 0;
    constant j_op      : integer := 2;
    constant jal_op    : integer := 3;
    constant beq_op    : integer := 4;
    constant bne_op    : integer := 5;
    constant lui_op    : integer := 15;
    constant sw_op     : integer := 43;
    -- function codes (dec_instr(5 downto 0)) of the R-type special cases
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct  : integer := 8;

    variable op       : integer;
    variable fn       : integer;
    variable is_shift : boolean;
    variable uses_rs  : boolean;
    variable uses_rt  : boolean;
    variable rs_hit   : vlbit;
    variable rt_hit   : vlbit;
  begin
    -- default: no forwarding
    rs_hit := '0';
    rt_hit := '0';

    if not bitunknown(lreg(0)) then
      op := v1d2int(dec_instr(31 downto 26));
      fn := v1d2int(dec_instr(5 downto 0));

      -- rs is read by everything except j, jal, lui and the R-type shifts
      is_shift := (op = RTYPE_op) and
                  (fn = sll_funct or fn = srl_funct or fn = sra_funct);
      uses_rs  := not (op = j_op or op = jal_op or op = lui_op or is_shift);

      -- rt is read by beq, bne, sw and every R-type except jr
      uses_rt  := (op = beq_op) or (op = bne_op) or (op = sw_op) or
                  ((op = RTYPE_op) and not (fn = jr_funct));

      -- never forward for register 0
      if uses_rs and NOT(dec_instr(25 downto 21) = ("00000")) and
         (dec_instr(25 downto 21) = lreg) then
        rs_hit := '1';
      end if;

      if uses_rt and NOT(dec_instr(20 downto 16) = ("00000")) and
         (dec_instr(20 downto 16) = lreg) then
        rt_hit := '1';
      end if;
    end if;

    fwd_rs <= rs_hit after fwd_delay;
    fwd_rt <= rt_hit after fwd_delay;
  end process;
end behavior;
------
-- The MSHR (keeps track of 2 outstanding loads)
--
library pack1076;
use pack1076.pack1076.all;
-- MSHR: sits between the pipeline's memory stage and the data memory system.
-- With no load outstanding it passes requests straight through; when a load
-- misses (dwait = '1') it records the destination register and address in
-- one of two table entries and delivers the data, together with the
-- destination register on lreg, once dwait drops.
entity mshr is
generic (delay : TIME := 5 ns);
port (
signal clk : in vlbit;
signal dwait : in vlbit; -- tells us not to start any mem requests
-- this signal is from the memory system
-- stall request: raised for a store that misses, or a store arriving while
-- a load is outstanding
signal stall_mshr : out vlbit;
signal dr_w_in : in vlbit; -- tells us if we are doing a load ('0') or store ('1')
signal dr_w_out : out vlbit;
-- request / address / store-data: *_in from the pipeline, *_out to memory
signal drequest_in : in vlbit;
signal drequest_out : out vlbit;
signal daddr_in : in vlbit_1d(9 downto 0);
signal daddr_out : out vlbit_1d(9 downto 0);
signal ddin : in vlbit_1d(31 downto 0);
signal ddout : out vlbit_1d(31 downto 0);
-- load data: memdata_in is copied to memdata_out when a load completes;
-- memdata_valid is set to '1' at that point and cleared again on every
-- non-falling clock event
signal memdata_in : in vlbit_1d(31 downto 0);
signal memdata_out : out vlbit_1d(31 downto 0);
signal memdata_valid : out vlbit;
-- destination registers of the two table entries ("XXXXX" when the entry is free)
signal reg_out1 : out vlbit_1d(4 downto 0); -- control looks at these
signal reg_out2 : out vlbit_1d(4 downto 0); -- two output ports
-- instruction word; bits 20 downto 16 (rt) are taken as the load destination
signal inst_m : in vlbit_1d(31 downto 0);
-- destination register of the load data currently on memdata_out
signal lreg : out vlbit_1d(4 downto 0));
-- might need an output port coupled with the load data telling the dest reg
end mshr;
------
-- Behavioral model. The process wakes on every clk change and dispatches on
-- (valid1, valid2, clock-edge kind, drequest_in). Entry 1 is the load
-- currently being serviced; entry 2 is a second load queued behind it.
-- There is no branch for valid1 = '0' with valid2 = '1'.
architecture behavior of mshr is
begin
mshr_process : process -- (clk)
-- variable state : integer := 0;
variable valid1 : vlbit := '0';-- all the entries in the table start
variable valid2 : vlbit := '0';-- off as invalid
-- destination register of each entry
variable reg_wr1 : vlbit_1d(4 downto 0) := ("XXXXX");
variable reg_wr2 : vlbit_1d(4 downto 0) := ("XXXXX");
-- miss address of each entry
variable addr_miss1 : vlbit_1d(9 downto 0);
variable addr_miss2 : vlbit_1d(9 downto 0);
-- true when the clk event that woke the process was a falling edge
variable fallingedge : boolean := false;
-- "no register" marker for lreg and for freed entries
constant allx : vlbit_1d(4 downto 0) := ("XXXXX");
--constant allz : vlbit_1d(31 downto 0) := ("ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ");
--variable sw_coming : boolean := false;
--variable sw_fwd_value : vlbid_1d(31 downto 0) := ("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
begin -- needed
wait until (pchanging(clk));
fallingedge := pfalling(clk);
-- on every non-falling clock event, withdraw the previous load result
if not fallingedge then
memdata_valid <= '0';
lreg <= allx;
end if;
wait for 2 ns; -- wait for drequest to come out of pipeline reg : used to wait for 5 ns;
-- ---- table empty, request present ----
if (valid1 = '0' and valid2 = '0' and drequest_in = '1') then
-- send it to mem, and then check for a hit 1/2 a cycle later
-- ON HITS: send it out
-- ON MISSES:
-- if it's a "sw", then we pass on dwait to stall_mshr
-- if it's a "lw", then we latch things in and set the valid bits, etc...
if fallingedge then
-- falling edge: pass the request through to the memory system unchanged
-- put("valid1 = 0, valid2 = 0, pfalling(clk), drequest_in = 1");
drequest_out <= drequest_in;
dr_w_out <= dr_w_in;
daddr_out <= daddr_in;
--if sw_coming then
--ddout <= sw_fwd_value;
--sw_coming := false;
--else
ddout <= ddin;
--end if;
end if;
-- now check to see if it was a hit in the cache
if (not fallingedge) then
wait for 0 ns; -- used to be 11
-- check dwait
if (dwait = '0') then -- hit
stall_mshr <= '0';
memdata_out <= memdata_in;
-- only a load reports a destination register and valid data
if (dr_w_in = '0') then
lreg <= inst_m(20 downto 16);
memdata_valid <= '1';-- need to figure out when to deassert
end if;
else
-- put("valid1 = 0, valid2 = 0, not(pfalling(clk)), dwait = 1, drequest_in = 1");
if (dr_w_in = '1') then-- store word instruction
-- store miss: hold the stall until the memory system releases dwait;
-- the process blocks here and ignores clock events in the meantime
stall_mshr <= '1';
-- watch the dwait line
wait until dwait = '0';
stall_mshr <= '0';
drequest_out <= '0';
-- might need to check the previous line
else
-- load miss: no stall, record it in entry 1
stall_mshr <= '0';
valid1 := '1';
addr_miss1 := daddr_in;
reg_wr1 := inst_m(20 downto 16); -- rt
end if;
end if;
end if;
-- this is where we left off
-- ---- table empty, no request: idle outputs ----
elsif (valid1 = '0' and valid2 = '0' and drequest_in = '0') then
stall_mshr <= '0';
drequest_out <= '0';
dr_w_out <= '0';
-- ---- one load outstanding, non-falling edge ----
elsif (valid1 = '1' and valid2 = '0' and not(fallingedge)) then
wait for 0 ns; -- used to be 11
-- check dwait
if (dwait = '0') then -- hit
-- data for entry 1 has arrived: deliver it and free the entry
-- put("valid1 = 1, valid2 = 0, not(pfalling(clk)), dwait = 0");
memdata_out <= memdata_in;
memdata_valid <= '1';
lreg <= reg_wr1;
drequest_out <= '0';
valid1 := '0';
reg_wr1 := allx;
end if;
-- ---- two loads outstanding, non-falling edge ----
elsif (valid1 = '1' and valid2 = '1' and not(fallingedge)) then
wait for 0 ns; -- used to be 11
-- check dwait
-- if (dwait = '1' and drequest_in = '1') then
--stall_mshr <= '1';
-- end if;
if (dwait = '0' and drequest_in = '0') then
-- output value and set valid2 to 0, valid1 to 1, lower drequest_out, etc...
-- make the next request
stall_mshr <= '0' after delay; -- added
memdata_out <= memdata_in;
memdata_valid <= '1';
lreg <= reg_wr1;
-- copy the "2" values to "1" values
reg_wr1 := reg_wr2;
reg_wr2 := allx;
addr_miss1 := addr_miss2;
valid2 := '0';
valid1 := '1'; -- just in case
-- issue the request for the promoted entry only at the next clock event
wait until pchanging(clk);
drequest_out <= '1';
dr_w_out <= '0'; -- load
daddr_out <= addr_miss1;
end if;
-- NOTE(review): this is a separate "if", not an "elsif". When the block
-- above has run, this condition is evaluated after its wait has returned,
-- i.e. against dwait/drequest_in as of the later clock event -- confirm
-- that this is intended.
if (dwait = '0' and drequest_in = '1') then
stall_mshr <= '0' after delay; -- added
memdata_out <= memdata_in;
memdata_valid <= '1';
lreg <= reg_wr1;
addr_miss1 := addr_miss2;
reg_wr1 := reg_wr2;
if dr_w_in = '1' then -- store
-- the incoming store is not latched; the pipe is stalled instead
stall_mshr <= '1';
valid2 := '0';
reg_wr2 := allx;
else
-- the incoming load takes over entry 2
valid2 := '1';
addr_miss2 := daddr_in;
reg_wr2 := inst_m(20 downto 16);
end if;
wait until pchanging(clk);
-- now make the request
daddr_out <= addr_miss1;
dr_w_out <= '0';
drequest_out <= '1';
end if;
-- ---- one load outstanding, falling edge ----
elsif (valid1 = '1' and valid2 = '0' and fallingedge) then
wait for 11 ns;
if (dwait = '1' and drequest_in = '1') then
-- put("valid1 = 1, valid2 = 0, pfalling(clk), dwait = 1, drequest_in = 1");
-- latch in the request, make sure that it's a load
-- if it's a store, we stall the pipe and finish the pending load
if dr_w_in = '1' then
stall_mshr <= '1';
else
reg_wr2 := inst_m(20 downto 16);
addr_miss2 := daddr_in;
valid2 := '1';
end if;
end if;
if (dwait = '0' and drequest_in = '0') then
-- put("valid1 = 1, valid2 = 0, pfalling(clk), dwait = 0, drequest_in = 0");
-- output value and set valid1 to 0, lower drequest_out, etc...
memdata_out <= memdata_in;
memdata_valid <= '1';
lreg <= reg_wr1;
drequest_out <= '0';
valid1 := '0';
reg_wr1 := allx;
end if;
if (dwait = '0' and drequest_in = '1') then
-- put("valid1 = 1, valid2 = 0, pfalling(clk), dwait = 0, drequest_in = 1");
-- output the value and latch in the request if it's a load
-- if it was a store, stall
-- NOTE(review): the code below does not test dr_w_in; it delivers entry 1
-- and passes the new request straight through to memory whether it is a
-- load or a store, which differs from the two comment lines above -- confirm.
memdata_out <= memdata_in;
memdata_valid <= '1';
lreg <= reg_wr1;
drequest_out <= '1';
valid1 := '0';
reg_wr1 := allx;
dr_w_out <= dr_w_in;
daddr_out <= daddr_in;
ddout <= ddin;
end if;
-- ---- two loads outstanding, falling edge ----
elsif (valid1 = '1' and valid2 = '1' and fallingedge) then
-- wait for 11 ns;
-- table full and memory still busy: a new request has to stall
if (dwait = '1' and drequest_in = '1') then
stall_mshr <= '1';
end if;
if (dwait = '0' and drequest_in = '0') then
-- output value and set valid2 to 0, valid1 to 1, lower drequest_out, etc...
-- make the next request
stall_mshr <= '0' after delay; -- added
memdata_out <= memdata_in;
memdata_valid <= '1';
lreg <= reg_wr1;
-- copy the "2" values to "1" values
reg_wr1 := reg_wr2;
reg_wr2 := allx;
addr_miss1 := addr_miss2;
drequest_out <= '1';
dr_w_out <= '0'; -- load
daddr_out <= addr_miss1;
valid2 := '0';
valid1 := '1'; -- just in case
end if;
if (dwait = '0' and drequest_in = '1') then
stall_mshr <= '0' after delay; -- added
memdata_out <= memdata_in;
memdata_valid <= '1';
lreg <= reg_wr1;
-- request entry 2's address (it becomes entry 1 just below)
daddr_out <= addr_miss2;
dr_w_out <= '0';
drequest_out <= '1';
addr_miss1 := addr_miss2;
reg_wr1 := reg_wr2;
if dr_w_in = '1' then -- store
stall_mshr <= '1';
valid2 := '0';
reg_wr2 := allx;
else
valid2 := '1';
addr_miss2 := daddr_in;
reg_wr2 := inst_m(20 downto 16);
end if;
-- check for a store instruction, if it is, stall the pipe
-- otherwise, it's a load, so output the data and latch in the appropriate stuff
-- make another mem request
end if;
end if;
-- publish the pending destination registers on every pass through the process
reg_out1 <= reg_wr1 after delay;
reg_out2 <= reg_wr2 after delay;
-- PROBABLY IGNORE THIS:
-- also need to check that the current instruction going into
-- the execute stage does not depend on one of the outstanding loads
-- (assume that the stall and forwarding control takes care of this)
-- also need to worry about stores after loads:
-- lw $2, 0
-- lw $3, 4
-- sw $4, 8 <-- we need to stall until the outstanding load finishes
-- this hazard needs to be detected in the memory stage
-- in the next portion of code, make the request to the dcache controller
-- make sure there is no store instruction in the way. The store should be
-- done first, making sure that the store doesn't depend on any of the loads
end process;
end behavior;
------
library pack1076;
use pack1076.pack1076.all;
-- Reorder buffer: an 8-entry circular queue of destination registers.
-- Entries are appended in program order, filled in when a result arrives
-- (regular results on value/reg/valid, load results on lvalue/lreg/lvalid)
-- and committed to the register file from the head, one per falling edge.
entity reorder is
  generic (delay : TIME := 3 ns);
  port (clk : in vlbit;
        inst : in vlbit_1d(31 downto 0); -- this is the instruction to be added (in the exec stage)
        -- now for the ports having to do with forwarding:
        dec_instr : in vlbit_1d(31 downto 0); -- the instruction in the decode stage, for forwarding
        reorder_rs_val : out vlbit_1d(31 downto 0);
        fwd_rs_reorder : out vlbit;
        reorder_rt_val : out vlbit_1d(31 downto 0);
        fwd_rt_reorder : out vlbit;
        -- end forwarding related ports
        value : in vlbit_1d(31 downto 0);  -- result of a non-load instruction
        reg : in vlbit_1d(4 downto 0);     -- destination register that "value" belongs to
        valid : in vlbit;                  -- '1' when value/reg are to be recorded
        lvalue : in vlbit_1d(31 downto 0); -- load data
        lreg : in vlbit_1d(4 downto 0);    -- destination register that "lvalue" belongs to
        lvalid : in vlbit;                 -- '1' when lvalue/lreg are to be recorded
        stall : in vlbit;                  -- a new entry is appended only while this is '1'
        full : out vlbit;                  -- '1' when all 8 entries are in use
        regwr : out vlbit_1d(4 downto 0);  -- commit port: register number
        dout : out vlbit_1d(31 downto 0);  -- commit port: data
        writereg : out vlbit;              -- commit port: write strobe
        ctrl_e : in vlbit_1d(1 downto 0)); -- selects the destination field of inst:
                                           -- "00" -> bits 20..16, "01" -> bits 15..11,
                                           -- anything else -> register 31
end reorder;

-- if there's an instruction stall, then don't load inst into the reorder buffer!!
-- NOTE(review): the process below appends an entry when stall = '1', which
-- reads as the opposite of the line above unless "stall" is active low --
-- confirm the polarity against the driver of this port.
architecture behavior of reorder is
begin -- behavior
  reorder_process: process
    -- variables and constants having to do with forwarding
    -- (only referenced by the disabled forwarding search below)
    variable reorder_rs_val_tmp : vlbit_1d(31 downto 0);
    variable reorder_rt_val_tmp : vlbit_1d(31 downto 0);
    variable rs_read : boolean;
    variable rt_read : boolean;
    variable rs_tmp : vlbit;
    variable rt_tmp : vlbit;
    variable opcode : integer;
    variable fcode : integer;
    constant RTYPE_op : integer := 0;
    constant j_op : integer := 2;
    constant jal_op : integer := 3;
    constant beq_op : integer := 4;
    constant bne_op : integer := 5;
    constant lui_op : integer := 15;
    constant sw_op : integer := 43;
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct : integer := 8;
    -- end variables and constants having to do with forwarding

    type thirtytwowide_array is array (0 to 7) of vlbit_1d(31 downto 0);
    type fivewide_array is array (0 to 7) of vlbit_1d(4 downto 0);
    type bool_array is array (0 to 7) of boolean;
    constant lw_op : integer := 35;
    constant allx : vlbit_1d(31 downto 0) := ("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
    constant noreg : vlbit_1d(4 downto 0) := ("XXXXX");

    -- per-entry state of the circular buffer
    variable data : thirtytwowide_array;  -- result value
    variable dest : fivewide_array;       -- destination register (noreg when free)
    variable done : bool_array;           -- result has been recorded
    variable lw : bool_array;             -- entry belongs to a load
    variable bufferstart : integer := 0;  -- head: oldest entry, next to commit
    variable bufferend : integer := 0;    -- tail: next free slot
    variable i : integer := 0;
    variable keepgoing : boolean := true;
    variable fulltemp : boolean := false;
  begin -- process reorder
    wait until pchanging(clk);

    if (not pfalling(clk)) then -- synchronous stuff
      wait for 6 ns;
      -- add new instruction to buffer
      if stall = '1' then
        lw(bufferend) := v1d2int(inst(31 downto 26)) = lw_op;
        done(bufferend) := false;
        if (ctrl_e = ("00")) then
          dest(bufferend) := inst(20 downto 16);
        elsif (ctrl_e = ("01")) then
          dest(bufferend) := inst(15 downto 11);
        else
          dest(bufferend) := ("11111");
        end if;
        bufferend := (bufferend + 1) mod 8;
        -- tail caught up with head right after an insert => buffer is full
        fulltemp := ((bufferend - bufferstart) mod 8) = 0;
      end if;
    end if;

    -- now on to the stuff having to do with forwarding
    if pfalling(clk) then
      -- The forwarding search is switched off with "if false"; it is kept
      -- verbatim. While it is off, rs_tmp, rt_tmp and the *_val_tmp
      -- variables are never assigned.
      if false then
        rs_tmp := '0';
        rt_tmp := '0';
        opcode := v1d2int(dec_instr(31 downto 26));
        fcode := v1d2int(dec_instr(5 downto 0));
        rs_read := NOT(opcode = j_op or opcode = jal_op or opcode = lui_op or
                       (opcode = RTYPE_op and (fcode = sll_funct or
                                               fcode = srl_funct or
                                               fcode = sra_funct)));
        rt_read := (opcode = beq_op or opcode = bne_op or opcode = sw_op or
                    (opcode = RTYPE_op and NOT(fcode = jr_funct)));
        if rs_read then
          if NOT(dec_instr(25 downto 21) = ("00000")) then
            -- should search through the reorder buffer, looking for dependencies
            i := 0;
            keepgoing := true;
            while keepgoing loop
              if (i = 8) then
                keepgoing := false; -- didn't find a dependency
              elsif (dec_instr(25 downto 21) = dest(i)) and done(i) then
                put ("found rs dependency -- forwarding the value!");
                reorder_rs_val_tmp := data(i);
                rs_tmp := '1';
                keepgoing := false;
              end if;
              i := i + 1;
            end loop;
          end if;
        end if;
        if rt_read then
          if NOT(dec_instr(20 downto 16) = ("00000")) then
            -- should search through the reorder buffer, looking for dependencies
            i := 0;
            keepgoing := true;
            while keepgoing loop
              if (i = 8) then
                keepgoing := false; -- didn't find a dependency
              elsif (dec_instr(20 downto 16) = dest(i)) and done(i) then
                put ("found rt dependency -- forwarding the value!");
                reorder_rt_val_tmp := data(i);
                rt_tmp := '1';
                keepgoing := false;
              end if;
              i := i + 1;
            end loop;
          end if;
        end if;
      end if;

      wait for 17 ns;
      -- default: nothing is committed this cycle
      dout <= allx;
      regwr <= noreg;
      writereg <= '0';

      -- record the result of a regular (non-load) instruction: search from
      -- the head for the oldest non-load entry whose destination is "reg"
      if (valid = '1') then
        i := 0;
        keepgoing := true;
        while keepgoing loop
          if (i = 8) then
            -- every slot was examined without a match: leave the buffer
            -- untouched instead of marking the head entry done with a
            -- value that does not belong to it
            keepgoing := false;
          elsif (not lw((bufferstart + i) mod 8)) and (dest((bufferstart + i) mod 8) = reg) then
            data((bufferstart + i) mod 8) := value;
            done((bufferstart + i) mod 8) := true;
            keepgoing := false;
          end if;
          i := i + 1;
        end loop;
      end if;

      -- record load data: search from the head for the oldest entry whose
      -- destination is "lreg"
      if lvalid = '1' then
        i := 0;
        keepgoing := true;
        while keepgoing loop
          if (i = 8) then
            keepgoing := false; -- no matching entry: nothing to record
          elsif (dest((bufferstart + i) mod 8) = lreg) then
            data((bufferstart + i) mod 8) := lvalue;
            done((bufferstart + i) mod 8) := true;
            keepgoing := false;
          end if;
          i := i + 1;
        end loop;
      end if;

      -- commit the head entry, in order, once its result is available
      if done(bufferstart) then
        writereg <= '1';
        regwr <= dest(bufferstart);
        dout <= data(bufferstart);
        -- wipe the retiring slot (all of its fields, including the load flag)
        dest(bufferstart) := noreg;
        data(bufferstart) := allx;
        done(bufferstart) := false;
        lw(bufferstart) := false;
        bufferstart := (bufferstart + 1) mod 8;
        if fulltemp then
          fulltemp := ((bufferend - bufferstart) mod 8) = 0;
        end if;
      end if;
      full <= boo2vlb(fulltemp);
    end if; -- matches "if pfalling(clk) then"

    fwd_rs_reorder <= rs_tmp after delay;
    fwd_rt_reorder <= rt_tmp after delay;
    reorder_rs_val <= reorder_rs_val_tmp after delay;
    reorder_rt_val <= reorder_rt_val_tmp after delay;
  end process;
end behavior;
------
-- reorderstallcontrol --
------
-- reorderstallcontrol.vhd
library pack1076;
use pack1076.pack1076.all;
entity reorderstallcontrol is
  generic (reorder_stall_delay : TIME := 0 ns);
  port (ex_instr    : in  vlbit_1d(31 downto 0);  -- not read by the architecture below
        dec_instr   : in  vlbit_1d(31 downto 0);  -- instruction whose rs/rt fields are checked
        clk         : in  vlbit;
        mshr_reg1   : in  vlbit_1d(4 downto 0);   -- first register compared against rs/rt
        mshr_reg2   : in  vlbit_1d(4 downto 0);   -- second register compared against rs/rt
        stallmodern : out vlbit);                 -- '1' when a source register matches
end reorderstallcontrol;

------
-- Stall decision: 20 ns after every clock event, raise stallmodern when
-- dec_instr reads a register equal to mshr_reg1 or mshr_reg2. A register
-- input only counts while its bit 0 is not unknown.
architecture behavior of reorderstallcontrol is
begin
  reorder_stall_process : process
    -- opcodes / function codes that decide which source fields are read
    constant RTYPE_op  : integer := 0;
    constant j_op      : integer := 2;
    constant jal_op    : integer := 3;
    constant beq_op    : integer := 4;
    constant bne_op    : integer := 5;
    constant lui_op    : integer := 15;
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct  : integer := 8;

    variable op_field  : integer;
    variable fn_field  : integer;
    variable is_shift  : boolean;
    variable reads_rs  : boolean;
    variable reads_rt  : boolean;
    variable pending1  : boolean;
    variable pending2  : boolean;
    variable stall_bit : vlbit;
  begin
    wait until pchanging(clk);
    if (pchanging(clk)) then
      wait for 20 ns;

      fn_field := v1d2int(dec_instr(5 downto 0));
      op_field := v1d2int(dec_instr(31 downto 26));

      -- rs is read by everything except j, jal, lui and the R-type shifts
      is_shift := (op_field = RTYPE_op) and
                  (fn_field = sll_funct or fn_field = srl_funct or fn_field = sra_funct);
      reads_rs := not (op_field = j_op or op_field = jal_op or
                       op_field = lui_op or is_shift);
      -- rt is read by beq, bne and every R-type except jr
      reads_rt := (op_field = beq_op) or (op_field = bne_op) or
                  ((op_field = RTYPE_op) and not (fn_field = jr_funct));

      pending1 := not bitunknown(mshr_reg1(0));
      pending2 := not bitunknown(mshr_reg2(0));

      stall_bit := '0';
      if reads_rs and
         (((dec_instr(25 downto 21) = mshr_reg1) and pending1) or
          ((dec_instr(25 downto 21) = mshr_reg2) and pending2)) then
        stall_bit := '1';
      end if;
      if reads_rt and
         (((dec_instr(20 downto 16) = mshr_reg1) and pending1) or
          ((dec_instr(20 downto 16) = mshr_reg2) and pending2)) then
        stall_bit := '1';
      end if;

      stallmodern <= stall_bit after reorder_stall_delay;
    end if;
  end process;
end behavior;
------
-- modernlwsw.vhd
-- takes care of stores after loads with the MSHR
library pack1076;
use pack1076.pack1076.all;
entity modernlwsw is
  generic (modernlwsw_delay : TIME := 0 ns);
  port (ex_instr : in  vlbit_1d(31 downto 0);  -- instruction examined for a store
        lreg     : in  vlbit_1d(4 downto 0);   -- register compared against the store's rt
        clk      : in  vlbit;
        fwd      : out vlbit);                 -- '1' when ex_instr is a sw whose rt equals lreg
end modernlwsw;

------
-- Store-after-load forwarding decision. The process is sensitive to clk
-- only, so ex_instr and lreg are sampled at each clock event.
architecture behavior of modernlwsw is
begin
  modernlwsw_process : process (clk)
    constant sw_op : integer := 43;
    variable is_store : boolean;
    variable same_reg : boolean;
  begin
    is_store := (v1d2int(ex_instr(31 downto 26)) = sw_op);
    same_reg := (lreg = ex_instr(20 downto 16));
    fwd <= boo2vlb(is_store and same_reg) after modernlwsw_delay;
  end process;
end behavior;
------
-- moderncontrol --
------
-- modernstallcontrol.vhd
library pack1076;
use pack1076.pack1076.all;
entity modernstallcontrol is
  generic (modern_stall_delay : TIME := 0 ns);
  port (ex_instr    : in  vlbit_1d(31 downto 0);  -- not read by the architecture below
        dec_instr   : in  vlbit_1d(31 downto 0);  -- instruction whose rs/rt fields are checked
        clk         : in  vlbit;
        mshr_reg1   : in  vlbit_1d(4 downto 0);   -- first register compared against rs/rt
        mshr_reg2   : in  vlbit_1d(4 downto 0);   -- second register compared against rs/rt
        stallmodern : out vlbit);                 -- '1' when a source register matches
end modernstallcontrol;

------
-- Stall decision: 2 ns after every clock event, raise stallmodern when
-- dec_instr reads a register equal to mshr_reg1 or mshr_reg2. A register
-- input only counts while its bit 0 is not unknown.
architecture behavior of modernstallcontrol is
begin
  modern_stall_process : process
    -- opcodes / function codes that decide which source fields are read
    constant RTYPE_op  : integer := 0;
    constant j_op      : integer := 2;
    constant jal_op    : integer := 3;
    constant beq_op    : integer := 4;
    constant bne_op    : integer := 5;
    constant lui_op    : integer := 15;
    constant sll_funct : integer := 0;
    constant srl_funct : integer := 2;
    constant sra_funct : integer := 3;
    constant jr_funct  : integer := 8;

    variable op_code   : integer;
    variable fn_code   : integer;
    variable shift_op  : boolean;
    variable needs_rs  : boolean;
    variable needs_rt  : boolean;
    variable known1    : boolean;
    variable known2    : boolean;
    variable stall_out : vlbit;
  begin
    wait until pchanging(clk);
    if (pchanging(clk)) then
      wait for 2 ns;

      fn_code := v1d2int(dec_instr(5 downto 0));
      op_code := v1d2int(dec_instr(31 downto 26));

      -- rs is read by everything except j, jal, lui and the R-type shifts
      shift_op := (op_code = RTYPE_op) and
                  (fn_code = sll_funct or fn_code = srl_funct or fn_code = sra_funct);
      needs_rs := not (op_code = j_op or op_code = jal_op or
                       op_code = lui_op or shift_op);
      -- rt is read by beq, bne and every R-type except jr
      needs_rt := (op_code = beq_op) or (op_code = bne_op) or
                  ((op_code = RTYPE_op) and not (fn_code = jr_funct));

      known1 := not bitunknown(mshr_reg1(0));
      known2 := not bitunknown(mshr_reg2(0));

      stall_out := '0';
      if needs_rs and
         (((dec_instr(25 downto 21) = mshr_reg1) and known1) or
          ((dec_instr(25 downto 21) = mshr_reg2) and known2)) then
        stall_out := '1';
      end if;
      if needs_rt and
         (((dec_instr(20 downto 16) = mshr_reg1) and known1) or
          ((dec_instr(20 downto 16) = mshr_reg2) and known2)) then
        stall_out := '1';
      end if;

      stallmodern <= stall_out after modern_stall_delay;
    end if;
  end process;
end behavior;
------
--
-- addr2addr.vhd
-- Breaks an address into its index bits
--
library pack1076;
use pack1076.pack1076.all;
entity addr2addr is
  generic (addr2addr_delay : TIME := 0 ns);
  port (address : in  vlbit_1d(9 downto 0);  -- 10-bit address to split
        tag     : out vlbit_1d(6 downto 0);  -- address bits 9..3
        index   : out vlbit_1d(1 downto 0);  -- address bits 2..1
        a_block : out vlbit);                -- address bit 0
end addr2addr;

------
-- Splits the address into its tag, index and block fields; every output
-- follows the input after addr2addr_delay.
architecture behavior of addr2addr is
begin
  addr2addr_process : process (address)
  begin
    a_block <= address(0) after addr2addr_delay;
    index   <= address(2 downto 1) after addr2addr_delay;
    tag     <= address(9 downto 3) after addr2addr_delay;
  end process;
end behavior;
------
------
--
-- addr2status
-- gets the tag from the address, and assigns a valid bit
--
library pack1076;
use pack1076.pack1076.all;
entity addr2status is
  generic (addr2status_delay : TIME := 0 ns);
  port (address : in  vlbit_1d(9 downto 0);  -- 10-bit address
        tag     : out vlbit_1d(6 downto 0);  -- tag output
        valid   : out vlbit;                 -- valid status bit
        dirty   : out vlbit);                -- dirty status bit
end addr2status;
------
-- purpose: convert address to status bits
architecture behavior of addr2status is
begin
addr2status_process : process (address)
begin -- process process
tag(6 downto 0) <= address(9 downto 3) after addr2status_delay;
--if (address(0) = '1') then
valid <= '1' after addr2status_delay;
--else
--valid <= '0' after addr2status_delay;
--end if;
dirty <= '0' after addr2status_delay;