forked from Jerc007/Open-GPGPU-FlexGrip-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblock_scheduler.vhd
174 lines (158 loc) · 7.45 KB
/
block_scheduler.vhd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
----------------------------------------------------------------------------------
-- Company: University of Massachusetts
-- Engineer: Kevin Andryc
--
-- Create Date: 17:50:27 09/19/2010
-- Module Name: block_scheduler - arch
-- Project Name: GPGPU
-- Target Devices:
-- Tool versions: ISE 10.1
-- Description:
--
----------------------------------------------------------------------------
-- Revisions:
-- REV: Date: Description:
-- 0.1.a 9/13/2010 Created Top level file
----------------------------------------------------------------------------
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;
use work.gpgpu_package.all;
-- Block scheduler: splits a kernel grid (kernel_grid_x_in * kernel_grid_y_in
-- blocks) into batches of at most kernel_blocks_per_core_in blocks, hands each
-- batch to the SMP together with the kernel configuration, and waits for
-- smp_done_in before issuing the next batch.
-- The port notes below describe how architecture "arch" in this file uses
-- each port.
entity block_scheduler is
port(
-- Clock; the state machine advances on the rising edge.
clk_in : in std_logic;
-- Asynchronous, active-high reset: clears all outputs (smp_reset_out is
-- driven to '1') and returns the state machine to IDLE.
host_reset : in std_logic;
-- Sampled while idle; '1' starts scheduling of the kernel.
en : in std_logic;
-- Upper bound on the number of blocks issued in one batch.
kernel_blocks_per_core_in : in std_logic_vector(3 downto 0);
-- Forwarded unchanged to gprs_size_out.
kernel_num_gprs_in : in std_logic_vector(8 downto 0);
-- Static shared-memory size; one term of shmem_size_out.
kernel_shmem_size_in : in std_logic_vector(31 downto 0);
-- Parameter size; forwarded to parameter_size_out and one term of
-- shmem_size_out.
kernel_parameter_size_in : in std_logic_vector(15 downto 0);
-- Dynamic shared-memory size; one term of shmem_size_out.
kernel_dyn_shmem_size_in : in std_logic_vector(31 downto 0);
-- Block dimensions; forwarded to block_{x,y,z}_out. The low 12 bits of
-- their product drive threads_per_block_out.
kernel_block_x_in : in std_logic_vector(15 downto 0);
kernel_block_y_in : in std_logic_vector(15 downto 0);
kernel_block_z_in : in std_logic_vector(15 downto 0);
-- Grid dimensions; forwarded to grid_{x,y}_out. Their product is the total
-- number of blocks to schedule.
kernel_grid_x_in : in std_logic_vector(15 downto 0);
kernel_grid_y_in : in std_logic_vector(15 downto 0);
-- Awaited after each batch is issued; '1' lets the scheduler move on to
-- the next batch.
smp_done_in : in std_logic;
-- Low 12 bits of block_x * block_y * block_z.
threads_per_block_out : out std_logic_vector(11 downto 0);
-- Number of blocks in the batch currently being issued.
num_blocks_out : out std_logic_vector(3 downto 0);
-- Always driven to zero by this architecture.
shmem_base_addr_out : out std_logic_vector(31 downto 0);
-- (parameter size + shmem size + dynamic shmem size + 16) rounded up to a
-- multiple of 256.
shmem_size_out : out std_logic_vector(31 downto 0);
-- Copy of kernel_parameter_size_in, latched per batch.
parameter_size_out : out std_logic_vector(15 downto 0);
-- Copy of kernel_num_gprs_in, latched per batch.
gprs_size_out : out std_logic_vector(8 downto 0);
-- Copies of the kernel block and grid dimensions, latched per batch.
block_x_out : out std_logic_vector(15 downto 0);
block_y_out : out std_logic_vector(15 downto 0);
block_z_out : out std_logic_vector(15 downto 0);
grid_x_out : out std_logic_vector(15 downto 0);
grid_y_out : out std_logic_vector(15 downto 0);
-- Number of blocks already scheduled when the batch is issued, i.e. the
-- index of the first block of the batch.
block_idx_out : out std_logic_vector(15 downto 0);
-- NOTE(review): these two outputs are only cleared on host_reset and are
-- never assigned anywhere else in this file.
shmem_params_out : out std_logic_vector(15 downto 0);
cmem_params_out : out std_logic_vector(15 downto 0);
-- '1' on host_reset and after smp_done_in is seen; '0' while idle and while
-- a batch is being prepared.
smp_reset_out : out std_logic;
-- One-clock pulse issued once per batch.
smp_en_out : out std_logic;
-- '1' while the scheduler is idle; '0' during reset and while a kernel is
-- being scheduled.
rdy : out std_logic;
-- One-clock pulse raised when every block of the grid has been scheduled
-- and the last batch has reported smp_done_in.
kernel_done : out std_logic
);
end block_scheduler;
-- Architecture of the block scheduler.
--
-- A five-state machine walks over the kernel grid in batches:
--   IDLE            wait for en, clear the scheduled-block count
--   CALC_BLOCKS     size the next batch, or finish when the grid is exhausted
--   CHECK_BLOCKS    latch the kernel configuration onto the outputs
--   SCHEDULE_BLOCKS pulse smp_en_out and advance the scheduled-block count
--   SCHEDULE_WAIT   wait for smp_done_in, then reset the SMP and loop
architecture arch of block_scheduler is

    type block_scheduler_state_type is (IDLE, CALC_BLOCKS, CHECK_BLOCKS, SCHEDULE_BLOCKS, SCHEDULE_WAIT);
    signal block_scheduler_state_machine : block_scheduler_state_type;

    -- Total number of blocks in the grid (16-bit x 16-bit product).
    signal grid_dimension                : unsigned(31 downto 0);
    -- Size of the batch currently being issued.
    signal num_blocks                    : std_logic_vector(31 downto 0);
    -- NOTE(review): computed below but not read anywhere in this architecture.
    signal blocks_per_gpgpu              : std_logic_vector(11 downto 0);
    -- Number of blocks issued so far. It is 32 bits wide so that it can reach
    -- grid_dimension without wrapping: a 16-bit count wraps to zero once the
    -- grid holds 65536 or more blocks, the "count < grid_dimension" test in
    -- CALC_BLOCKS then never fails and kernel_done is never raised.
    signal blocks_scheduled_cnt          : std_logic_vector(31 downto 0);
    -- Full-width product of the three block dimensions.
    signal threads_per_block_i           : std_logic_vector(47 downto 0);
    -- Blocks of the grid that have not been issued yet.
    signal blocks_remaining              : std_logic_vector(31 downto 0);

    -- Returns whichever operand is smaller when both are read as unsigned
    -- numbers; on a tie the right-hand operand is returned. The result has
    -- the width of the operand that is returned, so callers should pass
    -- operands of equal width.
    function minimum(left, right : std_logic_vector) return std_logic_vector is
    begin
        if unsigned(left) < unsigned(right) then
            return left;
        else
            return right;
        end if;
    end function minimum;

begin

    grid_dimension      <= unsigned(kernel_grid_x_in) * unsigned(kernel_grid_y_in);
    blocks_per_gpgpu    <= std_logic_vector(unsigned(CORE_COUNT) * unsigned(kernel_blocks_per_core_in));
    threads_per_block_i <= std_logic_vector(unsigned(kernel_block_x_in) * unsigned(kernel_block_y_in) * unsigned(kernel_block_z_in));

    -- Both operands are 32 bits wide, so the subtraction cannot truncate.
    -- Forced to zero while host_reset is asserted.
    blocks_remaining    <= std_logic_vector(grid_dimension - unsigned(blocks_scheduled_cnt)) when host_reset /= '1' else (others => '0');

    pBlockSchedulerStateMachine : process(clk_in, host_reset)
    begin
        if (host_reset = '1') then
            -- Asynchronous reset: clear every output, hold the SMP in reset.
            rdy                           <= '0';
            num_blocks                    <= (others => '0');
            blocks_scheduled_cnt          <= (others => '0');
            threads_per_block_out         <= (others => '0');
            num_blocks_out                <= (others => '0');
            shmem_base_addr_out           <= (others => '0');
            shmem_size_out                <= (others => '0');
            gprs_size_out                 <= (others => '0');
            block_x_out                   <= (others => '0');
            block_y_out                   <= (others => '0');
            block_z_out                   <= (others => '0');
            grid_x_out                    <= (others => '0');
            grid_y_out                    <= (others => '0');
            block_idx_out                 <= (others => '0');
            shmem_params_out              <= (others => '0');
            cmem_params_out               <= (others => '0');
            parameter_size_out            <= (others => '0');
            smp_en_out                    <= '0';
            smp_reset_out                 <= '1';
            kernel_done                   <= '0';
            block_scheduler_state_machine <= IDLE;
        elsif (rising_edge(clk_in)) then
            case block_scheduler_state_machine is

                when IDLE =>
                    smp_en_out           <= '0';
                    smp_reset_out        <= '0';
                    kernel_done          <= '0';   -- ends the one-clock kernel_done pulse
                    rdy                  <= '1';
                    blocks_scheduled_cnt <= (others => '0');
                    if (en = '1') then
                        rdy                           <= '0';   -- overrides the default above
                        block_scheduler_state_machine <= CALC_BLOCKS;
                    end if;

                when CALC_BLOCKS =>
                    if (unsigned(blocks_scheduled_cnt) < grid_dimension) then
                        -- Next batch: at most kernel_blocks_per_core_in blocks,
                        -- fewer when the grid is nearly exhausted.
                        num_blocks                    <= minimum(x"0000000" & kernel_blocks_per_core_in, blocks_remaining);
                        smp_reset_out                 <= '0';
                        block_scheduler_state_machine <= CHECK_BLOCKS;
                    else
                        -- Every block has been issued and completed.
                        rdy                           <= '1';
                        kernel_done                   <= '1';
                        block_scheduler_state_machine <= IDLE;
                    end if;

                when CHECK_BLOCKS =>
                    if (unsigned(num_blocks) > 0) then
                        -- Only the low 12 bits of the 48-bit product fit the port.
                        threads_per_block_out         <= threads_per_block_i(11 downto 0);
                        num_blocks_out                <= num_blocks(3 downto 0);
                        shmem_base_addr_out           <= (others => '0');
                        -- Sum of the three sizes plus 16, rounded up to a multiple of 256.
                        shmem_size_out                <= std_logic_vector(unsigned(kernel_parameter_size_in) + unsigned(kernel_shmem_size_in) + unsigned(kernel_dyn_shmem_size_in) + 16 + 255) and (not x"000000FF");
                        parameter_size_out            <= kernel_parameter_size_in;
                        gprs_size_out                 <= kernel_num_gprs_in;
                        block_x_out                   <= kernel_block_x_in;
                        block_y_out                   <= kernel_block_y_in;
                        block_z_out                   <= kernel_block_z_in;
                        grid_x_out                    <= kernel_grid_x_in;
                        grid_y_out                    <= kernel_grid_y_in;
                        -- The port is 16 bits wide; block indices above 65535
                        -- cannot be represented on it.
                        block_idx_out                 <= blocks_scheduled_cnt(15 downto 0);
                        block_scheduler_state_machine <= SCHEDULE_BLOCKS;
                    else
                        -- NOTE(review): reached when the batch size is zero, which
                        -- appears to require kernel_blocks_per_core_in = 0. The
                        -- scheduler returns to IDLE without pulsing kernel_done.
                        block_scheduler_state_machine <= IDLE;
                    end if;

                when SCHEDULE_BLOCKS =>
                    -- num_blocks never exceeds blocks_remaining, so the count
                    -- stops exactly at grid_dimension and cannot overflow.
                    blocks_scheduled_cnt          <= std_logic_vector(unsigned(blocks_scheduled_cnt) + unsigned(num_blocks));
                    smp_en_out                    <= '1';
                    block_scheduler_state_machine <= SCHEDULE_WAIT;

                when SCHEDULE_WAIT =>
                    smp_en_out <= '0';   -- smp_en_out is high for one clock only
                    if (smp_done_in = '1') then
                        smp_reset_out                 <= '1';
                        block_scheduler_state_machine <= CALC_BLOCKS;
                    end if;

                -- All values of block_scheduler_state_type are covered above,
                -- so no "when others" branch is needed.
            end case;
        end if;
    end process;

end arch;