Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add zcu104 board to VivadoAccelerator backend #752

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hls4ml/backends/vivado_accelerator/supported_boards.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
"python_drivers": {"axi_stream": "axi_stream_driver.py"},
"c_drivers": {}
},
"zcu104": {
"part": "xczu7ev-ffvc1156-2-e",
"tcl_scripts": {"axi_stream": "axi_stream_design.tcl"},
"python_drivers": {"axi_stream": "axi_stream_driver.py"},
"c_drivers": {}
},
"alveo-u50": {
"part": "xcu50-fsvh2104-2-e",
"tcl_scripts": {"axi_stream": "axi_stream_design.tcl"},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from datetime import datetime

import numpy as np
from pynq import Overlay, allocate


class NeuralNetworkOverlay(Overlay):
    """PYNQ overlay that drives a neural-network accelerator through an AXI DMA.

    The DMA send/receive channels are looked up at ``hier_0.axi_dma_0`` in the
    loaded design, and one input and one output buffer are allocated up front
    and reused for every call to :meth:`predict`.

    Parameters:
    - bitfile_name : bitstream passed to ``pynq.Overlay``.
    - x_shape / y_shape : shapes of the input and output buffers.
    - dtype : the data type of the elements of the input/output buffers.
        Note: it should be set depending on the interface of the accelerator; if it uses 'float'
        types for the 'data' AXI-Stream field, 'np.float32' dtype is the correct one to use.
        Instead if it uses 'ap_fixed<A,B>', 'np.intA' is the correct one to use (note that A cannot
        be any integer value, but it can assume {..., 8, 16, 32, ...} values. Check `numpy`
        doc for more info).
        In this case the encoding/decoding has to be computed by the PS. For example for
        'ap_fixed<16,6>' type the following 2 functions are the correct one to use for encode/decode
        'float' -> 'ap_fixed<16,6>':
        ```
        def encode(xi):
            return np.int16(round(xi * 2**10)) # note 2**10 = 2**(A-B)
        def decode(yi):
            return yi * 2**-10
        encode_v = np.vectorize(encode) # to apply them element-wise
        decode_v = np.vectorize(decode)
        ```
    - dtbo, download, ignore_version, device : forwarded unchanged to ``pynq.Overlay``.
    """

    def __init__(
        self,
        bitfile_name,
        x_shape,
        y_shape,
        dtype=np.float32,
        dtbo=None,
        download=True,
        ignore_version=False,
        device=None,
    ):
        # Forward the caller's choices; hard-coding the defaults here would
        # silently ignore e.g. download=False or an explicit device.
        super().__init__(
            bitfile_name,
            dtbo=dtbo,
            download=download,
            ignore_version=ignore_version,
            device=device,
        )
        dma = self.hier_0.axi_dma_0
        self.sendchannel = dma.sendchannel
        self.recvchannel = dma.recvchannel
        self.input_buffer = allocate(shape=x_shape, dtype=dtype)
        self.output_buffer = allocate(shape=y_shape, dtype=dtype)

    def _print_dt(self, timea, timeb, N):
        """Print and return the elapsed seconds and throughput for ``N`` samples.

        ``timea`` and ``timeb`` are the start and end ``datetime`` objects.
        Returns ``(seconds, inferences_per_second)``.
        """
        # total_seconds() also accounts for the `days` component of the delta.
        dts = (timeb - timea).total_seconds()
        # Guard against a zero-length interval instead of raising ZeroDivisionError.
        rate = N / dts if dts > 0 else float("inf")
        print(f"Classified {N} samples in {dts} seconds ({rate} inferences / s)")
        return dts, rate

    def predict(self, X, debug=False, profile=False, encode=None, decode=None):
        """
        Obtain the predictions of the NN implemented in the FPGA.
        Parameters:
        - X : the input vector. Should be numpy ndarray.
        - debug : boolean. Set it to `True` to print a message after each DMA step.
        - profile : boolean. Set it to `True` to print the performance of the algorithm in term of `inference/s`.
        - encode/decode: optional callables applied to the input before the transfer and to the
            output after it. See the `dtype` notes on the class for an 'ap_fixed' example.
        - return: the output array, or the tuple `(output, seconds, rate)` when `profile` is `True`.
            Without `decode` the returned array is the DMA output buffer itself (shape `y_shape`,
            constructor `dtype`); it is overwritten by the next call, so copy it if it must persist.
            With `decode` the returned value is whatever `decode` produces.
        """
        if profile:
            timea = datetime.now()
        if encode is not None:
            X = encode(X)
        self.input_buffer[:] = X
        self.sendchannel.transfer(self.input_buffer)
        self.recvchannel.transfer(self.output_buffer)
        if debug:
            print("Transfer OK")
        self.sendchannel.wait()
        if debug:
            print("Send OK")
        self.recvchannel.wait()
        if debug:
            print("Receive OK")

        # Keep self.output_buffer bound to the allocated DMA buffer: rebinding
        # it to the decoded array would hand a non-DMA array to the next
        # recvchannel.transfer() call.
        result = self.output_buffer if decode is None else decode(self.output_buffer)

        if profile:
            timeb = datetime.now()
            dts, rate = self._print_dt(timea, timeb, len(X))
            return result, dts, rate
        return result
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Vivado block-design script for the ZCU104 board: wraps the HLS-generated
# ${project_name}_axi IP with an AXI DMA and a Zynq UltraScale+ PS, then runs
# implementation up to bitstream generation and writes a utilization report.
#
# TODO: try removing the startgroup/endgroup pairs and check whether the script still works.

# Load project.tcl from the directory that contains this script.
# NOTE(review): project_name, bit_width_hls_input and bit_width_hls_output are
# used below but never set in this file; presumably project.tcl defines them.
set tcldir [file dirname [info script]]
source [file join $tcldir project.tcl]

# Create the Vivado project for the ZCU104 device; -force overwrites an existing project directory.
create_project project_1 ${project_name}_vivado_accelerator -part xczu7ev-ffvc1156-2-e -force

# Select the ZCU104 board definition so that board presets can be applied to the PS below.
set_property board_part xilinx.com:zcu104:part0:1.1 [current_project]
# Point the IP catalog at the HLS project directory.
set_property ip_repo_paths ${project_name}_prj [current_project]
update_ip_catalog

create_bd_design "design_1"
# Re-point the IP repository at the exported IP directory of solution1 and refresh the catalog.
set_property ip_repo_paths ${project_name}_prj/solution1/impl/ip [current_project]
update_ip_catalog

# Instantiate the Zynq UltraScale+ processing system.
startgroup
create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.3 zynq_ultra_ps_e_0
endgroup

# Apply the board preset to the PS.
apply_bd_automation -rule xilinx.com:bd_rule:zynq_ultra_ps_e -config {apply_board_preset "1" } [get_bd_cells zynq_ultra_ps_e_0]

# Enable the M_AXI_GP0, M_AXI_GP1 and S_AXI_GP0 ports and set the S_AXI_GP0 data width to 32.
set_property -dict [list CONFIG.PSU__USE__M_AXI_GP0 {1} CONFIG.PSU__USE__M_AXI_GP1 {1} CONFIG.PSU__USE__S_AXI_GP0 {1} CONFIG.PSU__SAXIGP0__DATA_WIDTH {32}] [get_bd_cells zynq_ultra_ps_e_0]

# Instantiate the AXI DMA.
startgroup
create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 axi_dma_0
endgroup
# Mark the S2MM width parameters as user-set so the explicit values below are accepted.
set_property -dict [list CONFIG.c_m_axi_s2mm_data_width.VALUE_SRC USER CONFIG.c_s_axis_s2mm_tdata_width.VALUE_SRC USER] [get_bd_cells axi_dma_0]
# Scatter-gather is disabled (c_include_sg 0); the MM2S widths follow bit_width_hls_input,
# the S2MM widths follow bit_width_hls_output, and both burst sizes are set to 256.
set_property -dict [list CONFIG.c_include_sg {0} CONFIG.c_sg_length_width {26} CONFIG.c_sg_include_stscntrl_strm {0} CONFIG.c_m_axi_mm2s_data_width ${bit_width_hls_input} CONFIG.c_m_axis_mm2s_tdata_width ${bit_width_hls_input} CONFIG.c_mm2s_burst_size {256} CONFIG.c_m_axi_s2mm_data_width ${bit_width_hls_output} CONFIG.c_s_axis_s2mm_tdata_width ${bit_width_hls_output} CONFIG.c_s2mm_burst_size {256}] [get_bd_cells axi_dma_0]

# Connection automation, first pass:
#  - PS M_AXI_HPM0_FPD -> DMA S_AXI_LITE through a new AXI Interconnect
#  - DMA M_AXI_MM2S -> PS S_AXI_HPC0_FPD through a new AXI SmartConnect
startgroup
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/zynq_ultra_ps_e_0/M_AXI_HPM0_FPD} Slave {/axi_dma_0/S_AXI_LITE} ddr_seg {Auto} intc_ip {New AXI Interconnect} master_apm {0}} [get_bd_intf_pins axi_dma_0/S_AXI_LITE]
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/axi_dma_0/M_AXI_MM2S} Slave {/zynq_ultra_ps_e_0/S_AXI_HPC0_FPD} ddr_seg {Auto} intc_ip {New AXI SmartConnect} master_apm {0}} [get_bd_intf_pins zynq_ultra_ps_e_0/S_AXI_HPC0_FPD]
endgroup

# Connection automation, second pass:
#  - DMA M_AXI_S2MM -> PS S_AXI_HPC0_FPD through the existing /axi_smc
#  - PS M_AXI_HPM1_FPD -> DMA S_AXI_LITE through the existing /ps8_0_axi_periph
# NOTE(review): /axi_smc and /ps8_0_axi_periph are presumably the names Vivado gave
# to the cells created by the first pass; confirm if the automation naming changes.
startgroup
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {/zynq_ultra_ps_e_0/pl_clk0 (99 MHz)} Clk_xbar {/zynq_ultra_ps_e_0/pl_clk0 (99 MHz)} Master {/axi_dma_0/M_AXI_S2MM} Slave {/zynq_ultra_ps_e_0/S_AXI_HPC0_FPD} ddr_seg {Auto} intc_ip {/axi_smc} master_apm {0}} [get_bd_intf_pins axi_dma_0/M_AXI_S2MM]
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {/zynq_ultra_ps_e_0/pl_clk0 (99 MHz)} Clk_xbar {/zynq_ultra_ps_e_0/pl_clk0 (99 MHz)} Master {/zynq_ultra_ps_e_0/M_AXI_HPM1_FPD} Slave {/axi_dma_0/S_AXI_LITE} ddr_seg {Auto} intc_ip {/ps8_0_axi_periph} master_apm {0}} [get_bd_intf_pins zynq_ultra_ps_e_0/M_AXI_HPM1_FPD]
endgroup

# Instantiate the HLS-generated accelerator IP and wire its streams to the DMA:
# the DMA MM2S stream feeds in_r, and out_r feeds the DMA S2MM stream.
startgroup
create_bd_cell -type ip -vlnv xilinx.com:hls:${project_name}_axi:1.0 ${project_name}_axi_0
endgroup
connect_bd_intf_net [get_bd_intf_pins axi_dma_0/M_AXIS_MM2S] [get_bd_intf_pins ${project_name}_axi_0/in_r]
connect_bd_intf_net [get_bd_intf_pins axi_dma_0/S_AXIS_S2MM] [get_bd_intf_pins ${project_name}_axi_0/out_r]

# Clock/reset automation for the accelerator's ap_clk, driven from the PS pl_clk0.
apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ultra_ps_e_0/pl_clk0 (99 MHz)} Freq {100} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins ${project_name}_axi_0/ap_clk]
# Group the DMA and the accelerator into the hierarchy hier_0.
# NOTE(review): host-side driver code presumably addresses the DMA as hier_0.axi_dma_0,
# so keep these names in sync with it.
group_bd_cells hier_0 [get_bd_cells axi_dma_0] [get_bd_cells ${project_name}_axi_0]

# Generate the HDL wrapper for the block design and mark it as the top module.
make_wrapper -files [get_files ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/design_1.bd] -top

add_files -norecurse ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/hdl/design_1_wrapper.v

# Reset both runs, then launch implementation through write_bitstream with 6 parallel jobs.
reset_run impl_1
reset_run synth_1
launch_runs impl_1 -to_step write_bitstream -jobs 6
# Block until impl_1 finishes or the timeout of 360 expires.
# NOTE(review): Vivado documents this -timeout in minutes; confirm against UG835.
wait_on_run -timeout 360 impl_1

# Open the implemented design and write a hierarchical utilization report to util.rpt.
open_run impl_1
report_utilization -file util.rpt -hierarchical -hierarchical_percentages
2 changes: 2 additions & 0 deletions test/hls4ml-keras-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ VIVADO_VERSION=2020.1
#./keras-to-hls.sh KERAS_1layer KERAS_conv1d_small
./keras-to-hls.sh -b alveo-u250 -B VivadoAccelerator -x xcu250-figd2104-2L-e KERAS_3layer
./keras-to-hls.sh -b pynq-z2 -B VivadoAccelerator -x xc7z020clg400-1 KERAS_3layer
# ./keras-to-hls.sh -b zcu102 -B VivadoAccelerator -x xczu9eg-ffvb1156-2-e KERAS_3layer
./keras-to-hls.sh -b zcu104 -B VivadoAccelerator -x xczu7ev-ffvc1156-2-e KERAS_3layer
# KERAS_3layer b:pynq-z2 B:VivadoAccelerator x:xc7z020clg400-1 s:Resource

# Build the projects generated by keras-to-hls script.
Expand Down