From 2e952f689105e0425ceb3b1b8d5f5acd566f3f28 Mon Sep 17 00:00:00 2001 From: Patrick O'Grady Date: Thu, 16 Jan 2025 07:47:40 -0800 Subject: [PATCH 1/2] Release 2.0.0-VDR. Signed-off-by: Patrick O'Grady --- .dockerignore | 7 + CMakeLists.txt | 2 +- CONTRIBUTING.md | 4 +- RELEASE_NOTES.md | 171 +++- VERSION | 2 +- ci/lint.sh | 10 +- docker/Dockerfile | 63 +- docker/Dockerfile.demo | 5 +- docker/build.sh | 37 +- docker/demo.sh | 1 + docs/user_guide/RELEASE_NOTES.md | 1 + docs/user_guide/applications.md | 51 +- docs/user_guide/architecture.md | 74 +- docs/user_guide/build.md | 8 +- docs/user_guide/dataplane.md | 64 +- docs/user_guide/examples.md | 92 +- docs/user_guide/index.md | 2 + docs/user_guide/introduction.md | 37 +- docs/user_guide/ip_integration.md | 73 +- docs/user_guide/latency.md | 66 ++ docs/user_guide/latency.svg | 3 + docs/user_guide/new_sensors.md | 6 +- docs/user_guide/notes.md | 69 +- docs/user_guide/peripheral_interface.md | 4 +- docs/user_guide/port_description.md | 162 ++-- docs/user_guide/ptp.md | 8 +- docs/user_guide/register_interface.md | 8 +- .../sensor_bridge_firmware_setup.md | 32 +- .../sensor_bridge_hardware_setup.md | 4 +- docs/user_guide/setup.md | 14 +- docs/user_guide/simulation.md | 28 +- docs/user_guide/troubleshooting.md | 10 +- examples/CMakeLists.txt | 1 - examples/body_pose_estimation.py | 20 +- examples/body_pose_estimation.yaml | 2 +- examples/distributed_imx274_player.py | 18 +- examples/distributed_tao_peoplenet.py | 18 +- examples/gpio_example_app.py | 107 ++- examples/imx274_latency.py | 545 +++++++++++ examples/imx274_player.cpp | 21 +- examples/imx274_player.py | 37 +- examples/imx477_player.py | 19 +- examples/linux_body_pose_estimation.py | 12 +- examples/linux_imx274_player.py | 18 +- examples/linux_imx477_player.py | 10 +- ...nux_single_network_stereo_imx274_player.py | 368 ++++++++ examples/linux_tao_peoplenet.py | 10 +- .../single_network_stereo_imx274_player.py | 391 ++++++++ examples/stereo_imx274_player.py | 32 +- 
examples/tao_peoplenet.py | 18 +- pytest.ini | 6 +- python/hololink/__init__.py | 50 +- python/hololink/hololink.cpp | 68 +- python/hololink/native/__init__.py | 24 +- python/hololink/native/native.cpp | 17 + python/hololink/operators/CMakeLists.txt | 3 +- python/hololink/operators/__init__.py | 3 +- python/hololink/operators/base_receiver_op.py | 72 +- .../gamma_correction/gamma_correction.cpp | 85 -- .../linux_receiver/linux_receiver.cpp | 27 +- .../operators/linux_receiver_operator.py | 29 +- python/hololink/operators/operator_util.hpp | 5 +- .../operators/roce_receiver/roce_receiver.cpp | 6 +- python/hololink/sensors/imx274/dual_imx274.py | 84 +- python/hololink/sensors/imx274/imx274_mode.py | 528 ++++++----- python/hololink/sensors/imx477.py | 884 +++++++++--------- python/hololink/sensors/udp_cam.py | 3 - .../CMakeLists.txt => tools.py} | 22 +- python/tools/hololink.py | 19 +- scripts/manifest.yaml | 32 +- src/hololink/CMakeLists.txt | 1 + src/hololink/data_channel.cpp | 285 +++++- src/hololink/data_channel.hpp | 96 +- src/hololink/enumerator.cpp | 235 +++-- src/hololink/enumerator.hpp | 10 +- src/hololink/hololink.cpp | 434 ++++++--- src/hololink/hololink.hpp | 150 ++- src/hololink/logging.cpp | 181 ++++ src/hololink/logging.hpp | 85 ++ src/hololink/native/CMakeLists.txt | 1 + src/hololink/native/cuda_helper.cpp | 8 +- src/hololink/native/cuda_helper.hpp | 26 +- src/hololink/native/deserializer.hpp | 25 +- src/hololink/native/networking.cpp | 59 +- src/hololink/native/networking.hpp | 28 + src/hololink/native/serializer.hpp | 17 + src/hololink/operators/CMakeLists.txt | 3 +- .../operators/argus_isp/argus_impl.cpp | 8 +- .../operators/argus_isp/argus_isp.cpp | 6 +- src/hololink/operators/base_receiver_op.cpp | 130 ++- src/hololink/operators/base_receiver_op.hpp | 43 +- .../operators/csi_to_bayer/csi_to_bayer.cpp | 5 +- .../gamma_correction/gamma_correction.cpp | 169 ---- .../gamma_correction/gamma_correction.hpp | 62 -- .../image_processor/image_processor.cpp 
| 5 +- .../operators/linux_receiver/CMakeLists.txt | 1 + .../linux_receiver/linux_receiver.cpp | 93 +- .../linux_receiver/linux_receiver.hpp | 28 +- .../operators/roce_receiver/roce_receiver.cpp | 303 +++--- .../operators/roce_receiver/roce_receiver.hpp | 42 +- .../roce_receiver/roce_receiver_op.cpp | 99 +- .../roce_receiver/roce_receiver_op.hpp | 6 +- test-agx-cpnx100.sh | 29 + test-igx-cpnx100.sh | 30 + tests/conftest.py | 89 +- tests/operators.py | 133 +++ .../test_gpio_example_app.py | 47 +- tests/test_hololink_acks.py | 28 +- tests/test_holoscan.py | 12 +- tests/test_hsb_sequence_checking.py | 65 ++ tests/test_i2c_retry.py | 8 +- ...ao_peoplenet.py => test_imx274_latency.py} | 22 +- tests/test_imx274_pattern.py | 755 ++++++++++----- tests/test_imx274_timestamps.py | 150 ++- tests/test_imx477_pattern.py | 41 +- tests/test_imx477_timestamps.py | 73 +- tests/test_linux_hwisp_pattern.py | 102 +- tests/test_linux_imx274_player.py | 19 +- tests/test_serializer.py | 34 + ...est_single_network_stereo_imx274_player.py | 69 ++ tests/test_tao_peoplenet.py | 66 +- tests/udp_server.py | 199 ++-- tests/utils.py | 169 ++++ 123 files changed, 6467 insertions(+), 3049 deletions(-) create mode 120000 docs/user_guide/RELEASE_NOTES.md create mode 100644 docs/user_guide/latency.md create mode 100644 docs/user_guide/latency.svg create mode 100644 examples/imx274_latency.py create mode 100644 examples/linux_single_network_stereo_imx274_player.py create mode 100644 examples/single_network_stereo_imx274_player.py delete mode 100644 python/hololink/operators/gamma_correction/gamma_correction.cpp rename python/hololink/{operators/gamma_correction/CMakeLists.txt => tools.py} (70%) create mode 100644 src/hololink/logging.cpp create mode 100644 src/hololink/logging.hpp delete mode 100644 src/hololink/operators/gamma_correction/gamma_correction.cpp delete mode 100644 src/hololink/operators/gamma_correction/gamma_correction.hpp create mode 100644 test-agx-cpnx100.sh create mode 100644 
test-igx-cpnx100.sh create mode 100644 tests/operators.py rename src/hololink/operators/gamma_correction/CMakeLists.txt => tests/test_gpio_example_app.py (56%) create mode 100644 tests/test_hsb_sequence_checking.py rename tests/{test_linux_tao_peoplenet.py => test_imx274_latency.py} (65%) create mode 100644 tests/test_single_network_stereo_imx274_player.py diff --git a/.dockerignore b/.dockerignore index c27bd02..5102227 100644 --- a/.dockerignore +++ b/.dockerignore @@ -13,4 +13,11 @@ build* # NSight profiler files *.ncu-rep *.nsys-rep +# FPGA files *.bit +# cscope index files +cscope.* +tags +# pcap files +*.pcap +*.deb diff --git a/CMakeLists.txt b/CMakeLists.txt index 48c7b58..528e8da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,7 @@ find_package(holoscan 0.6 REQUIRED CONFIG PATHS "/opt/nvidia/holoscan") find_package(Python3 REQUIRED COMPONENTS Interpreter Development) set(HOLOSCAN_SDK_VERSION "$ENV{HOLOSCAN_SDK_VERSION}") -set(HOLOSCAN_SDK_VERSION_EXPECTED "2.3.0") +set(HOLOSCAN_SDK_VERSION_EXPECTED "2.7.0") if(NOT HOLOSCAN_SDK_VERSION VERSION_EQUAL HOLOSCAN_SDK_VERSION_EXPECTED) message(FATAL_ERROR "Expected Holoscan version ${HOLOSCAN_SDK_VERSION_EXPECTED} but found ${HOLOSCAN_SDK_VERSION}, please check pybind11 version of the Holoscan SDK and update here if needed") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 32e9a6d..5486f7e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,8 +49,8 @@ before making their first contribution. ### Workflow 1. Developers must first [fork](https://help.github.com/en/articles/fork-a-repo) the - \[upstream\](https://github.com/nvidia-holoscan/holoscan-sensor-bridge) Holoscan - Sensor Bridge repository. + [upstream](https://github.com/nvidia-holoscan/holoscan-sensor-bridge) Holoscan Sensor + Bridge repository. 1. Git clone the forked repository and push changes to the personal fork. 
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index f3d5726..50835bf 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,134 @@ # Release Notes +## 2.0-GA, January 2025 + +### Dependencies + +- IGX: [IGX-SW 1.1 Production Release](https://developer.nvidia.com/igx-downloads) +- AGX: Use [SDK Manager](https://developer.nvidia.com/sdk-manager) to set up JetPack 6.0 + release 2. Note that JetPack 6.1 is not yet supported for HSB. +- Holoscan Sensor Bridge, 10G; FPGA v2412 + +Be sure and follow the installation instructions included with the release. To generate +documentation, in the host system, run `sh docs/make_docs.sh`, then use your browser to +look at `docs/user_guide/_build/html/index.html`. + +### Updates from 1.1-GA + +- **HSB 2.0-GA relies on FPGA IP version 2412.** Check the user guide for instructions + on how to update your configuration. Note that the enumeration data has changed, so + 2.0-GA software will not enumerate boards publishing 1.1 (or earlier) enumeration + data; and likewise, 1.1 and earlier software will not find the newer 2.0 configuration + boards. For Lattice-CPNX100-ETH-SENSOR-BRIDGE devices, be sure and include the + "--force" option when updating the HSB firmware; this way the software uses hardcoded + enumeration data in the software tree instead of relying on that from the device + itself. See [the firmware download instructions](sensor_bridge_firmware_setup.md) for + more details. If you need to revert your FPGA back to the 2407 version, check the FAQ + below. + +- **HSB is updated to work with Holoscan SDK 2.7.** Some older APIs, specifically in the + [C++ fmt tool](https://github.com/fmtlib/fmt), have been deprecated, so minor code + adjustments have been applied to keep HSB host code up to date. + +- **New HSB features for safety and reliability,** including CRCs, control plane + sequence number checking, and additional timestamps are included. 
Timestamps included + capture the PTP time when the first data in the received frame is observed by the FPGA + IP block and the time after the last data in the frame is sent. With ConnectX based + host systems, which support hardware PTP synchronization, these timestamps are within + a microsecond of the host time, and can be used to accurately measure latency through + the pipeline. These metadata values are available to pipeline operators via the + [HSDK application metadata API](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_create_app.html#dynamic-application-metadata). + See the user guide for more details. Sequence number checking is enabled for control + plane transactions, and can provide protection against interaction from several hosts + to the same HSB unit. The overall CRC of the received data frame is also included; in + a later release, a high-performance CUDA based CRC checker will be demonstrated + showing frame-rate CRC validation of ingress data. + +- **Multiple sensors over a single network port.** APIs are added to allow applications + to configure multiple sensors to use the same network port. In the examples directory, + `single_network_stereo_imx274_player.py` demonstrates how to configure both cameras in + an IMX274 stereo pair to transmit 1080p video using a single network port. Note that + 4k video streams require about 6.5Gbps each, so using both cameras in this mode over a + single network port is not supported. See the user guide for more details. + +- **Performance and latency measurement tools.** The timestamps included with safety and + reliability features can be used to accurately measure latency, from the time that + data arrives to the FPGA IP block, all the way through the end of the pipeline (e.g. + following visualization). See `examples/imx274_latency.py` for an example. + See [latency.md](latency.md) for more details on latency measurement. 
+ +- **GammaCorrectionOp is removed.** HSDK 2.3 added support for + [sRGB space](https://en.wikipedia.org/wiki/SRGB), providing an optimized path + including Gamma correction in the visualizer. By removing HSB's naive gamma correction + and using the visualizer instead, pipeline latency is reduced by .5ms. For + applications that used GammaCorrectionOp, just remove that operator from the pipeline + and include `framebuffer_srgb=True` in the constructor parameter list for HolovizOp. + +- **Support for IMX477 cameras via Microchip MPF200-ETH-SENSOR-BRIDGE.** + +### FAQ + +- If your application, running in the demo container, halts with a "Failed to initialize + glfw" message, make sure to grant the application permission to connect with the + display via the "xhost +" command. This command is not remembered across reboots. + +- Reverting from FPGA 2412 to 2407 on Lattice HSB units. If you need to revert a Lattice + HSB unit from 2412 back to 2407, use the 2.0-GA tree and program with the 2407 + manifest file. From within the demo container: + + ```sh + hololink program scripts/manifest-2407.yaml + ``` + + After programming and power cycling, the board will no longer be visible to the 2.0-GA + version of HSB host code. At this time you can go back to using the 1.1-GA release to + work with the board. The 1.1-GA software will not be able to enumerate boards running + the 2412 configuration; the newer tree must be used to write the older firmware. + +- HSB network receiver operators use + [APIs provided by the Holoscan SDK](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_create_app.html#dynamic-application-metadata) + to share timestamps with later operators in the pipeline. Be sure and call the + application (C++) `is_metadata_enabled(true)` method or (python) + `is_metadata_enabled = True` at initialization time; otherwise each operator will only + see an empty metadata structure. 
In your operator's `compute` method, if you add + additional items to the pipeline metadata, be sure and add that metadata before + calling `(output).emit`. If you have a pipeline that merges two paths, and experience + a `runtime_error` exception when it fails to merge the metadata from those paths, see + [the page on Metadata update policies](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_create_app.html#metadata-update-policies) + for information on how to manage this. + +- It is possible to overrun the bandwidth available on the ethernet, particularly when + using multiple sensors over a single network connection. For example, a 4k, 60FPS, + RAW10 video stream requires about 6.5Gbps, and a stereo pair configured this way would + require something like 13Gbps--which far exceeds a single 10Gbps network port. In this + case, HSB will drop data, probably within a single network message. When operated + outside of specification this way, reconstruction of the original sensor data is not + possible. The software has no concept of available bandwidth, so it is up to the + developer to ensure that bandwidth limits are not exceeded. + +- AGX on-board ethernet supports hardware PTP synchronization, which can be enabled by + following the same directions given to set up PTP for IGX. Specifically, with the + appropriate network device name in `$EN0` (e.g. `eth0`), just follow the instructions + on the host setup page for setting up PTP on IGX. + +### Known Anomalies + +- Orin AGX running on JetPack 6.1 shows very slow network behavior. Investigation of + this is underway; for now, on Orin AGX, only JetPack 6.0 r2 is supported. + +- Following software-commanded reset, HSB sometimes observes a sequence number or + control plane transaction failure. When the HSB is commanded to reset, the host system + observes a loss of network connectivity and may take some time before steady + communication is available. 
In some specific Orin AGX systems, `dmesg` shows this + renegotiation can take more than 60 seconds. During this time, HSB software attempts + to read the FPGA version ID, and can time out. Investigation of this is underway; for + now, systems with this behavior can be worked around by putting a 10G ethernet switch + between the HSB and the host system. + +- Orin AGX systems, running with stereo sensor feeds on the same network port, using + either the multithreaded or event based schedulers, have unreliable operation. For + now, AGX systems with HSB are only supported with the default (greedy) scheduler. + ## 1.1.0-GA, August 2024 ### Dependencies @@ -34,7 +163,7 @@ look at `docs/user_guide/_build/html/index.html`. - There are some small **changes to the host setup instructions**. Changes focused on updates to network device names and performance on AGX configurations. -- **Hardware ISP via ArgusIspOp** \[Orin in iGPU mode only\]. Applications can offload +- **Hardware ISP via ArgusIspOp** [Orin in iGPU mode only]. Applications can offload image signal processing by using the capabilities built in to the NV ISP device present in Orin systems running with iGPU (AGX or IGX without a dGPU). Support is provided for 1080p images; contact NVIDIA to get updated libraries with support for 4K @@ -76,7 +205,45 @@ look at `docs/user_guide/_build/html/index.html`. The first time the body-post-estimation app is run, the .onnx file is converted to a TRT engine file, which is a step than can take several minutes. Subsequent runs of the body pose estimation app will skip the conversion and just load this engine file - directly. During the converstion, when high bandwidth is in use on the network (via + directly. During the conversion, when high bandwidth is in use on the network (via "--camera-mode=0"), the kernel stops delivering received UDP messages to the application, resulting in no video being displayed. 
Later runs the same program, after the conversion is complete, run as expected. + +- HSB does not forward data received on the MIPI interface, resulting in an "Ingress + frame timeout; ignoring." message. + + A bug in the FPGA MIPI receiver block IP causes data to be dropped before being + delivered to the FPGA's UDP packetizer; resulting in no sensor data being delivered to + the host. If you've commanded a camera to send image data, but no data is observed and + the timeout message is displayed, you can verify that this is the cause by issuing + these commands within the HSB demo container: + + ``` + hololink read_uint32 0x50000000 # for the first camera + hololink read_uint32 0x60000000 # for the second camera + ``` + + If a camera is configured to issue data, but a 0 appears in this memory location, then + this is an indication that the receiver is in this stuck state. `hololink.reset()` is + able to clear this condition. + +- PTP timestamps, published by HSB, aren't synchronized with the host time. + + The + [user guide hardware setup instructions](https://docs.nvidia.com/holoscan/sensor-bridge/1.1.0/setup.html) + show how to configure the `phc2sys` tool, which ensures that the NIC time-of-day + clock, which is published with PTP network messages, is synchronized with the host + real-time clock. As written, the setup instructions rely on the default ntpdate + configuration to initialize the system clock to the rest of the world. As written, the + `phc2sys` startup doesn't properly wait for `ntpdate` to complete, so that when + `ntpdate` does finish, `phc2sys` sees a large jump in the system time. Because + `phc2sys` slowly adjusts the NIC clock, the time published by `ptp4l` will not be + synchronized with the system clock, and could take a very long time to do so. 
To + verify you're in this condition, observe the output in the "sys offset" column from + the command `systemctl status phc2sys-*.service`; large absolute values are an + indication of this condition. To work around this, run + `sudo systemctl restart phc2sys-*.service` after ntpdate is synchronized. There is + some anecdotal evidence that adding "-w" to the `phc2sys` command line may fix this + problem but the documentation for this option doesn't address the configuration in use + here. diff --git a/VERSION b/VERSION index 9084fa2..04ae6e8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.0 +2.0.0-EA2 diff --git a/ci/lint.sh b/ci/lint.sh index 8c7aa06..16558d8 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -80,6 +80,7 @@ MODE="lint" case "$1" in --do-format) # We rely on 'set -o errexit' above to terminate on an error + set -o xtrace isort $SKIP_ISORT --profile black . black "--extend-exclude=$SKIP_RE" . flake8 --ignore=E501,E203,W503 --extend-exclude=$SKIP_COMMAS @@ -89,6 +90,7 @@ case "$1" in ;; --do-lint) # We rely on 'set -o errexit' above to terminate on an error + set -o xtrace isort --check-only $SKIP_ISORT --profile black . black --check "--extend-exclude=$SKIP_RE" . flake8 --ignore=E501,E203,W503 --extend-exclude=$SKIP_COMMAS @@ -108,9 +110,11 @@ case "$1" in ;; esac -# We only get here if we weren't "--do-lint" or "--do-format"; -# we're going to "--do-$MODE". -docker build \ +# We only get here if we weren't "--do-lint" or "--do-format"; we're going to +# "--do-$MODE". +# Also, we specifically rely on buildkit skipping the dgpu or igpu stages that +# aren't included in the final image we're creating. +DOCKER_BUILDKIT=1 docker build \ -t hololink-lint:$VERSION \ -f $HERE/Dockerfile.lint \ $ROOT diff --git a/docker/Dockerfile b/docker/Dockerfile index ff3d20e..3fddc9c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -15,11 +15,42 @@ # # See README.md for detailed information. 
-ARG CONTAINER_TYPE=dgpu -FROM nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-${CONTAINER_TYPE} - ARG CONTAINER_TYPE=invalid +# dgpu specific configuration goes here. +FROM nvcr.io/nvidia/clara-holoscan/holoscan:v2.7.0-dgpu AS build_dgpu + +# igpu specific configuration goes here. +FROM nvcr.io/nvidia/clara-holoscan/holoscan:v2.7.0-igpu AS build_igpu +RUN \ + # EGL and libGLESv2 is needed by Argus api otherwise the library throws error \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y -q libgles2-mesa-dev && \ + echo "deb https://repo.download.nvidia.com/jetson/common r36.3 main" > /etc/apt/sources.list.d/nvidia-l4t-apt-source.list && \ + echo "deb https://repo.download.nvidia.com/jetson/t234 r36.3 main" >> /etc/apt/sources.list.d/nvidia-l4t-apt-source.list && \ + wget -O - https://repo.download.nvidia.com/jetson/jetson-ota-public.asc | apt-key add - && \ + apt-get update && \ + # we do not have the permission to ./mm-api/DEBIAN/postinst \ + # to just install nvidia-l4t-jetson-multimedia-api via apt install \ + apt-get download nvidia-l4t-jetson-multimedia-api && \ + dpkg-deb -R ./nvidia-l4t-jetson-multimedia-api_*_arm64.deb ./mm-api && \ + cp -r ./mm-api/usr/src/jetson_multimedia_api /usr/src/jetson_multimedia_api && \ + sed -i 's/sudo//' ./mm-api/DEBIAN/postinst && \ + ./mm-api/DEBIAN/postinst && \ + rm -rf ./nvidia-l4t-jetson-multimedia-api_*_arm64.deb ./mm-api && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=argus-libs libnvargus.so /usr/lib/aarch64-linux-gnu/nvidia/libnvargus.so +COPY --from=argus-libs libnvargus_socketclient.so /usr/lib/aarch64-linux-gnu/nvidia/libnvargus_socketclient.so +COPY --from=argus-libs libnvargus_socketserver.so /usr/lib/aarch64-linux-gnu/nvidia/libnvargus_socketserver.so +COPY --from=argus-libs libnvfusacap.so /usr/lib/aarch64-linux-gnu/nvidia/libnvfusacap.so +COPY --from=argus-libs libnvodm_imager.so /usr/lib/aarch64-linux-gnu/nvidia/libnvodm_imager.so +COPY --from=argus-libs libnvscf.so 
/usr/lib/aarch64-linux-gnu/nvidia/libnvscf.so +ENV LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/nvidia:$LD_LIBRARY_PATH + +# configuration for all GPU configurations start here. +FROM build_${CONTAINER_TYPE} + RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y -q \ python3 \ @@ -39,26 +70,6 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* RUN pip3 install meson ninja pyelftools - -RUN if [ "$CONTAINER_TYPE" = "igpu" ]; then \ - # libgtk is needed by Argus api otherwise the library throughs error \ - apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y -q libgtk-3-dev && \ - echo "deb https://repo.download.nvidia.com/jetson/common r36.3 main" > /etc/apt/sources.list.d/nvidia-l4t-apt-source.list && \ - echo "deb https://repo.download.nvidia.com/jetson/t234 r36.3 main" >> /etc/apt/sources.list.d/nvidia-l4t-apt-source.list && \ - wget -O - https://repo.download.nvidia.com/jetson/jetson-ota-public.asc | apt-key add - && \ - apt-get update && \ - # we do not have the permission to ./mm-api/DEBIAN/postinst \ - # to just install nvidia-l4t-jetson-multimedia-api via apt install \ - apt-get download nvidia-l4t-jetson-multimedia-api && \ - dpkg-deb -R ./nvidia-l4t-jetson-multimedia-api_*_arm64.deb ./mm-api && \ - cp -r ./mm-api/usr/src/jetson_multimedia_api /usr/src/jetson_multimedia_api && \ - sed -i 's/sudo//' ./mm-api/DEBIAN/postinst && \ - ./mm-api/DEBIAN/postinst && \ - rm -rf ./nvidia-l4t-jetson-multimedia-api_*_arm64.deb ./mm-api; \ - fi - -ADD nvidia /usr/lib/aarch64-linux-gnu/nvidia - -ENV CONTAINER_TYPE=$CONTAINER_TYPE -ENV LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/nvidia:$LD_LIBRARY_PATH +# repeated ARG because it has to be inside the FROM above +ARG CONTAINER_TYPE=invalid +ENV CONTAINER_TYPE=${CONTAINER_TYPE} diff --git a/docker/Dockerfile.demo b/docker/Dockerfile.demo index e014932..96f1072 100644 --- a/docker/Dockerfile.demo +++ b/docker/Dockerfile.demo @@ -27,7 +27,10 @@ RUN apt-get update \ ADD . 
/opt/hololink/ -RUN cd /opt/hololink/python && pip3 wheel . +# See build.sh for why INSTALL_ENVIRONMENT is here. +ARG INSTALL_ENVIRONMENT="" +RUN cd /opt/hololink/python \ + && $INSTALL_ENVIRONMENT pip3 wheel -v . # demo container FROM base AS demo diff --git a/docker/build.sh b/docker/build.sh index 51d2193..6676f31 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -99,44 +99,47 @@ HERE=`dirname "$SCRIPT"` ROOT=`realpath "$HERE/.."` VERSION=`cat $ROOT/VERSION` -if [ ! -d "$ROOT/nvidia" ]; then -mkdir $ROOT/nvidia -fi +PROTOTYPE_OPTIONS="" CONTAINER_TYPE=dgpu if [ $igpu -ne 0 ] then CONTAINER_TYPE=igpu -if [ ! -d "/usr/lib/aarch64-linux-gnu/nvidia" ] +ARGUS_LIBRARIES_DIRECTORY=/usr/lib/aarch64-linux-gnu/nvidia +if [ ! -d $ARGUS_LIBRARIES_DIRECTORY ] then echo "Error: Required path and libs are missing. \ Upgrade the development kit with Jetpack 6 or newer." exit 1 fi -cp /usr/lib/aarch64-linux-gnu/nvidia/libnvargus.so $ROOT/nvidia -cp /usr/lib/aarch64-linux-gnu/nvidia/libnvargus_socketclient.so $ROOT/nvidia -cp /usr/lib/aarch64-linux-gnu/nvidia/libnvargus_socketserver.so $ROOT/nvidia -cp /usr/lib/aarch64-linux-gnu/nvidia/libnvfusacap.so $ROOT/nvidia -cp /usr/lib/aarch64-linux-gnu/nvidia/libnvodm_imager.so $ROOT/nvidia -cp /usr/lib/aarch64-linux-gnu/nvidia/libnvscf.so $ROOT/nvidia +PROTOTYPE_OPTIONS="$PROTOTYPE_OPTIONS --build-context argus-libs=$ARGUS_LIBRARIES_DIRECTORY" fi -chmod 755 $ROOT/nvidia +# For Jetson Nano devices, which have very limited memory, +# limit the number of CPUs so we don't run out of RAM. +INSTALL_ENVIRONMENT="" +MEM_G=`awk '/MemTotal/ {print int($2 / 1024 / 1024)}' /proc/meminfo` +if [ $MEM_G -lt 10 ] +then +INSTALL_ENVIRONMENT="taskset -c 0-2" +fi -# Build the development container -docker build \ +# Build the development container. We specifically rely on buildkit skipping +# the dgpu or igpu stages that aren't included in the final image we're +# creating. 
+DOCKER_BUILDKIT=1 docker build \ --network=host \ - --build-arg CONTAINER_TYPE=$CONTAINER_TYPE \ + --build-arg "CONTAINER_TYPE=$CONTAINER_TYPE" \ -t hololink-prototype:$VERSION \ -f $HERE/Dockerfile \ + $PROTOTYPE_OPTIONS \ $ROOT # Build a container that has python extensions set up. -docker build \ +DOCKER_BUILDKIT=1 docker build \ --network=host \ --build-arg CONTAINER_VERSION=hololink-prototype:$VERSION \ + --build-arg "INSTALL_ENVIRONMENT=$INSTALL_ENVIRONMENT" \ -t hololink-demo:$VERSION \ -f $HERE/Dockerfile.demo \ $ROOT - -rm -rf nvidia diff --git a/docker/demo.sh b/docker/demo.sh index df63c80..5dd8310 100644 --- a/docker/demo.sh +++ b/docker/demo.sh @@ -62,6 +62,7 @@ docker run \ --name "$NAME" \ -v $PWD:$PWD \ -v $ROOT:$ROOT \ + -v $HOME:$HOME \ -v /sys/bus/pci/devices:/sys/bus/pci/devices \ -v /sys/kernel/mm/hugepages:/sys/kernel/mm/hugepages \ -v /dev:/dev \ diff --git a/docs/user_guide/RELEASE_NOTES.md b/docs/user_guide/RELEASE_NOTES.md new file mode 120000 index 0000000..8d7d499 --- /dev/null +++ b/docs/user_guide/RELEASE_NOTES.md @@ -0,0 +1 @@ +../../RELEASE_NOTES.md \ No newline at end of file diff --git a/docs/user_guide/applications.md b/docs/user_guide/applications.md index bdb9a98..f67f543 100644 --- a/docs/user_guide/applications.md +++ b/docs/user_guide/applications.md @@ -27,13 +27,12 @@ graph r[RoceReceiverOp] --> c[CsiToBayerOp] c --> i[ImageProcessorOp] i --> d[BayerDemosaicOp] - d --> g[GammaCorrectionOp] - g --> v[HolovizOp] + d --> v[HolovizOp] ``` -- `RoceReceiverOp` blocks until an end-of-frame UDP message is received. On return, the - received frame data is available in GPU memory, along with metadata which is published - to the application layer. Holoscan sensor bridge uses +- `RoceReceiverOp` wakes up when an end-of-frame UDP message is received. When it + finishes, the received frame data is available in GPU memory, along with metadata + which is published to the application layer. 
Holoscan sensor bridge uses [RoCE v2](https://en.wikipedia.org/wiki/RDMA_over_Converged_Ethernet) to transmit data plane traffic over UDP; this is why the receiver is called `RoceReceiverOp`. - `CsiToBayerOp` is aware that the received data is a CSI-2 RAW10 image, which it @@ -44,9 +43,6 @@ graph - `ImageProcessorOp` adjusts the received bayer image color and brightness to make it acceptable for display. - `BayerDemosaicOp` converts the bayer image data into RGBA. -- `GammaCorrectionOp` - [adjusts the luminance](https://en.wikipedia.org/wiki/Gamma_correction) of the RGBA - image to improve human perception. - `HolovizOp` displays the RGBA image on the GUI. For each step in the pipeline, the image data is stored in a buffer in GPU memory. @@ -181,8 +177,8 @@ Important details: will reset all the data channels on this device; and in the stereo IMX274 configuration, calling `camera.setup_clock` sets the clock that is shared between both cameras. For this reason, it's important that the application is careful about calling - `camera.setup_clock`--resetting the clock while a camera is running can lead to - undefined states. + `camera.setup_clock`--resetting the clock (e.g. on the second image sensor) while the + first camera is running can lead to undefined states. Holoscan, on the call to `application.run`, invokes the application's `compose` method, which includes this: @@ -278,10 +274,9 @@ Some key points: - `receiver_operator` has no idea it is dealing with video data. It's just informed of the memory region(s) to fill and the size of a block of data. When a complete block of data is received, the CPU will be notified so that pipeline processing can continue. -- Applications can pass in a frame buffer device memory pointer to the constructor for - `receiver_opearator`, or the receiver will allocate one for you. When it allocates a - buffer, it can take into account special requirements for various configurations of - GPU and RDMA controller. 
+- Given an expected frame size, the receiver buffer will allocate GPU memory large + enough for the received data plus additional metadata; that memory is allocated in a + way that meets hardware and subsequent operator requirements. - `csi_to_bayer_operator` is aware of memory layout for CSI-2 formatted image data. Our call to `camera.configure_converter` allows the camera to communicate the image dimensions and pixel depth; with that knowledge, the call to @@ -294,10 +289,6 @@ Some key points: data plane. Configuration automatically handles setting the sensor bridge device with our host Ethernet and IP addresses, destination memory addresses, security keys, and frame size information. -- To support RDMA, the receiver operator is given a single block of memory (instead of a - memory pool to allocate from). The peripheral component is granted access to this - region only, and that region does not change throughout the life of the sensor bridge - application. - the sensor bridge device, following configuration by the `holoscan_channel` object, will start forwarding all received sensor data to the configured receiver. 
We haven't instructed the camera to start streaming data yet, but at this point, we're ready to @@ -336,8 +327,7 @@ graph r[RoceReceiverOp] --> c[CsiToBayerOp] c --> i[ImageProcessorOp] i --> d[BayerDemosaicOp] - d --> g[GammaCorrectionOp] - g --> s[ImageShiftToUint8Operator] + d --> s[ImageShiftToUint8Operator] s --> p[FormatConverterOp] p --> fi[FormatInferenceInputOp] fi --> in[InferenceOp] @@ -375,8 +365,7 @@ graph r[RoceReceiverOp] --> c[CsiToBayerOp] c --> i[ImageProcessorOp] i --> d[BayerDemosaicOp] - d --> g[GammaCorrectionOp] - g --> s[ImageShiftToUint8Operator] + d --> s[ImageShiftToUint8Operator] s --> p[FormatConverterOp] p --> fi[FormatInferenceInputOp] fi --> in[InferenceOp] @@ -394,12 +383,20 @@ scores and applies non-max suppression (nms) before sending the output to `Holov ## IMX274 Stereo live video demonstration Multiple receiver operators can be instantiated to support data feeds from multiple -cameras. In examples/stereo_imx274_player.py, the same pipeline for live video feed is +cameras. In `examples/stereo_imx274_player.py`, the same pipeline for live video feed is presented, except that it is instantiated twice, once for each camera on the IMX274 stereo camera board. In this case, Holoscan cycles between each pipeline, providing two separate windows (one for each visualizer) on the display. Each `receiver_operator` instance is independent and runs simultaneously. +For systems with only a single network connection, Holoscan Sensor Bridge can be +configured to transmit both cameras data over the same network connection. The 10Gbps +network port on HSB doesn't have the bandwidth to support two 4K 60FPS video streams, so +support is limited to cameras in 1080p mode. See +`examples/single_network_stereo_imx274_player.py` for an example showing how to +configure HSB to work in this way. As before, each `receiver_operator` is independent, +even when using the same network interface. 
+ ## GPIO Example application This application demonstrates how to utilize the hololink GPIO interface and can be @@ -449,14 +446,14 @@ graph Each cycle of this operator configures one pin to a direction and value and sends the last changed pin number and the current running configuration to the GPIO read operator.\ -Once all 16 pins are set per the currently running configuration,the operator -will move on the next cycle to the next configuration. +Once all 16 pins are set per the currently running configuration, the operator will move +on the next cycle to the next configuration. - **GPIO Read Operator** - This operator reads and displays the current value of the last configured pin. It delays 10 seconds to allow the user to validate the pin level and direction with an external measurement device like a multimeter or scope. -### GPIO Software interface +### GPIO Software interface The GPIO interface is a class defined within the hololink module. It exports the following GPIO interface: @@ -497,7 +494,7 @@ graph ``` `ArgusIspOp` allows the users to access the ISP via Argus API. This operator takes in -Bayer uncompressd image of uint16 per pixel (MSB aligned) and outputs RGB888 image. It +Bayer uncompressed image of uint16 per pixel (MSB aligned) and outputs RGB888 image. It is available as C++ operator with Python bindings. The `ArgusIspOp` can be configured using following required parameters at the diff --git a/docs/user_guide/architecture.md b/docs/user_guide/architecture.md index 89eb7a5..f61ba21 100644 --- a/docs/user_guide/architecture.md +++ b/docs/user_guide/architecture.md @@ -48,7 +48,7 @@ To instantiate a camera object, application code will typically - Use `Enumerator.find_channel` to enumerate the sensor bridge devices visible to the local system. 
`find_channel` accepts arguments that filter received messages; when an - enumeration message that matches the given critera is found, a dict is returned with + enumeration message that matches the given criteria is found, a dict is returned with metadata about the enumerated device. `````{tab-set} @@ -219,23 +219,21 @@ instance. ### DataChannel enumeration and IP address configuration -Once per second, each _data plane_ instance in a sensor bridge device sends out two UDP -packets; the host uses these to enumerate visible devices. One packet is called the -_enumeration_ packet, the other is the _bootp request_ packet. The -`Enumerator.find_channel` method gathers and decodes both of these messages and uses -that to generate the dictionary passed back as `channel_metadata`. Holoscan sensor -bridge sends these packets using the local broadcast MAC ID (FF:FF:FF:FF:FF:FF). Routers -are not allowed to forward these messages to other networks, so only locally connected -hosts will receive these. Your host must be connected to the same network as the sensor -bridge device in order to communicate. - -While the _enumeration_ message is intended to announce the presence of a sensor bridge -data channel; the _bootp request_ presents a request that the host can reply to with an -IP address reconfiguration command. If the host wishes to reconfigure the IP address of -the device, it sends a _bootp reply_ message with a new IP address to be assigned to -that data plane controller. The sensor bridge demo container includes a command line -tool called `hololink` that can be used to assign new IP addresses to sensor bridge -devices: +Once per second, each _data plane_ instance in a sensor bridge device sends out UDP +enumeration packets; the host uses these to locate accessible devices. +`Enumerator.find_channel` method gathers and decodes these messages and uses that to +generate the dictionary passed back as `channel_metadata`. 
Holoscan sensor bridge sends +these packets using the local broadcast MAC ID (FF:FF:FF:FF:FF:FF). Routers are not +allowed to forward these messages to other networks, so only locally connected hosts +will receive these. Your host must be connected to the same network as the sensor bridge +device in order to communicate. + +Holoscan sensor bridge enumeration messages are based on the BOOTP protocol, and like +BOOTP, they provide a mechanism to reconfigure the IP address of that HSB device. If the +host wishes to reconfigure the IP address of the device, it sends a reply message with a +new IP address to be assigned to that data plane controller. The sensor bridge demo +container includes a command line tool called `hololink` that can be used to assign new +IP addresses to sensor bridge devices: ```none $ hololink set-ip b0:4f:13:e0:20:4c 192.168.100.250 @@ -243,27 +241,27 @@ $ hololink set-ip b0:4f:13:e0:20:4c 192.168.100.250 Your MAC-ID and IP addresses will be different; a list with any number of mac-id and ip-address pairs can be given. By default, this starts a process that runs forever; on -receipt of a _bootp request_ with any IP address other than the configured value, a -_bootp reply_ response is sent that assigns the configured address to that data channel. -Running this as a daemon is important when resetting the sensor bridge device: +receipt of an enumeration request with any IP address other than the configured value, a +reply is sent that assigns the configured address to that data channel. 
Running this as +a daemon is important when resetting the sensor bridge device: - Application code establishes a connection at the new IP address - Application executes `hololink.reset` - The device resets and reverts back to the default IP address - Application code sees enumeration with the default IP address--which it ignores -- When `hololink set-ip` sees the _bootp request_ with something besides the new IP - address, it'll send a _bootp reply_ with the new IP address configuration +- When `hololink set-ip` sees the enumeration packet with something besides the new IP + address, it'll send a reply with the new IP address configuration - Holoscan sensor bridge updates its IP address. Enumeration data will now be sent using that new address - Application code then sees enumeration at the new IP address - Application reconnects and completes the reset request -_bootp request_ and _bootp reply_ packet contents follow the specification given in -[RFC951](https://datatracker.ietf.org/doc/html/rfc951) with the exception that _bootp -request_ is sent by the sensor bridge device on UDP port 12267 and _bootp reply_ is sent -by the host to UDP port 12268. +Enumeration request and reply packets follow the specification given in +[RFC951](https://datatracker.ietf.org/doc/html/rfc951) with the exception that +enumeration requests are sent by the sensor bridge device on UDP port 12267 and replies +are sent by the host to UDP port 12268. -[Specific information about host network configuration can be found here.](notes.md#holoscan-sensor-bridge-ip-address-configuration). +[Specific information about host network configuration can be found here.](notes.md#holoscan-sensor-bridge-ip-address-configuration) ### Holoscan sensor bridge data channel uses RoCE v2 RDMA write and RDMA write immediate requests @@ -276,12 +274,14 @@ with target network addressing, authentication keys, and individual-packet and o data-frame sizes. 
Once configured, the sensor bridge device will send received sensor data in RDMA write requests with a payload size given by the individual-packet size value. These requests, on receipt by ConnectX, are written directly into GPU or system -memory--these writes are completely offloaded from the CPU. When the total number of -received bytes reaches the data-frame size, that packet is sent using an RDMA -write-immediate request. Sending the last packet once the data-frame size is reached, -and not waiting to reach the individual-packet size, accounts for aggergate data -payloads that are not an even multiple of the individual packet size. The RDMA -write-immediate request has the same functionality as an RDMA write request with an -extra flag that is passed to the CPU with an interrupt. This interrupt is used to -indicate the end-of-frame and is what `RoceReceiverOp.compute` waits for on a call to -`get_next_frame.` +memory--these writes are completely offloaded from the CPU. After the total number of +received bytes reaches the data-frame size, a special metadata packet is sent using an +RDMA write-immediate request. This write-immediate request schedules an interrupt for +the CPU, which is used to flag that the received data is ready for further processing-- +this interrupt is what `RoceReceiverOp.compute` waits for on a call to `get_next_frame`. + +### Holoscan SDK metadata and HSB + +HSB devices send a block of metadata following each received data frame. See the +[description of HSB latency](latency.md) for details on the contents and uses of this +data. diff --git a/docs/user_guide/build.md b/docs/user_guide/build.md index 82f68e5..a5f0ccf 100644 --- a/docs/user_guide/build.md +++ b/docs/user_guide/build.md @@ -30,7 +30,7 @@ container. This container is used to run all holoscan tests and examples. - `--dgpu` requires a system with a dGPU installed (e.g. IGX with A6000 dGPU) and an OS installed with appropriate dGPU support (e.g. 
- [IGX OS 1.0 Production Release](https://developer.nvidia.com/igx-downloads) with + [IGX OS 1.1 Production Release](https://developer.nvidia.com/igx-downloads) with dGPU). - `--igpu` is appropriate for systems running on a system with iGPU (e.g. AGX or IGX without a dGPU). This requires an OS installed with iGPU support (e.g. for AGX: @@ -48,7 +48,7 @@ sh docker/demo.sh This brings you to a shell prompt inside the Holoscan sensor bridge demo container. (Note that iGPU configurations, when starting the demo container, will display the message "Failed to detect NVIDIA driver version": this can be ignored.) Now you're ready -to run sensor bridge applcations. +to run sensor bridge applications. ## Holoscan sensor bridge software loopback tests @@ -78,7 +78,7 @@ For IGX configurations, then ```none -pytest --imx274 +sh ./test-igx-cpnx100.sh ``` For AGX configurations, only one camera is supported, so only @@ -86,7 +86,7 @@ For AGX configurations, only one camera is supported, so only is to be connected. Run the device test on AGX this way: ```none -pytest --imx274 --unaccelerated-only +sh ./test-agx-cpnx100.sh ``` If things are not working as expected, check the diff --git a/docs/user_guide/dataplane.md b/docs/user_guide/dataplane.md index 9a021cd..5b4502e 100644 --- a/docs/user_guide/dataplane.md +++ b/docs/user_guide/dataplane.md @@ -68,35 +68,41 @@ ports. ### UDP Overhead UDP packets require overhead such as headers in the beginning of the packet and iCRC -added to the end of the packet. UDP packet overhead is dependent whether the UDP packet -is a normal UDP packet or an End of Window UDP packet. UDP packet structures for both -are described in a table below. 
- -Normal UDP Packet - -| **Byte Count** | **Description** | **Byte Size** | **Endianess** | -| --------------------- | ----------------------------- | ------------- | ------------- | -| 0 to 13 | Ethernet Header | 14 | Big Endian | -| 14 to 33 | IPv4 Header | 20 | Big Endian | -| 34 to 41 | UDP Header | 8 | Big Endian | -| 42 to 69 | Holoscan Sensor Bridge Header | 28 | Little Endian | -| 70 to N-101 | Sensor Data | N - 82 | Little Endian | -| N-9 to N-51 | iCRC | 4 | Little Endian | - -1. N=Configured Ethernet Packet Length - -End of Window UDP Packet - see section for more information on window - -| **Byte Count** | **Description** | **Byte Size** | **Endianess** | -| --------------------- | ----------------------------- | ------------- | ------------- | -| 0 to 13 | Ethernet Header | 14 | Big Endian | -| 14 to 33 | IPv4 Header | 20 | Big Endian | -| 34 to 41 | UDP Header | 8 | Big Endian | -| 42 to 73 | Holoscan Sensor Bridge Header | 32 | Little Endian | -| 74 to M-101 | Sensor Data | M - 74 | Little Endian | -| M-9 to M-51 | iCRC | 4 | Little Endian | - -1. M=Remaining sensor bytes at the End of Window + 82 bytes of overhead +added to the end of the packet. A UDP packet carrying sensor data is sent either when it +reaches the configured Ethernet packet length or when the end of a sensor window is reached. +After the end of a sensor window, a separate Metadata UDP packet is sent. The structures of +both packet types are described in the tables below. + +Sensor UDP Packet + +| **Byte Count** | **Description** | **Byte Size** | **Endianess** | +| ----------------------- | ----------------------------- | ------------- | ------------- | +| 0 to 13 | Ethernet Header | 14 | Big Endian | +| 14 to 33 | IPv4 Header | 20 | Big Endian | +| 34 to 41 | UDP Header | 8 | Big Endian | +| 42 to 69 | Holoscan Sensor Bridge Header | 28 | Little Endian | +| 70 to N+69<sup>1</sup> | Sensor Data | N | Little Endian | +| N+70 to N+73<sup>1</sup> | iCRC | 4 | Little Endian | + +1. 
N=Configured Ethernet packet length or remaining end of sensor window size. + +Metadata UDP Packet + +| **Byte Count** | **Description** | **Byte Size** | **Endianess** | +| -------------- | ---------------------------------- | ------------- | ------------- | +| 0 to 13 | Ethernet Header | 14 | Big Endian | +| 14 to 33 | IPv4 Header | 20 | Big Endian | +| 34 to 41 | UDP Header | 8 | Big Endian | +| 42 to 73 | Holoscan Sensor Bridge Header | 32 | Little Endian | +| 74 to 77 | Flags | 4 | Little Endian | +| 78 to 81 | Packet Sequence Number | 4 | Little Endian | +| 82 to 85 | CRC of Sensor Data in whole Window | 4 | Little Endian | +| 86 to 97 | PTP of First Sensor Data of Window | 12 | Little Endian | +| 98 to 105 | Valid # of Bytes within Buffer | 8 | Little Endian | +| 106 to 109 | Frame Number | 4 | Little Endian | +| 110 to 121 | PTP of Metadata Packet Formation | 12 | Little Endian | +| 122 to 201 | Reserved | 80 | Little Endian | +| 202 to 205 | iCRC | 4 | Little Endian | The Ethernet, IPv4, and UDP headers abide by the standard Ethernet header format. diff --git a/docs/user_guide/examples.md b/docs/user_guide/examples.md index 94828ba..ec2d2f1 100644 --- a/docs/user_guide/examples.md +++ b/docs/user_guide/examples.md @@ -3,14 +3,6 @@ Holoscan sensor bridge Python example applications are located under the `examples` directory. -The C++ examples need to be build first using these commands - -```sh -$ export BUILD_DIR=/tmp/build -$ cmake -S . -B $BUILD_DIR -G Ninja -DHOLOLINK_BUILD_PYTHON=OFF -$ cmake --build $BUILD_DIR -j $(nproc) -``` - Below are instructions for running the applications on the IGX and the Jetson AGX platforms. @@ -36,20 +28,36 @@ accelerated network controller, ```sh $ python3 examples/imx274_player.py ``` + +or, for unaccelerated configurations (e.g. 
AGX), + +```sh +$ python3 examples/linux_imx274_player.py +``` + ```` ````{tab-item} C++ + +The C++ examples need to be built first using these commands; this leaves the resulting +executables in /tmp/build/examples. + ```sh -$ $BUILD_DIR/examples/imx274_player +$ export BUILD_DIR=/tmp/build +$ cmake -S . -B $BUILD_DIR -G Ninja -DHOLOLINK_BUILD_PYTHON=OFF +$ cmake --build $BUILD_DIR -j $(nproc) ``` -```` -````` -or, for unaccelerated configurations (e.g. AGX), +After examples are built, you can run the `imx274_player`: ```sh -$ python3 examples/linux_imx274_player.py +$ $BUILD_DIR/examples/imx274_player ``` +Note that the C++ example is only supported with the accelerated network receiver. + +```` +````` + Documentation breaking down the source code for the IMX274 player application is [available here](applications.md#imx274_player); this example illustrates the basic sensor bridge workflow which is described in the @@ -98,13 +106,14 @@ apt-get update && apt-get install -y ffmpeg pip3 install ultralytics onnx cd examples yolo export model=yolov8n-pose.pt format=onnx +trtexec --onnx=yolov8n-pose.onnx --saveEngine=yolov8n-pose.engine.fp32 cd - ``` -Note that this conversion step only needs to be executed once; the `yolov8n-pose.onnx` -file contains the converted model and is all that's needed for the demo to run. The -installed components will be forgotten when the container is exited; those do not need -to be present in future runs of the demo. +Note that this conversion step only needs to be executed once; the +`yolov8n-pose.engine.fp32` file contains the converted model and is all that's needed +for the demo to run. The installed components will be forgotten when the container is +exited; those do not need to be present in future runs of the demo. 
For systems with accelerated network interfaces, within the sensor bridge demo container, launch the Body Pose estimation: @@ -122,18 +131,17 @@ $ python3 examples/linux_body_pose_estimation.py This will bring up the Holoscan visualizer on the GUI showing the live video feed from the IMX274 device, along with a green overlay showing keypoints found by the body pose -net model. The first time the body pose example is run, the model is converted to an -fp32 file, which can take several minutes. These conversion results are cached in a -local file and reused on subsequent runs of the example program. For more information -about this application, look [here](applications.md#body_pose_estimation). +net model. For more information about this application, look +[here](applications.md#body_pose_estimation). Press Ctrl/C to exit. ## Running the Stereo IMX274 example -`examples/stereo_imx274_player.py` shows an example with two independent pipelines, one -for each camera on the dual-camera module. Only an accelerated version is included, and -[both network ports must be connected](sensor_bridge_hardware_setup.md#connecting-holoscan-sensor-bridge-to-the-host) +For IGX, `examples/stereo_imx274_player.py` shows an example with two independent +pipelines, one for each camera on the dual-camera module. Accelerated networking is used +to provide real time access to the pair of 4k image streams. Make sure that +[both network ports are connected](sensor_bridge_hardware_setup.md#connecting-holoscan-sensor-bridge-to-the-host) between the IGX and the Holoscan sensor bridge unit. ```sh @@ -143,6 +151,16 @@ $ python3 examples/stereo_imx274_player.py This brings up a visualizer display with two frames, one for the left channel and the other for the right. 
+For AGX configurations, you can observe both cameras using a single network port: + +```sh +$ python3 examples/linux_single_network_stereo_imx274_player.py +``` + +Applications wishing to map sensors to specific data channels can do so using the +`use_sensor` API, which is demonstrated in these examples. The AGX network interface is +limited to 10Gbps so support is only provided for observing stereo video in 1080p mode. + ## Running the GPIO example `examples/gpio_example_app.py` is a simple example of using the GPIO interface of the @@ -154,7 +172,7 @@ $ python3 examples/gpio_example_app.py ``` This brings up a textual display which cycles over different pre-set pin configurations -and alows time between different settings of the pins to measure or readback pins +and allows time between different settings of the pins to measure or read back pin values. Please refer to the application structure section to read more about the [GPIO example application](applications.md#gpio-example-application). @@ -166,7 +184,8 @@ Orin AGX and IGX Orin in iGPU configuration. Before starting the docker run, setup the `nvargus-daemon` with the flag `enableRawReprocessing=1`. This enables us to run the ISP with the Bayer frame capture -using Holoscan sensor bridge unit and this change persists through even restart. +using Holoscan sensor bridge unit and this change persists even through a restart. In the +host system: ```sh sudo su @@ -176,12 +195,9 @@ nvargus-daemon exit ``` -Now use following commands to run the example. +To run the example, within the demo container: ```sh -$ export DISPLAY=:1 -$ xhost + -$ sh ./docker/demo.sh $ python3 examples/linux_hwisp_player.py ``` @@ -197,3 +213,19 @@ unset enableRawReprocessing nvargus-daemon exit ``` + +## Running the Latency for IMX274 example + +For IGX systems, `examples/imx274_latency.py` shows an example of how to use timestamps +to profile the hardware and software pipeline. 
This example demonstrates recording +timestamps received from the FPGA when data is acquired and timestamps measured in the +host at various points in frame reception and pipeline execution. At the end of the run, +the application will provide a duration and latency report with average, minimum, and +maximum values. + +Before running the app, make sure the PTP sync has been enabled on the setup and then +use the following commands to run the example. + +```sh +$ python3 examples/imx274_latency.py +``` diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 87e58d5..4005542 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -11,9 +11,11 @@ build examples applications architecture +latency new_sensors fpga_index sensor_bridge_firmware_setup troubleshooting notes +RELEASE_NOTES ``` diff --git a/docs/user_guide/introduction.md b/docs/user_guide/introduction.md index 5a3d5bf..85d3144 100644 --- a/docs/user_guide/introduction.md +++ b/docs/user_guide/introduction.md @@ -8,16 +8,22 @@ device FPGA and sent via UDP over Ethernet to the host system. In systems like [IGX Devkit](https://www.nvidia.com/en-us/edge-computing/products/igx), [ConnectX SmartNIC](https://www.nvidia.com/content/dam/en-zz/Solutions/networking/ethernet-adapters/connectx-7-datasheet-Final.pdf) interfaces can greatly lower latency by writing that UDP data directly into GPU memory. -Holoscan Sensor Bridge also supports the -[Jetson AGX Orin Devkit](https://developer.nvidia.com/embedded/learn/jetson-agx-orin-devkit-user-guide/index.html) -using socket based Ethernet connections. Holoscan Sensor Bridge host software supports -integrating sensor equipment into -[Holoscan](http://docs.nvidia.com/holoscan/sdk-user-guide) pipelines. Several examples -are provided showing video processing and inference based on a Sony IMX274 camera -attached to a Holoscan Sensor Bridge circuit board. 
This configuration can be connected -to an [IGX](https://www.nvidia.com/en-us/edge-computing/products/igx) or +Holoscan Sensor Bridge host software supports integrating received sensor data into +[Holoscan](http://docs.nvidia.com/holoscan/sdk-user-guide) pipelines; examples are +provided showing video processing and inference based on a Sony IMX274 camera. This +configuration can be connected to an +[IGX](https://www.nvidia.com/en-us/edge-computing/products/igx) or [Jetson AGX Orin](https://developer.nvidia.com/embedded/learn/jetson-agx-orin-devkit-user-guide/index.html) -edge compute system. +edge compute system. Additionally: + +- Holoscan Sensor Bridge supports hosts with or without accelerated RDMA capable NICs. + For unaccelerated configurations, like + [Jetson AGX Orin Devkit](https://developer.nvidia.com/embedded/learn/jetson-agx-orin-devkit-user-guide/index.html), + support is provided using Linux socket based Ethernet connections. +- Appropriately enabled host systems can support high accuracy PTP timestamps; these can + be used to record the time at which data was received, measure data and pipeline + latency, and synchronize sensor behavior. Both IGX and Orin AGX systems have hardware + PTP support in on-board network interfaces. ## Software @@ -31,9 +37,16 @@ operators that receive network data generated by Holoscan Sensor Bridge devices. example, `RoceReceiverOp` provides a facility for receiving UDP messages from a Holoscan Sensor Bridge source and storing that data in GPU memory. When used with a video camera source, received data appears as a block of memory with CSI-2 bayer image data. Holoscan -includes off-the-shelf operators for construction of RGBA images from Bayer video, image -signal processing, inference, and visualization, facilitating real-time processing of -video data. For more details, see [IMX274 demo](applications.md). 
+includes off-the-shelf operators for + +- Construction of RGBA images from Bayer video +- Image signal processing +- Inference +- Visualization, and +- Data integrity testing + +These tools are all intended to facilitate real-time processing of video data. For more +details, see [IMX274 demo](applications.md). Different applications have different uses for received data. For example, a high-speed analog signal (e.g. 5G cellular antenna) has different requirements than a video camera. diff --git a/docs/user_guide/ip_integration.md b/docs/user_guide/ip_integration.md index b8e3d66..2715566 100644 --- a/docs/user_guide/ip_integration.md +++ b/docs/user_guide/ip_integration.md @@ -21,36 +21,39 @@ default macro value is the configuration that has been tested and verified. Table 13 -| \*\*Macro | **Tested Values** | **Description** | -| --------------------------------- | ---------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | -| FPGA_VENDOR | LATTICE, PSG, MICROCHIP | Defines the FPGA Vendor used. | -| HIF_CLK_FREQ | 156250000 (for DATAPATH_WIDTH=64)
201416016 (for DATAPATH_WIDTH=512) | Clock frequency of the Host Interface. Unit is in Hz | -| APB_CLK_FREQ | 19531250 (for DATAPATH_WIDTH=64)
100000000 (for DATAPATH_WIDTH=512) | Clock frequency of the APB Interface. Unit is in Hz | -| BOARD_ID\[7:0\] | 8'h02 for Lattice LF-SNSR-ETH-EV
8'h04 for Microchip MPF200-ETH-SENSOR-BRIDGE | Used in Enumeration packet | -| ENUM_EEPROM | Defined or undefined | When defined, read the contents of the Enumeration packet from from external non-volatile memory. If undefined, use the macros defined below.. | -| MAC_ADDR\[47:0\] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | -| BOARD_VER\[159:0\] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | -| BOARD_SN\[55:0\] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | -| FPGA_CRC\[15:0\] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | -| MISC\[31:0\] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | -| DATAPATH_WIDTH | 64, 512 | Width of the AXI Stream TDATA in bits. This number must be byte-aligned. Meaning, it must be a number divisible by 8. | -| DATAKEEP_WIDTH | DATAPATH_WIDTH/8 | Width of the AXI Stream TKEEP.This should not be changed. | -| DATAUSER_WIDTH | 1 | Width of the AXI Stream TUSER signal. | -| SENSOR_IF_INST | 1-2 | Number of Sensor interfaces. | -| HOST_IF_INST | 1-2 | Number of Host interfaces. | -| HOST_MTU | 1500 (DO NOT CHANGE FOR 10G SYSTEM) | Size of Ethernet packet in bytes. | -| SPI_INST | 1-8 | Number of SPI interfaces. | -| I2C_INST | 1-8 | Number of I2C interfaces. | -| GPIO_INST | 0-255 | Number of GPIO Input & Output bits. | -| GPIO_RESET_VALUE\[GPIO_INST-1:0\] | 0 | Reset value of GPIO bits. | -| REG_INST | 1-8 | Number of user register. | -| SIF_SORT_RESOLUTION | DO NOT TOUCH | TBD. Do not change. | -| SIF_VP_COUNT | DO NOT TOUCH | TBD. Do not change. | -| SIF_VP_SIZE | DO NOT TOUCH | TBD. Do not change. | -| SIF_NUM_CYCLES | DO NOT TOUCH | TBD. Do not change. | -| SIF_DYN_VP | DO NOT TOUCH | TBD. Do not change. | -| SIF_MIXED_VP_SIZE | DO NOT TOUCH | TBD. Do not change. | -| N_INIT_REG | Integer value | Number of initialization registers. 
| +| \*\*Macro | **Tested Values** | **Description** | +| ------------------------------- | ------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| FPGA_VENDOR | LATTICE, PSG, MICROCHIP | Defines the FPGA Vendor used. | +| HIF_CLK_FREQ | 156250000 (for DATAPATH_WIDTH=64)
201416016 (for DATAPATH_WIDTH=512) | Clock frequency of the Host Interface. Unit is in Hz | +| APB_CLK_FREQ | 19531250 (for DATAPATH_WIDTH=64)
100000000 (for DATAPATH_WIDTH=512) | Clock frequency of the APB Interface. Unit is in Hz | +| BOARD_ID[15:0] | 16'h02 for Lattice LF-SNSR-ETH-EV
16'h04 for Microchip MPF200-ETH-SENSOR-BRIDGE | Used in Enumeration packet | +| ENUM_EEPROM | Defined or undefined | When defined, read the contents of the Enumeration packet from from external non-volatile memory. If undefined, use the macros defined below.. | +| MAC_ADDR[47:0] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | +| BOARD_VER[159:0] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | +| BOARD_SN[55:0] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | +| FPGA_CRC[15:0] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | +| MISC[31:0] | Any value | Used in Enumeration packet if ENUM_EEPROM is undefined. | +| DATAPATH_WIDTH | 64, 512 | Width of the Sensor AXI Stream TDATA in bits. This number must be byte-aligned. Meaning, it must be a number divisible by 8. | +| DATAKEEP_WIDTH | DATAPATH_WIDTH/8 | Width of the Sensor AXI Stream TKEEP.This should not be changed. | +| DATAUSER_WIDTH | 1 | Width of the Sensor AXI Stream TUSER signal. | +| SENSOR_IF_INST | 1-2 | Number of Sensor Interface. | +| HOST_WIDTH | 64, 512 | Width of the Host AXI Stream TDATA in bits. This number must be byte-aligned. Meaning, it must be a number divisible by 8. | +| HOSTKEEP_WIDTH | DATAPATH_WIDTH/8 | Width of the Host AXI Stream TKEEP.This should not be changed. | +| HOSTUSER_WIDTH | 1 | Width of the Host AXI Stream TUSER signal. | +| HOST_IF_INST | 1-2 | Number of Host interfaces. | +| HOST_MTU | 1500 (DO NOT CHANGE FOR 10G SYSTEM) | Size of Ethernet packet in bytes. | +| SPI_INST | 1-8 | Number of SPI interfaces. | +| I2C_INST | 1-8 | Number of I2C interfaces. | +| GPIO_INST | 0-255 | Number of GPIO Input & Output bits. | +| GPIO_RESET_VALUE[GPIO_INST-1:0] | 0 | Reset value of GPIO bits. | +| REG_INST | 1-8 | Number of user register. | +| SIF_SORT_RESOLUTION | DO NOT TOUCH | TBD. Do not change. | +| SIF_VP_COUNT | DO NOT TOUCH | TBD. Do not change. | +| SIF_VP_SIZE | DO NOT TOUCH | TBD. 
Do not change. | +| SIF_NUM_CYCLES | DO NOT TOUCH | TBD. Do not change. | +| SIF_DYN_VP | DO NOT TOUCH | TBD. Do not change. | +| SIF_MIXED_VP_SIZE | DO NOT TOUCH | TBD. Do not change. | +| N_INIT_REG | Integer value | Number of initialization registers. | ### Build Revision @@ -59,7 +62,7 @@ is used to identify the revision of the FPGA and the HOLOLINK and is sent to the part of the Enumeration Packet. From the module where "HOLOLINK_top" module is instantiated, the instantiated parameter, -HOLOLINK_REV\[15:0\] must be set to *16'h2407* for *Holoscan SDK v1.1.0* release. +HOLOLINK_REV[15:0] must be set to *16'h2412* for *Holoscan SDK v2.0.0* release. ### Enumeration Packet @@ -108,14 +111,14 @@ establish ethernet connection between the FPGA and the host, and can be used for other user function in the top level design. The list of registers to be initialized is defined in “Hololink_def.svh” as “init_reg” -array. The “init_reg” is an unpacked array, sized \[N_INIT_REG\] \[63:0\], where the +array. The “init_reg” is an unpacked array, sized [N_INIT_REG] [63:0], where the N_INIT_REG macro defines the number of registers to be initialized and the 64-bit vector -is used to define the 32- bit address of register at \[63:32\] and the 32-bit write data -at \[31:0\]. +is used to define the 32-bit address of the register at [63:32] and the 32-bit write data +at [31:0]. To give an example of one of the init_reg array entry: -{32'h1000_0020, 32'h0000_00FF} //init_reg\[0\] +{32'h1000_0020, 32'h0000_00FF} //init_reg[0] Will write to the User REG_INST_0 block address offset 0x0000_0020 the data 0x0000_00FF. diff --git a/docs/user_guide/latency.md b/docs/user_guide/latency.md new file mode 100644 index 0000000..7418020 --- /dev/null +++ b/docs/user_guide/latency.md @@ -0,0 +1,66 @@ +# Holoscan sensor bridge data latency + +### Holoscan SDK metadata and HSB + +HSB devices send a block of metadata following each received data frame. 
That metadata +includes: + +- `frame_number` counts the number of end-of-frame interrupts observed on this data + channel. +- `timestamp_s` and `timestamp_ns` are the PTP timestamp when the first data for the + current frame arrived at the FPGA. +- `metadata_s` and `metadata_ns` are the PTP timestamp recorded when the metadata packet + is sent out-- which exactly follows the last byte in the received data frame. + +When host PTP support is properly configured, this time is synchronized with the host +time to within one microsecond. HSDK operators can access this metadata using +[APIs provided by the Holoscan SDK](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_create_app.html#dynamic-application-metadata). +Notes: + +- Timestamps are comparable to the clock values read from the + `clock_gettime(CLOCK_REALTIME, &timespec)` API. +- Receiver operators produce different sets of metadata based on the unique + characteristics of that implementation. For example, data specific to RoceReceiverOp + may not appear in the metadata presented by LinuxReceiverOperator. Applications may + choose to access metadata using `metadata.get("parameter_name", 0)` to provide a + useful default value when the specifically named metadata isn't set by the framework. +- Be sure to call the application `is_metadata_enabled(true)` method at initialization + time; otherwise each operator will only see an empty metadata structure. + +### Measuring sensor data latency + +In `examples/imx274_latency.py`, you can see a pipeline that records additional +timestamps, and uses these timestamps to issue a latency report: + +- `operator_s` and `operator_ns` are recorded by the operator following the network + receiver operator. This is the time at which pipeline operators can actually access + received sensor data. +- `completed_s` and `completed_ns` are recorded by the last operator in the example + pipeline, after visualization is complete. 
+ +The receiver operator also records `received_s` and `received_ns` which are recorded at +the time the CPU wakes up with the end-of-frame interrupt. This occurs in a background +thread, independent of the pipeline. The times listed below are computed by combining +`(name)_s` and `(name)_ns` into a single floating point seconds value. Time values +displayed are all typical but will vary. + +HSB Latency + +- `frame_end - frame_start` is the time that the sensor requires to transfer an entire + frame of data into the FPGA. For the IMX274 in 4k RAW10 mode, this is typically + 15.8ms. +- `received - frame_end` shows how long it takes for the CPU to wake up in the + background thread due to end-of-frame indication. On an IGX with accelerated + networking, this is typically 120us. +- `operator - received` is the time that it takes for the next pipeline operator to + execute with the currently received data. On IGX, if the pipeline is idle, this time + is typically around 1ms. +- `completed - operator` is the time required for the rest of the pipeline to complete. + For IGX, executing the naive example ISP and visualizer, this time is typically about + 2.4ms. + +The sample application therefore shows, for each video frame, almost 16ms data +acquisition time followed by almost 4ms of processing time, for a total of under 20ms +latency. In this application, frames are delivered at 60FPS, which means that each new +frame starts at a 16ms interval; reception of the next frame goes on in the background +while the current frame processing is underway. diff --git a/docs/user_guide/latency.svg b/docs/user_guide/latency.svg new file mode 100644 index 0000000..dcd5f85 --- /dev/null +++ b/docs/user_guide/latency.svg @@ -0,0 +1,3 @@ + + +
1.2ms
1.2ms
19.5ms
19.5ms
16ms
16ms
frame n, 15.8ms
frame n, 15.8ms
frame n+1
frame n+1
2.4ms
2.4ms
"complete": Visualization operator completes
"complete": Visualization operator comple...
"operator": Next pipeline operator executes
"operator": Next pipeline operator executes
"frame_end": End of frame metadata sent
"frame_end": End of frame metadata se...
0ms
0ms
"frame_start": First data arrives at FPGA
"frame_start": First data arrives at...
"received": CPU wakes up on end-of-frame
"received": CPU wakes up on end-of-frame
\ No newline at end of file diff --git a/docs/user_guide/new_sensors.md b/docs/user_guide/new_sensors.md index f4e31d3..1bebb9d 100644 --- a/docs/user_guide/new_sensors.md +++ b/docs/user_guide/new_sensors.md @@ -28,8 +28,8 @@ class MyCamera: # write_buffer will contain the big-endian 2-byte address # of the register we're reading. write_buffer = bytearray(10) # must be at least 2 - serializer = hololink_module.abstract_os.Serializer(write_buffer) - serializer.append_u16_be(register) + serializer = hololink_module.Serializer(write_buffer) + serializer.append_uint16_be(register) # send write_buffer to the peripheral device, # and return data read back from it. reply will # be a 4 byte buffer, or None if there's a problem @@ -41,7 +41,7 @@ class MyCamera: ) # deserializer fetches data from reply; this # raises an exception if reply is None - deserializer = hololink_module.abstract_os.Deserializer(reply) + deserializer = hololink_module.Deserializer(reply) # Fetch an unsigned 32-bit value stored in big-endian format r = deserializer.next_u32_be() return r diff --git a/docs/user_guide/notes.md b/docs/user_guide/notes.md index 059787f..b5462f4 100644 --- a/docs/user_guide/notes.md +++ b/docs/user_guide/notes.md @@ -12,19 +12,23 @@ include: the last completed video frame. - Never return the same video frame twice. If the pipeline is faster than a video reception time, the pipeline will block until the next incoming frame is complete. +- PTP timestamps, sent by HSB on both the start-of-frame and the end-of-frame condition, + are available by looking at received metadata. When PTP is properly configured, this + provides a reliable mechanism for determining the actual latency for processing or + displaying this data. Note that PTP is only supported for IGX configurations. RoceReceiverOp uses RDMA with special caveats that are likely to change in future versions: - There is no protection against rewriting the video buffer while the pipeline is using - it. 
The current version uses a single GPU memory region for received data and any - ingress UDP write request can update it. Video pipelines usually start by copying the - data from the receiver buffer into another region of memory (via CsiToBayerOp)--this - minimizes the time during which this hazard can occur--but scheduling these operations - is dependent on CPU availability. -- In RoceReceiverOp's current implementation, the compute method always blocks until the - next end-of-frame interrupt is received; future versions are likely to return a - previously completed frame if one is ready. + it. The current implementation allocates enough GPU memory to receive two video + frames, and the transmitter is configured to alternate between these two buffers. + Video pipelines usually start by copying the data from the receiver buffer into + another region of memory (via CsiToBayerOp)--this minimizes the time during which an + overwrite hazard can occur--but scheduling these operations is dependent on CPU + availability. CudaCRCOp can be used to verify that received data has not been + corrupted; as long as it computes the same value that's found in the receiver + metadata, then you've proven that the memory contents have not been updated. ## Holoscan sensor bridge IP address configuration @@ -49,9 +53,10 @@ INFO:root:mac_id=48:B0:2D:EE:03:8F cpnx_version=0X2402 clnx_version=0XFFFF ip_ad ``` This configuration has two network ports, with MAC ID 48:B0:2D:EE:03:8E connected to the -local eth0 device; 48:B0:2D:EE:03:8F is connected to eth1. This is on the same sensor -bridge device, as shown by the common serial number. Note that these messages will be -observed by the local system regardless of the IP addresses of local network devices. +local eth0 device; 48:B0:2D:EE:03:8F is connected to eth1. These connect to the same +sensor bridge device, as shown by the common serial number. 
Note that these messages +will be observed by the local system regardless of the IP addresses of local network +devices. For our example, we'll set up this configuration: @@ -61,37 +66,11 @@ For our example, we'll set up this configuration: - Sensor bridge port 48:B0:2D:EE:03:8F will use IP address 192.168.200.3 - Explicit routes are added with eth0 to 192.168.200.2 and eth1 to 192.168.200.3 -These commands assume you're on a system using NetworkManager for local network -interface management (e.g. [IGX OS 1.0 DP](https://developer.nvidia.com/igx-downloads) -or JetPack 6.0). - -First, use `nmcli` to set the IP address of eth0 and eth1. This is done on the host -system, outside the demo container: - -```none -$ sudo nmcli con add con-name hololink-eth0 ifname eth0 type ethernet ip4 192.168.200.101/24 -$ sudo nmcli con add con-name hololink-eth1 ifname eth1 type ethernet ip4 192.168.200.102/24 -``` - -Next, configure the routing table: - -```none -$ sudo nmcli connection modify hololink-eth0 +ipv4.routes "192.168.200.2/32 192.168.200.101" -$ sudo nmcli connection modify hololink-eth1 +ipv4.routes "192.168.200.3/32 192.168.200.102" -``` - -Ensure that these configurations are activated: - -```none -$ sudo nmcli connection up hololink-eth0 -$ sudo nmcli connection up hololink-eth1 -``` - -Note that the above settings are remembered by the host system and are restored on -reboot. - -Finally, use the `hololink set-ip` command to reconfigure the sensor bridge device -ports. Within the demo container, +First, follow the [setup instructions](setup.md) to configure the host IP addresses and +routes for your expected configuration-- replace the references to 192.168.0.x with the +addresses you expect to use. Then, use the `hololink set-ip` command to reconfigure the +sensor bridge device ports. 
To configure the target IP address examples above, in the +demo container, ```none $ hololink set-ip 48:B0:2D:EE:03:8E 192.168.200.2 48:B0:2D:EE:03:8F 192.168.200.3 @@ -107,7 +86,7 @@ bridge IP address configuration is not stored in nonvolatile memory, `hololink s runs as a daemon, and must be running whenever this configuration is desired. When `hololink set-ip` sees an enumeration message from a device with a listed MAC ID but a different IP address, it will reply with a request to set the desired IP address-- this -accomodates IP address reverting on power cycle or reset. Following this, pinging the +accommodates IP address reverting on power cycle or reset. Following this, pinging the target IP address now works: ```none @@ -150,10 +129,10 @@ devkit** QSFP port: Please use the following cables to connect the sensor bridge SFP+ to the **Jetson AGX Orin devkit** 10G Ethernet port: -1. [RJ45 Etherent cable](https://www.amazon.com/Amazon-Basics-Ethernet-High-Speed-Snagless/dp/B089MGH8W3/ref=sr_1_5?crid=1KJ1COP3OKCV7&dib=eyJ2IjoiMSJ9.awXrUbdN3xPxSw8yHRVmtqoUhU1UJEBgQ7Bt3D1N-o4R66qUmZdXTiq-3z8avmIBca3drzlYJhDUl2a8emDyXxFtjeYRRH6OgEOfqtc1w9-y1SPhRXhFWKwLnC3aFhzNs6uT3x_OYvZRxUgOiadVqR8GAUdJiHgH-2SyzwUS8bM_CMRTnRdrU6y-d59mmKSet0zarNIM5FuTMVdwoBJIs_DecT4gyQQA4UnlgvC9VsXYpIxPlFkLnJGnllhPNGDUtysKngtLL1_WyhiUI5y0Q2lcAqDyHlzCCPCPRmm6Hpg.-xBCBUe3Gj5rNmopY7uoCfHAf0ybNBqeWSgi1ARCvW8&dib_tag=se&keywords=rj45%2Bethernet%2Bcable&qid=1708561933&s=electronics&sprefix=rj45%2B%2Celectronics%2C172&sr=1-5&th=1) +1. 
[RJ45 Ethernet cable](https://www.amazon.com/Amazon-Basics-Ethernet-High-Speed-Snagless/dp/B089MGH8W3/ref=sr_1_5?crid=1KJ1COP3OKCV7&dib=eyJ2IjoiMSJ9.awXrUbdN3xPxSw8yHRVmtqoUhU1UJEBgQ7Bt3D1N-o4R66qUmZdXTiq-3z8avmIBca3drzlYJhDUl2a8emDyXxFtjeYRRH6OgEOfqtc1w9-y1SPhRXhFWKwLnC3aFhzNs6uT3x_OYvZRxUgOiadVqR8GAUdJiHgH-2SyzwUS8bM_CMRTnRdrU6y-d59mmKSet0zarNIM5FuTMVdwoBJIs_DecT4gyQQA4UnlgvC9VsXYpIxPlFkLnJGnllhPNGDUtysKngtLL1_WyhiUI5y0Q2lcAqDyHlzCCPCPRmm6Hpg.-xBCBUe3Gj5rNmopY7uoCfHAf0ybNBqeWSgi1ARCvW8&dib_tag=se&keywords=rj45%2Bethernet%2Bcable&qid=1708561933&s=electronics&sprefix=rj45%2B%2Celectronics%2C172&sr=1-5&th=1) with [RJ45 to SFP+ adapter](https://www.amazon.com/10Gtek-SFP-10G-T-S-Compatible-10GBase-T-Transceiver/dp/B01KFBFL16?pd_rd_w=JvDu0&content-id=amzn1.sym.80b2efcb-1985-4e3a-b8e5-050c8b58b7cf&pf_rd_p=80b2efcb-1985-4e3a-b8e5-050c8b58b7cf&pf_rd_r=0ZFMCGJQJSRGSKQ4G71B&pd_rd_wg=fWzpt&pd_rd_r=d37211e0-40ab-4fe9-807d-0f62cad47c18&pd_rd_i=B01KFBFL16&ref_=pd_bap_d_grid_rp_0_4_i&th=1) -All Cables and adpaters are available for purchase online - please note that the links +All Cables and adapters are available for purchase online - please note that the links above are **only for demonstration purposes** and should not be considered as a purchase recommendation. diff --git a/docs/user_guide/peripheral_interface.md b/docs/user_guide/peripheral_interface.md index 6ee751c..92be745 100644 --- a/docs/user_guide/peripheral_interface.md +++ b/docs/user_guide/peripheral_interface.md @@ -24,8 +24,8 @@ ECB reads. The SPI core has a 4-bit bi-directional signal data signal (SDIO), a chip select (CS_N) and clock signal (SCK) for each endpoint connected. The bi-directional data signal is broken up into a 4-bit input and 4-bit output signal, with an output enable (oen) signal -for tri-stating. This SPI core supports single SPI mode using SDIO\[0\] as an output -(MOSI) and SDIO\[1\] as an input (MISO). Dual and Quad SPI are supported using 2 and 4 +for tri-stating. 
This SPI core supports single SPI mode using SDIO[0] as an output +(MOSI) and SDIO[1] as an input (MISO). Dual and Quad SPI are supported using 2 and 4 bits of the SDIO signal respectively. The SPI core is clocked using the i_apb_clk and reset with i_apb_rst. All logic is based diff --git a/docs/user_guide/port_description.md b/docs/user_guide/port_description.md index 3c78c26..147b3a8 100644 --- a/docs/user_guide/port_description.md +++ b/docs/user_guide/port_description.md @@ -20,16 +20,16 @@ Table 1 User Register Clock and Reset Ports Table 2 User Register APB Ports -| **Signal Name** | **Direction** | **Description** | -| ----------------------------------------- | ------------- | ------------------- | -| i_apb_pready \[N-1:0\]1 | Input | APB Ready | -| \[31:0\] i_apb_prdata\[0:N-1\]1 | Input | APB Read Data | -| i_apb_pslverr\[N-1:0\]1 | Input | APB Completer Error | -| o_apb_psel\[N-1:0\]1 | Output | APB Select | -| o_apb_penable | Output | APB Enable | -| o_apb_paddr\[31:0\] | Output | APB Address | -| o_apb_pwdata\[31:0\] | Output | APB Write Data | -| o_apb_pwrite | Output | APB Write | +| **Signal Name** | **Direction** | **Description** | +| ------------------------------------- | ------------- | ------------------- | +| i_apb_pready [N-1:0]1 | Input | APB Ready | +| [31:0] i_apb_prdata[0:N-1]1 | Input | APB Read Data | +| i_apb_pserr[N-1:0]1 | Input | APB Completer Error | +| o_apb_psel[N-1:0]1 | Output | APB Select | +| o_apb_penable | Output | APB Enable | +| o_apb_paddr[31:0] | Output | APB Address | +| o_apb_pwdata[31:0] | Output | APB Write Data | +| o_apb_pwrite | Output | APB Write | 1. N=REG_INST. Refer to the Macro Definitions section for details. 
@@ -50,14 +50,14 @@ Table 4 Sensor Interface Clock and Reset Ports Table 5 Sensor RX Interface Ports -| **Signal Name** | **Directi on** | **Description** | -| ---------------------------------------------- | -------------- | ------------------------------------------------------------------------------ | -| i_sif_axis_tvalid\[N-1:0\]1 | Input | AXI-Stream Valid | -| i_sif_axis_tlast\[N-1:0\]1 | Input | AXI-Stream Last. Currently not supported. See Sensor RX section for more info. | -| \[W-1:0\] i_sif_axis_tdata\[0:N-1\]1 | Input | AXI-Stream Data. | -| \[X-1:0\] i_sif_axis_tkeep\[0:N-1\]1 | Input | AXI-Stream Keep. Currently not supported. See Sensor RX section for more info. | -| \[Y-1:0\] i_sif_axis_tuser\[0:N-1\]1 | Input | AXI-Stream User. | -| o_sif_axis_tready\[N-1:0\]1 | Output | AXI-Stream Ready | +| **Signal Name** | **Directi on** | **Description** | +| ------------------------------------------ | -------------- | ------------------------------------------------------------------------------ | +| i_sif_axis_tvalid[N-1:0]1 | Input | AXI-Stream Valid | +| i_sif_axis_tlast[N-1:0]1 | Input | AXI-Stream Last. Currently not supported. See Sensor RX section for more info. | +| [W-1:0] i_sif_axis_tdata[0:N-1]1 | Input | AXI-Stream Data. | +| [X-1:0] i_sif_axis_tkeep[0:N-1]1 | Input | AXI-Stream Keep. Currently not supported. See Sensor RX section for more info. | +| [Y-1:0] i_sif_axis_tuser[0:N-1]1 | Input | AXI-Stream User. | +| o_sif_axis_tready[N-1:0]1 | Output | AXI-Stream Ready | 1. N=SENSOR_IF_INST, W=DATAPATH_WIDTH, X=DATAKEEP_WIDTH, Y=DATAUSER_WIDTH. See Macro Definitions section for details.Table 6 Sensor Event Ports @@ -65,23 +65,23 @@ Table 5 Sensor RX Interface Ports Table 6 Sensor TX Interface Ports Sensor TX interface is unsupported but TBD for future revisions. Sensor TX interface ports should still be instantiated. 
-| **Signal Name** | **Directi on** | **Description** | -| ---------------------------------------------- | -------------- | --------------- | -| o_sif_axis_tvalid\[N-1:0\]1 | Output | TBD | -| o_sif_axis_tlast\[N-1:0\]1 | Output | TBD | -| \[W-1:0\] o_sif_axis_tdata\[0:N-1\]1 | Output | TBD | -| \[X-1:0\] o_sif_axis_tkeep\[0:N-1\]1 | Output | TBD | -| \[Y-1:0\] o_sif_axis_tuser\[0:N-1\]1 | Output | TBD | -| i_sif_axis_tready\[N-1:0\]1 | Input | TBD | +| **Signal Name** | **Directi on** | **Description** | +| ------------------------------------------ | -------------- | --------------- | +| o_sif_axis_tvalid[N-1:0]1 | Output | TBD | +| o_sif_axis_tlast[N-1:0]1 | Output | TBD | +| [W-1:0] o_sif_axis_tdata[0:N-1]1 | Output | TBD | +| [X-1:0] o_sif_axis_tkeep[0:N-1]1 | Output | TBD | +| [Y-1:0] o_sif_axis_tuser[0:N-1]1 | Output | TBD | +| i_sif_axis_tready[N-1:0]1 | Input | TBD | 1. N=SENSOR_IF_INST, W=DATAPATH_WIDTH, X=DATAKEEP_WIDTH, Y=DATAUSER_WIDTH. See Macro Definitions section for details. Table 7 Sensor Event Ports -| **Signal Name** | **Direction** | **Description** | -| -------------------------------- | ------------- | -------------------------------------------------------------------- | -| i_sif_event \[N-1:0\]1 | Input | Sensor Interface Event. Refer to Sensor RX section for more details. | +| **Signal Name** | **Direction** | **Description** | +| ------------------------------ | ------------- | -------------------------------------------------------------------- | +| i_sif_event [N-1:0]1 | Input | Sensor Interface Event. Refer to Sensor RX section for more details. | 1.N=SENSOR_IF_INST. See Macro Definitions section for details. @@ -98,14 +98,14 @@ Table 9 Host RX Interface Ports Connect the Host RX AXI-Streaming ports directly to Ethernet MAC TX AXI-Streaming ports. 
-| **Signal Name** | **Direction** | **Description** | -| --------------------------------------------- | ------------- | ---------------- | -| i_hif_axis_tvalid\[N-1:0\]1 | Input | AXI-Stream Valid | -| i_hif_axis_tlast\[N-1:0\]1 | Input | AXI-Stream Last | -| \[W-1:0\]i_hif_axis_tdata\[0:N-1\]1 | Input | AXI-Stream Data | -| \[X-1:0\]i_hif_axis_tkeep\[0:N-1\]1 | Input | AXI-Stream Keep | -| \[Y-1:0\]i_hif_axis_tuser\[0:N-1\]1 | Input | AXI-Stream User | -| o_hif_axis_tready\[N-1:0\]1 | Output | AXI-Stream Ready | +| **Signal Name** | **Direction** | **Description** | +| ----------------------------------------- | ------------- | ---------------- | +| i_hif_axis_tvalid[N-1:0]1 | Input | AXI-Stream Valid | +| i_hif_axis_tlast[N-1:0]1 | Input | AXI-Stream Last | +| [W-1:0]i_hif_axis_tdata[0:N-1]1 | Input | AXI-Stream Data | +| [X-1:0]i_hif_axis_tkeep[0:N-1]1 | Input | AXI-Stream Keep | +| [Y-1:0]i_hif_axis_tuser[0:N-1]1 | Input | AXI-Stream User | +| o_hif_axis_tready[N-1:0]1 | Output | AXI-Stream Ready | 1. N=HOST_IF_INST, W=DATAPATH_WIDTH, X=DATAKEEP_WIDTH, Y=DATAUSER_WIDTH. See Macro Definitions section for details. @@ -114,14 +114,14 @@ Table 10 Host TX Interface Ports Connect the Host TX AXI-Streaming ports directly to Ethernet MAC RX AXI-Streaming ports. 
-| **Signal Name** | **Direction** | **Description** | -| --------------------------------------------- | ------------- | ---------------- | -| o_hif_axis_tvalid\[N-1:0\]1 | Output | AXI-Stream Valid | -| o_hif_axis_tlast\[N-1:0\]1 | Output | AXI-Stream Last | -| \[W-1:0\]o_hif_axis_tdata\[0:N-1\]1 | Output | AXI-Stream Data | -| \[X-1:0\]o_hif_axis_tkeep\[0:N-1\]1 | Output | AXI-Stream Keep | -| \[Y-1:0\]o_hif_axis_tuser\[0:N-1\]1 | Output | AXI-Stream User | -| i_hif_axis_tready\[N-1:0\]1 | Input | AXI-Stream Read | +| **Signal Name** | **Direction** | **Description** | +| ----------------------------------------- | ------------- | ---------------- | +| o_hif_axis_tvalid[N-1:0]1 | Output | AXI-Stream Valid | +| o_hif_axis_tlast[N-1:0]1 | Output | AXI-Stream Last | +| [W-1:0]o_hif_axis_tdata[0:N-1]1 | Output | AXI-Stream Data | +| [X-1:0]o_hif_axis_tkeep[0:N-1]1 | Output | AXI-Stream Keep | +| [Y-1:0]o_hif_axis_tuser[0:N-1]1 | Output | AXI-Stream User | +| i_hif_axis_tready[N-1:0]1 | Input | AXI-Stream Read | 1. N=HOST_IF_INST, W=DATAPATH_WIDTH, X=DATAKEEP_WIDTH, Y=DATAUSER_WIDTH. See Macro Definitions section for details. 
@@ -130,33 +130,33 @@ Connect the Host TX AXI-Streaming ports directly to Ethernet MAC RX AXI-Streamin Table 11 SPI Ports -| **Signal Name** | **Direction** | **Description** | -| ------------------------------------- | ------------- | ------------------------ | -| o_spi_csn\[N-1:0\]1 | Output | Chip Select (Active Low) | -| o_spi_sck\[N-1:0\]1 | Output | SPI Clock | -| o_spi_oen\[N-1:0\]1 | Output | Output Enable | -| \[3:0\]o_spi_sdio\[0:N-1\]1 | Output | SDIO Output | -| \[3:0\]i_spi_sdio\[0:N-1\]1 | Input | SDIO Input | +| **Signal Name** | **Direction** | **Description** | +| --------------------------------- | ------------- | ------------------------ | +| o_spi_csn[N-1:0]1 | Output | Chip Select (Active Low) | +| o_spi_sck[N-1:0]1 | Output | SPI Clock | +| o_spi_oen[N-1:0]1 | Output | Output Enable | +| [3:0]o_spi_sdio[0:N-1]1 | Output | SDIO Output | +| [3:0]i_spi_sdio[0:N-1]1 | Input | SDIO Input | 1. N=SPI_INST. See Macro Definitions section for details. Table 12 I2C Ports -| **Signal Name** | **Direction** | **Description** | -| -------------------------------- | ------------- | ----------------------- | -| i_i2c_scl\[N-1:0\]1 | Input | I2C Clock | -| i_i2c_sda\[N-1:0\]1 | Input | I2C Data | -| o_i2c_scl_en\[N-1:0\]1 | Output | I2C Clock Output Enable | -| o_i2c_sda_en\[N-1:0\]1 | Output | I2C Data Output Enable | +| **Signal Name** | **Direction** | **Description** | +| ------------------------------ | ------------- | ----------------------- | +| i_i2c_scl[N-1:0]1 | Input | I2C Clock | +| i_i2c_sda[N-1:0]1 | Input | I2C Data | +| o_i2c_scl_en[N-1:0]1 | Output | I2C Clock Output Enable | +| o_i2c_sda_en[N-1:0]1 | Output | I2C Data Output Enable | 1. N=I2C_INST. See Macro Definitions section for details. Table 13 GPIO Ports -| **Signal Name** | **Direction** | **Description** | -| -------------------------- | ------------- | ------------------------------------ | -| i_gpio\[N-1:0\]1 | Input | GPIO In. 
Synchronized to “i_apb_clk” | -| o_gpio\[N-1:0\]1 | Output | GPIO Out. Synchronous to “i_apb_clk” | +| **Signal Name** | **Direction** | **Description** | +| ------------------------ | ------------- | ------------------------------------ | +| i_gpio[N-1:0]1 | Input | GPIO In. Synchronized to “i_apb_clk” | +| o_gpio[N-1:0]1 | Output | GPIO Out. Synchronous to “i_apb_clk” | 1. N=GPIO_INST. See Macro Definitions section for details. @@ -165,30 +165,30 @@ Table 14 JESD Ports JESD Sensor Ports are unsupported and do not need to be instantiated. TBD in future revisions. -| **Signal Name** | **Direction** | **Description** | -| -------------------- | ------------- | --------------- | -| i_jesd_rxdp\[7-1:0\] | Input | TBD | -| i_jesd_rxdn\[7-1:0\] | Input | TBD | -| o_jesd_txdp\[7-1:0\] | Output | TBD | -| o_jesd_txdn\[7-1:0\] | Output | TBD | -| i_jesd_tx_sysref | Input | TBD | -| i_jesd_rx_sysref | Input | TBD | -| i_jesd_xcvr_refclk | Input | TBD | -| i_jesd_pll_refclk | Input | TBD | +| **Signal Name** | **Direction** | **Description** | +| ------------------ | ------------- | --------------- | +| i_jesd_rxdp[7:0] | Input | TBD | +| i_jesd_rxdn[7:0] | Input | TBD | +| o_jesd_txdp[7:0] | Output | TBD | +| o_jesd_txdn[7:0] | Output | TBD | +| i_jesd_tx_sysref | Input | TBD | +| i_jesd_rx_sysref | Input | TBD | +| i_jesd_xcvr_refclk | Input | TBD | +| i_jesd_pll_refclk | Input | TBD | Table 15 Sensor Reset Port -| **Signal Name** | **Direction** | **Description** | -| --------------------------------- | ------------- | ---------------------------------------------------------------------------------------- | -| o_sw_sen_rst \[N-1:0\]1 | Output | Register Controlled Reset. Connect to on-board sensor reset pin | -| o_sw_sys_rst | Output | Register controlled self-clearing reset. Can be used to reset blocks, such as PCS block. 
| +| **Signal Name** | **Direction** | **Description** | +| ------------------------------- | ------------- | ---------------------------------------------------------------------------------------- | +| o_sw_sen_rst [N-1:0]1 | Output | Register Controlled Reset. Connect to on-board sensor reset pin | +| o_sw_sys_rst | Output | Register controlled self-clearing reset. Can be used to reset blocks, such as PCS block. | 1. N=SENSOR_IF_INST. See Macro Definitions section for details. Table 16 PTP Port -| **Signal Name** | **Direction** | **Description** | -| ---------------------- | ------------- | ------------------------------------------------------------------------ | -| o_ptp_sec \[47:0\] | Output | PTP Seconds Field per PTP1588-2019 v2 spec. Synchronous to i_hif_clk | -| o_ptp_nanosec \[31:0\] | Output | PTP Nanoseconds Field per PTP1588-2019 v2 spec. Synchronous to i_hif_clk | -| o_pps | Output | Pulse Per Second. Synchronous to i_hif_clk | +| **Signal Name** | **Direction** | **Description** | +| -------------------- | ------------- | ------------------------------------------------------------------------ | +| o_ptp_sec [47:0] | Output | PTP Seconds Field per PTP1588-2019 v2 spec. Synchronous to i_hif_clk | +| o_ptp_nanosec [31:0] | Output | PTP Nanoseconds Field per PTP1588-2019 v2 spec. Synchronous to i_hif_clk | +| o_pps | Output | Pulse Per Second. Synchronous to i_hif_clk | diff --git a/docs/user_guide/ptp.md b/docs/user_guide/ptp.md index e6e0cf5..b54db71 100644 --- a/docs/user_guide/ptp.md +++ b/docs/user_guide/ptp.md @@ -3,14 +3,14 @@ HOLOLINK IP supports Precision Time Protocol (PTP) per IEEE1588-2019 specification. PTP synchronizes the HOLOLINK IP's internal time to the host time. This allows the -HOLOLINK IP to accurately timestamp the incoming sensor data for the host processing and +HOLOLINK IP to accurately timestamp the incoming sensor data for host processing and synchronize multiple boards on the network. 
HOLOLINK IP timer operates in the following manner: 1. When the HOLOLINK IP comes out of reset, the timer begins at 0 seconds and 0 nanoseconds. At each rising clock edge, the timer increments by (1/HIF_CLK_FREQ) - nanoseconds and 16-bit fractional nanoseconds, where HIF_CLK_FREQ is a parameter + nanoseconds and 24-bit fractional nanoseconds, where HIF_CLK_FREQ is a parameter defined in "HOLOLINK_def.svh" For example, if HIF_CLK_FREQ is set to 156250000Hz in 10G application, the incremental value per rising clock edge is 6.4ns. 1. When the HOLOLINK IP receives a SYNC (and FOLLOW-UP for 2-step) message from the @@ -40,8 +40,6 @@ HOLOLINK IP is listed below. The HOLOLINK IP PTP currently has these limitations that can be added in future revisions. -1. Mean link delay is not part of the offset measurement calculation. -1. Correction Field is not part of the offset measurement calculation. 1. Announce messages are ignored. 1. No Best Master Clock Algorithm. It assumes there is only 1 master in the network at a given time. @@ -53,4 +51,4 @@ The performance of the HOLOLINK IP PTP was tested by comparing the Pulse Per Sec | **Offset** | **End to End Standard Deviation** | | ---------- | --------------------------------- | -| \< 20 us | \< 100 ns | +| < 1 us | < 100 ns | diff --git a/docs/user_guide/register_interface.md b/docs/user_guide/register_interface.md index ed8175e..8e94eae 100644 --- a/docs/user_guide/register_interface.md +++ b/docs/user_guide/register_interface.md @@ -24,12 +24,12 @@ The APB ports available on the Holoscan Sensor Bridge IP allow user to connect t specific blocks, for example, Ethernet MAC/PCS IP block or sensor interface registers, where the Holoscan Sensor Bridge IP is the APB Requester. -The msb\[31:28\] of the register address is used to determine the REG_INST\_# block and -the address \[27:0\] is the offset address. 
+The msb[31:28] of the register address is used to determine the REG_INST\_# block and +the address [27:0] is the offset address. For example, if the host accesses register in 0x1000_0000 – 0x1FFF_FFFF address mapping, -this will trigger o_apb_psel\[0\] with the offset address o_apb_paddr mapping -0x0000_0000 – 0x0FFF_FFFF and so on. +this will trigger o_apb_psel[0] with the offset address o_apb_paddr mapping 0x0000_0000 +– 0x0FFF_FFFF and so on. ![reg_offset](reg_offset.png) diff --git a/docs/user_guide/sensor_bridge_firmware_setup.md b/docs/user_guide/sensor_bridge_firmware_setup.md index 55e485c..d0cd79e 100644 --- a/docs/user_guide/sensor_bridge_firmware_setup.md +++ b/docs/user_guide/sensor_bridge_firmware_setup.md @@ -5,10 +5,8 @@ components are programmable and should be updated. 1. **Power cycle** the sensor bridge device and make sure 2 green LEDs are on -1. Follow the - [setup instructions](setup.md#running-holoscan-sensor-bridge-demos-from-source) to - build and run the demo container. All the following commands are to be run from - within the demo container. +1. Follow the [setup instructions](setup.md) to build and run the demo container. All + the following commands are to be run from within the demo container. 1. Check connectivity with the sensor bridge board with the ping command: @@ -21,12 +19,12 @@ components are programmable and should be updated. ``` If your system uses the 192.168.0.0/24 network for another purpose, see instructions - for - [configuring the IP addresses used by the sensor bridge device](notes.html#holoscan-sensor-bridge-ip-address-configuration). - After reconfiguring addresses appropriately, be sure you can ping your device at the + for [configuring the IP addresses used by the sensor bridge device.](notes.md) After + reconfiguring addresses appropriately, be sure you can ping your device at the address you expect. -1. Use the `hololink program` command to update the device firmware: +1. 
If your current configuration is a Lattice CPNX100-ETH-SENSOR-BRIDGE device currently + loaded with 2412 or newer firmware, `hololink program` will reprogram it: ```none hololink program scripts/manifest.yaml @@ -39,12 +37,26 @@ components are programmable and should be updated. hololink --hololink=192.168.200.2 program scripts/manifest.yaml ``` - When run this way, the manifest file directs the firmware tool to download the FPGA +1. If your configuration is a Lattice CPNX100-ETH-SENSOR-BRIDGE device with older (e.g. + 2407\) firmware, `hololink --force program` will reprogram it. Note that this only + works on units that are available at the default IP address of 192.168.0.2. + + ```none + hololink --force program scripts/manifest.yaml + ``` + + The way that HSB units are enumerated by the host has changed, so systems producing + 2407 enumeration data are not visible to hosts running 2.0-GA software. Including the + `--force` option tells the host software to use a hard-coded enumeration structure, + which enables the software to access the flash memory on the older configuration of + HSB. + +1. When run this way, the manifest file directs the firmware tool to download the FPGA BIT files from [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/resources/holoscan_sensor_bridge_fpga_ip) with the version validated for use with this software tree. When run on an IGX configuration, firmware updates can take up to 5 minutes; when run on AGX, expect a - run time of as much as 45 minutes **Do not interrupt the process in the middle.** + run time of as much as 30 minutes. **Do not interrupt the process in the middle.** 1. 
Once flashing is complete, **power cycle** the device and watch that the sensor bridge powers up with 2 green LEDs on diff --git a/docs/user_guide/sensor_bridge_hardware_setup.md b/docs/user_guide/sensor_bridge_hardware_setup.md index a669504..214f141 100644 --- a/docs/user_guide/sensor_bridge_hardware_setup.md +++ b/docs/user_guide/sensor_bridge_hardware_setup.md @@ -17,7 +17,9 @@ Lattice CPNX100-ETH-SENSOR-BRIDGE has the following connectors: 1. **Camera connector** - This connector is used to interface with a camera (e.g. IMX274). 1. **Power port** - the sensor bridge device is powered by using a USB-C power supply - with a minimum of 12V/2A connected to this port. + with input voltage from 5V to 20V. Since the sensors are powered through the + sensor bridge board, it is recommended to use a dedicated power supply to power + both the board and the sensors. 1. **GPIO pins** - the sensor bridge device supports 16 GPIO pins (0...15) and 4 ground pins (marked 'G' in the image above). diff --git a/docs/user_guide/setup.md b/docs/user_guide/setup.md index 3899a16..56d1ace 100644 --- a/docs/user_guide/setup.md +++ b/docs/user_guide/setup.md @@ -3,13 +3,13 @@ Holoscan sensor bridge is supported on the following configurations: - IGX systems configured with - [IGX OS 1.0 Production Release](https://developer.nvidia.com/igx-downloads) with CX7 + [IGX OS 1.1 Production Release](https://developer.nvidia.com/igx-downloads) with CX7 SmartNIC devices. - AGX Orin systems running - [JP6.0 release 2](https://developer.nvidia.com/embedded/jetpack). In this - configuration, the on-board Ethernet controller is used with the Linux kernel network - stack for data I/O; all network I/O is performed by the CPU without network - acceleration. + [JP6.0 release 2](https://developer.nvidia.com/embedded/jetpack). 
**NOTE THAT JETPACK + 6.1 AND NEWER ARE NOT YET SUPPORTED.** In this configuration, the on-board Ethernet + controller is used with the Linux kernel network stack for data I/O; all network I/O + is performed by the CPU without network acceleration. After the [Holoscan sensor bridge board is set up](sensor_bridge_hardware_setup.md), configure a few prerequisites in your host system. While holoscan sensor bridge @@ -82,6 +82,7 @@ Next, follow the directions on the appropriate tab below to configure your host ```none $ sudo nmcli con add con-name hololink-$EN0 ifname $EN0 type ethernet ip4 192.168.0.101/24 $ sudo nmcli connection modify hololink-$EN0 +ipv4.routes "192.168.0.2/32 192.168.0.101" + $ sudo nmcli connection modify hololink-$EN0 ethtool.ring-rx 4096 $ sudo nmcli connection up hololink-$EN0 ``` @@ -127,6 +128,7 @@ Next, follow the directions on the appropriate tab below to configure your host ```none $ sudo nmcli con add con-name hololink-$EN1 ifname $EN1 type ethernet ip4 192.168.0.102/24 $ sudo nmcli connection modify hololink-$EN1 +ipv4.routes "192.168.0.3/32 192.168.0.102" + $ sudo nmcli connection modify hololink-$EN1 ethtool.ring-rx 4096 $ sudo nmcli connection up hololink-$EN1 ``` @@ -170,7 +172,7 @@ Next, follow the directions on the appropriate tab below to configure your host [Service] Type=simple - ExecStart=/usr/sbin/phc2sys -c $EN0 -s CLOCK_REALTIME -O 0 + ExecStart=/usr/sbin/phc2sys -c $EN0 -s CLOCK_REALTIME -O 0 -S 0.001 [Install] WantedBy=multi-user.target diff --git a/docs/user_guide/simulation.md b/docs/user_guide/simulation.md index f325728..2ffd276 100644 --- a/docs/user_guide/simulation.md +++ b/docs/user_guide/simulation.md @@ -21,24 +21,24 @@ will drive the dataplane stream on the Host TX interface. 
``` //Address Data - {32'h0200_030C, 32'h0000_05CE}, // dp_pkt_0 , dp_pkt_len - {32'h0200_0324, 32'h0000_0001}, // dp_pkt_0 , dp_pkt_vip_mask - {32'h0200_0310, 32'h0000_600D}, // dp_pkt_0 , dp_pkt_mac_addr_lo - {32'h0200_0314, 32'h0000_0000}, // dp_pkt_0 , dp_pkt_mac_addr_hi - {32'h0200_0318, 32'h0000_BEEF}, // dp_pkt_0 , dp_pkt_ip_addr - {32'h0200_031C, 32'h0000_12B7}, // dp_pkt_0 , dp_pkt_host_udp_port - {32'h0200_0320, 32'h0000_3000}, // dp_pkt_0 , dp_pkt_fpga_udp_port - {32'h0200_1000, 32'h0000_0000}, // dp_pkt_0 , Destination QP - {32'h0200_1008, 32'h0000_0000}, // dp_pkt_0 , Start Virtual Address MSB - {32'h0200_100C, 32'h0000_0000}, // dp_pkt_0 , Start Virtual Address LSB - {32'h0200_1010, 32'h0000_0000}, // dp_pkt_0 , End Virtual Address MSB - {32'h0200_1014, 32'h0001_0000}, // dp_pkt_0 , End Virtual Address LSB - {32'h0200_1004, 32'h0000_F00D}, // dp_pkt_0 , Remote Key + {32'h0200_0304, 32'h0000_000B}, // dp_pkt_0 , dp_pkt_len + {32'h0200_0308, 32'h0000_12B7}, // dp_pkt_0 , dp_pkt_host_udp_port + {32'h0200_030C, 32'h0000_0001}, // dp_pkt_0 , dp_pkt_vip_mask + {32'h0000_1020, 32'h0000_600D}, // sif_0 , dp_pkt_mac_addr_lo + {32'h0000_1024, 32'h0000_0000}, // sif_0 , dp_pkt_mac_addr_hi + {32'h0000_1028, 32'h0000_BEEF}, // sif_0 , dp_pkt_ip_addr + {32'h0000_102C, 32'h0000_3000}, // sif_0 , dp_pkt_fpga_udp_port + {32'h0000_1000, 32'h0000_0000}, // sif_0 , Destination QP + {32'h0000_1004, 32'h0000_F00D}, // sif_0 , Remote Key + {32'h0000_1008, 32'h0000_0000}, // sif_0 , Buffer 0 Virtual Address + {32'h0000_1018, 32'h0001_0000}, // sif_0 , Bytes per Window + {32'h0000_101C, 32'h0000_0001}, // sif_0 , Buffer Enable {32'h0200_0108, 32'h0000_0064}, // eth_pkt_0 , Eth pkt data plane priority ``` Above example is for 1 data path, for additional data paths -1. Add offset "0x0001_0000" to the addresses above +1. Add offset "0x0001_0000" to the dp_pkt register addresses and add offset + "0x0000_0040" to the sif register addresses. 1. 
Instantiate 2nd or more data paths in HOLOLINK IP HIF TX/RX Interface ports. 1. Increase N_INIT_REG by number of registers added to initialize. diff --git a/docs/user_guide/troubleshooting.md b/docs/user_guide/troubleshooting.md index cdcc50d..e16dca5 100644 --- a/docs/user_guide/troubleshooting.md +++ b/docs/user_guide/troubleshooting.md @@ -1,5 +1,8 @@ # Troubleshooting +Additional troubleshooting notes can be found on the +[release notes page](RELEASE_NOTES.md). + ## Segmentation fault from Holoscan Visualizer If the Holoscan visualizer is not able to access the host display, the program will @@ -14,7 +17,10 @@ The `hololink enumerate` command, in the demo container, can be used to monitor enumeration messages sent by the sensor bridge device. If no messages appear, then check for power to the sensor bridge device, physical connections to the device, and appropriate network configurations as listed above. `ping 192.168.0.2` and -`ping 192.168.0.3` can also be used to check for connectivity. +`ping 192.168.0.3` can be used to check for connectivity. If an HSB device is running an +incompatible FPGA image (e.g. FPGA is 2407 while the host software requires 2412), ping +would be successful but no enumeration data would appear. Firmware version problems can +be solved by [reprogramming your device](sensor_bridge_firmware_setup.md). ## Visualizer display is completely white @@ -45,7 +51,7 @@ sensor bridge board. ## Sensor Bridge LED indications -The Holsocan Sensor Bridge board has two leds that depending on their state have the +The Holoscan Sensor Bridge board has two leds that depending on their state have the following indications: 1. **Both leds are off** - The Holoscan Sensor Bridge Board is not powered. 
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 7794884..708832d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -29,7 +29,6 @@ target_link_libraries(imx274_player hololink::native hololink::operators::csi_to_bayer hololink::operators::image_processor - hololink::operators::gamma_correction hololink::operators::roce_receiver holoscan::core holoscan::ops::bayer_demosaic diff --git a/examples/body_pose_estimation.py b/examples/body_pose_estimation.py index b9ef206..00a1eaa 100644 --- a/examples/body_pose_estimation.py +++ b/examples/body_pose_estimation.py @@ -444,17 +444,12 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - visualizer = holoscan.operators.HolovizOp( self, name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, **self.kwargs("holoviz"), ) @@ -497,8 +492,7 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, image_shift) + self.add_flow(demosaic, image_shift, {("transmitter", "input")}) self.add_flow(image_shift, visualizer, {("output", "receivers")}) self.add_flow(image_shift, preprocessor, {("output", "")}) self.add_flow(preprocessor, format_input) @@ -536,7 +530,7 @@ def main(): default="192.168.0.2", help="IP address of Hololink board", ) - default_engine = os.path.join(os.path.dirname(__file__), "yolov8n-pose.onnx") + default_engine = os.path.join(os.path.dirname(__file__), "yolov8n-pose.engine.fp32") parser.add_argument( "--engine", default=default_engine, @@ -548,14 +542,10 @@ def main(): default=20, help="Logging level to display", ) - default_infiniband_interface = "roceP5p3s0f0" - try: - 
default_infiniband_interface = sorted(os.listdir("/sys/class/infiniband"))[0] - except FileNotFoundError: - pass + infiniband_devices = hololink_module.infiniband_devices() parser.add_argument( "--ibv-name", - default=default_infiniband_interface, + default=infiniband_devices[0], help="IBV device to use", ) parser.add_argument( diff --git a/examples/body_pose_estimation.yaml b/examples/body_pose_estimation.yaml index 24f292f..5ff91f0 100644 --- a/examples/body_pose_estimation.yaml +++ b/examples/body_pose_estimation.yaml @@ -29,7 +29,7 @@ inference: inference_map: "yolo_pose": ["inference_output"] input_on_cuda: true - is_engine_path: false + is_engine_path: true postprocessor: iou_threshold: 0.5 diff --git a/examples/distributed_imx274_player.py b/examples/distributed_imx274_player.py index 48dd0cf..d19cd51 100644 --- a/examples/distributed_imx274_player.py +++ b/examples/distributed_imx274_player.py @@ -137,19 +137,12 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - # self.add_flow(receiver_operator, csi_to_bayer_operator, {("output", "input")}) self.add_flow( csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) class VisualizerFragment(holoscan.core.Fragment): @@ -172,6 +165,7 @@ def compose(self): name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, ) self.add_operator(visualizer) @@ -228,7 +222,7 @@ def compose(self): self.add_flow( src_fragment, visualizer_fragment, - {("gamma_correction.output", "holoviz.receivers")}, + {("demosaic.transmitter", "holoviz.receivers")}, ) @@ -269,14 +263,10 @@ def main(): default=20, help="Logging level to display", ) - default_infiniband_interface = "roceP5p3s0f0" - 
try: - default_infiniband_interface = sorted(os.listdir("/sys/class/infiniband"))[0] - except FileNotFoundError: - pass + infiniband_devices = hololink_module.infiniband_devices() parser.add_argument( "--ibv-name", - default=default_infiniband_interface, + default=infiniband_devices[0], help="IBV device to use", ) parser.add_argument( diff --git a/examples/distributed_tao_peoplenet.py b/examples/distributed_tao_peoplenet.py index ac3e7e4..83296af 100644 --- a/examples/distributed_tao_peoplenet.py +++ b/examples/distributed_tao_peoplenet.py @@ -334,12 +334,6 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - image_shift = hololink_module.operators.ImageShiftToUint8Operator( self, name="image_shift", shift=8 ) @@ -349,6 +343,7 @@ def compose(self): name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, **self.kwargs("holoviz"), ) # @@ -357,8 +352,7 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, image_shift) + self.add_flow(demosaic, image_shift, {("transmitter", "input")}) self.add_flow(image_shift, visualizer, {("output", "receivers")}) @@ -519,14 +513,10 @@ def main(): default=20, help="Logging level to display", ) - default_infiniband_interface = "roceP5p3s0f0" - try: - default_infiniband_interface = sorted(os.listdir("/sys/class/infiniband"))[0] - except FileNotFoundError: - pass + infiniband_devices = hololink_module.infiniband_devices() parser.add_argument( "--ibv-name", - default=default_infiniband_interface, + default=infiniband_devices[0], help="IBV device to use", ) parser.add_argument( diff --git a/examples/gpio_example_app.py 
b/examples/gpio_example_app.py index f3321f3..d9f3b66 100644 --- a/examples/gpio_example_app.py +++ b/examples/gpio_example_app.py @@ -44,16 +44,19 @@ class GpioSetOp(holoscan.core.Operator): This operator changes the GPIOs pin by pin according to the test configuration it currently runs (specified in the 'configs' variable). It sends the latest changed pin + the current test configuration to the - GpioReadOp for validation purposes. + GpioGetOp for validation purposes. once a sweep on all 16 pins is completed, next test configuration is set. """ - def __init__(self, fragment, hololink_channel, *args, **kwargs): + def __init__(self, fragment, hololink_channel, gpio, *args, **kwargs): self._hololink = hololink_channel.hololink() - self._GPIO = self._hololink.get_gpio() + self._gpio = gpio self.pin = 0 self.test_config = 0 + # how many pins are supported on the platform running the example + self._supported_pins_number = self._gpio.get_supported_pin_num() + # Need to call the base class constructor last super().__init__(fragment, *args, **kwargs) @@ -62,9 +65,9 @@ def setup(self, spec: holoscan.core.OperatorSpec): spec.output("test_config_out") # set all gpios as output, high - test fast setting via loop - for i in range(self._GPIO.GPIO_PIN_RANGE): - self._GPIO.set_direction(i, self._GPIO.OUT) - self._GPIO.set_value(i, self._GPIO.HIGH) + for i in range(self._supported_pins_number): + self._gpio.set_direction(i, self._gpio.OUT) + self._gpio.set_value(i, self._gpio.HIGH) def compute(self, op_input, op_output, context): @@ -73,29 +76,29 @@ def compute(self, op_input, op_output, context): # set gpio pins per test configuration 1 pin at a time per current # configuration tested if configs[self.test_config] == "ALL_OUT_L": - self._GPIO.set_direction(self.pin, self._GPIO.OUT) - self._GPIO.set_value(self.pin, self._GPIO.LOW) + self._gpio.set_direction(self.pin, self._gpio.OUT) + self._gpio.set_value(self.pin, self._gpio.LOW) elif configs[self.test_config] == "ALL_OUT_H": - 
self._GPIO.set_direction(self.pin, self._GPIO.OUT) - self._GPIO.set_value(self.pin, self._GPIO.HIGH) + self._gpio.set_direction(self.pin, self._gpio.OUT) + self._gpio.set_value(self.pin, self._gpio.HIGH) elif configs[self.test_config] == "ALL_IN": - self._GPIO.set_direction(self.pin, self._GPIO.IN) + self._gpio.set_direction(self.pin, self._gpio.IN) elif configs[self.test_config] == "ODD_OUT_H": if (self.pin & 0x1) == 1: # odd pin - self._GPIO.set_direction(self.pin, self._GPIO.OUT) - self._GPIO.set_value(self.pin, self._GPIO.HIGH) + self._gpio.set_direction(self.pin, self._gpio.OUT) + self._gpio.set_value(self.pin, self._gpio.HIGH) else: # even pin - self._GPIO.set_direction(self.pin, self._GPIO.IN) + self._gpio.set_direction(self.pin, self._gpio.IN) else: # EVEN_OUT_H if (self.pin & 0x1) == 1: # odd pin - self._GPIO.set_direction(self.pin, self._GPIO.IN) + self._gpio.set_direction(self.pin, self._gpio.IN) else: # even pin - self._GPIO.set_direction(self.pin, self._GPIO.OUT) - self._GPIO.set_value(self.pin, self._GPIO.HIGH) + self._gpio.set_direction(self.pin, self._gpio.OUT) + self._gpio.set_value(self.pin, self._gpio.HIGH) # send current changed pin and tested configuration to # second operator to validate value changes @@ -104,7 +107,7 @@ def compute(self, op_input, op_output, context): # prepare for next gpio to change self.pin += 1 - self.pin %= self._GPIO.GPIO_PIN_RANGE + self.pin %= self._supported_pins_number # done pins sweep - move to next configuration to test if self.pin == 0: @@ -112,17 +115,18 @@ def compute(self, op_input, op_output, context): self.test_config %= len(configs) -class GpioReadOp(holoscan.core.Operator): +class GpioGetOp(holoscan.core.Operator): """ operator to demonstrate reads from the GPIO bank Recieves the last changed GPIO number and it is set to input reads and prints it value and direction. 
- sleeps 10 seconds to allow physcal board measurements + sleeps for the given time to allow physical board measurements """ - def __init__(self, fragment, hololink_channel, *args, **kwargs): + def __init__(self, fragment, hololink_channel, gpio, sleep_time, *args, **kwargs): self._hololink = hololink_channel.hololink() - self._GPIO = self._hololink.get_gpio() + self._gpio = gpio + self._sleep_time = sleep_time # Need to call the base class constructor last super().__init__(fragment, *args, **kwargs) @@ -136,30 +140,61 @@ def compute(self, op_input, op_output, context): pin = op_input.receive("gpio_changed_in") test_config = op_input.receive("test_config_in") - logging.info(f"GpioReadOp[{configs[test_config]}]") - direction = self._GPIO.get_direction(pin) - value = self._GPIO.get_value(pin) + logging.info(f"GpioGetOp[{configs[test_config]}]") + direction = self._gpio.get_direction(pin) + value = self._gpio.get_value(pin) logging.info(f"pin:{pin}, direction:{dir[direction]},value:{value}") - # sleep 10 seconds to allow time for pysical board measurememt - time.sleep(10) + # sleep to allow time for physical board measurement + time.sleep(self._sleep_time) class HoloscanApplication(holoscan.core.Application): def __init__( self, hololink_channel, + channel_metadata, + sleep_time, + cycle_limit, ): logging.info("__init__") super().__init__() self._hololink_channel = hololink_channel + self._channel_metadata = channel_metadata + self._hololink = hololink_channel.hololink() + self._sleep_time = sleep_time + self._cycle_limit = cycle_limit # may be None def compose(self): logging.info("compose") + # create the GPIO instance to be shared with the two operators + self._gpio = self._hololink.get_gpio(self._channel_metadata) + + # Conditions support cycle-limit + if self._cycle_limit: + self._count = holoscan.conditions.CountCondition( + self, + name="count", + count=self._cycle_limit, + ) + condition = self._count + else: + self._ok = holoscan.conditions.BooleanCondition( +
self, name="ok", enable_tick=True + ) + condition = self._ok + # example of operator instantiation - gpio_read = GpioReadOp(self, self._hololink_channel, name="gpio_read") - gpio_set = GpioSetOp(self, self._hololink_channel, name="gpio_set") + gpio_read = GpioGetOp( + self, + self._hololink_channel, + self._gpio, + self._sleep_time, + condition, + name="gpio_read", + ) + gpio_set = GpioSetOp(self, self._hololink_channel, self._gpio, name="gpio_set") self.add_flow( gpio_set, gpio_read, @@ -192,6 +227,17 @@ def main(): default=20, help="Logging level to display", ) + parser.add_argument( + "--sleep-time", + type=float, + default=2.0, + help="Time to allow for physical signal measurement.", + ) + parser.add_argument( + "--cycle-limit", + type=int, + help="Limit the number of cycles for the application; by default this runs forever.", + ) args = parser.parse_args() hololink_module.logging_level(args.log_level) @@ -204,6 +250,9 @@ def main(): # Set up the application application = HoloscanApplication( hololink_channel, + channel_metadata, + args.sleep_time, + args.cycle_limit, ) application.config(args.configuration) # Run it. diff --git a/examples/imx274_latency.py b/examples/imx274_latency.py new file mode 100644 index 0000000..3759399 --- /dev/null +++ b/examples/imx274_latency.py @@ -0,0 +1,545 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# See README.md for detailed information. + +import argparse +import ctypes +import datetime +import logging +import math +import os + +import cupy as cp +import holoscan +from cuda import cuda + +import hololink as hololink_module + +MS_PER_SEC = 1000.0 +US_PER_SEC = 1000.0 * MS_PER_SEC +NS_PER_SEC = 1000.0 * US_PER_SEC +SEC_PER_NS = 1.0 / NS_PER_SEC + + +def get_timestamp(metadata, name): + s = metadata[f"{name}_s"] + f = metadata[f"{name}_ns"] + f *= SEC_PER_NS + return s + f + + +def record_times(recorder_queue, metadata): + # + now = datetime.datetime.utcnow() + # + frame_number = metadata.get("frame_number", 0) + + # frame_start_s is the time that the first data arrived at the FPGA; + # the network receiver calls this "timestamp". + frame_start_s = get_timestamp(metadata, "timestamp") + + # After the FPGA sends the last sensor data packet for a frame, it follows + # that with a 128-byte metadata packet. This timestamp (which the network + # receiver calls "metadata") is the time at which the FPGA sends that + # packet; so it's the time immediately after the last byte of sensor + # data in this window. The difference between frame_start_s and frame_end_s + # is how long it took for the sensor to produce enough data for a complete + # frame. + frame_end_s = get_timestamp(metadata, "metadata") + + # received_timestamp_s is the host time after the background thread woke up + # with the notification that a frame of data was available. This shows how long + # it took for the CPU to actually run the background user-mode thread where it observes + # the end-of-frame. This background thread sets a flag that will wake up + # the pipeline network receiver operator. + received_timestamp_s = get_timestamp(metadata, "received") + + # operator_timestamp_s is the time when the next pipeline element woke up-- + # the next operator after the network receiver.
This is used to compute + # how much time overhead is required for the pipeline to actually receive + # sensor data. + operator_timestamp_s = get_timestamp(metadata, "operator_timestamp") + + # complete_timestamp_s is the time when visualization finished. + complete_timestamp_s = get_timestamp(metadata, "complete_timestamp") + + recorder_queue.append( + ( + now, + frame_start_s, + frame_end_s, + received_timestamp_s, + operator_timestamp_s, + complete_timestamp_s, + frame_number, + ) + ) + + +def save_timestamp(metadata, name, timestamp): + # This method works around the fact that we can't store + # datetime objects in metadata. + f, s = math.modf(timestamp.timestamp()) + metadata[f"{name}_s"] = int(s) + metadata[f"{name}_ns"] = int(f * NS_PER_SEC) + + +class InstrumentedTimeProfiler(holoscan.core.Operator): + def __init__( + self, + *args, + recorder_queue=None, + operator_name="operator", + **kwargs, + ): + super().__init__(*args, **kwargs) + self._recorder_queue = recorder_queue + self._operator_name = operator_name + + def setup(self, spec): + logging.info("setup") + spec.input("input") + spec.output("output") + + def compute(self, op_input, op_output, context): + # What time is it now? + operator_timestamp = datetime.datetime.utcnow() + + in_message = op_input.receive("input") + cp_frame = cp.asarray(in_message.get("")) + # + save_timestamp( + self.metadata, self._operator_name + "_timestamp", operator_timestamp + ) + op_output.emit({"": cp_frame}, "output") + + +class MonitorOperator(holoscan.core.Operator): + def __init__( + self, + *args, + recorder_queue=None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self._recorder_queue = recorder_queue + + def setup(self, spec): + logging.info("setup") + spec.input("input") + + def compute(self, op_input, op_output, context): + # What time is it now? 
+ complete_timestamp = datetime.datetime.utcnow() + + _ = op_input.receive("input") + # + save_timestamp(self.metadata, "complete_timestamp", complete_timestamp) + record_times(self._recorder_queue, self.metadata) + + +class HoloscanApplication(holoscan.core.Application): + def __init__( + self, + headless, + fullscreen, + cuda_context, + cuda_device_ordinal, + hololink_channel, + ibv_name, + ibv_port, + camera, + camera_mode, + frame_limit, + recorder_queue, + ): + logging.info("__init__") + super().__init__() + self._headless = headless + self._fullscreen = fullscreen + self._cuda_context = cuda_context + self._cuda_device_ordinal = cuda_device_ordinal + self._hololink_channel = hololink_channel + self._ibv_name = ibv_name + self._ibv_port = ibv_port + self._camera = camera + self._camera_mode = camera_mode + self._frame_limit = frame_limit + self._recorder_queue = recorder_queue + # This is a control for HSDK + self.is_metadata_enabled = True + + def compose(self): + logging.info("compose") + if self._frame_limit: + self._count = holoscan.conditions.CountCondition( + self, + name="count", + count=self._frame_limit, + ) + condition = self._count + else: + self._ok = holoscan.conditions.BooleanCondition( + self, name="ok", enable_tick=True + ) + condition = self._ok + self._camera.set_mode(self._camera_mode) + + csi_to_bayer_pool = holoscan.resources.BlockMemoryPool( + self, + name="pool", + # storage_type of 1 is device memory + storage_type=1, + block_size=self._camera._width + * ctypes.sizeof(ctypes.c_uint16) + * self._camera._height, + num_blocks=2, + ) + csi_to_bayer_operator = hololink_module.operators.CsiToBayerOp( + self, + name="csi_to_bayer", + allocator=csi_to_bayer_pool, + cuda_device_ordinal=self._cuda_device_ordinal, + ) + self._camera.configure_converter(csi_to_bayer_operator) + + frame_size = csi_to_bayer_operator.get_csi_length() + logging.info(f"{frame_size=}") + frame_context = self._cuda_context + receiver_operator = 
hololink_module.operators.RoceReceiverOp( + self, + condition, + name="receiver", + frame_size=frame_size, + frame_context=frame_context, + ibv_name=self._ibv_name, + ibv_port=self._ibv_port, + hololink_channel=self._hololink_channel, + device=self._camera, + ) + + profiler = InstrumentedTimeProfiler( + self, + name="profiler", + recorder_queue=self._recorder_queue, + ) + + bayer_format = self._camera.bayer_format() + pixel_format = self._camera.pixel_format() + image_processor_operator = hololink_module.operators.ImageProcessorOp( + self, + name="image_processor", + # Optical black value for imx274 is 50 + optical_black=50, + bayer_format=bayer_format.value, + pixel_format=pixel_format.value, + ) + + rgba_components_per_pixel = 4 + bayer_pool = holoscan.resources.BlockMemoryPool( + self, + name="pool", + # storage_type of 1 is device memory + storage_type=1, + block_size=self._camera._width + * rgba_components_per_pixel + * ctypes.sizeof(ctypes.c_uint16) + * self._camera._height, + num_blocks=2, + ) + demosaic = holoscan.operators.BayerDemosaicOp( + self, + name="demosaic", + pool=bayer_pool, + generate_alpha=True, + alpha_value=65535, + bayer_grid_pos=bayer_format.value, + interpolation_mode=0, + ) + + visualizer = holoscan.operators.HolovizOp( + self, + name="holoviz", + fullscreen=self._fullscreen, + headless=self._headless, + framebuffer_srgb=True, + enable_camera_pose_output=True, + camera_pose_output_type="extrinsics_model", + ) + # + monitor = MonitorOperator( + self, + name="monitor", + recorder_queue=self._recorder_queue, + ) + # + self.add_flow(receiver_operator, profiler, {("output", "input")}) + self.add_flow(profiler, csi_to_bayer_operator, {("output", "input")}) + self.add_flow( + csi_to_bayer_operator, image_processor_operator, {("output", "input")} + ) + self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) + self.add_flow(demosaic, visualizer, {("transmitter", "receivers")}) + self.add_flow(visualizer, monitor, 
{("camera_pose_output", "input")}) + + def _terminate(self, recorded_timestamps): + self._ok.disable_tick() + global timestamps + timestamps = recorded_timestamps + + +def main(): + parser = argparse.ArgumentParser() + modes = hololink_module.sensors.imx274.imx274_mode.Imx274_Mode + mode_choices = [mode.value for mode in modes] + mode_help = " ".join([f"{mode.value}:{mode.name}" for mode in modes]) + parser.add_argument( + "--camera-mode", + type=int, + choices=mode_choices, + default=mode_choices[0], + help=mode_help, + ) + parser.add_argument("--headless", action="store_true", help="Run in headless mode") + parser.add_argument( + "--fullscreen", action="store_true", help="Run in fullscreen mode" + ) + parser.add_argument( + "--frame-limit", + type=int, + default=200, + help="Exit after receiving this many frames", + ) + default_configuration = os.path.join( + os.path.dirname(__file__), "example_configuration.yaml" + ) + parser.add_argument( + "--configuration", + default=default_configuration, + help="Configuration file", + ) + parser.add_argument( + "--hololink", + default="192.168.0.2", + help="IP address of Hololink board", + ) + parser.add_argument( + "--log-level", + type=int, + default=20, + help="Logging level to display", + ) + infiniband_devices = hololink_module.infiniband_devices() + parser.add_argument( + "--ibv-name", + default=infiniband_devices[0], + help="IBV device to use", + ) + parser.add_argument( + "--ibv-port", + type=int, + default=1, + help="Port number of IBV device", + ) + parser.add_argument( + "--expander-configuration", + type=int, + default=0, + choices=(0, 1), + help="I2C Expander configuration", + ) + parser.add_argument( + "--pattern", + type=int, + choices=range(12), + help="Configure to display a test pattern.", + ) + parser.add_argument( + "--skip-reset", + action="store_true", + help="Don't call reset on the hololink device.", + ) + args = parser.parse_args() + hololink_module.logging_level(args.log_level) + 
logging.info("Initializing.") + # Get a handle to the GPU + (cu_result,) = cuda.cuInit(0) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_device_ordinal = 0 + cu_result, cu_device = cuda.cuDeviceGet(cu_device_ordinal) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_result, cu_context = cuda.cuDevicePrimaryCtxRetain(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + # Get a handle to the data source + channel_metadata = hololink_module.Enumerator.find_channel(channel_ip=args.hololink) + logging.info(f"{channel_metadata=}") + hololink_channel = hololink_module.DataChannel(channel_metadata) + # Get a handle to the camera + camera = hololink_module.sensors.imx274.dual_imx274.Imx274Cam( + hololink_channel, expander_configuration=args.expander_configuration + ) + camera_mode = hololink_module.sensors.imx274.imx274_mode.Imx274_Mode( + args.camera_mode + ) + + # recorder of timestamps + recorder_queue = [] + + # Set up the application + application = HoloscanApplication( + args.headless, + args.fullscreen, + cu_context, + cu_device_ordinal, + hololink_channel, + args.ibv_name, + args.ibv_port, + camera, + camera_mode, + args.frame_limit, + recorder_queue, + ) + application.config(args.configuration) + # Run it. + hololink = hololink_channel.hololink() + hololink.start() + if not args.skip_reset: + hololink.reset() + ptp_sync_timeout_s = 10 + ptp_sync_timeout = hololink_module.Timeout(ptp_sync_timeout_s) + logging.debug("Waiting for PTP sync.") + if not hololink.ptp_synchronize(ptp_sync_timeout): + raise ValueError( + f"Failed to synchronize PTP after {ptp_sync_timeout_s} seconds."
+ ) + else: + logging.debug("PTP synchronized.") + if not args.skip_reset: + camera.setup_clock() + camera.configure(camera_mode) + camera.set_digital_gain_reg(0x4) + if args.pattern is not None: + camera.test_pattern(args.pattern) + logging.info("Calling run") + application.run() + hololink.stop() + + (cu_result,) = cuda.cuDevicePrimaryCtxRelease(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + + # Report stats at the end of the application + frame_time_dts = [] + cpu_latency_dts = [] + operator_latency_dts = [] + processing_time_dts = [] + overall_time_dts = [] + + settled_timestamps = recorder_queue[5:-5] + assert len(settled_timestamps) >= 100 + for ( + now, + frame_start_s, + frame_end_s, + received_timestamp_s, + operator_timestamp_s, + complete_timestamp_s, + frame_number, + ) in settled_timestamps: + + frame_start = datetime.datetime.fromtimestamp(frame_start_s).isoformat() + frame_end = datetime.datetime.fromtimestamp(frame_end_s).isoformat() + received_timestamp = datetime.datetime.fromtimestamp( + received_timestamp_s + ).isoformat() + operator_timestamp = datetime.datetime.fromtimestamp( + operator_timestamp_s + ).isoformat() + complete_timestamp = datetime.datetime.fromtimestamp( + complete_timestamp_s + ).isoformat() + + frame_time_dt = frame_end_s - frame_start_s + frame_time_dts.append(round(frame_time_dt, 4)) + + cpu_latency_dt = received_timestamp_s - frame_end_s + cpu_latency_dts.append(round(cpu_latency_dt, 4)) + + operator_latency_dt = operator_timestamp_s - received_timestamp_s + operator_latency_dts.append(round(operator_latency_dt, 4)) + + processing_time_dt = complete_timestamp_s - operator_timestamp_s + processing_time_dts.append(round(processing_time_dt, 4)) + + overall_time_dt = complete_timestamp_s - frame_start_s + overall_time_dts.append(round(overall_time_dt, 4)) + logging.debug(f"** Frame Information for Frame Number = {frame_number}**") + logging.debug(f"Frame Start : {frame_start}") + logging.debug(f"Frame End : 
{frame_end}") + logging.debug(f"Received Timestamp : {received_timestamp}") + logging.debug(f"Operator Timestamp : {operator_timestamp}") + logging.debug(f"Complete Timestamp : {complete_timestamp}") + logging.debug(f"Frame Time (dt) : {frame_time_dt:.6f} s") + logging.debug(f"CPU Latency (dt) : {cpu_latency_dt:.6f} s") + logging.debug(f"Operator Latency (dt): {operator_latency_dt:.6f} s") + logging.debug(f"Processing Time (dt) : {processing_time_dt:.6f} s") + logging.debug(f"Overall Time (dt) : {overall_time_dt:.6f} s") + + logging.info("** Complete report: **") + logging.info(f"{'Metric':<30}{'Min':<15}{'Max':<15}{'Avg':<15}") + # + ft_min_time_difference = min(frame_time_dts) + ft_max_time_difference = max(frame_time_dts) + ft_avg_time_difference = sum(frame_time_dts) / len(frame_time_dts) + logging.info("Frame Time (in sec):") + logging.info( + f"{'Frame Time':<30}{ft_min_time_difference:<15}{ft_max_time_difference:<15}{ft_avg_time_difference:<15}" + ) + # + cl_min_time_difference = min(cpu_latency_dts) + cl_max_time_difference = max(cpu_latency_dts) + cl_avg_time_difference = sum(cpu_latency_dts) / len(cpu_latency_dts) + logging.info("FPGA frame transfer latency (in sec):") + logging.info( + f"{'Frame Transfer Latency':<30}{cl_min_time_difference:<15}{cl_max_time_difference:<15}{cl_avg_time_difference:<15}" + ) + # + ol_min_time_difference = min(operator_latency_dts) + ol_max_time_difference = max(operator_latency_dts) + ol_avg_time_difference = sum(operator_latency_dts) / len(operator_latency_dts) + logging.info("FPGA to Operator after network operator latency (in sec):") + logging.info( + f"{'Operator Latency':<30}{ol_min_time_difference:<15}{ol_max_time_difference:<15}{ol_avg_time_difference:<15}" + ) + # + pt_min_time_difference = min(processing_time_dts) + pt_max_time_difference = max(processing_time_dts) + pt_avg_time_difference = sum(processing_time_dts) / len(processing_time_dts) + logging.info("Processing of frame latency (in sec):") + logging.info( +
f"{'Processing Latency':<30}{pt_min_time_difference:<15}{pt_max_time_difference:<15}{pt_avg_time_difference:<15}" + ) + # + ot_min_time_difference = min(overall_time_dts) + ot_max_time_difference = max(overall_time_dts) + ot_avg_time_difference = sum(overall_time_dts) / len(overall_time_dts) + logging.info("Frame start till end of SW pipeline latency (in sec):") + logging.info( + f"{'SW Pipeline Latency':<30}{ot_min_time_difference:<15}{ot_max_time_difference:<15}{ot_avg_time_difference:<15}" + ) + + +if __name__ == "__main__": + main() diff --git a/examples/imx274_player.cpp b/examples/imx274_player.cpp index c827554..4b07c93 100644 --- a/examples/imx274_player.cpp +++ b/examples/imx274_player.cpp @@ -23,9 +23,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -126,17 +126,14 @@ class HoloscanApplication : public holoscan::Application { holoscan::Arg("alpha_value", 65535), holoscan::Arg("bayer_grid_pos", int(bayer_format)), holoscan::Arg("interpolation_mode", 0)); - auto gamma_correction = make_operator( - "gamma_correction", holoscan::Arg("cuda_device_ordinal", cuda_device_ordinal_)); - auto visualizer = make_operator("holoviz", - holoscan::Arg("fullscreen", fullscreen_), holoscan::Arg("headless", headless_)); + holoscan::Arg("fullscreen", fullscreen_), holoscan::Arg("headless", headless_), + holoscan::Arg("framebuffer_srgb", true)); add_flow(receiver_operator, csi_to_bayer_operator, { { "output", "input" } }); add_flow(csi_to_bayer_operator, image_processor_operator, { { "output", "input" } }); add_flow(image_processor_operator, demosaic, { { "output", "receiver" } }); - add_flow(demosaic, gamma_correction, { { "transmitter", "input" } }); - add_flow(gamma_correction, visualizer, { { "output", "receivers" } }); + add_flow(demosaic, visualizer, { { "transmitter", "receivers" } }); } private: @@ -302,7 +299,7 @@ int main(int argc, char** argv) } hololink_module.attr("logging_level")(python_log_level); - 
HOLOSCAN_LOG_INFO("Initializing."); + HSB_LOG_INFO("Initializing."); // Get a handle to the GPU CudaCheck(cuInit(0)); @@ -314,7 +311,7 @@ int main(int argc, char** argv) // Get a handle to the data source hololink::Metadata channel_metadata = hololink::Enumerator::find_channel(hololink_ip); - HOLOSCAN_LOG_INFO(fmt::format("channel_metadata {}", channel_metadata)); + HSB_LOG_INFO(fmt::format("channel_metadata {}", channel_metadata)); hololink::DataChannel hololink_channel(channel_metadata); @@ -343,7 +340,7 @@ int main(int argc, char** argv) if (pattern_set) { camera.attr("test_pattern")(pattern); } - HOLOSCAN_LOG_INFO("Calling run"); + HSB_LOG_INFO("Calling run"); { // we need release the Python GIL before starting the application to make sure the // operators can call camera device functions @@ -355,9 +352,9 @@ int main(int argc, char** argv) CudaCheck(cuDevicePrimaryCtxRelease(cu_device)); } catch (std::exception& e) { - HOLOSCAN_LOG_ERROR(e.what()); + HSB_LOG_ERROR(e.what()); return -1; } return 0; -} \ No newline at end of file +} diff --git a/examples/imx274_player.py b/examples/imx274_player.py index bb14e4b..113b2c8 100644 --- a/examples/imx274_player.py +++ b/examples/imx274_player.py @@ -135,17 +135,12 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - visualizer = holoscan.operators.HolovizOp( self, name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, ) # self.add_flow(receiver_operator, csi_to_bayer_operator, {("output", "input")}) @@ -153,17 +148,20 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, visualizer, {("output", "receivers")}) + 
self.add_flow(demosaic, visualizer, {("transmitter", "receivers")}) def main(): parser = argparse.ArgumentParser() + modes = hololink_module.sensors.imx274.imx274_mode.Imx274_Mode + mode_choices = [mode.value for mode in modes] + mode_help = " ".join([f"{mode.value}:{mode.name}" for mode in modes]) parser.add_argument( "--camera-mode", type=int, - default=hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS.value, - help="IMX274 mode", + choices=mode_choices, + default=mode_choices[0], + help=mode_help, ) parser.add_argument("--headless", action="store_true", help="Run in headless mode") parser.add_argument( @@ -194,14 +192,10 @@ def main(): default=20, help="Logging level to display", ) - default_infiniband_interface = "roceP5p3s0f0" - try: - default_infiniband_interface = sorted(os.listdir("/sys/class/infiniband"))[0] - except FileNotFoundError: - pass + infiniband_devices = hololink_module.infiniband_devices() parser.add_argument( "--ibv-name", - default=default_infiniband_interface, + default=infiniband_devices[0], help="IBV device to use", ) parser.add_argument( @@ -228,6 +222,11 @@ def main(): action="store_true", help="After reset, wait for PTP time to synchronize.", ) + parser.add_argument( + "--skip-reset", + action="store_true", + help="Don't call reset on the hololink device.", + ) args = parser.parse_args() hololink_module.logging_level(args.log_level) logging.info("Initializing.") @@ -267,7 +266,8 @@ def main(): # Run it. 
hololink = hololink_channel.hololink() hololink.start() - hololink.reset() + if not args.skip_reset: + hololink.reset() if args.ptp_sync: ptp_sync_timeout_s = 10 ptp_sync_timeout = hololink_module.Timeout(ptp_sync_timeout_s) @@ -278,7 +278,8 @@ def main(): ) else: logging.debug("PTP synchronized.") - camera.setup_clock() + if not args.skip_reset: + camera.setup_clock() camera.configure(camera_mode) camera.set_digital_gain_reg(0x4) if args.pattern is not None: diff --git a/examples/imx477_player.py b/examples/imx477_player.py index 7618927..82e4ada 100644 --- a/examples/imx477_player.py +++ b/examples/imx477_player.py @@ -18,7 +18,6 @@ import argparse import ctypes import logging -import os import holoscan from cuda import cuda @@ -132,12 +131,6 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - visualizer = holoscan.operators.HolovizOp( self, name="holoviz", @@ -151,8 +144,7 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, visualizer, {("output", "receivers")}) + self.add_flow(demosaic, visualizer, {("transmitter", "receivers")}) def main(): @@ -185,14 +177,10 @@ def main(): choices=(0, 1), help="which camera to stream: 0 to stream camera connected to j14 or 1 to stream camera connected to j17 (default is 0)", ) - default_infiniband_interface = "roceP5p3s0f0" - try: - default_infiniband_interface = sorted(os.listdir("/sys/class/infiniband"))[0] - except FileNotFoundError: - pass + infiniband_devices = hololink_module.infiniband_devices() parser.add_argument( "--ibv-name", - default=default_infiniband_interface, + default=infiniband_devices[0], help="IBV device to use", ) parser.add_argument( 
@@ -257,6 +245,7 @@ def main(): ) else: logging.debug("PTP synchronized.") + # Configures the camera for 3840x2160, 60fps camera.configure() if args.pattern: camera.set_pattern() diff --git a/examples/linux_body_pose_estimation.py b/examples/linux_body_pose_estimation.py index f199cbc..03c84ca 100644 --- a/examples/linux_body_pose_estimation.py +++ b/examples/linux_body_pose_estimation.py @@ -130,12 +130,6 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - image_shift = hololink_module.operators.ImageShiftToUint8Operator( self, name="image_shift", shift=8 ) @@ -145,6 +139,7 @@ def compose(self): name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, **self.kwargs("holoviz"), ) @@ -187,8 +182,7 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, image_shift) + self.add_flow(demosaic, image_shift, {("transmitter", "input")}) self.add_flow(image_shift, visualizer, {("output", "receivers")}) self.add_flow(image_shift, preprocessor, {("output", "")}) self.add_flow(preprocessor, format_input) @@ -226,7 +220,7 @@ def main(): default="192.168.0.2", help="IP address of Hololink board", ) - default_engine = os.path.join(os.path.dirname(__file__), "yolov8n-pose.onnx") + default_engine = os.path.join(os.path.dirname(__file__), "yolov8n-pose.engine.fp32") parser.add_argument( "--engine", default=default_engine, diff --git a/examples/linux_imx274_player.py b/examples/linux_imx274_player.py index 14a090e..acab44f 100644 --- a/examples/linux_imx274_player.py +++ b/examples/linux_imx274_player.py @@ -128,17 +128,12 @@ def compose(self): interpolation_mode=0, ) - 
gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - visualizer = holoscan.operators.HolovizOp( self, name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, ) # @@ -147,17 +142,20 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, visualizer, {("output", "receivers")}) + self.add_flow(demosaic, visualizer, {("transmitter", "receivers")}) def main(): parser = argparse.ArgumentParser() + modes = hololink_module.sensors.imx274.imx274_mode.Imx274_Mode + mode_choices = [mode.value for mode in modes] + mode_help = " ".join([f"{mode.value}:{mode.name}" for mode in modes]) parser.add_argument( "--camera-mode", type=int, - default=hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS.value, - help="IMX274 mode", + choices=mode_choices, + default=mode_choices[0], + help=mode_help, ) parser.add_argument("--headless", action="store_true", help="Run in headless mode") parser.add_argument( diff --git a/examples/linux_imx477_player.py b/examples/linux_imx477_player.py index 343424c..158c8e2 100644 --- a/examples/linux_imx477_player.py +++ b/examples/linux_imx477_player.py @@ -125,12 +125,6 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - visualizer = holoscan.operators.HolovizOp( self, name="holoviz", @@ -144,8 +138,7 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, 
{("transmitter", "input")}) - self.add_flow(gamma_correction, visualizer, {("output", "receivers")}) + self.add_flow(demosaic, visualizer, {("transmitter", "receivers")}) def main(): @@ -222,6 +215,7 @@ def main(): hololink = hololink_channel.hololink() hololink.start() hololink.reset() + # Configures the camera for 3840x2160, 60fps camera.configure() if args.pattern: camera.set_pattern() diff --git a/examples/linux_single_network_stereo_imx274_player.py b/examples/linux_single_network_stereo_imx274_player.py new file mode 100644 index 0000000..e3e53d6 --- /dev/null +++ b/examples/linux_single_network_stereo_imx274_player.py @@ -0,0 +1,368 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See README.md for detailed information. 
+ +import argparse +import ctypes +import logging +import os + +import holoscan +from cuda import cuda + +import hololink as hololink_module + + +class HoloscanApplication(holoscan.core.Application): + def __init__( + self, + headless, + cuda_context, + cuda_device_ordinal, + hololink_channel_left, + camera_left, + hololink_channel_right, + camera_right, + camera_mode, + frame_limit, + window_height, + window_width, + window_title, + ): + logging.info("__init__") + super().__init__() + self._headless = headless + self._cuda_context = cuda_context + self._cuda_device_ordinal = cuda_device_ordinal + self._hololink_channel_left = hololink_channel_left + self._camera_left = camera_left + self._hololink_channel_right = hololink_channel_right + self._camera_right = camera_right + self._camera_mode = camera_mode + self._frame_limit = frame_limit + self._window_height = window_height + self._window_width = window_width + self._window_title = window_title + + def compose(self): + logging.info("compose") + if self._frame_limit: + self._count_left = holoscan.conditions.CountCondition( + self, + name="count_left", + count=self._frame_limit, + ) + condition_left = self._count_left + self._count_right = holoscan.conditions.CountCondition( + self, + name="count_right", + count=self._frame_limit, + ) + condition_right = self._count_right + else: + self._ok_left = holoscan.conditions.BooleanCondition( + self, name="ok_left", enable_tick=True + ) + condition_left = self._ok_left + self._ok_right = holoscan.conditions.BooleanCondition( + self, name="ok_right", enable_tick=True + ) + condition_right = self._ok_right + self._camera_left.set_mode(self._camera_mode) + self._camera_right.set_mode(self._camera_mode) + + csi_to_bayer_pool = holoscan.resources.BlockMemoryPool( + self, + name="pool", + # storage_type of 1 is device memory + storage_type=1, + block_size=self._camera_left._width + * ctypes.sizeof(ctypes.c_uint16) + * self._camera_left._height, + num_blocks=6, + ) + 
csi_to_bayer_operator_left = hololink_module.operators.CsiToBayerOp( + self, + name="csi_to_bayer_left", + allocator=csi_to_bayer_pool, + cuda_device_ordinal=self._cuda_device_ordinal, + out_tensor_name="left", + ) + self._camera_left.configure_converter(csi_to_bayer_operator_left) + csi_to_bayer_operator_right = hololink_module.operators.CsiToBayerOp( + self, + name="csi_to_bayer_right", + allocator=csi_to_bayer_pool, + cuda_device_ordinal=self._cuda_device_ordinal, + out_tensor_name="right", + ) + self._camera_right.configure_converter(csi_to_bayer_operator_right) + + frame_size = csi_to_bayer_operator_left.get_csi_length() + assert frame_size == csi_to_bayer_operator_right.get_csi_length() + + frame_context = self._cuda_context + receiver_operator_left = hololink_module.operators.LinuxReceiverOperator( + self, + condition_left, + name="receiver_left", + frame_size=frame_size, + frame_context=frame_context, + hololink_channel=self._hololink_channel_left, + device=self._camera_left, + ) + + # + receiver_operator_right = hololink_module.operators.LinuxReceiverOperator( + self, + condition_right, + frame_size=frame_size, + frame_context=frame_context, + hololink_channel=self._hololink_channel_right, + device=self._camera_right, + ) + + bayer_format = self._camera_left.bayer_format() + assert bayer_format == self._camera_right.bayer_format() + pixel_format = self._camera_left.pixel_format() + assert pixel_format == self._camera_right.pixel_format() + image_processor_left = hololink_module.operators.ImageProcessorOp( + self, + name="image_processor_left", + # Optical black value for imx274 is 50 + optical_black=50, + bayer_format=bayer_format.value, + pixel_format=pixel_format.value, + ) + image_processor_right = hololink_module.operators.ImageProcessorOp( + self, + name="image_processor_right", + # Optical black value for imx274 is 50 + optical_black=50, + bayer_format=bayer_format.value, + pixel_format=pixel_format.value, + ) + + rgba_components_per_pixel = 4 + 
bayer_pool = holoscan.resources.BlockMemoryPool( + self, + name="pool", + # storage_type of 1 is device memory + storage_type=1, + block_size=self._camera_left._width + * rgba_components_per_pixel + * ctypes.sizeof(ctypes.c_uint16) + * self._camera_left._height, + num_blocks=6, + ) + demosaic_left = holoscan.operators.BayerDemosaicOp( + self, + name="demosaic_left", + pool=bayer_pool, + generate_alpha=True, + alpha_value=65535, + bayer_grid_pos=bayer_format.value, + interpolation_mode=0, + in_tensor_name="left", + out_tensor_name="left", + ) + demosaic_right = holoscan.operators.BayerDemosaicOp( + self, + name="demosaic_right", + pool=bayer_pool, + generate_alpha=True, + alpha_value=65535, + bayer_grid_pos=bayer_format.value, + interpolation_mode=0, + in_tensor_name="right", + out_tensor_name="right", + ) + + left_spec = holoscan.operators.HolovizOp.InputSpec( + "left", holoscan.operators.HolovizOp.InputType.COLOR + ) + left_spec_view = holoscan.operators.HolovizOp.InputSpec.View() + left_spec_view.offset_x = 0 + left_spec_view.offset_y = 0 + left_spec_view.width = 0.5 + left_spec_view.height = 1 + left_spec.views = [left_spec_view] + + right_spec = holoscan.operators.HolovizOp.InputSpec( + "right", holoscan.operators.HolovizOp.InputType.COLOR + ) + right_spec_view = holoscan.operators.HolovizOp.InputSpec.View() + right_spec_view.offset_x = 0.5 + right_spec_view.offset_y = 0 + right_spec_view.width = 0.5 + right_spec_view.height = 1 + right_spec.views = [right_spec_view] + + visualizer = holoscan.operators.HolovizOp( + self, + name="holoviz", + headless=self._headless, + framebuffer_srgb=True, + tensors=[left_spec, right_spec], + height=self._window_height, + width=self._window_width, + window_title=self._window_title, + ) + # + self.add_flow( + receiver_operator_left, csi_to_bayer_operator_left, {("output", "input")} + ) + self.add_flow( + receiver_operator_right, csi_to_bayer_operator_right, {("output", "input")} + ) + self.add_flow( + csi_to_bayer_operator_left, 
image_processor_left, {("output", "input")} + ) + self.add_flow( + csi_to_bayer_operator_right, image_processor_right, {("output", "input")} + ) + self.add_flow(image_processor_left, demosaic_left, {("output", "receiver")}) + self.add_flow(image_processor_right, demosaic_right, {("output", "receiver")}) + self.add_flow(demosaic_left, visualizer, {("transmitter", "receivers")}) + self.add_flow(demosaic_right, visualizer, {("transmitter", "receivers")}) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--camera-mode", + type=int, + default=hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS.value, + help="IMX274 mode", + ) + parser.add_argument("--headless", action="store_true", help="Run in headless mode") + parser.add_argument( + "--frame-limit", + type=int, + default=None, + help="Exit after receiving this many frames", + ) + default_configuration = os.path.join( + os.path.dirname(__file__), "example_configuration.yaml" + ) + parser.add_argument( + "--configuration", + default=default_configuration, + help="Configuration file", + ) + parser.add_argument( + "--hololink", + default="192.168.0.2", + help="IP address of Hololink board", + ) + parser.add_argument( + "--log-level", + type=int, + default=20, + help="Logging level to display", + ) + parser.add_argument( + "--window-height", + type=int, + default=2160 // 8, # arbitrary default + help="Set the height of the displayed window", + ) + parser.add_argument( + "--window-width", + type=int, + default=3840 // 6, # arbitrary default + help="Set the width of the displayed window", + ) + parser.add_argument( + "--title", + help="Set the window title", + ) + args = parser.parse_args() + hololink_module.logging_level(args.log_level) + logging.info("Initializing.") + # Get a handle to the GPU + (cu_result,) = cuda.cuInit(0) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_device_ordinal = 0 + cu_result, cu_device = cuda.cuDeviceGet(cu_device_ordinal) + 
assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_result, cu_context = cuda.cuDevicePrimaryCtxRetain(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + + # Get a handle to data sources. First, find an enumeration packet + # from the IP address we want to use. + channel_metadata = hololink_module.Enumerator.find_channel(channel_ip=args.hololink) + logging.info(f"{channel_metadata=}") + # Now make separate connection metadata for left and right; and set them to + # use sensor 0 and 1 respectively. This will borrow the data plane + # configuration we found on that interface. + channel_metadata_left = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_left, 0) + channel_metadata_right = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_right, 1) + # + hololink_channel_left = hololink_module.DataChannel(channel_metadata_left) + hololink_channel_right = hololink_module.DataChannel(channel_metadata_right) + # Get a handle to the camera + camera_left = hololink_module.sensors.imx274.dual_imx274.Imx274Cam( + hololink_channel_left, expander_configuration=0 + ) + camera_right = hololink_module.sensors.imx274.dual_imx274.Imx274Cam( + hololink_channel_right, expander_configuration=1 + ) + camera_mode = hololink_module.sensors.imx274.imx274_mode.Imx274_Mode( + args.camera_mode + ) + # What title should we use? + window_title = f"Holoviz - {args.hololink}" + if args.title is not None: + window_title = args.title + # Set up the application + application = HoloscanApplication( + args.headless, + cu_context, + cu_device_ordinal, + hololink_channel_left, + camera_left, + hololink_channel_right, + camera_right, + camera_mode, + args.frame_limit, + args.window_height, + args.window_width, + window_title, + ) + application.config(args.configuration) + # Run it. 
+ hololink = hololink_channel_left.hololink() + assert hololink is hololink_channel_right.hololink() + hololink.start() + hololink.reset() + camera_left.setup_clock() # this also sets camera_right's clock + camera_left.configure(camera_mode) + camera_left.set_digital_gain_reg(0x4) # anecdotal adjustment to image brightness + camera_right.configure(camera_mode) + camera_right.set_digital_gain_reg(0x4) # anecdotal adjustment to image brightness + + application.run() + hololink.stop() + + (cu_result,) = cuda.cuDevicePrimaryCtxRelease(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + + +if __name__ == "__main__": + main() diff --git a/examples/linux_tao_peoplenet.py b/examples/linux_tao_peoplenet.py index 213acbd..cfaae42 100644 --- a/examples/linux_tao_peoplenet.py +++ b/examples/linux_tao_peoplenet.py @@ -130,12 +130,6 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - image_shift = hololink_module.operators.ImageShiftToUint8Operator( self, name="image_shift", shift=8 ) @@ -145,6 +139,7 @@ def compose(self): name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, **self.kwargs("holoviz"), ) @@ -187,8 +182,7 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, image_shift) + self.add_flow(demosaic, image_shift, {("transmitter", "input")}) self.add_flow(image_shift, visualizer, {("output", "receivers")}) self.add_flow(image_shift, preprocessor, {("output", "")}) self.add_flow(preprocessor, format_input) diff --git a/examples/single_network_stereo_imx274_player.py b/examples/single_network_stereo_imx274_player.py new file mode 100644 index 
0000000..c10fabb --- /dev/null +++ b/examples/single_network_stereo_imx274_player.py @@ -0,0 +1,391 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See README.md for detailed information. + +import argparse +import ctypes +import logging +import os + +import holoscan +from cuda import cuda + +import hololink as hololink_module + + +class HoloscanApplication(holoscan.core.Application): + def __init__( + self, + headless, + cuda_context, + cuda_device_ordinal, + hololink_channel_left, + ibv_name, + ibv_port, + camera_left, + hololink_channel_right, + camera_right, + camera_mode, + frame_limit, + window_height, + window_width, + window_title, + ): + logging.info("__init__") + super().__init__() + self._headless = headless + self._cuda_context = cuda_context + self._cuda_device_ordinal = cuda_device_ordinal + self._hololink_channel_left = hololink_channel_left + self._ibv_name = ibv_name + self._ibv_port = ibv_port + self._camera_left = camera_left + self._hololink_channel_right = hololink_channel_right + self._camera_right = camera_right + self._camera_mode = camera_mode + self._frame_limit = frame_limit + self._window_height = window_height + self._window_width = window_width + self._window_title = window_title + + def compose(self): + logging.info("compose") + if self._frame_limit: + self._count_left = 
holoscan.conditions.CountCondition( + self, + name="count_left", + count=self._frame_limit, + ) + condition_left = self._count_left + self._count_right = holoscan.conditions.CountCondition( + self, + name="count_right", + count=self._frame_limit, + ) + condition_right = self._count_right + else: + self._ok_left = holoscan.conditions.BooleanCondition( + self, name="ok_left", enable_tick=True + ) + condition_left = self._ok_left + self._ok_right = holoscan.conditions.BooleanCondition( + self, name="ok_right", enable_tick=True + ) + condition_right = self._ok_right + self._camera_left.set_mode(self._camera_mode) + self._camera_right.set_mode(self._camera_mode) + + csi_to_bayer_pool = holoscan.resources.BlockMemoryPool( + self, + name="pool", + # storage_type of 1 is device memory + storage_type=1, + block_size=self._camera_left._width + * ctypes.sizeof(ctypes.c_uint16) + * self._camera_left._height, + num_blocks=6, + ) + csi_to_bayer_operator_left = hololink_module.operators.CsiToBayerOp( + self, + name="csi_to_bayer_left", + allocator=csi_to_bayer_pool, + cuda_device_ordinal=self._cuda_device_ordinal, + out_tensor_name="left", + ) + self._camera_left.configure_converter(csi_to_bayer_operator_left) + csi_to_bayer_operator_right = hololink_module.operators.CsiToBayerOp( + self, + name="csi_to_bayer_right", + allocator=csi_to_bayer_pool, + cuda_device_ordinal=self._cuda_device_ordinal, + out_tensor_name="right", + ) + self._camera_right.configure_converter(csi_to_bayer_operator_right) + + frame_size = csi_to_bayer_operator_left.get_csi_length() + assert frame_size == csi_to_bayer_operator_right.get_csi_length() + + frame_context = self._cuda_context + receiver_operator_left = hololink_module.operators.RoceReceiverOp( + self, + condition_left, + name="receiver_left", + frame_size=frame_size, + frame_context=frame_context, + ibv_name=self._ibv_name, + ibv_port=self._ibv_port, + hololink_channel=self._hololink_channel_left, + device=self._camera_left, + ) + + # + 
receiver_operator_right = hololink_module.operators.RoceReceiverOp( + self, + condition_right, + name="receiver_right", + frame_size=frame_size, + frame_context=frame_context, + ibv_name=self._ibv_name, + ibv_port=self._ibv_port, + hololink_channel=self._hololink_channel_right, + device=self._camera_right, + ) + + bayer_format = self._camera_left.bayer_format() + assert bayer_format == self._camera_right.bayer_format() + pixel_format = self._camera_left.pixel_format() + assert pixel_format == self._camera_right.pixel_format() + image_processor_left = hololink_module.operators.ImageProcessorOp( + self, + name="image_processor_left", + # Optical black value for imx274 is 50 + optical_black=50, + bayer_format=bayer_format.value, + pixel_format=pixel_format.value, + ) + image_processor_right = hololink_module.operators.ImageProcessorOp( + self, + name="image_processor_right", + # Optical black value for imx274 is 50 + optical_black=50, + bayer_format=bayer_format.value, + pixel_format=pixel_format.value, + ) + + rgba_components_per_pixel = 4 + bayer_pool = holoscan.resources.BlockMemoryPool( + self, + name="pool", + # storage_type of 1 is device memory + storage_type=1, + block_size=self._camera_left._width + * rgba_components_per_pixel + * ctypes.sizeof(ctypes.c_uint16) + * self._camera_left._height, + num_blocks=6, + ) + demosaic_left = holoscan.operators.BayerDemosaicOp( + self, + name="demosaic_left", + pool=bayer_pool, + generate_alpha=True, + alpha_value=65535, + bayer_grid_pos=bayer_format.value, + interpolation_mode=0, + in_tensor_name="left", + out_tensor_name="left", + ) + demosaic_right = holoscan.operators.BayerDemosaicOp( + self, + name="demosaic_right", + pool=bayer_pool, + generate_alpha=True, + alpha_value=65535, + bayer_grid_pos=bayer_format.value, + interpolation_mode=0, + in_tensor_name="right", + out_tensor_name="right", + ) + + left_spec = holoscan.operators.HolovizOp.InputSpec( + "left", holoscan.operators.HolovizOp.InputType.COLOR + ) + 
left_spec_view = holoscan.operators.HolovizOp.InputSpec.View() + left_spec_view.offset_x = 0 + left_spec_view.offset_y = 0 + left_spec_view.width = 0.5 + left_spec_view.height = 1 + left_spec.views = [left_spec_view] + + right_spec = holoscan.operators.HolovizOp.InputSpec( + "right", holoscan.operators.HolovizOp.InputType.COLOR + ) + right_spec_view = holoscan.operators.HolovizOp.InputSpec.View() + right_spec_view.offset_x = 0.5 + right_spec_view.offset_y = 0 + right_spec_view.width = 0.5 + right_spec_view.height = 1 + right_spec.views = [right_spec_view] + + visualizer = holoscan.operators.HolovizOp( + self, + name="holoviz", + headless=self._headless, + framebuffer_srgb=True, + tensors=[left_spec, right_spec], + height=self._window_height, + width=self._window_width, + window_title=self._window_title, + ) + # + self.add_flow( + receiver_operator_left, csi_to_bayer_operator_left, {("output", "input")} + ) + self.add_flow( + receiver_operator_right, csi_to_bayer_operator_right, {("output", "input")} + ) + self.add_flow( + csi_to_bayer_operator_left, image_processor_left, {("output", "input")} + ) + self.add_flow( + csi_to_bayer_operator_right, image_processor_right, {("output", "input")} + ) + self.add_flow(image_processor_left, demosaic_left, {("output", "receiver")}) + self.add_flow(image_processor_right, demosaic_right, {("output", "receiver")}) + self.add_flow(demosaic_left, visualizer, {("transmitter", "receivers")}) + self.add_flow(demosaic_right, visualizer, {("transmitter", "receivers")}) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--camera-mode", + type=int, + default=hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS.value, + help="IMX274 mode", + ) + parser.add_argument("--headless", action="store_true", help="Run in headless mode") + parser.add_argument( + "--frame-limit", + type=int, + default=None, + help="Exit after receiving this many frames", + ) + default_configuration = 
os.path.join( + os.path.dirname(__file__), "example_configuration.yaml" + ) + parser.add_argument( + "--configuration", + default=default_configuration, + help="Configuration file", + ) + parser.add_argument( + "--hololink", + default="192.168.0.2", + help="IP address of Hololink board", + ) + parser.add_argument( + "--log-level", + type=int, + default=20, + help="Logging level to display", + ) + infiniband_devices = hololink_module.infiniband_devices() + parser.add_argument( + "--ibv-name", + default=infiniband_devices[0], + help="IBV device to use", + ) + parser.add_argument( + "--ibv-port", + type=int, + default=1, + help="Port number of IBV device", + ) + parser.add_argument( + "--window-height", + type=int, + default=2160 // 8, # arbitrary default + help="Set the height of the displayed window", + ) + parser.add_argument( + "--window-width", + type=int, + default=3840 // 6, # arbitrary default + help="Set the width of the displayed window", + ) + parser.add_argument( + "--title", + help="Set the window title", + ) + args = parser.parse_args() + hololink_module.logging_level(args.log_level) + logging.info("Initializing.") + # Get a handle to the GPU + (cu_result,) = cuda.cuInit(0) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_device_ordinal = 0 + cu_result, cu_device = cuda.cuDeviceGet(cu_device_ordinal) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_result, cu_context = cuda.cuDevicePrimaryCtxRetain(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + + # Get a handle to data sources. First, find an enumeration packet + # from the IP address we want to use. + channel_metadata = hololink_module.Enumerator.find_channel(channel_ip=args.hololink) + logging.info(f"{channel_metadata=}") + # Now make separate connection metadata for left and right; and set them to + # use sensor 0 and 1 respectively. This will borrow the data plane + # configuration we found on that interface. 
+ channel_metadata_left = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_left, 0) + channel_metadata_right = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_right, 1) + # + hololink_channel_left = hololink_module.DataChannel(channel_metadata_left) + hololink_channel_right = hololink_module.DataChannel(channel_metadata_right) + # Get a handle to the camera + camera_left = hololink_module.sensors.imx274.dual_imx274.Imx274Cam( + hololink_channel_left, expander_configuration=0 + ) + camera_right = hololink_module.sensors.imx274.dual_imx274.Imx274Cam( + hololink_channel_right, expander_configuration=1 + ) + camera_mode = hololink_module.sensors.imx274.imx274_mode.Imx274_Mode( + args.camera_mode + ) + # What title should we use? + window_title = f"Holoviz - {args.hololink}" + if args.title is not None: + window_title = args.title + # Set up the application + application = HoloscanApplication( + args.headless, + cu_context, + cu_device_ordinal, + hololink_channel_left, + args.ibv_name, + args.ibv_port, + camera_left, + hololink_channel_right, + camera_right, + camera_mode, + args.frame_limit, + args.window_height, + args.window_width, + window_title, + ) + application.config(args.configuration) + # Run it. 
+ hololink = hololink_channel_left.hololink() + assert hololink is hololink_channel_right.hololink() + hololink.start() + hololink.reset() + camera_left.setup_clock() # this also sets camera_right's clock + camera_left.configure(camera_mode) + camera_left.set_digital_gain_reg(0x4) # anecdotal adjustment to image brightness + camera_right.configure(camera_mode) + camera_right.set_digital_gain_reg(0x4) # anecdotal adjustment to image brightness + + application.run() + hololink.stop() + + (cu_result,) = cuda.cuDevicePrimaryCtxRelease(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + + +if __name__ == "__main__": + main() diff --git a/examples/stereo_imx274_player.py b/examples/stereo_imx274_player.py index cbe33de..200e8a7 100644 --- a/examples/stereo_imx274_player.py +++ b/examples/stereo_imx274_player.py @@ -119,7 +119,7 @@ def compose(self): ) self._camera_right.configure_converter(csi_to_bayer_operator_right) - frame_size = csi_to_bayer_operator_right.get_csi_length() + frame_size = csi_to_bayer_operator_left.get_csi_length() assert frame_size == csi_to_bayer_operator_right.get_csi_length() frame_context = self._cuda_context @@ -203,17 +203,6 @@ def compose(self): out_tensor_name="right", ) - gamma_correction_left = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction_left", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - gamma_correction_right = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction_right", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - left_spec = holoscan.operators.HolovizOp.InputSpec( "left", holoscan.operators.HolovizOp.InputType.COLOR ) @@ -238,6 +227,7 @@ def compose(self): self, name="holoviz", headless=self._headless, + framebuffer_srgb=True, tensors=[left_spec, right_spec], height=self._window_height, width=self._window_width, @@ -258,12 +248,8 @@ def compose(self): ) self.add_flow(image_processor_left, demosaic_left, {("output", "receiver")}) 
self.add_flow(image_processor_right, demosaic_right, {("output", "receiver")}) - self.add_flow(demosaic_left, gamma_correction_left, {("transmitter", "input")}) - self.add_flow( - demosaic_right, gamma_correction_right, {("transmitter", "input")} - ) - self.add_flow(gamma_correction_left, visualizer, {("output", "receivers")}) - self.add_flow(gamma_correction_right, visualizer, {("output", "receivers")}) + self.add_flow(demosaic_left, visualizer, {("transmitter", "receivers")}) + self.add_flow(demosaic_right, visualizer, {("transmitter", "receivers")}) def main(): @@ -305,14 +291,10 @@ def main(): default=20, help="Logging level to display", ) - default_infiniband_interfaces = ["roceP5p3s0f0", "roceP5p3s0f1"] - try: - default_infiniband_interfaces = sorted(os.listdir("/sys/class/infiniband")) - except FileNotFoundError: - pass + infiniband_devices = hololink_module.infiniband_devices() parser.add_argument( "--ibv-name-left", - default=default_infiniband_interfaces[0], + default=infiniband_devices[0], help="IBV device to use", ) parser.add_argument( @@ -323,7 +305,7 @@ def main(): ) parser.add_argument( "--ibv-name-right", - default=default_infiniband_interfaces[1], + default=infiniband_devices[1], help="IBV device to use", ) parser.add_argument( diff --git a/examples/tao_peoplenet.py b/examples/tao_peoplenet.py index e2c73e5..d55f4ac 100644 --- a/examples/tao_peoplenet.py +++ b/examples/tao_peoplenet.py @@ -333,17 +333,12 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - visualizer = holoscan.operators.HolovizOp( self, name="holoviz", fullscreen=self._fullscreen, headless=self._headless, + framebuffer_srgb=True, **self.kwargs("holoviz"), ) @@ -386,8 +381,7 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", 
"receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, image_shift) + self.add_flow(demosaic, image_shift, {("transmitter", "input")}) self.add_flow(image_shift, visualizer, {("output", "receivers")}) self.add_flow(image_shift, preprocessor, {("output", "")}) self.add_flow(preprocessor, format_input) @@ -439,14 +433,10 @@ def main(): default=20, help="Logging level to display", ) - default_infiniband_interface = "roceP5p3s0f0" - try: - default_infiniband_interface = sorted(os.listdir("/sys/class/infiniband"))[0] - except FileNotFoundError: - pass + infiniband_devices = hololink_module.infiniband_devices() parser.add_argument( "--ibv-name", - default=default_infiniband_interface, + default=infiniband_devices[0], help="IBV device to use", ) parser.add_argument( diff --git a/pytest.ini b/pytest.ini index 3b4b399..d652bb9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,13 +3,15 @@ log_format=%(levelname)s %(relativeCreated)d %(funcName)s %(filename)s:%(lineno) log_cli=true log_cli_level=25 markers= - accelerated_networking: Skip this test if "--unaccelerated-networking" is on the command line + accelerated_networking: Skip this test if "--unaccelerated-only" is on the command line skip_unless_mgbe: Marks tests as skip unless running with the "--mgbe" command line switch skip_unless_ptp: Include PTP tests skip_unless_imx274: Marks tests as skip unless running with the "--imx274" command line switch skip_unless_imx477: Marks tests as skip unless running with the "--imx477" command line switch skip_unless_igpu: Marks tests as skip unless running with the "--igpu" command line switch skip_unless_dgpu: Marks tests as skip unless running with the "--dgpu" command line switch + skip_unless_hsb: Marks tests as skip unless running with the "--hsb" command line switch, "--imx274" implies this + skip_unless_hsb_nano: Marks tests as skip unless running with the "--hsb-nano" command line switch pythonpath=. 
examples testpaths=tests -addopts="-p no:cacheprovider" +addopts="-p no:cacheprovider" "--tb=short" diff --git a/python/hololink/__init__.py b/python/hololink/__init__.py index 5bdb8ba..f421e2e 100644 --- a/python/hololink/__init__.py +++ b/python/hololink/__init__.py @@ -21,28 +21,31 @@ CAM_I2C_CTRL, CLNX_SPI_CTRL, CPNX_SPI_CTRL, + DP_ADDRESS_0, + DP_ADDRESS_1, + DP_ADDRESS_2, + DP_ADDRESS_3, + DP_BUFFER_LENGTH, + DP_BUFFER_MASK, DP_HOST_IP, DP_HOST_MAC_HIGH, DP_HOST_MAC_LOW, DP_HOST_UDP_PORT, DP_PACKET_SIZE, - DP_ROCE_BUF_END_LSB_0, - DP_ROCE_BUF_END_MSB_0, - DP_ROCE_CFG, - DP_ROCE_RKEY_0, - DP_ROCE_VADDR_LSB_0, - DP_ROCE_VADDR_MSB_0, + DP_QP, + DP_RKEY, DP_VIP_MASK, FPGA_DATE, FPGA_VERSION, HOLOLINK_100G_BOARD_ID, - HOLOLINK_BOARD_ID, HOLOLINK_LITE_BOARD_ID, + HOLOLINK_NANO_BOARD_ID, I2C_BUSY, I2C_CORE_EN, I2C_DONE, I2C_DONE_CLEAR, I2C_START, + METADATA_SIZE, MICROCHIP_POLARFIRE_BOARD_ID, RD_DWORD, REQUEST_FLAGS_ACK_REQUEST, @@ -53,39 +56,51 @@ Enumerator, Hololink, Metadata, + NvtxTrace, Timeout, TimeoutError, UnsupportedVersion, ) -from .native import Deserializer, Serializer, local_mac +from .native import ( + UDP_PACKET_SIZE, + Deserializer, + Serializer, + local_ip_and_mac, + local_ip_and_mac_from_socket, + local_mac, +) +from .tools import infiniband_devices __all__ = [ "BL_I2C_CTRL", "CAM_I2C_CTRL", "CLNX_SPI_CTRL", "CPNX_SPI_CTRL", + "DP_ADDRESS_0", + "DP_ADDRESS_1", + "DP_ADDRESS_2", + "DP_ADDRESS_3", + "DP_BUFFER_LENGTH", + "DP_BUFFER_MASK", "DP_HOST_IP", "DP_HOST_MAC_HIGH", "DP_HOST_MAC_LOW", "DP_HOST_UDP_PORT", "DP_PACKET_SIZE", - "DP_ROCE_BUF_END_LSB_0", - "DP_ROCE_BUF_END_MSB_0", - "DP_ROCE_CFG", - "DP_ROCE_RKEY_0", - "DP_ROCE_VADDR_LSB_0", - "DP_ROCE_VADDR_MSB_0", + "DP_QP", + "DP_RKEY", "DP_VIP_MASK", "FPGA_DATE", "FPGA_VERSION", "HOLOLINK_100G_BOARD_ID", - "HOLOLINK_BOARD_ID", + "HOLOLINK_NANO_BOARD_ID", "HOLOLINK_LITE_BOARD_ID", "I2C_BUSY", "I2C_CORE_EN", "I2C_DONE_CLEAR", "I2C_DONE", "I2C_START", + "METADATA_SIZE", 
"MICROCHIP_POLARFIRE_BOARD_ID", "RD_DWORD", "REQUEST_FLAGS_ACK_REQUEST", @@ -97,6 +112,10 @@ "Enumerator", "Hololink", "Metadata", + "NvtxTrace", + "infiniband_devices", + "local_ip_and_mac", + "local_ip_and_mac_from_socket", "local_mac", "native", "operators", @@ -105,6 +124,7 @@ "Serializer", "Timeout", "TimeoutError", + "UDP_PACKET_SIZE", "UnsupportedVersion", ] diff --git a/python/hololink/hololink.cpp b/python/hololink/hololink.cpp index bdff581..d4e49cb 100644 --- a/python/hololink/hololink.cpp +++ b/python/hololink/hololink.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -151,6 +152,10 @@ PYBIND11_MODULE(_hololink, m) } return metadata; })) + .def(py::init([](const Metadata& source) { + auto metadata = std::make_shared(source); + return metadata; + })) /** * @returns an iterator object that can iterate over all objects in Metadata */ @@ -175,11 +180,12 @@ PYBIND11_MODULE(_hololink, m) return element; }, "name"_a, "value"_a) - .def("__repr__", [](const Metadata& metadata) { return fmt::format("{}", metadata); }); + .def("__repr__", [](const Metadata& metadata) { return fmt::format("{}", metadata); }) + .def("update", &Metadata::update, "other"_a); py::class_>(m, "Enumerator") - .def(py::init(), - "local_interface"_a = std::string(), "enumeration_port"_a = 10001u, + .def(py::init(), + "local_interface"_a = std::string(), "bootp_request_port"_a = 12267u, "bootp_reply_port"_a = 12268u) .def_static("enumerated", &Enumerator::enumerated, "call_back"_a, "timeout"_a = std::shared_ptr()) @@ -213,20 +219,23 @@ PYBIND11_MODULE(_hololink, m) m.attr("I2C_DONE") = I2C_DONE; m.attr("FPGA_VERSION") = FPGA_VERSION; m.attr("FPGA_DATE") = FPGA_DATE; + m.attr("METADATA_SIZE") = METADATA_SIZE; m.attr("DP_PACKET_SIZE") = DP_PACKET_SIZE; m.attr("DP_HOST_MAC_LOW") = DP_HOST_MAC_LOW; m.attr("DP_HOST_MAC_HIGH") = DP_HOST_MAC_HIGH; m.attr("DP_HOST_IP") = DP_HOST_IP; m.attr("DP_HOST_UDP_PORT") = DP_HOST_UDP_PORT; m.attr("DP_VIP_MASK") = DP_VIP_MASK; - 
m.attr("DP_ROCE_CFG") = DP_ROCE_CFG; - m.attr("DP_ROCE_RKEY_0") = DP_ROCE_RKEY_0; - m.attr("DP_ROCE_VADDR_MSB_0") = DP_ROCE_VADDR_MSB_0; - m.attr("DP_ROCE_VADDR_LSB_0") = DP_ROCE_VADDR_LSB_0; - m.attr("DP_ROCE_BUF_END_MSB_0") = DP_ROCE_BUF_END_MSB_0; - m.attr("DP_ROCE_BUF_END_LSB_0") = DP_ROCE_BUF_END_LSB_0; + m.attr("DP_ADDRESS_0") = DP_ADDRESS_0; + m.attr("DP_ADDRESS_1") = DP_ADDRESS_1; + m.attr("DP_ADDRESS_2") = DP_ADDRESS_2; + m.attr("DP_ADDRESS_3") = DP_ADDRESS_3; + m.attr("DP_BUFFER_LENGTH") = DP_BUFFER_LENGTH; + m.attr("DP_BUFFER_MASK") = DP_BUFFER_MASK; + m.attr("DP_QP") = DP_QP; + m.attr("DP_RKEY") = DP_RKEY; m.attr("HOLOLINK_LITE_BOARD_ID") = HOLOLINK_LITE_BOARD_ID; - m.attr("HOLOLINK_BOARD_ID") = HOLOLINK_BOARD_ID; + m.attr("HOLOLINK_NANO_BOARD_ID") = HOLOLINK_NANO_BOARD_ID; m.attr("HOLOLINK_100G_BOARD_ID") = HOLOLINK_100G_BOARD_ID; m.attr("MICROCHIP_POLARFIRE_BOARD_ID") = MICROCHIP_POLARFIRE_BOARD_ID; @@ -239,16 +248,19 @@ PYBIND11_MODULE(_hololink, m) .def("hololink", &DataChannel::hololink) .def("peer_ip", &DataChannel::peer_ip) .def("authenticate", &DataChannel::authenticate, "qp_number"_a, "rkey"_a) - .def("configure", &DataChannel::configure, "frame_address"_a, "frame_size"_a, - "local_data_port"_a) - .def("write_uint32", &DataChannel::write_uint32, "address"_a, "value"_a); + .def("configure", &DataChannel::configure, "frame_memory"_a, "frame_size"_a, "page_size"_a, "pages"_a, "local_data_port"_a) + .def("unconfigure", &DataChannel::unconfigure) + .def_static("use_multicast", &DataChannel::use_multicast, "metadata"_a, "address"_a, "port"_a) + .def_static("use_broadcast", &DataChannel::use_broadcast, "metadata"_a, "port"_a) + .def("configure_socket", &DataChannel::configure_socket, "socket_fd"_a) + .def_static("use_sensor", &DataChannel::use_sensor, "metadata"_a, "sensor_number"_a); py::register_exception(m, "TimeoutError"); py::register_exception(m, "UnsupportedVersion"); py::class_>(m, "Hololink") - .def(py::init(), "peer_ip"_a, - 
"control_port"_a, "serial_number"_a) + .def(py::init(), "peer_ip"_a, + "control_port"_a, "serial_number"_a, "sequence_number_checking"_a) .def_static("from_enumeration_metadata", &Hololink::from_enumeration_metadata, "metadata"_a) .def_static("reset_framework", &Hololink::reset_framework) .def_static("enumerated", &Hololink::enumerated, "metadata"_a) @@ -257,17 +269,25 @@ PYBIND11_MODULE(_hololink, m) .def("stop", &Hololink::stop) .def("reset", &Hololink::reset) .def("get_fpga_version", &Hololink::get_fpga_version, - "timeout"_a = std::shared_ptr()) + "timeout"_a = std::shared_ptr(), "check_sequence"_a = true) .def("get_fpga_date", &Hololink::get_fpga_date) - .def("write_uint32", &Hololink::write_uint32, "address"_a, "value"_a, - "timeout"_a = std::shared_ptr(), "retry"_a = true) - .def("read_uint32", &Hololink::read_uint32, "address"_a, - "timeout"_a = std::shared_ptr()) + .def( + "write_uint32", + [](Hololink& me, uint32_t address, uint32_t value, const std::shared_ptr& timeout, bool retry) { + return me.write_uint32(address, value, timeout, retry); + }, + "address"_a, "value"_a, "timeout"_a = std::shared_ptr(), "retry"_a = true) + .def( + "read_uint32", + [](Hololink& me, uint32_t address, const std::shared_ptr& timeout) { + return me.read_uint32(address, timeout); + }, + "address"_a, "timeout"_a = std::shared_ptr()) .def("setup_clock", &Hololink::setup_clock, "clock_profile"_a) .def("get_i2c", &Hololink::get_i2c, "i2c_address"_a) .def("get_spi", &Hololink::get_spi, "spi_address"_a, "chip_select"_a, "clock_divisor"_a = 0x0F, "cpol"_a = 1, "cpha"_a = 1, "width"_a = 1) - .def("get_gpio", &Hololink::get_gpio) + .def("get_gpio", &Hololink::get_gpio, "metadata"_a) .def("send_control", &Hololink::send_control) .def( "on_reset", @@ -366,13 +386,17 @@ PYBIND11_MODULE(_hololink, m) .def("set_direction", &Hololink::GPIO::set_direction, "pin"_a, "direction"_a) .def("get_direction", &Hololink::GPIO::get_direction, "pin"_a) .def("set_value", &Hololink::GPIO::set_value, 
"pin"_a, "value"_a) - .def("get_value", &Hololink::GPIO::get_value, "pin"_a); + .def("get_value", &Hololink::GPIO::get_value, "pin"_a) + .def("get_supported_pin_num", &Hololink::GPIO::get_supported_pin_num); gpio.attr("IN") = Hololink::GPIO::IN; gpio.attr("OUT") = Hololink::GPIO::OUT; gpio.attr("LOW") = Hololink::GPIO::LOW; gpio.attr("HIGH") = Hololink::GPIO::HIGH; gpio.attr("GPIO_PIN_RANGE") = Hololink::GPIO::GPIO_PIN_RANGE; + py::class_>(m, "NvtxTrace") + .def_static("setThreadName", &native::NvtxTrace::setThreadName, "threadName"_a) + .def_static("event_u64", &native::NvtxTrace::event_u64, "message"_a, "datum"_a); } // PYBIND11_MODULE } // namespace hololink diff --git a/python/hololink/native/__init__.py b/python/hololink/native/__init__.py index e7a36db..7155e05 100644 --- a/python/hololink/native/__init__.py +++ b/python/hololink/native/__init__.py @@ -15,6 +15,26 @@ # See README.md for detailed information. -from ._native import ArpWrapper, Deserializer, Serializer, local_mac +from ._native import ( + PAGE_SIZE, + UDP_PACKET_SIZE, + ArpWrapper, + Deserializer, + Serializer, + local_ip_and_mac, + local_ip_and_mac_from_socket, + local_mac, + round_up, +) -__all__ = ["Deserializer", "local_mac", "Serializer", "ArpWrapper"] +__all__ = [ + "ArpWrapper", + "Deserializer", + "PAGE_SIZE", + "Serializer", + "UDP_PACKET_SIZE", + "local_ip_and_mac", + "local_ip_and_mac_from_socket", + "local_mac", + "round_up", +] diff --git a/python/hololink/native/native.cpp b/python/hololink/native/native.cpp index 8141611..a0afa6b 100644 --- a/python/hololink/native/native.cpp +++ b/python/hololink/native/native.cpp @@ -130,6 +130,15 @@ PYBIND11_MODULE(_native, m) } return value; }) + .def("next_uint64_le", + [](Deserializer& me) { + uint64_t value = 0; + bool r = me.next_uint64_le(value); + if (!r) { + throw std::runtime_error("Buffer underflow"); + } + return value; + }) // next_buffer returns a pointer to an internal buffer, Python should not take ownership but // couple the 
lifetime of the pointer to the Deserializer instance .def( @@ -250,6 +259,14 @@ PYBIND11_MODULE(_native, m) m.def("local_mac", &local_mac); + m.def("local_ip_and_mac", &local_ip_and_mac, "destination_ip"_a, "port"_a); + + m.def("local_ip_and_mac_from_socket", &local_ip_and_mac_from_socket, "socket_fd"_a); + + m.attr("UDP_PACKET_SIZE") = UDP_PACKET_SIZE; + m.attr("PAGE_SIZE") = PAGE_SIZE; + + m.def("round_up", &round_up, "value"_a, "alignment"_a); } // PYBIND11_MODULE } // namespace hololink::native diff --git a/python/hololink/operators/CMakeLists.txt b/python/hololink/operators/CMakeLists.txt index e42f7d2..97e3ef0 100644 --- a/python/hololink/operators/CMakeLists.txt +++ b/python/hololink/operators/CMakeLists.txt @@ -14,11 +14,10 @@ # limitations under the License. # include the operators -add_subdirectory(csi_to_bayer) if(DEFINED ENV{CONTAINER_TYPE} AND "$ENV{CONTAINER_TYPE}" STREQUAL "igpu") add_subdirectory(argus_isp) endif() +add_subdirectory(csi_to_bayer) add_subdirectory(image_processor) -add_subdirectory(gamma_correction) add_subdirectory(linux_receiver) add_subdirectory(roce_receiver) diff --git a/python/hololink/operators/__init__.py b/python/hololink/operators/__init__.py index f9239fa..62021e6 100644 --- a/python/hololink/operators/__init__.py +++ b/python/hololink/operators/__init__.py @@ -19,10 +19,9 @@ import sys _MODULES = { + "ArgusIspOp": "argus_isp", "BaseReceiverOp": "base_receiver_op", "CsiToBayerOp": "csi_to_bayer", - "GammaCorrectionOp": "gamma_correction", - "ArgusIspOp": "argus_isp", "ImageProcessorOp": "image_processor", "ImageShiftToUint8Operator": "image_shift_to_uint8_operator", "LinuxReceiver": "linux_receiver", diff --git a/python/hololink/operators/base_receiver_op.py b/python/hololink/operators/base_receiver_op.py index 39c1c19..4a17dc7 100644 --- a/python/hololink/operators/base_receiver_op.py +++ b/python/hololink/operators/base_receiver_op.py @@ -22,6 +22,8 @@ import holoscan from cuda import cuda +import hololink as 
hololink_module + class BaseReceiverOp(holoscan.core.Operator): def __init__( @@ -29,7 +31,6 @@ def __init__( *args, hololink_channel=None, device=None, - frame_memory=None, frame_size=None, frame_context=None, **kwargs, @@ -37,14 +38,21 @@ def __init__( super().__init__(*args, **kwargs) self._hololink_channel = hololink_channel self._device = device - self._frame_size = frame_size - self._frame_memory = frame_memory self._frame_context = frame_context self._ok = False - self._metadata = None - # We'll allocate this for you if you like. - if self._frame_memory is None: - self._frame_memory = self._allocate(self._frame_size) + self._count = 0 + self._frame_size = frame_size + aligned_frame_size = hololink_module.native.round_up( + frame_size, hololink_module.native.PAGE_SIZE + ) + self._metadata_size = hololink_module.METADATA_SIZE + self._allocation_size = aligned_frame_size + self._metadata_size + self._frame_memory = self._allocate(self._allocation_size) + # + self._frame_ready_condition = holoscan.conditions.AsynchronousCondition( + self.fragment, name="frame_ready_condition" + ) + self.add_arg(self._frame_ready_condition) def setup(self, spec): logging.info("setup") @@ -52,7 +60,7 @@ def setup(self, spec): def start(self): unowned_memory = cp.cuda.UnownedMemory( - self._frame_memory, self._frame_size, self + self._frame_memory, self._allocation_size, self ) self._cp_frame = cp.ndarray( (self._frame_size,), @@ -66,11 +74,22 @@ def start(self): self._start_receiver() local_ip, local_port = self._local_ip_and_port() logging.info(f"{local_ip=} {local_port=}") + page_size = self._allocation_size + pages = 1 + distal_memory_address_start = 0 # See received_address_offset() self._hololink_channel.configure( - self._frame_memory, self._frame_size, local_port + distal_memory_address_start, self._frame_size, page_size, pages, local_port + ) + self._frame_ready_condition.event_state = ( + holoscan.conditions.AsynchronousEventState.EVENT_WAITING ) self._device.start() + 
def received_address_offset(self): + # This address is added to the address received from HSB; + # HSB is configured to start with address 0. + return self._frame_memory + def _start_receiver(self): raise NotImplementedError() @@ -80,6 +99,7 @@ def _local_ip_and_port(self): def stop(self): self._device.stop() + self._hololink_channel.unconfigure() self._stop() self._frame_size = 0 del self._cp_frame @@ -91,22 +111,31 @@ def _stop(self): def compute(self, op_input, op_output, context): timeout_ms = 1000 # metadata is a dict or None - self._metadata = self._get_next_frame(timeout_ms) - if not self._metadata: - if self._ok: - self._ok = False - logging.error("Ingress frame timeout; ignoring.") - else: - self._ok = True + metadata = self._get_next_frame(timeout_ms) + if not metadata: + return self.timeout(op_input, op_output, context) + self._frame_ready_condition.event_state = ( + holoscan.conditions.AsynchronousEventState.EVENT_WAITING + ) + self._count += 1 + self._ok = True + # Publish the metadata from get_next_frame out to the pipeline. + for key, value in metadata.items(): + self.metadata[key] = value op_output.emit({"": self._cp_frame}, "output") + def timeout(self, op_input, op_output, context): + if self._ok: + self._ok = False + logging.error( + f"Ingress frame timeout; ignoring; received {self._count} frames so far." 
+ ) + return None + def _get_next_frame(self, timeout_ms): """Returns metadata: dict or None""" raise NotImplementedError() - def metadata(self): - return self._metadata - def _allocate(self, size, flags=0): (cu_result,) = cuda.cuInit(0) assert cu_result == cuda.CUresult.CUDA_SUCCESS @@ -131,3 +160,8 @@ def _allocate(self, size, flags=0): cu_result, device_deviceptr = cuda.cuMemHostGetDevicePointer(host_deviceptr, 0) assert cu_result == cuda.CUresult.CUDA_SUCCESS return int(device_deviceptr) + + def frame_ready(self): + self._frame_ready_condition.event_state = ( + holoscan.conditions.AsynchronousEventState.EVENT_DONE + ) diff --git a/python/hololink/operators/gamma_correction/gamma_correction.cpp b/python/hololink/operators/gamma_correction/gamma_correction.cpp deleted file mode 100644 index 7708f8b..0000000 --- a/python/hololink/operators/gamma_correction/gamma_correction.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include // for unordered_map -> dict, etc. 
- -#include -#include -#include - -#include -#include -#include -#include - -using std::string_literals::operator""s; -using pybind11::literals::operator""_a; - -#define STRINGIFY(x) #x -#define MACRO_STRINGIFY(x) STRINGIFY(x) - -namespace py = pybind11; - -namespace hololink::operators { - -/* Trampoline classes for handling Python kwargs - * - * These add a constructor that takes a Fragment for which to initialize the operator. - * The explicit parameter list and default arguments take care of providing a Pythonic - * kwarg-based interface with appropriate default values matching the operator's - * default parameters in the C++ API `setup` method. - * - * The sequence of events in this constructor is based on Fragment::make_operator - */ -class PyGammaCorrectionOp : public GammaCorrectionOp { -public: - /* Inherit the constructors */ - using GammaCorrectionOp::GammaCorrectionOp; - - // Define a constructor that fully initializes the object. - PyGammaCorrectionOp(holoscan::Fragment* fragment, float gamma, int cuda_device_ordinal, const std::string& name = "gamma_correction") - : GammaCorrectionOp(holoscan::ArgList { holoscan::Arg { "gamma", gamma }, holoscan::Arg { "cuda_device_ordinal", cuda_device_ordinal } }) - { - name_ = name; - fragment_ = fragment; - spec_ = std::make_shared(fragment); - setup(*spec_.get()); - } -}; - -PYBIND11_MODULE(_gamma_correction, m) -{ -#ifdef VERSION_INFO - m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); -#else - m.attr("__version__") = "dev"; -#endif - - py::class_>(m, "GammaCorrectionOp") - .def(py::init(), - "fragment"_a, - "gamma"_a = 2.2f, - "cuda_device_ordinal"_a = 0, - "name"_a = "gamma_correction"s) - .def("setup", &GammaCorrectionOp::setup, "spec"_a); - -} // PYBIND11_MODULE - -} // namespace hololink::operators diff --git a/python/hololink/operators/linux_receiver/linux_receiver.cpp b/python/hololink/operators/linux_receiver/linux_receiver.cpp index fa054d6..7ad5c2e 100644 --- 
a/python/hololink/operators/linux_receiver/linux_receiver.cpp +++ b/python/hololink/operators/linux_receiver/linux_receiver.cpp @@ -17,6 +17,7 @@ #include +#include #include using pybind11::literals::operator""_a; @@ -39,7 +40,7 @@ PYBIND11_MODULE(_linux_receiver, m) // NOTE: pybind11 never implicitly release the GIL (see https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil), // therefore for blocking function explicitly release the GIL using `py::call_guard()`. py::class_(m, "LinuxReceiver") - .def(py::init(), "cu_buffer"_a, "cu_buffer_size"_a, "socket"_a) + .def(py::init(), "cu_buffer"_a, "cu_buffer_size"_a, "socket"_a, "received_address_offset"_a) .def("run", &LinuxReceiver::run, py::call_guard()) .def("close", &LinuxReceiver::close) .def( @@ -50,7 +51,8 @@ PYBIND11_MODULE(_linux_receiver, m) }, py::call_guard(), "timeout_ms"_a) .def("get_qp_number", &LinuxReceiver::get_qp_number) - .def("get_rkey", &LinuxReceiver::get_rkey); + .def("get_rkey", &LinuxReceiver::get_rkey) + .def("set_frame_ready", &LinuxReceiver::set_frame_ready, "frame_ready"_a); py::class_(m, "LinuxReceiverMetadata") .def_readonly("frame_packets_received", &LinuxReceiverMetadata::frame_packets_received) @@ -62,7 +64,26 @@ PYBIND11_MODULE(_linux_receiver, m) .def_readonly("frame_end_ns", &LinuxReceiverMetadata::frame_end_ns) .def_readonly("imm_data", &LinuxReceiverMetadata::imm_data) .def_readonly("packets_dropped", &LinuxReceiverMetadata::packets_dropped) - .def_readonly("received_ns", &LinuxReceiverMetadata::received_ns); + .def_readonly("received_s", &LinuxReceiverMetadata::received_s) + .def_readonly("received_ns", &LinuxReceiverMetadata::received_ns) + .def_property_readonly("timestamp_s", [](LinuxReceiverMetadata& me) { + return me.frame_metadata.timestamp_s; + }) + .def_property_readonly("timestamp_ns", [](LinuxReceiverMetadata& me) { + return me.frame_metadata.timestamp_ns; + }) + .def_property_readonly("metadata_s", [](LinuxReceiverMetadata& me) { + 
return me.frame_metadata.metadata_s; + }) + .def_property_readonly("metadata_ns", [](LinuxReceiverMetadata& me) { + return me.frame_metadata.metadata_ns; + }) + .def_property_readonly("crc", [](LinuxReceiverMetadata& me) { + return me.frame_metadata.crc; + }) + .def_property_readonly("psn", [](LinuxReceiverMetadata& me) { + return me.frame_metadata.psn; + }); } // PYBIND11_MODULE diff --git a/python/hololink/operators/linux_receiver_operator.py b/python/hololink/operators/linux_receiver_operator.py index 0a9dfb7..2bdaa18 100644 --- a/python/hololink/operators/linux_receiver_operator.py +++ b/python/hololink/operators/linux_receiver_operator.py @@ -15,7 +15,6 @@ # See README.md for detailed information. -import datetime import logging import os import socket @@ -28,6 +27,7 @@ MS_PER_SEC = 1000 US_PER_SEC = 1000 * MS_PER_SEC NS_PER_SEC = 1000 * US_PER_SEC +SEC_PER_NS = 1.0 / NS_PER_SEC class LinuxReceiverOperator(hololink_module.operators.BaseReceiverOp): @@ -47,14 +47,20 @@ def __init__(self, *args, receiver_affinity=None, **kwargs): def _start_receiver(self): self._check_buffer_size(self._frame_size) + self._hololink_channel.configure_socket(self._data_socket.fileno()) self._receiver = hololink_module.operators.LinuxReceiver( self._frame_memory, self._frame_size, self._data_socket.fileno(), + self.received_address_offset(), ) - self._data_socket.bind(("", 0)) + + def _ready(receiver): + self.frame_ready() + + self._receiver.set_frame_ready(_ready) self._receiver_thread = threading.Thread( - daemon=True, name="receiver_thread", target=self._run + daemon=True, name=self.name, target=self._run ) self._receiver_thread.start() self._hololink_channel.authenticate( @@ -77,22 +83,19 @@ def _get_next_frame(self, timeout_ms): ok, receiver_metadata = self._receiver.get_next_frame(timeout_ms) if not ok: return None - now_ns = datetime.datetime.now(datetime.timezone.utc).timestamp() * NS_PER_SEC - # Extend the timestamp we got from the data, - # (which is ns plus 2 bits of 
seconds). Note that - # we don't look at the 2 bits of seconds here. - ns = receiver_metadata.imm_data % NS_PER_SEC - timestamp_ns = (now_ns - (now_ns % NS_PER_SEC)) + ns - # always round down - if timestamp_ns > now_ns: - timestamp_ns -= NS_PER_SEC application_metadata = { "frame_number": receiver_metadata.frame_number, "frame_packets_received": receiver_metadata.frame_packets_received, "frame_bytes_received": receiver_metadata.frame_bytes_received, + "received_s": receiver_metadata.received_s, "received_ns": receiver_metadata.received_ns, - "timestamp_ns": timestamp_ns, + "timestamp_s": receiver_metadata.timestamp_s, + "timestamp_ns": receiver_metadata.timestamp_ns, + "metadata_s": receiver_metadata.metadata_s, + "metadata_ns": receiver_metadata.metadata_ns, "packets_dropped": receiver_metadata.packets_dropped, + "crc": receiver_metadata.crc, + "psn": receiver_metadata.psn, } return application_metadata diff --git a/python/hololink/operators/operator_util.hpp b/python/hololink/operators/operator_util.hpp index 372eeb2..1d40f8c 100644 --- a/python/hololink/operators/operator_util.hpp +++ b/python/hololink/operators/operator_util.hpp @@ -22,6 +22,7 @@ #include +#include #include #include #include @@ -46,8 +47,8 @@ void add_positional_condition_and_resource_args(holoscan::Operator* op, const py } else if (py::isinstance(*it)) { op->add_arg(it->cast>()); } else { - HOLOSCAN_LOG_WARN("Unhandled positional argument detected (only Condition and Resource " - "objects can be parsed positionally)"); + HSB_LOG_WARN("Unhandled positional argument detected (only Condition and Resource " + "objects can be parsed positionally)"); } } } diff --git a/python/hololink/operators/roce_receiver/roce_receiver.cpp b/python/hololink/operators/roce_receiver/roce_receiver.cpp index b13bba1..ecb6213 100644 --- a/python/hololink/operators/roce_receiver/roce_receiver.cpp +++ b/python/hololink/operators/roce_receiver/roce_receiver.cpp @@ -104,9 +104,10 @@ class PyRoceReceiverOp : public 
RoceReceiverOp { ); } - std::shared_ptr get_next_frame(double timeout_ms) override + std::tuple> get_next_frame(double timeout_ms) override { - PYBIND11_OVERRIDE(std::shared_ptr, /* Return type */ + typedef std::tuple> FrameData; + PYBIND11_OVERRIDE(FrameData, /* Return type */ RoceReceiverOp, /* Parent class */ get_next_frame, /* Name of function in C++ (must match Python name) */ timeout_ms); @@ -132,7 +133,6 @@ PYBIND11_MODULE(_roce_receiver, m) "frame_memory"_a = 0, "ibv_name"_a = "roceP5p3s0f0", "ibv_port"_a = 1, "name"_a = "roce_receiver"s) .def("get_next_frame", &RoceReceiverOp::get_next_frame, "timeout_ms"_a) - .def("metadata", &RoceReceiverOp::metadata) .def("setup", &RoceReceiverOp::setup, "spec"_a) .def("start", &RoceReceiverOp::start) .def("stop", &RoceReceiverOp::stop); diff --git a/python/hololink/sensors/imx274/dual_imx274.py b/python/hololink/sensors/imx274/dual_imx274.py index 85d9e87..9222751 100644 --- a/python/hololink/sensors/imx274/dual_imx274.py +++ b/python/hololink/sensors/imx274/dual_imx274.py @@ -75,10 +75,7 @@ def start(self): # # Setting these register is time-consuming. for reg, val in imx274_mode.imx274_start: - if ( - reg == imx274_mode.IMX274_TABLE_WAIT_MS - or reg == imx274_mode.IMX274_TABLE_END - ): + if reg == imx274_mode.IMX274_TABLE_WAIT_MS: time.sleep(val / 1000) # the val is in ms else: self.set_register(reg, val) @@ -86,13 +83,12 @@ def start(self): def stop(self): """Stop Streaming""" for reg, val in imx274_mode.imx274_stop: - if ( - reg == imx274_mode.IMX274_TABLE_WAIT_MS - or reg == imx274_mode.IMX274_TABLE_END - ): + if reg == imx274_mode.IMX274_TABLE_WAIT_MS: time.sleep(val / 1000) # the val is in ms else: self.set_register(reg, val) + # Let the egress buffer drain. 
+ time.sleep(0.1) self._running = False def get_version(self): @@ -110,9 +106,7 @@ def get_register(self, register): CAM_I2C_ADDRESS, write_bytes[: serializer.length()], read_byte_count ) deserializer = hololink_module.Deserializer(reply) - r = deserializer.next_uint32_be() - # since the register value is only 8b - r = r & 0xFF + r = deserializer.next_uint8() logging.debug( "get_register(register=%d(0x%X))=%d(0x%X)" % (register, register, r, r) ) @@ -151,14 +145,16 @@ def configure_camera(self, imx274_mode_set): == imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS.value ): mode_list = imx274_mode.imx274_mode_1920x1080_60fps + elif ( + imx274_mode_set.value + == imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS_12BITS.value + ): + mode_list = imx274_mode.imx274_mode_3840X2160_60fps_12bits else: logging.error(f"{imx274_mode_set} mode is not present.") for reg, val in mode_list: - if ( - reg == imx274_mode.IMX274_TABLE_WAIT_MS - or reg == imx274_mode.IMX274_TABLE_END - ): + if reg == imx274_mode.IMX274_TABLE_WAIT_MS: time.sleep(val / 1000) # the val is in ms else: self.set_register(reg, val) @@ -175,10 +171,9 @@ def set_exposure_reg(self, value=0x0C): logging.warn(f"Exposure value {value} is higher than the maximum.") value = 0xFFFF - reg_value = format(value, "04x") - self.set_register(int(imx274_mode.REG_EXP_LSB, 16), int(reg_value[2:4], 16)) - self.set_register(int(imx274_mode.REG_EXP_MSB, 16), int(reg_value[0:2], 16)) - time.sleep(int(imx274_mode.IMX274_WAIT_MS, 16) / 1000) + self.set_register(imx274_mode.REG_EXP_LSB, (value >> 8) & 0xFF) + self.set_register(imx274_mode.REG_EXP_MSB, value & 0xFF) + time.sleep(imx274_mode.IMX274_WAIT_MS / 1000) def set_digital_gain_reg(self, value=0x0000): """ @@ -198,8 +193,8 @@ def set_digital_gain_reg(self, value=0x0000): elif value >= 0x02: reg_value = 0x01 - self.set_register(int(imx274_mode.REG_DG, 16), reg_value) - time.sleep(int(imx274_mode.IMX274_WAIT_MS, 16) / 1000) + self.set_register(imx274_mode.REG_DG, reg_value) + 
time.sleep(imx274_mode.IMX274_WAIT_MS / 1000) def set_analog_gain_reg(self, value=0x0C): if value < 0x00: @@ -210,10 +205,9 @@ def set_analog_gain_reg(self, value=0x0C): logging.warn(f"AG value {value} is more than maximum.") value = 0xFFFF - reg_value = format(value, "04x") - self.set_register(int(imx274_mode.REG_AG_LSB, 16), int(reg_value[2:4], 16)) - self.set_register(int(imx274_mode.REG_AG_MSB, 16), int(reg_value[0:2], 16)) - time.sleep(int(imx274_mode.IMX274_WAIT_MS, 16) / 1000) + self.set_register(imx274_mode.REG_AG_LSB, (value >> 8) & 0xFF) + self.set_register(imx274_mode.REG_AG_MSB, value & 0xFF) + time.sleep(imx274_mode.IMX274_WAIT_MS / 1000) def set_mode(self, imx274_mode_set): if imx274_mode_set.value < len(imx274_mode.Imx274_Mode): @@ -233,19 +227,33 @@ def configure_converter(self, converter): line_start_size, line_end_size, ) = self._hololink.csi_size() - assert self._pixel_format == hololink_module.sensors.csi.PixelFormat.RAW_10 - # We get 175 bytes of metadata in RAW10 mode - metadata_size = line_start_size + 175 + line_end_size - converter.configure( - self._width, - self._height, - self._pixel_format, - frame_start_size + metadata_size, - frame_end_size, - line_start_size, - line_end_size, - margin_top=8, # sensor has 8 lines of optical black before the real image data starts - ) + if self._pixel_format == hololink_module.sensors.csi.PixelFormat.RAW_10: + # We get 175 bytes of metadata in RAW10 mode + metadata_size = line_start_size + 175 + line_end_size + converter.configure( + self._width, + self._height, + self._pixel_format, + frame_start_size + metadata_size, + frame_end_size, + line_start_size, + line_end_size, + margin_top=8, # sensor has 8 lines of optical black before the real image data starts + ) + elif self._pixel_format == hololink_module.sensors.csi.PixelFormat.RAW_12: + metadata_size = line_start_size + 175 + line_end_size + converter.configure( + self._width, + self._height, + self._pixel_format, + frame_start_size + metadata_size, 
+ frame_end_size, + line_start_size, + line_end_size, + margin_top=16, # sensor has 16 lines of optical black before the real image data starts + ) + else: + logging.error("Incorrect pixel format for IMX274") def pixel_format(self): return self._pixel_format diff --git a/python/hololink/sensors/imx274/imx274_mode.py b/python/hololink/sensors/imx274/imx274_mode.py index e6f7807..432f99e 100644 --- a/python/hololink/sensors/imx274/imx274_mode.py +++ b/python/hololink/sensors/imx274/imx274_mode.py @@ -15,265 +15,335 @@ limitations under the License. """ -from collections import OrderedDict, namedtuple +from collections import namedtuple from enum import Enum import hololink # values are on hex number system to be consistent with rest of the list -IMX274_TABLE_WAIT_MS = "0000" -IMX274_TABLE_END = "01" -IMX274_WAIT_MS = "01" -IMX274_WAIT_MS_START = "0f" +IMX274_TABLE_WAIT_MS = "imx274-table-wait-ms" +IMX274_WAIT_MS = 0x01 +IMX274_WAIT_MS_START = 0x0F # Register addresses for camera properties. They only accept 8bits of value. 
# Analog Gain -REG_AG_MSB = "300B" -REG_AG_LSB = "300A" +REG_AG_MSB = 0x300B +REG_AG_LSB = 0x300A # Exposure -REG_EXP_MSB = "300D" -REG_EXP_LSB = "300C" +REG_EXP_MSB = 0x300D +REG_EXP_LSB = 0x300C # Digital Gain -REG_DG = "3012" - -imx274_start = OrderedDict( - [ - ("3000", "00"), # mode select streaming on - ("303E", "02"), - (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS_START), - ("30F4", "00"), - ("3018", "A2"), - (IMX274_TABLE_END, IMX274_WAIT_MS_START), - ] -) -imx274_start = [(int(x, 16), int(y, 16)) for x, y in imx274_start.items()] +REG_DG = 0x3012 +imx274_start = [ + (0x3000, 0x00), # mode select streaming on + (0x303E, 0x02), + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS_START), + (0x30F4, 0x00), + (0x3018, 0xA2), + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS_START), +] -imx274_stop = OrderedDict( - [ - (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), - ("3000", "01"), # mode select streaming off - (IMX274_TABLE_END, "00"), - ] -) -imx274_stop = [(int(x, 16), int(y, 16)) for x, y in imx274_stop.items()] +imx274_stop = [ + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), + (0x3000, 0x01), # mode select streaming off +] # test pattern -tp_colorbars = OrderedDict( - [ - # test pattern - ("303C", "11"), - ("303D", "0B"), - ("370B", "11"), - ("370E", "00"), - ("377F", "01"), - ("3781", "01"), - ] -) -tp_colorbars = [(int(x, 16), int(y, 16)) for x, y in tp_colorbars.items()] +tp_colorbars = [ + # test pattern + (0x303C, 0x11), + (0x303D, 0x0B), + (0x370B, 0x11), + (0x370E, 0x00), + (0x377F, 0x01), + (0x3781, 0x01), +] # Mode : 3840X2160 10 bits 60fps # value pairs use hex number system -imx274_mode_3840X2160_60fps = OrderedDict( - [ - (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), - ("3000", "12"), - ("3120", "F0"), - ("3122", "02"), - ("3129", "9c"), - ("312A", "02"), - ("312D", "02"), - ("310B", "00"), - ("304C", "00"), - ("304D", "03"), - ("331C", "1A"), - ("3502", "02"), - ("3529", "0E"), - ("352A", "0E"), - ("352B", "0E"), - ("3538", "0E"), - ("3539", "0E"), - ("3553", "00"), - ("357D", "05"), - 
("357F", "05"), - ("3581", "04"), - ("3583", "76"), - ("3587", "01"), - ("35BB", "0E"), - ("35BC", "0E"), - ("35BD", "0E"), - ("35BE", "0E"), - ("35BF", "0E"), - ("366E", "00"), - ("366F", "00"), - ("3670", "00"), - ("3671", "00"), - ("30EE", "01"), - ("3304", "32"), - ("3306", "32"), - ("3590", "32"), - ("3686", "32"), - # resolution */ - ("30E2", "01"), - ("30F6", "07"), - ("30F7", "01"), - ("30F8", "C6"), - ("30F9", "11"), - ("3130", "78"), - ("3131", "08"), - ("3132", "70"), - ("3133", "08"), - # crop */ - ("30DD", "01"), - ("30DE", "04"), - ("30E0", "03"), - ("3037", "01"), - ("3038", "0C"), - ("3039", "00"), - ("303A", "0C"), - ("303B", "0F"), - # mode setting */ - ("3004", "01"), - ("3005", "01"), - ("3006", "00"), - ("3007", "02"), - ("300C", "0C"), - ("300D", "00"), - ("300E", "00"), - ("3019", "00"), - ("3A41", "08"), - ("3342", "0A"), - ("3343", "00"), - ("3344", "16"), - ("3345", "00"), - ("3528", "0E"), - ("3554", "1F"), - ("3555", "01"), - ("3556", "01"), - ("3557", "01"), - ("3558", "01"), - ("3559", "00"), - ("355A", "00"), - ("35BA", "0E"), - ("366A", "1B"), - ("366B", "1A"), - ("366C", "19"), - ("366D", "17"), - ("33A6", "01"), - ("306B", "05"), - (IMX274_TABLE_END, IMX274_WAIT_MS), - ] -) imx274_mode_3840X2160_60fps = [ - (int(x, 16), int(y, 16)) for x, y in imx274_mode_3840X2160_60fps.items() + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), + (0x3000, 0x12), + (0x3120, 0xF0), + (0x3122, 0x02), + (0x3129, 0x9C), + (0x312A, 0x02), + (0x312D, 0x02), + (0x310B, 0x00), + (0x304C, 0x00), + (0x304D, 0x03), + (0x331C, 0x1A), + (0x3502, 0x02), + (0x3529, 0x0E), + (0x352A, 0x0E), + (0x352B, 0x0E), + (0x3538, 0x0E), + (0x3539, 0x0E), + (0x3553, 0x00), + (0x357D, 0x05), + (0x357F, 0x05), + (0x3581, 0x04), + (0x3583, 0x76), + (0x3587, 0x01), + (0x35BB, 0x0E), + (0x35BC, 0x0E), + (0x35BD, 0x0E), + (0x35BE, 0x0E), + (0x35BF, 0x0E), + (0x366E, 0x00), + (0x366F, 0x00), + (0x3670, 0x00), + (0x3671, 0x00), + (0x30EE, 0x01), + (0x3304, 0x32), + (0x3306, 0x32), + (0x3590, 
0x32), + (0x3686, 0x32), + # resolution */ + (0x30E2, 0x01), + (0x30F6, 0x07), + (0x30F7, 0x01), + (0x30F8, 0xC6), + (0x30F9, 0x11), + (0x3130, 0x78), + (0x3131, 0x08), + (0x3132, 0x70), + (0x3133, 0x08), + # crop */ + (0x30DD, 0x01), + (0x30DE, 0x04), + (0x30E0, 0x03), + (0x3037, 0x01), + (0x3038, 0x0C), + (0x3039, 0x00), + (0x303A, 0x0C), + (0x303B, 0x0F), + # mode setting */ + (0x3004, 0x01), + (0x3005, 0x01), + (0x3006, 0x00), + (0x3007, 0x02), + (0x300C, 0x0C), + (0x300D, 0x00), + (0x300E, 0x00), + (0x3019, 0x00), + (0x3A41, 0x08), + (0x3342, 0x0A), + (0x3343, 0x00), + (0x3344, 0x16), + (0x3345, 0x00), + (0x3528, 0x0E), + (0x3554, 0x1F), + (0x3555, 0x01), + (0x3556, 0x01), + (0x3557, 0x01), + (0x3558, 0x01), + (0x3559, 0x00), + (0x355A, 0x00), + (0x35BA, 0x0E), + (0x366A, 0x1B), + (0x366B, 0x1A), + (0x366C, 0x19), + (0x366D, 0x17), + (0x33A6, 0x01), + (0x306B, 0x05), + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), +] + +# Mode : 3840X2160 12 bits 60fps +# value pairs use hex number system +imx274_mode_3840X2160_60fps_12bits = [ + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), + (0x3000, 0x12), + (0x3120, 0xF0), + (0x3122, 0x02), + (0x3129, 0x9C), + (0x312A, 0x02), + (0x312D, 0x02), + (0x310B, 0x00), + (0x304C, 0x00), + (0x304D, 0x03), + (0x331C, 0x1A), + (0x3502, 0x02), + (0x3529, 0x0E), + (0x352A, 0x0E), + (0x352B, 0x0E), + (0x3538, 0x0E), + (0x3539, 0x0E), + (0x3553, 0x00), + (0x357D, 0x05), + (0x357F, 0x05), + (0x3581, 0x04), + (0x3583, 0x76), + (0x3587, 0x01), + (0x35BB, 0x0E), + (0x35BC, 0x0E), + (0x35BD, 0x0E), + (0x35BE, 0x0E), + (0x35BF, 0x0E), + (0x366E, 0x00), + (0x366F, 0x00), + (0x3670, 0x00), + (0x3671, 0x00), + (0x30EE, 0x01), + (0x3304, 0x32), + (0x3306, 0x32), + (0x3590, 0x32), + (0x3686, 0x32), + # resolution */ + (0x30E2, 0x00), + (0x30F6, 0xED), + (0x30F7, 0x01), + (0x30F8, 0x08), + (0x30F9, 0x13), + (0x3130, 0x94), + (0x3131, 0x08), + (0x3132, 0x70), + (0x3133, 0x08), + # crop */ + (0x30DD, 0x01), + (0x30DE, 0x04), # crop 18 lines - 12h + (0x30E0, 
0x03), # 6 lines of ignored area + (0x3037, 0x01), + (0x3038, 0x0C), # 12 H lines to crop + (0x3039, 0x00), + (0x303A, 0x0C), # next cut at 3852 + (0x303B, 0x0F), + # mode setting */ + (0x3004, 0x00), + (0x3005, 0x07), + (0x3006, 0x00), + (0x3007, 0x02), + (0x300C, 0x0C), + (0x300D, 0x00), + (0x300E, 0x00), + (0x3019, 0x00), + (0x3A41, 0x10), + (0x3342, 0xFF), + (0x3343, 0x01), + (0x3344, 0xFF), + (0x3345, 0x01), + (0x3528, 0x0F), + (0x3554, 0x00), + (0x3555, 0x00), + (0x3556, 0x00), + (0x3557, 0x00), + (0x3558, 0x00), + (0x3559, 0x1F), + (0x355A, 0x1F), + (0x35BA, 0x0F), + (0x366A, 0x00), + (0x366B, 0x00), + (0x366C, 0x00), + (0x366D, 0x00), + (0x33A6, 0x01), + (0x306B, 0x07), + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), ] # Mode : 1920x1080 10 bits 60fps # value pairs use hex number system -imx274_mode_1920x1080_60fps = OrderedDict( - [ - (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), - ("3000", "12"), # mode select streaming on - # input freq. 24M - ("3120", "F0"), - ("3122", "02"), - ("3129", "9c"), - ("312A", "02"), - ("312D", "02"), - ("310B", "00"), - ("304C", "00"), - ("304D", "03"), - ("331C", "1A"), - ("3502", "02"), - ("3529", "0E"), - ("352A", "0E"), - ("352B", "0E"), - ("3538", "0E"), - ("3539", "0E"), - ("3553", "00"), - ("357D", "05"), - ("357F", "05"), - ("3581", "04"), - ("3583", "76"), - ("3587", "01"), - ("35BB", "0E"), - ("35BC", "0E"), - ("35BD", "0E"), - ("35BE", "0E"), - ("35BF", "0E"), - ("366E", "00"), - ("366F", "00"), - ("3670", "00"), - ("3671", "00"), - ("30EE", "01"), - ("3304", "32"), - ("3306", "32"), - ("3590", "32"), - ("3686", "32"), - # resolution - ("30E2", "02"), - ("30F6", "04"), - ("30F7", "01"), - ("30F8", "0C"), - ("30F9", "12"), - ("3130", "40"), - ("3131", "04"), - ("3132", "38"), - ("3133", "04"), - # crop - ("30DD", "01"), - ("30DE", "07"), - ("30DF", "00"), - ("30E0", "04"), - ("30E1", "00"), - ("3037", "01"), - ("3038", "0C"), - ("3039", "00"), - ("303A", "0C"), - ("303B", "0F"), - # mode setting - ("3004", "02"), - ("3005", 
"21"), - ("3006", "00"), - ("3007", "B1"), - ("300C", "08"), # SHR: Minimum 8 - ("300D", "00"), - ("3019", "00"), - ("3A41", "08"), - ("3342", "0A"), - ("3343", "00"), - ("3344", "1A"), - ("3345", "00"), - ("3528", "0E"), - ("3554", "00"), - ("3555", "01"), - ("3556", "01"), - ("3557", "01"), - ("3558", "01"), - ("3559", "00"), - ("355A", "00"), - ("35BA", "0E"), - ("366A", "1B"), - ("366B", "1A"), - ("366C", "19"), - ("366D", "17"), - ("33A6", "01"), - ("306B", "05"), - (IMX274_TABLE_END, IMX274_WAIT_MS), - ] -) imx274_mode_1920x1080_60fps = [ - (int(x, 16), int(y, 16)) for x, y in imx274_mode_1920x1080_60fps.items() + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), + (0x3000, 0x12), # mode select streaming on + # input freq. 24M + (0x3120, 0xF0), + (0x3122, 0x02), + (0x3129, 0x9C), + (0x312A, 0x02), + (0x312D, 0x02), + (0x310B, 0x00), + (0x304C, 0x00), + (0x304D, 0x03), + (0x331C, 0x1A), + (0x3502, 0x02), + (0x3529, 0x0E), + (0x352A, 0x0E), + (0x352B, 0x0E), + (0x3538, 0x0E), + (0x3539, 0x0E), + (0x3553, 0x00), + (0x357D, 0x05), + (0x357F, 0x05), + (0x3581, 0x04), + (0x3583, 0x76), + (0x3587, 0x01), + (0x35BB, 0x0E), + (0x35BC, 0x0E), + (0x35BD, 0x0E), + (0x35BE, 0x0E), + (0x35BF, 0x0E), + (0x366E, 0x00), + (0x366F, 0x00), + (0x3670, 0x00), + (0x3671, 0x00), + (0x30EE, 0x01), + (0x3304, 0x32), + (0x3306, 0x32), + (0x3590, 0x32), + (0x3686, 0x32), + # resolution + (0x30E2, 0x02), + (0x30F6, 0x04), + (0x30F7, 0x01), + (0x30F8, 0x0C), + (0x30F9, 0x12), + (0x3130, 0x40), + (0x3131, 0x04), + (0x3132, 0x38), + (0x3133, 0x04), + # crop + (0x30DD, 0x01), + (0x30DE, 0x07), + (0x30DF, 0x00), + (0x30E0, 0x04), + (0x30E1, 0x00), + (0x3037, 0x01), + (0x3038, 0x0C), + (0x3039, 0x00), + (0x303A, 0x0C), + (0x303B, 0x0F), + # mode setting + (0x3004, 0x02), + (0x3005, 0x21), + (0x3006, 0x00), + (0x3007, 0xB1), + (0x300C, 0x08), # SHR: Minimum 8 + (0x300D, 0x00), + (0x3019, 0x00), + (0x3A41, 0x08), + (0x3342, 0x0A), + (0x3343, 0x00), + (0x3344, 0x1A), + (0x3345, 0x00), + (0x3528, 0x0E), + 
(0x3554, 0x00), + (0x3555, 0x01), + (0x3556, 0x01), + (0x3557, 0x01), + (0x3558, 0x01), + (0x3559, 0x00), + (0x355A, 0x00), + (0x35BA, 0x0E), + (0x366A, 0x1B), + (0x366B, 0x1A), + (0x366C, 0x19), + (0x366D, 0x17), + (0x33A6, 0x01), + (0x306B, 0x05), + (IMX274_TABLE_WAIT_MS, IMX274_WAIT_MS), ] class Imx274_Mode(Enum): IMX274_MODE_3840X2160_60FPS = 0 IMX274_MODE_1920X1080_60FPS = 1 - Unknown = 2 + IMX274_MODE_3840X2160_60FPS_12BITS = 2 + Unknown = 3 frame_format = namedtuple( @@ -289,3 +359,7 @@ class Imx274_Mode(Enum): Imx274_Mode.IMX274_MODE_1920X1080_60FPS.value, frame_format(1920, 1080, 60, hololink.sensors.csi.PixelFormat.RAW_10), ) +imx_frame_format.insert( + Imx274_Mode.IMX274_MODE_3840X2160_60FPS_12BITS.value, + frame_format(3840, 2160, 60, hololink.sensors.csi.PixelFormat.RAW_12), +) diff --git a/python/hololink/sensors/imx477.py b/python/hololink/sensors/imx477.py index 847af7b..34f1c4d 100644 --- a/python/hololink/sensors/imx477.py +++ b/python/hololink/sensors/imx477.py @@ -17,7 +17,6 @@ import logging import time -from collections import OrderedDict import hololink as hololink_module @@ -30,454 +29,447 @@ # Camera default mode HEIGHT = 2160 WIDTH = 3840 -IMX477_TABLE_WAIT_MS = "0000" -IMX477_TABLE_END = "01" -IMX477_WAIT_MS = "01" -IMX477_WAIT_MS_START = "0f" +IMX477_TABLE_WAIT_MS = "(wait)" +IMX477_TABLE_END = "(end)" +IMX477_WAIT_MS = 1 # Camera Reg -imx477_mode_3840X2160_60fps = OrderedDict( - [ - (IMX477_TABLE_WAIT_MS, IMX477_WAIT_MS), - ("e000", "00"), - ("e07a", "01"), - ("4ae9", "18"), - ("4aea", "08"), - ("f61c", "04"), - ("f61e", "04"), - ("4ae9", "21"), - ("4aea", "80"), - ("38a8", "1f"), - ("38a9", "ff"), - ("38aa", "1f"), - ("38ab", "ff"), - ("55d4", "00"), - ("55d5", "00"), - ("55d6", "07"), - ("55d7", "ff"), - ("55e8", "07"), - ("55e9", "ff"), - ("55ea", "00"), - ("55eb", "00"), - ("574c", "07"), - ("574d", "ff"), - ("574e", "00"), - ("574f", "00"), - ("5754", "00"), - ("5755", "00"), - ("5756", "07"), - ("5757", "ff"), - ("5973", "04"), - 
("5974", "01"), - ("5d13", "c3"), - ("5d14", "58"), - ("5d15", "a3"), - ("5d16", "1d"), - ("5d17", "65"), - ("5d18", "8c"), - ("5d1a", "06"), - ("5d1b", "a9"), - ("5d1c", "45"), - ("5d1d", "3a"), - ("5d1e", "ab"), - ("5d1f", "15"), - ("5d21", "0e"), - ("5d22", "52"), - ("5d23", "aa"), - ("5d24", "7d"), - ("5d25", "57"), - ("5d26", "a8"), - ("5d37", "5a"), - ("5d38", "5a"), - ("5d77", "7f"), - ("7b75", "0e"), - ("7b76", "0b"), - ("7b77", "08"), - ("7b78", "0a"), - ("7b79", "47"), - ("7b7c", "00"), - ("7b7d", "00"), - ("8d1f", "00"), - ("8d27", "00"), - ("9004", "03"), - ("9200", "50"), - ("9201", "6c"), - ("9202", "71"), - ("9203", "00"), - ("9204", "71"), - ("9205", "01"), - ("9371", "6a"), - ("9373", "6a"), - ("9375", "64"), - ("991a", "00"), - ("996b", "8c"), - ("996c", "64"), - ("996d", "50"), - ("9a4c", "0d"), - ("9a4d", "0d"), - ("a001", "0a"), - ("a003", "0a"), - ("a005", "0a"), - ("a006", "01"), - ("a007", "c0"), - ("a009", "c0"), - ("3d8a", "01"), - ("4421", "04"), - ("7b3b", "01"), - ("7b4c", "00"), - ("9905", "00"), - ("9907", "00"), - ("9909", "00"), - ("990b", "00"), - ("9944", "3c"), - ("9947", "3c"), - ("994a", "8c"), - ("994b", "50"), - ("994c", "1b"), - ("994d", "8c"), - ("994e", "50"), - ("994f", "1b"), - ("9950", "8c"), - ("9951", "1b"), - ("9952", "0a"), - ("9953", "8c"), - ("9954", "1b"), - ("9955", "0a"), - ("9a13", "04"), - ("9a14", "04"), - ("9a19", "00"), - ("9a1c", "04"), - ("9a1d", "04"), - ("9a26", "05"), - ("9a27", "05"), - ("9a2c", "01"), - ("9a2d", "03"), - ("9a2f", "05"), - ("9a30", "05"), - ("9a41", "00"), - ("9a46", "00"), - ("9a47", "00"), - ("9c17", "35"), - ("9c1d", "31"), - ("9c29", "50"), - ("9c3b", "2f"), - ("9c41", "6b"), - ("9c47", "2d"), - ("9c4d", "40"), - ("9c6b", "00"), - ("9c71", "c8"), - ("9c73", "32"), - ("9c75", "04"), - ("9c7d", "2d"), - ("9c83", "40"), - ("9c94", "3f"), - ("9c95", "3f"), - ("9c96", "3f"), - ("9c97", "00"), - ("9c98", "00"), - ("9c99", "00"), - ("9c9a", "3f"), - ("9c9b", "3f"), - ("9c9c", "3f"), - 
("9ca0", "0f"), - ("9ca1", "0f"), - ("9ca2", "0f"), - ("9ca3", "00"), - ("9ca4", "00"), - ("9ca5", "00"), - ("9ca6", "1e"), - ("9ca7", "1e"), - ("9ca8", "1e"), - ("9ca9", "00"), - ("9caa", "00"), - ("9cab", "00"), - ("9cac", "09"), - ("9cad", "09"), - ("9cae", "09"), - ("9cbd", "50"), - ("9cbf", "50"), - ("9cc1", "50"), - ("9cc3", "40"), - ("9cc5", "40"), - ("9cc7", "40"), - ("9cc9", "0a"), - ("9ccb", "0a"), - ("9ccd", "0a"), - ("9d17", "35"), - ("9d1d", "31"), - ("9d29", "50"), - ("9d3b", "2f"), - ("9d41", "6b"), - ("9d47", "42"), - ("9d4d", "5a"), - ("9d6b", "00"), - ("9d71", "c8"), - ("9d73", "32"), - ("9d75", "04"), - ("9d7d", "42"), - ("9d83", "5a"), - ("9d94", "3f"), - ("9d95", "3f"), - ("9d96", "3f"), - ("9d97", "00"), - ("9d98", "00"), - ("9d99", "00"), - ("9d9a", "3f"), - ("9d9b", "3f"), - ("9d9c", "3f"), - ("9d9d", "1f"), - ("9d9e", "1f"), - ("9d9f", "1f"), - ("9da0", "0f"), - ("9da1", "0f"), - ("9da2", "0f"), - ("9da3", "00"), - ("9da4", "00"), - ("9da5", "00"), - ("9da6", "1e"), - ("9da7", "1e"), - ("9da8", "1e"), - ("9da9", "00"), - ("9daa", "00"), - ("9dab", "00"), - ("9dac", "09"), - ("9dad", "09"), - ("9dae", "09"), - ("9dc9", "0a"), - ("9dcb", "0a"), - ("9dcd", "0a"), - ("9e17", "35"), - ("9e1d", "31"), - ("9e29", "50"), - ("9e3b", "2f"), - ("9e41", "6b"), - ("9e47", "2d"), - ("9e4d", "40"), - ("9e6b", "00"), - ("9e71", "c8"), - ("9e73", "32"), - ("9e75", "04"), - ("9e94", "0f"), - ("9e95", "0f"), - ("9e96", "0f"), - ("9e97", "00"), - ("9e98", "00"), - ("9e99", "00"), - ("9ea0", "0f"), - ("9ea1", "0f"), - ("9ea2", "0f"), - ("9ea3", "00"), - ("9ea4", "00"), - ("9ea5", "00"), - ("9ea6", "3f"), - ("9ea7", "3f"), - ("9ea8", "3f"), - ("9ea9", "00"), - ("9eaa", "00"), - ("9eab", "00"), - ("9eac", "09"), - ("9ead", "09"), - ("9eae", "09"), - ("9ec9", "0a"), - ("9ecb", "0a"), - ("9ecd", "0a"), - ("9f17", "35"), - ("9f1d", "31"), - ("9f29", "50"), - ("9f3b", "2f"), - ("9f41", "6b"), - ("9f47", "42"), - ("9f4d", "5a"), - ("9f6b", "00"), - ("9f71", "c8"), - 
("9f73", "32"), - ("9f75", "04"), - ("9f94", "0f"), - ("9f95", "0f"), - ("9f96", "0f"), - ("9f97", "00"), - ("9f98", "00"), - ("9f99", "00"), - ("9f9a", "2f"), - ("9f9b", "2f"), - ("9f9c", "2f"), - ("9f9d", "00"), - ("9f9e", "00"), - ("9f9f", "00"), - ("9fa0", "0f"), - ("9fa1", "0f"), - ("9fa2", "0f"), - ("9fa3", "00"), - ("9fa4", "00"), - ("9fa5", "00"), - ("9fa6", "1e"), - ("9fa7", "1e"), - ("9fa8", "1e"), - ("9fa9", "00"), - ("9faa", "00"), - ("9fab", "00"), - ("9fac", "09"), - ("9fad", "09"), - ("9fae", "09"), - ("9fc9", "0a"), - ("9fcb", "0a"), - ("9fcd", "0a"), - ("a14b", "ff"), - ("a151", "0c"), - ("a153", "50"), - ("a155", "02"), - ("a157", "00"), - ("a1ad", "ff"), - ("a1b3", "0c"), - ("a1b5", "50"), - ("a1b9", "00"), - ("a24b", "ff"), - ("a257", "00"), - ("a2ad", "ff"), - ("a2b9", "00"), - ("b21f", "04"), - ("b35c", "00"), - ("b35e", "08"), - ("0220", "00"), - ("0221", "11"), - ("3140", "02"), - ("3c00", "00"), - ("3c01", "03"), - ("3c02", "a2"), - ("3f0d", "01"), - ("5748", "07"), - ("5749", "ff"), - ("574a", "00"), - ("574b", "00"), - ("7b53", "01"), - ("9369", "73"), - ("936b", "64"), - ("936d", "5f"), - ("9304", "00"), - ("9305", "00"), - ("9e9a", "2f"), - ("9e9b", "2f"), - ("9e9c", "2f"), - ("9e9d", "00"), - ("9e9e", "00"), - ("9e9f", "00"), - ("a2a9", "60"), - ("a2b7", "00"), - ("e04c", "00"), - ("e04d", "7f"), - ("e04e", "00"), - ("e04f", "1f"), - ("3e20", "01"), - ("3e37", "00"), - ("0114", "03"), - ("0112", "08"), - ("0113", "08"), - ("0808", "00"), - ("080a", "00"), - ("080b", "7f"), - ("080c", "00"), - ("080d", "4f"), - ("080e", "00"), - ("080f", "77"), - ("0810", "00"), - ("0811", "5f"), - ("0812", "00"), - ("0813", "57"), - ("0814", "00"), - ("0815", "4f"), - ("0816", "01"), - ("0817", "27"), - ("0818", "00"), - ("0819", "3f"), - ("0820", "41"), - ("0821", "0A"), - ("0822", "00"), - ("0823", "00"), - ("0101", "00"), - ("0340", "08"), - ("0341", "ca"), - ("0342", "19"), - ("0343", "80"), - ("3237", "00"), - ("3900", "00"), - ("3901", "00"), - 
("BCF1", "02"), - ("0136", "18"), - ("0137", "00"), - ("0310", "01"), - ("0305", "04"), - ("0306", "01"), - ("0307", "5E"), - ("0303", "02"), - ("0301", "05"), - ("030B", "02"), - ("0309", "08"), - ("030d", "04"), - ("030e", "01"), - ("030f", "5E"), - ("00E3", "00"), - ("00E4", "00"), - ("0900", "00"), - ("0901", "00"), - ("0902", "02"), - ("0381", "01"), - ("0383", "01"), - ("0385", "01"), - ("0387", "01"), - ("0408", "00"), - ("0409", "00"), - ("040a", "00"), - ("040b", "00"), - ("040c", "0f"), - ("040d", "d8"), # d8 - ("040e", "08"), # 08 - ("040f", "70"), # 38 - ("0401", "00"), - ("0404", "00"), - ("0405", "20"), - ("034C", "0f"), - ("034D", "00"), - ("034E", "08"), - ("034F", "70"), - ("0200", "07"), - ("0201", "90"), - ("0350", "00"), - ("3F50", "00"), - ("3F56", "01"), - ("3F57", "41"), - ("0204", "01"), - ("0205", "60"), - ("3ff9", "00"), - ("020e", "03"), - ("020f", "50"), - ("0210", "05"), - ("0211", "03"), - ("0212", "04"), - ("0213", "6A"), - ("0214", "03"), - ("0215", "50"), - ("3030", "00"), - ("3032", "01"), - ("3033", "00"), - ("0B05", "00"), - ("0B06", "00"), - ("3100", "00"), - ("0600", "00"), - ("0601", "00"), - ("0602", "00"), - ("0603", "00"), - ("0604", "00"), - ("0605", "00"), - ("0606", "00"), - ("0607", "00"), - ("0608", "00"), - ("0609", "00"), - (IMX477_TABLE_END, IMX477_WAIT_MS), - ] -) - imx477_mode_3840X2160_60fps = [ - (int(x, 16), int(y, 16)) for x, y in imx477_mode_3840X2160_60fps.items() + (IMX477_TABLE_WAIT_MS, IMX477_WAIT_MS), + (0xE000, 0x00), + (0xE07A, 0x01), + (0x4AE9, 0x18), + (0x4AEA, 0x08), + (0xF61C, 0x04), + (0xF61E, 0x04), + (0x4AE9, 0x21), + (0x4AEA, 0x80), + (0x38A8, 0x1F), + (0x38A9, 0xFF), + (0x38AA, 0x1F), + (0x38AB, 0xFF), + (0x55D4, 0x00), + (0x55D5, 0x00), + (0x55D6, 0x07), + (0x55D7, 0xFF), + (0x55E8, 0x07), + (0x55E9, 0xFF), + (0x55EA, 0x00), + (0x55EB, 0x00), + (0x574C, 0x07), + (0x574D, 0xFF), + (0x574E, 0x00), + (0x574F, 0x00), + (0x5754, 0x00), + (0x5755, 0x00), + (0x5756, 0x07), + (0x5757, 0xFF), + 
(0x5973, 0x04), + (0x5974, 0x01), + (0x5D13, 0xC3), + (0x5D14, 0x58), + (0x5D15, 0xA3), + (0x5D16, 0x1D), + (0x5D17, 0x65), + (0x5D18, 0x8C), + (0x5D1A, 0x06), + (0x5D1B, 0xA9), + (0x5D1C, 0x45), + (0x5D1D, 0x3A), + (0x5D1E, 0xAB), + (0x5D1F, 0x15), + (0x5D21, 0x0E), + (0x5D22, 0x52), + (0x5D23, 0xAA), + (0x5D24, 0x7D), + (0x5D25, 0x57), + (0x5D26, 0xA8), + (0x5D37, 0x5A), + (0x5D38, 0x5A), + (0x5D77, 0x7F), + (0x7B75, 0x0E), + (0x7B76, 0x0B), + (0x7B77, 0x08), + (0x7B78, 0x0A), + (0x7B79, 0x47), + (0x7B7C, 0x00), + (0x7B7D, 0x00), + (0x8D1F, 0x00), + (0x8D27, 0x00), + (0x9004, 0x03), + (0x9200, 0x50), + (0x9201, 0x6C), + (0x9202, 0x71), + (0x9203, 0x00), + (0x9204, 0x71), + (0x9205, 0x01), + (0x9371, 0x6A), + (0x9373, 0x6A), + (0x9375, 0x64), + (0x991A, 0x00), + (0x996B, 0x8C), + (0x996C, 0x64), + (0x996D, 0x50), + (0x9A4C, 0x0D), + (0x9A4D, 0x0D), + (0xA001, 0x0A), + (0xA003, 0x0A), + (0xA005, 0x0A), + (0xA006, 0x01), + (0xA007, 0xC0), + (0xA009, 0xC0), + (0x3D8A, 0x01), + (0x4421, 0x04), + (0x7B3B, 0x01), + (0x7B4C, 0x00), + (0x9905, 0x00), + (0x9907, 0x00), + (0x9909, 0x00), + (0x990B, 0x00), + (0x9944, 0x3C), + (0x9947, 0x3C), + (0x994A, 0x8C), + (0x994B, 0x50), + (0x994C, 0x1B), + (0x994D, 0x8C), + (0x994E, 0x50), + (0x994F, 0x1B), + (0x9950, 0x8C), + (0x9951, 0x1B), + (0x9952, 0x0A), + (0x9953, 0x8C), + (0x9954, 0x1B), + (0x9955, 0x0A), + (0x9A13, 0x04), + (0x9A14, 0x04), + (0x9A19, 0x00), + (0x9A1C, 0x04), + (0x9A1D, 0x04), + (0x9A26, 0x05), + (0x9A27, 0x05), + (0x9A2C, 0x01), + (0x9A2D, 0x03), + (0x9A2F, 0x05), + (0x9A30, 0x05), + (0x9A41, 0x00), + (0x9A46, 0x00), + (0x9A47, 0x00), + (0x9C17, 0x35), + (0x9C1D, 0x31), + (0x9C29, 0x50), + (0x9C3B, 0x2F), + (0x9C41, 0x6B), + (0x9C47, 0x2D), + (0x9C4D, 0x40), + (0x9C6B, 0x00), + (0x9C71, 0xC8), + (0x9C73, 0x32), + (0x9C75, 0x04), + (0x9C7D, 0x2D), + (0x9C83, 0x40), + (0x9C94, 0x3F), + (0x9C95, 0x3F), + (0x9C96, 0x3F), + (0x9C97, 0x00), + (0x9C98, 0x00), + (0x9C99, 0x00), + (0x9C9A, 0x3F), + (0x9C9B, 0x3F), + 
(0x9C9C, 0x3F), + (0x9CA0, 0x0F), + (0x9CA1, 0x0F), + (0x9CA2, 0x0F), + (0x9CA3, 0x00), + (0x9CA4, 0x00), + (0x9CA5, 0x00), + (0x9CA6, 0x1E), + (0x9CA7, 0x1E), + (0x9CA8, 0x1E), + (0x9CA9, 0x00), + (0x9CAA, 0x00), + (0x9CAB, 0x00), + (0x9CAC, 0x09), + (0x9CAD, 0x09), + (0x9CAE, 0x09), + (0x9CBD, 0x50), + (0x9CBF, 0x50), + (0x9CC1, 0x50), + (0x9CC3, 0x40), + (0x9CC5, 0x40), + (0x9CC7, 0x40), + (0x9CC9, 0x0A), + (0x9CCB, 0x0A), + (0x9CCD, 0x0A), + (0x9D17, 0x35), + (0x9D1D, 0x31), + (0x9D29, 0x50), + (0x9D3B, 0x2F), + (0x9D41, 0x6B), + (0x9D47, 0x42), + (0x9D4D, 0x5A), + (0x9D6B, 0x00), + (0x9D71, 0xC8), + (0x9D73, 0x32), + (0x9D75, 0x04), + (0x9D7D, 0x42), + (0x9D83, 0x5A), + (0x9D94, 0x3F), + (0x9D95, 0x3F), + (0x9D96, 0x3F), + (0x9D97, 0x00), + (0x9D98, 0x00), + (0x9D99, 0x00), + (0x9D9A, 0x3F), + (0x9D9B, 0x3F), + (0x9D9C, 0x3F), + (0x9D9D, 0x1F), + (0x9D9E, 0x1F), + (0x9D9F, 0x1F), + (0x9DA0, 0x0F), + (0x9DA1, 0x0F), + (0x9DA2, 0x0F), + (0x9DA3, 0x00), + (0x9DA4, 0x00), + (0x9DA5, 0x00), + (0x9DA6, 0x1E), + (0x9DA7, 0x1E), + (0x9DA8, 0x1E), + (0x9DA9, 0x00), + (0x9DAA, 0x00), + (0x9DAB, 0x00), + (0x9DAC, 0x09), + (0x9DAD, 0x09), + (0x9DAE, 0x09), + (0x9DC9, 0x0A), + (0x9DCB, 0x0A), + (0x9DCD, 0x0A), + (0x9E17, 0x35), + (0x9E1D, 0x31), + (0x9E29, 0x50), + (0x9E3B, 0x2F), + (0x9E41, 0x6B), + (0x9E47, 0x2D), + (0x9E4D, 0x40), + (0x9E6B, 0x00), + (0x9E71, 0xC8), + (0x9E73, 0x32), + (0x9E75, 0x04), + (0x9E94, 0x0F), + (0x9E95, 0x0F), + (0x9E96, 0x0F), + (0x9E97, 0x00), + (0x9E98, 0x00), + (0x9E99, 0x00), + (0x9EA0, 0x0F), + (0x9EA1, 0x0F), + (0x9EA2, 0x0F), + (0x9EA3, 0x00), + (0x9EA4, 0x00), + (0x9EA5, 0x00), + (0x9EA6, 0x3F), + (0x9EA7, 0x3F), + (0x9EA8, 0x3F), + (0x9EA9, 0x00), + (0x9EAA, 0x00), + (0x9EAB, 0x00), + (0x9EAC, 0x09), + (0x9EAD, 0x09), + (0x9EAE, 0x09), + (0x9EC9, 0x0A), + (0x9ECB, 0x0A), + (0x9ECD, 0x0A), + (0x9F17, 0x35), + (0x9F1D, 0x31), + (0x9F29, 0x50), + (0x9F3B, 0x2F), + (0x9F41, 0x6B), + (0x9F47, 0x42), + (0x9F4D, 0x5A), + (0x9F6B, 0x00), + 
(0x9F71, 0xC8), + (0x9F73, 0x32), + (0x9F75, 0x04), + (0x9F94, 0x0F), + (0x9F95, 0x0F), + (0x9F96, 0x0F), + (0x9F97, 0x00), + (0x9F98, 0x00), + (0x9F99, 0x00), + (0x9F9A, 0x2F), + (0x9F9B, 0x2F), + (0x9F9C, 0x2F), + (0x9F9D, 0x00), + (0x9F9E, 0x00), + (0x9F9F, 0x00), + (0x9FA0, 0x0F), + (0x9FA1, 0x0F), + (0x9FA2, 0x0F), + (0x9FA3, 0x00), + (0x9FA4, 0x00), + (0x9FA5, 0x00), + (0x9FA6, 0x1E), + (0x9FA7, 0x1E), + (0x9FA8, 0x1E), + (0x9FA9, 0x00), + (0x9FAA, 0x00), + (0x9FAB, 0x00), + (0x9FAC, 0x09), + (0x9FAD, 0x09), + (0x9FAE, 0x09), + (0x9FC9, 0x0A), + (0x9FCB, 0x0A), + (0x9FCD, 0x0A), + (0xA14B, 0xFF), + (0xA151, 0x0C), + (0xA153, 0x50), + (0xA155, 0x02), + (0xA157, 0x00), + (0xA1AD, 0xFF), + (0xA1B3, 0x0C), + (0xA1B5, 0x50), + (0xA1B9, 0x00), + (0xA24B, 0xFF), + (0xA257, 0x00), + (0xA2AD, 0xFF), + (0xA2B9, 0x00), + (0xB21F, 0x04), + (0xB35C, 0x00), + (0xB35E, 0x08), + (0x0220, 0x00), + (0x0221, 0x11), + (0x3140, 0x02), + (0x3C00, 0x00), + (0x3C01, 0x03), + (0x3C02, 0xA2), + (0x3F0D, 0x01), + (0x5748, 0x07), + (0x5749, 0xFF), + (0x574A, 0x00), + (0x574B, 0x00), + (0x7B53, 0x01), + (0x9369, 0x73), + (0x936B, 0x64), + (0x936D, 0x5F), + (0x9304, 0x00), + (0x9305, 0x00), + (0x9E9A, 0x2F), + (0x9E9B, 0x2F), + (0x9E9C, 0x2F), + (0x9E9D, 0x00), + (0x9E9E, 0x00), + (0x9E9F, 0x00), + (0xA2A9, 0x60), + (0xA2B7, 0x00), + (0xE04C, 0x00), + (0xE04D, 0x7F), + (0xE04E, 0x00), + (0xE04F, 0x1F), + (0x3E20, 0x01), + (0x3E37, 0x00), + (0x0114, 0x03), + (0x0112, 0x08), + (0x0113, 0x08), + (0x0808, 0x00), + (0x080A, 0x00), + (0x080B, 0x7F), + (0x080C, 0x00), + (0x080D, 0x4F), + (0x080E, 0x00), + (0x080F, 0x77), + (0x0810, 0x00), + (0x0811, 0x5F), + (0x0812, 0x00), + (0x0813, 0x57), + (0x0814, 0x00), + (0x0815, 0x4F), + (0x0816, 0x01), + (0x0817, 0x27), + (0x0818, 0x00), + (0x0819, 0x3F), + (0x0820, 0x41), + (0x0821, 0x0A), + (0x0822, 0x00), + (0x0823, 0x00), + (0x0101, 0x00), + (0x0340, 0x08), + (0x0341, 0xCA), + (0x0342, 0x19), + (0x0343, 0x80), + (0x3237, 0x00), + (0x3900, 0x00), + 
(0x3901, 0x00), + (0xBCF1, 0x02), + (0x0136, 0x18), + (0x0137, 0x00), + (0x0310, 0x01), + (0x0305, 0x04), + (0x0306, 0x01), + (0x0307, 0x5E), + (0x0303, 0x02), + (0x0301, 0x05), + (0x030B, 0x02), + (0x0309, 0x08), + (0x030D, 0x04), + (0x030E, 0x01), + (0x030F, 0x5E), + (0x00E3, 0x00), + (0x00E4, 0x00), + (0x0900, 0x00), + (0x0901, 0x00), + (0x0902, 0x02), + (0x0381, 0x01), + (0x0383, 0x01), + (0x0385, 0x01), + (0x0387, 0x01), + (0x0408, 0x00), + (0x0409, 0x00), + (0x040A, 0x00), + (0x040B, 0x00), + (0x040C, 0x0F), + (0x040D, 0xD8), # d8 + (0x040E, 0x08), # 08 + (0x040F, 0x70), # 38 + (0x0401, 0x00), + (0x0404, 0x00), + (0x0405, 0x20), + (0x034C, 0x0F), + (0x034D, 0x00), + (0x034E, 0x08), + (0x034F, 0x70), + (0x0200, 0x07), + (0x0201, 0x90), + (0x0350, 0x00), + (0x3F50, 0x00), + (0x3F56, 0x01), + (0x3F57, 0x41), + (0x0204, 0x01), + (0x0205, 0x60), + (0x3FF9, 0x00), + (0x020E, 0x03), + (0x020F, 0x50), + (0x0210, 0x05), + (0x0211, 0x03), + (0x0212, 0x04), + (0x0213, 0x6A), + (0x0214, 0x03), + (0x0215, 0x50), + (0x3030, 0x00), + (0x3032, 0x01), + (0x3033, 0x00), + (0x0B05, 0x00), + (0x0B06, 0x00), + (0x3100, 0x00), + (0x0600, 0x00), + (0x0601, 0x00), + (0x0602, 0x00), + (0x0603, 0x00), + (0x0604, 0x00), + (0x0605, 0x00), + (0x0606, 0x00), + (0x0607, 0x00), + (0x0608, 0x00), + (0x0609, 0x00), + (IMX477_TABLE_END, IMX477_WAIT_MS), ] diff --git a/python/hololink/sensors/udp_cam.py b/python/hololink/sensors/udp_cam.py index b1259a5..1144296 100644 --- a/python/hololink/sensors/udp_cam.py +++ b/python/hololink/sensors/udp_cam.py @@ -133,6 +133,3 @@ def configure_converter(self, converter): line_start_size, line_end_size, ) - - -VIRTUAL_PORTS = [0x1A00, 0x1C00] diff --git a/python/hololink/operators/gamma_correction/CMakeLists.txt b/python/hololink/tools.py similarity index 70% rename from python/hololink/operators/gamma_correction/CMakeLists.txt rename to python/hololink/tools.py index 9a36661..876146b 100644 --- a/python/hololink/operators/gamma_correction/CMakeLists.txt 
+++ b/python/hololink/tools.py @@ -13,11 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -include(pybind11_add_hololink_module) +# See README.md for detailed information. -pybind11_add_hololink_module( - CPP_CMAKE_TARGET gamma_correction - CLASS_NAME "GammaCorrectionOp" - IMPORT "import holoscan.core" - SOURCES gamma_correction.cpp -) +import os + + +# +def infiniband_devices(): + """ + Return a sorted list of Infiniband devices. + """ + files = [] + try: + files = os.listdir("/sys/class/infiniband") + except FileNotFoundError: + pass + return sorted(files) diff --git a/python/tools/hololink.py b/python/tools/hololink.py index 0d2466a..8c488ec 100644 --- a/python/tools/hololink.py +++ b/python/tools/hololink.py @@ -149,7 +149,7 @@ def call_back(enumerator, packet, metadata): def _make_bootp_reply(metadata, new_device_ip, local_ip): reply = bytearray(1000) - serializer = hololink_module.native.Serializer(reply) + serializer = hololink_module.Serializer(reply) BOOTREPLY = 2 serializer.append_uint8(BOOTREPLY) # opcode serializer.append_uint8(metadata["hardware_type"]) @@ -857,7 +857,7 @@ def __init__(self, programmer): def board_id_ok(self, board_id): board_ids = [ - hololink_module.HOLOLINK_BOARD_ID, + hololink_module.HOLOLINK_NANO_BOARD_ID, hololink_module.HOLOLINK_LITE_BOARD_ID, ] return board_id in board_ids @@ -923,7 +923,7 @@ def fetch_manifest(self, section): raise Exception('Unsupported strategy "{strategy_name}" specified.') self._strategy = strategy_constructor(self) if "licenses" not in self._manifest: - # Note that removing the "license" section from the manifest file, + # Note that removing the "licenses" section from the manifest file, # in order to achieve this condition, constitutes agreement with it. 
self._skip_eula = True @@ -1029,8 +1029,11 @@ def manual_enumeration(args): m = { "configuration_address": 0, "control_port": 8192, - "cpnx_version": 0x2402, + "cpnx_version": 0x2410, + "data_plane": 0, "peer_ip": args.hololink, + "sensor": 0, + "sequence_number_checking": 0, "serial_number": "100", "vip_mask": 0, "board_id": 2, @@ -1045,7 +1048,7 @@ def _program(args): programmer.fetch_manifest("hololink") programmer.check_eula(args) programmer.check_images() - if args.manual_enumeration: + if args.force: channel_metadata = manual_enumeration(args) else: channel_metadata = hololink_module.Enumerator.find_channel( @@ -1064,9 +1067,9 @@ def main(): help="IP address of Hololink board", ) parser.add_argument( - "--manual-enumeration", + "--force", action="store_true", - help="Don't use reported enumeration data from the board", + help="Don't rely on enumeration data for device connection.", ) parser.add_argument( "--log-level", @@ -1233,7 +1236,7 @@ def main(): if args.needs_hololink: # - if args.manual_enumeration: + if args.force: channel_metadata = manual_enumeration(args) else: channel_metadata = hololink_module.Enumerator.find_channel( diff --git a/scripts/manifest.yaml b/scripts/manifest.yaml index d8fef28..8ee53d2 100644 --- a/scripts/manifest.yaml +++ b/scripts/manifest.yaml @@ -1,28 +1,28 @@ hololink: archive: - enrollment_date: '2025-01-16T21:35:58.659160+00:00' - version: '2407' + enrollment_date: '2025-01-15T21:49:41.070318+00:00' + version: '2412' content: NVIDIA_RTL_License_Agreement.txt: md5: e8c77cea2712a6e3883c49b063ebc816 size: 16929 - url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=NVIDIA_RTL_License_Agreement.txt - fpga_clnx_v2407.bit: - md5: 673db16ede425bd77b313bfc40f82588 - size: 383843 - url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=fpga_clnx_v2407.bit - 
fpga_cpnx_v2407.bit: - md5: 6ad1a7d71b12ff26bcf7541c80ddd16e - size: 1960836 - url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=fpga_cpnx_v2407.bit + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2412/files?redirect=true&path=NVIDIA_RTL_License_Agreement.txt + fpga_clnx_2412.bit: + md5: 2db576e58caa97ff034f6e9ad986402c + size: 376082 + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2412/files?redirect=true&path=fpga_clnx_2412.bit + fpga_cpnx_2412.bit: + md5: b8743ff78238834f98a2c319d4b1f226 + size: 1961524 + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2412/files?redirect=true&path=fpga_cpnx_2412.bit hololink-hdl.zip: - md5: 21de471e09dce015946bd11f02bcd2b6 - size: 1294292 - url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=hololink-hdl.zip + md5: 4f0f24cfe3ebe163ee6fb30c6c4ffd72 + size: 1399259 + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2412/files?redirect=true&path=hololink-hdl.zip images: - - content: fpga_clnx_v2407.bit + - content: fpga_clnx_2412.bit context: clnx - - content: fpga_cpnx_v2407.bit + - content: fpga_cpnx_2412.bit context: cpnx licenses: - NVIDIA_RTL_License_Agreement.txt diff --git a/src/hololink/CMakeLists.txt b/src/hololink/CMakeLists.txt index de79b0c..5b1b768 100644 --- a/src/hololink/CMakeLists.txt +++ b/src/hololink/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(hololink STATIC data_channel.cpp enumerator.cpp hololink.cpp + logging.cpp metadata.cpp timeout.cpp ) diff --git a/src/hololink/data_channel.cpp b/src/hololink/data_channel.cpp index 1b29550..beac962 100644 --- a/src/hololink/data_channel.cpp +++ 
b/src/hololink/data_channel.cpp @@ -19,18 +19,33 @@ #include -#include +#include namespace hololink { namespace { - // This memory map used by the Enumeraotr is only supported on CPNX FPGAs that are this + // This memory map used by the Enumerator is only supported on CPNX FPGAs that are this // version or newer. - constexpr int64_t MINIMUM_CPNX_VERSION = 0x2402; + constexpr int64_t MINIMUM_CPNX_VERSION = 0x2410; - // Camera Receiver interfaces - constexpr uint32_t VP_START[] { 0x00, 0x80 }; + // Distance between sensor configuration blocks + constexpr uint32_t SENSOR_CONFIGURATION_SIZE = 0x40; + + /** Hololink-lite data plane configuration is implied by the value + * passed in the bootp transaction_id field, which is coopted + * by FPGA to imply which port is publishing the request. We use + * that port ID to figure out what the address of the port's + * configuration data is; which is the value listed here. + */ + struct HololinkChannelConfiguration { + uint32_t configuration_address; + uint32_t vip_mask; + }; + static const std::map BOOTP_TRANSACTION_ID_MAP { + { 0, HololinkChannelConfiguration { 0x02000000, 0x1 } }, + { 1, HololinkChannelConfiguration { 0x02010000, 0x2 } }, + }; } // anonymous namespace @@ -45,11 +60,28 @@ DataChannel::DataChannel(const Metadata& metadata, const std::function("configuration_address").value(); + network_configuration_address_ = metadata.get("configuration_address").value(); peer_ip_ = metadata.get("peer_ip").value(); vip_mask_ = metadata.get("vip_mask").value(); + data_plane_ = metadata.get("data_plane").value(); + sensor_ = metadata.get("sensor").value(); + sensor_configuration_address_ = SENSOR_CONFIGURATION_SIZE * sensor_; qp_number_ = 0; rkey_ = 0; + multicast_ = ""; + multicast_port_ = 0; + broadcast_port_ = 0; + auto multicast = metadata.get("multicast"); + auto multicast_port = metadata.get("multicast_port"); + auto broadcast_port = metadata.get("broadcast_port"); + if (broadcast_port) { + broadcast_port_ = 
static_cast(broadcast_port.value()); + HSB_LOG_INFO(fmt::format("DataChannel broadcast port={}.", broadcast_port_)); + } else if (multicast && multicast_port) { + multicast_ = multicast.value(); + multicast_port_ = static_cast(multicast_port.value()); + HSB_LOG_INFO(fmt::format("DataChannel multicast address={} port={}.", multicast_, multicast_port_)); + } } /*static*/ bool DataChannel::enumerated(const Metadata& metadata) @@ -60,9 +92,23 @@ DataChannel::DataChannel(const Metadata& metadata, const std::function("peer_ip")) { return false; } + if (!metadata.get("data_plane")) { + return false; + } return Hololink::enumerated(metadata); } +/* static */ void DataChannel::use_multicast(Metadata& metadata, std::string address, uint16_t port) +{ + metadata["multicast"] = address; + metadata["multicast_port"] = static_cast(port); +} + +/* static */ void DataChannel::use_broadcast(Metadata& metadata, uint16_t port) +{ + metadata["broadcast_port"] = static_cast(port); +} + std::shared_ptr DataChannel::hololink() const { return hololink_; } const std::string& DataChannel::peer_ip() const { return peer_ip_; } @@ -73,64 +119,207 @@ void DataChannel::authenticate(uint32_t qp_number, uint32_t rkey) rkey_ = rkey; } -void DataChannel::configure(uint64_t frame_address, uint64_t frame_size, uint32_t local_data_port) +static uint32_t compute_payload_size(uint32_t frame_size) { - const uint32_t header_size = 78; - const uint32_t cache_size = 128; const uint32_t mtu = 1472; // TCP/IP illustrated vol 1 (1994), section 11.6, page 151 - const uint32_t payload_size = ((mtu - header_size + cache_size - 1) / cache_size) * cache_size; + const uint32_t header_size = 78; + const uint32_t page_size = hololink::native::PAGE_SIZE; + const uint32_t payload_size = ((mtu - header_size + page_size - 1) / page_size) * page_size; const uint64_t packets = (frame_size + payload_size - 1) / payload_size; // round up - HOLOSCAN_LOG_INFO( + HSB_LOG_INFO( "header_size={} payload_size={} packets={}", 
header_size, payload_size, packets); - const std::string& peer_ip = this->peer_ip(); - auto [local_ip, local_device, local_mac] = native::local_ip_and_mac(peer_ip); - configure_internal(frame_size, payload_size, header_size, local_mac, local_ip, local_data_port, - qp_number_, rkey_, frame_address, frame_size); + return payload_size; } -bool DataChannel::write_uint32(uint32_t reg, uint32_t value) +void DataChannel::configure(uint64_t frame_memory, size_t frame_size, size_t page_size, unsigned pages, uint32_t local_data_port) { - return hololink_->write_uint32(address_ + reg, value); -} + // Contract enforcement + if (frame_memory & (hololink::native::PAGE_SIZE - 1)) { + throw std::runtime_error(fmt::format("frame_memory={:#x} must be {}-byte aligned.", frame_memory, hololink::native::PAGE_SIZE)); + } + if (page_size & (hololink::native::PAGE_SIZE - 1)) { + throw std::runtime_error(fmt::format("page_size={:#x} must be {}-byte aligned.", page_size, hololink::native::PAGE_SIZE)); + } + uint32_t aligned_frame_size = hololink::native::round_up(frame_size, hololink::native::PAGE_SIZE); + uint32_t metadata_size = hololink::native::PAGE_SIZE; + uint32_t aligned_frame_with_metadata = aligned_frame_size + metadata_size; + if (page_size < aligned_frame_with_metadata) { + throw std::runtime_error(fmt::format("page_size={:#x} must be at least {:#x} bytes.", page_size, aligned_frame_with_metadata)); + } + if (pages > 4) { + throw std::runtime_error(fmt::format("pages={} can be at most 4.", pages)); + } + if (pages < 1) { + throw std::runtime_error(fmt::format("pages={} must be at least 1.", pages)); + } -void DataChannel::configure_internal(uint64_t frame_size, uint32_t payload_size, - uint32_t header_size, const native::MacAddress& local_mac, const std::string& local_ip, - uint32_t local_data_port, uint32_t qp_number, uint32_t rkey, uint64_t address, uint64_t size) -{ - // This is for FPGA 0116 in classic data plane mode - const uint32_t mac_high = (local_mac[0] << 8) | 
(local_mac[1] << 0); - const uint32_t mac_low + // Ok, we're good. + uint32_t payload_size = compute_payload_size(frame_size); + + const std::string& peer_ip = this->peer_ip(); + auto [local_ip, local_device, local_mac] = native::local_ip_and_mac(peer_ip); + + // Data plane destination addresses + uint32_t mac_high = (local_mac[0] << 8) | (local_mac[1] << 0); + uint32_t mac_low = ((local_mac[2] << 24) | (local_mac[3] << 16) | (local_mac[4] << 8) | (local_mac[5] << 0)); + in_addr_t ip = inet_network(local_ip.c_str()); + uint32_t udp_port = local_data_port; - const in_addr_t ip = inet_network(local_ip.c_str()); + // Override those if we're using multicast or broadcast. + if (broadcast_port_) { + ip = INADDR_BROADCAST; + uint8_t broadcast_mac[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + mac_high = (broadcast_mac[0] << 8) | (broadcast_mac[1] << 0); + mac_low = ((broadcast_mac[2] << 24) | (broadcast_mac[3] << 16) | (broadcast_mac[4] << 8) | (broadcast_mac[5] << 0)); + udp_port = static_cast(broadcast_port_); + } else if (!multicast_.empty()) { + ip = inet_network(multicast_.c_str()); + uint8_t broadcast_mac[] = { 0x01, 0x00, 0x5E, static_cast((ip >> 16) & 0x7F), static_cast((ip >> 8) & 0xFF), static_cast((ip >> 0) & 0xFF) }; + mac_high = (broadcast_mac[0] << 8) | (broadcast_mac[1] << 0); + mac_low = ((broadcast_mac[2] << 24) | (broadcast_mac[3] << 16) | (broadcast_mac[4] << 8) | (broadcast_mac[5] << 0)); + udp_port = static_cast(multicast_port_); + } // Clearing DP_VIP_MASK should be unnecessary-- we should only // be here following a reset, but be defensive and make // sure we're not transmitting anything while we update. 
- write_uint32(DP_VIP_MASK, 0); - write_uint32(DP_PACKET_SIZE, header_size + payload_size); - write_uint32(DP_HOST_MAC_LOW, mac_low); - write_uint32(DP_HOST_MAC_HIGH, mac_high); - write_uint32(DP_HOST_IP, ip); - write_uint32(DP_HOST_UDP_PORT, local_data_port); +#define PAGES(x) ((x) >> 7) + hololink_->and_uint32(network_configuration_address_ + DP_VIP_MASK, ~vip_mask_); + hololink_->write_uint32(network_configuration_address_ + DP_PACKET_SIZE, PAGES(payload_size)); + // Write the addresses for the pages we'll receive into. + hololink_->write_uint32(sensor_configuration_address_ + DP_QP, qp_number_); + hololink_->write_uint32(sensor_configuration_address_ + DP_RKEY, rkey_); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_0, (pages > 0) ? PAGES(frame_memory) : 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_1, (pages > 1) ? PAGES(frame_memory + page_size) : 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_2, (pages > 2) ? PAGES(frame_memory + (page_size * 2)) : 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_3, (pages > 3) ? PAGES(frame_memory + (page_size * 3)) : 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_BUFFER_LENGTH, frame_size); + hololink_->write_uint32(sensor_configuration_address_ + DP_BUFFER_MASK, (1 << pages) - 1); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_MAC_LOW, mac_low); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_MAC_HIGH, mac_high); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_IP, ip); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_UDP_PORT, udp_port); + // 0x1 meaning to connect sensor 1 to the current ethernet port + hololink_->or_uint32(network_configuration_address_ + DP_VIP_MASK, vip_mask_); +} + +void DataChannel::unconfigure() +{ + // This stops transmission. 
+ hololink_->and_uint32(network_configuration_address_ + DP_VIP_MASK, ~vip_mask_); // - // "31:28 = end buf - // 27:24 = start buf - // 23: 0 = qp" - // Only use DMA descriptor ("buf") 0. - // We write the same addressing information into both VPs for - // this ethernet port; DP_VIP_MASK from the map above selects - // which one of these is actually used in the hardware. - for (auto&& vp : VP_START) { - write_uint32(DP_ROCE_CFG + vp, qp_number & 0x00FF'FFFF); - write_uint32(DP_ROCE_RKEY_0 + vp, rkey); - write_uint32(DP_ROCE_VADDR_MSB_0 + vp, (address >> 32)); - write_uint32(DP_ROCE_VADDR_LSB_0 + vp, (address & 0xFFFF'FFFF)); - write_uint32(DP_ROCE_BUF_END_MSB_0 + vp, ((address + size) >> 32)); - write_uint32(DP_ROCE_BUF_END_LSB_0 + vp, ((address + size) & 0xFFFF'FFFF)); + hololink_->write_uint32(sensor_configuration_address_ + DP_BUFFER_MASK, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_BUFFER_LENGTH, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_QP, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_RKEY, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_0, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_1, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_2, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_ADDRESS_3, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_MAC_LOW, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_MAC_HIGH, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_IP, 0); + hololink_->write_uint32(sensor_configuration_address_ + DP_HOST_UDP_PORT, 0); + // +} + +void DataChannel::configure_socket(int socket_fd) +{ + const std::string& peer_ip = this->peer_ip(); + auto [local_ip, local_device, local_mac] = native::local_ip_and_mac(peer_ip); + + if (broadcast_port_) { + // + HSB_LOG_INFO(fmt::format("DataChannel configure_socket socket_fd={} 
broadcast_port={} local_ip={} local_device={}.", socket_fd, broadcast_port_, local_ip, local_device)); + int on = 1; + // Allow us to receive broadcast. + int r = setsockopt(socket_fd, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on)); + if (r != 0) { + throw std::runtime_error(fmt::format("SO_BROADCAST failed with errno={}: \"{}\"", errno, strerror(errno))); + } + // Allow other programs to receive these broadcast packets. + r = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)); + if (r != 0) { + throw std::runtime_error(fmt::format("SO_REUSEPORT failed with errno={}: \"{}\"", errno, strerror(errno))); + } + // We use INADDR_ANY to receive but listen only to the specific receiver interface. + r = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, local_device.data(), local_device.size()); + if (r != 0) { + throw std::runtime_error(fmt::format("SO_BINDTODEVICE failed with errno={}: \"{}\"", errno, strerror(errno))); + } + // Configure what we listen to. + sockaddr_in address {}; + address.sin_family = AF_INET; + address.sin_port = htons(broadcast_port_); + address.sin_addr.s_addr = INADDR_ANY; + if (bind(socket_fd, (sockaddr*)&address, sizeof(address)) < 0) { + throw std::runtime_error(fmt::format("bind failed with errno={}: \"{}\"", errno, strerror(errno))); + } + HSB_LOG_INFO(fmt::format("(done) DataChannel configure_socket socket_fd={} multicast_address={}.", socket_fd, multicast_)); + } else if (!multicast_.empty()) { + // + HSB_LOG_INFO(fmt::format("DataChannel configure_socket socket_fd={} multicast_address={} multicast_port={} local_ip={} local_device={}.", socket_fd, multicast_, multicast_port_, local_ip, local_device)); + struct ip_mreq mreq = {}; // fill with zeros. 
+ int r = inet_aton(multicast_.c_str(), &mreq.imr_multiaddr); + if (r) { + r = inet_aton(local_ip.c_str(), &mreq.imr_interface); + } + if (!r) { + throw std::runtime_error(fmt::format("DataChannel failed socket configuration for multicast={}.", multicast_)); + } + r = setsockopt(socket_fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); + if (r) { + throw std::runtime_error(fmt::format("DataChannel failed to set socket configuration for multicast={}, errno={}.", multicast_, errno)); + } + sockaddr_in address {}; + address.sin_family = AF_INET; + address.sin_port = htons(multicast_port_); + if (inet_pton(AF_INET, multicast_.c_str(), &address.sin_addr) != 1) { + throw std::runtime_error( + fmt::format("Failed to convert address {}", multicast_)); + } + + if (bind(socket_fd, (sockaddr*)&address, sizeof(address)) < 0) { + throw std::runtime_error(fmt::format("bind failed with errno={}: \"{}\"", errno, strerror(errno))); + } + HSB_LOG_INFO(fmt::format("(done) DataChannel configure_socket socket_fd={} multicast_address={}.", socket_fd, multicast_)); + } else { + // Not multicast; use bind(local_ip,0) so that the kernel assigns us a UDP port. 
+ sockaddr_in address {}; + address.sin_family = AF_INET; + address.sin_port = htons(0); + if (inet_pton(AF_INET, local_ip.c_str(), &address.sin_addr) != 1) { + throw std::runtime_error( + fmt::format("Failed to convert address {}", local_ip)); + } + + if (bind(socket_fd, (sockaddr*)&address, sizeof(address)) < 0) { + throw std::runtime_error(fmt::format("bind failed with errno={}: \"{}\"", errno, strerror(errno))); + } + HSB_LOG_INFO(fmt::format("(done) DataChannel configure_socket socket_fd={} local_ip={}.", socket_fd, local_ip)); } - // 0x1 meaning to connect sensor 1 to the current ethernet port - write_uint32(DP_VIP_MASK, vip_mask_); +} + +/* static */ void DataChannel::use_data_plane_configuration(Metadata& metadata, int64_t data_plane) +{ + auto channel_configuration = BOOTP_TRANSACTION_ID_MAP.find(data_plane); + if (channel_configuration == BOOTP_TRANSACTION_ID_MAP.cend()) { + throw std::runtime_error(fmt::format("use_data_plane failed, data_plane={} is out-of-range.", data_plane)); + } + HSB_LOG_TRACE(fmt::format("data_plane={}", data_plane)); + metadata["configuration_address"] = channel_configuration->second.configuration_address; +} + +/* static */ void DataChannel::use_sensor(Metadata& metadata, int64_t sensor_number) +{ + auto channel_configuration = BOOTP_TRANSACTION_ID_MAP.find(sensor_number); + if (channel_configuration == BOOTP_TRANSACTION_ID_MAP.cend()) { + throw std::runtime_error(fmt::format("use_sensor failed, sensor_number={} is out-of-range.", sensor_number)); + } + HSB_LOG_TRACE(fmt::format("sensor_number={}", sensor_number)); + metadata["sensor"] = sensor_number; + metadata["vip_mask"] = channel_configuration->second.vip_mask; } } // namespace hololink diff --git a/src/hololink/data_channel.hpp b/src/hololink/data_channel.hpp index 23a41a5..b6d4465 100644 --- a/src/hololink/data_channel.hpp +++ b/src/hololink/data_channel.hpp @@ -30,26 +30,32 @@ namespace hololink { -// Note that these are offsets from VP_START. 
-constexpr uint32_t DP_PACKET_SIZE = 0x30C; -constexpr uint32_t DP_HOST_MAC_LOW = 0x310; -constexpr uint32_t DP_HOST_MAC_HIGH = 0x314; -constexpr uint32_t DP_HOST_IP = 0x318; -constexpr uint32_t DP_HOST_UDP_PORT = 0x31C; -constexpr uint32_t DP_VIP_MASK = 0x324; - -// Fields in DP_ROCE_CFG -// "31:28 = end buf -// 27:24 = start buf -// 23: 0 = qp" -constexpr uint32_t DP_ROCE_CFG = 0x1000; -constexpr uint32_t DP_ROCE_RKEY_0 = 0x1004; -constexpr uint32_t DP_ROCE_VADDR_MSB_0 = 0x1008; -constexpr uint32_t DP_ROCE_VADDR_LSB_0 = 0x100C; -constexpr uint32_t DP_ROCE_BUF_END_MSB_0 = 0x1010; -constexpr uint32_t DP_ROCE_BUF_END_LSB_0 = 0x1014; +// Note that these are offsets from the enumeration metadata "configuration_address". +constexpr uint32_t DP_PACKET_SIZE = 0x304; +constexpr uint32_t DP_VIP_MASK = 0x30C; + +// DMA descriptor registers. +constexpr uint32_t DP_QP = 0x1000; +constexpr uint32_t DP_RKEY = 0x1004; +// these are all page addresses; the actual byte address in the +// packet will be this page address * 128. +constexpr uint32_t DP_ADDRESS_0 = 0x1008; +constexpr uint32_t DP_ADDRESS_1 = 0x100C; +constexpr uint32_t DP_ADDRESS_2 = 0x1010; +constexpr uint32_t DP_ADDRESS_3 = 0x1014; +constexpr uint32_t DP_BUFFER_LENGTH = 0x1018; // this is in bytes +constexpr uint32_t DP_BUFFER_MASK = 0x101C; // each bit enables a buffer +constexpr uint32_t DP_HOST_MAC_LOW = 0x1020; +constexpr uint32_t DP_HOST_MAC_HIGH = 0x1024; +constexpr uint32_t DP_HOST_IP = 0x1028; +constexpr uint32_t DP_HOST_UDP_PORT = 0x102C; + +class Enumerator; class DataChannel { + // Enumerator calls our methods to reconfigure Metadata. + friend class Enumerator; + public: /** * @brief Construct a new DataChannel object @@ -69,6 +75,16 @@ class DataChannel { */ static bool enumerated(const Metadata& metadata); + /** + * Update the enumeration metadata to use a multicast destination address. 
+ */ + static void use_multicast(Metadata& metadata, std::string address, uint16_t port); + + /** + * Update the enumeration metadata to use a broadcast destination address. + */ + static void use_broadcast(Metadata& metadata, uint16_t port); + /** * @brief * @@ -98,29 +114,47 @@ class DataChannel { * @param frame_size * @param local_data_port */ - void configure(uint64_t frame_address, uint64_t frame_size, uint32_t local_data_port); + void configure(uint64_t frame_memory, size_t frame_size, size_t page_size, unsigned pages, uint32_t local_data_port); /** - * @brief - * - * @param reg - * @param value - * @return true - * @return false + * Clear the operating state set up by configure(). */ - bool write_uint32(uint32_t reg, uint32_t value); + void unconfigure(); + + /** + * Configure the receiver to handle this traffic; this + * is useful when using, say, multicast. + */ + void configure_socket(int socket_fd); + + /** + * Configure the given metadata to exchange data from the + * given sensor port. Given enumeration data for a specific + * IP address, use this call to configure the sensor to listen + * to. Multiple sensors can transmit using the same IP address. + */ + static void use_sensor(Metadata& metadata, int64_t sensor_number); + +protected: + /** + * Configure the given metadata to send data to the given + * host interface. 
+ */ + static void use_data_plane_configuration(Metadata& metadata, int64_t data_plane); private: std::shared_ptr hololink_; - uint32_t address_; + uint32_t network_configuration_address_; + uint32_t sensor_configuration_address_; std::string peer_ip_; uint32_t vip_mask_; uint32_t qp_number_; uint32_t rkey_; - - void configure_internal(uint64_t frame_size, uint32_t payload_size, uint32_t header_size, - const native::MacAddress& local_mac, const std::string& local_ip, uint32_t local_data_port, - uint32_t qp_number, uint32_t rkey, uint64_t address, uint64_t size); + uint32_t data_plane_; + uint32_t sensor_; + std::string multicast_; + uint16_t multicast_port_; + uint16_t broadcast_port_; }; } // namespace hololink diff --git a/src/hololink/enumerator.cpp b/src/hololink/enumerator.cpp index e1382ae..220629f 100644 --- a/src/hololink/enumerator.cpp +++ b/src/hololink/enumerator.cpp @@ -34,30 +34,10 @@ #include #include +#include #include #include -#include - -namespace { - -/** Hololink-lite data plane configuration is implied by the value - * passed in the bootp transaction_id field, which is coopted - * by FPGA to imply which port is publishing the request. We use - * that port ID to figure out what the address of the port's - * configuration data is; which is the value listed here. 
- */ -struct HololinkChannelConfiguration { - uint32_t configuration_address; - uint32_t vip_mask; -}; -static const std::map BOOTP_TRANSACTION_ID_MAP { - { 0, HololinkChannelConfiguration { 0x02000000, 0x1 } }, - { 1, HololinkChannelConfiguration { 0x02010000, 0x2 } }, -}; - -} // anonynous namespace - namespace hololink { namespace { @@ -135,7 +115,7 @@ namespace { { for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { - HOLOSCAN_LOG_TRACE(fmt::format("cmsg_level={} cmsg_type={} cmsg_data_len={}", + HSB_LOG_TRACE(fmt::format("cmsg_level={} cmsg_type={} cmsg_data_len={}", cmsg->cmsg_level, cmsg->cmsg_type, cmsg->cmsg_len)); if ((cmsg->cmsg_level == IPPROTO_IP) && (cmsg->cmsg_type == IP_PKTINFO)) { @@ -152,7 +132,7 @@ namespace { metadata["interface_address"] = std::string(inet_ntoa(pkt_info->ipi_spec_dst)), metadata["destination_address"] = std::string(inet_ntoa(pkt_info->ipi_addr)); - HOLOSCAN_LOG_TRACE( + HSB_LOG_TRACE( fmt::format("ipi_ifindex={} interface={} ipi_spec_dst={} ipi_addr={}", metadata["interface_index"], metadata["interface"], metadata["interface_address"], metadata["destination_address"])); @@ -160,61 +140,6 @@ namespace { } } - static void deserialize_enumeration(const std::vector& packet, Metadata& metadata) - { - native::Deserializer deserializer(packet.data(), packet.size()); - - uint8_t board_id = 0; - if (!deserializer.next_uint8(board_id)) { - throw std::runtime_error("Unable to deserialize enumeration packet."); - } - - metadata["type"] = "enumeration"; - metadata["board_id"] = board_id; - if (board_id == HOLOLINK_LITE_BOARD_ID) { - metadata["board_description"] = "hololink-lite"; - } else if (board_id == HOLOLINK_BOARD_ID) { - metadata["board_description"] = "hololink"; - } else if (board_id == HOLOLINK_100G_BOARD_ID) { - metadata["board_description"] = "hololink 100G"; - } else if (board_id == MICROCHIP_POLARFIRE_BOARD_ID) { - metadata["board_description"] = "Microchip Polarfire"; - } else { - 
metadata["board_description"] = "N/A"; - } - - if ((board_id == HOLOLINK_LITE_BOARD_ID) - || (board_id == HOLOLINK_BOARD_ID) - || (board_id == HOLOLINK_100G_BOARD_ID) - || (board_id == MICROCHIP_POLARFIRE_BOARD_ID)) { - std::string board_version; - std::string serial_number; - uint16_t cpnx_version = 0; - uint16_t cpnx_crc = 0; - uint16_t clnx_version = 0; - uint16_t clnx_crc = 0; - - if (!(next_buffer_as_string(deserializer, board_version, 20) - && next_buffer_as_string(deserializer, serial_number, 7) - && deserializer.next_uint16_le(cpnx_version) - && deserializer.next_uint16_le(cpnx_crc) - && deserializer.next_uint16_le(clnx_version) - && deserializer.next_uint16_le(clnx_crc))) { - throw std::runtime_error("Unable to deserialize enumeration packet."); - } - - constexpr int default_control_port = 8192; - - metadata["board_version"] = board_version; - metadata["serial_number"] = serial_number; - metadata["cpnx_version"] = cpnx_version; - metadata["cpnx_crc"] = cpnx_crc; - metadata["clnx_version"] = clnx_version; - metadata["clnx_crc"] = clnx_crc; - metadata["control_port"] = default_control_port; - } - } - static void deserialize_bootp_request(const std::vector& packet, Metadata& metadata) { native::Deserializer deserializer(packet.data(), packet.size()); @@ -244,7 +169,13 @@ namespace { deserializer.next_uint32_be(server_ip_address) && // expected to be 0s deserializer.next_uint32_be(gateway_ip_address) && deserializer.next_buffer(hardware_address))) { - throw std::runtime_error("Unable to deserialize bootp request packet."); + // Don't flood the log. 
+ static unsigned reports = 0; + if (reports < 5) { + HSB_LOG_ERROR("Unable to deserialize bootp request packet."); + reports++; + } + return; } std::stringstream mac_id_stream; @@ -259,7 +190,34 @@ namespace { const uint8_t* ignore = nullptr; deserializer.pointer(ignore, 64); // server_hostname deserializer.pointer(ignore, 128); // boot_filename - deserializer.pointer(ignore, 64); // vendor information + + // Vendor information has more for us. + constexpr uint8_t expected_vendor_tag = 0xE0; + uint8_t vendor_tag = 0; + uint8_t vendor_tag_length = 0; + constexpr uint32_t expected_vendor_id = 0x4E564441; // 'NVDA' + uint32_t vendor_id = 0; + uint8_t data_plane = 0; + constexpr uint8_t expected_enum_version = 0x01; + uint8_t enum_version = 0; + uint16_t board_id = 0; + if (!(deserializer.next_uint8(vendor_tag) + && (vendor_tag == expected_vendor_tag) + && deserializer.next_uint8(vendor_tag_length) + && deserializer.next_uint32_be(vendor_id) + && (vendor_id == expected_vendor_id) + && deserializer.next_uint8(data_plane) + && deserializer.next_uint8(enum_version) + && (enum_version == expected_enum_version) + && deserializer.next_uint16_le(board_id))) { + // Don't flood the log. 
+ static unsigned reports = 0; + if (reports < 5) { + HSB_LOG_ERROR("Unable to deserialize bootp request vendor data."); + reports++; + } + return; + } metadata["type"] = "bootp_request"; metadata["op"] = op; @@ -275,51 +233,97 @@ namespace { metadata["gateway_ip_address"] = std::string(inet_ntoa({ ntohl(gateway_ip_address) })); metadata["hardware_address"] = hardware_address; metadata["mac_id"] = mac_id; + metadata["data_plane"] = static_cast(data_plane); + metadata["board_id"] = board_id; + + if (board_id == HOLOLINK_LITE_BOARD_ID) { + metadata["board_description"] = "hololink-lite"; + } else if (board_id == HOLOLINK_NANO_BOARD_ID) { + metadata["board_description"] = "hololink-nano"; + } else if (board_id == HOLOLINK_100G_BOARD_ID) { + metadata["board_description"] = "hololink 100G"; + } else if (board_id == MICROCHIP_POLARFIRE_BOARD_ID) { + metadata["board_description"] = "Microchip Polarfire"; + } else { + metadata["board_description"] = "N/A"; + } + if ((board_id == HOLOLINK_LITE_BOARD_ID) + || (board_id == HOLOLINK_NANO_BOARD_ID) + || (board_id == HOLOLINK_100G_BOARD_ID) + || (board_id == MICROCHIP_POLARFIRE_BOARD_ID)) { + std::string board_version; + std::string serial_number; + uint16_t cpnx_version = 0; + uint16_t cpnx_crc = 0; + uint16_t clnx_version = 0; + uint16_t clnx_crc = 0; + + if (!(next_buffer_as_string(deserializer, board_version, 20) + && next_buffer_as_string(deserializer, serial_number, 7) + && deserializer.next_uint16_le(cpnx_version) + && deserializer.next_uint16_le(cpnx_crc) + && deserializer.next_uint16_le(clnx_version) + && deserializer.next_uint16_le(clnx_crc))) { + // Don't flood the log. 
+ static unsigned reports = 0; + if (reports < 5) { + HSB_LOG_ERROR("Unable to deserialize bootp request board data."); + reports++; + } + return; + } + + constexpr int default_control_port = 8192; + + metadata["board_version"] = board_version; + metadata["serial_number"] = serial_number; + metadata["cpnx_version"] = cpnx_version; + metadata["cpnx_crc"] = cpnx_crc; + metadata["clnx_version"] = clnx_version; + metadata["clnx_crc"] = clnx_crc; + metadata["control_port"] = default_control_port; + metadata["sequence_number_checking"] = 1; + } } } // anonymous namespace -Enumerator::Enumerator(const std::string& local_interface, uint32_t enumeration_port, +Enumerator::Enumerator(const std::string& local_interface, uint32_t bootp_request_port, uint32_t bootp_reply_port) : local_interface_(local_interface) - , enumeration_port_(enumeration_port) , bootp_request_port_(bootp_request_port) , bootp_reply_port_(bootp_reply_port) { } /*static*/ void Enumerator::enumerated( - const std::function& call_back, const std::shared_ptr& timeout) + const std::function& call_back, const std::shared_ptr& timeout) { Enumerator enumerator(""); - std::map data_plane_by_peer_ip; enumerator.enumeration_packets( - [call_back, &data_plane_by_peer_ip]( - Enumerator&, const std::vector& packet, const Metadata& metadata) -> bool { - HOLOSCAN_LOG_DEBUG(fmt::format("Enumeration metadata={}", metadata)); + [call_back]( + Enumerator&, const std::vector& packet, Metadata& metadata) -> bool { + HSB_LOG_DEBUG(fmt::format("Enumeration metadata={}", metadata)); auto peer_ip = metadata.get("peer_ip"); if (!peer_ip) { return true; } - Metadata& channel_metadata = data_plane_by_peer_ip[*peer_ip]; - channel_metadata.update(metadata); - // transaction_id actually indicates which data plane instance we're talking to - auto transaction_id = metadata.get("transaction_id"); // may not exist - if (transaction_id) { - HOLOSCAN_LOG_TRACE(fmt::format("transaction_id={}", transaction_id.value())); - auto 
channel_configuration = BOOTP_TRANSACTION_ID_MAP.find(transaction_id.value()); - if (channel_configuration != BOOTP_TRANSACTION_ID_MAP.cend()) { - channel_metadata["configuration_address"] - = channel_configuration->second.configuration_address; - channel_metadata["vip_mask"] = channel_configuration->second.vip_mask; - } + // Add some supplemental data. + auto opt_data_plane = metadata.get("data_plane"); + if (!opt_data_plane.has_value()) { + // 2410 and later always provide this. + return true; } - + int data_plane = opt_data_plane.value(); + DataChannel::use_data_plane_configuration(metadata, data_plane); + // By default, use the data_plane ID to select the sensor. + int sensor_number = data_plane; + DataChannel::use_sensor(metadata, sensor_number); // Do we have the information we need? - HOLOSCAN_LOG_DEBUG(fmt::format("channel_metadata={}", channel_metadata)); - if (DataChannel::enumerated(channel_metadata)) { - if (!call_back(channel_metadata)) { + HSB_LOG_DEBUG(fmt::format("metadata={}", metadata)); + if (DataChannel::enumerated(metadata)) { + if (!call_back(metadata)) { return false; } } @@ -335,7 +339,7 @@ Enumerator::Enumerator(const std::string& local_interface, uint32_t enumeration_ bool found = false; enumerated( - [&channel_ip, &channel_metadata, &found](const Metadata& metadata) -> bool { + [&channel_ip, &channel_metadata, &found](Metadata& metadata) -> bool { auto peer_ip = metadata.get("peer_ip"); if (peer_ip && (peer_ip == channel_ip)) { channel_metadata = metadata; @@ -355,13 +359,12 @@ Enumerator::Enumerator(const std::string& local_interface, uint32_t enumeration_ } void Enumerator::enumeration_packets( - const std::function&, const Metadata&)>& call_back, + const std::function&, Metadata&)>& call_back, const std::shared_ptr& timeout) { - Socket enumeration_socket(local_interface_, enumeration_port_); Socket bootp_socket(local_interface_, bootp_request_port_); - constexpr size_t receive_message_size = 8192; + constexpr size_t 
receive_message_size = hololink::native::UDP_PACKET_SIZE; std::vector iobuf(receive_message_size); std::array controlbuf; @@ -387,7 +390,7 @@ void Enumerator::enumeration_packets( select_timeout = nullptr; } - std::array fds { enumeration_socket.get(), bootp_socket.get() }; + std::array fds { bootp_socket.get() }; fd_set r; FD_ZERO(&r); for (auto&& fd : fds) { @@ -401,15 +404,11 @@ void Enumerator::enumeration_packets( num_fds++; const int result = select(num_fds, &r, nullptr, &x, select_timeout); if (result == -1) { - if (errno == EINTR) { - // retry - continue; - } throw std::runtime_error(fmt::format("select failed with errno={}: \"{}\"", errno, strerror(errno))); } for (auto&& fd : fds) { if (FD_ISSET(fd, &x)) { - HOLOSCAN_LOG_ERROR("Error reading enumeration sockets."); + HSB_LOG_ERROR("Error reading enumeration sockets."); return; } } @@ -442,13 +441,13 @@ void Enumerator::enumeration_packets( ssize_t received_bytes; do { received_bytes = recvmsg(fd, &msg, 0); - if ((received_bytes == -1) && (errno != EINTR)) { + if (received_bytes == -1) { throw std::runtime_error(fmt::format("recvmsg failed with errno={}: \"{}\"", errno, strerror(errno))); } } while (received_bytes <= 0); const std::string peer_address_string(inet_ntoa(peer_address.sin_addr)); - HOLOSCAN_LOG_TRACE(fmt::format( + HSB_LOG_TRACE(fmt::format( "enumeration peer_address \"{}:{}\", ancdata size {}, msg_flags {}, packet size {}", peer_address_string, ntohs(peer_address.sin_port), msg.msg_controllen, msg.msg_flags, received_bytes)); @@ -458,9 +457,7 @@ void Enumerator::enumeration_packets( metadata["_socket_fd"] = fd; deserialize_ancdata(msg, metadata); - if (fd == enumeration_socket.get()) { - deserialize_enumeration(iobuf, metadata); - } else if (fd == bootp_socket.get()) { + if (fd == bootp_socket.get()) { deserialize_bootp_request(iobuf, metadata); } @@ -524,7 +521,7 @@ void Enumerator::send_bootp_reply( // Don't flood the console. 
static bool first = true; if (first) { - HOLOSCAN_LOG_DEBUG(fmt::format("sendmsg failed with errno={}: \"{}\"", errno, strerror(errno))); + HSB_LOG_DEBUG(fmt::format("sendmsg failed with errno={}: \"{}\"", errno, strerror(errno))); first = false; } } diff --git a/src/hololink/enumerator.hpp b/src/hololink/enumerator.hpp index 29b24aa..6bafcf1 100644 --- a/src/hololink/enumerator.hpp +++ b/src/hololink/enumerator.hpp @@ -39,12 +39,11 @@ class Enumerator { * @brief Construct a new Enumerator object * * @param local_interface blank for all local interfaces - * @param enumeration_port * @param bootp_request_port * @param bootp_reply_port */ explicit Enumerator(const std::string& local_interface = std::string(), - uint32_t enumeration_port = 10001u, uint32_t bootp_request_port = 12267u, + uint32_t bootp_request_port = 12267u, uint32_t bootp_reply_port = 12268u); Enumerator() = delete; @@ -56,7 +55,7 @@ class Enumerator { * @param call_back * @param timeout */ - static void enumerated(const std::function& call_back, + static void enumerated(const std::function& call_back, const std::shared_ptr& timeout = std::shared_ptr()); /** @@ -64,7 +63,7 @@ class Enumerator { * * @param channel_ip * @param timeout - * @return const Metadata& + * @return Metadata& */ static Metadata find_channel(const std::string& channel_ip, const std::shared_ptr& timeout = std::make_shared(20.f)); @@ -78,7 +77,7 @@ class Enumerator { * @param timeout */ void enumeration_packets( - const std::function&, const Metadata&)>& call_back, + const std::function&, Metadata&)>& call_back, const std::shared_ptr& timeout = std::shared_ptr()); /** @@ -91,7 +90,6 @@ class Enumerator { private: const std::string local_interface_; - const uint32_t enumeration_port_; const uint32_t bootp_request_port_; const uint32_t bootp_reply_port_; }; diff --git a/src/hololink/hololink.cpp b/src/hololink/hololink.cpp index 855b82e..4f89212 100644 --- a/src/hololink/hololink.cpp +++ b/src/hololink/hololink.cpp @@ -26,11 +26,13 
@@ #include #include +#include #include +#include +#include #include #include -#include #include "metadata.hpp" @@ -48,21 +50,65 @@ namespace { constexpr uint32_t SPI_CFG_CPHA = 0b0000'0000'0010'0000; // GPIO Registers - // bitmask 0:F, ecah bit correspondes to a GPIO pin - // GPIO_OUTPUT_REGISTER - W - set output pin values - // GPIO_DIRECTION_REGISTER - R/W - set/read GPIO pin direction - // GPIO_STATUS_REGISTER - R - read input GPIO value - constexpr uint32_t GPIO_OUTPUT_REGISTER = 0x0000'000C; - constexpr uint32_t GPIO_DIRECTION_REGISTER = 0x0000'002C; - constexpr uint32_t GPIO_STATUS_REGISTER = 0x0000'008C; + // bitmask 0:1F, each bit corresponds to a GPIO pin + // GPIO_OUTPUT_BASE_REGISTER - W - set output pin values + // GPIO_DIRECTION_BASE_REGISTER - R/W - set/read GPIO pin direction + // GPIO_STATUS_BASE_REGISTER - R - read input GPIO value + // + // FPGA can support up to 256 GPIO pins that are spread + // on 8 OUTPUT/DIRECTION/STATUS registers. + // For each type of register, the address offset is 4: + // OUTPUT registers are: 0x0C(base),0x10,0x14,0x18....0x28 + // DIRECTION registers are: 0x2C(base),0x30,0x34,0x38....0x48 + // STATUS registers are: 0x8C(base),0x90,0x94,0x98....0xA8 + constexpr uint32_t GPIO_OUTPUT_BASE_REGISTER = 0x0000'000C; + constexpr uint32_t GPIO_DIRECTION_BASE_REGISTER = 0x0000'002C; + constexpr uint32_t GPIO_STATUS_BASE_REGISTER = 0x0000'008C; + constexpr uint32_t GPIO_REGISTER_ADDRESS_OFFSET = 0x0000'0004; + + static char const* response_code_description(uint32_t response_code) + { + switch (response_code) { + case RESPONSE_SUCCESS: + return "RESPONSE_SUCCESS"; + case RESPONSE_ERROR_GENERAL: + return "RESPONSE_ERROR_GENERAL"; + case RESPONSE_INVALID_ADDR: + return "RESPONSE_INVALID_ADDR"; + case RESPONSE_INVALID_CMD: + return "RESPONSE_INVALID_CMD"; + case RESPONSE_INVALID_PKT_LENGTH: + return "RESPONSE_INVALID_PKT_LENGTH"; + case RESPONSE_INVALID_FLAGS: + return "RESPONSE_INVALID_FLAGS"; + case RESPONSE_BUFFER_FULL: + return 
"RESPONSE_BUFFER_FULL"; + case RESPONSE_INVALID_BLOCK_SIZE: + return "RESPONSE_INVALID_BLOCK_SIZE"; + case RESPONSE_INVALID_INDIRECT_ADDR: + return "RESPONSE_INVALID_INDIRECT_ADDR"; + case RESPONSE_COMMAND_TIMEOUT: + return "RESPONSE_COMMAND_TIMEOUT"; + case RESPONSE_SEQUENCE_CHECK_FAIL: + return "RESPONSE_SEQUENCE_CHECK_FAIL"; + default: + return "(unknown)"; + } + } + + // Allocate buffers for control plane requests and replies to this + // size, which is guaranteed to be large enough for the largest + // of any of those buffers. + constexpr uint32_t CONTROL_PACKET_SIZE = 20; } // anonymous namespace Hololink::Hololink( - const std::string& peer_ip, uint32_t control_port, const std::string& serial_number) + const std::string& peer_ip, uint32_t control_port, const std::string& serial_number, bool sequence_number_checking) : peer_ip_(peer_ip) , control_port_(control_port) , serial_number_(serial_number) + , sequence_number_checking_(sequence_number_checking) , execute_mutex_() { } @@ -87,8 +133,11 @@ Hololink::Hololink( throw std::runtime_error("Metadata has no \"control_port\""); } + auto opt_sequence_number_checking = metadata.get("sequence_number_checking"); + bool sequence_number_checking = (opt_sequence_number_checking != 0); + r = std::make_shared( - peer_ip.value(), control_port.value(), serial_number.value()); + peer_ip.value(), control_port.value(), serial_number.value(), sequence_number_checking); hololink_by_serial_number[serial_number.value()] = r; } else { @@ -102,7 +151,7 @@ Hololink::Hololink( { auto it = hololink_by_serial_number.begin(); while (it != hololink_by_serial_number.end()) { - HOLOSCAN_LOG_INFO("Removing hololink \"{}\"", it->first); + HSB_LOG_INFO("Removing hololink \"{}\"", it->first); it = hololink_by_serial_number.erase(it); } } @@ -139,9 +188,18 @@ void Hololink::start() } // ARP packets are slow, so allow for more timeout on this initial read. 
auto get_fpga_version_timeout = std::make_shared(30.f, 0.2f); - version_ = get_fpga_version(get_fpga_version_timeout); + + // Because we're at the start of our session with HSB, let's reset it to + // use the sequence number that we have from our constructor. Following + // this, unless the user specifies otherwise, we'll always check the + // sequence number on every transaction-- which will trigger a fault if + // another program goes in and does any sort of control-plane transaction. + // Note that when a control plane request triggers a fault, the actual + // command is ignored. + bool check_sequence = false; + version_ = get_fpga_version(get_fpga_version_timeout, check_sequence); datecode_ = get_fpga_date(); - HOLOSCAN_LOG_INFO("FPGA version={:#x} datecode={:#x}", version_, datecode_); + HSB_LOG_INFO("FPGA version={:#x} datecode={:#x}", version_, datecode_); } void Hololink::stop() { control_socket_.reset(); } @@ -170,19 +228,34 @@ void Hololink::reset() // we won't get a reply. write_uint32(0x4, 0x8, nullptr, /*retry*/ false); } catch (const std::exception& e) { - HOLOSCAN_LOG_INFO("ignoring error {}.", e.what()); + HSB_LOG_INFO("ignoring error {}.", e.what()); } // Now wait for the device to come back up. // This guy raises an exception if we're not found; // this can happen if set-ip is used in one-time // mode. - Enumerator::find_channel(peer_ip_, std::make_shared(30.f)); + Metadata channel_metadata = Enumerator::find_channel(peer_ip_, std::make_shared(30.f)); + + // When the connection was lost, the host flushes its ARP cache. + // Because ARP requests are slow, let's just set the ARP cache here, + // because we know the MAC ID and the IP address of the system that + // just enumerated. This avoids timeouts when we try fetching the FPGA + // version ID while the kernel is waiting for ARP to be updated. 
+ std::string interface = channel_metadata.get("interface").value(); + std::string client_ip_address = channel_metadata.get("client_ip_address").value(); + std::string mac_id = channel_metadata.get("mac_id").value(); + hololink::native::ArpWrapper::arp_set(control_socket_.get(), interface.c_str(), client_ip_address.c_str(), mac_id.c_str()); + + // At this point, the device has reset its latched sequence number to 0; so + // our next request should have a sequence value of 1. If our reset didn't + // work, we'll detect that with a sequence number fault in the reply. + sequence_ = 1; // ARP packets are slow, so allow for more timeout on this initial read. auto get_fpga_version_timeout = std::make_shared(30.f, 0.2f); uint32_t version = get_fpga_version(get_fpga_version_timeout); - HOLOSCAN_LOG_INFO("version={:#x}", version); + HSB_LOG_INFO("version={:#x}", version); // Now go through and reset all registered clients. for (std::shared_ptr reset_controller : reset_controllers_) { @@ -190,9 +263,9 @@ void Hololink::reset() } } -uint32_t Hololink::get_fpga_version(const std::shared_ptr& timeout) +uint32_t Hololink::get_fpga_version(const std::shared_ptr& timeout, bool check_sequence) { - const uint32_t version = read_uint32(FPGA_VERSION, timeout); + const uint32_t version = read_uint32(FPGA_VERSION, timeout, check_sequence); return version; } @@ -203,7 +276,7 @@ uint32_t Hololink::get_fpga_date() } bool Hololink::write_uint32( - uint32_t address, uint32_t value, const std::shared_ptr& in_timeout, bool retry) + uint32_t address, uint32_t value, const std::shared_ptr& in_timeout, bool retry, bool sequence_check) { uint32_t count = 0; std::exception_ptr eptr; @@ -211,7 +284,7 @@ bool Hololink::write_uint32( try { while (true) { count += 1; - bool status = write_uint32_(address, value, timeout, retry /*response_expected*/); + bool status = write_uint32_(address, value, timeout, retry, sequence_check); if (status) { return status; } @@ -238,9 +311,9 @@ bool 
Hololink::write_uint32( } bool Hololink::write_uint32_(uint32_t address, uint32_t value, - const std::shared_ptr& timeout = std::shared_ptr(), bool response_expected) + const std::shared_ptr& timeout, bool response_expected, bool sequence_check) { - HOLOSCAN_LOG_DEBUG("write_uint32(address={:#x}, value={:#x})", address, value); + HSB_LOG_DEBUG("write_uint32(address={:#x}, value={:#x})", address, value); if ((address & 3) != 0) { throw std::runtime_error( fmt::format("Invalid address \"{:#x}\", has to be a multiple of four", address)); @@ -248,11 +321,21 @@ bool Hololink::write_uint32_(uint32_t address, uint32_t value, // BLOCKING on ack or timeout // This routine serializes a write_uint32 request // and forwards it to the device. - const uint16_t sequence = next_sequence(); + + // HSB only supports a single command/response at a time-- + // in other words we need to inhibit other threads from sending + // a command until we receive the response for the current one. + std::lock_guard lock(execute_mutex_); + + const uint16_t sequence = next_sequence(lock); // Serialize - std::vector request(20); + std::vector request(CONTROL_PACKET_SIZE); native::Serializer serializer(request.data(), request.size()); - if (!(serializer.append_uint8(WR_DWORD) && serializer.append_uint8(REQUEST_FLAGS_ACK_REQUEST) + uint8_t flags = REQUEST_FLAGS_ACK_REQUEST; + if (sequence_check) { + flags |= REQUEST_FLAGS_SEQUENCE_CHECK; + } + if (!(serializer.append_uint8(WR_DWORD) && serializer.append_uint8(flags) && serializer.append_uint16_be(sequence) && serializer.append_uint8(0) // reserved && serializer.append_uint8(0) // reserved && serializer.append_uint32_be(address) && serializer.append_uint32_be(value))) { @@ -260,28 +343,29 @@ bool Hololink::write_uint32_(uint32_t address, uint32_t value, } request.resize(serializer.length()); - std::vector reply(20); - auto [status, response_code, deserializer] = execute(sequence, request, reply, timeout); + std::vector reply(CONTROL_PACKET_SIZE); + auto 
[status, optional_response_code, deserializer] = execute(sequence, request, reply, timeout, lock); if (!status) { // timed out return false; } - if (response_code != RESPONSE_SUCCESS) { - if (!response_code.has_value()) { + if (optional_response_code != RESPONSE_SUCCESS) { + if (!optional_response_code.has_value()) { if (response_expected) { - HOLOSCAN_LOG_ERROR( + HSB_LOG_ERROR( "write_uint32 address={:#X} value={:#X} response_code=None", address, value); return false; } } + uint32_t response_code = optional_response_code.value(); throw std::runtime_error( - fmt::format("write_uint32 address={:#X} value={:#X} response_code={:#X}", address, - value, response_code.value())); + fmt::format("write_uint32 address={:#X} value={:#X} response_code={:#X}({})", address, + value, response_code, response_code_description(response_code))); } return true; } -uint32_t Hololink::read_uint32(uint32_t address, const std::shared_ptr& in_timeout) +uint32_t Hololink::read_uint32(uint32_t address, const std::shared_ptr& in_timeout, bool check_sequence) { uint32_t count = 0; std::exception_ptr eptr; @@ -289,7 +373,7 @@ uint32_t Hololink::read_uint32(uint32_t address, const std::shared_ptr& try { while (true) { count += 1; - auto [status, value] = read_uint32_(address, timeout); + auto [status, value] = read_uint32_(address, timeout, check_sequence); if (status) { return value.value(); } @@ -312,9 +396,9 @@ uint32_t Hololink::read_uint32(uint32_t address, const std::shared_ptr& } std::tuple> Hololink::read_uint32_( - uint32_t address, const std::shared_ptr& timeout) + uint32_t address, const std::shared_ptr& timeout, bool sequence_check) { - HOLOSCAN_LOG_DEBUG("read_uint32(address={:#x})", address); + HSB_LOG_DEBUG("read_uint32(address={:#x})", address); if ((address & 3) != 0) { throw std::runtime_error( fmt::format("Invalid address \"{:#x}\", has to be a multiple of four", address)); @@ -322,43 +406,56 @@ std::tuple> Hololink::read_uint32_( // BLOCKING on ack or timeout // This routine 
serializes a read_uint32 request // and forwards it to the device. - uint16_t sequence = next_sequence(); + + // HSB only supports a single command/response at a time-- + // in other words we need to inhibit other threads from sending + // a command until we receive the response for the current one. + std::lock_guard lock(execute_mutex_); + + uint16_t sequence = next_sequence(lock); // Serialize - std::vector request(20); + std::vector request(CONTROL_PACKET_SIZE); native::Serializer serializer(request.data(), request.size()); - if (!(serializer.append_uint8(RD_DWORD) && serializer.append_uint8(REQUEST_FLAGS_ACK_REQUEST) + uint8_t flags = REQUEST_FLAGS_ACK_REQUEST; + if (sequence_check) { + flags |= REQUEST_FLAGS_SEQUENCE_CHECK; + } + if (!(serializer.append_uint8(RD_DWORD) && serializer.append_uint8(flags) && serializer.append_uint16_be(sequence) && serializer.append_uint8(0) // reserved && serializer.append_uint8(0) // reserved && serializer.append_uint32_be(address))) { throw std::runtime_error("Unable to serialize"); } request.resize(serializer.length()); - HOLOSCAN_LOG_TRACE("read_uint32: {}....{}", request, sequence); + HSB_LOG_TRACE("read_uint32: {}....{}", request, sequence); - std::vector reply(20); - auto [status, response_code, deserializer] = execute(sequence, request, reply, timeout); + std::vector reply(CONTROL_PACKET_SIZE); + auto [status, optional_response_code, deserializer] = execute(sequence, request, reply, timeout, lock); if (!status) { // timed out return { false, {} }; } - if (response_code != RESPONSE_SUCCESS) { + if (optional_response_code != RESPONSE_SUCCESS) { + uint32_t response_code = optional_response_code.value(); throw std::runtime_error( - fmt::format("read_uint32 response_code={}", response_code.value())); + fmt::format("read_uint32 response_code={}({})", response_code, response_code_description(response_code))); } uint8_t reserved; uint32_t response_address; uint32_t value; + uint16_t latched_sequence; if 
(!(deserializer->next_uint8(reserved) /* reserved */ && deserializer->next_uint32_be(response_address) /* address */ - && deserializer->next_uint32_be(value))) { + && deserializer->next_uint32_be(value) + && deserializer->next_uint16_be(latched_sequence))) { throw std::runtime_error("Unable to deserialize"); } assert(response_address == address); - HOLOSCAN_LOG_DEBUG("read_uint32(address={:#x})={:#x}", address, value); + HSB_LOG_DEBUG("read_uint32(address={:#x})={:#x}", address, value); return { true, value }; } -uint16_t Hololink::next_sequence() +uint16_t Hololink::next_sequence(std::lock_guard&) { uint16_t r = sequence_; sequence_ = sequence_ + 1; @@ -367,16 +464,11 @@ uint16_t Hololink::next_sequence() std::tuple, std::shared_ptr> Hololink::execute( uint16_t sequence, const std::vector& request, std::vector& reply, - const std::shared_ptr& timeout) + const std::shared_ptr& timeout, std::lock_guard&) { - HOLOSCAN_LOG_TRACE("Sending request={}", request); + HSB_LOG_TRACE("Sending request={}", request); double request_time = Timeout::now_s(); - // HSB only supports a single command/response at a time-- - // in other words we need to inhibit other threads from sending - // a command until we receive the response for the current one. 
- std::lock_guard lock(execute_mutex_); - send_control(request); while (true) { reply = receive_control(timeout); @@ -396,8 +488,8 @@ std::tuple, std::shared_ptr> && deserializer->next_uint8(response_code))) { throw std::runtime_error("Unable to deserialize"); } - HOLOSCAN_LOG_TRACE("reply reply_sequence={} response_code={} sequence={}", reply_sequence, - response_code, sequence); + HSB_LOG_TRACE("reply reply_sequence={} response_code={}({}) sequence={}", reply_sequence, + response_code, response_code_description(response_code), sequence); if (sequence == reply_sequence) { return { true, response_code, deserializer }; } @@ -406,7 +498,7 @@ std::tuple, std::shared_ptr> void Hololink::send_control(const std::vector& request) { - HOLOSCAN_LOG_TRACE( + HSB_LOG_TRACE( "_send_control request={} peer_ip={} control_port={}", request, peer_ip_, control_port_); sockaddr_in address {}; address.sin_family = AF_INET; @@ -452,15 +544,11 @@ std::vector Hololink::receive_control(const std::shared_ptr& t fd_set x = r; int result = select(control_socket_.get() + 1, &r, nullptr, &x, select_timeout); if (result == -1) { - if (errno == EINTR) { - // retry - continue; - } throw std::runtime_error( fmt::format("select failed with errno={}: \"{}\"", errno, strerror(errno))); } if (FD_ISSET(control_socket_.get(), &x)) { - HOLOSCAN_LOG_ERROR("Error reading enumeration sockets."); + HSB_LOG_ERROR("Error reading enumeration sockets."); return {}; } if (result == 0) { @@ -468,7 +556,7 @@ std::vector Hololink::receive_control(const std::shared_ptr& t continue; } - std::vector received(8192); + std::vector received(hololink::native::UDP_PACKET_SIZE); sockaddr_in peer_address {}; peer_address.sin_family = AF_UNSPEC; socklen_t peer_address_len = sizeof(peer_address); @@ -476,7 +564,7 @@ std::vector Hololink::receive_control(const std::shared_ptr& t do { received_bytes = recvfrom(control_socket_.get(), received.data(), received.size(), 0, (sockaddr*)&peer_address, &peer_address_len); - if 
((received_bytes == -1) && (errno != EINTR)) { + if (received_bytes == -1) { throw std::runtime_error( fmt::format("recvfrom failed with errno={}: \"{}\"", errno, strerror(errno))); } @@ -490,7 +578,7 @@ std::vector Hololink::receive_control(const std::shared_ptr& t void Hololink::executed(double request_time, const std::vector& request, double reply_time, const std::vector& reply) { - HOLOSCAN_LOG_TRACE("Got reply={}", reply); + HSB_LOG_TRACE("Got reply={}", reply); } void Hololink::add_read_retries(uint32_t n) { } @@ -499,11 +587,11 @@ void Hololink::add_write_retries(uint32_t n) { } void Hololink::write_renesas(I2c& i2c, const std::vector& data) { - HOLOSCAN_LOG_TRACE("write_renesas data={}", data); + HSB_LOG_TRACE("write_renesas data={}", data); uint32_t read_byte_count = 0; constexpr uint32_t RENESAS_I2C_ADDRESS = 0x09; std::vector reply = i2c.i2c_transaction(RENESAS_I2C_ADDRESS, data, read_byte_count); - HOLOSCAN_LOG_TRACE("reply={}.", reply); + HSB_LOG_TRACE("reply={}.", reply); } void Hololink::setup_clock(const std::vector>& clock_profile) @@ -561,9 +649,37 @@ std::shared_ptr Hololink::get_spi(uint32_t spi_address, uint32_t return std::make_shared(*this, spi_address, spi_cfg); } -std::shared_ptr Hololink::get_gpio() +std::shared_ptr Hololink::get_gpio(Metadata& metadata) { - auto r = std::make_shared(*this); + + // get board id from enumaration metadata + int64_t board_id = metadata.get("board_id").value(); + uint32_t gpio_pin_number; + + // set number of GPIO pins per board + // nano - 54 + // 10G - 16 + // microchip - 0 + // unknown - set to 16 as default + switch (board_id) { + case HOLOLINK_NANO_BOARD_ID: + gpio_pin_number = 54; + break; + + case HOLOLINK_LITE_BOARD_ID: + gpio_pin_number = 16; + break; + + case MICROCHIP_POLARFIRE_BOARD_ID: + throw std::runtime_error(fmt::format("GPIO is not supported on this Hololink board!")); + break; + + default: + throw std::runtime_error(fmt::format("Invalid Hololink board id:{}!", board_id)); + break; + } + + 
auto r = std::make_shared(*this, gpio_pin_number); return r; } @@ -587,7 +703,7 @@ std::vector Hololink::I2c::i2c_transaction(uint32_t peripheral_i2c_addr const std::vector& write_bytes, uint32_t read_byte_count, const std::shared_ptr& in_timeout) { - HOLOSCAN_LOG_DEBUG("i2c_transaction peripheral={:#x} len(write_bytes)={} read_byte_count={}", + HSB_LOG_DEBUG("i2c_transaction peripheral={:#x} len(write_bytes)={} read_byte_count={}", peripheral_i2c_address, write_bytes.size(), read_byte_count); if (peripheral_i2c_address >= 0x80) { throw std::runtime_error( @@ -611,7 +727,10 @@ std::vector Hololink::I2c::i2c_transaction(uint32_t peripheral_i2c_addr // Hololink FPGA doesn't support resetting the I2C interface; // so the best we can do is make sure it's not busy. uint32_t value = hololink_.read_uint32(reg_control_, timeout); - assert((value & I2C_BUSY) == 0); + if (value & I2C_BUSY) { + throw std::runtime_error(fmt::format( + "Unexpected I2C_BUSY bit set, reg_control={:#x}, control value={:#x}", reg_control_, value)); + } // // set the device address and enable the i2c controller // I2C_DONE_CLEAR -> 1 @@ -622,7 +741,7 @@ std::vector Hololink::I2c::i2c_transaction(uint32_t peripheral_i2c_addr hololink_.write_uint32(reg_control_, control, timeout); // make sure DONE is 0. value = hololink_.read_uint32(reg_control_, timeout); - HOLOSCAN_LOG_DEBUG("control value={:#x}", value); + HSB_LOG_DEBUG("control value={:#x}", value); assert((value & I2C_DONE) == 0); // write the num_bytes uint32_t num_bytes = (write_byte_count << 0) | (read_byte_count << 8); @@ -655,7 +774,7 @@ std::vector Hololink::I2c::i2c_transaction(uint32_t peripheral_i2c_addr } if (!timeout->retry()) { // timed out - HOLOSCAN_LOG_DEBUG("Timed out."); + HSB_LOG_DEBUG("Timed out."); throw TimeoutError( fmt::format("i2c_transaction i2c_address={:#x}", peripheral_i2c_address)); } @@ -663,14 +782,14 @@ std::vector Hololink::I2c::i2c_transaction(uint32_t peripheral_i2c_addr // Poll until done. 
Future version will have an event packet too. while (true) { value = hololink_.read_uint32(reg_control_, timeout); - HOLOSCAN_LOG_TRACE("control={:#x}.", value); + HSB_LOG_TRACE("control={:#x}.", value); const uint32_t done = value & I2C_DONE; if (done != 0) { break; } if (!timeout->retry()) { // timed out - HOLOSCAN_LOG_DEBUG("Timed out."); + HSB_LOG_DEBUG("Timed out."); throw TimeoutError( fmt::format("i2c_transaction i2c_address={:#x}", peripheral_i2c_address)); } @@ -778,7 +897,7 @@ std::vector Hololink::Spi::spi_transaction(const std::vector& } if (!timeout->retry()) { // timed out - HOLOSCAN_LOG_DEBUG("Timed out."); + HSB_LOG_DEBUG("Timed out."); throw TimeoutError(fmt::format("spi_transaction control={:#x}", reg_control_)); } } @@ -801,31 +920,41 @@ std::vector Hololink::Spi::spi_transaction(const std::vector& return r; } -Hololink::GPIO::GPIO(Hololink& hololink) +Hololink::GPIO::GPIO(Hololink& hololink, uint32_t gpio_pin_number) : hololink_(hololink) { + if (gpio_pin_number > GPIO_PIN_RANGE) { + HSB_LOG_ERROR("Number of GPIO pins requested={} exceeds system limits={}", gpio_pin_number, GPIO_PIN_RANGE); + throw std::runtime_error(fmt::format("Number of GPIO pins requested={} exceeds system limits={}", gpio_pin_number, GPIO_PIN_RANGE)); + } + + gpio_pin_number_ = gpio_pin_number; } void Hololink::GPIO::set_direction(uint32_t pin, uint32_t direction) { - if (pin < GPIO_PIN_RANGE) { + if (pin < gpio_pin_number_) { + + uint32_t register_address = GPIO_DIRECTION_BASE_REGISTER + ((pin / 32) * GPIO_REGISTER_ADDRESS_OFFSET); + uint32_t pin_bit = pin % 32; // map 0-255 to 0-31 // Read direction register - uint32_t reg_val = hololink_.read_uint32(GPIO_DIRECTION_REGISTER); + uint32_t reg_val = hololink_.read_uint32(register_address); // modify direction pin value if (direction == IN) { - reg_val = set_bit(reg_val, pin); + reg_val = set_bit(reg_val, pin_bit); } else if (direction == OUT) { - reg_val = clear_bit(reg_val, pin); + reg_val = clear_bit(reg_val, pin_bit); } 
else { // raise exception throw std::runtime_error(fmt::format("GPIO:{},invalid direction:{}", pin, direction)); } // write back modified value - hololink_.write_uint32(GPIO_DIRECTION_REGISTER, reg_val); - HOLOSCAN_LOG_INFO("GPIO:{},set to direction:{}", pin, direction); + hololink_.write_uint32(register_address, reg_val); + + HSB_LOG_DEBUG("GPIO:{},set to direction:{}", pin, direction); return; } @@ -835,9 +964,13 @@ void Hololink::GPIO::set_direction(uint32_t pin, uint32_t direction) uint32_t Hololink::GPIO::get_direction(uint32_t pin) { - if (pin < GPIO_PIN_RANGE) { - uint32_t reg_val = hololink_.read_uint32(GPIO_DIRECTION_REGISTER); - return read_bit(reg_val, pin); + if (pin < gpio_pin_number_) { + + uint32_t register_address = GPIO_DIRECTION_BASE_REGISTER + ((pin / 32) * GPIO_REGISTER_ADDRESS_OFFSET); + uint32_t pin_bit = pin % 32; // map 0-255 to 0-31 + + uint32_t reg_val = hololink_.read_uint32(register_address); + return read_bit(reg_val, pin_bit); } // raise exception @@ -846,27 +979,32 @@ uint32_t Hololink::GPIO::get_direction(uint32_t pin) void Hololink::GPIO::set_value(uint32_t pin, uint32_t value) { - if (pin < GPIO_PIN_RANGE) { + if (pin < gpio_pin_number_) { // make sure this is an output pin const uint32_t direction = get_direction(pin); + uint32_t status_register_address = GPIO_STATUS_BASE_REGISTER + ((pin / 32) * GPIO_REGISTER_ADDRESS_OFFSET); // read from status + uint32_t output_register_address = GPIO_OUTPUT_BASE_REGISTER + ((pin / 32) * GPIO_REGISTER_ADDRESS_OFFSET); // write to output + uint32_t pin_bit = pin % 32; // map 0-255 to 0-31 + if (direction == OUT) { // Read output register values - uint32_t reg_val = hololink_.read_uint32(GPIO_STATUS_REGISTER); + uint32_t reg_val = hololink_.read_uint32(status_register_address); // Modify pin in the register if (value == HIGH) { - reg_val = set_bit(reg_val, pin); + reg_val = set_bit(reg_val, pin_bit); } else if (value == LOW) { - reg_val = clear_bit(reg_val, pin); + reg_val = clear_bit(reg_val, 
pin_bit); } else { // raise exception throw std::runtime_error(fmt::format("GPIO:{},invalid value:{}", pin, value)); } // write back modified value - hololink_.write_uint32(GPIO_OUTPUT_REGISTER, reg_val); - HOLOSCAN_LOG_INFO("GPIO:{},set to value:{}", pin, value); + hololink_.write_uint32(output_register_address, reg_val); + + HSB_LOG_DEBUG("GPIO:{},set to value:{}", pin, value); return; } else { // raise exception @@ -881,14 +1019,23 @@ void Hololink::GPIO::set_value(uint32_t pin, uint32_t value) uint32_t Hololink::GPIO::get_value(uint32_t pin) { - if (pin < GPIO_PIN_RANGE) { - const uint32_t reg_val = hololink_.read_uint32(GPIO_STATUS_REGISTER); - return read_bit(reg_val, pin); + if (pin < gpio_pin_number_) { + + uint32_t register_address = GPIO_STATUS_BASE_REGISTER + ((pin / 32) * GPIO_REGISTER_ADDRESS_OFFSET); + uint32_t pin_bit = pin % 32; // map 0-255 to 0-31 + + const uint32_t reg_val = hololink_.read_uint32(register_address); + return read_bit(reg_val, pin_bit); } // raise exception throw std::runtime_error(fmt::format("GPIO:{},invalid pin", pin)); } +uint32_t Hololink::GPIO::get_supported_pin_num(void) +{ + return gpio_pin_number_; +} + /*static*/ uint32_t Hololink::GPIO::set_bit(uint32_t value, uint32_t bit) { return value | (1 << bit); @@ -913,26 +1060,29 @@ class Hololink::NamedLock { /** Constructs a lock using the shm_open() call to access a named * semaphore with the given name. */ - NamedLock(std::string name) - : sem_(0) + NamedLock(Hololink& hololink, std::string name) + : fd_(-1) { - // Allow one thread to lock this guy. sem_open requires - // an initial "/" -- don't burden our callers with knowing this. - std::string formatted_name = fmt::format("/{}", name); + // We use lockf on this file as our interprocess locking + // mechanism; that way if this program exits unexpectedly + // we don't leave the lock held. (An earlier implementation + // using shm_open didn't guarantee releasing the lock if we exited due + // to the user pressing control/C.) 
+ std::string formatted_name = hololink.device_specific_filename(name); int permissions = 0666; // make sure other processes can write - sem_ = sem_open(formatted_name.c_str(), O_CREAT, permissions, 1); - if (sem_ == SEM_FAILED) { + fd_ = open(formatted_name.c_str(), O_WRONLY | O_CREAT, permissions); + if (fd_ < 0) { throw std::runtime_error( - fmt::format("sem_open failed with errno={}: \"{}\"", errno, strerror(errno))); + fmt::format("open({}, ...) failed with errno={}: \"{}\"", formatted_name, errno, strerror(errno))); } } ~NamedLock() noexcept(false) { - int r = sem_close(sem_); + int r = close(fd_); if (r != 0) { throw std::runtime_error( - fmt::format("sem_close failed with errno={}: \"{}\"", errno, strerror(errno))); + fmt::format("close failed with errno={}: \"{}\"", errno, strerror(errno))); } } @@ -942,10 +1092,10 @@ class Hololink::NamedLock { void lock() { // Block until we're the owner. - int r = sem_wait(sem_); + int r = lockf(fd_, F_LOCK, 0); if (r != 0) { throw std::runtime_error( - fmt::format("sem_wait failed with errno={}: \"{}\"", errno, strerror(errno))); + fmt::format("lockf failed with errno={}: \"{}\"", errno, strerror(errno))); } } @@ -956,26 +1106,32 @@ class Hololink::NamedLock { void unlock() { // Let another process take ownership. 
- int r = sem_post(sem_); + int r = lockf(fd_, F_ULOCK, 0); if (r != 0) { throw std::runtime_error( - fmt::format("sem_post failed with errno={}: \"{}\"", errno, strerror(errno))); + fmt::format("lockf failed with errno={}: \"{}\"", errno, strerror(errno))); } } protected: - sem_t* sem_; + int fd_; }; Hololink::NamedLock& Hololink::i2c_lock() { - static NamedLock lock("hololink-i2c-lock"); + static NamedLock lock(this[0], "hololink-i2c-lock"); return lock; } Hololink::NamedLock& Hololink::spi_lock() { - static NamedLock lock("hololink-spi-lock"); + static NamedLock lock(this[0], "hololink-spi-lock"); + return lock; +} + +Hololink::NamedLock& Hololink::lock() +{ + static NamedLock lock(this[0], "hololink-lock"); return lock; } @@ -984,6 +1140,22 @@ void Hololink::on_reset(std::shared_ptr reset_control reset_controllers_.push_back(reset_controller); } +std::string Hololink::device_specific_filename(std::string name) +{ + // Create a directory, if necessary, with our serial number. + auto path = std::filesystem::temp_directory_path(); + path.append("hololink"); + path.append(serial_number_); + if (!std::filesystem::exists(path)) { + if (!std::filesystem::create_directories(path)) { + throw std::runtime_error( + fmt::format("create_directory({}) failed with errno={}: \"{}\"", std::string(path), errno, strerror(errno))); + } + } + path.append(name); + return std::string(path); +} + Hololink::ResetController::~ResetController() { } @@ -993,7 +1165,7 @@ bool Hololink::ptp_synchronize(const std::shared_ptr& timeout) // Wait for a non-zero time value while (true) { std::shared_ptr read_timeout = Timeout::default_timeout(); - auto [status, value] = read_uint32_(FPGA_PTP_SYNC_TS_0, read_timeout); + auto [status, value] = read_uint32_(FPGA_PTP_SYNC_TS_0, read_timeout, sequence_number_checking_); if (status) { uint32_t ptp_count = value.value(); if (ptp_count != 0) { @@ -1010,4 +1182,40 @@ bool Hololink::ptp_synchronize(const std::shared_ptr& timeout) return true; } 
+Hololink::FrameMetadata Hololink::deserialize_metadata(const uint8_t* metadata_buffer, unsigned metadata_buffer_size) +{ + hololink::native::Deserializer deserializer(metadata_buffer, metadata_buffer_size); + FrameMetadata r = {}; // fill with 0s + if (!(deserializer.next_uint32_be(r.flags) + && deserializer.next_uint32_be(r.psn) + && deserializer.next_uint32_be(r.crc) + && deserializer.next_uint64_be(r.timestamp_s) + && deserializer.next_uint32_be(r.timestamp_ns) + && deserializer.next_uint64_be(r.bytes_written) + && deserializer.next_uint32_be(r.frame_number) + && deserializer.next_uint64_be(r.metadata_s) + && deserializer.next_uint32_be(r.metadata_ns))) { + throw std::runtime_error(fmt::format("Buffer underflow in metadata")); + } + HSB_LOG_TRACE("flags={:#x} psn={:#x} crc={:#x} timestamp_s={:#x} timestamp_ns={:#x} bytes_written={:#x} frame_number={:#x}", + r.flags, r.psn, r.crc, r.timestamp_s, r.timestamp_ns, r.bytes_written, r.frame_number); + return r; +} + +bool Hololink::and_uint32(uint32_t address, uint32_t mask) +{ + std::lock_guard lock(this->lock()); + uint32_t value = read_uint32(address); + value &= mask; + return write_uint32(address, value); +} + +bool Hololink::or_uint32(uint32_t address, uint32_t mask) +{ + std::lock_guard lock(this->lock()); + uint32_t value = read_uint32(address); + value |= mask; + return write_uint32(address, value); +} + } // namespace hololink diff --git a/src/hololink/hololink.hpp b/src/hololink/hololink.hpp index 6b9e659..3a667c4 100644 --- a/src/hololink/hololink.hpp +++ b/src/hololink/hololink.hpp @@ -51,9 +51,19 @@ constexpr uint32_t WR_DWORD = 0x04; constexpr uint32_t RD_DWORD = 0x14; // request packet flag bits constexpr uint32_t REQUEST_FLAGS_ACK_REQUEST = 0b0000'0001; +constexpr uint32_t REQUEST_FLAGS_SEQUENCE_CHECK = 0b0000'0010; // response codes -constexpr uint32_t RESPONSE_SUCCESS = 0; +constexpr uint32_t RESPONSE_SUCCESS = 0x00; +constexpr uint32_t RESPONSE_ERROR_GENERAL = 0x02; +constexpr uint32_t 
RESPONSE_INVALID_ADDR = 0x03; constexpr uint32_t RESPONSE_INVALID_CMD = 0x04; +constexpr uint32_t RESPONSE_INVALID_PKT_LENGTH = 0x05; +constexpr uint32_t RESPONSE_INVALID_FLAGS = 0x06; +constexpr uint32_t RESPONSE_BUFFER_FULL = 0x07; +constexpr uint32_t RESPONSE_INVALID_BLOCK_SIZE = 0x08; +constexpr uint32_t RESPONSE_INVALID_INDIRECT_ADDR = 0x09; +constexpr uint32_t RESPONSE_COMMAND_TIMEOUT = 0x0A; +constexpr uint32_t RESPONSE_SEQUENCE_CHECK_FAIL = 0x0B; // control flags constexpr uint32_t I2C_START = 0b0000'0000'0000'0001; @@ -72,10 +82,13 @@ constexpr uint32_t FPGA_PTP_SYNC_TS_0 = 0x180; constexpr uint32_t FPGA_PTP_OFM = 0x18C; // board IDs -constexpr uint32_t HOLOLINK_LITE_BOARD_ID = 1u; -constexpr uint32_t HOLOLINK_BOARD_ID = 2u; +constexpr uint32_t HOLOLINK_LITE_BOARD_ID = 2u; constexpr uint32_t HOLOLINK_100G_BOARD_ID = 3u; constexpr uint32_t MICROCHIP_POLARFIRE_BOARD_ID = 4u; +constexpr uint32_t HOLOLINK_NANO_BOARD_ID = 5u; + +// Other constants +constexpr uint32_t METADATA_SIZE = 128; class TimeoutError : public std::runtime_error { public: @@ -93,11 +106,19 @@ class UnsupportedVersion : public std::runtime_error { } }; +/** + * Defined in data_channel.hpp. + */ +class DataChannel; + /** * @brief * */ class Hololink { + /** DataChannel calls some methods we don't want to share. */ + friend class DataChannel; + public: /** * @brief Construct a new Hololink object @@ -107,7 +128,7 @@ class Hololink { * @param serial_number */ explicit Hololink( - const std::string& peer_ip, uint32_t control_port, const std::string& serial_number); + const std::string& peer_ip, uint32_t control_port, const std::string& serial_number, bool sequence_number_checking); Hololink() = delete; virtual ~Hololink() = default; @@ -135,9 +156,8 @@ class Hololink { static bool enumerated(const Metadata& metadata); /** - * @brief - * - * @return std::tuple + * Returns (frame_start_size, frame_end_size, line_start_size, line_end_size), + * all are in bytes. 
*/ std::tuple csi_size(); @@ -160,7 +180,7 @@ class Hololink { * @param timeout * @returns the FPGA version */ - uint32_t get_fpga_version(const std::shared_ptr& timeout = std::shared_ptr()); + uint32_t get_fpga_version(const std::shared_ptr& timeout = std::shared_ptr(), bool check_sequence = true); /** * @returns the FPGA date @@ -178,7 +198,19 @@ class Hololink { * @return false */ bool write_uint32(uint32_t address, uint32_t value, - const std::shared_ptr& in_timeout = std::shared_ptr(), bool retry = true); + const std::shared_ptr& in_timeout, bool retry, bool sequence_check); + + bool write_uint32(uint32_t address, uint32_t value, + const std::shared_ptr& timeout, bool retry = true) + { + return write_uint32(address, value, timeout, retry, sequence_number_checking_); + } + + bool write_uint32(uint32_t address, uint32_t value) + { + const std::shared_ptr& timeout = std::shared_ptr(); + return write_uint32(address, value, timeout); + } /** * @brief Returns the value found at the location or calls hololink timeout if there's a @@ -189,7 +221,18 @@ class Hololink { * @return uint32_t */ uint32_t read_uint32( - uint32_t address, const std::shared_ptr& in_timeout = std::shared_ptr()); + uint32_t address, const std::shared_ptr& in_timeout, bool check_sequence); + + uint32_t read_uint32(uint32_t address, const std::shared_ptr& timeout) + { + return read_uint32(address, timeout, sequence_number_checking_); + } + + uint32_t read_uint32(uint32_t address) + { + const std::shared_ptr& timeout = std::shared_ptr(); + return read_uint32(address, timeout); + } /** * @brief Setup the clock @@ -205,6 +248,7 @@ class Hololink { * the device-- the same is true for SPI. 
*/ class NamedLock; + friend class NamedLock; class I2c { public: @@ -342,7 +386,7 @@ class Hololink { * * @param hololink */ - explicit GPIO(Hololink& hololink); + explicit GPIO(Hololink& hololink, uint32_t gpio_pin_number); GPIO() = delete; // Direction constants @@ -353,8 +397,12 @@ class Hololink { inline static constexpr uint32_t LOW = 0; inline static constexpr uint32_t HIGH = 1; - // 16 pins - range 0...15 - inline static constexpr uint32_t GPIO_PIN_RANGE = 0x10; + // 256 pins in FPGA - range 0...255 + // Diffrent board configurations will use different pin numbers + // Lattice 10G - 16 pins + // Bajoran nano - 54 pins + // Future platform up to 256 pins supported by FPGA + inline static constexpr uint32_t GPIO_PIN_RANGE = 0x100; /** * @brief @@ -388,8 +436,17 @@ class Hololink { */ uint32_t get_value(uint32_t pin); + /** + * @brief + * + * @param + * @return uint32_t + */ + uint32_t get_supported_pin_num(void); + private: Hololink& hololink_; + uint32_t gpio_pin_number_; static uint32_t set_bit(uint32_t value, uint32_t bit); static uint32_t clear_bit(uint32_t value, uint32_t bit); @@ -401,7 +458,7 @@ class Hololink { * * @return std::shared_ptr */ - std::shared_ptr get_gpio(); + std::shared_ptr get_gpio(Metadata& metadata); /** * @brief @@ -428,6 +485,24 @@ class Hololink { */ bool ptp_synchronize(const std::shared_ptr& timeout); + /** + * Tool for deserializing HSB received metadata blob. 
+ */ + typedef struct { + uint32_t flags; + uint32_t psn; + uint32_t crc; + uint32_t frame_number; + // Time when the first sample data for the frame was received + uint32_t timestamp_ns; + uint64_t timestamp_s; + uint64_t bytes_written; + // Time at which the metadata packet was sent + uint32_t metadata_ns; + uint64_t metadata_s; + } FrameMetadata; + static FrameMetadata deserialize_metadata(const uint8_t* metadata_buffer, unsigned metadata_buffer_size); + protected: /** * @brief Override this guy to record timing around ACKs etc @@ -440,11 +515,43 @@ class Hololink { virtual void executed(double request_time, const std::vector& request, double reply_time, const std::vector& reply); + /** + * Return a filename that, for any program talking to + * this specific device, will always produce the same + * filename for a given value in name. This file is + * not guaranteed to persist past host reboot. This is + * useful e.g. providing locks for transactions to a + * specific board. Note that this works for all processes + * on this host but isn't smart enough to share the same + * space with other hosts. + */ + std::string device_specific_filename(std::string name); + + /** + * Return a named semaphore that guarantees singleton access + * to misc Hololink device resources, across all processes + * on the current machine. and_uint32 and or_uint32 use this. + */ + NamedLock& lock(); + + /** + * Clears any bits of the given memory location with + * the bits not set in mask. + */ + bool and_uint32(uint32_t address, uint32_t mask); + + /** + * Sets any bits of the given memory location with + * the bits set in the mask. 
+ */ + bool or_uint32(uint32_t address, uint32_t mask); + private: const std::string peer_ip_; const uint32_t control_port_; const std::string serial_number_; uint16_t sequence_ = 0x100; + bool sequence_number_checking_ = true; native::UniqueFileDescriptor control_socket_; uint32_t version_; @@ -453,21 +560,28 @@ class Hololink { std::mutex execute_mutex_; // protects command/response transactions with the device. bool write_uint32_(uint32_t address, uint32_t value, const std::shared_ptr& timeout, - bool response_expected = true); + bool response_expected, bool sequence_check); std::tuple> read_uint32_( - uint32_t address, const std::shared_ptr& timeout); + uint32_t address, const std::shared_ptr& timeout, bool sequence_check); + void add_read_retries(uint32_t n); void add_write_retries(uint32_t n); + // Note that we take the lock_guard as a parameter not because we + // actually use it but because we rely on the caller holding a + // mutex for us-- and forcing them to pass that in here guarantees + // that they have it in the first place. std::tuple, std::shared_ptr> execute( uint16_t sequence, const std::vector& request, std::vector& reply, - const std::shared_ptr& timeout); + const std::shared_ptr& timeout, std::lock_guard&); std::vector receive_control(const std::shared_ptr& timeout); void write_renesas(I2c& i2c, const std::vector& data); - uint16_t next_sequence(); + // See the comment above for execute(...) about why we take + // std::lock_guard as a parameter here. + uint16_t next_sequence(std::lock_guard&); }; } // namespace hololink diff --git a/src/hololink/logging.cpp b/src/hololink/logging.cpp new file mode 100644 index 0000000..e5346f0 --- /dev/null +++ b/src/hololink/logging.cpp @@ -0,0 +1,181 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * See README.md for detailed information. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CONSOLE_LOG +#undef SOCKET_LOG + +namespace hololink::logging { + +HsbLogLevel hsb_log_level = HSB_LOG_LEVEL_INVALID; +static HsbLogLevel hsb_log_level_default = HSB_LOG_LEVEL_INFO; +// Allow "HOLOSCAN_LOG_LEVEL" to set our logging level (along with HSDK applications) +static const char* log_level_environment_variable = "HOLOSCAN_LOG_LEVEL"; +#ifdef SOCKET_LOG +// Socket to send our data to. 
+static int logger_socket = -1; +#endif /* SOCKET_LOG */ + +static void _hsb_logger(char const* file, unsigned line, const char* function, HsbLogLevel level, fmt::string_view format, fmt::format_args args) +{ + std::string message = fmt::vformat(format, args); + // Don't include all the directory information included in "file" + const char* basename = strrchr(file, '/'); + if (basename) { + basename++; // skip the '/' + } else { + basename = file; + } + const char* level_description = "INVALID"; + switch (level) { + case HSB_LOG_LEVEL_TRACE: + level_description = "TRACE"; + break; + case HSB_LOG_LEVEL_DEBUG: + level_description = "DEBUG"; + break; + case HSB_LOG_LEVEL_INFO: + level_description = "INFO"; + break; + case HSB_LOG_LEVEL_WARN: + level_description = "WARN"; + break; + case HSB_LOG_LEVEL_ERROR: + level_description = "ERROR"; + break; + default: + break; + } + pid_t thread_id = gettid(); + std::string msg = fmt::format("{} {}:{} {} tid={:#x} -- {}", level_description, basename, line, function, thread_id, message); + +#ifdef CONSOLE_LOG + fprintf(stderr, "%s\n", msg.c_str()); +#endif /* CONSOLE_LOG */ + +#ifdef SOCKET_LOG + int flags = 0; + ssize_t r = send(logger_socket, msg.data(), msg.size(), flags); + if (r <= 0) { + throw std::runtime_error("hsb_log send failed"); + } +#endif /* SOCKET_LOG */ +} + +static int create_logger_socket(const char* sender_ip, const char* destination_ip = "255.255.255.255") +{ + // Set up the socket. 
+ int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s == -1) { + throw std::runtime_error(fmt::format("create_logger_socket failed to create socket, errno={}({})", errno, strerror(errno))); + } + // Allow us to send broadcast + int enable = 1; + int r = setsockopt(s, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)); + if (r == -1) { + throw std::runtime_error(fmt::format("create_logger_socket, setsockopt failed, errno={}({})", errno, strerror(errno))); + } + // Send from this local address + struct sockaddr_in address = { + .sin_family = AF_INET, + }; + r = inet_pton(AF_INET, sender_ip, (void*)&address.sin_addr); + if (r != 1) { + throw std::runtime_error(fmt::format("create_logger_socket, inet_pton({}) failed, errno={}({})", sender_ip, errno, strerror(errno))); + } + r = bind(s, (struct sockaddr*)&address, sizeof(address)); + if (r == -1) { + throw std::runtime_error(fmt::format("create_logger_socket, bind failed, errno={}({})", errno, strerror(errno))); + } + // Use syslog's destination udp port to make wireshark show us the content + uint16_t port = 514; + address = { + .sin_family = AF_INET, + .sin_port = htons(port), + }; + r = inet_pton(AF_INET, destination_ip, (void*)&address.sin_addr); + if (r != 1) { + throw std::runtime_error(fmt::format("create_logger_socket, inet_pton({}) failed, errno={}({})", destination_ip, errno, strerror(errno))); + } + r = connect(s, (struct sockaddr*)&address, sizeof(address)); + if (r == -1) { + throw std::runtime_error(fmt::format("Failed to connect logger socket, errno={}({})", errno, strerror(errno))); + } + + return s; +} + +static void _initial_hsb_log(char const* file, unsigned line, const char* function, HsbLogLevel level, fmt::string_view format, fmt::format_args args) +{ + // If the application hasn't already rewritten this value, set it + // to a reasonable default. + if (hsb_log_level == HSB_LOG_LEVEL_INVALID) { + hsb_log_level = hsb_log_level_default; + } + + // Allow the environment to override that. 
+ char const* env_log_level = getenv(log_level_environment_variable); + if (env_log_level) { + if (strcasecmp(env_log_level, "TRACE") == 0) { + hsb_log_level = HSB_LOG_LEVEL_TRACE; + } else if (strcasecmp(env_log_level, "DEBUG") == 0) { + hsb_log_level = HSB_LOG_LEVEL_DEBUG; + } else if (strcasecmp(env_log_level, "INFO") == 0) { + hsb_log_level = HSB_LOG_LEVEL_INFO; + } else if (strcasecmp(env_log_level, "WARN") == 0) { + hsb_log_level = HSB_LOG_LEVEL_WARN; + } else if (strcasecmp(env_log_level, "ERROR") == 0) { + hsb_log_level = HSB_LOG_LEVEL_ERROR; + } else { + throw std::runtime_error(fmt::format("Invalid environment setting in \"{}\".", log_level_environment_variable)); + } + } + +#ifdef SOCKET_LOG + // Note that this requires that something listens to this; + // so in another terminal, run 'sudo nc -lkup 514' + logger_socket = create_logger_socket("127.0.0.1", "127.0.0.1"); +#endif /* SOCKET_LOG */ + + // We only need to be called once. + hsb_logger = _hsb_logger; + + // Does this specific logger call still apply? + if (level < hsb_log_level) { + return; + } + + // Then show it. + hsb_logger(file, line, function, level, format, args); +} + +HsbLogger hsb_logger = _initial_hsb_log; + +} // namespace hololink::logging diff --git a/src/hololink/logging.hpp b/src/hololink/logging.hpp new file mode 100644 index 0000000..abb78ff --- /dev/null +++ b/src/hololink/logging.hpp @@ -0,0 +1,85 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * See README.md for detailed information. + */ + +#ifndef SRC_HOLOLINK_LOGGING +#define SRC_HOLOLINK_LOGGING + +#include + +#include +#include + +template <> +struct fmt::formatter : fmt::formatter { + auto format(CUresult cu_result, format_context& ctx) const + { + return fmt::formatter::format(static_cast(cu_result), ctx); + } +}; + +namespace hololink::logging { + +// Supported logging levels. +typedef enum { + HSB_LOG_LEVEL_TRACE = 10, + HSB_LOG_LEVEL_DEBUG = 20, + HSB_LOG_LEVEL_INFO = 30, + HSB_LOG_LEVEL_WARN = 40, + HSB_LOG_LEVEL_ERROR = 50, + + // Special value here allows the logging subsystem to + // initialize itself while still keeping the test for + // logging levels in the application code (so that the + // compiler may be able to skip a lot of fmt::format... + // variable initialization). This value must be lower than + // all the actual levels. + HSB_LOG_LEVEL_INVALID = 0, +} HsbLogLevel; + +// Callback with logging data. +typedef void (*HsbLogger)(char const* file, unsigned line, const char* function, HsbLogLevel level, fmt::string_view format, fmt::format_args args); + +// Controls which logging calls actually result in calls to hsb_logger. +extern HsbLogLevel hsb_log_level; + +// By default this logger writes to stderr; set a new value here +// to visit the data with your own callback. +extern HsbLogger hsb_logger; + +// Forwards a log call to hsb_logger only when level is at or above hsb_log_level. +template +static inline void hsb_log(char const* file, unsigned line, const char* function, HsbLogLevel level, const FormatT& format, ArgsT&&...
args) +{ + if (level >= hsb_log_level) { + hsb_logger(file, line, function, level, format, fmt::make_format_args>>(args...)); + } +} + +} // namespace hololink::logging + +// It's expected that __VA_ARGS__ includes the format string. +#define HSB_LOG(level, ...) hololink::logging::hsb_log(__FILE__, __LINE__, static_cast(__FUNCTION__), level, __VA_ARGS__) + +#define HSB_LOG_TRACE(...) HSB_LOG(hololink::logging::HsbLogLevel::HSB_LOG_LEVEL_TRACE, __VA_ARGS__) +#define HSB_LOG_DEBUG(...) HSB_LOG(hololink::logging::HsbLogLevel::HSB_LOG_LEVEL_DEBUG, __VA_ARGS__) +#define HSB_LOG_INFO(...) HSB_LOG(hololink::logging::HsbLogLevel::HSB_LOG_LEVEL_INFO, __VA_ARGS__) +#define HSB_LOG_WARN(...) HSB_LOG(hololink::logging::HsbLogLevel::HSB_LOG_LEVEL_WARN, __VA_ARGS__) +#define HSB_LOG_ERROR(...) HSB_LOG(hololink::logging::HsbLogLevel::HSB_LOG_LEVEL_ERROR, __VA_ARGS__) + +#endif /* SRC_HOLOLINK_LOGGING */ diff --git a/src/hololink/native/CMakeLists.txt b/src/hololink/native/CMakeLists.txt index bf71468..e984dbb 100644 --- a/src/hololink/native/CMakeLists.txt +++ b/src/hololink/native/CMakeLists.txt @@ -31,6 +31,7 @@ target_include_directories(native target_link_libraries(native PRIVATE holoscan::core + hololink PUBLIC CUDA::cuda_driver CUDA::nvToolsExt diff --git a/src/hololink/native/cuda_helper.cpp b/src/hololink/native/cuda_helper.cpp index 6d7530a..f72e8c4 100644 --- a/src/hololink/native/cuda_helper.cpp +++ b/src/hololink/native/cuda_helper.cpp @@ -21,7 +21,7 @@ #include -#include +#include /** * NvRTC API error check helper @@ -103,7 +103,7 @@ CudaFunctionLauncher::~CudaFunctionLauncher() try { CudaCheck(cuModuleUnload(module_)); } catch (const std::exception& e) { - HOLOSCAN_LOG_ERROR("CudaFunctionLauncher destructor failed with {}", e.what()); + HSB_LOG_ERROR("CudaFunctionLauncher destructor failed with {}", e.what()); } } @@ -150,10 +150,10 @@ CudaContextScopedPush::~CudaContextScopedPush() CUcontext popped_context; CudaCheck(cuCtxPopCurrent(&popped_context)); if 
(popped_context != cuda_context_) { - HOLOSCAN_LOG_ERROR("Cuda: Unexpected context popped"); + HSB_LOG_ERROR("Cuda: Unexpected context popped"); } } catch (const std::exception& e) { - HOLOSCAN_LOG_ERROR("ScopedPush destructor failed with {}", e.what()); + HSB_LOG_ERROR("ScopedPush destructor failed with {}", e.what()); } } diff --git a/src/hololink/native/cuda_helper.hpp b/src/hololink/native/cuda_helper.hpp index e5893db..81ee1c4 100644 --- a/src/hololink/native/cuda_helper.hpp +++ b/src/hololink/native/cuda_helper.hpp @@ -34,19 +34,19 @@ namespace hololink::native { /** * CUDA driver API error check helper */ -#define CudaCheck(FUNC) \ - { \ - const CUresult result = FUNC; \ - if (result != CUDA_SUCCESS) { \ - const char* error_name = ""; \ - cuGetErrorName(result, &error_name); \ - const char* error_string = ""; \ - cuGetErrorString(result, &error_string); \ - std::stringstream buf; \ - buf << "[" << __FILE__ << ":" << __LINE__ << "] CUDA driver error " << result << " (" \ - << error_name << "): " << error_string; \ - throw std::runtime_error(buf.str().c_str()); \ - } \ +#define CudaCheck(FUNC) \ + { \ + const CUresult result = FUNC; \ + if (result != CUDA_SUCCESS) { \ + const char* error_name = ""; \ + cuGetErrorName(result, &error_name); \ + const char* error_string = ""; \ + cuGetErrorString(result, &error_string); \ + std::stringstream buf; \ + buf << "[" << __FILE__ << ":" << __LINE__ << "] CUDA driver error " \ + << result << " (" << error_name << "): " << error_string; \ + throw std::runtime_error(buf.str().c_str()); \ + } \ } /** diff --git a/src/hololink/native/deserializer.hpp b/src/hololink/native/deserializer.hpp index d4edf48..9fa2a99 100644 --- a/src/hololink/native/deserializer.hpp +++ b/src/hololink/native/deserializer.hpp @@ -44,8 +44,8 @@ class Deserializer { return false; } - result = (buffer_[position_ + 0] << 0) | (buffer_[position_ + 1] << 8) - | (buffer_[position_ + 2] << 16) | (buffer_[position_ + 3] << 24); + result = 
(uint32_t(buffer_[position_ + 0]) << 0) | (uint32_t(buffer_[position_ + 1]) << 8) + | (uint32_t(buffer_[position_ + 2]) << 16) | (uint32_t(buffer_[position_ + 3]) << 24); position_ += 4; return true; @@ -85,7 +85,7 @@ class Deserializer { return false; } result = buffer_[position_ + 0]; - result |= (buffer_[position_ + 1] << 8); + result |= (uint16_t(buffer_[position_ + 1]) << 8); position_ += 2; return true; } @@ -138,6 +138,25 @@ class Deserializer { return true; } + // Returns true if result is set; + // false on buffer overflow. + bool next_uint64_le(uint64_t& result) + { + if ((position_ + 8) > limit_) { + return false; + } + result = (uint64_t(buffer_[position_ + 0]) << 0); + result |= (uint64_t(buffer_[position_ + 1]) << 8); + result |= (uint64_t(buffer_[position_ + 2]) << 16); + result |= (uint64_t(buffer_[position_ + 3]) << 24); + result |= (uint64_t(buffer_[position_ + 4]) << 32); + result |= (uint64_t(buffer_[position_ + 5]) << 40); + result |= (uint64_t(buffer_[position_ + 6]) << 48); + result |= (uint64_t(buffer_[position_ + 7]) << 56); + position_ += 8; + return true; + } + // Fetch a pointer to the current offset in the buffer; returns // false on buffer overflow. 
bool pointer(const uint8_t*& pointer, unsigned n) diff --git a/src/hololink/native/networking.cpp b/src/hololink/native/networking.cpp index d487bbe..79c615a 100644 --- a/src/hololink/native/networking.cpp +++ b/src/hololink/native/networking.cpp @@ -26,7 +26,7 @@ #include #include -#include +#include namespace hololink::native { @@ -61,7 +61,7 @@ std::tuple local_ip_and_mac( for (auto&& req : ifreq_buffer) { const std::string name(req.ifr_ifrn.ifrn_name); const in_addr ip = ((struct sockaddr_in*)&req.ifr_ifru.ifru_addr)->sin_addr; - HOLOSCAN_LOG_TRACE("name={} ip={}", name, inet_ntoa(ip)); + HSB_LOG_TRACE("name={} ip={}", name, inet_ntoa(ip)); interface_by_ip[ip.s_addr] = name; } @@ -106,11 +106,64 @@ std::tuple local_ip_and_mac( std::copy(ifhwaddr_request.ifr_ifru.ifru_addr.sa_data, ifhwaddr_request.ifr_ifru.ifru_addr.sa_data + mac.max_size(), mac.begin()); - HOLOSCAN_LOG_DEBUG("destination_ip={} local_ip={} mac_id={:x}:{:x}:{:x}:{:x}:{:x}:{:x}", + HSB_LOG_DEBUG("destination_ip={} local_ip={} mac_id={:x}:{:x}:{:x}:{:x}:{:x}:{:x}", destination_ip, inet_ntoa(ip.sin_addr), mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); return { inet_ntoa(ip.sin_addr), binterface, mac }; } +std::tuple local_ip_and_mac_from_socket(int socket_fd) +{ + // Start with a map of IP address to interfaces. + std::map interface_by_ip; + // First, find out how many interfaces there are. 
+ ifconf ifconf_request {}; + if (ioctl(socket_fd, SIOCGIFCONF, &ifconf_request) < 0) { + throw std::runtime_error( + fmt::format("ioctl failed with errno={}: \"{}\"", errno, strerror(errno))); + } + assert(ifconf_request.ifc_len > 0); + // + std::vector ifreq_buffer(ifconf_request.ifc_len / sizeof(ifreq)); + ifconf_request.ifc_ifcu.ifcu_req = ifreq_buffer.data(); + if (ioctl(socket_fd, SIOCGIFCONF, &ifconf_request) < 0) { + throw std::runtime_error( + fmt::format("ioctl failed with errno={}: \"{}\"", errno, strerror(errno))); + } + assert(ifconf_request.ifc_len == ifreq_buffer.size() * sizeof(ifreq)); + assert(ifconf_request.ifc_ifcu.ifcu_req == ifreq_buffer.data()); + for (auto&& req : ifreq_buffer) { + const std::string name(req.ifr_ifrn.ifrn_name); + const in_addr ip = ((struct sockaddr_in*)&req.ifr_ifru.ifru_addr)->sin_addr; + HSB_LOG_TRACE("name={} ip={}", name, inet_ntoa(ip)); + interface_by_ip[ip.s_addr] = name; + } + + sockaddr_in ip {}; + ip.sin_family = AF_UNSPEC; + socklen_t ip_len = sizeof(ip); + if (getsockname(socket_fd, (sockaddr*)&ip, &ip_len) < 0) { + throw std::runtime_error( + fmt::format("getsockname failed with errno={}: \"{}\"", errno, strerror(errno))); + } + const std::string binterface = interface_by_ip[ip.sin_addr.s_addr]; + + ifreq ifhwaddr_request {}; + std::strncpy(ifhwaddr_request.ifr_ifrn.ifrn_name, binterface.c_str(), + sizeof(ifhwaddr_request.ifr_ifrn.ifrn_name)); + if (ioctl(socket_fd, SIOCGIFHWADDR, &ifhwaddr_request) < 0) { + throw std::runtime_error( + fmt::format("ioctl failed with errno={}: \"{}\"", errno, strerror(errno))); + } + MacAddress mac; + static_assert(mac.max_size() <= sizeof(ifhwaddr_request.ifr_ifru.ifru_addr.sa_data)); + std::copy(ifhwaddr_request.ifr_ifru.ifru_addr.sa_data, + ifhwaddr_request.ifr_ifru.ifru_addr.sa_data + mac.max_size(), + mac.begin()); + HSB_LOG_DEBUG("local_ip={} mac_id={:x}:{:x}:{:x}:{:x}:{:x}:{:x}", + inet_ntoa(ip.sin_addr), mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + return { 
inet_ntoa(ip.sin_addr), binterface, mac }; +} + /** * @brief * diff --git a/src/hololink/native/networking.hpp b/src/hololink/native/networking.hpp index 38c7a18..f7f57c5 100644 --- a/src/hololink/native/networking.hpp +++ b/src/hololink/native/networking.hpp @@ -21,7 +21,9 @@ #include // for close() #include +#include #include +#include #include #include @@ -29,6 +31,24 @@ namespace hololink::native { +// When we recv from a UDP socket, use +// a buffer large enough to accommodate +// a 9k jumbo packet. +constexpr uint32_t UDP_PACKET_SIZE = 10240; + +// All our I/O are aligned to this page size. +constexpr uint32_t PAGE_SIZE = 128; + +// Round value up to the nearest multiple of alignment. +constexpr static uint32_t round_up(uint32_t value, uint32_t alignment) +{ + // This only works when alignment is a power of two. + if (alignment & (alignment - 1)) { + throw std::runtime_error(fmt::format("round_up called with an invalid alignment={:#x}; it must be a power of two.", alignment)); + } + return (value + alignment - 1) & ~(alignment - 1); +} + /// MAC (medium access control) address using MacAddress = std::array; @@ -45,6 +65,14 @@ using UniqueFileDescriptor = std::unique_ptr, Nullable::Delet std::tuple local_ip_and_mac( const std::string& destination_ip, uint32_t port = 1); +/** + * @brief Works only on Linux. + * + * @returns our IP address, interface name, and the MAC ID for the interface that + * socket_fd uses to transmit.
+ */ +std::tuple local_ip_and_mac_from_socket(int socket_fd); + /** * @brief Get the Mac ID for the given interface by name * diff --git a/src/hololink/native/serializer.hpp b/src/hololink/native/serializer.hpp index aae2c3d..1690bd1 100644 --- a/src/hololink/native/serializer.hpp +++ b/src/hololink/native/serializer.hpp @@ -107,6 +107,23 @@ class Serializer { return true; } + bool append_uint64_be(uint64_t value) + { + if ((position_ + 8) > limit_) { + return false; + } + buffer_[position_] = (value >> 56) & 0xFF; + buffer_[position_ + 1] = (value >> 48) & 0xFF; + buffer_[position_ + 2] = (value >> 40) & 0xFF; + buffer_[position_ + 3] = (value >> 32) & 0xFF; + buffer_[position_ + 4] = (value >> 24) & 0xFF; + buffer_[position_ + 5] = (value >> 16) & 0xFF; + buffer_[position_ + 6] = (value >> 8) & 0xFF; + buffer_[position_ + 7] = (value >> 0) & 0xFF; + position_ += 8; + return true; + } + protected: uint8_t* buffer_; size_t limit_; diff --git a/src/hololink/operators/CMakeLists.txt b/src/hololink/operators/CMakeLists.txt index 512a7c1..19f6416 100644 --- a/src/hololink/operators/CMakeLists.txt +++ b/src/hololink/operators/CMakeLists.txt @@ -35,11 +35,10 @@ target_link_libraries(base_receiver_op CUDA::cuda_driver ) -add_subdirectory(csi_to_bayer) if(DEFINED ENV{CONTAINER_TYPE} AND "$ENV{CONTAINER_TYPE}" STREQUAL "igpu") add_subdirectory(argus_isp) endif() -add_subdirectory(gamma_correction) +add_subdirectory(csi_to_bayer) add_subdirectory(image_processor) add_subdirectory(linux_receiver) add_subdirectory(roce_receiver) diff --git a/src/hololink/operators/argus_isp/argus_impl.cpp b/src/hololink/operators/argus_isp/argus_impl.cpp index 3298424..c2afd3d 100644 --- a/src/hololink/operators/argus_isp/argus_impl.cpp +++ b/src/hololink/operators/argus_isp/argus_impl.cpp @@ -17,6 +17,8 @@ #include "argus_impl.hpp" +#include + namespace hololink::operators { ArgusImpl::ArgusImpl(std::shared_ptr cameraProvider) @@ -40,11 +42,11 @@ void ArgusImpl::setup_camera_devices() "Error 
while listing camera devices: " "Unable to get camera devices list from camera provider interface " "(Argus Status: {})", - status)); + static_cast(status))); } if (camera_devices_.size() == 0) { - HOLOSCAN_LOG_WARN("no camera devices are available"); + HSB_LOG_WARN("no camera devices are available"); } else if (camera_devices_.size() > 0) { for (uint32_t i = 0; i < camera_devices_.size(); i++) { Argus::ICameraProperties* i_camera_properties = Argus::interface_cast(camera_devices_[i]); @@ -79,7 +81,7 @@ void ArgusImpl::set_sensor_mode_info(uint32_t sensorModeIndex) throw std::runtime_error("Failed to get sensor mode interface"); } - HOLOSCAN_LOG_INFO(fmt::format("Capturing from mono device using sensor mode {} ({}x{})\n", + HSB_LOG_INFO(fmt::format("Capturing from mono device using sensor mode {} ({}x{})\n", sensorModeIndex, i_sensor_mode_->getResolution().width(), i_sensor_mode_->getResolution().height())); diff --git a/src/hololink/operators/argus_isp/argus_isp.cpp b/src/hololink/operators/argus_isp/argus_isp.cpp index 3632cc8..65a9c5a 100644 --- a/src/hololink/operators/argus_isp/argus_isp.cpp +++ b/src/hololink/operators/argus_isp/argus_isp.cpp @@ -136,7 +136,7 @@ void ArgusIspOp::compute(holoscan::InputContext& input, // get the CUDA stream from the input message gxf_result_t stream_handler_result = cuda_stream_handler_.from_message(context.context(), entity); if (stream_handler_result != GXF_SUCCESS) { - throw std::runtime_error("Failed to get the CUDA stream from incoming messages"); + throw std::runtime_error(fmt::format("Failed to get the CUDA stream from incoming messages: {}", GxfResultStr(stream_handler_result))); } const auto input_tensor = entity.get(); @@ -160,7 +160,7 @@ void ArgusIspOp::compute(holoscan::InputContext& input, if (dtype.code != kDLUInt || dtype.bits != 16) { throw std::runtime_error(fmt::format("Unexpected image data type '(code: {}, bits: {})'," "expected '(code: {}, bits: {})'", - dtype.code, dtype.bits, kDLUInt, 16)); + 
static_cast(dtype.code), dtype.bits, static_cast(kDLUInt), 16)); } const auto input_shape = input_tensor->shape(); @@ -304,7 +304,7 @@ void ArgusIspOp::compute(holoscan::InputContext& input, oSizeROI, npp_stream_ctx_); if (status != NPP_SUCCESS) { - throw std::runtime_error(fmt::format("Failed with \"{}\" to convert NV12 to RGB\n", status)); + throw std::runtime_error(fmt::format("Failed with \"{}\" to convert NV12 to RGB\n", static_cast(status))); } // pass the CUDA stream to the output message stream_handler_result = cuda_stream_handler_.to_message(out_message); diff --git a/src/hololink/operators/base_receiver_op.cpp b/src/hololink/operators/base_receiver_op.cpp index 21ccc95..4c6b8dd 100644 --- a/src/hololink/operators/base_receiver_op.cpp +++ b/src/hololink/operators/base_receiver_op.cpp @@ -21,11 +21,11 @@ #include #include -#include - #include +#include #include +#include /** * @brief This macro defining a YAML converter which throws for unsupported types. @@ -69,62 +69,50 @@ void BaseReceiverOp::setup(holoscan::OperatorSpec& spec) device_stop_, "device_stop", "DeviceStop", "Function to be called to stop the device"); spec.param(frame_context_, "frame_context", "FrameContext", "CUDA context"); spec.param(frame_size_, "frame_size", "FrameSize", "Size of one frame in bytes"); - spec.param(user_frame_memory_, "frame_memory", "FrameMemory", "Frame memory (optional)", 0ull, - holoscan::ParameterFlag::kOptional); + + auto frag = fragment(); + frame_ready_condition_ = frag->make_condition("frame_ready_condition"); + add_arg(frame_ready_condition_); + frame_count_ = 0; } void BaseReceiverOp::start() { - // We'll allocate this for you if you like. 
- if (!user_frame_memory_.has_value() || (user_frame_memory_.get() == 0ull)) { - frame_memory_ = allocate(frame_size_); - } else { - frame_memory_ = user_frame_memory_.get(); - } - - HOLOSCAN_LOG_INFO("frame_size={} frame={}", frame_size_.get(), frame_memory_); - // data_socket_.reset(socket(AF_INET, SOCK_DGRAM, 0)); if (!data_socket_) { throw std::runtime_error("Failed to create socket"); } + hololink_channel_->configure_socket(data_socket_.get()); start_receiver(); - - auto [local_ip, local_port] = local_ip_and_port(); - HOLOSCAN_LOG_INFO("local_ip={} local_port={}", local_ip, local_port); - - hololink_channel_->configure(frame_memory_, frame_size_, local_port); device_start_.get()(); } void BaseReceiverOp::stop() { device_stop_.get()(); - stop_(); - - if (!user_frame_memory_.has_value()) { - // if we allocated the memory, free it - deviceptr_.release(); - host_deviceptr_.release(); - } + stop_receiver(); } void BaseReceiverOp::compute(holoscan::InputContext& input, holoscan::OutputContext& output, holoscan::ExecutionContext& context) { const double timeout_ms = 1000.f; - metadata_ = get_next_frame(timeout_ms); - if (!metadata_) { - if (ok_) { - ok_ = false; - HOLOSCAN_LOG_ERROR("Ingress frame timeout; ignoring."); - } - } else { - ok_ = true; + auto [frame_memory, frame_metadata] = get_next_frame(timeout_ms); + if (!frame_metadata) { + timeout(input, output, context); + // In this case, we have no frame data to write to the application, + // so we'll not produce any output. The rest of the objects in the pipeline + // will be skipped (due to no input) and execution will come back to us. + return; } + ok_ = true; + frame_count_ += 1; + // Clear our asynchronous event + frame_ready_condition_->event_state(holoscan::AsynchronousEventState::EVENT_WAITING); + // Create an Entity and use GXF tensor to wrap the CUDA memory. 
nvidia::gxf::Expected out_message = nvidia::gxf::Entity::New(context.context()); @@ -145,18 +133,54 @@ void BaseReceiverOp::compute(holoscan::InputContext& input, holoscan::OutputCont const uint64_t element_size = nvidia::gxf::PrimitiveTypeSize(element_type); if (!gxf_tensor.value()->wrapMemory(shape, element_type, element_size, nvidia::gxf::ComputeTrivialStrides(shape, element_size), - nvidia::gxf::MemoryStorageType::kDevice, reinterpret_cast(frame_memory_), + nvidia::gxf::MemoryStorageType::kDevice, reinterpret_cast(frame_memory), [](void*) { // release function, nothing to do return nvidia::gxf::Success; })) { throw std::runtime_error("Failed to add wrap memory"); } + // Publish the received metadata to the pipeline. + auto const& meta = metadata(); + for (auto const& x : *frame_metadata) { + // x.second is hololink::Metadata's map content type, + // e.g. std::variant>. + // Poll through our various types in order to figure out what to + // add to meta. + if (std::holds_alternative(x.second)) { + auto value = std::get(x.second); + meta->set(x.first, value); + continue; + } + if (std::holds_alternative(x.second)) { + auto value = std::get(x.second); + meta->set(x.first, value); + continue; + } + if (std::holds_alternative>(x.second)) { + auto value = std::get>(x.second); + meta->set(x.first, value); + continue; + } + throw std::runtime_error(fmt::format("Unable to copy metadata \"{}\".", x.first)); + } // Emit the tensor.
output.emit(out_message.value(), "output"); } -std::shared_ptr BaseReceiverOp::metadata() const { return metadata_; } +void BaseReceiverOp::timeout(holoscan::InputContext& input, holoscan::OutputContext& output, + holoscan::ExecutionContext& context) +{ + if (ok_) { + ok_ = false; + HSB_LOG_ERROR("Ingress frame timeout; ignoring."); + } +} + +void BaseReceiverOp::frame_ready() +{ + frame_ready_condition_->event_state(holoscan::AsynchronousEventState::EVENT_DONE); +} std::tuple BaseReceiverOp::local_ip_and_port() { @@ -174,16 +198,16 @@ std::tuple BaseReceiverOp::local_ip_and_port() return { local_ip, local_port }; } -CUdeviceptr BaseReceiverOp::allocate(size_t size, uint32_t flags) +ReceiverMemoryDescriptor::ReceiverMemoryDescriptor(CUcontext cu_context, size_t size, uint32_t flags) { CudaCheck(cuInit(0)); - CudaCheck(cuCtxSetCurrent(frame_context_)); + CudaCheck(cuCtxSetCurrent(cu_context)); CUdevice device; CudaCheck(cuCtxGetDevice(&device)); int integrated = 0; CudaCheck(cuDeviceGetAttribute(&integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, device)); - HOLOSCAN_LOG_TRACE("integrated={}", integrated); + HSB_LOG_TRACE("integrated={}", integrated); if (integrated == 0) { // We're a discrete GPU device; so allocate using cuMemAlloc/cuMemFree deviceptr_.reset([size] { @@ -191,20 +215,26 @@ CUdeviceptr BaseReceiverOp::allocate(size_t size, uint32_t flags) CudaCheck(cuMemAlloc(&device_deviceptr, size)); return device_deviceptr; }()); - return deviceptr_.get(); + mem_ = deviceptr_.get(); + } else { + // We're an integrated device (e.g. Tegra) so we must allocate + // using cuMemHostAlloc/cuMemFreeHost + host_deviceptr_.reset([size, flags] { + void* host_deviceptr; + CudaCheck(cuMemHostAlloc(&host_deviceptr, size, flags)); + return host_deviceptr; + }()); + + CUdeviceptr device_deviceptr; + CudaCheck(cuMemHostGetDevicePointer(&device_deviceptr, host_deviceptr_.get(), 0)); + mem_ = device_deviceptr; } +} - // We're an integrated device (e.g. 
Tegra) so we must allocate - // using cuMemHostAlloc/cuMemFreeHost - host_deviceptr_.reset([size, flags] { - void* host_deviceptr; - CudaCheck(cuMemHostAlloc(&host_deviceptr, size, flags)); - return host_deviceptr; - }()); - - CUdeviceptr device_deviceptr; - CudaCheck(cuMemHostGetDevicePointer(&device_deviceptr, host_deviceptr_.get(), 0)); - return device_deviceptr; +ReceiverMemoryDescriptor::~ReceiverMemoryDescriptor() +{ + host_deviceptr_.reset(); // reset() runs the handle's deleter; release() (std::unique_ptr semantics) would only drop ownership and leak the allocation + deviceptr_.reset(); // same for the discrete-GPU allocation; the constructor populates only one of the two handles +} } // namespace hololink::operators diff --git a/src/hololink/operators/base_receiver_op.hpp b/src/hololink/operators/base_receiver_op.hpp index 2de3f11..ea76174 100644 --- a/src/hololink/operators/base_receiver_op.hpp +++ b/src/hololink/operators/base_receiver_op.hpp @@ -26,8 +26,7 @@ #include -#include -#include +#include #include #include @@ -39,6 +38,25 @@ class DataChannel; namespace hololink::operators { +class ReceiverMemoryDescriptor { +public: + /** + * Allocate a region of GPU memory which will be freed + * on destruction.
+ */ + explicit ReceiverMemoryDescriptor(CUcontext context, size_t size, uint32_t flags = 0); + ReceiverMemoryDescriptor() = delete; + //~ReceiverMemoryDescriptor() = default; + ~ReceiverMemoryDescriptor(); + + CUdeviceptr get() { return mem_; }; + +protected: + native::UniqueCUdeviceptr deviceptr_; + native::UniqueCUhostptr host_deviceptr_; + CUdeviceptr mem_; +}; + class BaseReceiverOp : public holoscan::Operator { public: HOLOSCAN_OPERATOR_FORWARD_ARGS(BaseReceiverOp); @@ -51,32 +69,29 @@ class BaseReceiverOp : public holoscan::Operator { void compute(holoscan::InputContext&, holoscan::OutputContext& op_output, holoscan::ExecutionContext&) override; - std::shared_ptr metadata() const; - protected: holoscan::Parameter hololink_channel_; holoscan::Parameter> device_start_; holoscan::Parameter> device_stop_; holoscan::Parameter frame_context_; holoscan::Parameter frame_size_; - holoscan::Parameter user_frame_memory_; + std::shared_ptr frame_ready_condition_; + uint64_t frame_count_; native::UniqueFileDescriptor data_socket_; - CUdeviceptr frame_memory_; virtual void start_receiver() = 0; - virtual void stop_() = 0; - virtual std::shared_ptr get_next_frame(double timeout_ms) = 0; + virtual void stop_receiver() = 0; + virtual std::tuple> get_next_frame(double timeout_ms) = 0; virtual std::tuple local_ip_and_port(); + virtual void timeout(holoscan::InputContext& input, holoscan::OutputContext& output, + holoscan::ExecutionContext& context); + + // Subclasses call this in order to queue up a call to compute. 
+ void frame_ready(); private: - std::shared_ptr metadata_; bool ok_ = false; - - native::UniqueCUdeviceptr deviceptr_; - native::UniqueCUhostptr host_deviceptr_; - - CUdeviceptr allocate(size_t size, uint32_t flags = 0); }; } // namespace hololink::operators diff --git a/src/hololink/operators/csi_to_bayer/csi_to_bayer.cpp b/src/hololink/operators/csi_to_bayer/csi_to_bayer.cpp index e490267..6cd100d 100644 --- a/src/hololink/operators/csi_to_bayer/csi_to_bayer.cpp +++ b/src/hololink/operators/csi_to_bayer/csi_to_bayer.cpp @@ -17,6 +17,7 @@ #include "csi_to_bayer.hpp" +#include #include #include @@ -150,7 +151,7 @@ void CsiToBayerOp::compute(holoscan::InputContext& input, holoscan::OutputContex gxf_result_t stream_handler_result = cuda_stream_handler_.from_message(context.context(), entity); if (stream_handler_result != GXF_SUCCESS) { - throw std::runtime_error("Failed to get the CUDA stream from incoming messages"); + throw std::runtime_error(fmt::format("Failed to get the CUDA stream from incoming messages: {}", GxfResultStr(stream_handler_result))); } const auto maybe_tensor = entity.get(); @@ -163,7 +164,7 @@ void CsiToBayerOp::compute(holoscan::InputContext& input, holoscan::OutputContex if (input_tensor->storage_type() == nvidia::gxf::MemoryStorageType::kHost) { if (!is_integrated_ && !host_memory_warning_) { host_memory_warning_ = true; - HOLOSCAN_LOG_WARN( + HSB_LOG_WARN( "The input tensor is stored in host memory, this will reduce performance of this " "operator. For best performance store the input tensor in device memory."); } diff --git a/src/hololink/operators/gamma_correction/gamma_correction.cpp b/src/hololink/operators/gamma_correction/gamma_correction.cpp deleted file mode 100644 index cdf64e2..0000000 --- a/src/hololink/operators/gamma_correction/gamma_correction.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gamma_correction.hpp" - -#include -#include - -namespace { - -const char* source = R"( -extern "C" { - -/** - * Apply gamma correction. - * - * @param in [in] pointer to image - * @param components [in] components per pixel - * @param width [in] width of the image - * @param height [in] height of the image - */ -__global__ void applyGammaCorrection(unsigned short *image, - int components, - int width, - int height) -{ - int idx_x = blockIdx.x * blockDim.x + threadIdx.x; - int idx_y = blockIdx.y * blockDim.y + threadIdx.y; - - if ((idx_x >= width) || (idx_y >= height)) - return; - - const int index = (idx_y * width + idx_x) * components; - const float range = (1 << (sizeof(unsigned short) * 8)) - 1; - - // apply gamma correction to each component except alpha - for (int component = 0; component < min(components, 3); ++component) { - float value = (float)(image[index + component]); - value = powf(value / range, 1.f / GAMMA) * range; - image[index + component] = (unsigned short)(value + 0.5f); - } -} - -})"; - -} // anonymous namespace - -namespace hololink::operators { - -void GammaCorrectionOp::setup(holoscan::OperatorSpec& spec) -{ - spec.input("input"); - spec.output("output"); - - spec.param(gamma_, "gamma", "Gamma", "Gamma correction value", 2.2f); - spec.param( - cuda_device_ordinal_, "cuda_device_ordinal", "CudaDeviceOrdinal", "Device to use for 
CUDA operations", 0); - cuda_stream_handler_.define_params(spec); -} - -void GammaCorrectionOp::start() -{ - CudaCheck(cuInit(0)); - CUdevice device; - CudaCheck(cuDeviceGet(&cuda_device_, cuda_device_ordinal_.get())); - CudaCheck(cuDevicePrimaryCtxRetain(&cuda_context_, cuda_device_)); - int integrated = 0; - CudaCheck(cuDeviceGetAttribute(&integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, cuda_device_)); - is_integrated_ = (integrated != 0); - - hololink::native::CudaContextScopedPush cur_cuda_context(cuda_context_); - - cuda_function_launcher_.reset(new hololink::native::CudaFunctionLauncher( - source, { "applyGammaCorrection" }, { fmt::format("-D GAMMA={}", gamma_.get()) })); -} - -void GammaCorrectionOp::stop() -{ - hololink::native::CudaContextScopedPush cur_cuda_context(cuda_context_); - - cuda_function_launcher_.reset(); - - CudaCheck(cuDevicePrimaryCtxRelease(cuda_device_)); - cuda_context_ = nullptr; -} - -void GammaCorrectionOp::compute(holoscan::InputContext& input, holoscan::OutputContext& output, holoscan::ExecutionContext& context) -{ - auto maybe_entity = input.receive("input"); - if (!maybe_entity) { - throw std::runtime_error("Failed to receive input"); - } - - auto& entity = static_cast(maybe_entity.value()); - - // get the CUDA stream from the input message - gxf_result_t stream_handler_result = cuda_stream_handler_.from_message(context.context(), entity); - if (stream_handler_result != GXF_SUCCESS) { - throw std::runtime_error("Failed to get the CUDA stream from incoming messages"); - } - - const auto maybe_tensor = entity.get(); - if (!maybe_tensor) { - throw std::runtime_error("Tensor not found in message"); - } - - const auto input_tensor = maybe_tensor.value(); - - if (input_tensor->storage_type() == nvidia::gxf::MemoryStorageType::kHost) { - if (!is_integrated_ && !host_memory_warning_) { - host_memory_warning_ = true; - HOLOSCAN_LOG_WARN( - "The input tensor is stored in host memory, this will reduce performance of this " - "operator. 
For best performance store the input tensor in device memory."); - } - } else if (input_tensor->storage_type() != nvidia::gxf::MemoryStorageType::kDevice) { - throw std::runtime_error( - fmt::format("Unsupported storage type {}", (int)input_tensor->storage_type())); - } - - if (input_tensor->rank() != 3) { - throw std::runtime_error("Tensor must be an image"); - } - if (input_tensor->element_type() != nvidia::gxf::PrimitiveType::kUnsigned16) { - throw std::runtime_error(fmt::format("Unexpected image data type '{}', expected '{}'", int(input_tensor->element_type()), int(nvidia::gxf::PrimitiveType::kUnsigned16))); - } - - const uint32_t height = input_tensor->shape().dimension(0); - const uint32_t width = input_tensor->shape().dimension(1); - const uint32_t components = input_tensor->shape().dimension(2); - - hololink::native::CudaContextScopedPush cur_cuda_context(cuda_context_); - const cudaStream_t cuda_stream = cuda_stream_handler_.get_cuda_stream(context.context()); - - if (gamma_ != 1.f) { - cuda_function_launcher_->launch( - "applyGammaCorrection", - { width, height, 1 }, - cuda_stream, - input_tensor->pointer(), components, width, height); - } - - // pass the CUDA stream to the output message - auto out_message = nvidia::gxf::Expected(entity); - stream_handler_result - = cuda_stream_handler_.to_message(out_message); - if (stream_handler_result != GXF_SUCCESS) { - throw std::runtime_error("Failed to add the CUDA stream to the outgoing messages"); - } - - // Emit the tensor - output.emit(entity); -} - -} // namespace hololink::operators diff --git a/src/hololink/operators/gamma_correction/gamma_correction.hpp b/src/hololink/operators/gamma_correction/gamma_correction.hpp deleted file mode 100644 index c6b432c..0000000 --- a/src/hololink/operators/gamma_correction/gamma_correction.hpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef SRC_HOLOLINK_OPERATORS_GAMMA_CORRECTION_GAMMA_CORRECTION -#define SRC_HOLOLINK_OPERATORS_GAMMA_CORRECTION_GAMMA_CORRECTION - -#include - -#include -#include -#include - -#include - -namespace hololink::native { - -class CudaFunctionLauncher; - -} // namespace hololink::native - -namespace hololink::operators { - -class GammaCorrectionOp : public holoscan::Operator { -public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(GammaCorrectionOp); - - void start() override; - void stop() override; - void setup(holoscan::OperatorSpec& spec) override; - void compute(holoscan::InputContext&, holoscan::OutputContext& op_output, holoscan::ExecutionContext&) override; - -private: - holoscan::Parameter gamma_; - holoscan::Parameter cuda_device_ordinal_; - - CUcontext cuda_context_ = nullptr; - CUdevice cuda_device_ = 0; - bool is_integrated_ = false; - bool host_memory_warning_ = false; - - holoscan::CudaStreamHandler cuda_stream_handler_; - - std::shared_ptr cuda_function_launcher_; -}; - -} // namespace hololink::operators - -#endif /* SRC_HOLOLINK_OPERATORS_GAMMA_CORRECTION_GAMMA_CORRECTION */ diff --git a/src/hololink/operators/image_processor/image_processor.cpp b/src/hololink/operators/image_processor/image_processor.cpp index 0621e6e..f599b01 100644 --- a/src/hololink/operators/image_processor/image_processor.cpp +++ 
b/src/hololink/operators/image_processor/image_processor.cpp @@ -17,6 +17,7 @@ #include "image_processor.hpp" +#include #include #include @@ -348,7 +349,7 @@ void ImageProcessorOp::compute(holoscan::InputContext& input, holoscan::OutputCo // get the CUDA stream from the input message gxf_result_t stream_handler_result = cuda_stream_handler_.from_message(context.context(), entity); if (stream_handler_result != GXF_SUCCESS) { - throw std::runtime_error("Failed to get the CUDA stream from incoming messages"); + throw std::runtime_error(fmt::format("Failed to get the CUDA stream from incoming messages: {}", GxfResultStr(stream_handler_result))); } const auto maybe_tensor = entity.get(); @@ -361,7 +362,7 @@ void ImageProcessorOp::compute(holoscan::InputContext& input, holoscan::OutputCo if (input_tensor->storage_type() == nvidia::gxf::MemoryStorageType::kHost) { if (!is_integrated_ && !host_memory_warning_) { host_memory_warning_ = true; - HOLOSCAN_LOG_WARN( + HSB_LOG_WARN( "The input tensor is stored in host memory, this will reduce performance of this " "operator. For best performance store the input tensor in device memory."); } diff --git a/src/hololink/operators/linux_receiver/CMakeLists.txt b/src/hololink/operators/linux_receiver/CMakeLists.txt index 416f1c2..29c9edc 100644 --- a/src/hololink/operators/linux_receiver/CMakeLists.txt +++ b/src/hololink/operators/linux_receiver/CMakeLists.txt @@ -30,5 +30,6 @@ target_include_directories(linux_receiver target_link_libraries(linux_receiver PRIVATE hololink::native + hololink::operators::base_receiver_op CUDA::cuda_driver ) diff --git a/src/hololink/operators/linux_receiver/linux_receiver.cpp b/src/hololink/operators/linux_receiver/linux_receiver.cpp index 60a4e2e..e10d314 100644 --- a/src/hololink/operators/linux_receiver/linux_receiver.cpp +++ b/src/hololink/operators/linux_receiver/linux_receiver.cpp @@ -32,13 +32,12 @@ #include +#include +#include #include +#include #include -#define TRACE(fmt, ...) 
/* ignored */ -#define DEBUG(fmt...) fprintf(stderr, "DEBUG -- " fmt) -#define ERROR(fmt...) fprintf(stderr, "ERROR -- " fmt) - #define NUM_OF(x) (sizeof(x) / sizeof(x[0])) namespace hololink::operators { @@ -73,10 +72,12 @@ class LinuxReceiverDescriptor { LinuxReceiver::LinuxReceiver(CUdeviceptr cu_buffer, size_t cu_buffer_size, - int socket) + int socket, + uint64_t received_address_offset) : cu_buffer_(cu_buffer) , cu_buffer_size_(cu_buffer_size) , socket_(socket) + , received_address_offset_(received_address_offset) , ready_(false) , exit_(false) , ready_mutex_(PTHREAD_MUTEX_INITIALIZER) @@ -87,17 +88,18 @@ LinuxReceiver::LinuxReceiver(CUdeviceptr cu_buffer, , available_(NULL) , busy_(NULL) , cu_stream_(0) + , frame_ready_([](const LinuxReceiver&) {}) { int r = pthread_mutex_init(&ready_mutex_, NULL); if (r != 0) { - ERROR("pthread_mutex_init failed, r=%d.\n", r); + throw std::runtime_error("pthread_mutex_init failed."); } pthread_condattr_t pthread_condattr; pthread_condattr_init(&pthread_condattr); pthread_condattr_setclock(&pthread_condattr, CLOCK_MONOTONIC); r = pthread_cond_init(&ready_condition_, &pthread_condattr); if (r != 0) { - ERROR("pthread_cond_init failed, r=%d.\n", r); + throw std::runtime_error("pthread_cond_init failed."); } // set the receive timeout, we use that to check to periodically return from the @@ -106,13 +108,13 @@ LinuxReceiver::LinuxReceiver(CUdeviceptr cu_buffer, timeout.tv_sec = 0; timeout.tv_usec = 100000; if (setsockopt(socket_, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) < 0) { - ERROR("setsockopt failed errno=%d.\n", (int)errno); + throw std::runtime_error(fmt::format("setsockopt failed errno={}", errno)); } // See get_next_frame. 
CUresult cu_result = cuStreamCreate(&cu_stream_, CU_STREAM_NON_BLOCKING); if (cu_result != CUDA_SUCCESS) { - ERROR("cuSteramCreate failed, cu_result=%d.\n", (int)cu_result); + throw std::runtime_error(fmt::format("cuStreamCreate failed, cu_result={}.", cu_result)); } } @@ -124,7 +126,7 @@ LinuxReceiver::~LinuxReceiver() void LinuxReceiver::run() { - DEBUG("Starting.\n"); + HSB_LOG_DEBUG("Starting."); native::NvtxTrace::setThreadName("linux_receiver"); // Round the buffer size up to 64k @@ -133,8 +135,7 @@ void LinuxReceiver::run() // Allocate three pages, details below CUresult cu_result = cuMemHostAlloc((void**)(&local_), buffer_size * 3, CU_MEMHOSTALLOC_WRITECOMBINED); if (cu_result != CUDA_SUCCESS) { - ERROR("cuMemHostAlloc failed, cu_result=%d.\n", (int)cu_result); - return; + throw std::runtime_error(fmt::format("cuMemHostAlloc failed, cu_result={}.", cu_result)); } // Construct a descriptor for each page LinuxReceiverDescriptor d0(&local_[buffer_size * 0]); @@ -148,7 +149,7 @@ void LinuxReceiver::run() available_.store(&d2); // Received UDP message goes here. - uint8_t received[8192]; + uint8_t received[hololink::native::UDP_PACKET_SIZE]; unsigned frame_count = 0, packet_count = 0; unsigned frame_packets_received = 0, frame_bytes_received = 0; @@ -163,11 +164,13 @@ void LinuxReceiver::run() int recv_errno = errno; // Get the clock as close to the packet receipt as possible. 
- if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) { - ERROR("clock_gettime failed, errno=%d.\n", (int)errno); + if (clock_gettime(CLOCK_REALTIME, &now) != 0) { + HSB_LOG_ERROR("clock_gettime failed, errno={}", errno); break; } + HSB_LOG_TRACE("received_bytes={} recv_errno={}.", received_bytes, recv_errno); + if (received_bytes <= 0) { // check if there is a timeout if ((recv_errno == EAGAIN) || (recv_errno == EWOULDBLOCK) || (recv_errno == EINTR)) { @@ -178,7 +181,7 @@ void LinuxReceiver::run() // if not, continue continue; } - ERROR("recv returned received_bytes=%d, recv_errno=%d.\n", (int)received_bytes, (int)recv_errno); + HSB_LOG_ERROR("recv returned received_bytes={}, recv_errno={}", received_bytes, recv_errno); break; } @@ -201,7 +204,7 @@ void LinuxReceiver::run() && deserializer.next_uint24_be(qp) && deserializer.next_uint8(ack_request) && deserializer.next_uint24_be(psn))) { - ERROR("Unable to decode runt IB request, received_bytes=%d.\n", (int)received_bytes); + HSB_LOG_ERROR("Unable to decode runt IB request, received_bytes={}", received_bytes); break; } @@ -227,9 +230,10 @@ void LinuxReceiver::run() && deserializer.next_uint32_be(rkey) && deserializer.next_uint32_be(size) && deserializer.pointer(content, size)) { - TRACE("opcode=2A address=0x%llX size=0x%X\n", (unsigned long long)address, (unsigned)size); - if ((address >= cu_buffer_) && (address + size <= (cu_buffer_ + cu_buffer_size_))) { - uint64_t offset = address - cu_buffer_; + HSB_LOG_TRACE("opcode=2A address={:x} size={:x}", address, size); + uint64_t target_address = address + received_address_offset_; + if ((target_address >= cu_buffer_) && (target_address + size <= (cu_buffer_ + cu_buffer_size_))) { + uint64_t offset = target_address - cu_buffer_; memcpy(&receiving->memory_[offset], content, size); frame_bytes_received += size; } @@ -246,9 +250,10 @@ void LinuxReceiver::run() frame_count++; native::NvtxTrace::event_u64("frame_count", frame_count); - TRACE("opcode=2B address=0x%llX 
size=0x%X\n", (unsigned long long)address, (unsigned)size); - if ((address >= cu_buffer_) && (address + size <= (cu_buffer_ + cu_buffer_size_))) { - uint64_t offset = address - cu_buffer_; + HSB_LOG_TRACE("opcode=2B address={:#x} size={:x}", address, size); + uint64_t target_address = address + received_address_offset_; + if ((target_address >= cu_buffer_) && (target_address + size <= (cu_buffer_ + cu_buffer_size_))) { + uint64_t offset = target_address - cu_buffer_; memcpy(&receiving->memory_[offset], content, size); frame_bytes_received += size; } @@ -260,6 +265,7 @@ void LinuxReceiver::run() // was in available_ (but not consumed by // the application) // - signal the pipeline so it wakes up if necessary. + Hololink::FrameMetadata frame_metadata = Hololink::deserialize_metadata(content, size); LinuxReceiverMetadata& metadata = receiving->metadata_; metadata.frame_packets_received = frame_packets_received; metadata.frame_bytes_received = frame_bytes_received; @@ -270,9 +276,9 @@ void LinuxReceiver::run() metadata.frame_end_ns = now.tv_nsec; metadata.imm_data = imm_data; metadata.packets_dropped = packets_dropped; - metadata.received_ns = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + metadata.received_s = now.tv_sec; + metadata.received_ns = now.tv_nsec; + metadata.frame_metadata = frame_metadata; receiving = available_.exchange(receiving); signal(); @@ -284,7 +290,7 @@ void LinuxReceiver::run() break; } - ERROR("Unable to decode IB request with opcode=0x%X.\n", (unsigned)opcode); + HSB_LOG_ERROR("Unable to decode IB request with opcode={:x}", opcode); } while (false); } @@ -293,28 +299,31 @@ void LinuxReceiver::run() cu_result = cuMemFreeHost((void*)(local_)); if (cu_result != CUDA_SUCCESS) { - ERROR("cuMemFreeHost failed, cu_result=%d.\n", (int)cu_result); + HSB_LOG_ERROR("cuMemFreeHost failed, cu_result={}", cu_result); return; } local_ = NULL; - DEBUG("Done.\n"); + HSB_LOG_DEBUG("Done."); } void 
LinuxReceiver::signal() { int r = pthread_mutex_lock(&ready_mutex_); if (r != 0) { - ERROR("pthread_mutex_lock returned r=%d.\n", r); + throw std::runtime_error(fmt::format("pthread_mutex_lock returned r={}.", r)); } ready_ = true; r = pthread_cond_signal(&ready_condition_); if (r != 0) { - ERROR("pthread_cond_signal returned r=%d.\n", r); + throw std::runtime_error(fmt::format("pthread_cond_signal returned r={}.", r)); } r = pthread_mutex_unlock(&ready_mutex_); if (r != 0) { - ERROR("pthread_mutex_unlock returned r=%d.\n", r); + throw std::runtime_error(fmt::format("pthread_mutex_unlock returned r={}.", r)); } + // Provide the local callback, letting the application know + // that get_next_frame won't block. + frame_ready_(this[0]); } bool LinuxReceiver::get_next_frame(unsigned timeout_ms, LinuxReceiverMetadata& metadata) @@ -330,19 +339,19 @@ bool LinuxReceiver::get_next_frame(unsigned timeout_ms, LinuxReceiverMetadata& m // finishes before the pipeline uses the destination buffer. CUresult cu_result = cuMemcpyHtoDAsync(cu_buffer_, busy_->memory_, cu_buffer_size_, cu_stream_); if (cu_result != CUDA_SUCCESS) { - ERROR("cuMemcpyHtoD failed, cu_result=%d.\n", (int)cu_result); + HSB_LOG_ERROR("cuMemcpyHtoDAsync failed, cu_result={}", cu_result); r = false; } else { cu_result = cuStreamSynchronize(cu_stream_); if (cu_result != CUDA_SUCCESS) { - ERROR("cuStreamSynchronize failed, cu_result=%d.\n", (int)cu_result); + HSB_LOG_ERROR("cuStreamSynchronize failed, cu_result={}", cu_result); r = false; } } metadata = busy_->metadata_; } else { // run() exited. 
- ERROR("get_next_frame failed, receiver has terminated.\n"); + HSB_LOG_ERROR("get_next_frame failed, receiver has terminated."); r = false; } } @@ -353,12 +362,11 @@ bool LinuxReceiver::wait(unsigned timeout_ms) { int status = pthread_mutex_lock(&ready_mutex_); if (status != 0) { - ERROR("pthread_mutex_lock returned status=%d.\n", status); - return false; + throw std::runtime_error(fmt::format("pthread_mutex_lock returned status={}.", status)); } struct timespec now; if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) { - ERROR("clock_gettime failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("clock_gettime failed, errno={}", errno); } struct timespec timeout = add_ms(now, timeout_ms); @@ -368,7 +376,7 @@ bool LinuxReceiver::wait(unsigned timeout_ms) break; } if (status != 0) { - ERROR("pthread_cond_wait returned status=%d.\n", status); + HSB_LOG_ERROR("pthread_cond_wait returned status={}", status); break; } } @@ -376,7 +384,7 @@ bool LinuxReceiver::wait(unsigned timeout_ms) ready_ = false; status = pthread_mutex_unlock(&ready_mutex_); if (status != 0) { - ERROR("pthread_mutex_unlock returned status=%d.\n", status); + throw std::runtime_error(fmt::format("pthread_mutex_unlock returned status={}.", status)); } return r; } @@ -386,4 +394,9 @@ void LinuxReceiver::close() exit_ = true; } +void LinuxReceiver::set_frame_ready(std::function frame_ready) +{ + frame_ready_ = frame_ready; +} + } // namespace hololink::operators diff --git a/src/hololink/operators/linux_receiver/linux_receiver.hpp b/src/hololink/operators/linux_receiver/linux_receiver.hpp index 770a18f..ddd2f03 100644 --- a/src/hololink/operators/linux_receiver/linux_receiver.hpp +++ b/src/hololink/operators/linux_receiver/linux_receiver.hpp @@ -26,6 +26,8 @@ #include +#include + namespace hololink::operators { class LinuxReceiverMetadata { @@ -39,19 +41,12 @@ class LinuxReceiverMetadata { uint64_t frame_end_s; uint64_t frame_end_ns; uint32_t imm_data; - int64_t received_ns; // ns from the PTP epoch. 
+ int64_t received_s; + int64_t received_ns; // Data accumulated over the life of the application - // uint64_t packets_received; - // uint64_t frames_received; - // uint64_t frames_dropped; - // uint64_t frames_timed_out; - // uint64_t frames_seen; - // uint64_t checksum_errors; - // uint64_t unexpected_byte_counters; - // uint64_t unexpected_packet_counts; - // uint64_t data_overflow; uint64_t packets_dropped; - // uint64_t rejected; + // Data received directly from HSB. + Hololink::FrameMetadata frame_metadata; }; class LinuxReceiverDescriptor; @@ -60,7 +55,8 @@ class LinuxReceiver { public: LinuxReceiver(CUdeviceptr cu_buffer, size_t cu_buffer_size, - int socket); + int socket, + uint64_t received_address_offset); ~LinuxReceiver(); @@ -89,6 +85,12 @@ class LinuxReceiver { uint32_t get_rkey() { return rkey_; }; + /** + * If the application schedules the call to get_next_frame after this + * callback occurs, then get_next_frame won't block. + */ + void set_frame_ready(std::function frame_ready); + protected: // Blocks execution until signal() is called; // @returns false if timeout_ms elapses before @@ -102,6 +104,7 @@ class LinuxReceiver { CUdeviceptr cu_buffer_; size_t cu_buffer_size_; int socket_; + uint64_t received_address_offset_; bool volatile ready_; bool volatile exit_; pthread_mutex_t ready_mutex_; @@ -112,6 +115,7 @@ class LinuxReceiver { std::atomic available_; LinuxReceiverDescriptor* busy_; CUstream cu_stream_; // Used to control cuMemcpyHtoDAsync. + std::function frame_ready_; }; } // namespace hololink::operators diff --git a/src/hololink/operators/roce_receiver/roce_receiver.cpp b/src/hololink/operators/roce_receiver/roce_receiver.cpp index 28165f9..518191a 100644 --- a/src/hololink/operators/roce_receiver/roce_receiver.cpp +++ b/src/hololink/operators/roce_receiver/roce_receiver.cpp @@ -26,13 +26,9 @@ #include #include -#include +#include #include -#define TRACE(fmt, ...) /* ignored */ -#define DEBUG(fmt...) 
fprintf(stderr, "DEBUG -- " fmt) -#define ERROR(fmt...) fprintf(stderr, "ERROR -- " fmt) - #undef PERIODIC_STATUS #define NUM_OF(x) (sizeof(x) / sizeof(x[0])) @@ -44,17 +40,28 @@ RoceReceiver::RoceReceiver( unsigned ibv_port, CUdeviceptr cu_buffer, size_t cu_buffer_size, + size_t cu_frame_size, + size_t cu_page_size, + unsigned pages, + size_t metadata_offset, const char* peer_ip) : ibv_name_(strdup(ibv_name)) , ibv_port_(ibv_port) , cu_buffer_(cu_buffer) , cu_buffer_size_(cu_buffer_size) + , cu_frame_size_(cu_frame_size) + , cu_page_size_(cu_page_size) + , pages_(pages) + , metadata_offset_(metadata_offset) , peer_ip_(strdup(peer_ip)) , ib_qp_(NULL) , ib_mr_(NULL) , ib_cq_(NULL) , ib_pd_(NULL) , ib_context_(NULL) + , ib_completion_channel_(NULL) + , qp_number_(0) + , rkey_(0) , ready_(false) , ready_mutex_(PTHREAD_MUTEX_INITIALIZER) , ready_condition_(PTHREAD_COND_INITIALIZER) @@ -62,30 +69,35 @@ RoceReceiver::RoceReceiver( , frame_number_(0) , rx_write_requests_fd_(-1) , rx_write_requests_(0) - , frame_end_ {} + , received_ {} , imm_data_(0) , event_time_ {} - , received_ns_(0) + , current_buffer_(0) + , metadata_stream_(0) + , dropped_(0) + , received_psn_(0) + , received_page_(0) + , frame_ready_([](const RoceReceiver&) {}) { - DEBUG("cu_buffer=0x%llX cu_buffer_size=%u\n", - (unsigned long long)cu_buffer, (unsigned)cu_buffer_size); + HSB_LOG_DEBUG("cu_buffer={:#x} cu_frame_size={:#x} cu_page_size={} pages={}", + cu_buffer, cu_frame_size, cu_page_size, pages); int r = pthread_mutex_init(&ready_mutex_, NULL); if (r != 0) { - ERROR("pthread_mutex_init failed, r=%d.\n", r); + throw std::runtime_error("pthread_mutex_init failed."); } pthread_condattr_t pthread_condattr; pthread_condattr_init(&pthread_condattr); pthread_condattr_setclock(&pthread_condattr, CLOCK_MONOTONIC); r = pthread_cond_init(&ready_condition_, &pthread_condattr); if (r != 0) { - ERROR("pthread_cond_init failed, r=%d.\n", r); + throw std::runtime_error("pthread_cond_init failed."); } int 
pipe_fds[2] = { -1, -1 }; // If these aren't updated, we'll get an error when we try to read, which is good. r = pipe(pipe_fds); if (r != 0) { - ERROR("Pipe failed.\n"); + throw std::runtime_error("pipe call failed."); } control_r_ = pipe_fds[0]; control_w_ = pipe_fds[1]; @@ -95,15 +107,29 @@ RoceReceiver::RoceReceiver( "/sys/class/infiniband/%s/ports/%d/hw_counters/rx_write_requests", ibv_name_, ibv_port_); if (written < 0) { - ERROR("Error writing to rx_write_requests_filename.\n"); + throw std::runtime_error("Error writing to rx_write_requests_filename."); } else if (((size_t)written) >= sizeof(rx_write_requests_filename)) { - ERROR("Buffer isn't large enough to compute rx_write_requests filename.\n"); + throw std::runtime_error("Buffer isn't large enough to compute rx_write_requests filename."); } else { rx_write_requests_fd_ = open(rx_write_requests_filename, O_RDONLY); } if (rx_write_requests_fd_ < 0) { - ERROR("Unable to fetch rx_write_requests.\n"); + // Note that the rest of the code is OK if this occurs. + HSB_LOG_ERROR("Unable to fetch rx_write_requests; ignoring."); + } + + // We use this to synchronize metadata readout. + CUresult cu_result = cuStreamCreate(&metadata_stream_, CU_STREAM_NON_BLOCKING); + if (cu_result != CUDA_SUCCESS) { + throw std::runtime_error(fmt::format("cuStreamCreate failed, cu_result={}.", cu_result)); + } + + // Set metadata_buffer_ content to some value that's easily distinguished. 
+ cu_result = cuMemHostAlloc((void**)&metadata_buffer_, hololink::METADATA_SIZE, 0); + if (cu_result != CUDA_SUCCESS) { + throw std::runtime_error(fmt::format("cuMemHostAlloc failed, cu_result={}.", cu_result)); } + memset(metadata_buffer_, 0xEE, hololink::METADATA_SIZE); } RoceReceiver::~RoceReceiver() @@ -111,11 +137,15 @@ RoceReceiver::~RoceReceiver() pthread_cond_destroy(&ready_condition_); pthread_mutex_destroy(&ready_mutex_); ::close(rx_write_requests_fd_); // we ignore an error here if fd==-1 + cuMemFreeHost(metadata_buffer_); + + free(ibv_name_); + free(peer_ip_); } bool RoceReceiver::start() { - DEBUG("Starting.\n"); + HSB_LOG_DEBUG("Starting."); // ibv calls seem to have trouble with // reentrancy. No problem; since we're only @@ -128,17 +158,17 @@ bool RoceReceiver::start() int num_devices = 0; struct ibv_device** ib_devices = ibv_get_device_list(&num_devices); if (!ib_devices) { - ERROR("ibv_get_device_list failed; errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_get_device_list failed; errno={}.", errno); return false; } if (num_devices < 0) { - ERROR("ibv_get_device_list set unexpected value for num_devices=%d.\n", num_devices); + HSB_LOG_ERROR("ibv_get_device_list set unexpected value for num_devices={}.", num_devices); return false; } struct ibv_device* ib_device = NULL; for (unsigned i = 0; i < (unsigned)num_devices; i++) { const char* device_name = ibv_get_device_name(ib_devices[i]); - DEBUG("ibv_get_device_list[%d]=%s.\n", i, device_name); + HSB_LOG_DEBUG("ibv_get_device_list[{}]={}.", i, device_name); if (strcmp(device_name, ibv_name_) != 0) { continue; } @@ -146,7 +176,7 @@ bool RoceReceiver::start() break; } if (ib_device == NULL) { - ERROR("ibv_get_device_list didnt find a device named \"%s\".\n", ibv_name_); + HSB_LOG_ERROR("ibv_get_device_list didnt find a device named \"{}\".", ibv_name_); ibv_free_device_list(ib_devices); return false; } @@ -154,7 +184,7 @@ bool RoceReceiver::start() // Open the IB device ib_context_ = 
ibv_open_device(ib_device); if (!ib_context_) { - ERROR("ibv_open_device failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_open_device failed, errno={}.", errno); return false; } ibv_free_device_list(ib_devices); // Note that "ib_device" is invalid after this. @@ -166,21 +196,21 @@ bool RoceReceiver::start() int flags = fcntl(ib_context_->async_fd, F_GETFL); int r = fcntl(ib_context_->async_fd, F_SETFL, flags | O_NONBLOCK); if (r < 0) { - ERROR("Can't configure async_fd=%d with O_NONBLOCK, errno=%d.\n", (int)ib_context_->async_fd, (int)errno); + HSB_LOG_ERROR("Can't configure async_fd={} with O_NONBLOCK, errno={}.", ib_context_->async_fd, errno); return false; } // struct ibv_device_attr ib_device_attr = { 0 }; // C fills the rest with 0s if (ibv_query_device(ib_context_, &ib_device_attr)) { - ERROR("ibv_query_device failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_query_device failed, errno={}.", errno); free_ib_resources(); return false; } struct ibv_port_attr ib_port_attr = { .flags = 0 }; // C fills the rest with 0s if (ibv_query_port(ib_context_, ibv_port_, &ib_port_attr)) { - ERROR("ibv_query_port failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_query_port failed, errno={}.", errno); free_ib_resources(); return false; } @@ -197,7 +227,7 @@ bool RoceReceiver::start() } struct ibv_gid_entry* u = &(ib_gid_entry); - DEBUG("gid_index=%u gid_entry(gid_index=%u port_num=%u gid_type=%u ndev_ifindex=%d subnet_prefix=%d interface_id=0x%X)\n", (unsigned)gid_index, (unsigned)u->gid_index, (unsigned)u->port_num, (unsigned)u->gid_type, (unsigned)u->ndev_ifindex, (unsigned)u->gid.global.subnet_prefix, (unsigned)u->gid.global.interface_id); + HSB_LOG_DEBUG("gid_index={} gid_entry(gid_index={} port_num={} gid_type={} ndev_ifindex={} subnet_prefix={} interface_id={:#x})", gid_index, u->gid_index, u->port_num, u->gid_type, u->ndev_ifindex, u->gid.global.subnet_prefix, u->gid.global.interface_id); if (ib_gid_entry.gid_type != IBV_GID_TYPE_ROCE_V2) { continue; 
@@ -212,7 +242,7 @@ bool RoceReceiver::start() break; } if (!ok) { - ERROR("Cannot find GID for IBV_GID_TYPE_ROCE_V2.\n"); + HSB_LOG_ERROR("Cannot find GID for IBV_GID_TYPE_ROCE_V2."); free_ib_resources(); return false; } @@ -220,7 +250,7 @@ bool RoceReceiver::start() // Create a protection domain ib_pd_ = ibv_alloc_pd(ib_context_); if (ib_pd_ == NULL) { - ERROR("Cannot allocate a protection domain, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("Cannot allocate a protection domain, errno={}.", errno); free_ib_resources(); return false; } @@ -228,7 +258,7 @@ bool RoceReceiver::start() // Create a completion channel. ib_completion_channel_ = ibv_create_comp_channel(ib_context_); if (ib_completion_channel_ == NULL) { - ERROR("Cannot create a completion channel.\n"); + HSB_LOG_ERROR("Cannot create a completion channel."); free_ib_resources(); return false; } @@ -237,24 +267,24 @@ bool RoceReceiver::start() flags = fcntl(ib_completion_channel_->fd, F_GETFL); r = fcntl(ib_completion_channel_->fd, F_SETFL, flags | O_NONBLOCK); if (r < 0) { - ERROR("Can't configure fd=%d with O_NONBLOCK, errno=%d.\n", (int)ib_completion_channel_->fd, (int)errno); + HSB_LOG_ERROR("Can't configure fd={} with O_NONBLOCK, errno={}.", ib_completion_channel_->fd, errno); return false; } // Create a completion queue - unsigned completion_queue_size = 10; + unsigned completion_queue_size = 100; ib_cq_ = ibv_create_cq(ib_context_, completion_queue_size, NULL, ib_completion_channel_, 0); if (ib_cq_ == NULL) { - ERROR("Cannot create a completion queue, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("Cannot create a completion queue, errno={}.", errno); free_ib_resources(); return false; } // Provide access to the frame buffer int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; - ib_mr_ = ibv_reg_mr(ib_pd_, (void*)cu_buffer_, cu_buffer_size_, access); + ib_mr_ = ibv_reg_mr_iova(ib_pd_, (void*)cu_buffer_, cu_buffer_size_, 0, access); if (ib_mr_ == NULL) { - ERROR("Cannot register memory region 
p=0x%llX size=%u, errno=%d.\n", (unsigned long long)cu_buffer_, (unsigned)cu_buffer_size_, (int)errno); + HSB_LOG_ERROR("Cannot register memory region p={:#x} size={:#x}, errno={}.", cu_buffer_, cu_frame_size_, errno); free_ib_resources(); return false; } @@ -276,7 +306,7 @@ bool RoceReceiver::start() }; // C++ sets the rest of the values to 0 ib_qp_ = ibv_create_qp(ib_pd_, &ib_qp_init_attr); if (ib_qp_ == NULL) { - ERROR("Cannot create queue pair, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("Cannot create queue pair, errno={}.", errno); free_ib_resources(); return false; } @@ -292,7 +322,7 @@ bool RoceReceiver::start() }; // C sets the rest to 0s flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; if (ibv_modify_qp(ib_qp_, &ib_qp_attr, flags)) { - ERROR("Cannot modify queue pair to IBV_QPS_INIT, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("Cannot modify queue pair to IBV_QPS_INIT, errno={}.", errno); free_ib_resources(); return false; } @@ -305,7 +335,7 @@ bool RoceReceiver::start() }; // C fills the rest with 0s struct ibv_recv_wr* bad_wr = NULL; if (ibv_post_recv(ib_qp_, &ib_wr, &bad_wr)) { - ERROR("Cannot post the receiver work list, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("Cannot post the receiver work list, errno={}.", errno); free_ib_resources(); return false; } @@ -314,7 +344,7 @@ bool RoceReceiver::start() // qp is currently INIT; go to RTR unsigned long client_ip = 0; if (inet_pton(AF_INET, peer_ip_, &client_ip) != 1) { - ERROR("Unable to convert \"%s\" to an IP address.\n", peer_ip_); + HSB_LOG_ERROR("Unable to convert \"{}\" to an IP address.", peer_ip_); free_ib_resources(); return false; } @@ -357,13 +387,13 @@ bool RoceReceiver::start() if (!retry) { break; } - ERROR("Cannot modify queue pair to IBV_QPS_RTR, errno=%d: \"%s\"; retrying.\n", r, strerror(r)); + HSB_LOG_ERROR("Cannot modify queue pair to IBV_QPS_RTR, errno={}: \"{}\"; retrying.", r, strerror(r)); useconds_t ms = 200; useconds_t us = ms * 1000; usleep(us); } if 
(r) { - ERROR("Cannot modify queue pair to IBV_QPS_RTR, errno=%d.\n", r); + HSB_LOG_ERROR("Cannot modify queue pair to IBV_QPS_RTR, errno={}.", r); free_ib_resources(); return false; } @@ -408,7 +438,7 @@ static inline bool before(struct timespec& a, struct timespec& b) void RoceReceiver::blocking_monitor() { native::NvtxTrace::setThreadName("RoceReceiver::run"); - DEBUG("Running.\n"); + HSB_LOG_DEBUG("Running."); struct ibv_wc ib_wc = { 0 }; @@ -422,8 +452,7 @@ void RoceReceiver::blocking_monitor() int r = ibv_req_notify_cq(ib_cq_, 0); if (r != 0) { - ERROR("ibv_req_notify_cq failed, errno=%d.\n", r); - return; + throw std::runtime_error(fmt::format("ibv_req_notify_cq failed, errno={}.", r)); } struct pollfd poll_fds[2] = { @@ -443,12 +472,11 @@ void RoceReceiver::blocking_monitor() int timeout = -1; // stay here forever. int r = poll(poll_fds, NUM_OF(poll_fds), timeout); if (r == -1) { - ERROR("poll returned r=%d, errno=%d.\n", r, (int)errno); - break; + throw std::runtime_error(fmt::format("poll returned r={}, errno={}.", r, errno)); } // Keep this as close to the actual message receipt as possible. - clock_gettime(CLOCK_MONOTONIC, &event_time_); + clock_gettime(CLOCK_REALTIME, const_cast(&event_time_)); // control_r_ if (poll_fds[0].revents) { @@ -456,7 +484,7 @@ void RoceReceiver::blocking_monitor() // telling us that someone closed the control_w_ side (which we do // in LinuxReceiver::close). That specific event is an indication // that this loop is instructed to terminate. 
- DEBUG("Closing.\n"); + HSB_LOG_DEBUG("Closing."); break; } @@ -468,47 +496,88 @@ void RoceReceiver::blocking_monitor() void* ev_ctx = NULL; r = ibv_get_cq_event(ib_completion_channel_, &ev_cq, &ev_ctx); if (r != 0) { - ERROR("ibv_get_cq_event returned r=%d.\n", r); - break; + throw std::runtime_error(fmt::format("ibv_get_cq_event returned r={}.", r)); } // Ack it and queue up another ibv_ack_cq_events(ev_cq, 1); r = ibv_req_notify_cq(ev_cq, 0); if (r != 0) { - ERROR("ibv_req_notify_cq returned r=%d.\n", r); - break; + throw std::runtime_error(fmt::format("ibv_req_notify_cq returned r={}.", r)); } // Now deal with active events. while (!done_) { r = ibv_poll_cq(ib_cq_, 1, &ib_wc); if (r < 0) { - ERROR("ibv_poll_cq failed, errno=%d.\n", (int)errno); - break; + throw std::runtime_error(fmt::format("ibv_poll_cq failed, errno={}.", errno)); } // Is there a message for us? if (r == 0) { break; } + uint64_t q = (uint64_t)this; + HSB_LOG_TRACE("this={:#x} r={} qp_number_={:#x} imm_data={:#x}", q, r, qp_number_, ntohl(ib_wc.imm_data)); // Note some metadata char buffer[1024]; lseek(rx_write_requests_fd_, 0, SEEK_SET); // may fail if fd==-1, we don't care - ssize_t buffer_size = read(rx_write_requests_fd_, buffer, sizeof(buffer)); // if rx_write_requests_fd_ is -1, then buffer_size_ will be less than 0 + ssize_t buffer_size = read(rx_write_requests_fd_, buffer, sizeof(buffer)); + // Do an atomic update + r = pthread_mutex_lock(&ready_mutex_); + if (r != 0) { + throw std::runtime_error(fmt::format("pthread_mutex_lock returned r={}.", r)); + } + // If the application didn't set ready_ to false, + // then we're overwriting a valid frame. 
+ if (ready_) { + dropped_++; + } if ((buffer_size > 0) && (buffer_size < 1000)) { rx_write_requests_ = strtoull(buffer, NULL, 10); // otherwise we'll continue to use the 0 from the constructor } - frame_number_++; imm_data_ = ntohl(ib_wc.imm_data); // ibverbs just gives us the bytes here - frame_end_ = event_time_; - // frame_end_ uses the monotonic clock, which doesn't define it's epoch; - // received_ns_ is the same but matches the epoch used by PTP. - received_ns_ = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + received_psn_ = (imm_data_ >> 8) & 0xFFFFFF; + unsigned page = imm_data_ & 0xFF; + if (page >= pages_) { + throw std::runtime_error(fmt::format("Invalid page={}; ignoring.", page)); + } + received_page_ = page; + // Start copying out the metadata chunk. get_next_frame will synchronize on metadata_stream_. + // NOTE that we start this request while we hold ready_mutex_ -- this guarantees that we don't + // start another copy while the foreground is copying data out of this buffer. 
+ CUdeviceptr page_start = page * cu_page_size_; + CUdeviceptr metadata_start = page_start + metadata_offset_; + if ((metadata_start + hololink::METADATA_SIZE) > cu_buffer_size_) { + throw std::runtime_error(fmt::format("metadata_start={:#x}+metadata_size={:#x}(which is {:#x}) exceeds cu_buffer_size={:#x}.", + metadata_start, hololink::METADATA_SIZE, metadata_start + hololink::METADATA_SIZE, cu_buffer_size_)); + } + CUresult cu_result = cuMemcpyDtoHAsync(metadata_buffer_, cu_buffer_ + metadata_start, hololink::METADATA_SIZE, metadata_stream_); + if (cu_result != CUDA_SUCCESS) { + throw std::runtime_error(fmt::format("cmMemcpyDtoHAsync failed, cu_result={}.", cu_result)); + } + current_buffer_ = cu_buffer_ + cu_page_size_ * page; + frame_number_++; + native::NvtxTrace::event_u64("frame_number", frame_number_); + HSB_LOG_TRACE("frame_number={}", frame_number_); + received_.tv_sec = event_time_.tv_sec; + received_.tv_nsec = event_time_.tv_nsec; + HSB_LOG_TRACE("frame_number={} imm_data={:#x} received.tv_sec={:#x} received.tv_nsec={:#x}", + frame_number_, imm_data_, received_.tv_sec, received_.tv_nsec); // Send it - signal(); + ready_ = true; + native::NvtxTrace::event_u64("signal", 1); + r = pthread_cond_signal(&ready_condition_); + if (r != 0) { + throw std::runtime_error(fmt::format("pthread_cond_signal returned r={}.", r)); + } + r = pthread_mutex_unlock(&ready_mutex_); + if (r != 0) { + throw std::runtime_error(fmt::format("pthread_mutex_unlock returned r={}.", r)); + } + // Provide the local callback, letting the application know + // that get_next_frame won't block. 
+ frame_ready_(this[0]); // Add back the work request struct ibv_recv_wr ib_wr = { .wr_id = 1, @@ -516,8 +585,7 @@ void RoceReceiver::blocking_monitor() }; // C fills the rest with 0s struct ibv_recv_wr* bad_wr = NULL; if (ibv_post_recv(ib_qp_, &ib_wr, &bad_wr)) { - ERROR("Cannot post a receiver work list, errno=%d.\n", (int)errno); - break; + throw std::runtime_error(fmt::format("Cannot post a receiver work list, errno={}.", errno)); } } } @@ -526,30 +594,13 @@ void RoceReceiver::blocking_monitor() count++; clock_gettime(CLOCK_MONOTONIC, &now); if (!before(now, report_time)) { - ERROR("count=%u.\n", count); + HSB_LOG_ERROR("count={}.", count); report_time = add_ms(report_time, report_ms); } #endif /* PERIODIC_STATUS */ } free_ib_resources(); - DEBUG("Closed.\n"); -} - -void RoceReceiver::signal() -{ - int r = pthread_mutex_lock(&ready_mutex_); - if (r != 0) { - ERROR("pthread_mutex_lock returned r=%d.\n", r); - } - ready_ = true; - r = pthread_cond_signal(&ready_condition_); - if (r != 0) { - ERROR("pthread_cond_signal returned r=%d.\n", r); - } - r = pthread_mutex_unlock(&ready_mutex_); - if (r != 0) { - ERROR("pthread_mutex_unlock returned r=%d.\n", r); - } + HSB_LOG_DEBUG("Closed."); } void RoceReceiver::close() @@ -562,79 +613,59 @@ void RoceReceiver::free_ib_resources() { if (ib_qp_ != NULL) { if (ibv_destroy_qp(ib_qp_)) { - ERROR("ibv_destroy_qp failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_destroy_qp failed, errno={}.", errno); } ib_qp_ = NULL; } if (ib_mr_ != NULL) { if (ibv_dereg_mr(ib_mr_)) { - ERROR("ibv_dereg_mr failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_dereg_mr failed, errno={}.", errno); } ib_mr_ = NULL; } if (ib_cq_ != NULL) { if (ibv_destroy_cq(ib_cq_)) { - ERROR("ibv_destroy_cq failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_destroy_cq failed, errno={}.", errno); } ib_cq_ = NULL; } if (ib_completion_channel_ != NULL) { if (ibv_destroy_comp_channel(ib_completion_channel_)) { - ERROR("ibv_destroy_comp_channel failed, 
errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_destroy_comp_channel failed, errno={}.", errno); } ib_completion_channel_ = NULL; } if (ib_pd_ != NULL) { if (ibv_dealloc_pd(ib_pd_)) { - ERROR("ibv_dealloc_pd failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_dealloc_pd failed, errno={}.", errno); } ib_pd_ = NULL; } if (ib_context_ != NULL) { if (ibv_close_device(ib_context_)) { - ERROR("ibv_close_device failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("ibv_close_device failed, errno={}.", errno); } ib_context_ = NULL; } - TRACE("Done.\n"); + HSB_LOG_TRACE("Done."); } bool RoceReceiver::get_next_frame(unsigned timeout_ms, RoceReceiverMetadata& metadata) -{ - bool r = wait(timeout_ms); - metadata.frame_number = frame_number_; - if (r) { - metadata.rx_write_requests = rx_write_requests_; - metadata.frame_end_s = frame_end_.tv_sec; - metadata.frame_end_ns = frame_end_.tv_nsec; - metadata.imm_data = imm_data_; - metadata.received_ns = received_ns_; - } else { - metadata.rx_write_requests = 0; - metadata.frame_end_s = 0; - metadata.frame_end_ns = 0; - metadata.imm_data = 0; - metadata.received_ns = 0; - } - return r; -} - -bool RoceReceiver::wait(unsigned timeout_ms) { int status = pthread_mutex_lock(&ready_mutex_); if (status != 0) { - ERROR("pthread_mutex_lock returned status=%d.\n", status); + HSB_LOG_ERROR("pthread_mutex_lock returned status={}.", status); return false; } struct timespec now; if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) { - ERROR("clock_gettime failed, errno=%d.\n", (int)errno); + HSB_LOG_ERROR("clock_gettime failed, errno={}.", errno); } struct timespec timeout = add_ms(now, timeout_ms); @@ -644,15 +675,43 @@ bool RoceReceiver::wait(unsigned timeout_ms) break; } if (status != 0) { - ERROR("pthread_cond_wait returned status=%d.\n", status); + HSB_LOG_ERROR("pthread_cond_wait returned status={}.", status); break; } } bool r = ready_; ready_ = false; + metadata.frame_number = frame_number_; + metadata.dropped = dropped_; + if (r) { + CUresult 
cu_result = cuStreamSynchronize(metadata_stream_); + if (cu_result != CUDA_SUCCESS) { + throw std::runtime_error(fmt::format("cuStreamSynchronize failed, cu_result={}.", cu_result)); + } + Hololink::FrameMetadata frame_metadata = Hololink::deserialize_metadata(metadata_buffer_, hololink::METADATA_SIZE); + if (frame_metadata.psn != received_psn_) { + // This indicates that the distal end rewrote the receiver buffer. + HSB_LOG_ERROR("Metadata psn={} but received_psn={}.", frame_metadata.psn, received_psn_); + } + metadata.rx_write_requests = rx_write_requests_; + metadata.imm_data = imm_data_; + metadata.received_s = received_.tv_sec; + metadata.received_ns = received_.tv_nsec; + metadata.frame_memory = current_buffer_; + metadata.metadata_memory = current_buffer_ + metadata_offset_; + metadata.frame_metadata = frame_metadata; + } else { + metadata.rx_write_requests = 0; + metadata.imm_data = 0; + metadata.received_s = 0; + metadata.received_ns = 0; + metadata.frame_memory = 0; + metadata.metadata_memory = 0; + metadata.frame_metadata = {}; // All 0s + } status = pthread_mutex_unlock(&ready_mutex_); if (status != 0) { - ERROR("pthread_mutex_unlock returned status=%d.\n", status); + HSB_LOG_ERROR("pthread_mutex_unlock returned status={}.", status); } return r; } @@ -667,12 +726,13 @@ bool RoceReceiver::check_async_events() break; } - switch (ib_async_event.event_type) { + auto event_type = ib_async_event.event_type; + switch (event_type) { case IBV_EVENT_COMM_EST: // Communication established isn't an error; don't complain about it break; default: - ERROR("ib_async_event.event_type=%d.\n", (int)ib_async_event.event_type); + HSB_LOG_ERROR("ib_async_event.event_type={} ({}).", static_cast(event_type), ibv_event_type_str(event_type)); break; } @@ -689,4 +749,17 @@ std::mutex& RoceReceiver::get_lock() return lock; } +uint64_t RoceReceiver::external_frame_memory() +{ + // If we didn't use ibv_reg_mr_iova above, we'd return + // cu_buffer_ here; but ibv_reg_mr_iova always 
adds its + // address to the address received from the peripheral. + return 0; +} + +void RoceReceiver::set_frame_ready(std::function frame_ready) +{ + frame_ready_ = frame_ready; +} + } // namespace hololink::operators diff --git a/src/hololink/operators/roce_receiver/roce_receiver.hpp b/src/hololink/operators/roce_receiver/roce_receiver.hpp index dff603d..dfb658a 100644 --- a/src/hololink/operators/roce_receiver/roce_receiver.hpp +++ b/src/hololink/operators/roce_receiver/roce_receiver.hpp @@ -29,6 +29,7 @@ #include +#include #include #include @@ -38,10 +39,14 @@ class RoceReceiverMetadata { public: uint64_t rx_write_requests; // over all of time uint64_t frame_number; - uint64_t frame_end_s; - uint64_t frame_end_ns; uint32_t imm_data; - int64_t received_ns; + uint64_t received_s; + uint64_t received_ns; + CUdeviceptr frame_memory; + CUdeviceptr metadata_memory; + uint32_t dropped; + // Data received directly from HSB. + Hololink::FrameMetadata frame_metadata; }; /** @@ -57,6 +62,10 @@ class RoceReceiver { unsigned ibv_port, CUdeviceptr cu_buffer, size_t cu_buffer_size, + size_t cu_frame_size, + size_t cu_page_size, + unsigned pages, + size_t metadata_offset, const char* peer_ip); ~RoceReceiver(); @@ -80,11 +89,16 @@ class RoceReceiver { uint32_t get_rkey() { return rkey_; }; -protected: - void signal(); + // What target address do we write into HSB? + uint64_t external_frame_memory(); - bool wait(unsigned timeout_ms); + /** + * If the application schedules the call to get_next_frame after this + * callback occurs, then get_next_frame won't block. 
+ */ + void set_frame_ready(std::function frame_ready); +protected: void free_ib_resources(); bool check_async_events(); @@ -94,6 +108,10 @@ class RoceReceiver { unsigned ibv_port_; CUdeviceptr cu_buffer_; size_t cu_buffer_size_; + size_t cu_frame_size_; + size_t cu_page_size_; + unsigned pages_; + size_t metadata_offset_; char* peer_ip_; struct ibv_qp* ib_qp_; struct ibv_mr* ib_mr_; @@ -111,10 +129,16 @@ class RoceReceiver { uint64_t frame_number_; int rx_write_requests_fd_; uint64_t volatile rx_write_requests_; // over all of time - struct timespec frame_end_; uint32_t volatile imm_data_; - struct timespec event_time_; - int64_t volatile received_ns_; + struct timespec volatile event_time_; + struct timespec volatile received_; + CUdeviceptr volatile current_buffer_; + CUstream metadata_stream_; + uint32_t volatile dropped_; + uint8_t* metadata_buffer_; + uint32_t volatile received_psn_; + unsigned volatile received_page_; + std::function frame_ready_; std::mutex& get_lock(); // Ensures reentrency protection for ibv calls. }; diff --git a/src/hololink/operators/roce_receiver/roce_receiver_op.cpp b/src/hololink/operators/roce_receiver/roce_receiver_op.cpp index 4d3caa8..9990655 100644 --- a/src/hololink/operators/roce_receiver/roce_receiver_op.cpp +++ b/src/hololink/operators/roce_receiver/roce_receiver_op.cpp @@ -22,16 +22,13 @@ #include #include +#include #include #include "roce_receiver.hpp" namespace hololink::operators { -static constexpr int64_t MS_PER_SEC = 1000; -static constexpr int64_t US_PER_SEC = 1000 * MS_PER_SEC; -static constexpr int64_t NS_PER_SEC = 1000 * US_PER_SEC; - void RoceReceiverOp::setup(holoscan::OperatorSpec& spec) { // call base class @@ -44,28 +41,48 @@ void RoceReceiverOp::setup(holoscan::OperatorSpec& spec) void RoceReceiverOp::start_receiver() { + size_t metadata_address = hololink::native::round_up(frame_size_.get(), hololink::native::PAGE_SIZE); + // page_size wants to be page aligned; prove that METADATA_SIZE doesn't upset that. 
+ // Prove that PAGE_SIZE is a power of two + static_assert((hololink::native::PAGE_SIZE & (hololink::native::PAGE_SIZE - 1)) == 0); + // Prove that METADATA_SIZE is an even multiple of PAGE_SIZE + static_assert((hololink::METADATA_SIZE & (hololink::native::PAGE_SIZE - 1)) == 0); + size_t page_size = metadata_address + hololink::METADATA_SIZE; + size_t buffer_size = page_size * PAGES; + frame_memory_.reset(new ReceiverMemoryDescriptor(frame_context_, buffer_size)); + HSB_LOG_INFO("frame_size={:#x} frame={:#x} buffer_size={:#x}", frame_size_.get(), frame_memory_->get(), buffer_size); + const std::string& peer_ip = hololink_channel_->peer_ip(); - HOLOSCAN_LOG_INFO( + HSB_LOG_INFO( "ibv_name_={} ibv_port_={} peer_ip={}", ibv_name_.get(), ibv_port_.get(), peer_ip); - receiver_.reset(new RoceReceiver(ibv_name_.get().c_str(), ibv_port_.get(), frame_memory_, - frame_size_.get(), peer_ip.c_str())); + receiver_.reset(new RoceReceiver( + ibv_name_.get().c_str(), + ibv_port_.get(), + frame_memory_->get(), + buffer_size, + frame_size_.get(), + page_size, + PAGES, + metadata_address, + peer_ip.c_str())); + receiver_->set_frame_ready([this](const RoceReceiver&) { + this->frame_ready(); + }); if (!receiver_->start()) { throw std::runtime_error("Failed to start RoceReceiver"); } hololink_channel_->authenticate(receiver_->get_qp_number(), receiver_->get_rkey()); - // we don't actually receive anything here because CX7 hides it. 
- sockaddr_in address {}; - if (bind(data_socket_.get(), (sockaddr*)&address, sizeof(address)) < 0) { - throw std::runtime_error( - fmt::format("bind failed with errno={}: \"{}\"", errno, strerror(errno))); - } - receiver_thread_.reset(new std::thread(&hololink::operators::RoceReceiverOp::run, this)); - const int error = pthread_setname_np(receiver_thread_->native_handle(), "receiver_thread"); + const int error = pthread_setname_np(receiver_thread_->native_handle(), name().c_str()); if (error != 0) { throw std::runtime_error("Failed to set thread name"); } + + auto [local_ip, local_port] = local_ip_and_port(); + HSB_LOG_INFO("local_ip={} local_port={}", local_ip, local_port); + + hololink_channel_->configure(receiver_->external_frame_memory(), frame_size_, page_size, PAGES, local_port); } void RoceReceiverOp::run() @@ -74,48 +91,58 @@ void RoceReceiverOp::run() receiver_->blocking_monitor(); } -void RoceReceiverOp::stop_() +void RoceReceiverOp::stop_receiver() { + hololink_channel_->unconfigure(); data_socket_.reset(); receiver_->close(); receiver_thread_->join(); receiver_thread_.reset(); + frame_memory_.reset(); } -std::shared_ptr RoceReceiverOp::get_next_frame(double timeout_ms) +std::tuple> RoceReceiverOp::get_next_frame(double timeout_ms) { RoceReceiverMetadata roce_receiver_metadata; if (!receiver_->get_next_frame(timeout_ms, roce_receiver_metadata)) { return {}; } - const int64_t now_ns = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - - // Extend the timestamp we got from the data, - // (which is ns plus 2 bits of seconds). Note that - // we don't look at the 2 bits of seconds here. 
- const int64_t ns = roce_receiver_metadata.imm_data % NS_PER_SEC; - int64_t timestamp_ns = (now_ns - (now_ns % NS_PER_SEC)) + ns; - if (timestamp_ns > now_ns) { - timestamp_ns -= NS_PER_SEC; - } - auto metadata = std::make_shared(); (*metadata)["frame_number"] = int64_t(roce_receiver_metadata.frame_number); (*metadata)["rx_write_requests"] = int64_t(roce_receiver_metadata.rx_write_requests); - (*metadata)["received_ns"] = roce_receiver_metadata.received_ns; - (*metadata)["timestamp_ns"] = timestamp_ns; + (*metadata)["received_s"] = int64_t(roce_receiver_metadata.received_s); + (*metadata)["received_ns"] = int64_t(roce_receiver_metadata.received_ns); (*metadata)["imm_data"] = int64_t(roce_receiver_metadata.imm_data); - - return metadata; + CUdeviceptr frame_memory = roce_receiver_metadata.frame_memory; + (*metadata)["frame_memory"] = int64_t(frame_memory); + (*metadata)["dropped"] = int64_t(roce_receiver_metadata.dropped); + (*metadata)["timestamp_s"] = int64_t(roce_receiver_metadata.frame_metadata.timestamp_s); + (*metadata)["timestamp_ns"] = int64_t(roce_receiver_metadata.frame_metadata.timestamp_ns); + (*metadata)["metadata_s"] = int64_t(roce_receiver_metadata.frame_metadata.metadata_s); + (*metadata)["metadata_ns"] = int64_t(roce_receiver_metadata.frame_metadata.metadata_ns); + (*metadata)["crc"] = int64_t(roce_receiver_metadata.frame_metadata.crc); + + return { frame_memory, metadata }; } std::tuple RoceReceiverOp::local_ip_and_port() { - auto [local_ip, local_port] = BaseReceiverOp::local_ip_and_port(); - return { local_ip, 4791 }; + sockaddr_in ip {}; + ip.sin_family = AF_UNSPEC; + socklen_t ip_len = sizeof(ip); + if (getsockname(data_socket_.get(), (sockaddr*)&ip, &ip_len) < 0) { + throw std::runtime_error( + fmt::format("getsockname failed with errno={}: \"{}\"", errno, strerror(errno))); + } + + const std::string local_ip = inet_ntoa(ip.sin_addr); + // This is what you'd normally use + // const in_port_t local_port = ip.sin_port; + // But we're going to 
tell the other side that we're listening + // to the ROCE receiver port at 4791. + const in_port_t local_port = 4791; + return { local_ip, local_port }; } } // namespace hololink::operators diff --git a/src/hololink/operators/roce_receiver/roce_receiver_op.hpp b/src/hololink/operators/roce_receiver/roce_receiver_op.hpp index d6a2a61..7a4eb9a 100644 --- a/src/hololink/operators/roce_receiver/roce_receiver_op.hpp +++ b/src/hololink/operators/roce_receiver/roce_receiver_op.hpp @@ -40,8 +40,8 @@ class RoceReceiverOp : public BaseReceiverOp { // BaseReceiverOp virtual functions void start_receiver() override; - void stop_() override; - std::shared_ptr get_next_frame(double timeout_ms) override; + void stop_receiver() override; + std::tuple> get_next_frame(double timeout_ms) override; std::tuple local_ip_and_port() override; private: @@ -50,6 +50,8 @@ class RoceReceiverOp : public BaseReceiverOp { std::shared_ptr receiver_; std::unique_ptr receiver_thread_; + static constexpr unsigned PAGES = 2; + std::unique_ptr frame_memory_; void run(); }; diff --git a/test-agx-cpnx100.sh b/test-agx-cpnx100.sh new file mode 100644 index 0000000..37fe01e --- /dev/null +++ b/test-agx-cpnx100.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -o errexit + +# +# This script runs `pytest` with the switches appropriate for an AGX +# host connected to a Lattice CPNX100-ETH-SENSOR-BRIDGE with an IMX274 attached; +# the network is connected from the first HSB port to the on-board ethernet. +# +# This test only runs the non-network-accelerated tests and only works with +# the first (192.168.0.2) HSB interface. +# +pytest --imx274 --unaccelerated-only --channel-ips=192.168.0.2 --schedulers=default diff --git a/test-igx-cpnx100.sh b/test-igx-cpnx100.sh new file mode 100644 index 0000000..e25727e --- /dev/null +++ b/test-igx-cpnx100.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -o errexit + +# +# This script runs `pytest` with the switches appropriate for an IGX +# host connected to a Lattice CPNX100-ETH-SENSOR-BRIDGE with an IMX274 attached; +# networks are connected from each HSB port to the appropriate Connectx7 +# port. +# +# This test runs both network-accelerated and non-accelerated tests and looks +# for both (192.168.0.2 and 192.168.0.3) HSB interfaces. +# +pytest --imx274 --ptp diff --git a/tests/conftest.py b/tests/conftest.py index e8762a3..894172a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,7 +16,9 @@ # See README.md for detailed information. 
import logging +import logging.handlers import os +import socket import threading import traceback from unittest.mock import patch @@ -25,6 +27,31 @@ import hololink as hololink_module +# If desired, forward python logging to UDP port 514 in not exactly a SYSLOG +# compatible way but a way that works great with wireshark. This +# is the same thing we're doing in C++. +if False: + + class UdpWriter: + def __init__(self, sender_ip, destination_ip="255.255.255.255"): + self._socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) + self._socket.bind((sender_ip, 0)) + self._socket.connect((destination_ip, 514)) + + def write(self, msg): + self._socket.send(msg.encode()) + + udp_writer = UdpWriter(sender_ip="127.0.0.1", destination_ip="127.0.0.1") + handler = logging.StreamHandler(stream=udp_writer) + formatter = logging.Formatter( + fmt="%(levelname)s %(relativeCreated)d %(funcName)s %(filename)s:%(lineno)d tid=%(threadName)s -- %(message)s" + ) + handler.setFormatter(formatter) + handler.terminator = "" + logger = logging.getLogger() + logger.addHandler(handler) + @pytest.fixture(scope="function", autouse=True) def forward_exception(): @@ -98,14 +125,10 @@ def pytest_addoption(parser): default=False, help="Don't skip dgpu based test.", ) - default_infiniband_interface = "roceP5p3s0f0" - try: - default_infiniband_interface = sorted(os.listdir("/sys/class/infiniband"))[0] - except (FileNotFoundError, IndexError): - pass + infiniband_interfaces = hololink_module.infiniband_devices() parser.addoption( "--ibv-name", - default=default_infiniband_interface, + default=infiniband_interfaces[0] if infiniband_interfaces else None, help="IBV device to use", ) parser.addoption( @@ -132,6 +155,30 @@ def pytest_addoption(parser): default=False, help="Include tests for IMX477.", ) + parser.addoption( + "--hsb", + action="store_true", + default=False, + help="Don't skip tests using HSB.", + ) + parser.addoption( + 
"--hsb-nano", + action="store_true", + default=False, + help="Don't skip tests using HSB Nano.", + ) + parser.addoption( + "--channel-ips", + default=["192.168.0.2", "192.168.0.3"], + nargs="+", + help="Use these data plane addresses.", + ) + parser.addoption( + "--schedulers", + default=["default", "greedy", "multithread", "event"], + nargs="+", + help="Use these schedulers.", + ) def pytest_collection_modifyitems(config, items): @@ -167,6 +214,16 @@ def pytest_collection_modifyitems(config, items): for item in items: if "skip_unless_imx477" in item.keywords: item.add_marker(skip_imx477) + if not config.getoption("--hsb") and not config.getoption("--imx274"): + skip_hsb = pytest.mark.skip(reason="Tests only run in --hsb mode.") + for item in items: + if "skip_unless_hsb" in item.keywords: + item.add_marker(skip_hsb) + if not config.getoption("--hsb-nano"): + skip_hsb_nano = pytest.mark.skip(reason="Tests only run in --hsb-nano mode.") + for item in items: + if "skip_unless_hsb_nano" in item.keywords: + item.add_marker(skip_hsb_nano) @pytest.fixture @@ -197,3 +254,23 @@ def ibv_port(request): @pytest.fixture def ibv_name(request): return request.config.getoption("--ibv-name") + + +# If a test has a "channel_ips" (plural) fixture, then pass +# the list from the command line. +@pytest.fixture +def channel_ips(request): + return request.config.getoption("--channel-ips") + + +def pytest_generate_tests(metafunc): + # If a test has a "channel_ip" (singular) fixture, then parameterize + # from the list given on the command line. + if "channel_ip" in metafunc.fixturenames: + channel_ips = metafunc.config.getoption("--channel-ips") + metafunc.parametrize("channel_ip", channel_ips) + # If a test has a "scheduler" (singular) fixture, then parameterize + # from the list given on the command line. 
+ if "scheduler" in metafunc.fixturenames: + schedulers = metafunc.config.getoption("--schedulers") + metafunc.parametrize("scheduler", schedulers) diff --git a/tests/operators.py b/tests/operators.py new file mode 100644 index 0000000..536cecb --- /dev/null +++ b/tests/operators.py @@ -0,0 +1,133 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See README.md for detailed information. 
+ +import datetime +import logging + +import cupy as cp +import holoscan + +COLOR_PROFILER_START_FRAME = 5 + + +class ColorProfiler(holoscan.core.Operator): + def __init__(self, *args, callback=None, out_tensor_name=None, **kwargs): + super().__init__(*args, **kwargs) + self._count = 0 + self._callback = callback + self._out_tensor_name = out_tensor_name + + def setup(self, spec): + logging.info("setup") + spec.input("input") + spec.output("output") + + def compute(self, op_input, op_output, context): + self._count += 1 + in_message = op_input.receive("input") + cp_frame = cp.asarray(in_message.get("")) # cp_frame.shape is (y,x,4) + op_output.emit({self._out_tensor_name: cp_frame}, "output") + # Give it some time to settle + if self._count < COLOR_PROFILER_START_FRAME: + return + # Compute the Y of YCrCb + r = cp_frame[:, :, 0] + g = cp_frame[:, :, 1] + b = cp_frame[:, :, 2] + y = r * 0.299 + g * 0.587 + b * 0.114 + # + buckets, _ = cp.histogram(y, bins=16, range=(0, 65536)) + self._callback(buckets) + + +MS_PER_SEC = 1000.0 +US_PER_SEC = 1000.0 * MS_PER_SEC +NS_PER_SEC = 1000.0 * US_PER_SEC +SEC_PER_NS = 1.0 / NS_PER_SEC + + +class TimeProfiler(holoscan.core.Operator): + def __init__( + self, + *args, + callback=None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self._count = 0 + self._callback = callback + self._timestamps = [] + self._packets_dropped = None + + def setup(self, spec): + logging.info("setup") + spec.input("input") + spec.output("output") + + def compute(self, op_input, op_output, context): + self._count += 1 + in_message = op_input.receive("input") + cp_frame = cp.asarray(in_message.get("")) # cp_frame.shape is (y,x,4) + op_output.emit({"": cp_frame}, "output") + # + metadata = self.metadata + frame_number = metadata.get("frame_number", 0) + packets_dropped = metadata.get("packets_dropped", 0) + if packets_dropped != self._packets_dropped: + logging.info(f"{packets_dropped=} ({packets_dropped:#X}) {frame_number=}") + 
self._packets_dropped = packets_dropped + image_timestamp_ns = metadata.get("timestamp_ns", 0) + image_timestamp_s = metadata.get("timestamp_s", 0) + image_timestamp_s += image_timestamp_ns * SEC_PER_NS + received_timestamp_s = metadata.get("received_s", 0) + received_timestamp_ns = metadata.get("received_ns", 0) + received_timestamp_s = received_timestamp_s + received_timestamp_ns * SEC_PER_NS + metadata_timestamp_s = metadata.get("metadata_s", 0) + metadata_timestamp_ns = metadata.get("metadata_ns", 0) + metadata_timestamp_s = metadata_timestamp_s + metadata_timestamp_ns * SEC_PER_NS + pipeline_timestamp_s = datetime.datetime.now(datetime.timezone.utc).timestamp() + self._timestamps.append( + ( + image_timestamp_s, + metadata_timestamp_s, + received_timestamp_s, + pipeline_timestamp_s, + frame_number, + ) + ) + if self._count < 200: + return + self._callback(self._timestamps) + + +class WatchdogOp(holoscan.core.Operator): + def __init__( + self, + *args, + watchdog=None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self._watchdog = watchdog + + def setup(self, spec): + spec.input("input") + + def compute(self, op_input, op_output, context): + in_message = op_input.receive("input") + in_message.get("") + self._watchdog.tap() diff --git a/src/hololink/operators/gamma_correction/CMakeLists.txt b/tests/test_gpio_example_app.py similarity index 56% rename from src/hololink/operators/gamma_correction/CMakeLists.txt rename to tests/test_gpio_example_app.py index eeb7bf4..9324910 100644 --- a/src/hololink/operators/gamma_correction/CMakeLists.txt +++ b/tests/test_gpio_example_app.py @@ -13,24 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# we don't expose the C++ interface of the operator yet, therfore link -# statically -add_library(gamma_correction STATIC -gamma_correction.cpp - ) - -set_property(TARGET gamma_correction PROPERTY POSITION_INDEPENDENT_CODE ON) - -add_library(hololink::operators::gamma_correction ALIAS gamma_correction) - -target_include_directories(gamma_correction - INTERFACE - $ - $ - ) - -target_link_libraries(gamma_correction - PRIVATE - hololink::native - holoscan::core - ) +# See README.md for detailed information. + +import sys +from unittest import mock + +import pytest + +from examples import gpio_example_app + + +@pytest.mark.skip_unless_hsb_nano +def test_gpio_example_app(frame_limit, capsys): + arguments = [ + sys.argv[0], + "--cycle-limit", + str(frame_limit), + "--sleep-time", + "0", + ] + + with mock.patch("sys.argv", arguments): + gpio_example_app.main() + + # check for errors + captured = capsys.readouterr() + assert captured.err == "" diff --git a/tests/test_hololink_acks.py b/tests/test_hololink_acks.py index 6c42677..9868228 100644 --- a/tests/test_hololink_acks.py +++ b/tests/test_hololink_acks.py @@ -18,6 +18,7 @@ import logging import pytest +import utils import hololink as hololink_module @@ -45,15 +46,30 @@ def test_hololink_acks(hololink_address): peer_ip=metadata["peer_ip"], control_port=metadata["control_port"], serial_number=metadata["serial_number"], + sequence_number_checking=( + False if metadata["sequence_number_checking"] == 0 else True + ), ) - hololink.start() - for i in range(10): - hololink.get_fpga_version() - hololink.get_fpga_date() - hololink.stop() + with utils.PriorityScheduler(): + hololink.start() + # Make sure we have PTP sync first. + ptp_sync_timeout_s = 10 + ptp_sync_timeout = hololink_module.Timeout(ptp_sync_timeout_s) + logging.debug("Waiting for PTP sync.") + if not hololink.ptp_synchronize(ptp_sync_timeout): + raise ValueError( + f"Failed to synchronize PTP after {ptp_sync_timeout_s} seconds; ignoring." 
+ ) + # + for i in range(20): + hololink.get_fpga_version() + hololink.get_fpga_date() + hololink.stop() max_dt = None - for n, (request_time, reply_time) in enumerate(hololink._acks): + settled_acks = hololink._acks[5:] + assert len(settled_acks) > 10 + for n, (request_time, reply_time) in enumerate(settled_acks): dt_s = reply_time - request_time dt_ms = dt_s * 1000.0 dt_us = dt_ms * 1000.0 diff --git a/tests/test_holoscan.py b/tests/test_holoscan.py index bb6a3c7..a3a548a 100644 --- a/tests/test_holoscan.py +++ b/tests/test_holoscan.py @@ -141,16 +141,11 @@ def compose(self): interpolation_mode=0, ) - gamma_correction = hololink_module.operators.GammaCorrectionOp( - self, - name="gamma_correction", - cuda_device_ordinal=self._cuda_device_ordinal, - ) - visualizer = holoscan.operators.HolovizOp( self, name="holoviz", headless=self._headless, + framebuffer_srgb=True, ) watchdog_operator = WatchdogOperator( @@ -165,9 +160,8 @@ def compose(self): csi_to_bayer_operator, image_processor_operator, {("output", "input")} ) self.add_flow(image_processor_operator, demosaic, {("output", "receiver")}) - self.add_flow(demosaic, gamma_correction, {("transmitter", "input")}) - self.add_flow(gamma_correction, visualizer, {("output", "receivers")}) - self.add_flow(gamma_correction, watchdog_operator, {("output", "input")}) + self.add_flow(demosaic, visualizer, {("transmitter", "receivers")}) + self.add_flow(demosaic, watchdog_operator, {("transmitter", "input")}) frame_rate_s = 0.1 diff --git a/tests/test_hsb_sequence_checking.py b/tests/test_hsb_sequence_checking.py new file mode 100644 index 0000000..9c06615 --- /dev/null +++ b/tests/test_hsb_sequence_checking.py @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See README.md for detailed information. + +import logging + +import pytest + +import hololink as hololink_module + + +@pytest.mark.skip_unless_hsb +def test_hsb_sequence_checking(hololink_address): + logging.info("Initializing.") + # + metadata = hololink_module.Enumerator.find_channel(hololink_address) + + # Create a Hololink controller. Note that this approach to creating + # hololink objects isn't something applications should do. Calling start() + # on that will reset the sequence number in the HSB device. + hololink_a = hololink_module.Hololink( + peer_ip=metadata["peer_ip"], + control_port=metadata["control_port"], + serial_number=metadata["serial_number"], + sequence_number_checking=True, + ) + hololink_a.start() + + # Create another Hololink controller for the same HSB device. Applications + # should never do this. Calling start() on that will reset the sequence + # number in the HSB device. + hololink_b = hololink_module.Hololink( + peer_ip=metadata["peer_ip"], + control_port=metadata["control_port"], + serial_number=metadata["serial_number"], + sequence_number_checking=True, + ) + hololink_b.start() + # Advance the sequence number by performing a control plane transaction. + version_b = hololink_b.get_fpga_version() + logging.info(f"Got {version_b=:#x}") + + # hololink_a's cache of the sequence number is now out-of-date. Performing + # a transaction with it should result in an exception. 
+ try: + bad_version_a = hololink_a.get_fpga_version() + logging.info(f"Got {bad_version_a=:#x}") + except RuntimeError as e: + logging.info(f"Caught {e=}({type(e)=}) as expected.") + return + + pytest.fail("This should have caused an exception.") diff --git a/tests/test_i2c_retry.py b/tests/test_i2c_retry.py index da78b62..cd4e440 100644 --- a/tests/test_i2c_retry.py +++ b/tests/test_i2c_retry.py @@ -77,8 +77,14 @@ def create_hololink(metadata): # Workaround: # Keep a global reference to the Mockhololink instance to prevent it from beeing destroyed. global mh + sequence_number_checking = ( + False if metadata["sequence_number_checking"] == 0 else True + ) mh = MockHololink( - metadata["peer_ip"], metadata["control_port"], metadata["serial_number"] + metadata["peer_ip"], + metadata["control_port"], + metadata["serial_number"], + sequence_number_checking, ) return mh diff --git a/tests/test_linux_tao_peoplenet.py b/tests/test_imx274_latency.py similarity index 65% rename from tests/test_linux_tao_peoplenet.py rename to tests/test_imx274_latency.py index 540efa1..be505c1 100644 --- a/tests/test_linux_tao_peoplenet.py +++ b/tests/test_imx274_latency.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,38 +16,28 @@ # See README.md for detailed information.
import sys -from os.path import exists from unittest import mock -from urllib.request import urlretrieve import pytest import hololink as hololink_module -from examples import linux_tao_peoplenet +from examples import imx274_latency +@pytest.mark.skip_unless_ptp @pytest.mark.skip_unless_imx274 +@pytest.mark.accelerated_networking @pytest.mark.parametrize( "camera_mode", # noqa: E501 [ hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, ], ) -def test_linux_tao_peoplenet( - camera_mode, headless, frame_limit, hololink_address, capsys -): - # Download the PeopleNet ONNX model - file_name = "examples/resnet34_peoplenet_int8.onnx" - if not exists(file_name): - url = "https://api.ngc.nvidia.com/v2/models/org/nvidia/team/tao/peoplenet/pruned_quantized_decrypted_v2.3.3/files?redirect=true&path=resnet34_peoplenet_int8.onnx" - urlretrieve(url, file_name) - +def test_imx274_latency(camera_mode, headless, hololink_address, capsys): arguments = [ sys.argv[0], "--camera-mode", str(camera_mode.value), - "--frame-limit", - str(frame_limit), "--hololink", hololink_address, ] @@ -55,7 +45,7 @@ def test_linux_tao_peoplenet( arguments.extend(["--headless"]) with mock.patch("sys.argv", arguments): - linux_tao_peoplenet.main() + imx274_latency.main() # check for errors captured = capsys.readouterr() diff --git a/tests/test_imx274_pattern.py b/tests/test_imx274_pattern.py index bbd8ba7..6e1fa13 100644 --- a/tests/test_imx274_pattern.py +++ b/tests/test_imx274_pattern.py @@ -19,49 +19,14 @@ import logging import os -import cupy as cp import holoscan +import operators import pytest +import utils from cuda import cuda import hololink as hololink_module - -class Profiler(holoscan.core.Operator): - def __init__(self, *args, callback=None, out_tensor_name=None, **kwargs): - super().__init__(*args, **kwargs) - self._count = 0 - self._callback = callback - self._out_tensor_name = out_tensor_name - - def setup(self, spec): - logging.info("setup") - 
spec.input("input") - spec.output("output") - - def compute(self, op_input, op_output, context): - self._count += 1 - in_message = op_input.receive("input") - cp_frame = cp.from_dlpack(in_message.get("")) # cp_frame.shape is (y,x,4) - op_output.emit({self._out_tensor_name: cp_frame}, "output") - # Give it some time to settle - if self._count < 20: - return - # Compute the Y of YCrCb - r = cp_frame[:, :, 0] - g = cp_frame[:, :, 1] - b = cp_frame[:, :, 2] - y = r * 0.299 + g * 0.587 + b * 0.114 - logging.debug(f"{y=}") - # - unique = cp.unique(y) - logging.info(f"{unique=}") - buckets, _ = cp.histogram(y, bins=16, range=(0, 65536)) - s = ",".join([f"{x}" for x in buckets]) - logging.info(f"buckets=[{s}]") - self._callback(buckets) - - actual_left = None actual_right = None @@ -98,6 +63,8 @@ def __init__( ibv_port_right, camera_right, camera_mode_right, + watchdog_left, + watchdog_right, ): logging.info("__init__") super().__init__() @@ -114,11 +81,20 @@ def __init__( self._ibv_port_right = ibv_port_right self._camera_right = camera_right self._camera_mode_right = camera_mode_right + self._watchdog_left = watchdog_left + self._watchdog_right = watchdog_right + self._bucket_count_left = 0 + self._bucket_count_right = 0 + self._bucket_count_left_trigger = 10 + self._bucket_count_right_trigger = 10 def compose(self): logging.info("compose") - self._ok = holoscan.conditions.BooleanCondition( - self, name="ok", enable_tick=True + self._ok_left = holoscan.conditions.BooleanCondition( + self, name="ok_left", enable_tick=True + ) + self._ok_right = holoscan.conditions.BooleanCondition( + self, name="ok_right", enable_tick=True ) self._camera_left.set_mode(self._camera_mode_left) self._camera_right.set_mode(self._camera_mode_right) @@ -165,33 +141,55 @@ def compose(self): frame_size = csi_to_bayer_operator_left.get_csi_length() logging.info(f"left {frame_size=}") frame_context = self._cuda_context - receiver_operator_left = hololink_module.operators.RoceReceiverOp( - self, - 
self._ok, - name="receiver_left", - frame_size=frame_size, - frame_context=frame_context, - ibv_name=self._ibv_name_left, - ibv_port=self._ibv_port_left, - hololink_channel=self._hololink_channel_left, - device=self._camera_left, - ) + if self._ibv_name_left: + receiver_operator_left = hololink_module.operators.RoceReceiverOp( + self, + self._ok_left, + name="receiver_left", + frame_size=frame_size, + frame_context=frame_context, + ibv_name=self._ibv_name_left, + ibv_port=self._ibv_port_left, + hololink_channel=self._hololink_channel_left, + device=self._camera_left, + ) + else: + receiver_operator_left = hololink_module.operators.LinuxReceiverOperator( + self, + self._ok_left, + name="receiver_left", + frame_size=frame_size, + frame_context=frame_context, + hololink_channel=self._hololink_channel_left, + device=self._camera_left, + ) # frame_size = csi_to_bayer_operator_right.get_csi_length() logging.info(f"right {frame_size=}") frame_context = self._cuda_context - receiver_operator_right = hololink_module.operators.RoceReceiverOp( - self, - self._ok, - name="receiver_right", - frame_size=frame_size, - frame_context=frame_context, - ibv_name=self._ibv_name_right, - ibv_port=self._ibv_port_right, - hololink_channel=self._hololink_channel_right, - device=self._camera_right, - ) + if self._ibv_name_right: + receiver_operator_right = hololink_module.operators.RoceReceiverOp( + self, + self._ok_right, + name="receiver_right", + frame_size=frame_size, + frame_context=frame_context, + ibv_name=self._ibv_name_right, + ibv_port=self._ibv_port_right, + hololink_channel=self._hololink_channel_right, + device=self._camera_right, + ) + else: + receiver_operator_right = hololink_module.operators.LinuxReceiverOperator( + self, + self._ok_right, + name="receiver_right", + frame_size=frame_size, + frame_context=frame_context, + hololink_channel=self._hololink_channel_right, + device=self._camera_right, + ) # rgba_components_per_pixel = 4 @@ -239,15 +237,15 @@ def compose(self): ) 
# - profiler_left = Profiler( + color_profiler_left = operators.ColorProfiler( self, - name="profiler_left", + name="color_profiler_left", callback=lambda buckets: self.left_buckets(buckets), out_tensor_name="left", ) - profiler_right = Profiler( + color_profiler_right = operators.ColorProfiler( self, - name="profiler_right", + name="color_profiler_right", callback=lambda buckets: self.right_buckets(buckets), out_tensor_name="right", ) @@ -284,14 +282,25 @@ def compose(self): window_title="IMX274 pattern test", ) # + watchdog_operator_left = operators.WatchdogOp( + self, + name="watchdog_operator_left", + watchdog=self._watchdog_left, + ) + watchdog_operator_right = operators.WatchdogOp( + self, + name="watchdog_operator_right", + watchdog=self._watchdog_right, + ) + # self.add_flow( receiver_operator_left, csi_to_bayer_operator_left, {("output", "input")} ) self.add_flow( csi_to_bayer_operator_left, demosaic_left, {("output", "receiver")} ) - self.add_flow(demosaic_left, profiler_left, {("transmitter", "input")}) - self.add_flow(profiler_left, visualizer, {("output", "receivers")}) + self.add_flow(demosaic_left, color_profiler_left, {("transmitter", "input")}) + self.add_flow(color_profiler_left, visualizer, {("output", "receivers")}) self.add_flow( receiver_operator_right, csi_to_bayer_operator_right, {("output", "input")} @@ -299,98 +308,275 @@ def compose(self): self.add_flow( csi_to_bayer_operator_right, demosaic_right, {("output", "receiver")} ) - self.add_flow(demosaic_right, profiler_right, {("transmitter", "input")}) - self.add_flow(profiler_right, visualizer, {("output", "receivers")}) - - def _check_done(self): - global actual_left, actual_right - logging.trace(f"{actual_left=} {actual_right=}") - if actual_left is None: - return - if actual_right is None: - return - logging.info("DONE") - self._ok.disable_tick() + self.add_flow(demosaic_right, color_profiler_right, {("transmitter", "input")}) + self.add_flow(color_profiler_right, visualizer, {("output", 
"receivers")}) + # + self.add_flow( + color_profiler_left, watchdog_operator_left, {("output", "input")} + ) + self.add_flow( + color_profiler_right, watchdog_operator_right, {("output", "input")} + ) def left_buckets(self, buckets): + self._bucket_count_left += 1 global actual_left - if actual_left is None: - actual_left = buckets - self._check_done() + actual_left = buckets + if self._bucket_count_left >= self._bucket_count_left_trigger: + # don't fail the watchdog while we're shutting down. + self._watchdog_left.update(timeout=30) + self._ok_left.disable_tick() def right_buckets(self, buckets): + self._bucket_count_right += 1 global actual_right - if actual_right is None: - actual_right = buckets - self._check_done() + actual_right = buckets + if self._bucket_count_right >= self._bucket_count_right_trigger: + # don't fail the watchdog while we're shutting down. + self._watchdog_right.update(timeout=30) + self._ok_right.disable_tick() + + +# This may execute on unaccelerated configurations, where +# there may be any number of infiniband interfaces (but +# most likely zero). In this case, placate parametrize +# by providing dummy None values in these columns. 
+sys_ibv_name_left, sys_ibv_name_right = ( + hololink_module.infiniband_devices() + [None, None] +)[:2] + + +expected_4k_results = [ + ( + # left + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, + 10, + # fmt: off + [1038960, 1032480, 6480, 0, 1028160, 8640, 1032480, 0, 0, 1028160, 8640, 1041120, 0, 0, 1028160, 1041120], + # fmt: on + # right + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, + 11, + # fmt: off + [921600, 917760, 7680, 0, 913920, 7680, 917760, 0, 0, 913920, 7680, 925440, 0, 0, 913920, 1847040], + # fmt: on + ), + ( + # left + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, + 11, + # fmt: off + [921600, 917760, 7680, 0, 913920, 7680, 917760, 0, 0, 913920, 7680, 925440, 0, 0, 913920, 1847040], + # fmt: on + # right + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, + 10, + # fmt: off + [1038960, 1032480, 6480, 0, 1028160, 8640, 1032480, 0, 0, 1028160, 8640, 1041120, 0, 0, 1028160, 1041120], + # fmt: on + ), +] + +expected_1080p_results = [ + ( + # left + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, + 10, + # fmt: off + [260280, 258120, 1080, 0, 257040, 2160, 258120, 0, 0, 257040, 2160, 260280, 0, 0, 257040, 260280], + # fmt: on + # right + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, + 11, + # fmt: off + [0, 0, 0, 0, 0, 1920, 228480, 0, 0, 456960, 3840, 462720, 0, 0, 456960, 462720], + # fmt: on + ), + ( + # left + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, + 11, + # fmt: off + [0, 0, 0, 0, 0, 1920, 228480, 0, 0, 456960, 3840, 462720, 0, 0, 456960, 462720], + # fmt: on + # right + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, + 10, + # fmt: off + [260280, 258120, 1080, 0, 257040, 2160, 258120, 0, 0, 257040, 2160, 260280, 0, 0, 257040, 260280], + # fmt: on 
+ ), +] + +expected_results = [] +expected_results.extend(expected_4k_results) +expected_results.extend(expected_1080p_results) + + +def run_test( + headless, + channel_metadata_left, + ibv_name_left, + ibv_port_left, + camera_mode_left, + pattern_left, + expected_left, + channel_metadata_right, + ibv_name_right, + ibv_port_right, + camera_mode_right, + pattern_right, + expected_right, + scheduler, +): + # + logging.info("Initializing.") + # + reset_globals() + # Get a handle to the GPU + (cu_result,) = cuda.cuInit(0) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_device_ordinal = 0 + cu_result, cu_device = cuda.cuDeviceGet(cu_device_ordinal) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + cu_result, cu_context = cuda.cuDevicePrimaryCtxRetain(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + # + hololink_channel_left = hololink_module.DataChannel(channel_metadata_left) + hololink_channel_right = hololink_module.DataChannel(channel_metadata_right) + # Get a handle to the camera + camera_left = CameraWrapper(hololink_channel_left, expander_configuration=0) + camera_right = CameraWrapper(hololink_channel_right, expander_configuration=1) + # Note that ColorProfiler takes longer on the COLOR_PROFILER_START_FRAMEth frame, where it + # starts running (and builds CUDA code). 
+ with utils.Watchdog( + "frame-reception-left", + initial_timeout=[30] * (operators.COLOR_PROFILER_START_FRAME + 2), + timeout=0.5, + ) as watchdog_left: + with utils.Watchdog( + "frame-reception-right", + initial_timeout=[30] * (operators.COLOR_PROFILER_START_FRAME + 2), + timeout=0.5, + ) as watchdog_right: + # Set up the application + application = PatternTestApplication( + headless, + cu_context, + cu_device_ordinal, + hololink_channel_left, + ibv_name_left, + ibv_port_left, + camera_left, + camera_mode_left, + hololink_channel_right, + ibv_name_right, + ibv_port_right, + camera_right, + camera_mode_right, + watchdog_left, + watchdog_right, + ) + default_configuration = os.path.join( + os.path.dirname(__file__), "example_configuration.yaml" + ) + application.config(default_configuration) + # Run it. + hololink = hololink_channel_left.hololink() + assert hololink is hololink_channel_right.hololink() + hololink.start() + assert camera_left._reset_callbacks == 0 + assert camera_right._reset_callbacks == 0 + hololink.reset() + assert camera_left._reset_callbacks == 1 + assert camera_right._reset_callbacks == 1 + camera_left.setup_clock() # this also sets camera_right's clock + camera_left.configure(camera_mode_left) + camera_left.test_pattern(pattern_left) + camera_right.configure(camera_mode_right) + camera_right.test_pattern(pattern_right) + + # For testing, make sure we call the get_register method. + STANDBY = 0x3000 + camera_left.get_register(STANDBY) + # Configure scheduler. 
+ if scheduler == "event": + app_scheduler = holoscan.schedulers.EventBasedScheduler( + application, + worker_thread_number=4, + name="event_scheduler", + ) + application.scheduler(app_scheduler) + elif scheduler == "multithread": + app_scheduler = holoscan.schedulers.MultiThreadScheduler( + application, + worker_thread_number=4, + name="multithread_scheduler", + ) + application.scheduler(app_scheduler) + elif scheduler == "greedy": + app_scheduler = holoscan.schedulers.GreedyScheduler( + application, + name="greedy_scheduler", + ) + application.scheduler(app_scheduler) + elif scheduler == "default": + # Use the default one. + pass + else: + raise Exception(f"Unexpected {scheduler=}") + # + application.run() + hololink.stop() + + (cu_result,) = cuda.cuDevicePrimaryCtxRelease(cu_device) + assert cu_result == cuda.CUresult.CUDA_SUCCESS + # Now check the buckets. + global actual_left, actual_right + # + logging.info(f"{expected_left=}") + logging.info(f"{actual_left=}") + if expected_left is not None: + left_diffs = [ + abs(a - e) for e, a in zip(expected_left, actual_left, strict=True) + ] + logging.info(f"{left_diffs=}") + left_diff = sum(left_diffs) + logging.info(f"{left_diff=}") + # + logging.info(f"{expected_right=}") + if expected_right is not None: + logging.info(f"{actual_right=}") + right_diffs = [ + abs(a - e) for e, a in zip(expected_right, actual_right, strict=True) + ] + logging.info(f"{right_diffs=}") + right_diff = sum(right_diffs) + logging.info(f"{right_diff=}") + + if expected_left: + assert 0 <= left_diff < 4 + if expected_right: + assert 0 <= right_diff < 4 @pytest.mark.skip_unless_imx274 @pytest.mark.accelerated_networking @pytest.mark.parametrize( "camera_mode_left, pattern_left, expected_left, camera_mode_right, pattern_right, expected_right", # noqa: E501 + expected_results, +) +@pytest.mark.parametrize( + "ibv_name_left, ibv_name_right", [ - ( - # left - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, - 10, 
- # fmt: off - [1038960, 1032480, 6480, 0, 1028160, 8640, 1032480, 0, 0, 1028160, 8640, 1041120, 0, 0, 1028160, 1041120], - # fmt: on - # right - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, - 11, - # fmt: off - [921600, 917760, 7680, 0, 913920, 7680, 917760, 0, 0, 913920, 7680, 925440, 0, 0, 913920, 1847040], - # fmt: on - ), - ( - # left - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, - 11, - # fmt: off - [921600, 917760, 7680, 0, 913920, 7680, 917760, 0, 0, 913920, 7680, 925440, 0, 0, 913920, 1847040], - # fmt: on - # right - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, - 10, - # fmt: off - [1038960, 1032480, 6480, 0, 1028160, 8640, 1032480, 0, 0, 1028160, 8640, 1041120, 0, 0, 1028160, 1041120], - # fmt: on - ), - ( - # left - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, - 10, - # fmt: off - [260280, 258120, 1080, 0, 257040, 2160, 258120, 0, 0, 257040, 2160, 260280, 0, 0, 257040, 260280], - # fmt: on - # right - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, - 11, - # fmt: off - [0, 0, 0, 0, 0, 1920, 228480, 0, 0, 456960, 3840, 462720, 0, 0, 456960, 462720], - # fmt: on - ), - ( - # left - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, - 11, - # fmt: off - [0, 0, 0, 0, 0, 1920, 228480, 0, 0, 456960, 3840, 462720, 0, 0, 456960, 462720], - # fmt: on - # right - hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, - 10, - # fmt: off - [260280, 258120, 1080, 0, 257040, 2160, 258120, 0, 0, 257040, 2160, 260280, 0, 0, 257040, 260280], - # fmt: on - ), + (sys_ibv_name_left, sys_ibv_name_right), ], ) @pytest.mark.parametrize( - "scheduler", - ["default", "greedy", "multithread", "event"], + "hololink_left, hololink_right", + [ + ("192.168.0.2", "192.168.0.3"), + ], ) def test_imx274_pattern( camera_mode_left, @@ 
-400,127 +586,208 @@ def test_imx274_pattern( pattern_right, expected_right, headless, - hololink_address, - capsys, + hololink_left, + hololink_right, scheduler, + ibv_name_left, + ibv_name_right, ): - # - logging.info("Initializing.") - # - reset_globals() - # Get a handle to the GPU - (cu_result,) = cuda.cuInit(0) - assert cu_result == cuda.CUresult.CUDA_SUCCESS - cu_device_ordinal = 0 - cu_result, cu_device = cuda.cuDeviceGet(cu_device_ordinal) - assert cu_result == cuda.CUresult.CUDA_SUCCESS - cu_result, cu_context = cuda.cuDevicePrimaryCtxRetain(cu_device) - assert cu_result == cuda.CUresult.CUDA_SUCCESS # Get a handle to data sources - hololink_left = hololink_address channel_metadata_left = hololink_module.Enumerator.find_channel( channel_ip=hololink_left ) - hololink_channel_left = hololink_module.DataChannel(channel_metadata_left) - ip = [int(x) for x in hololink_address.split(".")] - ip[-1] += 1 - hololink_right = ".".join([f"{x}" for x in ip]) channel_metadata_right = hololink_module.Enumerator.find_channel( channel_ip=hololink_right ) - hololink_channel_right = hololink_module.DataChannel(channel_metadata_right) - # Get a handle to the camera - camera_left = CameraWrapper(hololink_channel_left, expander_configuration=0) - camera_right = CameraWrapper(hololink_channel_right, expander_configuration=1) - # - ibv_name_left, ibv_name_right = sorted(os.listdir("/sys/class/infiniband")) ibv_port_left, ibv_port_right = 1, 1 - # Set up the application - application = PatternTestApplication( + run_test( headless, - cu_context, - cu_device_ordinal, - hololink_channel_left, + channel_metadata_left, ibv_name_left, ibv_port_left, - camera_left, camera_mode_left, - hololink_channel_right, + pattern_left, + expected_left, + channel_metadata_right, ibv_name_right, ibv_port_right, - camera_right, camera_mode_right, + pattern_right, + expected_right, + scheduler, ) - default_configuration = os.path.join( - os.path.dirname(__file__), "example_configuration.yaml" - ) - 
application.config(default_configuration) - # Run it. - hololink = hololink_channel_left.hololink() - assert hololink is hololink_channel_right.hololink() - hololink.start() - assert camera_left._reset_callbacks == 0 - assert camera_right._reset_callbacks == 0 - hololink.reset() - assert camera_left._reset_callbacks == 1 - assert camera_right._reset_callbacks == 1 - camera_left.setup_clock() # this also sets camera_right's clock - camera_left.configure(camera_mode_left) - camera_left.test_pattern(pattern_left) - camera_right.configure(camera_mode_right) - camera_right.test_pattern(pattern_right) - - # values for scheduler parameters. - if scheduler == "event": - app_scheduler = holoscan.schedulers.EventBasedScheduler( - application, - worker_thread_number=4, - name="event_scheduler", - ) - application.scheduler(app_scheduler) - elif scheduler == "multithread": - app_scheduler = holoscan.schedulers.MultiThreadScheduler( - application, - worker_thread_number=4, - name="multithread_scheduler", - ) - application.scheduler(app_scheduler) - elif scheduler == "greedy": - app_scheduler = holoscan.schedulers.GreedyScheduler( - application, - name="greedy_scheduler", - ) - application.scheduler(app_scheduler) - elif scheduler == "default": - # Use the default one. 
- pass - else: - raise Exception(f"Unexpected {scheduler=}") - application.run() - hololink.stop() - (cu_result,) = cuda.cuDevicePrimaryCtxRelease(cu_device) - assert cu_result == cuda.CUresult.CUDA_SUCCESS +@pytest.mark.skip_unless_imx274 +@pytest.mark.accelerated_networking +@pytest.mark.parametrize( + "camera_mode_left, pattern_left, expected_left, camera_mode_right, pattern_right, expected_right", # noqa: E501 + expected_results, +) +@pytest.mark.parametrize( + "multicast_left, multicast_left_port, multicast_right, multicast_right_port", # noqa: E501 + [ + ("224.0.0.228", 4791, "224.0.0.229", 4791), + ], +) +@pytest.mark.parametrize( + "ibv_name_left, ibv_name_right", # noqa: E501 + [ + (sys_ibv_name_left, sys_ibv_name_right), + ], +) +@pytest.mark.parametrize( + "hololink_left, hololink_right", + [ + ("192.168.0.2", "192.168.0.3"), + ], +) +def test_imx274_multicast( + camera_mode_left, + pattern_left, + expected_left, + camera_mode_right, + pattern_right, + expected_right, + headless, + hololink_left, + hololink_right, + scheduler, + multicast_left, + multicast_left_port, + multicast_right, + multicast_right_port, + ibv_name_left, + ibv_name_right, +): + # Get a handle to data sources + channel_metadata_left = hololink_module.Enumerator.find_channel( + channel_ip=hololink_left + ) + hololink_module.DataChannel.use_multicast( + channel_metadata_left, multicast_left, multicast_left_port + ) + channel_metadata_right = hololink_module.Enumerator.find_channel( + channel_ip=hololink_right + ) + hololink_module.DataChannel.use_multicast( + channel_metadata_right, multicast_right, multicast_right_port + ) + ibv_port_left, ibv_port_right = 1, 1 + run_test( + headless, + channel_metadata_left, + ibv_name_left, + ibv_port_left, + camera_mode_left, + pattern_left, + expected_left, + channel_metadata_right, + ibv_name_right, + ibv_port_right, + camera_mode_right, + pattern_right, + expected_right, + scheduler, + ) - # Now check the buckets. 
- global actual_left, actual_right + +# Test stereo patterns across a single network interface. +@pytest.mark.skip_unless_imx274 +@pytest.mark.accelerated_networking +@pytest.mark.parametrize( + "camera_mode_left, pattern_left, expected_left, camera_mode_right, pattern_right, expected_right", # noqa: E501 + expected_1080p_results, +) +@pytest.mark.parametrize( + "ibv_name, ibv_channel_ip", + [ + (sys_ibv_name_left, "192.168.0.2"), + (sys_ibv_name_right, "192.168.0.3"), + ], +) +def test_imx274_stereo_single_interface( + camera_mode_left, + pattern_left, + expected_left, + camera_mode_right, + pattern_right, + expected_right, + headless, + scheduler, + ibv_name, + ibv_channel_ip, +): + # Get a handle to data sources + channel_metadata = hololink_module.Enumerator.find_channel( + channel_ip=ibv_channel_ip + ) + # Now make separate ones for left and right; and set them to + # use sensor 0 and 1 respectively. + channel_metadata_left = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_left, 0) + channel_metadata_right = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_right, 1) # - logging.info(f"{expected_left=}") - logging.info(f"{actual_left=}") - left_diffs = [abs(a - e) for e, a in zip(expected_left, actual_left, strict=True)] - logging.info(f"{left_diffs=}") - left_diff = sum(left_diffs) - logging.info(f"{left_diff=}") + ibv_port = 1 + run_test( + headless, + channel_metadata_left, + ibv_name, + ibv_port, + camera_mode_left, + pattern_left, + expected_left, + channel_metadata_right, + ibv_name, + ibv_port, + camera_mode_right, + pattern_right, + expected_right, + scheduler, + ) + + +# Test stereo patterns across a single network interface using linux sockets. +# This test doesn't actually evaluate the image data due to expected packet losses. 
+@pytest.mark.skip_unless_imx274 +@pytest.mark.parametrize( + "camera_mode_left, pattern_left, expected_left, camera_mode_right, pattern_right, expected_right", # noqa: E501 + expected_1080p_results, +) +def test_linux_imx274_stereo_single_interface( + camera_mode_left, + pattern_left, + expected_left, + camera_mode_right, + pattern_right, + expected_right, + headless, + scheduler, + channel_ip, +): + # Get a handle to data sources + channel_metadata = hololink_module.Enumerator.find_channel(channel_ip=channel_ip) + # Now make separate ones for left and right; and set them to + # use sensor 0 and 1 respectively. + channel_metadata_left = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_left, 0) + channel_metadata_right = hololink_module.Metadata(channel_metadata) + hololink_module.DataChannel.use_sensor(channel_metadata_right, 1) # - logging.info(f"{expected_right=}") - logging.info(f"{actual_right=}") - right_diffs = [ - abs(a - e) for e, a in zip(expected_right, actual_right, strict=True) - ] - logging.info(f"{right_diffs=}") - right_diff = sum(right_diffs) - logging.info(f"{right_diff=}") - - assert 0 <= left_diff < 4 - assert 0 <= right_diff < 4 + run_test( + headless, + channel_metadata_left, + None, # ibv_name_left + None, # ibv_port_left + camera_mode_left, + pattern_left, + None, # expected_left + channel_metadata_right, + None, # ibv_name_right + None, # ibv_port_right + camera_mode_right, + pattern_right, + None, # expected_right + scheduler, + ) diff --git a/tests/test_imx274_timestamps.py b/tests/test_imx274_timestamps.py index 9fd2e44..5628719 100644 --- a/tests/test_imx274_timestamps.py +++ b/tests/test_imx274_timestamps.py @@ -21,74 +21,14 @@ import sys from unittest import mock -import cupy as cp import holoscan +import operators import pytest +import utils import hololink as hololink_module from examples import imx274_player -MS_PER_SEC = 1000.0 -US_PER_SEC = 1000.0 * MS_PER_SEC -NS_PER_SEC = 
1000.0 * US_PER_SEC -SEC_PER_NS = 1.0 / NS_PER_SEC - - -class Profiler(holoscan.core.Operator): - def __init__( - self, - *args, - callback=None, - metadata_callback=None, - hololink_channel=None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self._count = 0 - self._callback = callback - self._metadata_callback = metadata_callback - self._timestamps = [] - self._hololink = hololink_channel.hololink() - self._packets_dropped = None - - def setup(self, spec): - logging.info("setup") - spec.input("input") - spec.output("output") - - def compute(self, op_input, op_output, context): - self._count += 1 - in_message = op_input.receive("input") - cp_frame = cp.from_dlpack(in_message.get("")) # cp_frame.shape is (y,x,4) - op_output.emit({"": cp_frame}, "output") - # - metadata = self.metadata() - frame_number = metadata["frame_number"] - packets_dropped = metadata["packets_dropped"] - if packets_dropped != self._packets_dropped: - logging.info(f"{packets_dropped=} ({packets_dropped:#X}) {frame_number=}") - self._packets_dropped = packets_dropped - image_timestamp_ns = metadata["timestamp_ns"] - received_timestamp_ns = metadata["received_ns"] - pipeline_timestamp_ns = ( - datetime.datetime.now(datetime.timezone.utc).timestamp() * NS_PER_SEC - ) - self._timestamps.append( - ( - image_timestamp_ns, - received_timestamp_ns, - pipeline_timestamp_ns, - frame_number, - ) - ) - if self._count < 200: - return - self._callback(self._timestamps) - - def metadata(self): - return self._metadata_callback() - - timestamps = None network_mode = None roce_network_mode = "ROCE" @@ -121,6 +61,7 @@ def __init__( self._camera = camera self._camera_mode = camera_mode self._frame_limit = frame_limit + self.is_metadata_enabled = True def compose(self): logging.info("compose") @@ -208,12 +149,10 @@ def compose(self): bayer_grid_pos=bayer_format.value, interpolation_mode=0, ) - profiler = Profiler( + profiler = operators.TimeProfiler( self, name="profiler", - 
hololink_channel=self._hololink_channel, callback=lambda timestamps: self._terminate(timestamps), - metadata_callback=lambda: receiver_operator.metadata(), ) visualizer = holoscan.operators.HolovizOp( self, @@ -233,18 +172,9 @@ def _terminate(self, recorded_timestamps): timestamps = recorded_timestamps -def to_s(timestamp_ns): - return float(timestamp_ns) / 1000 / 1000 / 1000 - - -def diff_s(later_timestamp_ns, earlier_timestamp_ns): - diff_ns = later_timestamp_ns - earlier_timestamp_ns - return to_s(diff_ns) - - # frame_time represents the constant time difference between when the # frame-start and frame-end messages arrive at the FPGA; for IMX274 -# it takes about 8ms for a 1080p or almost 16ms for a 4k image. +# it takes just under 8ms for a 1080p or almost 16ms for a 4k image. # time_limit, the acceptable amount of time between when the frame was sent and # when we got around to looking at it, is much smaller in the RDMA # configuration. @@ -252,31 +182,35 @@ def diff_s(later_timestamp_ns, earlier_timestamp_ns): @pytest.mark.skip_unless_imx274 @pytest.mark.accelerated_networking @pytest.mark.parametrize( - "camera_mode, roce_mode, frame_time, time_limit", # noqa: E501 + "camera_mode, roce_mode, frame_time, time_limit, max_recv_time", # noqa: E501 [ ( hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, True, 0.015, 0.004, + 0.0015, ), ( hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, True, - 0.008, - 0.004, + 0.0075, + 0.0040, + 0.0015, ), ( hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_3840X2160_60FPS, False, 0.015, 0.012, + 0.0015, ), ( hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, False, - 0.008, - 0.012, + 0.0075, + 0.0120, + 0.0035, ), ], ) @@ -285,6 +219,7 @@ def test_imx274_timestamps( roce_mode, frame_time, time_limit, + max_recv_time, headless, hololink_address, ibv_name, @@ -317,39 +252,49 @@ def test_imx274_timestamps( with 
mock.patch( "examples.imx274_player.HoloscanApplication", TimestampTestApplication ): - imx274_player.main() + with utils.PriorityScheduler(): + imx274_player.main() # check for errors global timestamps pipeline_dts, receiver_dts = [], [] + metadata_receiver_dts = [] # Allow for startup times to be a bit longer settled_timestamps = timestamps[5:-5] assert len(settled_timestamps) >= 100 for ( - image_timestamp_ns, - received_timestamp_ns, - pipeline_timestamp_ns, + image_timestamp_s, + metadata_timestamp_s, + received_timestamp_s, + pipeline_timestamp_s, frame_number, ) in settled_timestamps: - image_timestamp_s = datetime.datetime.fromtimestamp( - to_s(image_timestamp_ns) - ).isoformat() # strftime("%H:%M:%S.%f") - received_timestamp_s = datetime.datetime.fromtimestamp( - to_s(received_timestamp_ns) - ).isoformat() # strftime("%H:%M:%S.%f") - pipeline_timestamp_s = datetime.datetime.fromtimestamp( - to_s(pipeline_timestamp_ns) - ).isoformat() # strftime("%H:%M:%S.%f") - pipeline_dt = diff_s(pipeline_timestamp_ns, image_timestamp_ns) + image_timestamp = datetime.datetime.fromtimestamp(image_timestamp_s).isoformat() + metadata_timestamp = datetime.datetime.fromtimestamp( + metadata_timestamp_s + ).isoformat() + received_timestamp = datetime.datetime.fromtimestamp( + received_timestamp_s + ).isoformat() + pipeline_timestamp = datetime.datetime.fromtimestamp( + pipeline_timestamp_s + ).isoformat() + pipeline_dt = pipeline_timestamp_s - image_timestamp_s logging.debug( - f"{image_timestamp_s=} {pipeline_timestamp_s=} {pipeline_dt=:0.6f} {frame_number=}" + f"{image_timestamp=} {pipeline_timestamp=} {pipeline_dt=:0.6f} {frame_number=}" ) pipeline_dts.append(round(pipeline_dt, 4)) - receiver_dt = diff_s(received_timestamp_ns, image_timestamp_ns) + receiver_dt = received_timestamp_s - image_timestamp_s logging.debug( - f"{image_timestamp_s=} {received_timestamp_s=} {receiver_dt=:0.6f} {frame_number=}" + f"{image_timestamp=} {received_timestamp=} {receiver_dt=:0.6f} 
{frame_number=}" ) receiver_dts.append(round(receiver_dt, 4)) + metadata_receiver_dt = received_timestamp_s - metadata_timestamp_s + logging.debug( + f"{metadata_timestamp=} {received_timestamp=} {metadata_receiver_dt=:0.6f} {frame_number=}" + ) + metadata_receiver_dts.append(round(metadata_receiver_dt, 4)) + smallest_time_difference = min(pipeline_dts) largest_time_difference = max(pipeline_dts) logging.info(f"pipeline {smallest_time_difference=} {largest_time_difference=}") @@ -368,3 +313,14 @@ def test_imx274_timestamps( assert (frame_time + 0) <= smallest_time_difference assert smallest_time_difference < largest_time_difference assert largest_time_difference < (frame_time + time_limit) + # + smallest_time_difference = min(metadata_receiver_dts) + largest_time_difference = max(metadata_receiver_dts) + average_time_difference = sum(metadata_receiver_dts) / len(metadata_receiver_dts) + logging.info( + f"FPGA to full frame received {smallest_time_difference=} {largest_time_difference=}" + ) + assert smallest_time_difference < largest_time_difference + # The time taken from the end of image frame received at HSB fpga to full frame + # received on IGX should be less than max_recv_time on average. 
+ assert average_time_difference < max_recv_time diff --git a/tests/test_imx477_pattern.py b/tests/test_imx477_pattern.py index 6c77c99..2296980 100644 --- a/tests/test_imx477_pattern.py +++ b/tests/test_imx477_pattern.py @@ -19,49 +19,13 @@ import logging import os -import cupy as cp import holoscan +import operators import pytest from cuda import cuda import hololink as hololink_module - -class Profiler(holoscan.core.Operator): - def __init__(self, *args, callback=None, out_tensor_name=None, **kwargs): - super().__init__(*args, **kwargs) - self._count = 0 - self._callback = callback - self._out_tensor_name = out_tensor_name - - def setup(self, spec): - logging.info("setup") - spec.input("input") - spec.output("output") - - def compute(self, op_input, op_output, context): - self._count += 1 - in_message = op_input.receive("input") - cp_frame = cp.from_dlpack(in_message.get("")) # cp_frame.shape is (y,x,4) - op_output.emit({self._out_tensor_name: cp_frame}, "output") - # Give it some time to settle - if self._count < 20: - return - # Compute the Y of YCrCb - r = cp_frame[:, :, 0] - g = cp_frame[:, :, 1] - b = cp_frame[:, :, 2] - y = r * 0.299 + g * 0.587 + b * 0.114 - logging.debug(f"{y=}") - # - unique = cp.unique(y) - logging.info(f"{unique=}") - buckets, _ = cp.histogram(y, bins=16, range=(0, 65536)) - s = ",".join([f"{x}" for x in buckets]) - logging.info(f"buckets=[{s}]") - self._callback(buckets) - - actual = None @@ -154,7 +118,7 @@ def compose(self): ) # - profiler = Profiler( + profiler = operators.ColorProfiler( self, name="profiler", callback=lambda buckets: self.buckets(buckets), @@ -188,6 +152,7 @@ def buckets(self, buckets): self._check_done() +@pytest.mark.skip("IMX477 ISN'T SUPPORTED FOR 2410 FPGAs YET") @pytest.mark.skip_unless_imx477 @pytest.mark.parametrize( "camera_mode,expected", diff --git a/tests/test_imx477_timestamps.py b/tests/test_imx477_timestamps.py index a76fcc1..d07e7fd 100644 --- a/tests/test_imx477_timestamps.py +++ 
b/tests/test_imx477_timestamps.py @@ -21,74 +21,14 @@ import sys from unittest import mock -import cupy as cp import holoscan +import operators import pytest +import utils import hololink as hololink_module from examples import imx477_player -MS_PER_SEC = 1000.0 -US_PER_SEC = 1000.0 * MS_PER_SEC -NS_PER_SEC = 1000.0 * US_PER_SEC -SEC_PER_NS = 1.0 / NS_PER_SEC - - -class Profiler(holoscan.core.Operator): - def __init__( - self, - *args, - callback=None, - metadata_callback=None, - hololink_channel=None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self._count = 0 - self._callback = callback - self._metadata_callback = metadata_callback - self._timestamps = [] - self._hololink = hololink_channel.hololink() - self._packets_dropped = None - - def setup(self, spec): - logging.info("setup") - spec.input("input") - spec.output("output") - - def compute(self, op_input, op_output, context): - self._count += 1 - in_message = op_input.receive("input") - cp_frame = cp.from_dlpack(in_message.get("")) # cp_frame.shape is (y,x,4) - op_output.emit({"": cp_frame}, "output") - # - metadata = self.metadata() - frame_number = metadata["frame_number"] - packets_dropped = metadata["packets_dropped"] - if packets_dropped != self._packets_dropped: - logging.info(f"{packets_dropped=} ({packets_dropped:#X}) {frame_number=}") - self._packets_dropped = packets_dropped - image_timestamp_ns = metadata["timestamp_ns"] - received_timestamp_ns = metadata["received_ns"] - pipeline_timestamp_ns = ( - datetime.datetime.now(datetime.timezone.utc).timestamp() * NS_PER_SEC - ) - self._timestamps.append( - ( - image_timestamp_ns, - received_timestamp_ns, - pipeline_timestamp_ns, - frame_number, - ) - ) - if self._count < 200: - return - self._callback(self._timestamps) - - def metadata(self): - return self._metadata_callback() - - timestamps = None network_mode = None roce_network_mode = "ROCE" @@ -119,6 +59,7 @@ def __init__( self._ibv_port = ibv_port self._camera = camera self._frame_limit = 
frame_limit + self.is_metadata_enabled = True def compose(self): logging.info("compose") @@ -205,12 +146,10 @@ def compose(self): bayer_grid_pos=bayer_format.value, interpolation_mode=0, ) - profiler = Profiler( + profiler = operators.TimeProfiler( self, name="profiler", - hololink_channel=self._hololink_channel, callback=lambda timestamps: self._terminate(timestamps), - metadata_callback=lambda: receiver_operator.metadata(), ) visualizer = holoscan.operators.HolovizOp( self, @@ -244,6 +183,7 @@ def diff_s(later_timestamp_ns, earlier_timestamp_ns): # time_limit, the acceptable amount of time between when the frame was sent and # when we got around to looking at it, is much smaller in the RDMA # configuration. +@pytest.mark.skip("IMX477 ISN'T SUPPORTED FOR 2410 FPGAs YET") @pytest.mark.skip_unless_ptp @pytest.mark.skip_unless_imx477 @pytest.mark.accelerated_networking @@ -295,7 +235,8 @@ def test_imx477_timestamps( with mock.patch( "examples.imx477_player.HoloscanApplication", TimestampTestApplication ): - imx477_player.main() + with utils.PriorityScheduler(): + imx477_player.main() # check for errors global timestamps diff --git a/tests/test_linux_hwisp_pattern.py b/tests/test_linux_hwisp_pattern.py index 80e73f8..7dd4438 100644 --- a/tests/test_linux_hwisp_pattern.py +++ b/tests/test_linux_hwisp_pattern.py @@ -19,49 +19,13 @@ import logging import os -import cupy as cp import holoscan +import operators import pytest from cuda import cuda import hololink as hololink_module - -class Profiler(holoscan.core.Operator): - def __init__(self, *args, callback=None, out_tensor_name=None, **kwargs): - super().__init__(*args, **kwargs) - self._count = 0 - self._callback = callback - self._out_tensor_name = out_tensor_name - - def setup(self, spec): - logging.info("setup") - spec.input("input") - spec.output("output") - - def compute(self, op_input, op_output, context): - self._count += 1 - in_message = op_input.receive("input") - cp_frame = cp.from_dlpack(in_message.get("")) 
# cp_frame.shape is (y,x,4) - op_output.emit({self._out_tensor_name: cp_frame}, "output") - # Give it some time to settle - if self._count < 20: - return - # Compute the Y of YCrCb - r = cp_frame[:, :, 0] - g = cp_frame[:, :, 1] - b = cp_frame[:, :, 2] - y = r * 0.299 + g * 0.587 + b * 0.114 - logging.debug(f"{y=}") - # - unique = cp.unique(y) - logging.info(f"{unique=}") - buckets, _ = cp.histogram(y, bins=16, range=(0, 65536)) - s = ",".join([f"{x}" for x in buckets]) - logging.info(f"buckets=[{s}]") - self._callback(buckets) - - actual_left = None actual_right = None @@ -78,13 +42,9 @@ def __init__( cuda_context, cuda_device_ordinal, hololink_channel_left, - ibv_name_left, - ibv_port_left, camera_left, camera_mode_left, hololink_channel_right, - ibv_name_right, - ibv_port_right, camera_right, camera_mode_right, ): @@ -94,20 +54,19 @@ def __init__( self._cuda_context = cuda_context self._cuda_device_ordinal = cuda_device_ordinal self._hololink_channel_left = hololink_channel_left - self._ibv_name_left = ibv_name_left - self._ibv_port_left = ibv_port_left self._camera_left = camera_left self._camera_mode_left = camera_mode_left self._hololink_channel_right = hololink_channel_right - self._ibv_name_right = ibv_name_right - self._ibv_port_right = ibv_port_right self._camera_right = camera_right self._camera_mode_right = camera_mode_right def compose(self): logging.info("compose") - self._ok = holoscan.conditions.BooleanCondition( - self, name="ok", enable_tick=True + self._ok_left = holoscan.conditions.BooleanCondition( + self, name="ok_left", enable_tick=True + ) + self._ok_right = holoscan.conditions.BooleanCondition( + self, name="ok_right", enable_tick=True ) self._camera_left.set_mode(self._camera_mode_left) self._camera_right.set_mode(self._camera_mode_right) @@ -156,12 +115,10 @@ def compose(self): frame_context = self._cuda_context receiver_operator_left = hololink_module.operators.LinuxReceiverOperator( self, - self._ok, + self._ok_left, 
name="receiver_left", frame_size=frame_size, frame_context=frame_context, - ibv_name=self._ibv_name_left, - ibv_port=self._ibv_port_left, hololink_channel=self._hololink_channel_left, device=self._camera_left, ) @@ -172,12 +129,10 @@ def compose(self): frame_context = self._cuda_context receiver_operator_right = hololink_module.operators.LinuxReceiverOperator( self, - self._ok, + self._ok_right, name="receiver_right", frame_size=frame_size, frame_context=frame_context, - ibv_name=self._ibv_name_right, - ibv_port=self._ibv_port_right, hololink_channel=self._hololink_channel_right, device=self._camera_right, ) @@ -195,7 +150,7 @@ def compose(self): * self._camera_left._height, num_blocks=2, ) - isp_left = holoscan.operators.ArgusIspOp( + isp_left = hololink_module.operators.ArgusIspOp( self, name="isp_left", pool=isp_pool_left, @@ -211,22 +166,22 @@ def compose(self): * self._camera_right._height, num_blocks=2, ) - isp_right = holoscan.operators.ArgusIspOp( + isp_right = hololink_module.operators.ArgusIspOp( self, name="isp_right", pool=isp_pool_right, ) # - profiler_left = Profiler( + color_profiler_left = operators.ColorProfiler( self, - name="profiler_left", + name="color_profiler_left", callback=lambda buckets: self.left_buckets(buckets), out_tensor_name="left", ) - profiler_right = Profiler( + color_profiler_right = operators.ColorProfiler( self, - name="profiler_right", + name="color_profiler_right", callback=lambda buckets: self.right_buckets(buckets), out_tensor_name="right", ) @@ -267,39 +222,30 @@ def compose(self): receiver_operator_left, csi_to_bayer_operator_left, {("output", "input")} ) self.add_flow(csi_to_bayer_operator_left, isp_left, {("output", "receiver")}) - self.add_flow(isp_left, profiler_left, {("transmitter", "input")}) - self.add_flow(profiler_left, visualizer, {("output", "receivers")}) + self.add_flow(isp_left, color_profiler_left, {("transmitter", "input")}) + self.add_flow(color_profiler_left, visualizer, {("output", "receivers")}) 
self.add_flow( receiver_operator_right, csi_to_bayer_operator_right, {("output", "input")} ) self.add_flow(csi_to_bayer_operator_right, isp_right, {("output", "receiver")}) - self.add_flow(isp_right, profiler_right, {("transmitter", "input")}) - self.add_flow(profiler_right, visualizer, {("output", "receivers")}) - - def _check_done(self): - global actual_left, actual_right - logging.trace(f"{actual_left=} {actual_right=}") - if actual_left is None: - return - if actual_right is None: - return - logging.info("DONE") - self._ok.disable_tick() + self.add_flow(isp_right, color_profiler_right, {("transmitter", "input")}) + self.add_flow(color_profiler_right, visualizer, {("output", "receivers")}) def left_buckets(self, buckets): global actual_left if actual_left is None: actual_left = buckets - self._check_done() + self._ok_left.disable_tick() def right_buckets(self, buckets): global actual_right if actual_right is None: actual_right = buckets - self._check_done() + self._ok_right.disable_tick() +@pytest.mark.skip("https://jirasw.nvidia.com/browse/BAJQ0XTT-173") @pytest.mark.skip_unless_igpu @pytest.mark.skip_unless_imx274 @pytest.mark.parametrize( @@ -379,21 +325,15 @@ def test_linux_hwisp_pattern( hololink_channel_right, expander_configuration=1 ) # - ibv_name_left, ibv_name_right = sorted(os.listdir("/sys/class/infiniband")) - ibv_port_left, ibv_port_right = 1, 1 # Set up the application application = PatternTestApplication( headless, cu_context, cu_device_ordinal, hololink_channel_left, - ibv_name_left, - ibv_port_left, camera_left, camera_mode_left, hololink_channel_right, - ibv_name_right, - ibv_port_right, camera_right, camera_mode_right, ) diff --git a/tests/test_linux_imx274_player.py b/tests/test_linux_imx274_player.py index bea7c58..18c4c83 100644 --- a/tests/test_linux_imx274_player.py +++ b/tests/test_linux_imx274_player.py @@ -19,6 +19,7 @@ from unittest import mock import pytest +import utils import hololink as hololink_module from examples import 
linux_imx274_player @@ -47,8 +48,16 @@ def test_linux_imx274_player( arguments.extend(["--headless"]) with mock.patch("sys.argv", arguments): - linux_imx274_player.main() - - # check for errors - captured = capsys.readouterr() - assert captured.err == "" + with mock.patch( + "hololink.operators.LinuxReceiverOperator", + utils.MockedLinuxReceiverOperator, + ): + linux_imx274_player.main() + + # Make sure we actually did receive something; + # see utils.py to see how this is set + assert utils.receiver_count > 10 + + # check for errors + captured = capsys.readouterr() + assert captured.err == "" diff --git a/tests/test_serializer.py b/tests/test_serializer.py index ef09dc4..205ee1f 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -158,3 +158,37 @@ def test_deserialize_uint16_be(): b[1] = 0x30 deserializer = hololink.Deserializer(b, length=2) assert deserializer.next_uint16_be() == 0x8930 + + +def test_deserialize_uint64_be(): + b = bytes( + [ + 0x8A, + 0x7C, + 0x6B, + 0x5A, + 0x8C, + 0x7B, + 0x6A, + 0x53, + ] + ) + deserializer = hololink.Deserializer(b) + assert deserializer.next_uint64_be() == 0x8A7C6B5A8C7B6A53 + + +def test_deserialize_uint64_le(): + b = bytes( + [ + 0x8A, + 0x7C, + 0x6B, + 0x5A, + 0x8C, + 0x7B, + 0x6A, + 0x53, + ] + ) + deserializer = hololink.Deserializer(b) + assert deserializer.next_uint64_le() == 0x536A7B8C5A6B7C8A diff --git a/tests/test_single_network_stereo_imx274_player.py b/tests/test_single_network_stereo_imx274_player.py new file mode 100644 index 0000000..05f6956 --- /dev/null +++ b/tests/test_single_network_stereo_imx274_player.py @@ -0,0 +1,69 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See README.md for detailed information. + +import sys +from unittest import mock + +import pytest + +from examples import ( + linux_single_network_stereo_imx274_player, + single_network_stereo_imx274_player, +) + + +@pytest.mark.skip_unless_imx274 +@pytest.mark.accelerated_networking +def test_single_network_stereo_imx274_player( + headless, frame_limit, ibv_name, ibv_port, capsys +): + arguments = [ + sys.argv[0], + "--frame-limit", + str(frame_limit), + "--ibv-name", + ibv_name, + "--ibv-port", + str(ibv_port), + ] + if headless: + arguments.extend(["--headless"]) + + with mock.patch("sys.argv", arguments): + single_network_stereo_imx274_player.main() + + # check for errors + captured = capsys.readouterr() + assert captured.err == "" + + +@pytest.mark.skip_unless_imx274 +def test_single_network_linux_stereo_imx274_player(headless, frame_limit, capsys): + arguments = [ + sys.argv[0], + "--frame-limit", + str(frame_limit), + ] + if headless: + arguments.extend(["--headless"]) + + with mock.patch("sys.argv", arguments): + linux_single_network_stereo_imx274_player.main() + + # check for errors + captured = capsys.readouterr() + assert captured.err == "" diff --git a/tests/test_tao_peoplenet.py b/tests/test_tao_peoplenet.py index 9596d75..76c507a 100644 --- a/tests/test_tao_peoplenet.py +++ b/tests/test_tao_peoplenet.py @@ -21,9 +21,20 @@ from urllib.request import urlretrieve import pytest +import utils import hololink as hololink_module -from examples import tao_peoplenet +from examples import linux_tao_peoplenet, tao_peoplenet + + +@pytest.fixture +def 
peoplenet_onnx_file(): + # Download the PeopleNet ONNX model + file_name = "examples/resnet34_peoplenet_int8.onnx" + if not exists(file_name): + url = "https://api.ngc.nvidia.com/v2/models/org/nvidia/team/tao/peoplenet/pruned_quantized_decrypted_v2.3.3/files?redirect=true&path=resnet34_peoplenet_int8.onnx" + urlretrieve(url, file_name) + yield file_name @pytest.mark.skip_unless_imx274 @@ -35,14 +46,15 @@ ], ) def test_tao_peoplenet( - camera_mode, headless, frame_limit, hololink_address, ibv_name, ibv_port, capsys + camera_mode, + headless, + frame_limit, + hololink_address, + ibv_name, + ibv_port, + capsys, + peoplenet_onnx_file, ): - # Download the PeopleNet ONNX model - file_name = "examples/resnet34_peoplenet_int8.onnx" - if not exists(file_name): - url = "https://api.ngc.nvidia.com/v2/models/org/nvidia/team/tao/peoplenet/pruned_quantized_decrypted_v2.3.3/files?redirect=true&path=resnet34_peoplenet_int8.onnx" - urlretrieve(url, file_name) - arguments = [ sys.argv[0], "--camera-mode", @@ -65,3 +77,41 @@ def test_tao_peoplenet( # check for errors captured = capsys.readouterr() assert captured.err == "" + + +@pytest.mark.skip_unless_imx274 +@pytest.mark.parametrize( + "camera_mode", # noqa: E501 + [ + hololink_module.sensors.imx274.imx274_mode.Imx274_Mode.IMX274_MODE_1920X1080_60FPS, + ], +) +def test_linux_tao_peoplenet( + camera_mode, headless, frame_limit, hololink_address, capsys, peoplenet_onnx_file +): + arguments = [ + sys.argv[0], + "--camera-mode", + str(camera_mode.value), + "--frame-limit", + str(frame_limit), + "--hololink", + hololink_address, + ] + if headless: + arguments.extend(["--headless"]) + + with mock.patch("sys.argv", arguments): + with mock.patch( + "hololink.operators.LinuxReceiverOperator", + utils.MockedLinuxReceiverOperator, + ): + linux_tao_peoplenet.main() + + # Make sure we actually did receive something; + # see utils.py to see how this is set + assert utils.receiver_count > 10 + + # check for errors + captured = capsys.readouterr() 
+ assert captured.err == "" diff --git a/tests/udp_server.py b/tests/udp_server.py index 8b00b4f..fe6dcc9 100644 --- a/tests/udp_server.py +++ b/tests/udp_server.py @@ -15,6 +15,8 @@ # See README.md for detailed information. +import argparse +import collections import ctypes import logging import multiprocessing @@ -25,7 +27,6 @@ import time import traceback -import hololink.sensors.udp_cam as uc import numpy as np import nvtx import utils @@ -40,7 +41,7 @@ run = False # publish bayer data camera_watchdog_trigger = 0 frame_time_s = 1.0 -udp_port = 10203 +udp_port = 8192 control_r, control_w = os.pipe() # How much shared memory for the generated image @@ -54,8 +55,18 @@ csi_image_data = None csi_image_length = None +HololinkChannelConfiguration = collections.namedtuple( + "HololinkChannelConfiguration", ["configuration_address", "vip_mask"] +) +BOOTP_TRANSACTION_ID_MAP = { + 0: HololinkChannelConfiguration(configuration_address=0x02000000, vip_mask=0x1), + 1: HololinkChannelConfiguration(configuration_address=0x02010000, vip_mask=0x2), +} +SENSOR_CONFIGURATION_SIZE = 0x40 +PAGE_SIZE = 128 -def generate_image(memory, port): + +def generate_image(): global bayer_height, bayer_width, bayer_format, pixel_format image, bayer_image = utils.make_image( bayer_height, bayer_width, bayer_format, pixel_format @@ -105,7 +116,7 @@ def get_cam_i2c_register(register_id): assert False and "get_i2c_register invalid register ID." 
-def set_cam_i2c_register(register_id, value, memory, ix): +def set_cam_i2c_register(register_id, value): logging.trace( "set_cam_i2c_register(register_id=%d(0x%X), value=%d(0x%X))" % (register_id, register_id, value, value) @@ -138,7 +149,7 @@ def set_cam_i2c_register(register_id, value, memory, ix): frame_time_s = 60.0 / value logging.trace("frame_time_s=%s" % (frame_time_s,)) elif register_id == hololink_module.sensors.udp_cam.INITIALIZE: - generate_image(memory, uc.VIRTUAL_PORTS[ix]) + generate_image() else: assert False and "set_i2c_register invalid register ID." @@ -198,19 +209,25 @@ def cam_i2c(memory, i2c_address): deserializer = hololink_module.Deserializer(b) register_id = deserializer.next_uint16_be() value = deserializer.next_uint32_be() - set_cam_i2c_register(register_id, value, memory, 0) + set_cam_i2c_register(register_id, value) else: assert False and "Unexpected register access." -def main(lock=None): +def udp_server(lock): + # logging.debug("Starting.") - udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + control_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + control_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) global udp_port - udp_socket.bind(("", udp_port)) - message = bytearray(8192) + control_socket.bind(("", udp_port)) + data_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + data_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + data_socket.bind(("", 12288)) + message = bytearray(hololink_module.UDP_PACKET_SIZE) reply = bytearray(1500) image_count = 0 + latched_sequence = 0 global done, frame_time_s @@ -219,25 +236,30 @@ def main(lock=None): hololink_module.CAM_I2C_CTRL: 0, hololink_module.BL_I2C_CTRL: 0, hololink_module.FPGA_VERSION: 1, - hololink_module.FPGA_DATE: 20230707, + hololink_module.FPGA_DATE: 20241113, } - for port in uc.VIRTUAL_PORTS: + for n, hololink_channel_configuration in BOOTP_TRANSACTION_ID_MAP.items(): + network_configuration_address = ( + 
hololink_channel_configuration.configuration_address + ) + sensor_configuration_address = n * SENSOR_CONFIGURATION_SIZE u = { - hololink_module.DP_PACKET_SIZE + port: 0, - hololink_module.DP_HOST_MAC_LOW + port: 0, - hololink_module.DP_HOST_MAC_HIGH + port: 0, - hololink_module.DP_HOST_IP + port: 0, - hololink_module.DP_HOST_UDP_PORT + port: 0, - hololink_module.DP_VIP_MASK - + port: 0, # which sensor port connects to which ethernet? # - hololink_module.DP_ROCE_CFG + port: 0, + hololink_module.DP_PACKET_SIZE + network_configuration_address: 0, + hololink_module.DP_VIP_MASK + network_configuration_address: 0, # - hololink_module.DP_ROCE_VADDR_MSB_0 + port: 0, - hololink_module.DP_ROCE_VADDR_LSB_0 + port: 0, - hololink_module.DP_ROCE_BUF_END_MSB_0 + port: 0, - hololink_module.DP_ROCE_BUF_END_LSB_0 + port: 0, - hololink_module.DP_ROCE_RKEY_0 + port: 0, + hololink_module.DP_QP + sensor_configuration_address: 0, + hololink_module.DP_RKEY + sensor_configuration_address: 0, + hololink_module.DP_ADDRESS_0 + sensor_configuration_address: 0, + hololink_module.DP_ADDRESS_1 + sensor_configuration_address: 0, + hololink_module.DP_ADDRESS_2 + sensor_configuration_address: 0, + hololink_module.DP_ADDRESS_3 + sensor_configuration_address: 0, + hololink_module.DP_BUFFER_LENGTH + sensor_configuration_address: 0, + hololink_module.DP_BUFFER_MASK + sensor_configuration_address: 0, + hololink_module.DP_HOST_MAC_LOW + sensor_configuration_address: 0, + hololink_module.DP_HOST_MAC_HIGH + sensor_configuration_address: 0, + hololink_module.DP_HOST_IP + sensor_configuration_address: 0, + hololink_module.DP_HOST_UDP_PORT + sensor_configuration_address: 0, } memory.update(u) @@ -246,8 +268,7 @@ def main(lock=None): frame_trigger = now + frame_time_s cam_i2c_trigger = None logging.info("Ready.") - if lock is not None: - lock.release() + lock.release() while not done: trigger = frame_trigger if (cam_i2c_trigger is not None) and (cam_i2c_trigger < trigger): @@ -257,7 +278,7 @@ def 
main(lock=None): if trigger > now: timeout = trigger - now global control_r - cr = [udp_socket, control_r] + cr = [control_socket, control_r] cw = [] cx = [] r, w, x = select.select(cr, cw, cx, timeout) @@ -266,14 +287,14 @@ def main(lock=None): now = time.monotonic() if control_r in r: - message = os.read(control_r, 8192) + message = os.read(control_r, len(message)) logging.trace("got control message=%s" % (message,)) if message == b"exit": return - if udp_socket in r: + if control_socket in r: with nvtx.annotate("udp-request"): - length, peer = udp_socket.recvfrom_into(message) + length, peer = control_socket.recvfrom_into(message) logging.trace('Received "%s".' % (message[:length],)) deserializer = hololink_module.Deserializer(message) cmd_code = deserializer.next_uint8() @@ -284,7 +305,8 @@ def main(lock=None): deserializer.next_uint8() # reserved # serializer = hololink_module.Serializer(reply) - serializer.append_uint8(cmd_code) + reply_cmd_code = 0x80 | cmd_code + serializer.append_uint8(reply_cmd_code) serializer.append_uint8(flags) serializer.append_uint16_be(sequence) if cmd_code == hololink_module.WR_DWORD: @@ -320,6 +342,10 @@ def main(lock=None): else: memory[address] = value serializer.append_uint8(hololink_module.RESPONSE_SUCCESS) + serializer.append_uint8(0) # reserved; aligns the next data + serializer.append_uint32_be(address) + serializer.append_uint32_be(value) + serializer.append_uint32_be(latched_sequence) elif cmd_code == hololink_module.RD_DWORD: address = deserializer.next_uint32_be() value = memory[address] @@ -328,12 +354,13 @@ def main(lock=None): serializer.append_uint8(0) # reserved; aligns the next data serializer.append_uint32_be(address) serializer.append_uint32_be(value) + serializer.append_uint32_be(latched_sequence) send_reply = True else: serializer.append_uint8(hololink_module.RESPONSE_INVALID_CMD) send_reply = True if send_reply: - udp_socket.sendto(reply[: serializer.length()], peer) + control_socket.sendto(reply[: 
serializer.length()], peer) if (cam_i2c_trigger is not None) and (now >= cam_i2c_trigger): with nvtx.annotate("cam-i2c"): @@ -363,8 +390,16 @@ def main(lock=None): image_count += 1 logging.debug("image_count=%s" % (image_count,)) # - port = uc.VIRTUAL_PORTS[0] - ip_address = memory[hololink_module.DP_HOST_IP + port] + sensor = 0 + network_configuration_address = BOOTP_TRANSACTION_ID_MAP[ + sensor + ].configuration_address + sensor_configuration_address = ( + sensor * SENSOR_CONFIGURATION_SIZE + ) + ip_address = memory[ + hololink_module.DP_HOST_IP + sensor_configuration_address + ] ip = [ (ip_address >> 24) & 0xFF, (ip_address >> 16) & 0xFF, @@ -372,21 +407,34 @@ def main(lock=None): (ip_address >> 0) & 0xFF, ] ip = "%d.%d.%d.%d" % (ip[0], ip[1], ip[2], ip[3]) - udp_port = memory[hololink_module.DP_HOST_UDP_PORT + port] + target_udp_port = memory[ + hololink_module.DP_HOST_UDP_PORT + + sensor_configuration_address + ] global csi_image_data, csi_image_length payload_size = ( - memory[hololink_module.DP_PACKET_SIZE + port] - 78 + memory[ + hololink_module.DP_PACKET_SIZE + + network_configuration_address + ] + * PAGE_SIZE ) - address = memory[hololink_module.DP_ROCE_VADDR_MSB_0 + port] - address <<= 32 - address |= memory[hololink_module.DP_ROCE_VADDR_LSB_0 + port] - qp = memory[hololink_module.DP_ROCE_CFG + port] & 0xFF_FFFF - rkey = memory[hololink_module.DP_ROCE_RKEY_0 + port] + assert payload_size > 0 + address = memory[ + hololink_module.DP_ADDRESS_0 + sensor_configuration_address + ] + address <<= 7 + qp = memory[ + hololink_module.DP_QP + sensor_configuration_address + ] + rkey = memory[ + hololink_module.DP_RKEY + sensor_configuration_address + ] with nvtx.annotate("write-frame"): # the last packet is a bit different; don't include that here s, e = 0, payload_size logging.debug( - f"{port=} {csi_image_length=} {payload_size=}" + f"{sensor_configuration_address=:#X} {csi_image_length=} {payload_size=} {ip=} {target_udp_port=}" ) while e < csi_image_length: 
packet = format_write( @@ -395,7 +443,7 @@ def main(lock=None): bytes(csi_image_data[s:e]), rkey=rkey, ) - udp_socket.sendto(packet, (ip, udp_port)) + data_socket.sendto(packet, (ip, target_udp_port)) s, e = e, e + payload_size last_packet = format_write_immediate( qp, @@ -404,7 +452,7 @@ def main(lock=None): rkey=rkey, immediate_value=image_count, ) - udp_socket.sendto(last_packet, (ip, udp_port)) + data_socket.sendto(last_packet, (ip, target_udp_port)) psn = 0x1000 @@ -463,37 +511,26 @@ def format_write_immediate(qp, address, content, rkey, immediate_value): return r -def main_wrapper(lock): - try: - main(lock) - except Exception as e: - logging.error("Caught %s (%s)" % (e, type(e))) - tb = "".join(traceback.format_exc()) - for s in tb.split("\n"): - logging.info(s) - - class TestServer: def __init__(self, udpcam=None): """If udpcam is None, then we'll start one and use that.""" self._udpcam = udpcam self._process = None + self._lock = multiprocessing.Lock() def __enter__(self): logging.debug("__enter__") if self._udpcam is None: - lock = multiprocessing.Lock() - lock.acquire() + self._lock.acquire() self._process = multiprocessing.Process( - target=main_wrapper, + target=self._run, name="udp-server", daemon=True, - args=(lock,), ) self._process.start() global udp_port self._server_address = "127.0.0.1" - lock.acquire() + self._lock.acquire() else: self._server_address = self._udpcam self._control_port = udp_port @@ -506,6 +543,15 @@ def __exit__(self, *args, **kwargs): os.write(control_w, b"exit") self._process.join() + def _run(self): + try: + udp_server(self._lock) + except Exception as e: + logging.error("Caught %s (%s)" % (e, type(e))) + tb = "".join(traceback.format_exc()) + for s in tb.split("\n"): + logging.error(s) + def address(self): return self._server_address @@ -525,16 +571,41 @@ def get_image(self): return r def channel_metadata(self): + sensor = 0 + hololink_channel_configuration = BOOTP_TRANSACTION_ID_MAP[sensor] + network_configuration_address = 
( + hololink_channel_configuration.configuration_address + ) + vip_mask = hololink_channel_configuration.vip_mask metadata = { - "peer_ip": self._server_address, "control_port": self._control_port, - "configuration_address": 0x1A00, + "configuration_address": network_configuration_address, + "cpnx_version": 0x2410, + "data_plane": sensor, + "peer_ip": self._server_address, + "sensor": 0, "serial_number": "AA55", - "cpnx_version": 0x2402, - "vip_mask": 1, + "sequence_number_checking": 0, + "vip_mask": vip_mask, } return hololink_module.Metadata(metadata) +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--log-level", + type=int, + default=20, + help="Logging level to display", + ) + args = parser.parse_args() + hololink_module.logging_level(args.log_level) + # provide it a dummy lock; no one else is running to synchronize with this. + lock = multiprocessing.Lock() + lock.acquire() + udp_server(lock) + + if __name__ == "__main__": main() diff --git a/tests/utils.py b/tests/utils.py index 21f44b1..f9f8004 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -16,6 +16,10 @@ # See README.md for detailed information. import logging +import os +import queue +import threading +import time import numpy as np @@ -191,3 +195,168 @@ def make_image( image = image_encoder(image) bayer_image = bayer_encoder(bayer_image) return image, bayer_image + + +class Watchdog: + """When used this way: + + with Watchdog("watchdog-name", timeout=2) as watchdog: + while True: + watchdog.tap() + do_something() + + If do_something takes longer than 2 seconds to execute, we'll + assert fail with a watchdog timeout. 
+ + Some variations: + + - Allow the first pass to take longer: + + with Watchdog("watchdog-name", initial_timeout=10, timeout=2) as watchdog: + while True: + watchdog.tap() + do_something() + + - allow do_something() to take up to 30 seconds for the first 20 iterations, + then only allow 2 seconds after that: + + with Watchdog("watchdog-name", initial_timeout=[30]*20, timeout=2) as watchdog: + while True: + watchdog.tap() + do_something() + + This accommodates workflows where initialization may make the first n iterations + take longer. + + - use a dynamic timeout by passing in a new limit each call to tap: + + with Watchdog("watchdog-name", timeout=10) as watchdog: + while True: + watchdog.tap(2) + do_something() + + Watchdog always prefers to use the value passed to tap, and will fall back to + the next initial values (if any remain) or finally the value passed as timeout to + the constructor. + """ + + def __init__(self, name, timeout, initial_timeout=None): + self._name = name + logging.trace(f'Creating watchdog@{id(self):#x} "{self._name}"') + if initial_timeout is None: + initial_timeout = [timeout] + try: + self._initial_timeout = iter(initial_timeout) + except TypeError: + self._initial_timeout = iter([initial_timeout]) + self._next_timeout = timeout + self._q = queue.Queue() + self._count = 0 + self._lock = threading.Lock() + self._tap_time = None + + def __enter__(self): + self._thread = threading.Thread(target=self._run, daemon=True) + self._thread.start() + return self + + def __exit__(self, *args): + self._q.put(None) + self._thread.join() + + def tap(self, timeout=None): + self._count += 1 + logging.trace( + f'tapping watchdog@{id(self):#x} "{self._name}" count={self._count} {timeout=}' + ) + self._tap_time = time.monotonic() + self._q.put(self._get_next_timeout(timeout)) + + def _get_next_timeout(self, user_value=None): + with self._lock: + if user_value is not None: + return user_value + try: + timeout = next(self._initial_timeout) + return timeout 
+ except StopIteration: + pass + return self._next_timeout + + def _run(self): + hololink_module.NvtxTrace.setThreadName(self._name) + logging.trace(f'running watchdog@{id(self):#x} "{self._name}".') + try: + timeout = self._get_next_timeout() + while True: + timeout = self._q.get(block=True, timeout=timeout) + if timeout is None: + logging.trace(f'closing watchdog@{id(self):#x} "{self._name}".') + return + except queue.Empty: + pass + dt = "N/A" + if self._tap_time is not None: + now = time.monotonic() + dt = now - self._tap_time + message = f'watchdog@{id(self):#x} "{self._name}" timed out, count={self._count}, {timeout=}, time since last tap={dt}.' + logging.trace(message) + raise Exception(message) + + def update(self, timeout, initial_timeout=None, tap=True): + """Reconfigure how the next tap() works.""" + if initial_timeout is None: + initial_timeout = [timeout] + with self._lock: + try: + self._initial_timeout = iter(initial_timeout) + except TypeError: + self._initial_timeout = iter([initial_timeout]) + self._next_timeout = timeout + if tap: + self.tap() + + +receiver_count = 0 + + +class MockedLinuxReceiverOperator(hololink_module.operators.LinuxReceiverOperator): + """ + Use with unittest.mock("hololink.operators.LinuxReceiverOperator") + to assert fail when the stack doesn't receive a frame in time. + """ + + def __init__(self, *args, **kwargs): + logging.info("Using MockedLinuxReceiverOperator.") + super().__init__(*args, **kwargs) + global receiver_count + receiver_count = self._count + + def compute(self, op_input, op_output, context): + r = super().compute(op_input, op_output, context) + # Allow test fixturing to check the number of times + # we've been called. 
+ global receiver_count + receiver_count = self._count + return r + + def timeout(self, op_input, op_output, context): + logging.error(f"Frame reception timeout, {self._count=}.") + if self._count > 10: + assert False + + +class PriorityScheduler: + def __init__(self): + self._scheduler = os.sched_getscheduler(0) + self._params = os.sched_getparam(0) + + def __enter__(self): + logging.debug("Setting scheduler.") + sched_priority = self._params.sched_priority + 1 + sched_param = os.sched_param(sched_priority=sched_priority) + os.sched_setscheduler(0, os.SCHED_FIFO, sched_param) + + def __exit__(self, *args): + logging.debug("Resetting scheduler.") + os.sched_setscheduler(0, self._scheduler, self._params) From 5bba5195168f1f7cfca45e643250547acd15c2aa Mon Sep 17 00:00:00 2001 From: Patrick O'Grady Date: Mon, 3 Feb 2025 08:30:28 -0800 Subject: [PATCH 2/2] Release 2.0.0, 25.02. Signed-off-by: Patrick O'Grady --- VERSION | 2 +- scripts/manifest-2407.yaml | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 scripts/manifest-2407.yaml diff --git a/VERSION b/VERSION index 04ae6e8..227cea2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.0-EA2 +2.0.0 diff --git a/scripts/manifest-2407.yaml b/scripts/manifest-2407.yaml new file mode 100644 index 0000000..90a5eb3 --- /dev/null +++ b/scripts/manifest-2407.yaml @@ -0,0 +1,29 @@ +hololink: + archive: + enrollment_date: '2025-01-15T20:17:26.407881+00:00' + version: '2407' + content: + NVIDIA_RTL_License_Agreement.txt: + md5: e8c77cea2712a6e3883c49b063ebc816 + size: 16929 + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=NVIDIA_RTL_License_Agreement.txt + fpga_clnx_v2407.bit: + md5: 673db16ede425bd77b313bfc40f82588 + size: 383843 + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=fpga_clnx_v2407.bit + 
fpga_cpnx_v2407.bit: + md5: 6ad1a7d71b12ff26bcf7541c80ddd16e + size: 1960836 + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=fpga_cpnx_v2407.bit + hololink-hdl.zip: + md5: 21de471e09dce015946bd11f02bcd2b6 + size: 1294292 + url: https://api.ngc.nvidia.com/v2/resources/org/nvidia/team/clara-holoscan/holoscan_sensor_bridge_fpga_ip/2407/files?redirect=true&path=hololink-hdl.zip + images: + - content: fpga_clnx_v2407.bit + context: clnx + - content: fpga_cpnx_v2407.bit + context: cpnx + licenses: + - NVIDIA_RTL_License_Agreement.txt + strategy: sensor_bridge_10