Commit 526a461
Parent: d0e65a8
ghstack-source-id: d72da351e7eb7ca69eeaf9241bc604aeaf3bc139
Pull Request resolved: #4354
Showing 9 changed files with 1,153 additions and 1 deletion.
examples/models/llava/CMakeLists.txt (new file: 234 lines)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Simple CMake build system for multimodal runner.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#
cmake_minimum_required(VERSION 3.19)
project(multimodal)

# Duplicating options as root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF)

include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)
#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)

if(NOT PYTHON_EXECUTABLE)
  set(PYTHON_EXECUTABLE python3)
endif()

set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)

include(${EXECUTORCH_ROOT}/build/Utils.cmake)
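# Utils.cmake provides helper functions used throughout this file, notably
# target_link_options_shared_lib(), which applies whole-archive-style linking
# so that statically registered kernels and backends are not stripped by the
# linker.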

if(NOT PYTHON_EXECUTABLE)
  resolve_python_executable()
endif()

if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
  # Can't set to 11 due to executor_runner.cpp make_unique
endif()

if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
  set(CMAKE_TOOLCHAIN_IOS ON)
else()
  set(CMAKE_TOOLCHAIN_IOS OFF)
endif()

set(_common_compile_options -Wno-deprecated-declarations -fPIC)

# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# The Android build cannot locate gflags on its own, so point it at the
# directory that contains the gflags .cmake config file.
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

find_package(Torch CONFIG REQUIRED)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

#
# multimodal_main: test binary to run the multimodal model, with tokenizer and
# sampler integrated
#

# Find the `executorch` package the same way as gflags above.
set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
find_package(executorch CONFIG REQUIRED)
if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
  target_link_options_shared_lib(executorch)
endif()

# custom ops library
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../llama2/custom_ops custom_ops)
endif()

# multimodal_runner library
add_subdirectory(runner)
if(EXECUTORCH_USE_TIKTOKEN)
  # find RE2 for tokenizer
  set(ABSL_ENABLE_INSTALL ON)
  set(ABSL_PROPAGATE_CXX_STD ON)
  set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
  add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/abseil-cpp
    ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
  )
  add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/re2
    ${CMAKE_CURRENT_BINARY_DIR}/re2
  )
  set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
  target_link_libraries(multimodal_runner PUBLIC re2::re2)
endif()

set(link_libraries gflags torch)
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
  list(
    APPEND
    link_libraries
    optimized_native_cpu_ops_lib
    optimized_kernels
    portable_kernels
    cpublas
    eigen_blas
  )
  target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
  list(APPEND link_libraries portable_ops_lib portable_kernels)
  target_link_options_shared_lib(portable_ops_lib)
endif()

# quantized_ops_lib: Register quantized op kernels into the runtime
target_link_options_shared_lib(quantized_ops_lib)
list(APPEND link_libraries quantized_kernels quantized_ops_lib)

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  target_link_options_shared_lib(custom_ops)
  list(APPEND link_libraries custom_ops)
endif()

set(XNNPACK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack)
# Extra compile option and include dir for pthreadpool
if(EXECUTORCH_BUILD_PTHREADPOOL)
  list(APPEND _common_compile_options -DET_USE_THREADPOOL)
  list(APPEND link_libraries pthreadpool)
  # These 2 source files are included in xnnpack_backend
  if(NOT TARGET xnnpack_backend)
    list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/threadpool.cpp
         ${XNNPACK_ROOT}/threadpool/threadpool_guard.cpp
    )
  endif()
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/pthreadpool/include
  )
endif()

# Extra sources for cpuinfo
if(EXECUTORCH_BUILD_CPUINFO)
  list(APPEND link_libraries cpuinfo)
  list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/cpuinfo/include
  )
endif()

# XNNPACK
if(TARGET xnnpack_backend)
  set(xnnpack_backend_libs xnnpack_backend XNNPACK)
  list(APPEND link_libraries ${xnnpack_backend_libs})
  target_link_options_shared_lib(xnnpack_backend)
endif()

# Vulkan backend
if(TARGET vulkan_backend)
  list(APPEND link_libraries vulkan_backend)
  target_link_options_shared_lib(vulkan_backend)
endif()

# Qnn backend
if(TARGET qnn_executorch_backend)
  list(APPEND link_libraries qnn_executorch_backend)
  target_link_options_shared_lib(qnn_executorch_backend)
endif()

# MPS backend
if(TARGET mpsdelegate)
  list(
    APPEND
    link_libraries
    mpsdelegate
    "-framework Foundation"
    "-weak_framework MetalPerformanceShaders"
    "-weak_framework MetalPerformanceShadersGraph"
    "-weak_framework Metal"
  )
  target_link_options_shared_lib(mpsdelegate)
endif()

if(TARGET coremldelegate)
  find_library(SQLITE_LIBRARY sqlite3)
  list(
    APPEND
    link_libraries
    coremldelegate
    sqlite3
    "-framework Foundation"
    "-framework CoreML"
    "-framework Accelerate"
  )
  target_link_options_shared_lib(coremldelegate)
endif()

# cpuinfo uses the Android-specific log library, so link it on Android builds.
if(ANDROID)
  list(APPEND link_libraries log)
endif()

add_executable(multimodal_main ${_srcs})
if(CMAKE_BUILD_TYPE STREQUAL "Release")
  target_link_options(multimodal_main PRIVATE "LINKER:--gc-sections,-s")
endif()

target_include_directories(multimodal_main PUBLIC ${_common_include_directories})
target_link_libraries(multimodal_main PUBLIC multimodal_runner ${link_libraries})
target_compile_options(multimodal_main PUBLIC ${_common_compile_options})

if(APPLE)
  target_link_options_shared_lib(executorch)
endif()

# Print all summary
executorch_print_configuration_summary()
Build script (new file: 37 lines)
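# Step 1: configure the core ExecuTorch build with the extension, kernel, and
# backend options the multimodal runner needs, then build and install it into
# cmake-out.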
cmake \
  -DCMAKE_INSTALL_PREFIX=cmake-out \
  -DCMAKE_BUILD_TYPE=Debug \
  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  -DEXECUTORCH_BUILD_XNNPACK=ON \
  -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \
  -Bcmake-out .

cmake --build cmake-out -j9 --target install --config Debug

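# Step 2: configure and build the llava example against the libraries
# installed above.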
dir=examples/models/llava
python_lib=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

cmake \
  -DCMAKE_INSTALL_PREFIX=cmake-out \
  -DCMAKE_BUILD_TYPE=Debug \
  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  -DEXECUTORCH_BUILD_XNNPACK=ON \
  -DCMAKE_PREFIX_PATH="$python_lib" \
  -Bcmake-out/${dir} \
  ${dir}

cmake --build cmake-out/${dir} -j9 --config Debug

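# Example invocation (kept commented out; the absolute paths below are
# machine-specific and must be adjusted to your own setup):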
# cmake-out/examples/models/llava/multimodal_main \
#   --tokenizer_path /data/users/larryliu/llava/tokenizer.bin \
#   --model_path /data/users/larryliu/llava/llava_combined_xnnpack.pte \
#   --prompt "\nWhat are the things I should be cautious about when I visit here?" \
#   --image_path /data/users/larryliu/llava/image.pt \
#   --temperature 0
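Note that --image_path does not take a raw image file: main.cpp (below) loads it
with torch::load() and expects a uint8 CHW tensor whose longest edge has already
been resized to 336. A minimal Python sketch for producing such a file follows.
It is not part of this commit, the input filename is hypothetical, and the
serialization step is an assumption: depending on the PyTorch version, the C++
torch::load() call may require a jit-compatible archive rather than a plain
torch.save() file.

# prepare_image.py (hypothetical helper, not part of this commit): build the
# uint8 CHW image tensor that multimodal_main expects.
import torch
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor, resize

img = Image.open("image.jpg").convert("RGB")
scale = 336 / max(img.size)  # img.size is (width, height)
new_hw = [round(img.height * scale), round(img.width * scale)]
tensor = resize(pil_to_tensor(img), new_hw)  # uint8 tensor, CHW layout

# Assumption: this archive is readable by main.cpp's torch::load(); some
# PyTorch versions need torch.jit-based serialization for C++ interop instead.
torch.save(tensor, "image.pt")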
examples/models/llava/main.cpp (new file: 109 lines)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/examples/models/llava/runner/multimodal_runner.h>
#include <gflags/gflags.h>
#include <torch/torch.h>

#if defined(ET_USE_THREADPOOL)
#include <executorch/backends/xnnpack/threadpool/cpuinfo_utils.h>
#include <executorch/backends/xnnpack/threadpool/threadpool.h>
#endif

DEFINE_string(
    model_path,
    "llava.pte",
    "Model serialized in flatbuffer format.");

DEFINE_string(tokenizer_path, "tokenizer.bin", "Path to the tokenizer file.");

DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");

DEFINE_string(
    image_path,
    "",
    "The path to a .pt file, a serialized torch tensor for an image, longest edge resized to 336.");

DEFINE_double(
    temperature,
    0.8f,
    "Temperature; Default is 0.8f. 0 = greedy argmax sampling (deterministic). Lower temperature = more deterministic");

DEFINE_int32(
    seq_len,
    1024,
    "Total number of tokens to generate (prompt + output). Defaults to max_seq_len. If the number of input tokens + seq_len > max_seq_len, the output will be truncated to max_seq_len tokens.");

DEFINE_int32(
    cpu_threads,
    -1,
    "Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // Gather the runtime configuration from the command-line flags. The runner
  // created below builds a data loader from the program file; other
  // DataLoaders use mmap() or point to data already in memory, and users can
  // create their own DataLoaders to load from arbitrary sources.
  const char* model_path = FLAGS_model_path.c_str();

  const char* tokenizer_path = FLAGS_tokenizer_path.c_str();

  const char* prompt = FLAGS_prompt.c_str();

  std::string image_path = FLAGS_image_path;

  double temperature = FLAGS_temperature;

  int32_t seq_len = FLAGS_seq_len;

  int32_t cpu_threads = FLAGS_cpu_threads;

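  // Size the CPU threadpool: use the requested thread count, or fall back to
  // a cpuinfo heuristic for the number of performant cores when -1 is given.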
#if defined(ET_USE_THREADPOOL)
  uint32_t num_performant_cores = cpu_threads == -1
      ? torch::executorch::cpuinfo::get_num_performant_cores()
      : static_cast<uint32_t>(cpu_threads);
  ET_LOG(
      Info, "Resetting threadpool with num threads = %d", num_performant_cores);
  if (num_performant_cores > 0) {
    torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
        num_performant_cores);
  }
#endif
  // Create the multimodal runner.
  torch::executor::MultiModalRunner runner(
      model_path, tokenizer_path, temperature);

  // Load the image tensor; it is expected to be pre-resized so that its
  // longest edge is 336 (the commented-out OpenCV code below shows one way to
  // do the resize at runtime instead).
  std::vector<uint8_t> image_data;
  // cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR);
  // int longest_edge = std::max(image.rows, image.cols);
  // float scale_factor = 336.0f / longest_edge;
  // cv::Size new_size(image.cols * scale_factor, image.rows * scale_factor);
  // cv::Mat resized_image;
  // cv::resize(image, resized_image, new_size);
  // image_data.assign(resized_image.datastart, resized_image.dataend);
  torch::Tensor image_tensor;
  torch::load(image_tensor, image_path); // CHW
  ET_LOG(
      Info,
      "image size(0): %zu, size(1): %zu, size(2): %zu",
      static_cast<size_t>(image_tensor.size(0)),
      static_cast<size_t>(image_tensor.size(1)),
      static_cast<size_t>(image_tensor.size(2)));
  image_data.assign(
      image_tensor.data_ptr<uint8_t>(),
      image_tensor.data_ptr<uint8_t>() + image_tensor.numel());
  torch::executor::Image image{
      .data = image_data,
      .width = static_cast<int32_t>(image_tensor.size(2)),
      .height = static_cast<int32_t>(image_tensor.size(1))};
  // Generate tokens conditioned on the image and the text prompt.
  runner.generate({image}, prompt, seq_len);
  return 0;
}