-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsetup.py
145 lines (123 loc) · 4.6 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Copyright 2023. All Rights Reserved.
# Author: Bruce-Lee-LY
# Date: 21:14:13 on Tue, Oct 31, 2023
#
# Description: setup decoding attention
#!/usr/bin/python3
# coding=utf-8
import os
import sys
from pathlib import Path
from setuptools import setup, find_packages
import torch
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
# PyPI long description comes straight from the project README.
long_description = Path("README.md").read_text(encoding="utf-8")
def append_nvcc_threads(nvcc_extra_args):
    """Return *nvcc_extra_args* extended with an nvcc ``--threads`` option.

    The thread count is taken from the ``NVCC_THREADS`` environment
    variable, falling back to "4" when it is unset or empty.
    """
    thread_count = os.getenv("NVCC_THREADS") or "4"
    return [*nvcc_extra_args, "--threads", thread_count]
class NinjaBuildExtension(BuildExtension):
    """BuildExtension that caps ninja parallelism via the MAX_JOBS env var.

    MAX_JOBS is picked as the lower of (CPU cores // 2) and
    (free RAM / 9 GB), since each nvcc job peaks at roughly 8-9 GB of
    memory when NVCC_THREADS == 4. An existing MAX_JOBS value in the
    environment is always respected.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Do not override MAX_JOBS if it is already set by the user.
        if not os.environ.get("MAX_JOBS"):
            import psutil

            # Limit by cores. os.cpu_count() may return None (per the
            # stdlib docs), which would make `None // 2` raise; fall back
            # to 1 in that case.
            max_num_jobs_cores = max(1, (os.cpu_count() or 1) // 2)

            # Limit by free memory: budget ~9 GB per job (each job's peak
            # cost is ~8-9 GB when NVCC_THREADS = 4).
            free_memory_gb = psutil.virtual_memory().available / \
                (1024 ** 3)  # free memory in GB
            max_num_jobs_memory = int(free_memory_gb / 9)

            # Take the lower of the two limits (never below 1) to
            # minimize OOM and swap usage during compilation.
            max_jobs = max(1, min(max_num_jobs_cores, max_num_jobs_memory))
            os.environ["MAX_JOBS"] = str(max_jobs)

        super().__init__(*args, **kwargs)
# Report interpreter and torch versions up front to ease build debugging.
print(f"python version: {sys.version}")
print(f"torch version: {torch.__version__}")

# ninja build does not work unless include_dirs are absolute paths.
this_dir = os.path.dirname(os.path.abspath(__file__))

# Older torch ships ATen/CUDAGeneratorImpl.h; newer torch moved it to
# ATen/cuda/. Define -DOLD_GENERATOR_PATH when the old location exists so
# the C++ sources include the right header.
# See https://github.com/pytorch/pytorch/pull/70650
generator_flag = []
torch_dir = torch.__path__[0]
_old_generator_header = os.path.join(
    torch_dir, "include", "ATen", "CUDAGeneratorImpl.h")
if os.path.exists(_old_generator_header):
    generator_flag = ["-DOLD_GENERATOR_PATH"]

# Emit device code for compute capability 8.0 and 8.6 GPUs.
cc_flag = [
    "-gencode", "arch=compute_80,code=sm_80",
    "-gencode", "arch=compute_86,code=sm_86",
]

# HACK: cpp_extension passes -D_GLIBCXX_USE_CXX11_ABI matching
# torch._C._GLIBCXX_USE_CXX11_ABI; force it to True here.
# https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
torch._C._GLIBCXX_USE_CXX11_ABI = True
# CUDA extension wiring: one module built from the torch binding, the op
# dispatcher, and one fused decoding-forward kernel per supported head dim.
_csrc = Path(this_dir) / "csrc"

_sources = [
    "csrc/torch/decoding_torch.cpp",
    "csrc/ops/decoding_attn.cpp",
    "csrc/kernel/decoding_attn/decoding_fwd_hdim64.cu",
    "csrc/kernel/decoding_attn/decoding_fwd_hdim96.cu",
    "csrc/kernel/decoding_attn/decoding_fwd_hdim128.cu",
    "csrc/kernel/decoding_attn/decoding_fwd_hdim256.cu",
]

_nvcc_args = append_nvcc_threads(
    [
        "-O3",
        "-std=c++17",
        "-U__CUDA_NO_HALF_OPERATORS__",
        "-U__CUDA_NO_HALF_CONVERSIONS__",
        "-U__CUDA_NO_HALF2_OPERATORS__",
        "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
        "--expt-relaxed-constexpr",
        "--expt-extended-lambda",
        "--use_fast_math",
        # "--ptxas-options=-v",
        # "--ptxas-options=-O2",
        # "-lineinfo",
    ]
    + generator_flag
    + cc_flag
)

ext_modules = [
    CUDAExtension(
        name="decoding_attn_cuda",
        sources=_sources,
        extra_compile_args={
            "cxx": ["-O3", "-std=c++17"] + generator_flag,
            "nvcc": _nvcc_args,
        },
        include_dirs=[
            _csrc / "common",
            _csrc / "kernel",
            _csrc / "ops",
        ],
    )
]
# Package metadata and build hookup: the CUDA extension above is compiled
# by the MAX_JOBS-limiting NinjaBuildExtension at build time.
setup(
    name="decoding_attn",
    version="0.1.0",
    packages=find_packages(
        exclude=(
            "csrc",
            # NOTE(review): "decodng_attn" looks like a typo for
            # "decoding_attn" -- confirm which package should actually be
            # excluded before changing this string.
            "decodng_attn",
        )
    ),
    author="Bruce-Lee-LY",
    description="Decoding Attention",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/Bruce-Lee-LY/decoding_attention",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: BSD License",
        "Operating System :: Unix",
    ],
    ext_modules=ext_modules,
    # Route `build_ext` through the custom class so ninja parallelism is
    # capped by the cores/memory heuristic.
    cmdclass={"build_ext": NinjaBuildExtension},
    python_requires=">=3.8",
    install_requires=[
        "torch",
    ],
    # Build-time-only deps: psutil for the MAX_JOBS heuristic, ninja as
    # the extension build backend.
    setup_requires=[
        "psutil",
        "ninja",
    ],
)