add tokenizer
R-Tars committed Feb 18, 2025
1 parent 18f94b1 commit bf9bac3
Showing 5 changed files with 252 additions and 131 deletions.
98 changes: 31 additions & 67 deletions examples/BuddyDeepSeekR1/buddy-deepseek-r1-main.cpp
@@ -29,11 +29,10 @@ constexpr size_t MaxVocabSize = 151936;
 constexpr size_t MaxTokenLength = 40;
 
 /// Declare DeepSeekR1 forward function.
-extern "C" void
-_mlir_ciface_forward(MemRef<float, 3> *result,
-                     MemRef<float, 1> *arg0,
-                     MemRef<long long, 2> *arg1,
-                     MemRef<long long, 2> *arg2);
+extern "C" void _mlir_ciface_forward(MemRef<float, 3> *result,
+                                     MemRef<float, 1> *arg0,
+                                     Text<size_t, 2> *arg1,
+                                     MemRef<long long, 2> *arg2);
 
 // -----------------------------------------------------------------------------
 // Helper Functions
@@ -51,7 +50,7 @@ void getUserInput(std::string &inputStr) {
 void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
 
 /// Print information for each iteration.
-void printIterInfo(size_t iterIdx, int str, double time) {
+void printIterInfo(size_t iterIdx, std::string str, double time) {
   std::cout << "\033[32;1m[Iteration " << iterIdx << "] \033[0m";
   std::cout << "Token: " << str << " | "
             << "Time: " << time << "s" << std::endl;
@@ -103,7 +102,6 @@ void loadParameters(const std::string &paramFilePath,
 
 /// Find the index of the max value.
 int findMaxIndex(const float *start, const float *end) {
-  // std:: cout << "max element: " << *std::max_element(start, end) << std::endl;
   return std::distance(start, std::max_element(start, end));
 }
 
@@ -124,105 +122,71 @@ int main() {
 
   /// Get user message.
   std::string inputStr;
-  // getUserInput(inputStr);
+  getUserInput(inputStr);
 
   /// Initialize data containers
   // - Input container.
   // - Result container
   // - Output container.
   // - Parameters container.
-  // Text<size_t, 2> outputContainer;
+  Text<size_t, 2> outputContainer;
   MemRef<float, 3> resultContainer({1, 9, 151936});
-  // Text<size_t, 2> input1Container(inputStr);
+  Text<size_t, 2> inputContainer(inputStr);
   MemRef<float, 1> paramsContainer({ParamsSize});
-  MemRef<long long, 2> inputContainer({1, 40});
   MemRef<long long, 2> attention_mask({1, 40}, 0);
-  MemRef<long long, 2> outputContainer({1, 40});
-  long long data[] = {151646, 151646, 151644, 108386, 151645, 151648, 198};
-  for (int i = 0; i < 7; i++) {
-    inputContainer.getData()[i] = data[i];
-    attention_mask.getData()[i] = 1;
-  }
 
   /// Fill data into containers
   // - Input: register vocabulary and tokenize the input string.
   // - Output: register vocabulary.
   // - Parameters: load parameters from the `arg0` file into the container.
-  // tokenizeInput(vocabDir, input1Container);
-  // for (int i = 0 ; i < 10 ; i ++ )
-  //   std::cout << input1Container.getData()[i] << " ";
-  // std::cout << std::endl;
-  // outputContainer.loadVocab(vocabDir);
+  tokenizeInput(vocabDir, inputContainer);
+  for (int i = 0; i < (int)inputContainer.getTokenCnt(); i++) {
+    attention_mask.getData()[i] = 1;
+  }
+  outputContainer.loadVocab(vocabDir);
   loadParameters(paramsDir, paramsContainer);
 
-
-  /// Run LLaMA Inference
+  /// Run DeepSeekR1 Inference
   // - Perform the forward function.
   // - Find and append the generated token.
   // - Continue iterating until the terminal condition is met.
-  // int generateLen = MaxTokenLength - inputContainer.getTokenCnt();
-  for (int i = 0; i < 33; i++) {
+  int generateLen = MaxTokenLength - inputContainer.getTokenCnt();
+  for (int i = 0; i < generateLen; i++) {
     const auto inferenceStart = std::chrono::high_resolution_clock::now();
-    // std::cout << "input_ids:" << std::endl;
-    // for (int j = 0 ; j < 40 ; j ++ )
-    //   std::cout << inputContainer.getData()[j] << " ";
-    // std::cout << std::endl;
-
-    // std::cout << "attention_mask:" << std::endl;
-    // for (int j = 0 ; j < 40 ; j ++ )
-    //   std::cout << attention_mask.getData()[j] << " ";
-    // std::cout << std::endl;
-
     // Execute the forward pass of the model.
-    _mlir_ciface_forward(&resultContainer, &paramsContainer, &inputContainer, &attention_mask);
+    _mlir_ciface_forward(&resultContainer, &paramsContainer, &inputContainer,
+                         &attention_mask);
 
     const auto inferenceEnd = std::chrono::high_resolution_clock::now();
     const std::chrono::duration<double, std::milli> inferenceTime =
         inferenceEnd - inferenceStart;
 
     // Determine the generated token.
-    // int tokenIndex = inputContainer.getTokenCnt() - 1;
-    int tokenIndex = 6 + i;
+    int tokenIndex = inputContainer.getTokenCnt() - 1;
     const float *startPtr =
         resultContainer.getData() + tokenIndex * MaxVocabSize;
     const float *endPtr = startPtr + MaxVocabSize;
     int maxIndex = findMaxIndex(startPtr, endPtr);
-    // std::string tok = inputContainer.getStr(maxIndex);
+    std::string tok = inputContainer.getStr(maxIndex);
     // Print the generated token and inference time.
-    // printIterInfo(i, tok, inferenceTime.count() / 1000);
-    printIterInfo(i, maxIndex, inferenceTime.count() / 1000);
-
-    // Stop if a separator token (2, </s>) or line break token (13 <0x0A>) is
-    // generated.
-
-    // Append the generated token into the input and output container.
-    // inputContainer.appendTokenIdx(maxIndex);
-    inputContainer.getData()[7 + i] = maxIndex;
-    attention_mask.getData()[7 + i] = 1;
-    outputContainer.getData()[7 + i] = maxIndex;
-    // outputContainer.appendTokenIdx(maxIndex);
-    free(resultContainer.release());
+    printIterInfo(i, tok, inferenceTime.count() / 1000);
+
+    // Stop if a <|end▁of▁sentence|> token is generated.
+    if (maxIndex == 151643) {
+      break;
+    }
+    // Append the generated token into the input and output container.
+    // inputContainer.appendTokenIdx(maxIndex);
+    inputContainer.appendTokenIdx(maxIndex);
+    attention_mask.getData()[MaxTokenLength - generateLen + i] = 1;
+    outputContainer.appendTokenIdx(maxIndex);
+    free(resultContainer.release());
   }
 
   /// Print the final result
std::cout << "\n\033[33;1m[Output]\033[0m " << "Result Token:" << std::endl;
// std::cout << "\033[33;1m[Output]\033[0m " << outputContainer.revertLlama()
// << std::endl;



for (int i = 7 ; i < 40 ; i ++ ){

std::cout << outputContainer.getData()[i] << " ";
if (outputContainer.getData()[i] == 151643)
break;
}

std::cout << std::endl;
std::cout << "\n\033[33;1m[Input]\033[0m " << inputStr << std::endl;
std::cout << "\033[33;1m[Output]\033[0m "
<< outputContainer.revertDeepSeekR1() << std::endl;

return 0;
}
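
Taken together, the changes above replace the hard-coded prompt tokens with the newly added tokenizer and turn the loop into plain greedy decoding. The sketch below summarizes the post-commit flow of main(); it is a simplified illustration rather than the literal file: it reuses the container APIs, helpers, and constants visible in the diff (Text, MemRef, tokenizeInput, findMaxIndex, _mlir_ciface_forward, MaxTokenLength, MaxVocabSize), assumes paramsContainer has already been filled by loadParameters, and elides logging and error handling.

  // Sketch: greedy decoding with the newly added tokenizer (simplified).
  Text<size_t, 2> inputContainer(inputStr);      // prompt -> token ids
  Text<size_t, 2> outputContainer;               // generated token ids
  MemRef<long long, 2> attention_mask({1, MaxTokenLength}, 0);
  MemRef<float, 3> resultContainer({1, MaxTokenLength, MaxVocabSize});

  tokenizeInput(vocabDir, inputContainer);       // tokenize the prompt
  for (int i = 0; i < (int)inputContainer.getTokenCnt(); i++)
    attention_mask.getData()[i] = 1;             // mark prompt positions
  outputContainer.loadVocab(vocabDir);           // needed to detokenize later

  int generateLen = MaxTokenLength - inputContainer.getTokenCnt();
  for (int i = 0; i < generateLen; i++) {
    _mlir_ciface_forward(&resultContainer, &paramsContainer, &inputContainer,
                         &attention_mask);
    // Greedy step: take the arg-max logit of the last filled position.
    int tokenIndex = inputContainer.getTokenCnt() - 1;
    const float *row = resultContainer.getData() + tokenIndex * MaxVocabSize;
    int maxIndex = findMaxIndex(row, row + MaxVocabSize);
    if (maxIndex == 151643)                      // <|end▁of▁sentence|>
      break;
    inputContainer.appendTokenIdx(maxIndex);     // feed the token back in
    attention_mask.getData()[inputContainer.getTokenCnt() - 1] = 1;
    outputContainer.appendTokenIdx(maxIndex);
    free(resultContainer.release());             // free this step's logits buffer
  }
  std::cout << outputContainer.revertDeepSeekR1() << std::endl;  // ids -> text
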
30 changes: 16 additions & 14 deletions examples/BuddyDeepSeekR1/import-deepseek-r1.py
@@ -14,7 +14,7 @@
 #
 # ===---------------------------------------------------------------------------
 #
-# This is the test of llama2 model.
+# This is the test of DeepSeekR1 model.
 #
 # ===---------------------------------------------------------------------------
 
@@ -38,27 +38,25 @@
     "--output-dir",
     type=str,
     default="./",
-    help="Directory to save output files."
+    help="Directory to save output files.",
 )
 args = parser.parse_args()
 
 # Ensure the output directory exists.
 output_dir = args.output_dir
 os.makedirs(output_dir, exist_ok=True)
 
-# Retrieve the LLaMA model path from environment variables.
-# model_path = "/home/zhuxinye/.cache/huggingface/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-1.5B/snapshots/530ca3e1ad39d440e182c2e4317aa40f012512fa"
-model_path = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
-# model_path = os.environ.get("DEESEEK_MODEL_PATH")
-# if model_path is None:
-#     raise EnvironmentError(
-#         "The environment variable 'DEESEEK_MODEL_PATH' is not set or is invalid."
-#     )
+# Retrieve the DeepSeekR1 model path from environment variables.
+model_path = os.environ.get("DEEPSEEKR1_MODEL_PATH")
+if model_path is None:
+    model_path = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 
 # Initialize the tokenizer and model from the specified model path.
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path, torchscript=True).eval()
-model.config.use_cache = False  # Disable the cache so the model can be fully imported.
+model = AutoModelForCausalLM.from_pretrained(
+    model_path, torchscript=True
+).eval()
+model.config.use_cache = False
 
 # Initialize Dynamo Compiler with specific configurations as an importer.
 dynamo_compiler = DynamoCompiler(
@@ -70,9 +68,13 @@
 with torch.no_grad():
     data = {
         "input_ids": torch.zeros((1, 40), dtype=torch.int64),
-        "attention_mask": torch.zeros((1, 40), dtype=torch.int64)
+        "attention_mask": torch.zeros((1, 40), dtype=torch.int64),
     }
-    graphs = dynamo_compiler.importer(model, input_ids=data['input_ids'], attention_mask=data['attention_mask'])
+    graphs = dynamo_compiler.importer(
+        model,
+        input_ids=data["input_ids"],
+        attention_mask=data["attention_mask"],
+    )
 
 assert len(graphs) == 1
 graph = graphs[0]
Binary file modified examples/BuddyDeepSeekR1/vocab.txt
