Commit: 113 changed files with 7,456 additions and 720 deletions.

@@ -61,5 +61,4 @@ examples/mnist/*ubyte
 /vs/MarianDll.VC.VC.opendb
 
-.vs
-.vscode
+.vscode
 

@@ -1 +1 @@
-v1.12.0
+v1.12.12

@@ -0,0 +1,30 @@
# marian-YYYY-MM-DD-revision.tgz
# This combines marian, marian-decoder, etc. into a single TAR file for
# execution in MSFT internal tools FLO and Singularity.

execute_process(
  COMMAND bash -c "TZ=America/Los_Angeles date +%Y-%m-%d"
  OUTPUT_VARIABLE TGZ_DATE
  OUTPUT_STRIP_TRAILING_WHITESPACE)

execute_process(
  COMMAND git rev-parse --short=7 HEAD
  OUTPUT_VARIABLE TGZ_REV
  OUTPUT_STRIP_TRAILING_WHITESPACE)

message("Generating ${CWD}/marian-${TGZ_DATE}-${TGZ_REV}.tgz")

# check if pigz is available for faster compression
execute_process(
  COMMAND bash -c "which pigz || which gzip"
  OUTPUT_VARIABLE COMPRESS
  OUTPUT_STRIP_TRAILING_WHITESPACE)

execute_process(
  COMMAND tar -I ${COMPRESS} -cvvf "${CWD}/marian-${TGZ_DATE}-${TGZ_REV}.tgz" -C "${CWD}"
          marian
          marian-decoder
          marian-scorer
          marian-vocab
          marian-conv
  WORKING_DIRECTORY "${CWD}")
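
A usage sketch, as an assumption (the commit does not show how this file is invoked): a packaging script like this is normally run in CMake script mode from the build directory that holds the binaries, with CWD defined on the command line, e.g.

    cmake -DCWD=$(pwd) -P marian-tgz.cmake   # script path hypothetical

Note that the -D definition has to come before -P, since script-mode variables must be set before the script executes.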

@@ -0,0 +1,23 @@
#!/usr/bin/env python3
"""
This script converts *.chpt files to *.pt files, potentially useful for extracting weights only from larger checkpoints.
"""

import torch
import argparse

# Create a parser for command line arguments
parser = argparse.ArgumentParser()

# Add arguments for the source and target files
parser.add_argument("--source", type=str, required=True, help="Path to the source *.chpt file")
parser.add_argument("--target", type=str, required=True, help="Path to the target *.pt file")

# Parse the command line arguments
args = parser.parse_args()

# Load the model from the source file
model = torch.load(args.source)

# Save the model to the target file
torch.save(model, args.target)
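
A usage sketch (file names hypothetical; the diff does not show the script's path):

    python3 chpt2pt.py --source big-model.chpt --target weights.pt

One caveat: torch.load restores tensors onto the devices they were saved from, so converting a GPU-trained checkpoint on a CPU-only machine would additionally need map_location="cpu", which this script does not pass.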

@@ -0,0 +1,153 @@
#!/usr/bin/env python3
"""
This script converts a Huggingface Bert model to a Marian weight file.
"""

import argparse
import numpy as np
import sys
import yaml

from transformers import XLMRobertaModel

parser = argparse.ArgumentParser(description='Convert Huggingface Bert model to Marian weight file.')
parser.add_argument('--bert', help='Path to Huggingface Bert PyTorch model', required=True)
parser.add_argument('--marian', help='Output path for Marian weight file', required=True)
args = parser.parse_args()

huggingface = XLMRobertaModel.from_pretrained(args.bert)
huggingface.eval()

print(huggingface.config)

config = dict()
config["type"] = "bert-classifier"
config["input-types"] = ["sequence"]
config["tied-embeddings-all"] = True
config["tied-embeddings-src"] = False

config["transformer-ffn-depth"] = 2
config["transformer-train-position-embeddings"] = True
config["transformer-preprocess"] = ""
config["transformer-postprocess"] = "dan"
config["transformer-postprocess-emb"] = "nd"
config["bert-train-type-embeddings"] = False
# @TODO: figure out if it's worth adding `cometModel.name_or_path` to the end of this version string.
config["version"] = "huggingface2marian.py conversion"

config["enc-depth"] = 0
config["transformer-dim-ffn"] = huggingface.config.intermediate_size
config["transformer-heads"] = huggingface.config.num_attention_heads
config["transformer-ffn-activation"] = huggingface.config.hidden_act

config["bert-sep-symbol"] = "</s>"
config["bert-class-symbol"] = "</s>"

marianModel = dict()

def transposeOrder(mat):
    matT = np.transpose(mat)  # just a view with changed row order
    return matT.flatten(order="C").reshape(matT.shape)  # force row order change and reshape
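
# Note: transposeOrder(mat) is equivalent to np.ascontiguousarray(mat.T):
# it materializes the transpose in row-major (C) order rather than keeping
# a strided view, so the transposed layout is what gets serialized.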

def convert(pd, srcs, trg, transpose=True, bias=False):
    if len(srcs) == 1:
        for src in srcs:
            num = pd[src].detach().numpy()
            if bias:
                marianModel[trg] = np.atleast_2d(num)
            else:
                if transpose:
                    marianModel[trg] = transposeOrder(num)  # transpose with row order change
                else:
                    marianModel[trg] = num
    else:  # path that joins matrices together for fused self-attention
        nums = [pd[src].detach().numpy() for src in srcs]
        if bias:
            nums = [np.transpose(np.atleast_2d(num)) for num in nums]
        marianModel[trg] = np.stack(nums, axis=0)
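
# The multi-source branch above stacks several matrices into one tensor for
# Marian's fused self-attention; every call in this script passes a single
# source, so only the first branch is actually exercised here.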

def extract(layer, nth, level):
    name = type(layer).__name__
    print(" " * level, nth, name)
    if name == "BertLayer":
        pd = dict(layer.named_parameters())
        for n in pd:
            print(" " * (level + 1), n, pd[n].shape)

        convert(pd, ["attention.self.query.weight"], f"encoder_l{nth + 1}_self_Wq", transpose=True)
        convert(pd, ["attention.self.key.weight"], f"encoder_l{nth + 1}_self_Wk")
        convert(pd, ["attention.self.value.weight"], f"encoder_l{nth + 1}_self_Wv")

        convert(pd, ["attention.self.query.bias"], f"encoder_l{nth + 1}_self_bq", bias=True)
        convert(pd, ["attention.self.key.bias"], f"encoder_l{nth + 1}_self_bk", bias=True)
        convert(pd, ["attention.self.value.bias"], f"encoder_l{nth + 1}_self_bv", bias=True)

        convert(pd, ["attention.output.dense.weight"], f"encoder_l{nth + 1}_self_Wo")
        convert(pd, ["attention.output.dense.bias"], f"encoder_l{nth + 1}_self_bo", bias=True)

        convert(pd, ["attention.output.LayerNorm.weight"], f"encoder_l{nth + 1}_self_Wo_ln_scale", bias=True)
        convert(pd, ["attention.output.LayerNorm.bias"], f"encoder_l{nth + 1}_self_Wo_ln_bias", bias=True)

        convert(pd, ["intermediate.dense.weight"], f"encoder_l{nth + 1}_ffn_W1")
        convert(pd, ["intermediate.dense.bias"], f"encoder_l{nth + 1}_ffn_b1", bias=True)
        convert(pd, ["output.dense.weight"], f"encoder_l{nth + 1}_ffn_W2")
        convert(pd, ["output.dense.bias"], f"encoder_l{nth + 1}_ffn_b2", bias=True)

        convert(pd, ["output.LayerNorm.weight"], f"encoder_l{nth + 1}_ffn_ffn_ln_scale", bias=True)
        convert(pd, ["output.LayerNorm.bias"], f"encoder_l{nth + 1}_ffn_ffn_ln_bias", bias=True)

        config["enc-depth"] += 1

    elif name == "BertEmbeddings":
        for n, p in layer.named_parameters():
            print(" " * (level + 1), n, p.shape)
        pd = dict(layer.named_parameters())
        convert(pd, ["word_embeddings.weight"], "Wemb", transpose=False)
        convert(pd, ["position_embeddings.weight"], "Wpos", transpose=False)

        config["bert-type-vocab-size"] = 0
        if hasattr(layer, "token_type_embeddings"):
            convert(pd, ["token_type_embeddings.weight"], "Wtype", transpose=False)
            config["bert-type-vocab-size"] = pd["token_type_embeddings.weight"].shape[0]
            config["bert-train-type-embeddings"] = True

        convert(pd, ["LayerNorm.weight"], "encoder_emb_ln_scale_pre", bias=True)
        convert(pd, ["LayerNorm.bias"], "encoder_emb_ln_bias_pre", bias=True)

        config["dim-emb"] = pd["word_embeddings.weight"].shape[1]
        config["dim-vocabs"] = [pd["word_embeddings.weight"].shape[0]]
        config["max-length"] = pd["position_embeddings.weight"].shape[0]
elif name == "BertPooler": | ||
for n, p in layer.named_parameters(): | ||
print(" " * (level + 1), n, p.shape) | ||
|
||
pd = dict(layer.named_parameters()) | ||
convert(pd, ["dense.weight"], "classifier_ff_logit_l1_W") | ||
convert(pd, ["dense.bias"], "classifier_ff_logit_l1_b", bias=True) | ||
|
||
else: | ||
recurse(layer, level + 1) | ||
|
||
def recurse(parent, level=0): | ||
for i, child in enumerate(parent.children()): | ||
extract(child, i, level) | ||
|
||
recurse(huggingface) | ||
|
||
for m in marianModel: | ||
print(m, marianModel[m].shape) | ||
|
||
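
# Serialize the Marian config as YAML and embed it in the weight file as a
# byte (int8) array under the key "special:model.yml", which Marian reads
# as the model's embedded configuration.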
configYamlStr = yaml.dump(config, default_flow_style=False)
desc = list(configYamlStr)
npDesc = np.chararray((len(desc),))
npDesc[:] = desc
npDesc.dtype = np.int8
marianModel["special:model.yml"] = npDesc

print("\nMarian config:")
print(configYamlStr)
print("Saving Marian model to %s" % (args.marian,))
np.savez(args.marian, **marianModel)
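
A usage sketch (the model identifier and output path are examples, not taken from the commit):

    python3 huggingface2marian.py --bert xlm-roberta-base --marian model.npz

The embedded config can then be read back from the resulting .npz archive with NumPy:

    import numpy as np

    model = np.load("model.npz")
    # the int8 array holds the YAML text byte-for-byte
    print(model["special:model.yml"].tobytes().decode("utf-8"))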