diff --git a/scripts/autogen.py b/scripts/autogen.py index 70ed79a4e6..b78bfa63b4 100644 --- a/scripts/autogen.py +++ b/scripts/autogen.py @@ -47,7 +47,7 @@ "keras": f"{KERAS_TEAM_GH}/keras/tree/v3.1.1/", "keras_tuner": f"{KERAS_TEAM_GH}/keras-tuner/tree/v1.4.7/", "keras_cv": f"{KERAS_TEAM_GH}/keras-cv/tree/v0.8.2/", - "keras_nlp": f"{KERAS_TEAM_GH}/keras-nlp/tree/v0.8.2/", + "keras_nlp": f"{KERAS_TEAM_GH}/keras-nlp/tree/v0.9.1/", "tf_keras": f"{KERAS_TEAM_GH}/tf-keras/tree/v2.16.0/", } USE_MULTIPROCESSING = False diff --git a/scripts/nlp_api_master.py b/scripts/nlp_api_master.py index 4537682227..0571fb1baf 100644 --- a/scripts/nlp_api_master.py +++ b/scripts/nlp_api_master.py @@ -1,6 +1,105 @@ +BASE_CLASSES = { + "path": "base_classes/", + "title": "Models API", + "toc": True, + "children": [ + { + "path": "backbone", + "title": "Backbone", + "generate": [ + "keras_nlp.models.Backbone", + "keras_nlp.models.Backbone.from_preset", + "keras_nlp.models.Backbone.token_embedding", + "keras_nlp.models.Backbone.enable_lora", + "keras_nlp.models.Backbone.save_lora_weights", + "keras_nlp.models.Backbone.load_lora_weights", + "keras_nlp.models.Backbone.save_to_preset", + ], + }, + { + "path": "causal_lm", + "title": "CausalLM", + "generate": [ + "keras_nlp.models.CausalLM", + "keras_nlp.models.CausalLM.from_preset", + "keras_nlp.models.CausalLM.compile", + "keras_nlp.models.CausalLM.generate", + "keras_nlp.models.CausalLM.preprocessor", + "keras_nlp.models.CausalLM.backbone", + ], + }, + { + "path": "seq_2_seq_lm", + "title": "Seq2SeqLM", + "generate": [ + "keras_nlp.models.Seq2SeqLM", + "keras_nlp.models.Seq2SeqLM.from_preset", + "keras_nlp.models.Seq2SeqLM.compile", + "keras_nlp.models.Seq2SeqLM.generate", + "keras_nlp.models.Seq2SeqLM.preprocessor", + "keras_nlp.models.Seq2SeqLM.backbone", + ], + }, + { + "path": "classifier", + "title": "Classifier", + "generate": [ + "keras_nlp.models.Classifier", + "keras_nlp.models.Classifier.from_preset", + "keras_nlp.models.Classifier.compile", + "keras_nlp.models.Classifier.preprocessor", + "keras_nlp.models.Classifier.backbone", + ], + }, + { + "path": "masked_lm", + "title": "MaskedLM", + "generate": [ + "keras_nlp.models.MaskedLM", + "keras_nlp.models.MaskedLM.from_preset", + "keras_nlp.models.MaskedLM.compile", + "keras_nlp.models.MaskedLM.preprocessor", + "keras_nlp.models.MaskedLM.backbone", + ], + }, + { + "path": "task", + "title": "Task", + "generate": [ + "keras_nlp.models.Task", + "keras_nlp.models.Task.from_preset", + "keras_nlp.models.Task.preprocessor", + "keras_nlp.models.Task.backbone", + ], + }, + { + "path": "preprocessor", + "title": "Preprocessor", + "generate": [ + "keras_nlp.models.Preprocessor", + "keras_nlp.models.Preprocessor.from_preset", + "keras_nlp.models.Preprocessor.tokenizer", + ], + }, + { + "path": "tokenizer", + "title": "Tokenizer", + "generate": [ + "keras_nlp.models.Tokenizer", + "keras_nlp.models.Tokenizer.from_preset", + ], + }, + { + "path": "upload_preset", + "title": "upload_preset", + "generate": ["keras_nlp.upload_preset"], + }, + ], +} + MODELS_MASTER = { "path": "models/", - "title": "Models", + "title": "Pretrained Models", "toc": True, "children": [ { @@ -182,6 +281,60 @@ }, ], }, + { + "path": "bloom/", + "title": "Bloom", + "toc": True, + "children": [ + { + "path": "bloom_tokenizer", + "title": "BloomTokenizer", + "generate": [ + "keras_nlp.models.BloomTokenizer", + "keras_nlp.models.BloomTokenizer.from_preset", + ], + }, + { + "path": "bloom_preprocessor", + "title": "BloomPreprocessor layer", + "generate": [ + "keras_nlp.models.BloomPreprocessor", + "keras_nlp.models.BloomPreprocessor.from_preset", + "keras_nlp.models.BloomPreprocessor.tokenizer", + ], + }, + { + "path": "bloom_backbone", + "title": "BloomBackbone model", + "generate": [ + "keras_nlp.models.BloomBackbone", + "keras_nlp.models.BloomBackbone.from_preset", + "keras_nlp.models.BloomBackbone.token_embedding", + "keras_nlp.models.BloomBackbone.enable_lora", + ], + }, + { + "path": "bloom_causal_lm", + "title": "BloomCausalLM model", + "generate": [ + "keras_nlp.models.BloomCausalLM", + "keras_nlp.models.BloomCausalLM.from_preset", + "keras_nlp.models.BloomCausalLM.generate", + "keras_nlp.models.BloomCausalLM.backbone", + "keras_nlp.models.BloomCausalLM.preprocessor", + ], + }, + { + "path": "bloom_causal_lm_preprocessor", + "title": "BloomCausalLMPreprocessor layer", + "generate": [ + "keras_nlp.models.BloomCausalLMPreprocessor", + "keras_nlp.models.BloomCausalLMPreprocessor.from_preset", + "keras_nlp.models.BloomCausalLMPreprocessor.tokenizer", + ], + }, + ], + }, { "path": "deberta_v3/", "title": "DebertaV3", @@ -362,6 +515,101 @@ }, ], }, + { + "path": "electra/", + "title": "Electra", + "toc": True, + "children": [ + { + "path": "electra_tokenizer", + "title": "ElectraTokenizer", + "generate": [ + "keras_nlp.models.ElectraTokenizer", + "keras_nlp.models.ElectraTokenizer.from_preset", + ], + }, + { + "path": "electra_preprocessor", + "title": "ElectraPreprocessor layer", + "generate": [ + "keras_nlp.models.ElectraPreprocessor", + "keras_nlp.models.ElectraPreprocessor.from_preset", + "keras_nlp.models.ElectraPreprocessor.tokenizer", + ], + }, + { + "path": "electra_backbone", + "title": "ElectraBackbone model", + "generate": [ + "keras_nlp.models.ElectraBackbone", + "keras_nlp.models.ElectraBackbone.from_preset", + "keras_nlp.models.ElectraBackbone.token_embedding", + ], + }, + ], + }, + { + "path": "f_net/", + "title": "FNet", + "toc": True, + "children": [ + { + "path": "f_net_tokenizer", + "title": "FNetTokenizer", + "generate": [ + "keras_nlp.models.FNetTokenizer", + "keras_nlp.models.FNetTokenizer.from_preset", + ], + }, + { + "path": "f_net_preprocessor", + "title": "FNetPreprocessor layer", + "generate": [ + "keras_nlp.models.FNetPreprocessor", + "keras_nlp.models.FNetPreprocessor.from_preset", + "keras_nlp.models.FNetPreprocessor.tokenizer", + ], + }, + { + "path": "f_net3_backbone", + "title": "FNetBackbone model", + "generate": [ + "keras_nlp.models.FNetBackbone", + "keras_nlp.models.FNetBackbone.from_preset", + "keras_nlp.models.FNetBackbone.token_embedding", + ], + }, + { + "path": "f_net_classifier", + "title": "FNetClassifier model", + "generate": [ + "keras_nlp.models.FNetClassifier", + "keras_nlp.models.FNetClassifier.from_preset", + "keras_nlp.models.FNetClassifier.backbone", + "keras_nlp.models.FNetClassifier.preprocessor", + ], + }, + { + "path": "f_net_masked_lm", + "title": "FNetMaskedLM model", + "generate": [ + "keras_nlp.models.FNetMaskedLM", + "keras_nlp.models.FNetMaskedLM.from_preset", + "keras_nlp.models.FNetMaskedLM.backbone", + "keras_nlp.models.FNetMaskedLM.preprocessor", + ], + }, + { + "path": "f_net_masked_lm_preprocessor", + "title": "FNetMaskedLMPreprocessor layer", + "generate": [ + "keras_nlp.models.FNetMaskedLMPreprocessor", + "keras_nlp.models.FNetMaskedLMPreprocessor.from_preset", + "keras_nlp.models.FNetMaskedLMPreprocessor.tokenizer", + ], + }, + ], + }, { "path": "gpt2/", "title": "GPT2", @@ -418,63 +666,55 @@ ], }, { - "path": "f_net/", - "title": "FNet", + "path": "llama/", + "title": "Llama", "toc": True, "children": [ { - "path": "f_net_tokenizer", - "title": "FNetTokenizer", + "path": "llama_tokenizer", + "title": "LlamaTokenizer", "generate": [ - "keras_nlp.models.FNetTokenizer", - "keras_nlp.models.FNetTokenizer.from_preset", + "keras_nlp.models.LlamaTokenizer", + "keras_nlp.models.LlamaTokenizer.from_preset", ], }, { - "path": "f_net_preprocessor", - "title": "FNetPreprocessor layer", + "path": "llama_preprocessor", + "title": "LlamaPreprocessor layer", "generate": [ - "keras_nlp.models.FNetPreprocessor", - "keras_nlp.models.FNetPreprocessor.from_preset", - "keras_nlp.models.FNetPreprocessor.tokenizer", - ], - }, - { - "path": "f_net3_backbone", - "title": "FNetBackbone model", - "generate": [ - "keras_nlp.models.FNetBackbone", - "keras_nlp.models.FNetBackbone.from_preset", - "keras_nlp.models.FNetBackbone.token_embedding", + "keras_nlp.models.LlamaPreprocessor", + "keras_nlp.models.LlamaPreprocessor.from_preset", + "keras_nlp.models.LlamaPreprocessor.tokenizer", ], }, { - "path": "f_net_classifier", - "title": "FNetClassifier model", + "path": "llama_backbone", + "title": "LlamaBackbone model", "generate": [ - "keras_nlp.models.FNetClassifier", - "keras_nlp.models.FNetClassifier.from_preset", - "keras_nlp.models.FNetClassifier.backbone", - "keras_nlp.models.FNetClassifier.preprocessor", + "keras_nlp.models.LlamaBackbone", + "keras_nlp.models.LlamaBackbone.from_preset", + "keras_nlp.models.LlamaBackbone.token_embedding", + "keras_nlp.models.LlamaBackbone.enable_lora", ], }, { - "path": "f_net_masked_lm", - "title": "FNetMaskedLM model", + "path": "llama_causal_lm", + "title": "LlamaCausalLM model", "generate": [ - "keras_nlp.models.FNetMaskedLM", - "keras_nlp.models.FNetMaskedLM.from_preset", - "keras_nlp.models.FNetMaskedLM.backbone", - "keras_nlp.models.FNetMaskedLM.preprocessor", + "keras_nlp.models.LlamaCausalLM", + "keras_nlp.models.LlamaCausalLM.from_preset", + "keras_nlp.models.LlamaCausalLM.generate", + "keras_nlp.models.LlamaCausalLM.backbone", + "keras_nlp.models.LlamaCausalLM.preprocessor", ], }, { - "path": "f_net_masked_lm_preprocessor", - "title": "FNetMaskedLMPreprocessor layer", + "path": "llama_causal_lm_preprocessor", + "title": "LlamaCausalLMPreprocessor layer", "generate": [ - "keras_nlp.models.FNetMaskedLMPreprocessor", - "keras_nlp.models.FNetMaskedLMPreprocessor.from_preset", - "keras_nlp.models.FNetMaskedLMPreprocessor.tokenizer", + "keras_nlp.models.LlamaCausalLMPreprocessor", + "keras_nlp.models.LlamaCausalLMPreprocessor.from_preset", + "keras_nlp.models.LlamaCausalLMPreprocessor.tokenizer", ], }, ], @@ -764,19 +1004,6 @@ "title": "Tokenizers", "toc": True, "children": [ - { - "path": "tokenizer", - "title": "Tokenizer base class", - "generate": [ - "keras_nlp.tokenizers.Tokenizer", - "keras_nlp.tokenizers.Tokenizer.tokenize", - "keras_nlp.tokenizers.Tokenizer.detokenize", - "keras_nlp.tokenizers.Tokenizer.get_vocabulary", - "keras_nlp.tokenizers.Tokenizer.vocabulary_size", - "keras_nlp.tokenizers.Tokenizer.token_to_id", - "keras_nlp.tokenizers.Tokenizer.id_to_token", - ], - }, { "path": "word_piece_tokenizer", "title": "WordPieceTokenizer", @@ -997,6 +1224,7 @@ "toc": True, "children": [ MODELS_MASTER, + BASE_CLASSES, TOKENIZERS_MASTER, PREPROCESSING_LAYERS_MASTER, MODELING_LAYERS_MASTER, diff --git a/scripts/render_tags.py b/scripts/render_tags.py index 772e960458..cfa58c08e1 100644 --- a/scripts/render_tags.py +++ b/scripts/render_tags.py @@ -54,6 +54,21 @@ def format_path(metadata): return "Unknown" +def is_base_class(symbol): + import keras_nlp + + return symbol in ( + keras_nlp.models.Backbone, + keras_nlp.models.Tokenizer, + keras_nlp.models.Preprocessor, + keras_nlp.models.Task, + keras_nlp.models.Classifier, + keras_nlp.models.CausalLM, + keras_nlp.models.MaskedLM, + keras_nlp.models.Seq2SeqLM, + ) + + def render_backbone_table(symbols): """Renders the markdown table for backbone presets as a string.""" @@ -64,7 +79,7 @@ def render_backbone_table(symbols): added_presets = set() # Bakcbone presets for name, symbol in symbols: - if "Backbone" not in name: + if is_base_class(symbol) or "Backbone" not in name: continue presets = symbol.presets # Only keep the ones with pretrained weights for KerasCV Backbones. @@ -101,7 +116,8 @@ def render_classifier_table(symbols): if "Classifier" not in name: continue for preset in symbol.presets: - if preset not in symbol.backbone_cls.presets: + backbone_cls = symbol.backbone_cls + if backbone_cls is not None and preset not in backbone_cls.presets: metadata = symbol.presets[preset]["metadata"] table += ( f"{preset} | " @@ -145,7 +161,7 @@ def render_task_table(symbols): def render_table(symbol): table = TABLE_HEADER_PER_MODEL - if len(symbol.presets) == 0: + if is_base_class(symbol) or len(symbol.presets) == 0: return None for preset in symbol.presets: # Do not print all backbone presets for a task