# coding: utf-8
"""
The ``fine_tune.py`` file is used to continue training (or `fine-tune`) a model on a `different
dataset` than the one it was originally trained on. It requires a saved model archive file, a path
to the data you will continue training with, and a directory in which to write the results.
. code-block:: bash
$ python fine_tune.py --help
usage: fine_tune.py [-h] -s SERIALIZATION_DIR -c CONFIG_FILE_PATH -p
PRETRAINED_DIR -m PRETRAINED_MODEL_NAME
optional arguments:
-h, --help show this help message and exit
-s SERIALIZATION_DIR, --serialization_dir SERIALIZATION_DIR
Directory in which to save the model and its logs.
-c CONFIG_FILE_PATH, --config_file_path CONFIG_FILE_PATH
Path to parameter file describing the new multi-tasked
model to be fine-tuned.
-p PRETRAINED_DIR, --pretrained_dir PRETRAINED_DIR
Directory in which was saved the pre-trained model.
-m PRETRAINED_MODEL_NAME, --pretrained_model_name PRETRAINED_MODEL_NAME
Name of the weight file for the pretrained model to
fine-tune in the ``pretrained_dir``.
"""
import argparse
import itertools
import os
import json
import re
from copy import deepcopy
import torch
from typing import List, Dict, Any, Tuple

import logging

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)

from hmtl.tasks import Task
from hmtl.training.multi_task_trainer import MultiTaskTrainer
from hmtl.common import create_and_set_iterators
from evaluate import evaluate
from train import train_model

from allennlp.models.model import Model
from allennlp.data import Vocabulary
from allennlp.data.iterators import DataIterator
from allennlp.commands.train import create_serialization_dir
from allennlp.common.params import Params
from allennlp.common.checks import ConfigurationError
from allennlp.nn import RegularizerApplicator

logger = logging.getLogger(__name__)
if __name__ == "__main__":
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--serialization_dir",
        required=True,
        help="Directory in which to save the model and its logs.",
        type=str,
    )
    parser.add_argument(
        "-c",
        "--config_file_path",
        required=True,
        help="Path to parameter file describing the new multi-tasked model to be fine-tuned.",
        type=str,
    )
    parser.add_argument(
        "-p",
        "--pretrained_dir",
        required=True,
        help="Directory in which the pre-trained model was saved.",
        type=str,
    )
    parser.add_argument(
        "-m",
        "--pretrained_model_name",
        required=True,
        help="Name of the weight file for the pretrained model to fine-tune in the ``pretrained_dir``.",
        type=str,
    )
    args = parser.parse_args()

    params = Params.from_file(params_file=args.config_file_path)
    serialization_dir = args.serialization_dir
    create_serialization_dir(params, serialization_dir, False)

    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, "config.json"), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)
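    # The top-level layout of the parameter file mirrors the keys consumed below: one or more
    # "task_*" blocks, a "model" block, an optional "regularizer" block, and a "multi_task_trainer"
    # block. A minimal sketch (the task name and the "..." contents are hypothetical placeholders):
    #
    # {
    #     "task_ner": {
    #         "task_description": {...},
    #         "data_params": {...}
    #     },
    #     "model": {...},
    #     "regularizer": [...],
    #     "multi_task_trainer": {...}
    # }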
    ### Instantiate tasks ###
    task_list = []
    task_keys = [key for key in params.keys() if re.search("^task_", key)]

    for key in task_keys:
        logger.info("Creating %s", key)
        task_params = params.pop(key)
        task_description = task_params.pop("task_description")
        task_data_params = task_params.pop("data_params")

        task = Task.from_params(params=task_description)
        task_list.append(task)

        _, _ = task.load_data_from_params(params=task_data_params)

    ### Load Vocabulary from files and save it to the new serialization_dir ###
    # PLEASE NOTE that we assume here that the vocabulary is the same for the pre-trained model
    # and the model to fine-tune. The most noticeable implication of this assumption is that the
    # label spaces of the two datasets (the one used for pre-training and the one used for
    # fine-tuning) are exactly the same.
    vocab = Vocabulary.from_files(os.path.join(args.pretrained_dir, "vocabulary"))
    logger.info("Vocabulary loaded from %s", os.path.join(args.pretrained_dir, "vocabulary"))

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))
    logger.info("Vocabulary saved to %s", os.path.join(serialization_dir, "vocabulary"))
    ### Load the data iterators for each task ###
    task_list = create_and_set_iterators(params=params, task_list=task_list, vocab=vocab)

    ### Load Regularizations ###
    regularizer = RegularizerApplicator.from_params(params.pop("regularizer", []))

    ### Create model ###
    model_params = params.pop("model")
    model = Model.from_params(vocab=vocab, params=model_params, regularizer=regularizer)

    logger.info("Loading the pretrained model from %s", os.path.join(args.pretrained_dir, args.pretrained_model_name))
    try:
        pretrained_model_state_path = os.path.join(args.pretrained_dir, args.pretrained_model_name)
        pretrained_model_state = torch.load(pretrained_model_state_path)
        model.load_state_dict(state_dict=pretrained_model_state)
    except Exception:
        raise ConfigurationError(
            "It appears that the configurations of the pretrained model and "
            "the model to fine-tune are not compatible. "
            "Please check the compatibility of the encoders and taggers in the "
            "config files."
        )

    ### Create multi-task trainer ###
    multi_task_trainer_params = params.pop("multi_task_trainer")
    trainer = MultiTaskTrainer.from_params(
        model=model, task_list=task_list, serialization_dir=serialization_dir, params=multi_task_trainer_params
    )

    ### Launch training ###
    metrics = train_model(multi_task_trainer=trainer, recover=False)
    if metrics is not None:
        logger.info("Fine-tuning is finished! Let's have a drink. It's on the house!")