# judgelm/serve/gradio_web_server_v2.py

"""
The gradio demo server for chatting with a single model.
"""

import argparse
from collections import defaultdict
import datetime
import json
import os
import random
import time
import uuid

import gradio as gr
import requests

from judgelm.conversation import SeparatorStyle
from judgelm.constants import (
    LOGDIR,
    WORKER_API_TIMEOUT,
    ErrorCode,
    MODERATION_MSG,
    CONVERSATION_LIMIT_MSG,
    SERVER_ERROR_MSG,
    INACTIVE_MSG,
    INPUT_CHAR_LEN_LIMIT,
    CONVERSATION_TURN_LIMIT,
    SESSION_EXPIRATION_TIME,
)
from judgelm.model.model_adapter import get_conversation_template
from judgelm.model.model_registry import model_info
from judgelm.serve.api_provider import (
    anthropic_api_stream_iter,
    openai_api_stream_iter,
    palm_api_stream_iter,
    init_palm_chat,
)
from judgelm.utils import (
    build_logger,
    violates_moderation,
    get_window_url_params_js,
    parse_gradio_auth_creds,
)


# Module-wide logger (the arguments are presumably the logger name and the
# log file name — see judgelm.utils.build_logger).
logger = build_logger("gradio_web_server", "gradio_web_server.log")

# HTTP headers sent with the streaming worker request in
# model_worker_stream_iter().
headers = {"User-Agent": "FastChat Client"}

# Reusable gradio button updates returned by the event handlers below.
no_change_btn = gr.Button.update()
enable_btn = gr.Button.update(interactive=True)
disable_btn = gr.Button.update(interactive=False)

# Runtime configuration; overwritten by set_global_vars().
controller_url = None
enable_moderation = False

# Markdown snippet with the license notice of the service.
learn_more_md = """
### License
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
"""

# Maps a client IP to the time.time() value at which its session expires
# (set in load_demo_v2). Unknown IPs default to 0, i.e. already expired.
ip_expiration_dict = defaultdict(lambda: 0)

# Sample rows of three strings each — apparently [question, answer 1, answer 2]
# without a reference answer. Presumably shown as UI examples; the usage is
# outside this section.
examples = [["What is the capital of France?", 
             "The capital of France is Paris.", 
             "The capital of France is London."],
            ["Write a sentence using the homophone \"won/one.\"\n",
             "The cat meowed.",
             "\"I won the game, but my one friend didn't.\""],
            ["Edit the following sentence \"I wants to visit Europe in my holidays.\"\nI wants to visit Europe in my holidays.",
             "I want to visit Europe during my holidays.",
             "I want to visit Europe during my holidays."],
            ["How is electricity generated?",
             "Electricity is generated through various methods, such as burning fossil fuels, nuclear reactions, and harnessing renewable energy sources like wind and solar power.",
             "Electricity is generated by simply plugging devices into an outlet."],
            ["What's your favorite type of music?",
             "My favorite type of music is classical.",
             "I enjoy listening to rock music."],
            ["Arrange the following words in alphabetical order: quickly, question, queue, quality.\n",
             "quickly\nquestion\nqueue\nquality\n\nExplanation: The given words are already in alphabetical order.",
             "Queuely, Question, Quality, Quickly."]]
# Sample rows of four strings each for the reference-answer mode — presumably
# [question, reference answer, answer 1, answer 2]; TODO confirm the column
# order against the UI wiring, which is outside this section.
examples_reference = [["Create a one-sentence description of the color yellow.",
                    "Yellow is a vibrant and warm color, often associated with sunshine, optimism, and cheerfulness.",
                    "Yellow is a bright and cheerful color that can make a room feel warm and inviting.",
                    "This is the description of the color yellow.Yellow is an emotion.",
                    ],
                    ["Who is the lead singer of the Gorillaz?",
                    "Damon Albarn is the lead singer of the Gorillaz",
                    "The lead singer of the Gorillaz is z世代.",
                    "Damon Albarn is the lead singer of the Gorillaz."
                    ],
                    ["I have a question the movie The Fugitive. What is the name of the character played by Harrison Ford?",
                    "The character played by Harrison Ford in The Fugitive is named Dr. Richard Kimble.",
                    "The character played by Harrison Ford in the movie The Fugitive is Dr. Richard Kimble.",
                    "The character played by Harrison Ford in The Fugitive is Doctor Richard Kimble."],
                    ["Reverse the following number sequence\n59, 22, 15, 11, 5",
                     "The reversed sequence would be 5, 11, 15, 22, 59.",
                     "11, 15, 11, 5, 9",
                     "5, 11, 15, 22, 59"]]

# Spelled-out English names for the integers 1 through 19, keyed by the integer.
# Used when rewriting the judge prompt for more than two answers.
num2words = dict(
    enumerate(
        (
            "one",
            "two",
            "three",
            "four",
            "five",
            "six",
            "seven",
            "eight",
            "nine",
            "ten",
            "eleven",
            "twelve",
            "thirteen",
            "fourteen",
            "fifteen",
            "sixteen",
            "seventeen",
            "eighteen",
            "nineteen",
        ),
        start=1,
    )
)

class State:
    """Per-session conversation state bound to one model name."""

    def __init__(self, model_name):
        """Create a fresh conversation for ``model_name`` with a new random id."""
        self.model_name = model_name
        # When set, the next bot_response_v2 call skips generation once.
        self.skip_next = False
        self.conv_id = uuid.uuid4().hex
        self.conv = get_conversation_template(model_name)

        if model_name == "palm-2":
            # According to release note, "chat-bison@001" is PaLM 2 for chat.
            # https://cloud.google.com/vertex-ai/docs/release-notes#May_10_2023
            self.palm_chat = init_palm_chat("chat-bison@001")

    def to_gradio_chatbot(self):
        """Return the conversation in the form produced by ``conv.to_gradio_chatbot()``."""
        chatbot = self.conv.to_gradio_chatbot()
        return chatbot

    def dict(self):
        """Return ``conv.dict()`` extended with ``conv_id`` and ``model_name``."""
        payload = self.conv.dict()
        payload["conv_id"] = self.conv_id
        payload["model_name"] = self.model_name
        return payload


def set_global_vars(controller_url_, enable_moderation_):
    """Store the controller URL and the moderation flag in the module globals."""
    global controller_url, enable_moderation
    controller_url, enable_moderation = controller_url_, enable_moderation_


def get_conv_log_filename():
    """Return the path ``LOGDIR/<YYYY>-<MM>-<DD>-conv.json`` for the current local date."""
    now = datetime.datetime.now()
    filename = "{}-{:02d}-{:02d}-conv.json".format(now.year, now.month, now.day)
    return os.path.join(LOGDIR, filename)


def get_model_list(controller_url, add_chatgpt, add_claude, add_palm):
    """Ask the controller for its model names and append the enabled API models.

    The controller is first told to refresh its workers (a non-200 reply
    trips an assertion), then queried for the model list. Names listed in
    ``model_info`` sort first, in ``model_info`` order; all other names follow
    in plain string order.
    """
    refresh_resp = requests.post(controller_url + "/refresh_all_workers")
    assert refresh_resp.status_code == 200
    list_resp = requests.post(controller_url + "/list_models")
    models = list_resp.json()["models"]

    # Add API providers
    api_models = (
        (add_chatgpt, ["gpt-3.5-turbo", "gpt-4"]),
        (add_claude, ["claude-2", "claude-instant-1"]),
        (add_palm, ["palm-2"]),
    )
    for enabled, names in api_models:
        if enabled:
            models += names

    # Registered models get a "___NN" key, which sorts before ordinary names.
    priority = {}
    for rank, registered_name in enumerate(model_info):
        priority[registered_name] = f"___{rank:02d}"

    def sort_key(name):
        return priority.get(name, name)

    models.sort(key=sort_key)
    logger.info(f"Models: {models}")
    return models


def load_demo_single_v2(models, url_params):
    """Pick the initially selected model and build the dropdown update.

    The first entry of ``models`` is the default (empty string when the list
    is empty); a ``model`` URL parameter naming a known model overrides it.

    NOTE(review): the tuple return of (state, dropdown update, component
    visibility updates) is disabled in this version, so the dropdown update is
    discarded and the function returns ``None``.
    """
    selected_model = ""
    if len(models) > 0:
        selected_model = models[0]

    if "model" in url_params:
        requested = url_params["model"]
        if requested in models:
            selected_model = requested

    # Built for parity with the original code path; the result is not returned.
    gr.Dropdown.update(choices=models, value=selected_model, visible=True)


def load_demo_v2(url_params, request: gr.Request):
    """Page-load handler: renew the caller's session and prepare the model list.

    The client IP's expiration time is pushed to now + SESSION_EXPIRATION_TIME.
    When the module-level ``args.model_list_mode`` is "reload", the global
    ``models`` list is refreshed from the controller first.
    """
    global models

    client_ip = request.client.host
    logger.info(f"load_demo. ip: {client_ip}. params: {url_params}")
    ip_expiration_dict[client_ip] = time.time() + SESSION_EXPIRATION_TIME

    reload_requested = args.model_list_mode == "reload"
    if reload_requested:
        models = get_model_list(
            controller_url, args.add_chatgpt, args.add_claude, args.add_palm
        )

    demo_outputs = load_demo_single_v2(models, url_params)
    return demo_outputs


def vote_last_response(state, vote_type, model_selector, request: gr.Request):
    """Append one JSON line describing a vote to today's conversation log."""
    with open(get_conv_log_filename(), "a") as log_file:
        record = dict(
            tstamp=round(time.time(), 4),
            type=vote_type,
            model=model_selector,
            state=state.dict(),
            ip=request.client.host,
        )
        log_file.write(json.dumps(record) + "\n")


def upvote_last_response(state, model_selector, request: gr.Request):
    """Record an upvote; return an empty string plus three disabled-button updates."""
    client_ip = request.client.host
    logger.info(f"upvote. ip: {client_ip}")
    vote_last_response(state, "upvote", model_selector, request)
    return ("", disable_btn, disable_btn, disable_btn)


def downvote_last_response(state, model_selector, request: gr.Request):
    """Record a downvote; return an empty string plus three disabled-button updates."""
    client_ip = request.client.host
    logger.info(f"downvote. ip: {client_ip}")
    vote_last_response(state, "downvote", model_selector, request)
    return ("", disable_btn, disable_btn, disable_btn)


def flag_last_response(state, model_selector, request: gr.Request):
    """Record a flag; return an empty string plus three disabled-button updates."""
    client_ip = request.client.host
    logger.info(f"flag. ip: {client_ip}")
    vote_last_response(state, "flag", model_selector, request)
    return ("", disable_btn, disable_btn, disable_btn)


def regenerate(state, request: gr.Request):
    """Blank the last message so it can be generated again.

    Returns ``(state, chatbot, "")`` followed by five disabled-button updates.
    """
    logger.info(f"regenerate. ip: {request.client.host}")
    state.conv.update_last_message(None)
    chatbot = state.to_gradio_chatbot()
    return (state, chatbot, "", *([disable_btn] * 5))


def regenerate_v2(state, request: gr.Request):
    """Blank the last message so it can be generated again.

    Returns ``(state, chatbot, "")`` followed by six disabled-button updates.
    """
    logger.info(f"regenerate. ip: {request.client.host}")
    state.conv.update_last_message(None)
    chatbot = state.to_gradio_chatbot()
    return (state, chatbot, "", *([disable_btn] * 6))


def clear_history_v2(answer_num, request: gr.Request):
    """Reset the conversation and the answer inputs.

    Args:
        answer_num: Number of answer boxes currently in use (converted with
            ``int``).
        request: The gradio request; only the client IP is logged.

    Returns:
        A tuple ``(None, [], "")`` (state, chatbot, question text), then one
        "Enter the assistant N's answer text" string per answer, then five
        disabled-button updates and one enabled-button update.

    NOTE(review): the length of the returned tuple depends on ``answer_num``;
    confirm that this matches the outputs wired to this handler.
    """
    logger.info(f"clear_history. ip: {request.client.host}")
    # The original wrote `tuple += ("...")`, which is `tuple += str` and raised
    # TypeError for any answer_num >= 1; build a real tuple of strings instead.
    answer_texts = tuple(
        f"Enter the assistant {i + 1}'s answer text" for i in range(int(answer_num))
    )
    return (None, [], "") + answer_texts + (disable_btn,) * 5 + (enable_btn,)


def add_text_v2(state, model_selector, text, request: gr.Request):
    """Validate a user message and append it to the conversation.

    A new ``State`` is created when ``state`` is None. The message is rejected
    (``state.skip_next`` set, buttons left unchanged) when it is empty, when
    the client's session has expired, when moderation flags it, or when the
    conversation turn limit is reached. Otherwise the text is truncated to
    INPUT_CHAR_LEN_LIMIT characters and appended together with an empty reply
    slot.

    Returns ``(state, chatbot, textbox value)`` followed by six button updates.
    """
    client_ip = request.client.host
    logger.info(f"add_text. ip: {client_ip}. len: {len(text)}")

    if state is None:
        state = State(model_selector)

    def _reject(message):
        # Flag the state so the follow-up bot_response_v2 call skips generation.
        state.skip_next = True
        return (state, state.to_gradio_chatbot(), message) + (no_change_btn,) * 6

    if len(text) <= 0:
        return _reject("")

    if ip_expiration_dict[client_ip] < time.time():
        logger.info(f"inactive. ip: {request.client.host}. text: {text}")
        return _reject(INACTIVE_MSG)

    if enable_moderation and violates_moderation(text):
        logger.info(f"violate moderation. ip: {request.client.host}. text: {text}")
        return _reject(MODERATION_MSG)

    conv = state.conv
    completed_turns = (len(conv.messages) - conv.offset) // 2
    if completed_turns >= CONVERSATION_TURN_LIMIT:
        logger.info(f"conversation turn limit. ip: {request.client.host}. text: {text}")
        return _reject(CONVERSATION_LIMIT_MSG)

    user_role, bot_role = conv.roles[0], conv.roles[1]
    conv.append_message(user_role, text[:INPUT_CHAR_LEN_LIMIT])  # Hard cut-off
    conv.append_message(bot_role, None)
    return (state, state.to_gradio_chatbot(), "") + (disable_btn,) * 6


def add_pretext_reference(state, model_selector, radio, question_text, answer_num, reference_box, answer1_text, answer2_text, answer3_text, answer4_text, answer5_text, answer6_text, answer7_text, answer8_text, answer9_text, answer10_text, request:gr.Request):
    """Build the judge prompt with a reference answer and start a new conversation.

    A fresh ``State`` is always created for ``model_selector``; the incoming
    ``state`` is ignored. The prompt contains the question, the reference
    answer, the first ``answer_num`` answers and the scoring instructions,
    which are reworded when more than two answers are judged.

    Args:
        radio: When equal to "Fast Eval", the conversation's stop string is set
            to a newline.
        question_text: The question; the literal "ep1" selects a built-in
            example question and two example answers.
        answer_num: How many of the ten answer texts to include.
        reference_box: Reference answer inserted before Assistant 1's answer.
        answer1_text .. answer10_text: Candidate answers.
        request: The gradio request (client IP is used for logging and for the
            session expiration check).

    Returns:
        ``(state, chatbot)`` followed by six button updates. Rejected inputs
        (empty question, expired session, moderation hit, turn limit) set
        ``state.skip_next`` and return the same shape with unchanged buttons.
        The original returned three extra message strings on those paths,
        which did not match the success path or bot_response_v2's outputs.
    """
    text = question_text

    answers = [answer1_text, answer2_text, answer3_text, answer4_text, answer5_text, answer6_text, answer7_text, answer8_text, answer9_text, answer10_text]

    ip = request.client.host
    logger.info(f"add_text. ip: {ip}. len: {len(text)}")

    state = State(model_selector)

    def _skip():
        # Same arity as the success path; bot_response_v2 sees skip_next and
        # does not generate.
        state.skip_next = True
        return (state, state.to_gradio_chatbot()) + (no_change_btn,) * 6

    if radio == "Fast Eval":
        state.conv.stop_str = "\n"
    # example questions
    if "ep1" == question_text:
        question_text = "I want to get a tattoo but I'm not sure what kind of design to get. What do you suggest?"
        answer1_text = "As an AI language model, I cannot recommend any specific tattoo designs. However, I recommend researching various designs and styles to find one that resonates with you and has a personal meaning. It's also important to find a reputable tattoo artist that you trust and feel comfortable with. Best of luck in your tattoo journey"
        answer2_text = "A friend of mine has a tattoo that he really likes, so I've been thinking about getting one too. What do you think?"

    # Validation runs on the question as originally entered.
    if len(text) <= 0:
        return _skip()

    if ip_expiration_dict[ip] < time.time():
        logger.info(f"inactive. ip: {ip}. text: {text}")
        return _skip()

    if enable_moderation:
        flagged = violates_moderation(text)
        if flagged:
            logger.info(f"violate moderation. ip: {ip}. text: {text}")
            return _skip()

    conv = state.conv
    if (len(conv.messages) - conv.offset) // 2 >= CONVERSATION_TURN_LIMIT:
        logger.info(f"conversation turn limit. ip: {ip}. text: {text}")
        return _skip()

    # modify the input text
    prompt_template = {"prompt_id": 1, "system_prompt": "You are a helpful and precise assistant for checking the quality of the answer.", "prompt_template": "[Question]\n{question}\n\n[The Start of Assistant 1's Answer]\n{answer_1}\n\n[The End of Assistant 1's Answer]\n\n[The Start of Assistant 2's Answer]\n{answer_2}\n\n[The End of Assistant 2's Answer]\n\n[System]\n{prompt}\n\n", "defaults": {"prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. The two scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."}, "description": "Prompt for general questions", "category": "general"}

    appendix = "### Response:"

    text = prompt_template['system_prompt'] + '\n' + prompt_template['prompt_template'].format(
        question=question_text,
        answer_1=answer1_text,
        answer_2=answer2_text,
        prompt=prompt_template['defaults']['prompt'],
    ) + appendix

    # Insert the reference answer right before Assistant 1's answer.
    plug_in_pos = text.find("[The Start of Assistant 1's Answer]")
    text = text[:plug_in_pos] + "[Reference Answer]\n" + reference_box + "\n\n" + text[plug_in_pos:]

    if answer_num > 2:
        total = int(answer_num)
        total_word = num2words[total]

        # Insert answers 3..total right before the "[System]" instructions.
        extra_answers = ""
        for idx in range(2, total):
            extra_answers += "[The Start of Assistant " + str(idx + 1) + "'s Answer]\n" + answers[idx] + "\n\n" + "[The End of Assistant " + str(idx + 1) + "'s Answer]\n\n"
        plug_in_pos = text.find("[System]")
        text = text[:plug_in_pos] + extra_answers + text[plug_in_pos:]

        # Reword the instructions from "two" to the actual number of answers.
        text = text.replace("of two AI assistants", f"of {total_word} AI assistants")
        text = text.replace("containing only two values indicating ", f"containing only {total_word} values indicating ")
        text = text.replace("for Assistant 1 and 2", "for Assistant 1")

        # Expand "for Assistant 1" into "for Assistant 1, 2, ... and <total>".
        plug_in_after_str = "for Assistant 1"
        plug_in_pos = text.find(plug_in_after_str) + len(plug_in_after_str)
        assistant_ids = "".join(f", {k}" for k in range(2, total)) + f" and {total}"
        text = text[:plug_in_pos] + assistant_ids + text[plug_in_pos:]

        text = text.replace("The two scores are", f"The {total_word} scores are")

    text = text[:INPUT_CHAR_LEN_LIMIT]  # Hard cut-off
    conv.append_message(conv.roles[0], text)
    conv.append_message(conv.roles[1], None)
    return (state, state.to_gradio_chatbot()) + (disable_btn,) * 6


def add_pretext_single_answer(state, model_selector, radio, question_text, reference_box, answer_text, request:gr.Request):
    """Build the judge prompt for one answer scored against a reference answer.

    A fresh ``State`` is always created for ``model_selector``; the incoming
    ``state`` is ignored. The two-assistant template is reused: the reference
    answer fills the Assistant 1 slot and ``answer_text`` the Assistant 2
    slot. The prompt ends with "### Response:10", which presumably pre-fills
    the reference's score as 10 — TODO confirm.

    Args:
        radio: When equal to "Fast Eval", the conversation's stop string is set
            to a newline.
        question_text: The question; the literal "ep1" selects a built-in
            example question.
        reference_box: Reference answer (Assistant 1 slot).
        answer_text: Answer to be judged (Assistant 2 slot).
        request: The gradio request (client IP is used for logging and for the
            session expiration check).

    Returns:
        ``(state, chatbot)`` followed by six button updates. Rejected inputs
        (empty question, expired session, moderation hit, turn limit) set
        ``state.skip_next`` and return the same shape with unchanged buttons.
        The original returned three extra message strings on those paths,
        which did not match the success path or bot_response_v2's outputs.
    """
    text = question_text

    ip = request.client.host
    logger.info(f"add_text. ip: {ip}. len: {len(text)}")

    state = State(model_selector)

    def _skip():
        # Same arity as the success path; bot_response_v2 sees skip_next and
        # does not generate.
        state.skip_next = True
        return (state, state.to_gradio_chatbot()) + (no_change_btn,) * 6

    if radio == "Fast Eval":
        state.conv.stop_str = "\n"
    # example questions
    if "ep1" == question_text:
        # Only the question is substituted: the example answers assigned here
        # in the original were never read by this function.
        question_text = "I want to get a tattoo but I'm not sure what kind of design to get. What do you suggest?"

    # Validation runs on the question as originally entered.
    if len(text) <= 0:
        return _skip()

    if ip_expiration_dict[ip] < time.time():
        logger.info(f"inactive. ip: {ip}. text: {text}")
        return _skip()

    if enable_moderation:
        flagged = violates_moderation(text)
        if flagged:
            logger.info(f"violate moderation. ip: {ip}. text: {text}")
            return _skip()

    conv = state.conv
    if (len(conv.messages) - conv.offset) // 2 >= CONVERSATION_TURN_LIMIT:
        logger.info(f"conversation turn limit. ip: {ip}. text: {text}")
        return _skip()

    # modify the input text
    prompt_template = {"prompt_id": 1, "system_prompt": "You are a helpful and precise assistant for checking the quality of the answer.", "prompt_template": "[Question]\n{question}\n\n[The Start of Assistant 1's Answer]\n{answer_1}\n\n[The End of Assistant 1's Answer]\n\n[The Start of Assistant 2's Answer]\n{answer_2}\n\n[The End of Assistant 2's Answer]\n\n[System]\n{prompt}\n\n", "defaults": {"prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. The two scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."}, "description": "Prompt for general questions", "category": "general"}

    appendix = "### Response:10"

    text = prompt_template['system_prompt'] + '\n' + prompt_template['prompt_template'].format(
        question=question_text,
        answer_1=reference_box,
        answer_2=answer_text,
        prompt=prompt_template['defaults']['prompt'],
    ) + appendix

    text = text[:INPUT_CHAR_LEN_LIMIT]  # Hard cut-off
    conv.append_message(conv.roles[0], text)
    conv.append_message(conv.roles[1], None)
    return (state, state.to_gradio_chatbot()) + (disable_btn,) * 6


def add_pretext(state, model_selector, radio, question_text, answer_num, answer1_text, answer2_text, answer3_text, answer4_text, answer5_text, answer6_text, answer7_text, answer8_text, answer9_text, answer10_text, request:gr.Request):
    """Build the judge prompt (no reference answer) and start a new conversation.

    A fresh ``State`` is always created for ``model_selector``; the incoming
    ``state`` is ignored. The prompt contains the question, the first
    ``answer_num`` answers and the scoring instructions, which are reworded
    when more than two answers are judged.

    Args:
        radio: When equal to "Fast Eval", the conversation's stop string is set
            to a newline.
        question_text: The question; the literal "ep1" selects a built-in
            example question and two example answers.
        answer_num: How many of the ten answer texts to include.
        answer1_text .. answer10_text: Candidate answers.
        request: The gradio request (client IP is used for logging and for the
            session expiration check).

    Returns:
        ``(state, chatbot)`` followed by six button updates. Rejected inputs
        (empty question, expired session, moderation hit, turn limit) set
        ``state.skip_next`` and return the same shape with unchanged buttons.
        The original returned three extra message strings on those paths,
        which did not match the success path or bot_response_v2's outputs.
    """
    text = question_text

    answers = [answer1_text, answer2_text, answer3_text, answer4_text, answer5_text, answer6_text, answer7_text, answer8_text, answer9_text, answer10_text]

    ip = request.client.host
    logger.info(f"add_text. ip: {ip}. len: {len(text)}")

    state = State(model_selector)

    def _skip():
        # Same arity as the success path; bot_response_v2 sees skip_next and
        # does not generate.
        state.skip_next = True
        return (state, state.to_gradio_chatbot()) + (no_change_btn,) * 6

    if radio == "Fast Eval":
        state.conv.stop_str = "\n"
    # example questions
    if "ep1" == question_text:
        question_text = "I want to get a tattoo but I'm not sure what kind of design to get. What do you suggest?"
        answer1_text = "As an AI language model, I cannot recommend any specific tattoo designs. However, I recommend researching various designs and styles to find one that resonates with you and has a personal meaning. It's also important to find a reputable tattoo artist that you trust and feel comfortable with. Best of luck in your tattoo journey"
        answer2_text = "A friend of mine has a tattoo that he really likes, so I've been thinking about getting one too. What do you think?"

    # Validation runs on the question as originally entered.
    if len(text) <= 0:
        return _skip()

    if ip_expiration_dict[ip] < time.time():
        logger.info(f"inactive. ip: {ip}. text: {text}")
        return _skip()

    if enable_moderation:
        flagged = violates_moderation(text)
        if flagged:
            logger.info(f"violate moderation. ip: {ip}. text: {text}")
            return _skip()

    conv = state.conv
    if (len(conv.messages) - conv.offset) // 2 >= CONVERSATION_TURN_LIMIT:
        logger.info(f"conversation turn limit. ip: {ip}. text: {text}")
        return _skip()

    # modify the input text
    prompt_template = {"prompt_id": 1, "system_prompt": "You are a helpful and precise assistant for checking the quality of the answer.", "prompt_template": "[Question]\n{question}\n\n[The Start of Assistant 1's Answer]\n{answer_1}\n\n[The End of Assistant 1's Answer]\n\n[The Start of Assistant 2's Answer]\n{answer_2}\n\n[The End of Assistant 2's Answer]\n\n[System]\n{prompt}\n\n", "defaults": {"prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. The two scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."}, "description": "Prompt for general questions", "category": "general"}

    appendix = "### Response:"

    text = prompt_template['system_prompt'] + '\n' + prompt_template['prompt_template'].format(
        question=question_text,
        answer_1=answer1_text,
        answer_2=answer2_text,
        prompt=prompt_template['defaults']['prompt'],
    ) + appendix

    if answer_num > 2:
        total = int(answer_num)
        total_word = num2words[total]

        # Insert answers 3..total right before the "[System]" instructions.
        extra_answers = ""
        for idx in range(2, total):
            extra_answers += "[The Start of Assistant " + str(idx + 1) + "'s Answer]\n" + answers[idx] + "\n\n" + "[The End of Assistant " + str(idx + 1) + "'s Answer]\n\n"
        plug_in_pos = text.find("[System]")
        text = text[:plug_in_pos] + extra_answers + text[plug_in_pos:]

        # Reword the instructions from "two" to the actual number of answers.
        text = text.replace("of two AI assistants", f"of {total_word} AI assistants")
        text = text.replace("containing only two values indicating ", f"containing only {total_word} values indicating ")
        text = text.replace("for Assistant 1 and 2", "for Assistant 1")

        # Expand "for Assistant 1" into "for Assistant 1, 2, ... and <total>".
        plug_in_after_str = "for Assistant 1"
        plug_in_pos = text.find(plug_in_after_str) + len(plug_in_after_str)
        assistant_ids = "".join(f", {k}" for k in range(2, total)) + f" and {total}"
        text = text[:plug_in_pos] + assistant_ids + text[plug_in_pos:]

        text = text.replace("The two scores are", f"The {total_word} scores are")

    text = text[:INPUT_CHAR_LEN_LIMIT]  # Hard cut-off
    conv.append_message(conv.roles[0], text)
    conv.append_message(conv.roles[1], None)
    return (state, state.to_gradio_chatbot()) + (disable_btn,) * 6


def post_process_code(code):
    """Replace ``\\_`` with ``_`` inside fenced code blocks.

    The text is split on the fence marker (newline followed by three
    backticks). Only when the number of pieces is odd, i.e. the fences are
    balanced, are the odd-indexed pieces (the fenced contents) rewritten.
    """
    fence = "\n```"
    if fence not in code:
        return code

    pieces = code.split(fence)
    if len(pieces) % 2 == 1:
        pieces = [
            piece.replace("\\_", "_") if index % 2 == 1 else piece
            for index, piece in enumerate(pieces)
        ]
    return fence.join(pieces)


def model_worker_stream_iter(
    conv,
    model_name,
    worker_addr,
    prompt,
    temperature,
    repetition_penalty,
    top_p,
    max_new_tokens,
):
    """Stream generation results from a model worker.

    Posts the generation parameters to ``<worker_addr>/worker_generate_stream``
    and yields each non-empty, NUL-delimited chunk of the response decoded as
    JSON. The stop string and stop token ids are taken from ``conv``.

    Raises:
        requests.exceptions.RequestException: on connection or timeout errors
            (raised when the generator is first advanced).
    """
    # Make requests
    gen_params = {
        "model": model_name,
        "prompt": prompt,
        "temperature": temperature,
        "repetition_penalty": repetition_penalty,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
        "stop": conv.stop_str,
        "stop_token_ids": conv.stop_token_ids,
        "echo": False,
    }
    logger.info(f"==== request ====\n{gen_params}")

    # Stream output. The context manager closes the streamed response even
    # when the consumer stops iterating early, so the connection is released.
    with requests.post(
        worker_addr + "/worker_generate_stream",
        headers=headers,
        json=gen_params,
        stream=True,
        timeout=WORKER_API_TIMEOUT,
    ) as response:
        for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
            if chunk:
                yield json.loads(chunk.decode())


def bot_response_v2(state, temperature, top_p, max_new_tokens, request: gr.Request):
    """Generate the reply for the last message and stream it to the UI.

    This is a generator. Every yield is ``(state, chatbot)`` followed by six
    button updates. While text streams in, the last message ends with a "▌"
    cursor that is removed at the end. On success a "chat" record is appended
    to today's conversation log.

    Args:
        state: Conversation state; when ``state.skip_next`` is set, the flag is
            cleared and nothing is generated.
        temperature, top_p, max_new_tokens: Sampling parameters, converted to
            float / float / int.
        request: The gradio request; the client IP is logged.
    """
    logger.info(f"bot_response. ip: {request.client.host}")
    start_tstamp = time.time()
    temperature = float(temperature)
    top_p = float(top_p)
    max_new_tokens = int(max_new_tokens)

    if state.skip_next:
        # This generate call is skipped due to invalid inputs
        state.skip_next = False
        yield (state, state.to_gradio_chatbot()) + (no_change_btn,) * 6
        return

    # Button updates yielded whenever generation fails.
    error_btns = (
        disable_btn,
        disable_btn,
        disable_btn,
        enable_btn,
        enable_btn,
        disable_btn,
    )

    conv, model_name = state.conv, state.model_name
    if model_name == "gpt-3.5-turbo" or model_name == "gpt-4":
        prompt = conv.to_openai_api_messages()
        stream_iter = openai_api_stream_iter(
            model_name, prompt, temperature, top_p, max_new_tokens
        )
    elif model_name == "claude-2" or model_name == "claude-instant-1":
        prompt = conv.get_prompt()
        stream_iter = anthropic_api_stream_iter(
            model_name, prompt, temperature, top_p, max_new_tokens
        )
    elif model_name == "palm-2":
        stream_iter = palm_api_stream_iter(
            state.palm_chat, conv.messages[-2][1], temperature, top_p, max_new_tokens
        )
    else:
        # Query worker address
        ret = requests.post(
            controller_url + "/get_worker_address", json={"model": model_name}
        )
        worker_addr = ret.json()["address"]
        logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")

        # No available worker
        if worker_addr == "":
            conv.update_last_message(SERVER_ERROR_MSG)
            yield (state, state.to_gradio_chatbot()) + error_btns
            return

        # Construct prompt.
        # We need to call it here, so it will not be affected by "▌".
        prompt = conv.get_prompt()

        # Set repetition_penalty
        if "t5" in model_name:
            repetition_penalty = 1.2
        else:
            repetition_penalty = 1.0

        stream_iter = model_worker_stream_iter(
            conv,
            model_name,
            worker_addr,
            prompt,
            temperature,
            repetition_penalty,
            top_p,
            max_new_tokens,
        )

    conv.update_last_message("▌")
    yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 6

    # Defined up front so the log call below cannot hit an unbound name when
    # the stream produces no chunk at all.
    output = ""
    try:
        for data in stream_iter:
            if data["error_code"] == 0:
                output = data["text"].strip()
                if "vicuna" in model_name:
                    output = post_process_code(output)
                conv.update_last_message(output + "▌")
                yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 6
            else:
                output = data["text"] + f"\n\n(error_code: {data['error_code']})"
                conv.update_last_message(output)
                yield (state, state.to_gradio_chatbot()) + error_btns
                return
            time.sleep(0.015)
    except requests.exceptions.RequestException as e:
        conv.update_last_message(
            f"{SERVER_ERROR_MSG}\n\n"
            f"(error_code: {ErrorCode.GRADIO_REQUEST_ERROR}, {e})"
        )
        yield (state, state.to_gradio_chatbot()) + error_btns
        return
    except Exception as e:
        conv.update_last_message(
            f"{SERVER_ERROR_MSG}\n\n"
            f"(error_code: {ErrorCode.GRADIO_STREAM_UNKNOWN_ERROR}, {e})"
        )
        yield (state, state.to_gradio_chatbot()) + error_btns
        return

    # Delete "▌"
    conv.update_last_message(conv.messages[-1][-1][:-1])
    yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 6

    finish_tstamp = time.time()
    logger.info(f"{output}")

    with open(get_conv_log_filename(), "a") as fout:
        data = {
            "tstamp": round(finish_tstamp, 4),
            "type": "chat",
            "model": model_name,
            "gen_params": {
                "temperature": temperature,
                "top_p": top_p,
                "max_new_tokens": max_new_tokens,
            },
            "start": round(start_tstamp, 4),
            "finish": round(finish_tstamp, 4),
            "state": state.dict(),
            "ip": request.client.host,
        }
        fout.write(json.dumps(data) + "\n")


def add_answer(answer_num):
    """Reveal one more answer textbox in the multi-answer UI.

    Args:
        answer_num: Number of answer textboxes currently shown. It comes
            from the hidden ``gr.Number`` counter and is cast with ``int``
            before being used as a count.

    Returns:
        A 12-tuple matching ``answer_list + [answer_num, reduce_answer_btn]``:
        ten textbox updates, the new counter value and an update that
        enables the "Reduce Answer" button.
    """
    max_answers = 10  # number of answer textboxes wired as outputs
    shown = int(answer_num)

    if shown >= max_answers:
        # Every textbox is already visible. Return no-op updates so the
        # number of returned values still matches the wired outputs instead
        # of overflowing it.
        return (gr.update(),) * max_answers + (answer_num, enable_btn)

    return (
        # Boxes that are already visible stay visible.
        (gr.update(visible=True),) * shown
        # The newly revealed box starts out empty.
        + (gr.update(visible=True, value=""),)
        # Remaining (hidden) boxes get their value reset to "".
        + ("",) * (max_answers - 1 - shown)
        + (answer_num + 1, enable_btn)
    )


def reduce_answer(answer_num):
    """Hide the last visible answer textbox in the multi-answer UI.

    Args:
        answer_num: Number of answer textboxes currently shown. It comes
            from the hidden ``gr.Number`` counter and is cast with ``int``
            before being used as a count.

    Returns:
        A 12-tuple matching ``answer_list + [answer_num, reduce_answer_btn]``:
        ten textbox updates, the new counter value and an update for the
        "Reduce Answer" button (disabled once only two answers remain).
    """
    max_answers = 10  # number of answer textboxes wired as outputs
    min_answers = 2  # the UI never shows fewer than two answers
    shown = int(answer_num)

    if shown <= min_answers:
        # Nothing left to hide: keep everything as is and make sure the
        # button is disabled, rather than dropping below the minimum.
        return (gr.update(),) * max_answers + (answer_num, disable_btn)

    remaining = shown - 1
    reduce_btn_update = disable_btn if remaining == min_answers else enable_btn
    return (
        # Boxes that stay visible.
        (gr.update(visible=True),) * remaining
        # The box being removed is hidden and cleared.
        + (gr.update(visible=False, value=""),)
        # Boxes that were already hidden get their value reset to "".
        + ("",) * (max_answers - shown)
        + (answer_num - 1, reduce_btn_update)
    )


# Custom CSS passed to gr.Blocks(css=...) in build_demo_v2. It centers <h1>
# headings, enlarges the text of the notice/leaderboard markdown elements,
# hides table header cells inside the notice, and adjusts table cell spacing.
block_css = """
h1 {
  text-align: center;
}
#notice_markdown {
    font-size: 104%
}
#notice_markdown th {
    display: none;
}
#notice_markdown td {
    padding-top: 6px;
    padding-bottom: 6px;
}
#leaderboard_markdown {
    font-size: 104%
}
#leaderboard_markdown td {
    padding-top: 6px;
    padding-bottom: 6px;
}
#leaderboard_dataframe td {
    line-height: 0.1em;
}
"""


def get_model_description_md(models):
    """Render a three-column Markdown table describing the given models.

    Models found in ``model_info`` are listed once per ``simple_name`` with
    their link and description. Models that are not registered get a
    placeholder cell asking for a description to be added.

    Args:
        models: Iterable of model names.

    Returns:
        The Markdown table as a string. The last row is left without a
        trailing newline when the number of cells is not a multiple of three.
    """
    table_header = """
| | | |
| ---- | ---- | ---- |
"""
    cells = []
    visited = set()
    for name in models:
        if name in model_info:
            minfo = model_info[name]
            # Several model names can share one registry entry; show it once.
            if minfo.simple_name in visited:
                continue
            visited.add(minfo.simple_name)
            cells.append(
                f"[{minfo.simple_name}]({minfo.link}): {minfo.description}"
            )
        else:
            visited.add(name)
            # Point at this project's registry module (the one imported above),
            # not at the upstream FastChat path.
            cells.append(
                f"[{name}](): Add the description at judgelm/model/model_registry.py"
            )

    parts = [table_header]
    for idx, cell in enumerate(cells):
        if idx % 3 == 0:
            parts.append("|")  # opening pipe of a new table row
        parts.append(f" {cell} |")
        if idx % 3 == 2:
            parts.append("\n")  # row is complete after three cells
    return "".join(parts)


def build_single_model_ui_v2(model_selector, max_answer_num=10, reference=False, multi_answer=False, single_answer=False):
    """Create the judging UI for one tab and register its event handlers.

    The function instantiates Gradio components in layout order, so the
    statement order below is the on-screen order. It is called once per tab
    from ``build_demo_v2`` inside the ``gr.Blocks`` context.

    Args:
        model_selector: Component whose value is passed to the handlers as
            the selected model; its ``change`` event also clears the history.
        max_answer_num: Total number of answer textboxes created in the
            two-or-more-answers layout; boxes after the first two start
            hidden. NOTE(review): ``add_answer`` / ``reduce_answer`` work
            with a hard-coded count of 10, so other values presumably do not
            fit the multi-answer buttons -- confirm before changing.
        reference: If true, add a reference textbox and send the judge
            request through ``add_pretext_reference``.
        multi_answer: If true, add the "Add Answer" / "Reduce Answer"
            buttons that show or hide extra answer textboxes.
        single_answer: If true, create a single answer textbox (plus the
            reference textbox) and send the judge request through
            ``add_pretext_single_answer``.

    Returns:
        None. The components and listeners are created as a side effect.
    """
    # Conversation state for this tab; passed to and returned by the handlers.
    state = gr.State()
    with gr.Row():
        question_box = gr.Textbox(
            placeholder="Enter the question text",
            label="Question",
        )
    # Only the reference and single-answer layouts get a reference textbox.
    reference_box = None
    if reference or single_answer:
        with gr.Row():
            reference_box = gr.Textbox(
                placeholder="Enter the reference text",
                label="Reference",
            )

    # Answer textboxes, in display order.
    answer_list = []
    with gr.Group():
        if single_answer:
            answer_box = gr.Textbox(
                placeholder=f"Enter the assistant 1's answer text",
                label=f"Assistant 1's Answer",
            )
            answer_list.append(answer_box)     
        else:
            # The first two answers are always visible ...
            for i in range(2):
                with gr.Row():
                    answer_box = gr.Textbox(
                        placeholder=f"Enter the assistant {i+1}'s answer text",
                        label=f"Assistant {i+1}'s Answer",
                    )
                answer_list.append(answer_box)
            # ... the remaining ones are created hidden.
            for i in range(2, max_answer_num):
                with gr.Row():
                    answer_box = gr.Textbox(
                        placeholder=f"Enter the assistant {i+1}'s answer text",
                        label=f"Assistant {i+1}'s Answer",
                        visible=False,
                    )
                answer_list.append(answer_box)

    with gr.Row().style():
        # Hidden counter of the answers currently shown; starts at 2.
        answer_num = gr.Number(value=2, visible=False)
        if multi_answer:
            add_answer_btn = gr.Button(value="Add Answer")
            reduce_answer_btn = gr.Button(value="Reduce Answer", interactive=False)
        radio = gr.Radio(
            label="Eval Type",
            choices=["Detailed Eval", "Fast Eval"],
            value="Detailed Eval",
        )

    with gr.Row().style(equal_height=True):
        send_pretext_btn = gr.Button(value="Generate the judgement", variant="primary")

    with gr.Row():
        gr.Markdown('Click to add example as input.👇')
    
    # Clickable examples; each layout fills its own set of input components.
    # NOTE(review): `examples` / `examples_reference` are defined outside this
    # function; presumably each row must supply one value per listed input
    # (the single-answer layout lists only three) -- confirm.
    if reference:
        with gr.Row():
            gr.Examples(examples=examples_reference,
                        inputs=[question_box, reference_box, answer_list[0], answer_list[1]])
    elif single_answer:
        with gr.Row():
            gr.Examples(examples=examples_reference,
                        inputs=[question_box, reference_box, answer_list[0]])
    else:
        with gr.Row():
            gr.Examples(examples=examples,
                        inputs=[question_box, answer_list[0], answer_list[1]])


    # Conversation display plus a free-form text input and "Send" button.
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        label="Scroll down and start chatting",
        height=550,
    )
    with gr.Row():
        with gr.Column(scale=20):
            textbox = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press ENTER",
            )
    with gr.Row():
        send_btn = gr.Button(value="Send")

    # Feedback / control buttons; all start disabled.
    with gr.Row() as button_row:
        upvote_btn = gr.Button(value="👍  Upvote", interactive=False)
        downvote_btn = gr.Button(value="👎  Downvote", interactive=False)
        flag_btn = gr.Button(value="⚠️  Flag", interactive=False)
        regenerate_btn = gr.Button(value="🔄  Regenerate", interactive=False)
        clear_btn = gr.Button(value="🗑️  Clear history", interactive=False)


    # Generation parameters forwarded to bot_response_v2.
    with gr.Accordion("Parameters", open=False) as parameter_row:
        temperature = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.2,
            step=0.1,
            interactive=True,
            label="Temperature",
        )
        
        top_p = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=1.0,
            step=0.1,
            interactive=True,
            label="Top P",
        )
        max_output_tokens = gr.Slider(
            minimum=16,
            maximum=2048,
            value=2048,
            step=64,
            interactive=True,
            label="Max output tokens",
        )

    gr.Markdown(learn_more_md)

    # Register listeners
    # The order of btn_list matters: it is appended to the handlers' outputs,
    # so handlers return one button update per entry, in this order.
    btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn, send_pretext_btn]
    upvote_btn.click(
        upvote_last_response,
        [state, model_selector],
        [textbox, upvote_btn, downvote_btn, flag_btn],
    )
    downvote_btn.click(
        downvote_last_response,
        [state, model_selector],
        [textbox, upvote_btn, downvote_btn, flag_btn],
    )
    flag_btn.click(
        flag_last_response,
        [state, model_selector],
        [textbox, upvote_btn, downvote_btn, flag_btn],
    )
    # Regenerate: regenerate_v2 runs first, then bot_response_v2 is chained.
    regenerate_btn.click(regenerate_v2, state, [state, chatbot, textbox] + btn_list).then(
        bot_response_v2,
        [state, temperature, top_p, max_output_tokens],
        [state, chatbot] + btn_list,
    )
    
    # Clearing (explicitly or by switching model) targets the state, chat,
    # text inputs, every answer textbox and the buttons.
    clear_btn.click(clear_history_v2, answer_num, [state, chatbot, textbox, question_box] + answer_list + btn_list)

    model_selector.change(clear_history_v2, answer_num, [state, chatbot, textbox, question_box] + answer_list + btn_list)
    if multi_answer:
        # Both handlers update all answer textboxes, the hidden counter and
        # the "Reduce Answer" button.
        add_answer_btn.click(
            add_answer, answer_num, answer_list + [answer_num, reduce_answer_btn]
        )
        reduce_answer_btn.click(
            reduce_answer, answer_num, answer_list + [answer_num, reduce_answer_btn]
        )
    # Free-form chat: pressing ENTER and clicking "Send" are wired identically.
    textbox.submit(
        add_text_v2, [state, model_selector, textbox], [state, chatbot, textbox] + btn_list
    ).then(
        bot_response_v2,
        [state, temperature, top_p, max_output_tokens],
        [state, chatbot] + btn_list,
    )
    send_btn.click(
        add_text_v2, [state, model_selector, textbox], [state, chatbot, textbox] + btn_list
    ).then(
        bot_response_v2,
        [state, temperature, top_p, max_output_tokens],
        [state, chatbot] + btn_list,
    )

    # "Generate the judgement": pick the handler matching the layout, then
    # chain bot_response_v2 in every case.
    if reference:
        send_pretext_btn.click(
            add_pretext_reference,
            [state, model_selector, radio, question_box, answer_num, reference_box] + answer_list,
            [state, chatbot] + btn_list
        ).then(
            bot_response_v2,
            [state, temperature, top_p, max_output_tokens],
            [state, chatbot] + btn_list,
        )
    elif single_answer:
        send_pretext_btn.click(
            add_pretext_single_answer,
            [state, model_selector, radio, question_box, reference_box, answer_box],
            [state, chatbot] + btn_list
        ).then(
            bot_response_v2,
            [state, temperature, top_p, max_output_tokens],
            [state, chatbot] + btn_list,
        )   
    else:
        send_pretext_btn.click(
            add_pretext,
            [state, model_selector, radio, question_box, answer_num] + answer_list,
            [state, chatbot] + btn_list
        ).then(
            bot_response_v2,
            [state, temperature, top_p, max_output_tokens],
            [state, chatbot] + btn_list,
        )


def build_demo_v2(models):
    """Assemble the Gradio Blocks app with one tab per judging mode.

    The page consists of a notice header, a model dropdown shared by all
    tabs, and five tabs that each host a UI built by
    ``build_single_model_ui_v2``. For every tab the model-list mode is
    validated and a page-load hook calling ``load_demo_v2`` is registered.

    NOTE: this reads the module-level ``args`` (``args.model_list_mode``),
    which in the visible code is assigned only in the ``__main__`` block.

    Args:
        models: List of model names offered in the dropdown; the first entry
            is preselected, or "" when the list is empty.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).

    Raises:
        ValueError: If ``args.model_list_mode`` is neither "once" nor
            "reload".
    """
    # (tab title, keyword flags forwarded to build_single_model_ui_v2),
    # listed in the order the tabs are shown.
    tab_specs = (
        ("Judge w/o Ref", {}),
        ("Judge w/ Ref", {"reference": True}),
        ("Multi Answer Judge w/o Ref", {"multi_answer": True}),
        ("Multi Answer Judge w/ Ref", {"reference": True, "multi_answer": True}),
        ("Single Answer Judge", {"single_answer": True}),
    )
    known_modes = ["once", "reload"]

    with gr.Blocks(
        title="Chat with Open Large Language Models",
        css=block_css,
    ) as demo:
        url_params = gr.JSON(visible=False)

        # Promotion links are switched off; the text is kept for easy re-enabling.
        add_promotion_links = False
        if add_promotion_links:
            promotion = """
        - Introducing Llama 2: The Next Generation Open Source Large Language Model. [[Website]](https://ai.meta.com/llama/)
        - Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90% ChatGPT Quality. [[Blog]](https://lmsys.org/blog/2023-03-30-vicuna/)
        - | [GitHub](https://github.com/lm-sys/FastChat) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
        """
        else:
            promotion = ""

        notice_markdown = f"""
        # 🏔️ Chat with Open Large Language Models
        {promotion}

        ### Terms of use
        By using this service, users are required to agree to the following terms: The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. **The service collects user dialogue data and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) license.**

        ### Choose a model to chat with
        """

        gr.Markdown(notice_markdown, elem_id="notice_markdown")

        initial_model = models[0] if len(models) > 0 else ""
        with gr.Row(elem_id="model_selector_row"):
            model_selector = gr.Dropdown(
                choices=models,
                value=initial_model,
                interactive=True,
                show_label=False,
                container=False,
            )

        gr.Markdown("### Make Judgement")
        with gr.Tabs():
            for tab_title, ui_flags in tab_specs:
                with gr.TabItem(tab_title):
                    build_single_model_ui_v2(model_selector, **ui_flags)
                    if args.model_list_mode not in known_modes:
                        raise ValueError(f"Unknown model list mode: {args.model_list_mode}")
                    # One page-load hook per tab, fed with the URL query
                    # parameters collected by the JS snippet.
                    demo.load(
                        load_demo_v2,
                        [url_params],
                        None,
                        _js=get_window_url_params_js,
                    )

    return demo


# Script entry point: parse CLI options, fetch the model list, build the
# judge demo and launch the Gradio server.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int)
    parser.add_argument(
        "--share",
        action="store_true",
        help="Whether to generate a public, shareable link.",
    )
    parser.add_argument(
        "--controller-url",
        type=str,
        default="http://localhost:21001",
        help="The address of the controller.",
    )
    parser.add_argument(
        "--concurrency-count",
        type=int,
        default=10,
        help="The concurrency count of the gradio queue.",
    )
    parser.add_argument(
        "--model-list-mode",
        type=str,
        default="once",
        choices=["once", "reload"],
        help="Whether to load the model list once or reload the model list every time.",
    )
    parser.add_argument(
        "--moderate", action="store_true", help="Enable content moderation"
    )
    parser.add_argument(
        "--add-chatgpt",
        action="store_true",
        help="Add OpenAI's ChatGPT models (gpt-3.5-turbo, gpt-4)",
    )
    parser.add_argument(
        "--add-claude",
        action="store_true",
        help="Add Anthropic's Claude models (claude-2, claude-instant-1)",
    )
    parser.add_argument(
        "--add-palm",
        action="store_true",
        help="Add Google's PaLM model (PaLM 2 for Chat: chat-bison@001)",
    )
    parser.add_argument(
        "--gradio-auth-path",
        type=str,
        help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"',
        default=None,
    )
    # `args` stays a module-level name on purpose: build_demo_v2 reads
    # args.model_list_mode as a global.
    args = parser.parse_args()
    logger.info(f"args: {args}")

    # Set global variables
    set_global_vars(args.controller_url, args.moderate)
    models = get_model_list(
        args.controller_url, args.add_chatgpt, args.add_claude, args.add_palm
    )

    # Set authorization credentials
    # (auth stays None, i.e. no login, unless an auth file path was given)
    auth = None
    if args.gradio_auth_path is not None:
        auth = parse_gradio_auth_creds(args.gradio_auth_path)

    # TODO: switch to our judge UI
    # Launch the demo
    # demo = build_demo(models)
    demo = build_demo_v2(models)
    # Enable the request queue, then start serving.
    demo.queue(
        concurrency_count=args.concurrency_count, status_update_rate=10, api_open=False
    ).launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        max_threads=200,
        auth=auth,
    )