diff --git a/.gitignore b/.gitignore index c34fe5df..671c67da 100644 --- a/.gitignore +++ b/.gitignore @@ -135,6 +135,7 @@ logs/ archive/ data/ *.out +outputs # generated dot files and tree graphs .gv diff --git a/run_elm.py b/run_elm.py index 64cdab21..6bb8274a 100644 --- a/run_elm.py +++ b/run_elm.py @@ -1,8 +1,8 @@ """ This module gives an example of how to run the main ELM class. -It uses the hydra library to load the config from the -config/elm_sodarace_cfg.yaml file. +It uses the hydra library to load the config from the config dataclasses in +configs.py. This config file demonstrates an example of running ELM with the Sodarace environment, a 2D physics-based environment in which robots specified by @@ -10,27 +10,23 @@ """ import hydra -from hydra.core.config_store import ConfigStore from omegaconf import OmegaConf from openelm import ELM -from openelm.configs import SodaraceELMConfig -cs = ConfigStore.instance() -cs.store(name="config", node=SodaraceELMConfig) - -# Load hydra config from yaml files and command line arguments. @hydra.main( - config_name="config", + config_name="elmconfig", version_base="1.2", ) -def main(cfg): +def main(config): print("----------------- Config ---------------") - print(OmegaConf.to_yaml(cfg)) + print(OmegaConf.to_yaml(config)) print("----------------- End -----------------") - elm = ELM(cfg) - print("Best Individual: ", elm.run()) + config = OmegaConf.to_object(config) + elm = ELM(config) + print("Best Individual: ", elm.run(init_steps=config.qd.init_steps, + total_steps=config.qd.total_steps)) if __name__ == "__main__": diff --git a/run_p3.py b/run_p3.py new file mode 100644 index 00000000..cfa38fd6 --- /dev/null +++ b/run_p3.py @@ -0,0 +1,176 @@ +import logging +import pathlib +import requests +import time +import json +from collections import Counter + +from openelm.environments import p3_long_init_args, p3_med_init_args, P3Problem +from openelm.mutation_model import DiffModel, MutationModel, PromptModel +from openelm.configs import P3Config +from openelm.sandbox.server.sandbox_codex_execute import ExecResult +from openelm.utils.code_eval import pass_at_k +from openelm.codegen.codegen_utilities import set_seed + +import hydra +from omegaconf import OmegaConf + + +class P3: + def __init__(self, cfg: P3Config) -> None: + """ + Evaluate models on P3 dataset + """ + self.cfg: P3Config = cfg + + # Prompt size + if cfg.env.prompt_size == 'long': + env_args = p3_long_init_args + elif cfg.env.prompt_size == 'med': + env_args = p3_med_init_args + else: + raise ValueError('No init args found') + + # Model + if self.cfg.model.model_name == 'prompt': + self.mutation_model: MutationModel = PromptModel(self.cfg.model) + elif self.cfg.model.model_name == 'diff': + self.mutation_model: MutationModel = DiffModel(self.cfg.model) + + self.seed = env_args["seed"] + self.log_dir = 'logs/p3/problems' + + + def run(self): + """ + Query PromptMutationModelForP3 for solutions to programming puzzles + """ + # Get problems + problems = requests.get("https://raw.githubusercontent.com/microsoft/PythonProgrammingPuzzles/v0.2/puzzles/puzzles.json").json() + run_start_time = time.time() + num_problem_errors = 0 + for problem in problems: + problem_start_time = time.time() + problem_dict = {'name': problem['name']} + logging.info(problem['name']) + + problem['problem_func'] = problem['sat'].replace('def sat(', 'def f6(') # prompt form is f6() + problem['solution_preamble'] = problem['sol_header'].replace('def sol(', 'def g6(') # solution form is g6() + if self.cfg.env.prompt_size 
== 'long': + problem['solution_preamble'] = problem['solution_preamble'] + '\n' + problem['sol_docstring'] + + env = P3Problem(seed=self.seed, + config=self.cfg, + mutation_model=self.mutation_model, + problem_func=problem['problem_func'], + solution_preamble=problem['solution_preamble'], + ans_type = problem['ans_type']) + + # Find solutions + # If there is an error during finding a solution, log it and skip this problem + solutions = [] + try: + for i in range(self.cfg.env.solutions_per_problem // self.cfg.model.batch_size): + set_seed(i) # Change seed for each query + + try: + solutions += env.random() + except Exception as e: + logging.error(f'ERROR with solution {i} in {problem["name"]}: {e}') + num_problem_errors += 1 + raise(e) + except Exception as e: + continue + + # Evaluate fitness of solutions + res_sols_list = [] + solved = False + for sol in solutions: + res_sol_dict = {} + res_sol_dict['program_str'] = sol.program_str + + if isinstance(sol.result_obj, ExecResult): + if self.cfg.save_result_obj: res_sol_dict['result_obj'] = sol.result_obj.name + fitness = 0.0 + else: + if self.cfg.save_result_obj: res_sol_dict['result_obj'] = sol.result_obj + fitness = env.fitness(sol) + + res_sol_dict['fitness'] = fitness + res_sols_list.append(res_sol_dict) + if not solved and fitness == 1.0: + solved = True # just want to save if solved at all + + problem_dict['config'] = OmegaConf.to_container(self.cfg) + problem_dict['solutions'] = res_sols_list + problem_dict['solved'] = solved + problem_dict['time_elapsed'] = time.time() - problem_start_time + + # Save results + dir = f'{self.log_dir}/{problem_dict["name"]}/{run_start_time}' + pathlib.Path(dir).mkdir(parents=True, exist_ok=True) + + with open(f'{dir}/results.json', 'w') as file: + file.write(json.dumps(problem_dict)) + + logging.info(f'Successfully ran on {len(problems)}/{len(problems)-num_problem_errors}' + + f' problems and saved results to {self.log_dir}') + + + def eval_pass_at_k(self, timestamp: str, k: int): + """ + pass@k metric over a subset of run logs + + Args: + timestamp (str): (optional) go through all problems with a run generated with timestamp + (if None, go through the latest run for every problem currently in logs) + k (int): k for pass@k + """ + + path = pathlib.Path(self.log_dir) + problem_paths = sorted(list(path.iterdir())) # Get all logged problems + paks = [] + for p in problem_paths: + n = 0 + c = 0 + # Select one of the runs per problem + if len(timestamp) == 0: + # Get latest run + path = pathlib.Path(p) + run_paths = sorted(list(path.iterdir())) # Get all the runs per problem + run_path = run_paths[-1] + else: + # Get 'timestamp' run + run_path = p / timestamp + + with open(f'{run_path}/results.json', 'r') as f: + results = json.load(f) + n += len(results['solutions']) + c += Counter(sol['fitness'] for sol in results['solutions'])[1.0] + + pak = pass_at_k(n=n, c=c, k=k) + paks.append(pak) + + pak_overall = sum(paks) / len(paks) + return pak_overall + + +# Load hydra config from yaml files and command line arguments. 
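For reference, eval_pass_at_k above tallies, per problem, the number of generated solutions n and the number that reach fitness 1.0 c, then delegates to openelm.utils.code_eval.pass_at_k. That helper is assumed here to follow the standard unbiased pass@k estimator, 1 - C(n-c, k)/C(n, k); the sketch below is only an illustration and pass_at_k_sketch is a hypothetical name, not part of the codebase.

import numpy as np

def pass_at_k_sketch(n: int, c: int, k: int) -> float:
    # Unbiased estimate of the probability that at least one of k samples,
    # drawn from n generations of which c are correct, solves the puzzle.
    if n - c < k:
        return 1.0
    return float(1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)))

The per-problem values are averaged and logged by main() when cfg.eval_k > 0. The Hydra entry point for run_p3.py follows.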
+@hydra.main( + config_name="p3config", + version_base="1.2", +) +def main(cfg): + # Run + logging.info("----------------- Config ---------------") + logging.info(OmegaConf.to_yaml(cfg)) + logging.info("----------------- End -----------------") + p3 = P3(cfg) + + if cfg.eval_k > 0: logging.info(f"PASS@K: {p3.eval_pass_at_k(timestamp=cfg.eval_timestamp, k=cfg.eval_k)}") + else: p3.run() + + +if __name__ == "__main__": + main() + \ No newline at end of file diff --git a/src/openelm/__init__.py b/src/openelm/__init__.py index 731e512f..19ead3a9 100644 --- a/src/openelm/__init__.py +++ b/src/openelm/__init__.py @@ -3,3 +3,5 @@ from openelm.elm import ELM __version__ = importlib_version("openelm") + +__all__ = ["ELM"] diff --git a/src/openelm/benchmarks/benchmark_bugs.py b/src/openelm/benchmarks/benchmark_bugs.py index f0c99ca2..6b4aaa8d 100644 --- a/src/openelm/benchmarks/benchmark_bugs.py +++ b/src/openelm/benchmarks/benchmark_bugs.py @@ -13,8 +13,7 @@ from openelm.codegen import model_setup, sample, truncate from openelm.configs import BaseConfig -from openelm.utils.code_eval import eval_completions, mutate_code -from openelm.utils.diff_eval import apply_diff, split_diff +from openelm.utils import apply_diff, eval_completions, mutate_code, split_diff @dataclass @@ -54,7 +53,7 @@ def __init__(self, cfg: BenchmarkBugsConfig): os.environ["TOKENIZERS_PARALLELISM"] = "false" - self.device = torch.device("cuda" if cfg.cuda else "cpu") + self.device = torch.device("cuda") self.model, self.tokenizer, self.device = model_setup(cfg, self.device) def benchmark_parity(self, n_bugs, **kwargs): diff --git a/src/openelm/benchmarks/benchmark_crossover.py b/src/openelm/benchmarks/benchmark_crossover.py index d4bf8d60..ab23e226 100644 --- a/src/openelm/benchmarks/benchmark_crossover.py +++ b/src/openelm/benchmarks/benchmark_crossover.py @@ -86,7 +86,7 @@ def __init__(self, cfg: BenchmarkCrossoverConfig): os.environ["TOKENIZERS_PARALLELISM"] = "false" - self.device = torch.device("cuda" if cfg.cuda else "cpu") + self.device = torch.device("cuda") self.model, self.tokenizer, self.device = model_setup(cfg, self.device) def construct_prompt(self, seeds): diff --git a/src/openelm/benchmarks/benchmark_lm_speed.py b/src/openelm/benchmarks/benchmark_lm_speed.py index 857dd2e7..ef3c73f7 100644 --- a/src/openelm/benchmarks/benchmark_lm_speed.py +++ b/src/openelm/benchmarks/benchmark_lm_speed.py @@ -8,11 +8,12 @@ from tqdm import trange from openelm.codegen import model_setup, sample +from openelm.configs import BaseConfig from openelm.environments import SQUARE_SEED @dataclass -class BenchmarkSpeedConfig: +class BenchmarkSpeedConfig(BaseConfig): hydra: Any = field( default_factory=lambda: { "run": {"dir": "logs/benchmarks/lm_speed/${now:%Y-%m-%d-%H-%M-%S}"} diff --git a/src/openelm/codegen/codegen_utilities.py b/src/openelm/codegen/codegen_utilities.py index ce39436b..065ad78d 100644 --- a/src/openelm/codegen/codegen_utilities.py +++ b/src/openelm/codegen/codegen_utilities.py @@ -1,11 +1,14 @@ import os import random import re +from typing import Optional import numpy as np import torch from transformers import AutoModelForCausalLM, AutoTokenizer +from openelm.configs import ModelConfig + def set_seed(seed=None, deterministic=True) -> int: if seed is None: @@ -66,37 +69,46 @@ def find_re(string, pattern, start_pos): return completion -def model_setup(cfg, device=None): +def model_setup(cfg: ModelConfig, device=None, codegen_tokenizer: bool = True): set_seed(cfg.seed, deterministic=True) if device is None: - device = 
torch.device("cuda" if cfg.cuda else "cpu") + device = torch.device("cuda") use_fp16 = True if not cfg.fp16 or device.type == "cpu": use_fp16 = False - if cfg.model.startswith("codegen-16B"): + if "codegen-16B" in cfg.model_path: use_fp16 = True - tokenizer = AutoTokenizer.from_pretrained(cfg.model) - tokenizer.padding_side = "left" - tokenizer.pad_token = 50256 + tokenizer = AutoTokenizer.from_pretrained(cfg.model_path) + if codegen_tokenizer: + tokenizer.padding_side = "left" + tokenizer.pad_token = 50256 - model_path = cfg.model if cfg.gpus > 1: model = torch.nn.DataParallel( - create_model(model_path, fp16=use_fp16), device_ids=list(range(cfg.gpus)) + create_model(cfg.model_path, fp16=use_fp16), + device_ids=list(range(cfg.gpus)), ).to(device) else: - model = create_model(model_path, fp16=use_fp16).to(device) + model = create_model(cfg.model_path, fp16=use_fp16).to(device) return model, tokenizer, device def sample( - batch, cfg, model, tokenizer, decode: bool = True, starting_idx=None, **kwargs + batch, + cfg: ModelConfig, + model, + tokenizer, + decode: bool = True, + starting_idx: Optional[int] = None, + num_return_sequences: Optional[int] = None, + **kwargs ) -> list[str]: """Run a model on a batch of contexts for a particular task.""" - batch_size = kwargs.get("batch_size", cfg.batch_size) - device = kwargs.get("device", torch.device("cuda" if cfg.cuda else "cpu")) + if num_return_sequences is None: + num_return_sequences = cfg.batch_size + device = kwargs.get("device", torch.device("cuda")) temperature = kwargs.get("temperature", cfg.temp) top_p = kwargs.get("top_p", cfg.top_p) gen_max_len = kwargs.get("gen_max_len", cfg.gen_max_len) @@ -111,7 +123,7 @@ def sample( tokens = model.module.generate( **batch, do_sample=True, - num_return_sequences=batch_size, + num_return_sequences=num_return_sequences, temperature=temperature, max_new_tokens=gen_max_len, top_p=top_p, @@ -122,7 +134,7 @@ def sample( tokens = model.generate( **batch, do_sample=True, - num_return_sequences=batch_size, + num_return_sequences=num_return_sequences, temperature=temperature, max_new_tokens=gen_max_len, top_p=top_p, diff --git a/src/openelm/configs.py b/src/openelm/configs.py index ec7b282c..4f6512b9 100644 --- a/src/openelm/configs.py +++ b/src/openelm/configs.py @@ -11,91 +11,147 @@ class BaseConfig: @dataclass -class ConfigClass(BaseConfig): - model: str = MISSING - epochs: int = MISSING - batch_size: int = MISSING - fp16: bool = MISSING - cuda: bool = MISSING - gpus: int = MISSING - seed: int = MISSING - deterministic: bool = MISSING - top_p: float = MISSING - temp: float = MISSING - timeout: float = MISSING - gen_max_len: int = MISSING - evo_init_steps: int = MISSING - evo_n_steps: int = MISSING - behavior_n_bins: int = MISSING - evo_history_length: int = MISSING - evaluation_steps: int = MISSING - env_name: str = MISSING - run_name: str = MISSING - - -@dataclass -class SodaraceELMConfig(BaseConfig): - hydra: Any = field( - default_factory=lambda: { - "run": {"dir": "logs/elm/sodarace/${hydra.job.override_dirname}"} - } - ) - model: str = "Salesforce/codegen-350M-mono" - env_name: str = "sodarace" - batch_size: int = 32 +class ModelConfig(BaseConfig): fp16: bool = True cuda: bool = True gpus: int = 1 seed: Optional[int] = None - debug: bool = False deterministic: bool = False top_p: float = 0.95 temp: float = 0.85 - timeout: float = 5.0 # Seconds - eval_ms: int = 1000 # Milliseconds gen_max_len: int = 768 - evo_init_steps: int = 2 - evo_n_steps: int = 5 - behavior_n_bins: int = 12 - evo_history_length: 
int = 1 + batch_size: int = 32 + model_path: str = MISSING # Can be HF model name or path to local model + + +@dataclass +class PromptModelConfig(ModelConfig): + model_name: str = "prompt" + model_path: str = "Salesforce/codegen-350M-mono" + + +@dataclass +class DiffModelConfig(ModelConfig): + model_name: str = "diff" + model_path: str = "CarperAI/diff-codegen-350m-v2" + + +@dataclass +class QDConfig(BaseConfig): + init_steps: int = 2 + total_steps: int = 5 + + +@dataclass +class MAPElitesConfig(QDConfig): + history_length: int = 1 + save_history: bool = False + map_grid_size: tuple[int, ...] = field(default_factory=lambda: (12,)) + + +@dataclass +class EnvConfig(BaseConfig): + timeout: float = 5.0 # Seconds + sandbox: bool = False + sandbox_server: str = "http://localhost:5000" processes: int = 12 + batch_size: int = 32 # Batch size of MAP-Elites + env_name: str = MISSING + debug: bool = False + + +@dataclass +class SodaraceEnvConfig(EnvConfig): + env_name: str = "sodarace" + eval_ms: int = 1000 # Milliseconds + behavior_space: list[list[float]] = field( + default_factory=lambda: [ + # Height, Width, Mass dimensions + [0, 1000], + [0, 1000], + [0, 2000], + ] + ) + starting_seeds: list[str] = field(default_factory=lambda: ["square"]) + instruction: int = 1 + crossover: bool = False + + +@dataclass +class ImageEnvConfig(EnvConfig): + env_name: str = "image_evolution" + behavior_mode: str = "3-channel" + target: str = "circle" + + +@dataclass +class P3EnvConfig(EnvConfig): + env_name: str = "p3_problem" + solutions_per_problem: int = 128 + prompt_size: str = "long" # med or long + timeout: float = 1.0 + + +defaults_elm = [ + {"model": "prompt"}, + {"qd": "mapelites"}, + {"env": "sodarace"}, + "_self_", +] + + +@dataclass +class ELMConfig(BaseConfig): + hydra: Any = field( + default_factory=lambda: { + "run": {"dir": "logs/elm/${hydra.job.override_dirname}"} + } + ) + defaults: list[Any] = field(default_factory=lambda: defaults_elm) + model: Any = MISSING + qd: Any = MISSING + env: Any = MISSING run_name: Optional[str] = None - sandbox: bool = False + + +defaults_p3 = [ + {"model": "prompt"}, + {"env": "p3_problem"}, + "_self_", +] @dataclass -class ImageELMConfig(BaseConfig): +class P3Config(BaseConfig): hydra: Any = field( default_factory=lambda: { - "run": {"dir": "logs/elm/image/${hydra.job.override_dirname}"} + "run": {"dir": "logs/p3/${hydra.job.override_dirname}"} } ) - model: str = "Salesforce/codegen-350M-mono" - batch_size: int = 32 - fp16: bool = True - cuda: bool = True - gpus: int = 1 - seed: Optional[int] = None - debug: bool = False - deterministic: bool = False - top_p: float = 0.95 - temp: float = 0.85 - timeout: float = 5.0 # Seconds - evaluation_steps: int = 1000 # Milliseconds - gen_max_len: int = 1024 - evo_init_steps: int = 10 - evo_n_steps: int = 15 - behavior_n_bins: int = 12 - evo_history_length: int = 1 - processes: int = 12 + defaults: list[Any] = field(default_factory=lambda: defaults_p3) + model: Any = MISSING + env: Any = MISSING + save_result_obj: bool = False + # set >0, evaluate pass@k of previous runs using this k, instead of doing a new run + eval_k: int = -1 + # optionally provide timestamp of run to eval pass@k, otherwise eval with + # latest run of every problem + eval_timestamp: str = "" run_name: Optional[str] = None - sandbox: bool = False -# TODO: Hierarchy of configs -# e.g. ModelConfig, QDConfig, EnvConfig, etc. 
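Because ELMConfig and P3Config declare Hydra defaults lists over the model / qd / env groups (registered with the ConfigStore at the bottom of this file), a run can be composed and overridden without any YAML files. The snippet below is a hedged sketch using Hydra's compose API, roughly equivalent to what run_elm.py receives via @hydra.main; the override values are illustrative only.

from hydra import compose, initialize
from omegaconf import OmegaConf

import openelm.configs  # noqa: F401  # importing the module registers the config groups

with initialize(version_base="1.2", config_path=None):
    cfg = compose(
        config_name="elmconfig",
        overrides=["model=diff", "qd.init_steps=10", "env.batch_size=16"],
    )
print(OmegaConf.to_yaml(cfg))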
-# Also add base ELMConfig(BaseConfig) +def register_configstore() -> ConfigStore: + """Register configs with Hydra's ConfigStore.""" + cs = ConfigStore.instance() + cs.store(group="env", name="sodarace", node=SodaraceEnvConfig) + cs.store(group="env", name="image_evolution", node=ImageEnvConfig) + cs.store(group="env", name="p3_problem", node=P3EnvConfig) + cs.store(group="qd", name="mapelites", node=MAPElitesConfig) + cs.store(group="model", name="prompt", node=PromptModelConfig) + cs.store(group="model", name="diff", node=DiffModelConfig) + cs.store(name="elmconfig", node=ELMConfig) + cs.store(name="p3config", node=P3Config) + return cs -cs = ConfigStore.instance() -cs.store(name="elm_cfg", node=ConfigClass) +CONFIGSTORE = register_configstore() diff --git a/src/openelm/diff_model.py b/src/openelm/diff_model.py deleted file mode 100644 index e2982e5c..00000000 --- a/src/openelm/diff_model.py +++ /dev/null @@ -1,347 +0,0 @@ -import functools -import json -import os -import re -from abc import ABC, abstractmethod -from dataclasses import dataclass - -import numpy as np -import requests - -from openelm.codegen import model_setup, sample, set_seed, truncate -from openelm.configs import SodaraceELMConfig -from openelm.environments.sodaracer import IMPORTS, SQUARE_PREREQ, Walker -from openelm.utils.code_eval import pool_exec_processes -from openelm.utils.diff_eval import apply_diff, split_diff - - -class MutationModel(ABC): - """Base model class for all mutation models.""" - - @abstractmethod - def generate_program(self, code_batch: list[str]) -> list[dict]: - pass - - -@dataclass -class FunctionTemplate: - """ - A function template for a mutation model. - - Attributes: - func_name: (str) The name of the function that we want to execute. - import_line: (str) The import lines we add to the code. - func_preamble: (str) The function definition, as well as potentially a - few initial lines to generate code. - instruction (str): The instruction we give to the model, before the - preamble. - """ - - func_name: str - import_line: str - func_preamble: str - instruction: str - - -class PromptMutationModel(MutationModel): - """Mutation model that uses prompts to change a seed.""" - - def __init__( - self, - cfg: SodaraceELMConfig, - function_template: FunctionTemplate, - sandbox_server: str = "http://localhost:5000", - ) -> None: - - self.cfg: SodaraceELMConfig = cfg - seed: int = set_seed(self.cfg.seed) - # Use RNG to rotate random seeds during inference. - self.rng = np.random.default_rng(seed=seed) - self.sandbox_server = sandbox_server - os.environ["TOKENIZERS_PARALLELISM"] = "false" - self.model, self.tokenizer, self.device = model_setup(self.cfg) - self.func_template: FunctionTemplate = function_template - - def construct_prompt(self, code: str) -> tuple[str, str]: - """ - Construct a prompt from a code string. - - Args: - code (str): The code string. - - Returns: - A tuple of the prompt string and imports plus instruction. - """ - prompt_str = ( - code + self.func_template.instruction + self.func_template.func_preamble - ) - preamble_str = ( - self.func_template.import_line - + self.func_template.instruction - + self.func_template.func_preamble - ) - return prompt_str, preamble_str - - def generate_program(self, code_batch: list[str]) -> list[dict]: - """ - Generate a new program from a batch of programs. - - Given a piece of code, do prompt mutation, execute the code, - and return the result. - - Args: - code (str): The full code string. 
- - Returns: - A numpy array (if successful) or the exception object. - """ - prompts, preamble_strings = zip(*map(self.construct_prompt, code_batch)) - encodings = self.tokenizer( - list(prompts), - truncation=True, - padding=True, - return_tensors="pt", - ) - completions: list[str] = sample( - encodings, - self.cfg, - self.model, - self.tokenizer, - batch_size=1, - ) - local_scope_exec: bool = len(self.func_template.func_preamble) > 0 - trunc = functools.partial(truncate, only_local_scope=local_scope_exec) - self.truncations: list[str] = [ - preamble_strings[i] + trunc(completions[i]) for i in range(len(completions)) - ] - if self.cfg.sandbox: - results = [] - for code in self.truncations: - resp = self._get_response(code, self.cfg.timeout) - if resp.status_code == 200: - return_dict = json.loads(resp.text) - results.append(return_dict) - else: - results = pool_exec_processes( - self.truncations, - func_name=self.func_template.func_name, - timeout=self.cfg.timeout, - processes=self.cfg.processes, - debug=self.cfg.debug, - ) - return self._post_process(results) - - @abstractmethod - def _get_response(self, code: str, timeout: float) -> requests.models.Response: - raise NotImplementedError - - @abstractmethod - def _post_process(self, results: list) -> list: - raise NotImplementedError - - -class PromptMutationForSodarace(PromptMutationModel): - def __init__(self, cfg, sandbox_server="http://localhost:5000") -> None: - function_template = FunctionTemplate( - func_name="make_walker", - import_line=IMPORTS + SQUARE_PREREQ, - instruction="", - func_preamble="def make_walker():\n", - ) - super().__init__(cfg, function_template, sandbox_server) - - def _get_response(self, code: str, timeout: float) -> requests.models.Response: - return requests.post( - f"{self.sandbox_server}/gen_racer", - json={"code": code, "timeout": timeout}, - timeout=timeout, - ) - - def _post_process(self, results: list) -> list: - if self.cfg.sandbox: - return results - else: - result_list: list = [] - for i, result in enumerate(results): - try: - if isinstance(result, Walker) and result.validate(): - result_list.append( - { - "program_str": self.truncations[i], - "result_obj": result.to_dict(), - } - ) - else: - if self.cfg.debug: - print("Failed execution, type:", result) - print(self.truncations[i]) - except Exception as e: - if self.cfg.debug: - print(type(e), e) - return result_list - - -class PromptMutationForImgTask(PromptMutationModel): - def __init__(self, cfg, sandbox_server="http://localhost:5000") -> None: - func_name = "draw" - func_preamble = ( - f'def {func_name}():\n\t"""Draw a yellow circle.\n' - '\t"""\n\tpic = np.zeros((32, 32, 3))\n' - ) - function_template = FunctionTemplate( - func_name=func_name, - import_line="import math\nimport numpy as np", - func_preamble=func_preamble, - instruction="", - ) - super().__init__(cfg, function_template, sandbox_server) - - def reset_shape(self, shape: tuple): - func_name = self.func_template.func_name - self.func_preamble = f'def {func_name}():\n\t"""Draw a yellow circle.\n\t"""\n\tpic = np.zeros({shape})\n' - - def _get_response(self, code: str, timeout: float) -> requests.models.Response: - func_name = self.func_template.func_name - return requests.post( - f"{self.sandbox_server}/eval_imageoptim_func", - json={"code": code, "func_name": func_name, "timeout": timeout}, - timeout=timeout, - ) - - def _post_process(self, results: list) -> list: - for i in range(len(results)): - results[i]["result_obj"] = np.array(results[i]["result_obj"]) - return results - - 
-class DiffModel(PromptMutationModel): - def __init__( - self, - cfg: SodaraceELMConfig, - function_template: FunctionTemplate, - sandbox_server: str = "http://localhost:5000", - ) -> None: - super().__init__(cfg, function_template, sandbox_server) - - def construct_prompt(self, code: str) -> tuple[str, str]: - prompt_list = [ - " walker.py\n ", - code, - "\n Fixed bugs", - ] - prompt_str = "".join(prompt_list) - prompt_str = ( - code + self.func_template.instruction + self.func_template.func_preamble - ) - preamble_str = ( - self.func_template.import_line - + self.func_template.instruction - + self.func_template.func_preamble - ) - return prompt_str, preamble_str - - def generate_program(self, code_batch: list[str]) -> list[dict]: - """ - Generate a new program for a diff model from a batch of programs. - - Given a piece of code, do prompt mutation, execute the code, - and return the result. - - Args: - code (str): The full code string. - - Returns: - A numpy array (if successful) or the exception object. - """ - prompts, preamble_strings = zip(*map(self.construct_prompt, code_batch)) - encodings = self.tokenizer( - list(prompts), - truncation=True, - padding=True, - return_tensors="pt", - ) - completions: list[str] = sample( - encodings, - self.cfg, - self.model, - self.tokenizer, - batch_size=1, - ) - - local_scope_exec: bool = len(self.func_template.func_preamble) > 0 - end_of_diff = re.compile("\n[^ +-@]+") - trunc = functools.partial(truncate, only_local_scope=local_scope_exec) - self.truncations: list[str] = [ - preamble_strings[i] + trunc(completions[i]) for i in range(len(completions)) - ] - outputs = [] - for i, code in enumerate(self.truncations): - # split the diff text according to , , , . - parsed: dict = split_diff(code) - # truncate the diff hunk at the first line not starting with " ", - # "+", "-", or "@". 
- if parsed and all( - (s in parsed for s in ["name", "file", "message", "diff"]) - ): - diff_hunk: str = end_of_diff.split(parsed["diff"])[0] - nme_idx: int = diff_hunk.find("") - if nme_idx != -1: - diff_hunk = diff_hunk[:nme_idx] - outputs.append(apply_diff(prompts[i], diff_hunk)) - if self.cfg.sandbox: - results = [] - for code in outputs: - resp = self._get_response(code, self.cfg.timeout) - if resp.status_code == 200: - return_dict = json.loads(resp.text) - results.append(return_dict) - else: - results = pool_exec_processes( - outputs, - func_name=self.func_template.func_name, - timeout=self.cfg.timeout, - processes=self.cfg.processes, - debug=self.cfg.debug, - ) - return self._post_process(results) - - -class DiffModelForSodarace(DiffModel): - def __init__(self, cfg, sandbox_server="http://localhost:5000") -> None: - function_template = FunctionTemplate( - func_name="make_walker", - import_line=IMPORTS + SQUARE_PREREQ, - instruction="", - func_preamble="def make_walker():\n", - ) - super().__init__(cfg, function_template, sandbox_server) - - def _get_response(self, code: str, timeout: float) -> requests.models.Response: - return requests.post( - f"{self.sandbox_server}/gen_racer", - json={"code": code, "timeout": timeout}, - timeout=timeout, - ) - - def _post_process(self, results: list) -> list: - if self.cfg.sandbox: - return results - else: - result_list: list = [] - for i, result in enumerate(results): - try: - if isinstance(result, Walker) and result.validate(): - result_list.append( - { - "program_str": self.truncations[i], - "result_obj": result.to_dict(), - } - ) - else: - if self.cfg.debug: - print("Failed execution, type:", result) - print(self.truncations[i]) - except Exception as e: - if self.cfg.debug: - print(type(e), e) - return result_list diff --git a/src/openelm/elm.py b/src/openelm/elm.py index 382b2eaf..7f449c3b 100644 --- a/src/openelm/elm.py +++ b/src/openelm/elm.py @@ -1,17 +1,13 @@ -from openelm.environments import ( - ImageOptim, - Sodarace, - image_init_args, - sodarace_init_args, -) -from openelm.map_elites import MAPElites +from typing import Optional -ENVS_DICT = {"sodarace": Sodarace, "imageoptim": ImageOptim} -ARG_DICT = {"sodarace": sodarace_init_args, "imageoptim": image_init_args} +from openelm.configs import DiffModelConfig, ELMConfig, PromptModelConfig +from openelm.environments import ENVS_DICT +from openelm.map_elites import MAPElites +from openelm.mutation_model import DiffModel, MutationModel, PromptModel class ELM: - def __init__(self, cfg, diff_model_cls=None, env_args: dict = None) -> None: + def __init__(self, config: ELMConfig) -> None: """ The main class of ELM. @@ -19,41 +15,43 @@ def __init__(self, cfg, diff_model_cls=None, env_args: dict = None) -> None: from the passed config. Args: - cfg: The config (e.g. OmegaConf who uses dot to access members). - diff_model_cls: (Optional) The class of diff model. One can apply - alternative models here for comparison. - env_args: (Optional) The argument dict for Environment. + config: The config containing the diff model, environment, and QD algorithm. """ - self.cfg = cfg - - # Get the defaults if `env_args` is not specified. - if env_args is None: - env_args = ARG_DICT[self.cfg.env_name] - env_args["config"] = self.cfg # Override default environment config - - # Override diff model if `diff_model_cls` is specified. 
- if diff_model_cls is not None: - self.diff_model = diff_model_cls(self.cfg) - env_args = {**env_args, "diff_model": self.diff_model} - else: - self.diff_model = None - - self.seed = env_args["seed"] - self.environment = ENVS_DICT[self.cfg.env_name](**env_args) + self.config: ELMConfig = config + env_name: str = self.config.env.env_name + if isinstance(self.config.model, PromptModelConfig): + self.mutation_model: MutationModel = PromptModel(self.config.model) + elif isinstance(self.config.model, DiffModelConfig): + self.mutation_model = DiffModel(self.config.model) + + self.environment = ENVS_DICT[env_name]( + config=self.config.env, + mutation_model=self.mutation_model, + ) self.qd_algorithm = MAPElites( self.environment, - n_bins=self.cfg.behavior_n_bins, - history_length=self.cfg.evo_history_length, + map_grid_size=self.config.qd.map_grid_size, + history_length=self.config.qd.history_length, + save_history=self.config.qd.save_history, ) - def run(self) -> str: + def run( + self, init_steps: Optional[int] = None, total_steps: Optional[int] = None + ) -> str: """ Run the ELM algorithm to evolve the population in the environment. + Args: + init_steps: The number of steps to run the initialisation phase. + total_steps: The number of steps to run the QD algorithm in total, + including init_steps. + Returns: str: A string representing the maximum fitness genotype. The `qd_algorithm` class attribute will be updated. """ - return self.qd_algorithm.search( - initsteps=self.cfg.evo_init_steps, totalsteps=self.cfg.evo_n_steps - ) + if init_steps is None: + init_steps = self.config.qd.init_steps + if total_steps is None: + total_steps = self.config.qd.total_steps + return self.qd_algorithm.search(init_steps=init_steps, total_steps=total_steps) diff --git a/src/openelm/environments/__init__.py b/src/openelm/environments/__init__.py index 58bd70ea..3e7a34b9 100644 --- a/src/openelm/environments/__init__.py +++ b/src/openelm/environments/__init__.py @@ -1,4 +1,4 @@ -import numpy as np +from typing import Any from openelm.environments.environments import ( BaseEnvironment, @@ -6,67 +6,124 @@ Genotype, ImageOptim, MatchString, + P3Problem, Sodarace, ) -from openelm.environments.sodaracer import IMPORTS, SQUARE, SQUARE_PREREQ - -# ----- Generate sample seeds and init args for environments ----- -# They are simple template arguments to initialize several environments. 
-# Sample usage: -# from openelm.environment import sodarace_init_args -# sodarace = Sodarace(**sodarace_init_args, run_name="test") - - -IMAGE_SEED = { - "program_str": """import numpy as np -def draw_blue_rectangle() -> np.ndarray: -\tpic = np.zeros((32, 32, 3)) -\tfor x in range(2, 30): -\t\tfor y in range(2, 30): -\t\t\tpic[x, y] = np.array([0, 0, 255]) -\treturn pic -""", - "result_obj": None, + +P3_MED_SEED = { + "program_str": """from typing import List + +def f1(s: str): + return "Hello " + s == "Hello world" + +def g1(): + return "world" + +assert f1(g1()) + +def f2(s: str): + return "Hello " + s[::-1] == "Hello world" + +def g2(): + return "world"[::-1] + +assert f2(g2()) + +def f3(x: List[int]): + return len(x) == 2 and sum(x) == 3 + +def g3(): + return [1, 2] + +assert f3(g3()) + +def f4(s: List[str]): + return len(set(s)) == 1000 and all( + (x.count("a") > x.count("b")) and ('b' in x) for x in s) + +def g4(): + return ["a"*(i+2)+"b" for i in range(1000)] + +assert f4(g4()) + +def f5(n: int): + return str(n * n).startswith("123456789") + +def g5(): + return int(int("123456789" + "0"*9) ** 0.5) + 1 + +assert f5(g5())""", + "result_obj": {}, } -exec(IMAGE_SEED["program_str"], globals()) -IMAGE_SEED["result_obj"] = globals()["draw_blue_rectangle"]() -target = np.zeros((32, 32, 3)) -for y in range(32): - for x in range(32): - if (y - 16) ** 2 + (x - 16) ** 2 <= 100: # a radius-10 circle - target[y, x] = np.array([1, 1, 0]) - - -SQUARE_SEED = { - "program_str": IMPORTS + SQUARE_PREREQ + SQUARE, - "result_obj": { - "joints": [(0, 0), (0, 10), (10, 10), (10, 0), (5, 5)], - "muscles": [ - [0, 1, {"type": "distance", "amplitude": 0.0, "phase": 0.0}], - [1, 2, {"type": "distance", "amplitude": 0.0, "phase": 0.0}], - [2, 3, {"type": "distance", "amplitude": 0.0, "phase": 0.0}], - [3, 0, {"type": "distance", "amplitude": 0.0, "phase": 0.0}], - [3, 4, {"type": "distance", "amplitude": 0.0, "phase": 0.0}], - [0, 4, {"type": "muscle", "amplitude": 5.0, "phase": 0.0}], - [1, 4, {"type": "muscle", "amplitude": 10.0, "phase": 0.0}], - [2, 4, {"type": "muscle", "amplitude": 2.0, "phase": 0.0}], - ], - }, + +P3_LONG_SEED = { + "program_str": '''from typing import List + +def f1(s: str): + return "Hello " + s == "Hello world" + +def g1(): + """Find a string that when concatenated onto 'Hello ' gives 'Hello world'.""" + return "world" + +assert f1(g1()) + +def f2(s: str): + return "Hello " + s[::-1] == "Hello world" + +def g2(): + """Find a string that when reversed and concatenated onto 'Hello ' gives 'Hello world'.""" + return "world"[::-1] + +assert f2(g2()) + +def f3(x: List[int]): + return len(x) == 2 and sum(x) == 3 + +def g3(): + """Find a list of two integers whose sum is 3.""" + return [1, 2] + +assert f3(g3()) + +def f4(s: List[str]): + return len(set(s)) == 1000 and all( + (x.count("a") > x.count("b")) and ('b' in x) for x in s) + +def g4(): + """Find a list of 1000 distinct strings which each have more 'a's than 'b's and at least one 'b'.""" + return ["a"*(i+2)+"b" for i in range(1000)] + +assert f4(g4()) + +def f5(n: int): + return str(n * n).startswith("123456789") + +def g5(): + """Find an integer whose perfect square begins with 123456789 in its decimal representation.""" + return int(int("123456789" + "0"*9) ** 0.5) + 1 + +assert f5(g5())''', + "result_obj": {}, } -# A sample init args for ImageOptim -image_init_args = { - "seed": IMAGE_SEED, - "config": "openelm/config/elm_image_cfg.yaml", - "target_img": target, +p3_med_init_args = { + "seed": P3_MED_SEED, + "config": 
"openelm/config/elm_p3_cfg.yaml", "diff_model": None, - "behavior_mode": "3-channel", } -# A sample init args for Sodarace -sodarace_init_args = {"seed": SQUARE_SEED, "diff_model": None, "eval_ms": 1000} +p3_long_init_args = { + "seed": P3_LONG_SEED, + "config": "openelm/config/elm_p3_cfg.yaml", + "diff_model": None, +} -# ----- (Sample init args end) ----- +ENVS_DICT: dict[str, Any] = { + "sodarace": Sodarace, + "image_evolution": ImageOptim, + "p3": P3Problem, +} __all__ = [ "Genotype", @@ -75,8 +132,6 @@ def draw_blue_rectangle() -> np.ndarray: "ImageOptim", "MatchString", "Sodarace", - "IMAGE_SEED", - "image_init_args", - "SQUARE_SEED", - "sodarace_init_args", + "ENVS_DICT", + "P3Problem", ] diff --git a/src/openelm/environments/env_utils.py b/src/openelm/environments/env_utils.py new file mode 100644 index 00000000..fbad4a12 --- /dev/null +++ b/src/openelm/environments/env_utils.py @@ -0,0 +1,23 @@ +import numpy as np + + +def get_image_target(name: str) -> np.ndarray: + if name == "circle": + target = np.zeros((32, 32, 3)) + for y in range(32): + for x in range(32): + if (y - 16) ** 2 + (x - 16) ** 2 <= 100: # a radius-10 circle + target[y, x] = np.array([1, 1, 0]) + else: + raise NotImplementedError(f"Image target {name} not implemented") + return target + + +IMAGE_SEED: str = """ +def draw(): +\tpic = np.zeros((32, 32, 3)) +\tfor x in range(2, 30): +\t\tfor y in range(2, 30): +\t\t\tpic[x, y] = np.array([0, 0, 255]) +\treturn pic +""" diff --git a/src/openelm/environments/environments.py b/src/openelm/environments/environments.py index 15b1209e..bbcb1931 100644 --- a/src/openelm/environments/environments.py +++ b/src/openelm/environments/environments.py @@ -1,15 +1,31 @@ +import json import math import string +import sys from abc import ABC, abstractmethod -from dataclasses import is_dataclass -from typing import Generic, Optional, TypeVar, Union +from typing import Generic, Optional, Type, TypeVar, Union import numpy as np -from omegaconf import DictConfig, OmegaConf - -from openelm.configs import BaseConfig, ImageELMConfig, SodaraceELMConfig -from openelm.diff_model import PromptMutationForImgTask, PromptMutationForSodarace -from openelm.environments.sodaracer import SodaraceSimulator +import requests + +from openelm.configs import EnvConfig, ImageEnvConfig, P3EnvConfig, SodaraceEnvConfig +from openelm.environments.env_utils import IMAGE_SEED, get_image_target +from openelm.environments.sodaracer import ( + CIRCLE, + GALLOPER_PREREQ, + IMPORTS, + INSTRUCTIONS, + QUERY_CPPN, + SEEDS_DICT, + SQUARE_PREREQ, + SodaraceSimulator, + Walker, +) +from openelm.mutation_model import MutationModel +from openelm.utils.code_eval import pool_exec_processes, type_check + +sys.set_int_max_str_digits(0) # remove length limitation for int->str conversion +# (model sometimes outputs really long ints) Phenotype = Optional[np.ndarray] @@ -41,6 +57,7 @@ class BaseEnvironment(ABC, Generic[GenoType]): def __init__(self) -> None: self.genotype_space: np.ndarray self.batch_size: int + self.config: EnvConfig @abstractmethod def random(self) -> list[GenoType]: @@ -67,18 +84,6 @@ def behavior_space(self) -> np.ndarray: def behavior_ndim(self) -> int: return self.behavior_space.shape[1] - @staticmethod - def _load_config(config): - # TODO: convert all to dataclass - if isinstance(config, str): - return OmegaConf.load(config) - elif isinstance(config, (dict, DictConfig)): - return DictConfig(config) - elif is_dataclass(config): - return OmegaConf.structured(config) - else: - raise ValueError - class 
ArrayGenotype(Genotype, np.ndarray): def __new__(cls, input_array): @@ -190,106 +195,119 @@ class ImageOptim(BaseEnvironment[ImageGeneration]): Fitness is simply the absolute difference between the returning image and the target image. To map into the behavior space, - if behavior_mode=="3-channel", the image will be divided into blocks + if behavior_ndims=="3-channel", the image will be divided into blocks (specified in `block_size`), and average values of RGB channels in each block will be put together as a point in the behavior space (average-pooling). """ - default_diff_model_cls = PromptMutationForImgTask - # Record different definitions of behavior spaces in a dict. Feel free to add. - behavior_mode_spec = {"3-channel-avg": {"genotype_ndim": 3}} + # Record different definitions of behavior spaces in a dict. + behavior_ndims = {"3-channel": 3} def __init__( self, - seed: dict, - config: Union[str, dict, DictConfig], - target_img: np.ndarray, - diff_model, - behavior_mode: str = "3-channel", - run_name: Optional[str] = None, + config: ImageEnvConfig, + mutation_model: MutationModel, ): - """ - Mutate programs that return images. + self.config: ImageEnvConfig = config + self.batch_size = self.config.batch_size + self.target_img: np.ndarray = get_image_target(self.config.target) + self.seed: str = IMAGE_SEED + self.mutation_model: MutationModel = mutation_model - Fitness is simply the absolute difference between the returning - image and the target image. To map into the behavior space, - if behavior_mode=="3-channel", the image will be divided into blocks - (specified in `block_size`), and average values of RGB channels in each - block will be put together as a point in the behavior space (average-pooling). + self.behavior_mode: str = self.config.behavior_mode + self.genotype_ndim: int = self.behavior_ndims[self.behavior_mode] + self.genotype_space = np.repeat([[0, 255]], self.genotype_ndim, axis=0).T - Args: - seed: the seed dict. - config: the config file path or dict. - target_img: the target image. - diff_model: the diff model (or alternatives). - behavior_mode: (Optional) a string indicating the way an individual - is mapped into behavior space. - run_name: (Optional) override the run_name in config. 
- """ - if isinstance(seed, dict): - self.seed = ImageGeneration(**seed) + def construct_prompt( + self, code_batch: Optional[Union[list[str], str]] = None + ) -> dict[str, str]: + prompt_str: str = "import math\nimport numpy as np\n" + instruction_str: str = """ +def draw(): + \"\"\"Draw a yellow circle.\"\"\" + pic = np.zeros((32, 32, 3)) +""" + import_str: str = prompt_str + if code_batch is None: + # Initialization steps + prompt_str += self.seed else: - raise TypeError - - self.config: ImageELMConfig = self._load_config(config) - if run_name is not None: - self.config.run_name = run_name - - self.target_img = target_img - self.shape = target_img.shape - - if diff_model is None: - self.diff_model = self.default_diff_model_cls(self.config) + # Evolution steps + if isinstance(code_batch, list): + prompt_str += code_batch[0] + elif isinstance(code_batch, str): + prompt_str += code_batch + import_str += instruction_str + prompt_str += instruction_str + return {"prompt": prompt_str, "template": import_str} + + def generate_programs( + self, code_batch: list[dict[str, str]] + ) -> list[ImageGeneration]: + func_name: str = "draw" + generated_programs = self.mutation_model.generate_programs( + code_batch, local_scope_truncate=True + ) + if self.config.sandbox: + results = [] + for code in generated_programs: + resp = requests.post( + f"{self.config.sandbox_server}/eval_imageoptim_func", + json={ + "code": code, + "func_name": func_name, + "timeout": self.config.timeout, + }, + timeout=self.config.timeout, + ) + if resp.status_code == 200: + return_dict = json.loads(resp.text) + results.append(return_dict) + return [ImageGeneration(**p) for p in results] + # for i in range(len(results)): + # results[i]["result_obj"] = np.array(results[i]["result_obj"]) + # return results else: - self.diff_model = diff_model - - self.behavior_mode = behavior_mode - self.genotype_ndim: int = self.behavior_mode_spec[self.behavior_mode][ - "genotype_ndim" - ] - self.genotype_space = np.repeat([[0, 255]], self.genotype_ndim, axis=0).T - - def generate_program(self, code_batch: list[str]) -> list[ImageGeneration]: - """ - Call LM to generate a new program and run it. - - Returns: - An ImageGeneration object containing the code, the resulting image - and the error code. - """ - generated_programs = self.diff_model.generate_program(code_batch) - return [ImageGeneration(**p) for p in generated_programs] + results = pool_exec_processes( + generated_programs, + func_name=func_name, + timeout=self.config.timeout, + processes=self.config.processes, + debug=self.config.debug, + ) + result_list: list = [] + for i, result in enumerate(results): + try: + if isinstance(result, np.ndarray): + result_list.append( + { + "program_str": generated_programs[i], + "result_obj": result, + } + ) + else: + if self.config.debug: + print("Failed execution, type:", result) + print(generated_programs[i]) + except Exception as e: + if self.config.debug: + print(type(e), e) + return [ImageGeneration(**p) for p in result_list] def random(self) -> list[ImageGeneration]: - """ - Randomly generate a batch of codes and evaluate their outputs. - - Returns: - a tuple of the code string and the returning result (None if there - is error). 
- """ - program_str_list = [self.seed.program_str] * self.batch_size - new_images = self.generate_program(program_str_list) + program_list = [self.construct_prompt() for _ in range(self.config.batch_size)] + new_images = self.generate_programs(program_list) return new_images def mutate(self, images_list: list[ImageGeneration]) -> list[ImageGeneration]: - """ - Randomly mutate a batch of codes and evaluate their outputs. - - Args: - x: the individual to be mutated. - - Returns: - a tuple of the code string and the returning result (None if there - is an error). - """ - program_str_list = [sr.program_str for sr in images_list] - new_images = self.generate_program(program_str_list) + images = [img.program_str for img in images_list] + program_list = list(map(self.construct_prompt, images)) + new_images = self.generate_programs(program_list) return new_images def fitness(self, x: ImageGeneration) -> float: - if not x.valid or x.result_obj.shape != self.shape: + if not x.valid or x.result_obj.shape != self.target_img.shape: return -np.inf return -np.abs(x.result_obj - self.target_img).sum() @@ -305,7 +323,6 @@ def __init__(self, program_str: str, result_obj: dict): """ self.program_str: str = program_str self.result_obj: dict = result_obj - # self._fitness: Optional[float] = None # Check whether the Sodaracer is valid. try: @@ -320,6 +337,9 @@ def __init__(self, program_str: str, result_obj: dict): def evaluate(self, eval_ms: int) -> float: self._fitness = self.simulator.evaluate(eval_ms) # if self._fitness is None: + # print(self.valid) + # self.simulator = SodaraceSimulator(body=self.result_obj) + # print(self.evaluate(0)) return self._fitness def __str__(self) -> str: @@ -343,74 +363,279 @@ def fitness(self) -> Optional[float]: class Sodarace(BaseEnvironment[Sodaracer]): - default_diff_model_cls = PromptMutationForSodarace - def __init__( self, - seed: dict, - config: Union[str, dict, DictConfig, BaseConfig], - diff_model, - eval_ms: int, - max_height: int = 1000, - max_width: int = 1000, - max_mass: int = 2000, - ndim: int = 3, - run_name: Optional[str] = None, + config: SodaraceEnvConfig, + mutation_model: MutationModel, ) -> None: """ Sodarace environment. Args: - seed: the seed dict. - config: the config file path or dict. - diff_model: the diff model (or alternatives). - eval_ms: The time in ms for sodaracer evaluation. - max_height: (Optional) the maximal height. - max_width: (Optional) the maximal width. - max_mass: (Optional) the maximal mass. - ndim: (Optional) the dimension of behavior space. - run_name: (Optional) override the run_name in config. + seeds: the seed dict. + config: the environment config. + mutation_model: the mutation model. 
""" - if isinstance(seed, dict): - self.seed = Sodaracer(**seed) + self.config: SodaraceEnvConfig = config + self.batch_size = self.config.batch_size + self.mutation_model: MutationModel = mutation_model + + self.genotype_space = np.array(self.config.behavior_space).T + self.genotype_ndim = self.genotype_space.shape[1] + + self.seed_strs: list[str] = self.config.starting_seeds + + def construct_prompt( + self, code_batch: Optional[Union[list[str], str]] = None + ) -> dict[str, str]: + prompt_str: str = IMPORTS + if "square" in self.seed_strs: + prompt_str += SQUARE_PREREQ + if "galloper" in self.seed_strs: + prompt_str += GALLOPER_PREREQ + if "radial" in self.seed_strs or "wheel" in self.seed_strs: + prompt_str += CIRCLE + if ( + "cppn_fixed" in self.seed_strs + or "cppn_mutable" in self.seed_strs + or "runner" in self.seed_strs + ): + prompt_str += QUERY_CPPN + # For crossover: + # If init steps, combine seeds and prereqs, and use instruction 3 code below. + # For all other steps, prepend all prereqs and ignore instruction 3 code. + # For non-crossover + # Always preprend prereq, and len(code_batch) == 1 + import_str: str = prompt_str + if code_batch is None: + # Initialization steps + seeds = [SEEDS_DICT[seed] for seed in self.seed_strs] + if not self.config.crossover: + # TODO: Sample from seeds randomly + prompt_str += seeds[0] + elif self.config.crossover: + if self.config.instruction == 3: + instruction_str: str = INSTRUCTIONS[self.config.instruction].split( + "," + )[0] + for seed in seeds: + prompt_str += seed + if self.config.instruction == 3: + reverse_seeds: dict[str, str] = { + v: k for k, v in SEEDS_DICT.items() + } + instruction_str += reverse_seeds[seed] + ", " + if self.config.instruction == 3: + instruction_str += INSTRUCTIONS[self.config.instruction].split(",")[ + 1 + ] + raise NotImplementedError else: - raise TypeError - # TODO: rewrite config code to make everything an instance of a dataclass - self.config: SodaraceELMConfig = self._load_config(config) - if run_name is not None: - self.config.run_name = run_name - - if diff_model is None: - self.diff_model = self.default_diff_model_cls(self.config) + # Evolution steps + if not self.config.crossover: + if isinstance(code_batch, list): + # TODO: get nearby genotypes + prompt_str += code_batch[0] + elif isinstance(code_batch, str): + prompt_str += code_batch + elif self.config.crossover: + # Crossover + raise NotImplementedError + instruction_str = INSTRUCTIONS[self.config.instruction] + import_str += instruction_str + prompt_str += instruction_str + return {"prompt": prompt_str, "template": import_str} + + def generate_programs(self, code_batch: list[dict[str, str]]) -> list[Sodaracer]: + """Generate new programs with a mutation model and evaluate them.""" + local_scope_exec: bool = self.config.instruction != 0 + generated_programs = self.mutation_model.generate_programs( + code_batch, local_scope_exec + ) + if self.config.sandbox: + results = [] + for code in generated_programs: + resp = requests.post( + f"{self.config.sandbox_server}/gen_racer", + json={"code": code, "timeout": self.config.timeout}, + timeout=self.config.timeout, + ) + if resp.status_code == 200: + return_dict = json.loads(resp.text) + results.append(return_dict) + return [Sodaracer(**p) for p in results] else: - self.diff_model = diff_model - - self.batch_size = self.config.batch_size - self.eval_ms = eval_ms - self.genotype_ndim = ndim - self.genotype_space = np.array( - [[0, max_height], [0, max_width], [0, max_mass]] - ).T - - def 
generate_program(self, code_batch: list[str]) -> list[Sodaracer]: - # Call LM to generate a new program and run it, returning a dict - # containing the program string and the dict from running it. - generated_programs = self.diff_model.generate_program(code_batch) - return [Sodaracer(**p) for p in generated_programs] + results = pool_exec_processes( + generated_programs, + func_name="make_walker", + timeout=self.config.timeout, + processes=self.config.processes, + debug=self.config.debug, + ) + result_list: list = [] + for i, result in enumerate(results): + try: + if isinstance(result, Walker) and result.validate(): + result_list.append( + { + "program_str": generated_programs[i], + "result_obj": result.to_dict(), + } + ) + else: + if self.config.debug: + print("Failed execution, type:", result) + print(generated_programs[i]) + except Exception as e: + if self.config.debug: + print(type(e), e) + return [Sodaracer(**p) for p in result_list] def random(self) -> list[Sodaracer]: - program_str_list = [self.seed.program_str] * self.batch_size - new_sodaracers = self.generate_program(program_str_list) + program_list = [self.construct_prompt() for _ in range(self.config.batch_size)] + new_sodaracers = self.generate_programs(program_list) return new_sodaracers def mutate(self, sodaracer_list: list[Sodaracer]) -> list[Sodaracer]: - program_str_list = [sr.program_str for sr in sodaracer_list] - new_sodaracers = self.generate_program(program_str_list) + sodaracers = [sr.program_str for sr in sodaracer_list] + program_list = list(map(self.construct_prompt, sodaracers)) + new_sodaracers = self.generate_programs(program_list) return new_sodaracers def fitness(self, x: Sodaracer) -> float: - # Call Sodaracers environment to get the fitness. if x.valid: - return x.evaluate(self.eval_ms) + return x.evaluate(self.config.eval_ms) else: return -np.inf + + +class P3Solution(Genotype): + def __init__(self, program_str: str, result_obj: dict): + """ + Genotype for a programming puzzle solution. + + Args: + program_str: the solution program string (the g6() function). + result_obj: dict. + """ + self.program_str = program_str + self.result_obj = result_obj + + def __str__(self) -> str: + return self.program_str + + def to_phenotype(self) -> Optional[Phenotype]: + return None + + +class P3Problem(BaseEnvironment[P3Solution]): + def __init__( + self, + seed: dict, + config: P3EnvConfig, + mutation_model: MutationModel, + problem_func: str, + solution_preamble: str, + ans_type: Type, + ) -> None: + """ + P3 Environment. + + Args: + seed: the seed dict. + config: the config file path or dict. + mutation_model: the diff model (or alternatives). 
+ problem_func: the f6() function containing the programming problem + solution_preamble: the g6() function definition (must be passed in in order to include params) + ans_type: answer type + """ + if isinstance(seed, dict): + self.seed = seed + else: + raise TypeError + self.mutation_model = mutation_model + self.problem_func = problem_func + self.solution_preamble = solution_preamble + self.config = config + self.batch_size = self.config.batch_size + # The only import that's necessary as of P3 v0.2 + self.import_line = "from typing import List\n" + self.ans_type = ans_type + + def construct_prompt(self) -> dict[str, str]: + prompt_str = ( + self.seed["program_str"] + + f"\n\n{self.problem_func}" # add f6() to the prompt + f"\n\n{self.solution_preamble}" # add g6() preamble + ) + + template = f"{self.import_line}\n{self.solution_preamble}" + return {"prompt": prompt_str, "template": template} + + def generate_program(self, code_batch: list[dict[str, str]]) -> list[P3Solution]: + """Generate new programs with a mutation model and evaluate them.""" + local_scope_exec = True + generated_programs = self.mutation_model.generate_programs( + code_batch, local_scope_exec + ) + + if self.config.sandbox: + results = [] + for code in generated_programs: + resp = requests.post( + f"{self.config.sandbox_server}/eval_p3_solution", + json={"code": code, "timeout": self.config.timeout}, + timeout=self.config.timeout, + ) + if resp.status_code == 200: + return_dict = json.loads(resp.text) + results.append(return_dict) + else: + results = pool_exec_processes( + generated_programs, + func_name="g6", + timeout=self.config.timeout, + processes=self.config.processes, + debug=self.config.debug, + ) + results = [ + {"program_str": gen_prog, "result_obj": res_obj} + for (gen_prog, res_obj) in zip(generated_programs, results) + ] + return [P3Solution(**p) for p in results] + + def fitness(self, sol: P3Solution) -> float: + # If passing the solution to the problem returns True, fitness is 1.0 + # else 0.0 + if not type_check(self.ans_type, sol.result_obj): + return 0.0 + + eval_code = ( + f"{self.import_line}\n" + f"{self.problem_func}\n" + f"def run_eval():\n" + f" return f6({sol.result_obj})" + ) + + result = pool_exec_processes( + eval_code, + func_name="run_eval", + timeout=self.config.timeout, + processes=self.config.processes, + debug=self.config.debug, + ) + if result[0] is True: + return 1.0 + else: + return 0.0 + + def random(self) -> list[P3Solution]: + program_list = [self.construct_prompt() for _ in range(self.config.batch_size)] + new_solutions = self.generate_program(program_list) + return new_solutions + + def mutate(self, x: P3Solution) -> list[P3Solution]: + raise NotImplementedError + + def to_behavior_space(self, x: Sodaracer) -> Optional[Phenotype]: + raise NotImplementedError diff --git a/src/openelm/environments/sodaracer/__init__.py b/src/openelm/environments/sodaracer/__init__.py index 52d77f8d..5087e9af 100644 --- a/src/openelm/environments/sodaracer/__init__.py +++ b/src/openelm/environments/sodaracer/__init__.py @@ -240,6 +240,13 @@ def phase(x1, y1, x2, y2): "runner": RUNNER, } +INSTRUCTIONS = { + 0: "", + 1: "def make_walker():\n", + 2: "#Create a new walker by modifying the starting function above.\ndef make_walker():\n", + 3: "#Combine the ,starting programs above to make a new program.\ndef make_walker():\n", +} + __all__ = [ "IESoRWorld", "SodaraceSimulator", diff --git a/src/openelm/environments/sodaracer/simulator.py b/src/openelm/environments/sodaracer/simulator.py index 
5a9efb49..c8c22435 100644 --- a/src/openelm/environments/sodaracer/simulator.py +++ b/src/openelm/environments/sodaracer/simulator.py @@ -13,6 +13,7 @@ from pathlib import Path from typing import Any +import numpy as np from Box2D import Box2D as b2 from openelm.environments.sodaracer.helpers import ( @@ -629,7 +630,7 @@ def evaluate(self, time: float) -> float: ) return abs(end + self.morphology["offsetX"]) except Exception as e: - # print(e) + print(e) # print(self.world.bone_list) # print(self.world.muscle_list) - return None + return -np.inf diff --git a/src/openelm/map_elites.py b/src/openelm/map_elites.py index 5bd7c7b9..30ccaf7e 100644 --- a/src/openelm/map_elites.py +++ b/src/openelm/map_elites.py @@ -112,7 +112,7 @@ class MAPElites: def __init__( self, env, - n_bins: int, + map_grid_size: tuple[int, ...], init_map: Optional[Map] = None, history_length: int = 1, save_history: bool = False, @@ -125,7 +125,7 @@ def __init__( should be a subclass of `BaseEnvironment`, and should implement methods to generate random solutions, mutate existing solutions, and evaluate solutions for their fitness in the environment. - n_bins (int): Number of bins to partition the behavior space into. + map_grid_size (int): Number of bins to partition the behavior space into. init_map (Map, optional): A map to use for the algorithm. If not passed, a new map will be created. Defaults to None. history_length (int): Length of history to store for each niche (cell) @@ -137,18 +137,19 @@ def __init__( Defaults to False. """ self.env: BaseEnvironment = env - self.n_bins = n_bins + self.map_grid_size = map_grid_size self.history_length = history_length self.save_history = save_history # self.history will be set/reset each time when calling `.search(...)` self.history: dict = defaultdict(list) # discretization of space - self.bins = np.linspace(*env.behavior_space, n_bins + 1)[1:-1].T # type: ignore + # TODO: make this work for any number of dimensions + self.bins = np.linspace(*env.behavior_space, map_grid_size[0] + 1)[1:-1].T # type: ignore # TODO: abstract all maps out to a single class. # perfomance of niches if init_map is None: self.fitnesses: Map = Map( - dims=(n_bins,) * env.behavior_ndim, + dims=map_grid_size * env.behavior_ndim, fill_value=-np.inf, dtype=float, history_length=history_length, @@ -183,7 +184,7 @@ def random_selection(self) -> MapIndex: ix = np.random.choice(np.flatnonzero(self.nonzero.array)) return np.unravel_index(ix, self.nonzero.dims) - def search(self, initsteps: int, totalsteps: int, atol: float = 1.0) -> str: + def search(self, init_steps: int, total_steps: int, atol: float = 1.0) -> str: """ Run the MAP-Elites search algorithm. @@ -200,16 +201,17 @@ def search(self, initsteps: int, totalsteps: int, atol: float = 1.0) -> str: best performing solution object can be accessed via the `current_max_genome` class attribute. """ - tbar = trange(int(totalsteps)) + tbar = trange(int(total_steps)) max_fitness = -np.inf max_genome = None if self.save_history: self.history = defaultdict(list) for n_steps in tbar: - if n_steps < initsteps or self.genomes.empty: + if n_steps < init_steps or self.genomes.empty: # Initialise by generating initsteps random solutions. # If map is still empty: force to do generation instead of mutation. + # TODO: use a separate sampler, move batch size to qd config. new_individuals: list[Genotype] = self.env.random() else: # Randomly select a batch of elites from the map. 
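The bins computed above hold, per behaviour dimension, the interior edges of a uniform grid over env.behavior_space; the to_mapindex helper used in the search loop below can then reduce a phenotype to a cell index by digitising each dimension. A small illustration with the Sodarace behaviour space from SodaraceEnvConfig (to_mapindex_sketch is a stand-in name, not the class's actual method):

import numpy as np

behavior_space = np.array([[0, 0, 0], [1000, 1000, 2000]])  # height, width, mass bounds
map_grid_size = (12,)

# Interior edges, one row per behaviour dimension: shape (ndim, map_grid_size[0] - 1).
bins = np.linspace(*behavior_space, map_grid_size[0] + 1)[1:-1].T

def to_mapindex_sketch(phenotype: np.ndarray) -> tuple:
    # Each dimension is digitised independently into one of 12 cells.
    return tuple(int(np.digitize(x, b)) for x, b in zip(phenotype, bins))

print(to_mapindex_sketch(np.array([100.0, 900.0, 1550.0])))  # (1, 10, 9)

With the MAPElitesConfig default of (12,) and a 3-dimensional behaviour space, dims = map_grid_size * env.behavior_ndim gives a 12 x 12 x 12 map.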
@@ -225,6 +227,9 @@ def search(self, initsteps: int, totalsteps: int, atol: float = 1.0) -> str: # TODO: account for the case where multiple new individuals are # placed in the same niche, for saving histories. for individual in new_individuals: + fitness = self.env.fitness(individual) + if np.isinf(fitness): + continue map_ix = self.to_mapindex(individual.to_phenotype()) # if the return is None, the individual is invalid and is thrown # into the recycle bin. @@ -238,7 +243,6 @@ def search(self, initsteps: int, totalsteps: int, atol: float = 1.0) -> str: self.history[map_ix].append(individual) self.nonzero[map_ix] = True - fitness = self.env.fitness(individual) # If new fitness greater than old fitness in niche, replace. if fitness > self.fitnesses[map_ix]: self.fitnesses[map_ix] = fitness diff --git a/src/openelm/mutation_model.py b/src/openelm/mutation_model.py new file mode 100644 index 00000000..ac2eb5f6 --- /dev/null +++ b/src/openelm/mutation_model.py @@ -0,0 +1,117 @@ +import functools +import os +import re +from abc import ABC, abstractmethod + +import numpy as np + +from openelm.codegen import model_setup, sample, set_seed, truncate +from openelm.configs import ModelConfig +from openelm.utils.diff_eval import apply_diff, split_diff + + +class MutationModel(ABC): + """Base model class for all mutation models.""" + + def __init__(self) -> None: + self.config: ModelConfig + + @abstractmethod + def generate_programs(self, *args, **kwargs) -> list[str]: + raise NotImplementedError + + +class PromptModel(MutationModel): + """Mutation model that uses prompts to change a seed.""" + + def __init__(self, config: ModelConfig) -> None: + self.config: ModelConfig = config + seed: int = set_seed(self.config.seed) + # Use RNG to rotate random seeds during inference. + self.rng = np.random.default_rng(seed=seed) + os.environ["TOKENIZERS_PARALLELISM"] = "false" + self.model, self.tokenizer, self.device = model_setup(self.config) + + def generate_programs( + self, prompt_dicts: list[dict[str, str]], local_scope_truncate: bool, **kwargs + ) -> list[str]: + """ + Generate new programs from a batch of programs. + + Given a piece of code, do prompt mutation, execute the code, + and return the result. + + Args: + prompt_dicts (list[dict[str, str]]): A list of dictionaries containing + the prompt and template for each program. + local_scope_truncate (bool): Whether or not to truncate the code to + the local scope. + + Returns: + A list of code strings. + """ + prompts = [prompt_dict["prompt"] for prompt_dict in prompt_dicts] + templates = [prompt_dict["template"] for prompt_dict in prompt_dicts] + encodings = self.tokenizer( + prompts, + truncation=True, + padding=True, + return_tensors="pt", + ) + completions: list[str] = sample( + encodings, + self.config, + self.model, + self.tokenizer, + num_return_sequences=1, + ) + trunc = functools.partial(truncate, only_local_scope=local_scope_truncate) + truncations: list[str] = [ + templates[i] + trunc(completions[i]) for i in range(len(completions)) + ] + return truncations
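Note: PromptModel.generate_programs above consumes {"prompt", "template"} pairs: the model completes the prompt, the completion is truncated to the local scope, and the template is prepended to form the new program. A minimal sketch of that flow with the model call stubbed out (the completion string and the truncation helper are stand-ins, not the openelm.codegen implementations):

    # The instruction text reuses INSTRUCTIONS[2] from the Sodarace environment above.
    prompt_dict = {
        "prompt": "#Create a new walker by modifying the starting function above.\ndef make_walker():\n",
        "template": "def make_walker():\n",
    }

    # Stand-in for a completion returned by sample().
    completion = (
        "    wc = walker_creator()\n"
        "    return wc.get_walker()\n"
        "print('top-level text the model rambled on with')\n"
    )

    def truncate_local_scope(text: str) -> str:
        # Stand-in for openelm.codegen.truncate(only_local_scope=True): keep only
        # the lines that are still indented, i.e. inside the function body.
        kept = []
        for line in text.splitlines(keepends=True):
            if line.strip() and not line.startswith((" ", "\t")):
                break
            kept.append(line)
        return "".join(kept)

    new_program = prompt_dict["template"] + truncate_local_scope(completion)
    print(new_program)
    # def make_walker():
    #     wc = walker_creator()
    #     return wc.get_walker()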
+ """ + prompts = [prompt_dict["prompt"] for prompt_dict in prompt_dicts] + templates = [prompt_dict["template"] for prompt_dict in prompt_dicts] + encodings = self.tokenizer( + prompts, + truncation=True, + padding=True, + return_tensors="pt", + ) + completions: list[str] = sample( + encodings, + self.config, + self.model, + self.tokenizer, + num_return_sequences=1, + ) + trunc = functools.partial(truncate, only_local_scope=local_scope_truncate) + truncations: list[str] = [ + templates[i] + trunc(completions[i]) for i in range(len(completions)) + ] + return truncations + + +class DiffModel(PromptModel): + def __init__(self, config: ModelConfig) -> None: + super().__init__(config) + + def generate_programs( + self, prompt_dicts: list[dict[str, str]], local_scope_truncate: bool, **kwargs + ) -> list[str]: + prompts = [prompt_dict["prompt"] for prompt_dict in prompt_dicts] + templates = [prompt_dict["template"] for prompt_dict in prompt_dicts] + encodings = self.tokenizer( + prompts, + truncation=True, + padding=True, + return_tensors="pt", + ) + completions: list[str] = sample( + encodings, + self.config, + self.model, + self.tokenizer, + num_return_sequences=1, + ) + + end_of_diff = re.compile("\n[^ +-@]+") + trunc = functools.partial(truncate, only_local_scope=local_scope_truncate) + truncations: list[str] = [ + templates[i] + trunc(completions[i]) for i in range(len(completions)) + ] + outputs: list[str] = [] + for i, code in enumerate(truncations): + # split the diff text according to , , , . + parsed: dict = split_diff(code) + # truncate the diff hunk at the first line not starting with " ", + # "+", "-", or "@". + if parsed and all( + (s in parsed for s in ["name", "file", "message", "diff"]) + ): + diff_hunk: str = end_of_diff.split(parsed["diff"])[0] + nme_idx: int = diff_hunk.find("") + if nme_idx != -1: + diff_hunk = diff_hunk[:nme_idx] + outputs.append(apply_diff(prompts[i], diff_hunk)) + return outputs diff --git a/src/openelm/sandbox/server/index.py b/src/openelm/sandbox/server/index.py index 09a18dd7..4d4ac4d1 100644 --- a/src/openelm/sandbox/server/index.py +++ b/src/openelm/sandbox/server/index.py @@ -2,7 +2,7 @@ from numpy import ndarray from .environments.walker.walk_creator import Walker -from .sandbox_codex_execute import unsafe_execute +from .sandbox_codex_execute import ExecResult, unsafe_execute app = Flask(__name__) @@ -11,9 +11,11 @@ def bad_request(message, **kwargs): return {"message": message, **kwargs}, 500 -def generate_racer(code_str, timeout): +def generate_racer(code_str: str, timeout: float): try: - execution_result = unsafe_execute(code_str, "make_walker", timeout) + execution_result = unsafe_execute( + code_str, func_name="make_walker", timeout=timeout + ) except Exception: return bad_request( "Failed to execute code", unsafe_execute_error_code=6 @@ -32,9 +34,10 @@ def generate_racer(code_str, timeout): walker=execution_result.to_dict(), unsafe_execute_error_code=1, ) - elif isinstance(execution_result, int): + elif isinstance(execution_result, ExecResult): return bad_request( - "Failed sandbox_unsafe_execute", unsafe_execute_error_code=execution_result + "Failed sandbox_unsafe_execute", + unsafe_execute_error_code=execution_result.name, ) else: return bad_request( @@ -57,20 +60,22 @@ def gen_racer(): @app.route("/eval_imageoptim_func", methods=["POST"]) def evaluate_function(): - req_json = request.get_json() + req_json: dict = request.get_json() try: execution_result = unsafe_execute( - req_json["code"], req_json["func_name"], req_json["timeout"] + 
code_str=req_json["code"], + func_name=req_json["func_name"], + timeout=req_json["timeout"], ) if isinstance(execution_result, ndarray): return { "program_str": req_json["code"], "result_obj": execution_result.tolist().__repr__(), }, 200 - elif isinstance(execution_result, int): + elif isinstance(execution_result, ExecResult): return bad_request( "Failed sandbox_unsafe_execute", - unsafe_execute_error_code=execution_result, + unsafe_execute_error_code=execution_result.name, ) else: bad_request( @@ -80,3 +85,24 @@ def evaluate_function(): return bad_request( "Failed to execute code", unsafe_execute_error_code=6 ) # 6: Other errors. + + +@app.route("/eval_p3_solution", methods=["POST"]) +def evaluate_p3_solution(): + req_json = request.get_json() + try: + execution_result = unsafe_execute( + req_json["code"], req_json["func_name"], req_json["timeout"] + ) + if isinstance(execution_result, ExecResult): + return bad_request( + "Failed sandbox_unsafe_execute", + unsafe_execute_error_code=execution_result.name, + ) + return { + "program_str": req_json["code"], + "result_obj": execution_result.__repr__(), + }, 200 + except Exception: + return bad_request("Failed to execute code", unsafe_execute_error_code=6) + # 6: Other errors. diff --git a/src/openelm/sandbox/server/sandbox_codex_execute.py b/src/openelm/sandbox/server/sandbox_codex_execute.py index e7ad0fac..9828e8fa 100644 --- a/src/openelm/sandbox/server/sandbox_codex_execute.py +++ b/src/openelm/sandbox/server/sandbox_codex_execute.py @@ -66,9 +66,18 @@ def unsafe_execute( exec(code_str, code_dct) if ground_truth is None: if args is None: - return code_dct[func_name]() + result = code_dct[func_name]() elif args is not None: - return code_dct[func_name](**args) + result = code_dct[func_name](**args) + + # Multiprocessing.pool.map + # (in utils.code_eval.pool_exec_processes()) + # cannot return 'generators' + # (this may not catch all 'invalid' generator uses) + if isinstance(result, range): + result = list(result) + + return result elif ground_truth is not None: if all( [ @@ -76,7 +85,7 @@ def unsafe_execute( for arguments, res in ground_truth.items() ] ): - return 0 + return ExecResult(0) else: return ExecResult(1) except Exception as e: diff --git a/src/openelm/utils/__init__.py b/src/openelm/utils/__init__.py index e69de29b..4b58d3fe 100644 --- a/src/openelm/utils/__init__.py +++ b/src/openelm/utils/__init__.py @@ -0,0 +1,12 @@ +from openelm.utils.code_eval import eval_completions, mutate_code, pool_exec_processes +from openelm.utils.diff_eval import apply_diff, split_diff +from openelm.utils.utils import validate_config + +__all__ = [ + "pool_exec_processes", + "eval_completions", + "mutate_code", + "apply_diff", + "split_diff", + "validate_config", +] diff --git a/src/openelm/utils/code_eval.py b/src/openelm/utils/code_eval.py index 6f8c3b6f..d6dc1eb2 100644 --- a/src/openelm/utils/code_eval.py +++ b/src/openelm/utils/code_eval.py @@ -3,6 +3,8 @@ import multiprocessing as mp from typing import Any, Iterable, Optional, Union +import numpy as np + from openelm.sandbox.server.sandbox_codex_execute import ExecResult, unsafe_execute @@ -144,3 +146,46 @@ def parity_reference(b1, b2, b3, b4): def quadratic(a, b, c, x): """Return quadratic: a,b,c are coefficients and x is the independent variable.""" return a * x**2 + b * x + c + + +def pass_at_k(n, c, k): + """ + :param n: total number of samples + :param c: number of correct samples + :param k: k in pass@k + """ + if n - c < k: return 1.0 + return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, 
n + 1)) + +def type_check(typ, obj): + """ + Checks the object is the correct type. Supports only bool, int, float, str, and (possibly nested) lists of these + + From: https://github.com/microsoft/PythonProgrammingPuzzles/blob/v0.2/puzzle_generator.py + """ + type_s = type_str(typ) # convert to string if necessary + + nest_depth = type_s.count("List") + assert type_s.count("[") == nest_depth, "type_check only supports List for now, no Sets, Dicts, Tuples, ..." + + assert type_s.startswith("List[" * nest_depth) and type_s.endswith("]" * nest_depth) + base_type = {"bool": bool, "int": int, "float": float, "str": str}[type_s[5 * nest_depth:len(type_s) - nest_depth]] + + def helper(depth, o): + if depth == 0: + return type(o) is base_type + else: + return type(o) is list and all(helper(depth - 1, i) for i in o) + + return helper(nest_depth, obj) + +def type_str(ty: type) -> str: + """ + Convert type ty to string. + :param ty: str, typing.List[int] , typing.List[typing.List[bool]], etc. + :return: string form of type, "str", "List[int]" , "List[List[bool]]", etc. + + From: https://github.com/microsoft/PythonProgrammingPuzzles/blob/v0.2/puzzle_generator.py + """ + type_str = str(ty).replace("typing.", "") + return type_str[8:-2] if type_str.startswith("