Release/0.2.1 (#45)
* Check for ExecResult error in sandbox (#44)

* Big refactor

* Fix env bugs

* Refactor diff model

* Update image evo environment

* P3 prompting (#41)

* Check for ExecResult error in sandbox

* Add PromptMutationForP3

* Add p3 run script

* Add long prompt; Prompt mutation v0.2 workflow

* pass@k

* Update to new API and configs

---------

Co-authored-by: Herbie Bradley <[email protected]>

* Improvements to integrate P3

* Benchmark config fix

---------

Co-authored-by: Daniel Scott <[email protected]>
herbiebradley and dyodx authored Mar 8, 2023
1 parent 98e5c22 commit e6402a0
Showing 25 changed files with 1,157 additions and 715 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -135,6 +135,7 @@ logs/
 archive/
 data/
 *.out
+outputs

 # generated dot files and tree graphs
 .gv
22 changes: 9 additions & 13 deletions run_elm.py
@@ -1,36 +1,32 @@
 """
 This module gives an example of how to run the main ELM class.
-It uses the hydra library to load the config from the
-config/elm_sodarace_cfg.yaml file.
+It uses the hydra library to load the config from the config dataclasses in
+configs.py.
 This config file demonstrates an example of running ELM with the Sodarace
 environment, a 2D physics-based environment in which robots specified by
 Python dictionaries are evolved over.
 """
 import hydra
-from hydra.core.config_store import ConfigStore
 from omegaconf import OmegaConf

 from openelm import ELM
-from openelm.configs import SodaraceELMConfig
-
-cs = ConfigStore.instance()
-cs.store(name="config", node=SodaraceELMConfig)


 # Load hydra config from yaml files and command line arguments.
 @hydra.main(
-    config_name="config",
+    config_name="elmconfig",
     version_base="1.2",
 )
-def main(cfg):
+def main(config):
     print("----------------- Config ---------------")
-    print(OmegaConf.to_yaml(cfg))
+    print(OmegaConf.to_yaml(config))
     print("----------------- End -----------------")
-    elm = ELM(cfg)
-    print("Best Individual: ", elm.run())
+    config = OmegaConf.to_object(config)
+    elm = ELM(config)
+    print("Best Individual: ", elm.run(init_steps=config.qd.init_steps,
+                                       total_steps=config.qd.total_steps))


 if __name__ == "__main__":
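
With the ConfigStore registration removed from run_elm.py, Hydra can only resolve config_name="elmconfig" if a structured config of that name is registered when the openelm package is imported (now expected to live in openelm.configs, per the updated docstring). A minimal sketch of what that registration could look like, with hypothetical dataclass names and defaults; only the qd.init_steps and qd.total_steps fields are actually visible in this diff:

from dataclasses import dataclass, field

from hydra.core.config_store import ConfigStore


@dataclass
class QDConfig:
    # Hypothetical defaults, for illustration only.
    init_steps: int = 2
    total_steps: int = 10


@dataclass
class ELMConfig:
    qd: QDConfig = field(default_factory=QDConfig)


cs = ConfigStore.instance()
# "elmconfig" must match the config_name passed to @hydra.main in run_elm.py.
cs.store(name="elmconfig", node=ELMConfig)

With a registration along these lines, command-line overrides such as python run_elm.py qd.total_steps=100 should resolve against the dataclass fields instead of a YAML file.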
176 changes: 176 additions & 0 deletions run_p3.py
@@ -0,0 +1,176 @@
import logging
import pathlib
import requests
import time
import json
from collections import Counter

from openelm.environments import p3_long_init_args, p3_med_init_args, P3Problem
from openelm.mutation_model import DiffModel, MutationModel, PromptModel
from openelm.configs import P3Config
from openelm.sandbox.server.sandbox_codex_execute import ExecResult
from openelm.utils.code_eval import pass_at_k
from openelm.codegen.codegen_utilities import set_seed

import hydra
from omegaconf import OmegaConf


class P3:
    def __init__(self, cfg: P3Config) -> None:
        """
        Evaluate models on P3 dataset
        """
        self.cfg: P3Config = cfg

        # Prompt size
        if cfg.env.prompt_size == 'long':
            env_args = p3_long_init_args
        elif cfg.env.prompt_size == 'med':
            env_args = p3_med_init_args
        else:
            raise ValueError('No init args found')

        # Model
        if self.cfg.model.model_name == 'prompt':
            self.mutation_model: MutationModel = PromptModel(self.cfg.model)
        elif self.cfg.model.model_name == 'diff':
            self.mutation_model: MutationModel = DiffModel(self.cfg.model)

        self.seed = env_args["seed"]
        self.log_dir = 'logs/p3/problems'


    def run(self):
        """
        Query PromptMutationModelForP3 for solutions to programming puzzles
        """
        # Get problems
        problems = requests.get("https://raw.githubusercontent.com/microsoft/PythonProgrammingPuzzles/v0.2/puzzles/puzzles.json").json()
        run_start_time = time.time()
        num_problem_errors = 0
        for problem in problems:
            problem_start_time = time.time()
            problem_dict = {'name': problem['name']}
            logging.info(problem['name'])

            problem['problem_func'] = problem['sat'].replace('def sat(', 'def f6(')  # prompt form is f6()
            problem['solution_preamble'] = problem['sol_header'].replace('def sol(', 'def g6(')  # solution form is g6()
            if self.cfg.env.prompt_size == 'long':
                problem['solution_preamble'] = problem['solution_preamble'] + '\n' + problem['sol_docstring']

            env = P3Problem(seed=self.seed,
                            config=self.cfg,
                            mutation_model=self.mutation_model,
                            problem_func=problem['problem_func'],
                            solution_preamble=problem['solution_preamble'],
                            ans_type=problem['ans_type'])

            # Find solutions
            # If there is an error during finding a solution, log it and skip this problem
            solutions = []
            try:
                for i in range(self.cfg.env.solutions_per_problem // self.cfg.model.batch_size):
                    set_seed(i)  # Change seed for each query

                    try:
                        solutions += env.random()
                    except Exception as e:
                        logging.error(f'ERROR with solution {i} in {problem["name"]}: {e}')
                        num_problem_errors += 1
                        raise e
            except Exception:
                continue

            # Evaluate fitness of solutions
            res_sols_list = []
            solved = False
            for sol in solutions:
                res_sol_dict = {}
                res_sol_dict['program_str'] = sol.program_str

                if isinstance(sol.result_obj, ExecResult):
                    if self.cfg.save_result_obj: res_sol_dict['result_obj'] = sol.result_obj.name
                    fitness = 0.0
                else:
                    if self.cfg.save_result_obj: res_sol_dict['result_obj'] = sol.result_obj
                    fitness = env.fitness(sol)

                res_sol_dict['fitness'] = fitness
                res_sols_list.append(res_sol_dict)
                if not solved and fitness == 1.0:
                    solved = True  # just want to save if solved at all

            problem_dict['config'] = OmegaConf.to_container(self.cfg)
            problem_dict['solutions'] = res_sols_list
            problem_dict['solved'] = solved
            problem_dict['time_elapsed'] = time.time() - problem_start_time

            # Save results
            dir = f'{self.log_dir}/{problem_dict["name"]}/{run_start_time}'
            pathlib.Path(dir).mkdir(parents=True, exist_ok=True)

            with open(f'{dir}/results.json', 'w') as file:
                file.write(json.dumps(problem_dict))

        logging.info(f'Successfully ran on {len(problems)-num_problem_errors}/{len(problems)}' +
                     f' problems and saved results to {self.log_dir}')


    def eval_pass_at_k(self, timestamp: str, k: int):
        """
        pass@k metric over a subset of run logs
        Args:
            timestamp (str): (optional) go through all problems with a run generated with timestamp
                (if None, go through the latest run for every problem currently in logs)
            k (int): k for pass@k
        """

        path = pathlib.Path(self.log_dir)
        problem_paths = sorted(list(path.iterdir()))  # Get all logged problems
        paks = []
        for p in problem_paths:
            n = 0
            c = 0
            # Select one of the runs per problem
            if len(timestamp) == 0:
                # Get latest run
                path = pathlib.Path(p)
                run_paths = sorted(list(path.iterdir()))  # Get all the runs per problem
                run_path = run_paths[-1]
            else:
                # Get 'timestamp' run
                run_path = p / timestamp

            with open(f'{run_path}/results.json', 'r') as f:
                results = json.load(f)
            n += len(results['solutions'])
            c += Counter(sol['fitness'] for sol in results['solutions'])[1.0]

            pak = pass_at_k(n=n, c=c, k=k)
            paks.append(pak)

        pak_overall = sum(paks) / len(paks)
        return pak_overall


# Load hydra config from yaml files and command line arguments.
@hydra.main(
    config_name="p3config",
    version_base="1.2",
)
def main(cfg):
    # Run
    logging.info("----------------- Config ---------------")
    logging.info(OmegaConf.to_yaml(cfg))
    logging.info("----------------- End -----------------")
    p3 = P3(cfg)

    if cfg.eval_k > 0: logging.info(f"PASS@K: {p3.eval_pass_at_k(timestamp=cfg.eval_timestamp, k=cfg.eval_k)}")
    else: p3.run()


if __name__ == "__main__":
    main()
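
The eval_pass_at_k method above aggregates a per-problem pass@k using openelm.utils.code_eval.pass_at_k. The standard unbiased estimator, given n generated solutions of which c pass, is pass@k = 1 - C(n - c, k) / C(n, k). A minimal sketch of such a helper under that assumption (the actual implementation in openelm.utils.code_eval may differ in signature or numerics):

import math


def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k estimate: 1 - C(n - c, k) / C(n, k).

    n: solutions sampled for a problem, c: solutions with fitness 1.0, k: evaluation budget.
    """
    if n - c < k:
        # Fewer than k failures in total: any draw of k samples must contain a success.
        return 1.0
    return 1.0 - math.comb(n - c, k) / math.comb(n, k)


# Example: 32 samples for a problem, 3 of them correct, k=1 -> 3/32 = 0.09375.
print(pass_at_k(n=32, c=3, k=1))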

2 changes: 2 additions & 0 deletions src/openelm/__init__.py
@@ -3,3 +3,5 @@
 from openelm.elm import ELM
 
 __version__ = importlib_version("openelm")
+
+__all__ = ["ELM"]
5 changes: 2 additions & 3 deletions src/openelm/benchmarks/benchmark_bugs.py
@@ -13,8 +13,7 @@

 from openelm.codegen import model_setup, sample, truncate
 from openelm.configs import BaseConfig
-from openelm.utils.code_eval import eval_completions, mutate_code
-from openelm.utils.diff_eval import apply_diff, split_diff
+from openelm.utils import apply_diff, eval_completions, mutate_code, split_diff


 @dataclass
@@ -54,7 +53,7 @@ def __init__(self, cfg: BenchmarkBugsConfig):

         os.environ["TOKENIZERS_PARALLELISM"] = "false"

-        self.device = torch.device("cuda" if cfg.cuda else "cpu")
+        self.device = torch.device("cuda")
         self.model, self.tokenizer, self.device = model_setup(cfg, self.device)

     def benchmark_parity(self, n_bugs, **kwargs):
2 changes: 1 addition & 1 deletion src/openelm/benchmarks/benchmark_crossover.py
@@ -86,7 +86,7 @@ def __init__(self, cfg: BenchmarkCrossoverConfig):

         os.environ["TOKENIZERS_PARALLELISM"] = "false"

-        self.device = torch.device("cuda" if cfg.cuda else "cpu")
+        self.device = torch.device("cuda")
         self.model, self.tokenizer, self.device = model_setup(cfg, self.device)

     def construct_prompt(self, seeds):
3 changes: 2 additions & 1 deletion src/openelm/benchmarks/benchmark_lm_speed.py
@@ -8,11 +8,12 @@
 from tqdm import trange

 from openelm.codegen import model_setup, sample
+from openelm.configs import BaseConfig
 from openelm.environments import SQUARE_SEED


 @dataclass
-class BenchmarkSpeedConfig:
+class BenchmarkSpeedConfig(BaseConfig):
     hydra: Any = field(
         default_factory=lambda: {
             "run": {"dir": "logs/benchmarks/lm_speed/${now:%Y-%m-%d-%H-%M-%S}"}
40 changes: 26 additions & 14 deletions src/openelm/codegen/codegen_utilities.py
@@ -1,11 +1,14 @@
 import os
 import random
 import re
+from typing import Optional

 import numpy as np
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer

+from openelm.configs import ModelConfig
+

 def set_seed(seed=None, deterministic=True) -> int:
     if seed is None:
@@ -66,37 +69,46 @@ def find_re(string, pattern, start_pos):
     return completion


-def model_setup(cfg, device=None):
+def model_setup(cfg: ModelConfig, device=None, codegen_tokenizer: bool = True):
     set_seed(cfg.seed, deterministic=True)
     if device is None:
-        device = torch.device("cuda" if cfg.cuda else "cpu")
+        device = torch.device("cuda")
     use_fp16 = True
     if not cfg.fp16 or device.type == "cpu":
         use_fp16 = False

-    if cfg.model.startswith("codegen-16B"):
+    if "codegen-16B" in cfg.model_path:
         use_fp16 = True

-    tokenizer = AutoTokenizer.from_pretrained(cfg.model)
-    tokenizer.padding_side = "left"
-    tokenizer.pad_token = 50256
+    tokenizer = AutoTokenizer.from_pretrained(cfg.model_path)
+    if codegen_tokenizer:
+        tokenizer.padding_side = "left"
+        tokenizer.pad_token = 50256

-    model_path = cfg.model
     if cfg.gpus > 1:
         model = torch.nn.DataParallel(
-            create_model(model_path, fp16=use_fp16), device_ids=list(range(cfg.gpus))
+            create_model(cfg.model_path, fp16=use_fp16),
+            device_ids=list(range(cfg.gpus)),
         ).to(device)
     else:
-        model = create_model(model_path, fp16=use_fp16).to(device)
+        model = create_model(cfg.model_path, fp16=use_fp16).to(device)
     return model, tokenizer, device


 def sample(
-    batch, cfg, model, tokenizer, decode: bool = True, starting_idx=None, **kwargs
+    batch,
+    cfg: ModelConfig,
+    model,
+    tokenizer,
+    decode: bool = True,
+    starting_idx: Optional[int] = None,
+    num_return_sequences: Optional[int] = None,
+    **kwargs
 ) -> list[str]:
     """Run a model on a batch of contexts for a particular task."""
-    batch_size = kwargs.get("batch_size", cfg.batch_size)
-    device = kwargs.get("device", torch.device("cuda" if cfg.cuda else "cpu"))
+    if num_return_sequences is None:
+        num_return_sequences = cfg.batch_size
+    device = kwargs.get("device", torch.device("cuda"))
     temperature = kwargs.get("temperature", cfg.temp)
     top_p = kwargs.get("top_p", cfg.top_p)
     gen_max_len = kwargs.get("gen_max_len", cfg.gen_max_len)
@@ -111,7 +123,7 @@ def sample(
         tokens = model.module.generate(
             **batch,
             do_sample=True,
-            num_return_sequences=batch_size,
+            num_return_sequences=num_return_sequences,
             temperature=temperature,
             max_new_tokens=gen_max_len,
             top_p=top_p,
@@ -122,7 +134,7 @@
         tokens = model.generate(
             **batch,
             do_sample=True,
-            num_return_sequences=batch_size,
+            num_return_sequences=num_return_sequences,
             temperature=temperature,
             max_new_tokens=gen_max_len,
             top_p=top_p,
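
The new model_setup and sample signatures take a typed ModelConfig and decouple num_return_sequences from the tokenized batch size. A usage sketch, assuming ModelConfig exposes the fields this diff reads (model_path, seed, fp16, gpus, batch_size, temp, top_p, gen_max_len); the real dataclass in openelm.configs may have different names or required arguments:

from openelm.codegen import model_setup, sample
from openelm.configs import ModelConfig

# Illustrative values only; ModelConfig's actual constructor may differ.
cfg = ModelConfig(
    model_path="Salesforce/codegen-350M-mono",
    batch_size=2,
    temp=0.8,
    top_p=0.95,
    gen_max_len=128,
)
model, tokenizer, device = model_setup(cfg)  # device now defaults to CUDA

prompt = "def fibonacci(n):\n"
batch = tokenizer([prompt] * cfg.batch_size, return_tensors="pt").to(device)

# num_return_sequences overrides cfg.batch_size only when passed explicitly.
completions = sample(batch, cfg, model, tokenizer, num_return_sequences=4)

Passing num_return_sequences separately lets a caller draw more (or fewer) completions per prompt than the batch size, which the old batch_size-based default conflated.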
