Rewrite the rewrite command to handle chunking and other improvements
bakks committed Mar 1, 2023
1 parent 40b5227 commit 6281de9
Showing 5 changed files with 112 additions and 69 deletions.
2 changes: 1 addition & 1 deletion butterfish/butterfish.go
@@ -47,7 +47,7 @@ type LLM interface {
CompletionStream(request *util.CompletionRequest, writer io.Writer) (string, error)
Completion(request *util.CompletionRequest) (string, error)
Embeddings(ctx context.Context, input []string) ([][]float64, error)
Edits(ctx context.Context, content, instruction, model string) (string, error)
Edits(ctx context.Context, content, instruction, model string, temperature float32) (string, error)
}

type ButterfishCtx struct {
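The interface change above means every LLM implementation now accepts a caller-supplied temperature for edits. A minimal sketch of how a call site threads that parameter through, reduced to a stub type with only the Edits method (hypothetical names; the real interface also requires CompletionStream, Completion, and Embeddings):

```go
package main

import (
	"context"
	"fmt"
	"strings"
)

// editor mirrors just the updated Edits signature from the LLM interface.
type editor interface {
	Edits(ctx context.Context, content, instruction, model string, temperature float32) (string, error)
}

// upperStub is a fake client that ignores the instruction, model, and
// temperature and simply upper-cases the content, so the call shape can be
// exercised without an API key.
type upperStub struct{}

func (upperStub) Edits(ctx context.Context, content, instruction, model string, temperature float32) (string, error) {
	return strings.ToUpper(content), nil
}

func main() {
	var client editor = upperStub{}
	edited, err := client.Edits(context.Background(), "hello world", "shout this", "code-davinci-edit-001", 0.6)
	if err != nil {
		panic(err)
	}
	fmt.Println(edited) // HELLO WORLD
}
```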
164 changes: 102 additions & 62 deletions butterfish/commands.go
@@ -65,12 +65,15 @@ type CliCommandConfig struct {
} `cmd:"" help:"Generate a shell command from a prompt, i.e. pass in what you want, a shell command will be generated. Accepts piped input. You can use the -f command to execute it sight-unseen."`

Rewrite struct {
Prompt string `arg:"" help:"Instruction to the model on how to rewrite."`
Inputfile string `short:"i" help:"File to rewrite."`
Outputfile string `short:"o" help:"File to write the rewritten output to."`
Inplace bool `short:"I" help:"Rewrite the input file in place, cannot be set at the same time as the Output file flag."`
Model string `short:"m" default:"code-davinci-edit-001" help:"GPT model to use for editing. At compile time this should be either 'code-davinci-edit-001' or 'text-davinci-edit-001'."`
} `cmd:"" help:"Rewrite a file using a prompt, must specify either a file path or provide piped input, and can output to stdout, output to a given file, or edit the input file in-place."`
Prompt string `arg:"" help:"Instruction to the model on how to rewrite."`
Inputfile string `short:"i" help:"Source file for content to rewrite. If not set then there must be piped input."`
Outputfile string `short:"o" help:"File to write the rewritten output to."`
Inplace bool `short:"I" help:"Rewrite the input file in place, cannot be set at the same time as the outputfile flag."`
Model string `short:"m" default:"code-davinci-edit-001" help:"GPT model to use for editing. At compile time this should be either 'code-davinci-edit-001' or 'text-davinci-edit-001'."`
Temperature float32 `short:"T" default:"0.6" help:"Temperature to use for the prompt, higher temperature indicates more freedom/randomness when generating each token."`
ChunkSize int `short:"c" default:"4000" help:"Number of bytes to rewrite at a time if the file must be split up."`
MaxChunks int `short:"C" default:"128" help:"Maximum number of chunks to rewrite from a specific file."`
} `cmd:"" help:"Rewrite a file using a prompt, must specify either a file path or provide piped input, and can output to stdout, output to a given file, or edit the input file in-place. This command uses the OpenAI edit API rather than the completion API."`

Exec struct {
Command []string `arg:"" help:"Command to execute." optional:""`
@@ -123,6 +126,13 @@ func (this *ButterfishCtx) getPipedStdin() string {
return ""
}

func (this *ButterfishCtx) getPipedStdinReader() io.Reader {
if !this.InConsoleMode && util.IsPipedStdin() {
return os.Stdin
}
return nil
}
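The new getPipedStdinReader helper returns os.Stdin as a streaming reader when input is piped, instead of slurping everything into a string the way getPipedStdin does, which is what lets the rewrite path chunk arbitrarily large input. The check behind util.IsPipedStdin is not shown in this diff; a self-contained sketch of the usual stat-based test (an assumption about its implementation):

```go
package main

import (
	"fmt"
	"os"
)

// isPipedStdin reports whether stdin is a pipe or redirected file rather
// than an interactive terminal. This mirrors the kind of check a helper
// like util.IsPipedStdin presumably performs; the real code may differ.
func isPipedStdin() bool {
	info, err := os.Stdin.Stat()
	if err != nil {
		return false
	}
	// A character device means an interactive terminal; anything else is piped.
	return info.Mode()&os.ModeCharDevice == 0
}

func main() {
	fmt.Println("piped stdin:", isPipedStdin())
}
```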

// Given a parsed input split into a slice, join the string together
// and remove any leading/trailing quotes
func (this *ButterfishCtx) cleanInput(input []string) string {
@@ -190,7 +200,7 @@ func (this *ButterfishCtx) ExecCommand(parsed *kong.Context, options *CliCommand
case "summarize":
chunks, err := util.GetChunks(
os.Stdin,
uint64(options.Summarize.ChunkSize),
options.Summarize.ChunkSize,
options.Summarize.MaxChunks)

if err != nil {
@@ -215,60 +225,15 @@ func (this *ButterfishCtx) ExecCommand(parsed *kong.Context, options *CliCommand
return err

case "rewrite <prompt>":
prompt := options.Rewrite.Prompt
model := options.Rewrite.Model
if prompt == "" {
return errors.New("Please provide a prompt")
}
if model == "" {
return errors.New("Please provide a model")
}

// cannot set Outputfile and Inplace at the same time
if options.Rewrite.Outputfile != "" && options.Rewrite.Inplace {
return errors.New("Cannot set both outputfile and inplace flags")
}

input := this.getPipedStdin()
filename := options.Rewrite.Inputfile
if input != "" && filename != "" {
return errors.New("Please provide either piped data or a file path, not both")
}
if input == "" && filename == "" {
return errors.New("Please provide a file path or piped data to rewrite")
}
if filename != "" {
// we have a filename but no piped input, read the file
content, err := ioutil.ReadFile(filename)
if err != nil {
return err
}
input = string(content)
}

edited, err := this.LLMClient.Edits(this.Ctx, input, prompt, model)
if err != nil {
return err
}

outputFile := options.Rewrite.Outputfile
// if output file is empty then check inplace flag and use input as output
if outputFile == "" && options.Rewrite.Inplace {
outputFile = filename
}

if outputFile == "" {
// If there's no output file specified then print edited text
this.StylePrintf(this.Config.Styles.Answer, "%s", edited)
} else {
// otherwise we write to the output file
err = ioutil.WriteFile(outputFile, []byte(edited), 0644)
if err != nil {
return err
}
}

return nil
return this.rewriteCommand(
options.Rewrite.Prompt,
options.Rewrite.Model,
options.Rewrite.Inputfile,
options.Rewrite.Outputfile,
options.Rewrite.Inplace,
options.Rewrite.ChunkSize,
options.Rewrite.MaxChunks,
options.Rewrite.Temperature)

case "gencmd <prompt>":
input := this.cleanInput(options.Gencmd.Prompt)
@@ -464,6 +429,81 @@ func (this *ButterfishCtx) Prompt(prompt string, model string, maxTokens int, te
return err
}

func (this *ButterfishCtx) rewriteCommand(
prompt, model string,
inputFilePath, outputFilePath string,
inPlace bool,
chunkSize, maxChunks int,
temperature float32) error {
if prompt == "" {
return errors.New("Please provide a prompt")
}
if model == "" {
return errors.New("Please provide a model")
}

if inPlace {
if inputFilePath == "" {
return errors.New("Cannot use the edit in-place flag (--inplace, -I) without specifying an input file with (--inputfile, -i)")
}

if outputFilePath != "" {
// cannot set Outputfile and Inplace at the same time
return errors.New("Cannot set both (--outputfile, -o) and (--inplace, -I) flags")
} else {
// if output file is empty then check inplace flag and use input as output
outputFilePath = inputFilePath
}
}

var inputReader io.Reader
var outputWriter io.Writer

inputReader = this.getPipedStdinReader()
if inputReader != nil && inputFilePath != "" {
return errors.New("Please provide either piped data or a file path, not both")
}
if inputReader == nil && inputFilePath == "" {
return errors.New("Please provide a file path or piped data to rewrite")
}
if inputFilePath != "" {
// open the file and get a reader
inputFile, err := os.Open(inputFilePath)
if err != nil {
return err
}
defer inputFile.Close()
inputReader = inputFile
}

if outputFilePath != "" {
// open output file for writing
outputFile, err := os.Create(outputFilePath)
if err != nil {
return err
}
defer outputFile.Close()
outputWriter = outputFile
} else {
outputWriter = util.NewStyledWriter(this.Out, this.Config.Styles.Answer)
}

return util.ChunkFromReader(inputReader, chunkSize, maxChunks, func(i int, chunk []byte) error {
edited, err := this.LLMClient.Edits(this.Ctx, string(chunk), prompt, model, temperature)
if err != nil {
return err
}

// drop any added newline
if len(edited) > 0 && len(chunk) > 0 && edited[len(edited)-1] == '\n' && chunk[len(chunk)-1] != '\n' {
edited = edited[:len(edited)-1]
}

_, err = outputWriter.Write([]byte(edited))
return err
})
}
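The trailing-newline check near the end of the chunk callback keeps the output comparable to the input: a newline is dropped only when the model added one that the source chunk did not have. A small standalone sketch of that rule:

```go
package main

import "fmt"

// trimAddedNewline drops a trailing newline from the edited text only when
// the original chunk did not end with one, mirroring the check above.
func trimAddedNewline(chunk, edited string) string {
	if len(edited) > 0 && len(chunk) > 0 &&
		edited[len(edited)-1] == '\n' && chunk[len(chunk)-1] != '\n' {
		return edited[:len(edited)-1]
	}
	return edited
}

func main() {
	fmt.Printf("%q\n", trimAddedNewline("no trailing newline", "edited text\n")) // "edited text"
	fmt.Printf("%q\n", trimAddedNewline("ends with newline\n", "edited text\n")) // "edited text\n"
}
```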

// Given a description of functionality, we call GPT to generate a shell
// command
func (this *ButterfishCtx) gencmdCommand(description string) (string, error) {
@@ -621,7 +661,7 @@ func (this *ButterfishCtx) SummarizePath(path string, chunkSize, maxChunks int)
this.StylePrintf(this.Config.Styles.Question, "Summarizing %s\n", path)

fs := afero.NewOsFs()
chunks, err := util.GetFileChunks(this.Ctx, fs, path, uint64(chunkSize), maxChunks)
chunks, err := util.GetFileChunks(this.Ctx, fs, path, chunkSize, maxChunks)
if err != nil {
return err
}
5 changes: 4 additions & 1 deletion butterfish/gpt.go
@@ -157,7 +157,9 @@ func (this *GPT) Embeddings(ctx context.Context, input []string) ([][]float64, e

const GPTEditModel = "code-davinci-edit-001"

func (this *GPT) Edits(ctx context.Context, content, instruction, model string) (string, error) {
func (this *GPT) Edits(ctx context.Context,
content, instruction, model string,
temperature float32) (string, error) {
if model == "" {
model = GPTEditModel
}
@@ -166,6 +168,7 @@ func (this *GPT) Edits(ctx context.Context, content, instruction, model string)
Model: model,
Input: content,
Instruction: instruction,
Temperature: &temperature,
}

if this.verbose {
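Temperature is handed to the request struct as a pointer, a common client-library pattern that lets an unset value be distinguished from an explicit 0 when the request is serialized (whether the underlying OpenAI client relies on this is an assumption). A generic illustration of the pattern with hypothetical field names:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// editRequest imitates the shape of an edit request with an optional
// temperature; the struct and tags here are illustrative only, not the
// client library's actual types.
type editRequest struct {
	Model       string   `json:"model"`
	Temperature *float32 `json:"temperature,omitempty"`
}

func main() {
	zero := float32(0)
	withZero, _ := json.Marshal(editRequest{Model: "code-davinci-edit-001", Temperature: &zero})
	unset, _ := json.Marshal(editRequest{Model: "code-davinci-edit-001"})
	fmt.Println(string(withZero)) // {"model":"code-davinci-edit-001","temperature":0}
	fmt.Println(string(unset))    // {"model":"code-davinci-edit-001"}
}
```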
2 changes: 1 addition & 1 deletion embedding/index.go
@@ -669,7 +669,7 @@ func (this *DiskCachedEmbeddingIndex) EmbedFile(ctx context.Context, path string
}

// first we chunk the file
chunks, err := util.GetFileChunks(ctx, this.Fs, absPath, uint64(chunkSize), maxChunks)
chunks, err := util.GetFileChunks(ctx, this.Fs, absPath, chunkSize, maxChunks)
if err != nil {
return nil, err
}
8 changes: 4 additions & 4 deletions util/util.go
@@ -24,7 +24,7 @@ type CompletionRequest struct {
func ChunkFile(
fs afero.Fs,
path string,
chunkSize uint64,
chunkSize int,
maxChunks int,
callback func(int, []byte) error) error {

@@ -39,7 +39,7 @@

func ChunkFromReader(
reader io.Reader,
chunkSize uint64,
chunkSize int,
maxChunks int,
callback func(int, []byte) error) error {

@@ -66,7 +66,7 @@ ChunkFromReader(
// Given a filesystem, a path, a chunk size, and maximum number of chunks,
// return a list of chunks of the file at the given path
func GetFileChunks(ctx context.Context, fs afero.Fs, path string,
chunkSize uint64, maxChunks int) ([][]byte, error) {
chunkSize int, maxChunks int) ([][]byte, error) {
chunks := make([][]byte, 0)

err := ChunkFile(fs, path, chunkSize, maxChunks, func(i int, chunk []byte) error {
@@ -81,7 +81,7 @@ func GetFileChunks(ctx context.Context, fs afero.Fs, path string,
return chunks, err
}

func GetChunks(reader io.Reader, chunkSize uint64, maxChunks int) ([][]byte, error) {
func GetChunks(reader io.Reader, chunkSize int, maxChunks int) ([][]byte, error) {
chunks := make([][]byte, 0)
err := ChunkFromReader(reader, chunkSize, maxChunks, func(i int, chunk []byte) error {
chunks = append(chunks, chunk)
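The signature changes above keep ChunkFromReader as the core primitive: read up to chunkSize bytes at a time, hand each piece to a callback, and stop after maxChunks. A simplified, self-contained sketch of those semantics (the real implementation in util.go may differ in details such as short-read handling):

```go
package main

import (
	"fmt"
	"io"
	"strings"
)

// chunkFromReader is a standalone sketch of the behavior suggested by
// util.ChunkFromReader's signature: read up to chunkSize bytes per chunk,
// invoke the callback with the chunk index and bytes, and stop after
// maxChunks chunks or end of input.
func chunkFromReader(reader io.Reader, chunkSize, maxChunks int, callback func(int, []byte) error) error {
	buf := make([]byte, chunkSize)
	for i := 0; i < maxChunks; i++ {
		n, err := io.ReadFull(reader, buf)
		if n > 0 {
			if cbErr := callback(i, buf[:n]); cbErr != nil {
				return cbErr
			}
		}
		if err == io.EOF || err == io.ErrUnexpectedEOF {
			return nil
		}
		if err != nil {
			return err
		}
	}
	return nil
}

func main() {
	reader := strings.NewReader("a fairly long input string that will be split into small chunks")
	err := chunkFromReader(reader, 16, 4, func(i int, chunk []byte) error {
		fmt.Printf("chunk %d: %q\n", i, chunk)
		return nil
	})
	if err != nil {
		panic(err)
	}
}
```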
