Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate lexers for programs in various languages from the DFA #32

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
86 changes: 86 additions & 0 deletions cmd/lexgen/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package main

import (
"fmt"
"log"
"os"

"github.com/timtadh/getopt"
"github.com/timtadh/lexmachine/codegen/python"
"github.com/timtadh/lexmachine/dfa"
"github.com/timtadh/lexmachine/frontend"
)

// usageMessage is the one-line synopsis that usage always prints first.
var usageMessage = "lexgen -p <pattern> [-p <pattern>]*"

// extendedMessage is the long help text; usage prints it only when it is
// called with code 0 (the -h/--help path).
var extendedMessage = `
lexgen compiles regular expressions to a program

Options
-h, --help print this message
-p, --pattern=<pattern> a regex pattern

Specs
<pattern>
a regex pattern
`

// usage writes the synopsis to stderr and terminates the process.
//
// A non-zero code is treated as a usage error: a short hint pointing at
// -h/--help is printed and the process exits with that code. A zero code is
// treated as an explicit help request: the extended help text is printed and
// the process exits with status 1. This function never returns.
func usage(code int) {
	fmt.Fprintln(os.Stderr, usageMessage)
	if code != 0 {
		fmt.Fprintln(os.Stderr, "Try -h or --help for help")
		os.Exit(code)
	}
	fmt.Fprintln(os.Stderr, extendedMessage)
	os.Exit(1)
}

// main is the lexgen entry point. It collects every -p/--pattern argument,
// parses each one into an AST, folds the ASTs into a single alternation,
// builds a DFA from it and prints the generated Python lexer to stdout.
//
// Any option or parse failure is reported through log and terminates the
// process with a non-zero status.
func main() {
	short := "hp:"
	long := []string{
		"help",
		"pattern=",
	}

	_, optargs, err := getopt.GetOpt(os.Args[1:], short, long)
	if err != nil {
		log.Print(err)
		usage(1)
	}

	patterns := make([]string, 0, 10)
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			usage(0) // prints the extended help and exits
		case "-p", "--pattern":
			patterns = append(patterns, oa.Arg())
		}
	}

	if len(patterns) == 0 {
		log.Print("Must supply some regular expressions!")
		usage(1)
	}

	asts := make([]frontend.AST, 0, len(patterns))
	for _, p := range patterns {
		ast, err := frontend.Parse([]byte(p))
		if err != nil {
			log.Fatal(err)
		}
		asts = append(asts, ast)
	}

	// Fold right-to-left so the result is alt(p0, alt(p1, ... pN)): the
	// last pattern is the innermost alternative.
	lexast := asts[len(asts)-1]
	for i := len(asts) - 2; i >= 0; i-- {
		lexast = frontend.NewAltMatch(asts[i], lexast)
	}

	// python.Generate returns only the program text, so there is no error
	// to check here; the previous check re-tested the already-handled
	// GetOpt error and could never fire.
	fmt.Println(python.Generate(dfa.Generate(lexast)))
}
124 changes: 124 additions & 0 deletions codegen/python/pygen.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package python

import (
"fmt"
"strings"

"github.com/timtadh/lexmachine/dfa"
)

// header is the fixed Python source text that Generate places at the start of
// its output. It defines the tokenize entry point, the Match result class and
// the _Scanner class with its helper methods (tokenize, mvto, match, eosError,
// error). The generated start and state_N functions are appended directly
// after this text.
var header = `

def tokenize(input):
return _Scanner(input).tokenize()

class Match(object):

def __init__(self, match_id, lexeme):
self.match_id = match_id
self.lexeme = lexeme

def __repr__(self):
return self.__str__()

def __str__(self):
return "Match({}, {})".format(self.match_id, repr(self.lexeme))

class _Scanner(object):

def __init__(self, input):
self.input = input
self.idx = 0
self.buf = list()
self.tokens = list()

def tokenize(self):
state = self.start()
while state != None:
state = state()
if self.idx != len(self.input):
self.eosError()
return self.tokens

def mvto(self, next_state):
if self.idx >= len(self.input):
raise Exception("internal DFA error, index out of bounds")
self.buf.append(self.input[self.idx])
self.idx += 1
return next_state

def match(self, match_id):
self.tokens.append(Match(match_id, ''.join(self.buf)))
self.buf = list()
if self.idx < len(self.input):
return self.start()
return None

def eosError(self, state=None):
raise Exception("UnconsumedInput, {}".format(repr(self.input[self.idx-len(self.buf):])))

def error(self, state, expected):
raise Exception("UnexpectedInput, {}. expected one of: {}".format(
repr(self.input[self.idx]),
[chr(x) for x in expected]))

`

// Generate renders the given DFA as Python source text.
//
// The result is the fixed header followed by the start function and one
// state function per entry of dfa.Trans, in index order, with the functions
// separated from each other by a blank line.
func Generate(dfa *dfa.DFA) string {
	parts := []string{genStart(dfa)}
	for id := range dfa.Trans {
		parts = append(parts, genState(dfa, id))
	}
	body := strings.Join(parts, "\n\n")
	return header + body
}

// genStart emits the Python start function, which returns the state function
// for the DFA's start state (dfa.Start).
func genStart(dfa *dfa.DFA) string {
	return fmt.Sprintf(" def start(self):\n return self.state_%d", dfa.Start)
}

// genState emits the Python function for a single DFA state.
//
// The emitted function has this shape:
//   - for the error state, an unconditional call to self.error;
//   - otherwise, when the state has a transition row, an end-of-input guard
//     that either reports a match (accepting state) or calls self.eosError;
//   - an if/elif chain with one branch per input value whose transition does
//     not lead to the error state, each moving to the target state;
//   - a fall-through that reports a match for accepting states, or calls
//     self.error with the list of allowed input values otherwise.
//
// It panics with "bad dfa" when a non-accepting, non-error state has no
// usable outgoing transition.
func genState(dfa *dfa.DFA, state int) string {
	row := dfa.Trans[state]
	matchID, accepting := dfa.Accepting[state]

	out := make([]string, 0, len(row))
	out = append(out, fmt.Sprintf(" def state_%v(self):", state))

	// The error state has no transitions of its own: it only raises.
	if state == dfa.Error {
		out = append(out, fmt.Sprintf(" self.error(%d, [])", state))
		return strings.Join(out, "\n")
	}

	// End-of-input guard, emitted only when there is a transition row.
	if len(row) > 0 {
		out = append(out, " if self.idx >= len(self.input):")
		if accepting {
			out = append(out, fmt.Sprintf(" return self.match(%v)", matchID))
		} else {
			out = append(out, fmt.Sprintf(" self.eosError(%v)", state), " return")
		}
	}

	// One branch per input value that does not lead to the error state.
	allowed := make([]string, 0, len(row))
	for sym := 0; sym < len(row); sym++ {
		target := row[sym]
		if target == dfa.Error {
			continue
		}
		// The first emitted branch opens the chain with "if"; every later
		// one continues it with "elif".
		keyword := "elif"
		if len(allowed) == 0 {
			keyword = "if"
		}
		allowed = append(allowed, fmt.Sprint(sym))
		out = append(out,
			fmt.Sprintf(" %s ord(self.input[self.idx]) == %d:", keyword, sym),
			fmt.Sprintf(" return self.mvto(self.state_%d)", target),
		)
	}

	if accepting {
		out = append(out, fmt.Sprintf(" return self.match(%d)", matchID))
	} else {
		// A state that can neither accept nor advance is a malformed DFA.
		if len(allowed) == 0 && dfa.Error != state {
			panic("bad dfa")
		}
		out = append(out, fmt.Sprintf(" self.error(%d, [%v])", state, strings.Join(allowed, ", ")))
	}
	return strings.Join(out, "\n")
}