-
Notifications
You must be signed in to change notification settings - Fork 1
/
TetroEnv.py
90 lines (73 loc) · 2.56 KB
/
TetroEnv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from pybrain.rl.environments.environment import Environment
from tetromino4 import *
import random
from TetroTask import arrayToObs
import numpy as np
class TetroEnv(Environment):
    """PyBrain environment that plays one piece placement per action.

    The environment owns a board, the piece currently waiting to be placed,
    and a "bag" of piece ids that is drawn from without replacement. Board
    logic (``boardToState``, ``getHighestRow``, ``actionIsValid``,
    ``addAndClearLines``, ``getBlankBoard``, ``rlAction``) is not defined in
    this file; presumably it comes from the ``tetromino4`` star import.
    """

    # number of actions
    indim = 15
    # number of states output by sensor
    outdim = 2**15

    # Piece ids that make up one full bag.
    _PIECE_IDS = (0, 1, 2, 3, 4, 5, 6)
    # Modulus used when stepping to the next candidate action.
    # NOTE(review): this is 16 while ``indim`` is 15 -- confirm which one
    # matches the action space understood by ``actionIsValid``.
    _ACTION_MODULUS = 16
    # A post-move ``getHighestRow`` value at or below this ends the game.
    _GAME_OVER_ROW = 2

    def __init__(self, board):
        """Store ``board`` as the playing field and draw the first piece."""
        self.board = board
        self.pieces = list(self._PIECE_IDS)
        self.piece = 0
        # NOTE(review): never updated anywhere in this class, so
        # getActionValid() always reports True.
        self.ActionValid = True
        self.highestRow = None
        self.rewardData = None
        # When True, a finished game does not trigger an automatic reset().
        self.isSlice = False
        self.num_resets = 0
        self.totallines = 0
        self.score = 0
        self.display = False
        self.curr_learning = True
        self.ended = False
        self.getNewPiece()

    def getSensors(self):
        """Return ``([observation], highestRow)`` for the current board.

        The observation is ``arrayToObs`` applied to the state array from
        ``boardToState`` and cast to ``int``. The highest row reported by
        ``boardToState`` is also cached on ``self.highestRow``.
        """
        bin_state_arr, highestRow = boardToState(self.board, self.piece)
        self.highestRow = highestRow
        return [int(arrayToObs(bin_state_arr))], highestRow

    def getRewardData(self, highestRow):
        """Return the state array of the current board for ``highestRow``.

        ``highestRow`` is forwarded as the third argument of
        ``boardToState``; only the first element of its result is returned.
        """
        state, _ = boardToState(self.board, self.piece, highestRow)
        return state

    def getActionValid(self):
        """Return the ``ActionValid`` flag."""
        return self.ActionValid

    def setLearning(self, flag):
        """Set the learning flag that is passed to ``addAndClearLines``."""
        self.curr_learning = flag

    def getHighestRow(self):
        """Return the highest row cached by the last ``getSensors`` call."""
        return self.highestRow

    def setDisplay(self, disp):
        """Enable or disable the ``rlAction`` call after each action."""
        self.display = disp

    def performAction(self, action):
        """Place the current piece using ``action`` and advance the game.

        ``action`` is a sequence whose first element is the action index.
        If that action is not valid for the current piece and board, the
        following indices (wrapping at ``_ACTION_MODULUS``) are tried until
        a valid one is found. The score is increased by the result of
        ``addAndClearLines``, a new piece is drawn, ``self.rewardData`` is
        refreshed, and the game is flagged as ended (and reset, unless
        ``isSlice`` is set) when the stack reaches ``_GAME_OVER_ROW``.

        Raises:
            RuntimeError: if no candidate action is valid. The original
                loop would have spun forever in that situation.
        """
        self.ended = False
        action = int(action[0])

        # Modulus + 1 probes: the first probe may be a value outside
        # [0, modulus), after which every residue is visited exactly once.
        for _ in range(self._ACTION_MODULUS + 1):
            if actionIsValid(action, self.piece, self.board):
                break
            action = (action + 1) % self._ACTION_MODULUS
        else:
            raise RuntimeError(
                "no valid action for piece %r on the current board"
                % (self.piece,)
            )

        # Captured before the move so rewardData is computed relative to it.
        highestRow = getHighestRow(self.board)
        self.score += addAndClearLines(
            self.board, action, self.piece, self.curr_learning
        )
        self.getNewPiece()
        self.rewardData, _ = boardToState(self.board, self.piece, highestRow)

        if getHighestRow(self.board) <= self._GAME_OVER_ROW:
            self.ended = True
            if not self.isSlice:
                self.reset()

        if self.display:
            rlAction(self.board, self.piece, self.score)

    def getNewPiece(self):
        """Draw the next piece from the bag into ``self.piece``.

        While more than one piece remains, a random one is removed from the
        bag. When exactly one remains, it becomes the current piece and the
        bag is refilled with every piece id.
        """
        if len(self.pieces) == 1:
            # Fixed: this used to assign to a stray ``piece_piece``
            # attribute, so the last piece of the bag was never dealt.
            self.piece = self.pieces[0]
            self.pieces = list(self._PIECE_IDS)
        else:
            self.piece = random.choice(self.pieces)
            self.pieces.remove(self.piece)

    def reset(self):
        """Start a new game on a blank board.

        The finished game's score is added to ``totallines``, the score is
        zeroed, ``num_resets`` is incremented, the bag is refilled and a
        fresh piece is drawn.
        """
        self.totallines += self.score
        self.score = 0
        self.num_resets += 1
        self.board = getBlankBoard()
        self.pieces = list(self._PIECE_IDS)
        self.getNewPiece()