This repository has been archived by the owner on Jan 22, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQLearner.js
85 lines (76 loc) · 3.3 KB
/
QLearner.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/**
 * Creates a new tabular Q-learner that drives a single Player.
 *
 * The learner keeps one array of action values per state hash (as returned by
 * `player.hash()`), picks actions epsilon-greedily and updates the table with
 * the Q-learning rule after every move.
 *
 * @constructor
 * @param {Player} _player the Player this QLearner is associated to
 * @param {Object} [_options] optional overrides for the learning parameters
 * @param {number} [_options.alpha=0.1] learning rate (0: don't learn anything)
 * @param {number} [_options.gamma=0.9] discount factor
 * @param {number} [_options.epsilon=0.1] exploration rate
 * @author Christian Vogt
 */
function QLearner(_player, _options) {
    checkInstance(_player, Player);

    var options = _options || {};

    this.player = _player;
    this.stateTable = {};
    // Compare against undefined (not truthiness) so that 0 is a valid override.
    this.alpha = options.alpha !== undefined ? options.alpha : 0.1; // learning rate 0:dont learn anything
    this.gamma = options.gamma !== undefined ? options.gamma : 0.9; // discount factor
    this.epsilon = options.epsilon !== undefined ? options.epsilon : 0.1; // exploration rate

    /**
     * Move the player one step: choose an action for the current state,
     * perform it, read the reward and update the stateTable entry of the
     * state/action pair that was just taken.
     */
    this.move = function () {
        var moveDebug = false;
        var currentHash = this.player.hash();
        if (moveDebug) {
            // Only needed for the log line, so it is not built on every move.
            var currentPosition = new Position(this.player.position.posx, this.player.position.posy);
            console.log('QLearner:\t\t current Position: ' + currentPosition.toString());
        }

        // determine action to perform (initState stores the entry itself)
        var currentValues = this.initState(currentHash);
        if (moveDebug) console.log('QLearner:\t\t current ActionTable: ' + currentValues);
        var action = this.chooseAction(currentValues);
        if (moveDebug) console.log('QLearner:\t\t Chosen Action: ' + action);

        // ok, now perform it and collect the reward
        this.player.performAction(action);
        var reward = this.player.reward();

        // determine the state we ended up in
        var nextValues = this.initState(this.player.hash());

        // Q-learning bootstraps from the best value of the next state. Using
        // chooseAction() here would mix a random exploratory value into the
        // target with probability epsilon.
        var bestNextValue = Math.max.apply(Math, nextValues);

        // Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        currentValues[action] = currentValues[action] +
            this.alpha * (reward + this.gamma * bestNextValue - currentValues[action]);
    };

    /**
     * Choose an action based on the given `_actionValues`.
     *
     * With probability `epsilon` a uniformly random index is returned;
     * otherwise the index of the largest value, with ties broken uniformly
     * at random.
     *
     * @param {Array} _actionValues the array containing the actionValues
     * @return {ACTIONS} the determined action to perform (an index into `_actionValues`)
     */
    this.chooseAction = function (_actionValues) {
        var chooseActionDebug = false;

        // random?
        if (Math.random() < this.epsilon) {
            return Math.floor(Math.random() * _actionValues.length);
        }

        // collect every index that holds the maximum value
        var bestValue = Math.max.apply(Math, _actionValues);
        var actionCandidates = [];
        for (var i = 0; i < _actionValues.length; i++) {
            if (_actionValues[i] === bestValue) {
                actionCandidates.push(i);
            }
        }

        var randomFromCandidates = actionCandidates[Math.floor(Math.random() * actionCandidates.length)];
        if (chooseActionDebug) console.log('ChooseAction:\t actionCandidates to choose from: ' + actionCandidates.toString());
        return randomFromCandidates;
    };

    /**
     * Init a single ActionTable entry.
     *
     * Creates the entry (one 0 per key of `ACTIONS`) when the hash is not in
     * the stateTable yet; an existing entry is returned unchanged.
     *
     * @param {String} _hash the hash for the stateTable entry
     * @returns {Array} the action values for this hash or a default set (0)
     */
    this.initState = function (_hash) {
        if (!this.stateTable[_hash]) {
            this.stateTable[_hash] = newFilledArray(Object.keys(ACTIONS).length, 0);
        }
        return this.stateTable[_hash];
    };
}