-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel-horizon-4.py
80 lines (62 loc) · 3.31 KB
/
model-horizon-4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def reward_function(params):
    """Score one simulation step from the ``params`` dict.

    Keys read from ``params``: ``all_wheels_on_track``,
    ``distance_from_center``, ``track_width``, ``speed``, ``is_crashed``,
    ``progress`` and ``steering_angle`` (only its magnitude is used).

    The reward is a sum of independent bonuses/penalties (progress, speed,
    low steering at speed, closeness to the centre line, margin to the track
    edge), clamped to ``[MIN_REWARD, MAX_REWARD]``. A crashed step always
    yields ``MIN_REWARD``.

    Returns:
        The reward as a ``float`` in ``[1e-3, 10.0]``.
    """
    # Read input parameters
    all_wheels_on_track = params['all_wheels_on_track']
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    speed = params['speed']
    is_crashed = params['is_crashed']
    progress = params['progress']
    steering_angle = abs(params['steering_angle'])

    MAX_REWARD = 10.0  # Upper clamp applied to the summed reward
    MIN_REWARD = 1e-3  # Lower clamp; also the reward for a crashed step

    # A crash gets the minimum reward outright. Merely subtracting a fixed
    # amount would let the other bonuses keep a crashed step close to
    # MAX_REWARD.
    if is_crashed:
        return float(MIN_REWARD)

    # Start with a default low reward
    reward = MIN_REWARD

    # Progress-based reward: 0.15 per unit of `progress`.
    # NOTE(review): assuming `progress` is a 0-100 percentage (the >= 75
    # check below suggests so), this term alone reaches MAX_REWARD from
    # progress ~67 onwards, so the clamp hides every other term late in the
    # lap - confirm this weighting is intended.
    reward += progress * 0.15

    # Additional reward for making significant progress
    if progress >= 75:
        reward += 2.0

    # Reward for carrying speed through gentle turns
    if steering_angle < 10.0 and speed >= 0.9:
        reward += 1.5

    # Strong reward for maintaining high speed with almost no steering
    if steering_angle < 5.0 and speed >= 0.95:
        reward += 3.0

    # Centre-line bands: (fraction of track width, bonus), tightest first.
    center_bands = (
        (0.1, 3.0),
        (0.2, 2.5),
        (0.3, 1.5),
        (0.4, 1.0),
        (0.5, 0.5),
    )
    # NOTE(review): with wheels off the track only this bonus and the edge
    # margin bonus below are withheld; the reward is not floored to
    # MIN_REWARD - confirm that is the intended off-track handling.
    if all_wheels_on_track:
        for fraction, bonus in center_bands:
            if distance_from_center <= fraction * track_width:
                reward += bonus
                break  # Only the tightest matching band counts

    # Flat speed term: penalise slow driving, reward anything at or above 0.8
    if speed < 0.8:
        reward -= 0.5
    else:
        reward += 1.8

    # Tiered bonus for approaching the 1.0 target speed. The tiers compare
    # `speed` directly instead of `1.0 - speed` against 0.05/0.1/0.2, because
    # `1.0 - 0.95` evaluates to slightly more than 0.05 in binary floating
    # point and would drop a speed of exactly 0.95 out of the top tier.
    speed_tiers = (
        (0.95, 4.0),
        (0.9, 3.0),
        (0.8, 1.5),
    )
    for floor, bonus in speed_tiers:
        if speed >= floor:
            reward += bonus
            break
    else:
        reward -= 2.0  # More than 0.2 below the target speed

    # Extra reward for keeping at least 0.05 of margin inside the half-width
    if all_wheels_on_track and (0.5 * track_width - distance_from_center) >= 0.05:
        reward += 1.0

    # Clamp to the accepted bounds and return the final reward as a float
    return float(min(max(reward, MIN_REWARD), MAX_REWARD))