-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathaubo_reach4_env.py
494 lines (425 loc) · 19.3 KB
/
aubo_reach4_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
#!/usr/bin/env python
# IMPORT
import gym
import rospy
import numpy as np
import time
import random
import sys
import yaml
import math
import datetime
import csv
import rospkg
from gym import utils, spaces
from gym.utils import seeding
from gym.envs.registration import register
from scipy.spatial.transform import Rotation
# OTHER FILES
import util_env as U
import math_util as UMath
from joint_array_publisher import JointArrayPub
import logger
# MESSAGES/SERVICES
from std_msgs.msg import String
from std_msgs.msg import Bool
from sensor_msgs.msg import JointState
from sensor_msgs.msg import Image
from geometry_msgs.msg import Point, Quaternion, Vector3
from geometry_msgs.msg import Pose
from geometry_msgs.msg import Point, PoseStamped
from openai_ros.msg import RLExperimentInfo
from moveit_msgs.msg import MoveGroupActionFeedback
register(
id='AuboReach-v1',
entry_point='aubo_reach4_env:PickbotEnv',
max_episode_steps=100, #100
)
# DEFINE ENVIRONMENT CLASS
class PickbotEnv(gym.GoalEnv):
def __init__(self, joint_increment=None, sim_time_factor=0.005, random_object=False, random_position=False,
use_object_type=False, populate_object=False, env_object_type='free_shapes'):
"""
initializing all the relevant variables and connections
:param joint_increment: increment of the joints
:param running_step: gazebo simulation time factor
:param random_object: spawn random object in the simulation
:param random_position: change object position in each reset
:param use_object_type: assign IDs to objects and used them in the observation space
:param populate_object: to populate object(s) in the simulation using sdf file
:param env_object_type: object type for environment, free_shapes for boxes while others are related to use_case
'door_handle', 'combox', ...
"""
rospy.init_node('env_node', anonymous=True)
# Assign Parameters
self._joint_increment = joint_increment # joint_increment in rad
self._random_object = random_object
self._random_position = random_position
self._use_object_type = use_object_type
self._populate_object = populate_object
self.rewardThreshold = 0.25
# Assign MsgTypes
self.joints_state = JointState()
self.current_pose_moveit = PoseStamped()
self.movement_complete = Bool()
self.movement_complete.data = False
self.moveit_action_feedback = MoveGroupActionFeedback()
self.feedback_list = []
self.new_action = []
self.publisher_to_moveit_object = JointArrayPub()
# rospy.Subscriber("/joint_states", JointState, self.joints_state_callback)
rospy.Subscriber("/pickbot/movement_complete", Bool, self.movement_complete_callback)
rospy.Subscriber("/move_group/feedback", MoveGroupActionFeedback, self.move_group_action_feedback_callback,
queue_size=4)
# Define Action and state Space and Reward Range
"""
Action Space: Box Space with 6 values.
State Space: Box Space with 12 values. It is a numpy array with shape (12,)
Reward Range: -infitity to infinity
"""
low_action = np.array([
-(1.7),
-(1.7),
-(1.7),
-(1.7),
-(1.7),
-(1.7)])
high_action = np.array([
1.7,
1.7,
1.7,
1.7,
1.7,
1.7])
self.action_space = spaces.Box(low_action, high_action)
# self.action_space = spaces.Box(-2.0, 2.0, shape=(6,), dtype="float32")
# self.goal = np.array([-1.7, -1.78, -1.77, 0.013, 1.69, 0.00023])
# self.goal = np.array([1.84, 0.71, -1.01, -1.07, -1.40, -1.66])
self.goal = np.array([-0.503, 0.605, -1.676, -1.597, -1.527, -0.036])
self.new_action = [0, 0, 0, 0, 0, 0]
# self.check_joint_states()
obs = self.get_obs()
self.observation_space = spaces.Dict(
dict(
desired_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
achieved_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
observation=spaces.Box(
-np.inf, np.inf, shape=obs["observation"].shape, dtype="float32"
),
)
)
self.counter = 1
# self.observation_space = spaces.Box(low, high)
self.reward_range = (-np.inf, np.inf)
# self.reward_range = (0, 0.9)
print("------------------start seed-------------------------")
self._seed()
print("-------------------exit seed-----------------")
self.done_reward = 0
# set up everything to publish the Episode Number and Episode Reward on a rostopic
self.episode_num = 0
self.accumulated_episode_reward = 0
self.episode_steps = 0
self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1)
self.reward_list = []
self.episode_list = []
self.step_list = []
self.csv_name = logger.get_dir() + '/result_log'
print("CSV NAME")
print(self.csv_name)
self.csv_success_exp = "success_exp" + datetime.datetime.now().strftime('%Y-%m-%d_%Hh%Mmin') + ".csv"
# self.reset()
def goal_distance(self, goal_a, goal_b):
assert goal_a.shape == goal_b.shape
return np.linalg.norm(goal_a - goal_b, axis=-1)
def compute_reward(self, achieved_goal, goal, info):
# Compute distance between goal and the achieved goal.
d = self.goal_distance(achieved_goal, goal)
print("reward distance: {}".format(-d))
# calc_d = 1 - (0.12 + 0.88 * (d / 10))
# if self.reward_type == "sparse":
# return -(d > self.distance_threshold).astype(np.float32)
# else:
return -d
# return 1/d
# success = self._is_success(achieved_goal, goal).astype(np.float32)
# return success
# Callback Functions for Subscribers to make topic values available each time the class is initialized
def joints_state_callback(self, msg):
self.joints_state = msg
def movement_complete_callback(self, msg):
self.movement_complete = msg
def move_group_action_feedback_callback(self, msg):
self.moveit_action_feedback = msg
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def reset(self):
"""
Reset The Robot to its initial Position and restart the Controllers
1) Publish the initial joint_positions to MoveIt
2) Busy waiting until the movement is completed by MoveIt
3) set target_object to random position
4) Check all Systems work
5) Create YAML Files for contact forces in order to get the average over 2 contacts
6) Create YAML Files for collision to make shure to see a collision due to high noise in topic
7) Get Observations and return current State
8) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number
9) Return State
"""
# print("Joint (reset): {}".format(np.around(self.joints_state.position, decimals=3)))
# init_joint_pos = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
init_joint_pos = [0, 0, 0, 0, 0, 0]
# self.publisher_to_moveit_object.set_joints(init_joint_pos)
# print(">>>>>>>>>>>>>>>>>>> RESET: waiting for the movement to complete")
# rospy.wait_for_message("/pickbot/movement_complete", Bool)
# while not self.movement_complete.data:
# pass
# print(">>>>>>>>>>>>>>>>>>> RESET: Waiting complete")
# start_ros_time = rospy.Time.now()
# while True:
# # Check collision:
# # invalid_collision = self.get_collisions()
# # if invalid_collision:
# # print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
# # observation = self.get_obs()
# # reward = UMath.compute_reward(observation, -200, True)
# # observation = self.get_obs()
# # print("Test Joint: {}".format(np.around(observation[1:7], decimals=3)))
# # return U.get_state(observation), reward, True, {}
#
# elapsed_time = rospy.Time.now() - start_ros_time
# if np.isclose(init_joint_pos, self.joints_state.position, rtol=0.0, atol=0.01).all():
# break
# elif elapsed_time > rospy.Duration(2): # time out
# break
# self.set_target_object(random_object=self._random_object, random_position=self._random_position)
# self._check_all_systems_ready()
self.new_action = init_joint_pos
observation = self.get_obs()
self._update_episode()
return observation
def get_distance_gripper_to_object(self, gripper_joints, height=None):
Object = self.goal
Gripper = np.asarray(gripper_joints)
distance = np.linalg.norm(Object - Gripper)
return distance
def step(self, action):
"""
Given the action selected by the learning algorithm,
we perform the corresponding movement of the robot
return: the state of the robot, the corresponding reward for the step and if its done(terminal State)
1) Read last joint positions by getting the observation before acting
2) Get the new joint positions according to chosen action (actions here are the joint increments)
3) Publish the joint_positions to MoveIt, meanwhile busy waiting, until the movement is complete
4) Get new observation after performing the action
5) Convert Observations into States
6) Check if the task is done or crashing happens, calculate done_reward and pause Simulation again
7) Calculate reward based on Observatin and done_reward
8) Return State, Reward, Done
"""
print("############################")
# print("before clipping action: {}".format(action))
# clipped_action = np.clip(action, self.action_space.low, self.action_space.high)
# print("after clipping action: {}".format(clipped_action))
self.movement_complete.data = False
# clipped_action = np.clip(action, self.action_space.low, self.action_space.high)
# 1) Read last joint positions by getting the observation before acting
# old_observation = self.get_obs()
# 2) Get the new joint positions according to chosen action (actions here are the joint increments)
# if self._joint_increment is None:
# next_action_position = action
# else:
# next_action_position = self.get_action_to_position(clipped_action, self.joints_state.position)
print('=======================next_action=====================')
print(action)
print('============================================================')
# 3) Move to position and wait for moveit to complete the execution
# self.publisher_to_moveit_object.pub_joints_to_moveit(action)
# while not self.movement_complete.data:
# pass
# start_ros_time = rospy.Time.now()
# while True:
# elapsed_time = rospy.Time.now() - start_ros_time
# if np.isclose(action, self.joints_state.position, rtol=0.0, atol=0.01).all():
# break
# elif elapsed_time > rospy.Duration(2): # time out
# break
# time.sleep(s
self.new_action = action
# 4) Get new observation and update min_distance after performing the action
obs = self.get_obs()
done = False
info = {
"is_success": self._is_success(np.asarray(action), self.goal),
}
reward = self.compute_reward(np.asarray(action), self.goal, info)
# percentage = 1 - (0.12 + 0.88 * (abs(reward) / 10))
# print("pecentage success possible: {}".format(percentage))
# with open('logs_success_rate.txt', 'a') as output:
# output.write(str(self.calculatedReward))
# if self.calculatedReward >= self.rewardThreshold:
# output.write(" :SUCCESS" + "\n")
# else:
# output.write("\n")
if self._is_success(np.asarray(action), self.goal):
done = True
if done:
joint_pos = self.joints_state.position
print("Joint in step (done): {}".format(np.around(joint_pos, decimals=3)))
# if not done:
# reward = 1.0
# else:
# reward = 0.0
# joint_pos = self.joints_state.position
# print("Joint in step (done): {}".format(np.around(joint_pos, decimals=3)))
### END of TEST ###
# self.accumulated_episode_reward += reward
self.episode_steps += 1
# info = {}
print("achieved--------------------------goal---------------------------")
print(obs['achieved_goal'])
print(self.goal)
print("-------------------------------------------------------")
goal = self.goal.copy()
self.accumulated_episode_reward += reward
print("======================the reward==========================")
print(reward)
print('==========================================================')
row_list = [reward, self.counter]
with open('rewards.csv', 'a', encoding='UTF8', newline='') as f:
writer = csv.writer(f)
# write the header
writer.writerow(row_list)
self.counter = self.counter + 1
return obs, reward, done, info
def _check_all_systems_ready(self):
"""
Checks that all subscribers for sensortopics are working
1) /joint_states
"""
self.check_joint_states()
rospy.logdebug("ALL SYSTEMS READY")
def check_joint_states(self):
joint_states_msg = None
while joint_states_msg is None and not rospy.is_shutdown():
try:
joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1)
self.joints_state = joint_states_msg
rospy.logdebug("Current joint_states READY")
except Exception as e:
rospy.logdebug("Current joint_states not ready yet, retrying==>" + str(e))
print("EXCEPTION: Joint States not ready yet, retrying.")
def get_action_to_position(self, action, last_position):
"""
takes the last position and adds the increments for each joint
returns the new position
"""
# action_position = np.asarray(last_position) + action
action_position = last_position + action
# clip action that is going to be published to -2.9 and 2.9 just to make sure to avoid loosing controll of controllers
x = np.clip(action_position, -(math.pi - 0.05), math.pi - 0.05)
return x.tolist()
def get_obs(self):
"""
Returns the state of the robot needed for Algorithm to learn
The state will be defined by a List (later converted to numpy array) of the:
1) Distance from desired point in meters
2-7) States of the 6 joints in radiants
8,9) Force in contact sensor in Newtons
10,11,12) x, y, z Position of object?
MISSING
10) RGBD image
"""
joint_states = self.joints_state
print("==================================================================================")
# if(len(action)>0):
# print(joint_states)
# if(joint_states.position[0] != action[0]):
# shoulder_joint_state = joint_states.position[0]
# if (joint_states.position[1] != action[1]):
# foreArm_joint_state = joint_states.position[1]
# if (joint_states.position[2] != action[2]):
# upperArm_joint_state = joint_states.position[2]
# if (joint_states.position[3] != action[3]):
# wrist1_joint_state = joint_states.position[3]
# if (joint_states.position[4] != action[4]):
# wrist2_joint_state = joint_states.position[4]
# if (joint_states.position[5] != action[5]):
# wrist3_joint_state = joint_states.position[5]
if(len(self.new_action)>0):
shoulder_joint_state = self.new_action[0]
foreArm_joint_state = self.new_action[1]
upperArm_joint_state = self.new_action[2]
wrist1_joint_state = self.new_action[3]
wrist2_joint_state = self.new_action[4]
wrist3_joint_state = self.new_action[5]
# for joint in joint_states.position:
# if joint > 2 * math.pi or joint < -2 * math.pi:
# print(joint_states.name)
# print(np.around(joint_states.position, decimals=3))
# sys.exit("Joint exceeds limit")
self.curr_joint = np.array(
[shoulder_joint_state, foreArm_joint_state, upperArm_joint_state, wrist1_joint_state, wrist2_joint_state,
wrist3_joint_state])
object = self.goal
# rel_pos = self.get_distance_gripper_to_object(self.joints_state.position)
# achieved_goal = np.asarray(self.joints_state.position)
achieved_goal = np.asarray(self.new_action)
rel_pos = achieved_goal - object
relative_pose = np.asarray(rel_pos)
obs = np.concatenate([achieved_goal, relative_pose])
# Stack all information into Observations List
return {
"observation": obs.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.copy(),
}
def _is_success(self, achieved_goal, desired_goal):
d = self.goal_distance(achieved_goal, desired_goal)
# old calculation
calc_d = 1 - (0.12 + 0.88 * (d / 10))
# new calculation
if d != 0:
calc_d = 1 / d
else:
calc_d = 0
self.calculatedReward = calc_d
return (calc_d >= self.rewardThreshold).astype(np.float32)
def _update_episode(self):
"""
Publishes the accumulated reward of the episode and
increases the episode number by one.
:return:
"""
if self.episode_num > 0:
self._publish_reward_topic(
self.accumulated_episode_reward,
self.episode_steps,
self.episode_num
)
self.episode_num += 1
self.accumulated_episode_reward = 0
self.episode_steps = 0
def _publish_reward_topic(self, reward, steps, episode_number=1):
"""
This function publishes the given reward in the reward topic for
easy access from ROS infrastructure.
:param reward:
:param episode_number:
:return:
"""
reward_msg = RLExperimentInfo()
reward_msg.episode_number = episode_number
reward_msg.episode_reward = reward
self.reward_pub.publish(reward_msg)
self.reward_list.append(reward)
self.episode_list.append(episode_number)
self.step_list.append(steps)
list = str(reward) + ";" + str(episode_number) + ";" + str(steps) + "\n"
with open(self.csv_name + '.csv', 'a') as csv:
csv.write(str(list))