diff --git a/ArmRobot/Assets/ML-Agents/Timers.meta b/ArmRobot/Assets/ML-Agents/Timers.meta deleted file mode 100644 index 0262e7f1..00000000 --- a/ArmRobot/Assets/ML-Agents/Timers.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: f2942cad64d35463993df6db9c396e4c -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/ArmRobot/Assets/ML-Agents/Timers/ArticulationRobot_timers.json b/ArmRobot/Assets/ML-Agents/Timers/ArticulationRobot_timers.json deleted file mode 100644 index 883f5477..00000000 --- a/ArmRobot/Assets/ML-Agents/Timers/ArticulationRobot_timers.json +++ /dev/null @@ -1 +0,0 @@ -{"count":1,"gauges":{"TouchCube.CumulativeReward":{"count":6,"max":-0.709468663,"min":-2.463026,"value":-2.35320449,"weightedAverage":-1.8074373}},"self":18.4648816,"total":28.587856,"children":{"AgentSendState":{"count":29299,"self":0.16121,"total":0.264026,"children":{"CollectObservations":{"count":2930,"self":0.059074999999999996,"total":0.059074999999999996,"children":null},"CollectDiscreteActionMasks":{"count":2930,"self":0.004565,"total":0.004565,"children":null},"AgentInfo.ToProto":{"count":2930,"self":0.017301,"total":0.039175999999999996,"children":{"GenerateSensorData":{"count":2930,"self":0.021875,"total":0.021875,"children":null}}}}},"DecideAction":{"count":29299,"self":9.6932232,"total":9.693223,"children":null},"AgentAct":{"count":29299,"self":0.164736,"total":0.164814,"children":{"AgentInfo.ToProto":{"count":6,"self":3.4E-05,"total":7.8E-05,"children":{"GenerateSensorData":{"count":6,"self":4.4E-05,"total":4.4E-05,"children":null}}}}}}} \ No newline at end of file diff --git a/ArmRobot/Assets/ML-Agents/Timers/ArticulationRobot_timers.json.meta b/ArmRobot/Assets/ML-Agents/Timers/ArticulationRobot_timers.json.meta deleted file mode 100644 index fdcaac28..00000000 --- a/ArmRobot/Assets/ML-Agents/Timers/ArticulationRobot_timers.json.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 7094c553a78b548d9bb1b3b27f91e3fa -TextScriptImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/ArmRobot/Assets/Models/TouchCubeSample.nn b/ArmRobot/Assets/Models/TouchCubeSample.nn index 79a29ed1..6b3b8aa8 100644 Binary files a/ArmRobot/Assets/Models/TouchCubeSample.nn and b/ArmRobot/Assets/Models/TouchCubeSample.nn differ diff --git a/ArmRobot/Assets/Scenes/ArticulationRobot.unity b/ArmRobot/Assets/Scenes/ArticulationRobot.unity index 7b3d655e..33ce11a0 100644 --- a/ArmRobot/Assets/Scenes/ArticulationRobot.unity +++ b/ArmRobot/Assets/Scenes/ArticulationRobot.unity @@ -1412,13 +1412,14 @@ GameObject: - component: {fileID: 1390582875} - component: {fileID: 1390582874} - component: {fileID: 1390582877} + - component: {fileID: 1390582878} m_Layer: 0 m_Name: MLAgents m_TagString: Untagged m_Icon: {fileID: 0} m_NavMeshLayer: 0 m_StaticEditorFlags: 0 - m_IsActive: 0 + m_IsActive: 1 --- !u!114 &1390582874 MonoBehaviour: m_ObjectHideFlags: 0 @@ -1451,7 +1452,7 @@ MonoBehaviour: m_Name: m_EditorClassIdentifier: m_BrainParameters: - VectorObservationSize: 79 + VectorObservationSize: 9 NumStackedVectorObservations: 1 VectorActionSize: 03000000030000000300000003000000030000000300000003000000 VectorActionDescriptions: [] @@ -1462,6 +1463,7 @@ MonoBehaviour: m_BehaviorName: TouchCube TeamId: 0 m_UseChildSensors: 1 + m_ObservableAttributeHandling: 0 --- !u!4 &1390582876 Transform: m_ObjectHideFlags: 0 @@ -1490,6 +1492,29 @@ MonoBehaviour: m_EditorClassIdentifier: DecisionPeriod: 20 TakeActionsBetweenDecisions: 1 +--- !u!114 &1390582878 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInstance: {fileID: 0} + m_PrefabAsset: {fileID: 0} + m_GameObject: {fileID: 1390582873} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: e57a788acd5e049c6aa9642b450ca318, type: 3} + m_Name: + m_EditorClassIdentifier: + RootBody: {fileID: 1442213769} + Settings: + UseModelSpaceTranslations: 1 + UseModelSpaceRotations: 1 + UseLocalSpaceTranslations: 0 + UseLocalSpaceRotations: 0 + UseModelSpaceLinearVelocity: 0 + UseLocalSpaceLinearVelocity: 0 + UseJointPositionsAndAngles: 1 + UseJointForces: 0 + sensorName: --- !u!1 &1442213767 GameObject: m_ObjectHideFlags: 0 diff --git a/ArmRobot/Assets/Scripts/RobotAgent.cs b/ArmRobot/Assets/Scripts/RobotAgent.cs index fa017872..4b36dd3c 100644 --- a/ArmRobot/Assets/Scripts/RobotAgent.cs +++ b/ArmRobot/Assets/Scripts/RobotAgent.cs @@ -14,7 +14,7 @@ public class RobotAgent : Agent RobotController robotController; TouchDetector touchDetector; TablePositionRandomizer tablePositionRandomizer; - + void Start() { @@ -41,21 +41,6 @@ public override void CollectObservations(VectorSensor sensor) // No robot is present, no observation should be added return; } - // current rotations - float[] rotations = robotController.GetCurrentJointRotations(); - foreach (float rotation in rotations) - { - // normalize rotation to [-1, 1] range - float normalizedRotation = (rotation / 360.0f) % 1f; - sensor.AddObservation(normalizedRotation); - } - - foreach (var joint in robotController.joints) - { - sensor.AddObservation(joint.robotPart.transform.position - robot.transform.position); - sensor.AddObservation(joint.robotPart.transform.forward); - sensor.AddObservation(joint.robotPart.transform.right); - } // relative cube position Vector3 cubePosition = cube.transform.position - robot.transform.position; @@ -63,8 +48,8 @@ public override void CollectObservations(VectorSensor sensor) // relative end position Vector3 endPosition = endEffector.transform.position - robot.transform.position; - sensor.AddObservation(endPosition); - sensor.AddObservation(cubePosition - endPosition); + sensor.AddObservation(endPosition); + sensor.AddObservation(cubePosition - endPosition); } public override void OnActionReceived(float[] vectorAction) @@ -76,6 +61,13 @@ public override void OnActionReceived(float[] vectorAction) robotController.RotateJoint(jointIndex, rotationDirection, false); } + // Knocked the cube off the table + if (cube.transform.position.y < -1.0) + { + SetReward(-1f); + EndEpisode(); + } + // end episode if we touched the cube if (touchDetector.hasTouchedTarget) { @@ -86,7 +78,7 @@ public override void OnActionReceived(float[] vectorAction) //reward float distanceToCube = Vector3.Distance(endEffector.transform.position, cube.transform.position); // roughly 0.7f - + var jointHeight = 0f; // This is to reward the agent for keeping high up // max is roughly 3.0f for (int jointIndex = 0; jointIndex < robotController.joints.Length; jointIndex ++) diff --git a/ArmRobot/Packages/manifest.json b/ArmRobot/Packages/manifest.json index e018df38..ceab8380 100755 --- a/ArmRobot/Packages/manifest.json +++ b/ArmRobot/Packages/manifest.json @@ -3,7 +3,8 @@ "com.unity.ext.nunit": "1.0.0", "com.unity.ide.rider": "2.0.1", "com.unity.ide.vscode": "1.2.0", - "com.unity.ml-agents": "1.0.0-preview", + "com.unity.ml-agents": "1.2.0-preview", + "com.unity.ml-agents.extensions": "git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions", "com.unity.test-framework": "1.1.13", "com.unity.textmeshpro": "3.0.0-preview.1", "com.unity.ugui": "1.0.0", @@ -38,5 +39,11 @@ "com.unity.modules.vr": "1.0.0", "com.unity.modules.wind": "1.0.0", "com.unity.modules.xr": "1.0.0" + }, + "lock": { + "com.unity.ml-agents.extensions": { + "revision": "HEAD", + "hash": "4e8f08c6260742a936d6881f9e3a48b31ea45993" + } } } diff --git a/ArmRobot/ProjectSettings/ProjectVersion.txt b/ArmRobot/ProjectSettings/ProjectVersion.txt index 6489044a..94fa1422 100755 --- a/ArmRobot/ProjectSettings/ProjectVersion.txt +++ b/ArmRobot/ProjectSettings/ProjectVersion.txt @@ -1,2 +1,2 @@ -m_EditorVersion: 2020.1.0b7 -m_EditorVersionWithRevision: 2020.1.0b7 (6cfebb967dcd) +m_EditorVersion: 2020.1.0b8 +m_EditorVersionWithRevision: 2020.1.0b8 (726fc0bca2bc) diff --git a/images/reward.png b/images/reward.png index 6c1dc3c5..bcbe47f3 100644 Binary files a/images/reward.png and b/images/reward.png differ diff --git a/ur3_config.yml b/ur3_config.yml index 24944bb1..babb2d21 100644 --- a/ur3_config.yml +++ b/ur3_config.yml @@ -7,7 +7,7 @@ TouchCube: init_entcoef: 1.0 learning_rate: 3.0e-4 learning_rate_schedule: constant - max_steps: 5.0e5 + max_steps: 1.25e5 memory_size: 128 normalize: false num_update: 1