
Commit

That did not work.
JulioJerez committed Sep 27, 2024
1 parent 42118f0 commit 9aa278a
Showing 11 changed files with 67 additions and 73 deletions.
@@ -27,7 +27,7 @@ namespace ndAdvancedRobot
#define ND_TRAIN_MODEL
#define CONTROLLER_NAME "ndRobotArmReach"

//#define CONTROLLER_RESUME_TRAINING
#define CONTROLLER_RESUME_TRAINING

class ndActionVector
{
@@ -197,8 +197,8 @@ namespace ndAdvancedRobot
class ndController : public ndBrainAgentContinuePolicyGradient
{
public:
ndController(const ndSharedPtr<ndBrain>& brain)
:ndBrainAgentContinuePolicyGradient(brain)
ndController(const ndSharedPtr<ndBrain>& policyNetwork)
:ndBrainAgentContinuePolicyGradient(policyNetwork)
,m_robot(nullptr)
{
}
@@ -562,8 +562,6 @@ namespace ndAdvancedRobot

auto GaussianReward = [](ndFloat32 param)
{
//ndFloat32 invRewardSigma2 = 500.0f;
//return ndExp(-invRewardSigma2 * param2);
return param * param * param * param;
};
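
Note on this hunk: the commented-out lines applied a Gaussian kernel to a squared error term, while the kept line raises its argument to the fourth power; further below that argument is the normalized alignment (cos + 1) * 0.5, so the reward still peaks at 1 when the effector is perfectly aligned. A standalone sketch of the two shapes, for comparison only (the constant 500 comes from the commented-out code; the helper names are not from the repository):

    #include <cmath>

    // old, commented-out shape: Gaussian kernel over a squared error term
    float GaussianKernelReward(float error2, float invRewardSigma2 = 500.0f)
    {
        return std::exp(-invRewardSigma2 * error2); // 1 at zero error, decays quickly
    }

    // kept shape: quartic in the normalized alignment (cos + 1) * 0.5, in [0, 1]
    float QuarticReward(float alignment)
    {
        return alignment * alignment * alignment * alignment; // 1 only when fully aligned
    }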

@@ -572,7 +570,6 @@
ndFloat32 posit_zReward = rewardWeigh * ScalarReward(positError2.m_z);
ndFloat32 azimuthReward = rewardWeigh * ScalarReward(positError2.m_w);

//ndFloat32 angleError = CalculateDeltaTargetRotation(currentEffectorMatrix);
const ndMatrix targetMatrix(ndPitchMatrix(m_targetLocation.m_pitch) * ndYawMatrix(m_targetLocation.m_yaw) * ndRollMatrix(m_targetLocation.m_roll));
const ndMatrix relativeRotation(currentEffectorMatrix * targetMatrix.OrthoInverse());
ndFloat32 sideCos = currentEffectorMatrix.m_up.DotProduct(targetMatrix.m_up).GetScalar();
@@ -582,7 +579,7 @@
ndFloat32 angularReward1 = rewardWeigh * GaussianReward((frontCos + 1.0f) * 0.5f);

ndFloat32 reward = angularReward0 + angularReward1;
if ((angularReward0 > 0.195) && (angularReward1 > 0.195f))
//if ((angularReward0 > 0.195) && (angularReward1 > 0.195f))
{
reward = reward + posit_xReward + posit_zReward + azimuthReward;
}
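
Note on this hunk: with the if-statement commented out, the positional terms are now added to the reward unconditionally instead of only once both angular terms are near their maximum. A small sketch of the two compositions (illustrative only; rewardWeigh and the 0.195 threshold come from the surrounding code, the function name does not):

    // gateOnAlignment = true reproduces the old behavior, false the new one
    float ComposeReward(float angular0, float angular1,
                        float posit_x, float posit_z, float azimuth, bool gateOnAlignment)
    {
        float reward = angular0 + angular1;
        if (!gateOnAlignment || ((angular0 > 0.195f) && (angular1 > 0.195f)))
        {
            reward += posit_x + posit_z + azimuth;
        }
        return reward;
    }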
@@ -1087,13 +1084,13 @@ namespace ndAdvancedRobot
char fileName[256];
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> critic(ndBrainLoad::Load(fileName));
m_master->GetValueNetwork()->CopyFrom(**critic);
ndSharedPtr<ndBrain> valueNetwork(ndBrainLoad::Load(fileName));
m_master->GetValueNetwork()->CopyFrom(**valueNetwork);

snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> actor(ndBrainLoad::Load(fileName));
m_master->GetPolicyNetwork()->CopyFrom(**actor);
ndSharedPtr<ndBrain> policyNetwork(ndBrainLoad::Load(fileName));
m_master->GetPolicyNetwork()->CopyFrom(**policyNetwork);
#endif

auto SpawnModel = [this, scene, &visualMesh, floor](const ndMatrix& matrix)
@@ -1112,8 +1109,8 @@

ndInt32 countX = 22;
ndInt32 countZ = 23;
countX = 10;
countZ = 11;
//countX = 10;
//countZ = 11;

// add a hidden battery of models to generate trajectories in parallel
for (ndInt32 i = 0; i < countZ; ++i)
@@ -1309,8 +1306,8 @@ void ndAdvancedIndustrialRobot(ndDemoEntityManager* const scene)
char fileName[256];
snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> brain(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(brain, model, true));
ndSharedPtr<ndBrain> policy(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(policy, model, true));

ndSharedPtr<ndUIEntity> robotUI(new ndRobotUI(scene, (RobotModelNotify*)*model->GetNotifyCallback()));
scene->Set2DDisplayRenderFunction(robotUI);
16 changes: 8 additions & 8 deletions newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp
@@ -149,12 +149,12 @@ namespace ndCarpole_1
Init(robot);
}

RobotModelNotify(const ndSharedPtr<ndBrain>& brain, ndModelArticulation* const robot)
RobotModelNotify(const ndSharedPtr<ndBrain>& policy, ndModelArticulation* const robot)
:ndModelNotify()
,m_controller(nullptr)
,m_controllerTrainer(nullptr)
{
m_controller = new ndController(brain);
m_controller = new ndController(policy);
m_controller->m_robot = this;
Init(robot);
}
@@ -339,13 +339,13 @@ namespace ndCarpole_1
char fileName[256];
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> critic(ndBrainLoad::Load(fileName));
m_master->GetValueNetwork()->CopyFrom(**critic);
ndSharedPtr<ndBrain> valueNetwork(ndBrainLoad::Load(fileName));
m_master->GetValueNetwork()->CopyFrom(**valueNetwork);

snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> actor(ndBrainLoad::Load(fileName));
m_master->GetPolicyNetwork()->CopyFrom(**actor);
ndSharedPtr<ndBrain> policyNetwork(ndBrainLoad::Load(fileName));
m_master->GetPolicyNetwork()->CopyFrom(**policyNetwork);
#endif

ndWorld* const world = scene->GetWorld();
@@ -561,8 +561,8 @@ void ndCartpoleContinue(ndDemoEntityManager* const scene)
snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);

ndSharedPtr<ndBrain> brain(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(brain, model));
ndSharedPtr<ndBrain> policy(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(policy, model));
#endif

matrix.m_posit.m_y = 0.5f;
12 changes: 6 additions & 6 deletions newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp
@@ -378,13 +378,13 @@ namespace ndCarpole_0
char fileName[256];
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> critic(ndBrainLoad::Load(fileName));
m_master->GetValueNetwork()->CopyFrom(**critic);
ndSharedPtr<ndBrain> valueNetwork(ndBrainLoad::Load(fileName));
m_master->GetValueNetwork()->CopyFrom(**valueNetwork);

snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> actor(ndBrainLoad::Load(fileName));
m_master->GetPolicyNetwork()->CopyFrom(**actor);
ndSharedPtr<ndBrain> policyNetwork(ndBrainLoad::Load(fileName));
m_master->GetPolicyNetwork()->CopyFrom(**policyNetwork);
#endif

ndWorld* const world = scene->GetWorld();
@@ -601,8 +601,8 @@ void ndCartpoleDiscrete(ndDemoEntityManager* const scene)
snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);

ndSharedPtr<ndBrain> brain(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(brain, model));
ndSharedPtr<ndBrain> policy(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(policy, model));
#endif

matrix.m_posit.m_y = 0.5f;
@@ -1495,8 +1495,8 @@ void ndQuadrupedTest_1(ndDemoEntityManager* const scene)

char fileName[256];
ndGetWorkingFileName(CONTROLLER_NAME, fileName);
ndSharedPtr<ndBrain> brain(ndBrainLoad::Load(fileName));
referenceModel->SetNotifyCallback(new RobotModelNotify(brain, referenceModel, true));
ndSharedPtr<ndBrain> policy(ndBrainLoad::Load(fileName));
referenceModel->SetNotifyCallback(new RobotModelNotify(policy, referenceModel, true));

ndSharedPtr<ndUIEntity> quadrupedUI(new ndModelUI(scene, (RobotModelNotify*)*referenceModel->GetNotifyCallback()));
scene->Set2DDisplayRenderFunction(quadrupedUI);
@@ -1640,8 +1640,8 @@ void ndQuadrupedTest_2(ndDemoEntityManager* const scene)

char fileName[256];
ndGetWorkingFileName(CONTROLLER_NAME, fileName);
ndSharedPtr<ndBrain> brain(ndBrainLoad::Load(fileName));
referenceModel->SetNotifyCallback(new RobotModelNotify(brain, referenceModel, true));
ndSharedPtr<ndBrain> policy(ndBrainLoad::Load(fileName));
referenceModel->SetNotifyCallback(new RobotModelNotify(policy, referenceModel, true));

ndSharedPtr<ndUIEntity> quadrupedUI(new ndModelUI(scene, (RobotModelNotify*)*referenceModel->GetNotifyCallback()));
scene->Set2DDisplayRenderFunction(quadrupedUI);
8 changes: 4 additions & 4 deletions newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
@@ -222,13 +222,13 @@ namespace ndUnicycle
Init(robot);
}

RobotModelNotify(const ndSharedPtr<ndBrain>& brain, ndModelArticulation* const model)
RobotModelNotify(const ndSharedPtr<ndBrain>& policy, ndModelArticulation* const model)
:ndModelNotify()
,m_controller(nullptr)
,m_controllerTrainer(nullptr)
{
m_timestep = 0.0f;
m_controller = new ndController(brain);
m_controller = new ndController(policy);
m_controller->m_robot = this;
Init(model);
}
@@ -727,8 +727,8 @@ void ndUnicycleController(ndDemoEntityManager* const scene)

char fileName[256];
ndGetWorkingFileName(CONTROLLER_NAME, fileName);
ndSharedPtr<ndBrain> brain(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(brain, model));
ndSharedPtr<ndBrain> policy(ndBrainLoad::Load(fileName));
model->SetNotifyCallback(new RobotModelNotify(policy, model));
#endif

matrix.m_posit.m_x -= 0.0f;
@@ -27,7 +27,7 @@
#include "ndBrainAgentContinuePolicyGradient_Trainer.h"

#define ND_CONTINUE_POLICY_GRADIENT_BUFFER_SIZE (1024 * 128)
#define ND_CONTINUE_POLICY_GRADIENT_MIN_VARIANCE ndBrainFloat(1.0e-1f)
#define ND_CONTINUE_POLICY_GRADIENT_MIN_VARIANCE ndBrainFloat(1.0e-2f)

//#define ND_USE_LOG_DEVIATION

@@ -62,18 +62,17 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::HyperParameters::HyperParamete
//*********************************************************************************************
//
//*********************************************************************************************
//class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : public ndBrainLayerActivationTanh
class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : public ndBrainLayerActivation
class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : public ndBrainLayerActivationTanh
{
public:
LastActivationLayer(ndInt32 neurons)
:ndBrainLayerActivation(neurons * 2)
:ndBrainLayerActivationTanh(neurons * 2)
,m_minimumSigma(ND_CONTINUE_POLICY_GRADIENT_MIN_VARIANCE)
{
}

LastActivationLayer(const LastActivationLayer& src)
:ndBrainLayerActivation(src)
:ndBrainLayerActivationTanh(src)
,m_minimumSigma(src.m_minimumSigma)
{
}
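
Note on this hunk: the policy's last activation layer is switched back from the plain ndBrainLayerActivation base to ndBrainLayerActivationTanh, and the header above lowers ND_CONTINUE_POLICY_GRADIENT_MIN_VARIANCE from 1.0e-1f to 1.0e-2f. A hypothetical, self-contained sketch of the idea only, not the library code (the exact handling of the deviation half sits in the collapsed part of this hunk): the layer carries 2*n values, n action means followed by n deviations, squashes everything with tanh, and keeps the deviation half above the minimum so the exploration noise cannot collapse to zero.

    #include <algorithm>
    #include <cmath>
    #include <vector>

    void LastActivationSketch(std::vector<float>& output, float minimumSigma = 1.0e-2f)
    {
        for (float& v : output)
        {
            v = std::tanh(v);                      // tanh squashing on every unit
        }
        const std::size_t n = output.size() / 2;   // [0, n): means, [n, 2n): deviations
        for (std::size_t i = n; i < output.size(); ++i)
        {
            output[i] = std::max(std::fabs(output[i]), minimumSigma);
        }
    }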
@@ -86,7 +85,7 @@ class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : pu
#pragma optimize( "", off )
void MakePrediction(const ndBrainVector& input, ndBrainVector& output) const
{
ndBrainLayerActivation::MakePrediction(input, output);
ndBrainLayerActivationTanh::MakePrediction(input, output);
#ifdef ND_USE_LOG_DEVIATION
for (ndInt32 i = m_neurons / 2 - 1; i >= 0; --i)
{
@@ -103,8 +102,7 @@ class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : pu
#pragma optimize( "", off )
void InputDerivative(const ndBrainVector& input, const ndBrainVector& output, const ndBrainVector& outputDerivative, ndBrainVector& inputDerivative) const
{
//ndBrainLayerActivationTanh::InputDerivative(input, output, outputDerivative, inputDerivative);
ndBrainLayerActivation::InputDerivative(input, output, outputDerivative, inputDerivative);
ndBrainLayerActivationTanh::InputDerivative(input, output, outputDerivative, inputDerivative);
#ifdef ND_USE_LOG_DEVIATION
for (ndInt32 i = m_neurons / 2 - 1; i >= 0; --i)
{
@@ -278,8 +276,7 @@ void ndBrainAgentContinuePolicyGradient_Trainer::SelectAction(ndBrainVector& act
for (ndInt32 i = numberOfActions - 1; i >= 0; --i)
{
ndBrainFloat sample = ndBrainFloat(actions[i] + generator.m_d(generator.m_gen) * actions[i + numberOfActions]);
//ndBrainFloat squashedAction = ndClamp(sample, ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
ndBrainFloat squashedAction = sample;
ndBrainFloat squashedAction = ndClamp(sample, ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
actions[i] = squashedAction;
}
}
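
Note on this hunk: the clamp on the sampled action is restored, so each action becomes clamp(mean + N(0,1) * sigma, -1, 1). A standalone sketch of that sampling, assuming (as the indexing above suggests) that the action vector holds n means followed by n deviations; names are illustrative, not from the repository:

    #include <algorithm>
    #include <random>
    #include <vector>

    void SampleActionsSketch(std::vector<float>& actions, std::mt19937& gen)
    {
        std::normal_distribution<float> normal(0.0f, 1.0f);
        const int n = static_cast<int>(actions.size()) / 2;
        for (int i = n - 1; i >= 0; --i)
        {
            const float sample = actions[i] + normal(gen) * actions[i + n];
            actions[i] = std::clamp(sample, -1.0f, 1.0f); // the restored squashing step
        }
    }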
6 changes: 3 additions & 3 deletions newton-4.00/sdk/dBrain/ndBrainAgentDDPG.h
@@ -45,13 +45,13 @@ class ndBrainAgentDDPG: public ndBrainAgent
void Save(ndBrainSave* const loadSave);

void InitWeights();
ndSharedPtr<ndBrain> m_policy;
ndSharedPtr<ndBrain> m_actor;
};

template<ndInt32 statesDim, ndInt32 actionDim>
ndBrainAgentDDPG<statesDim, actionDim>::ndBrainAgentDDPG(const ndSharedPtr<ndBrain>& actor)
:ndBrainAgent()
,m_policy(actor)
,m_actor(actor)
{
}

@@ -112,7 +112,7 @@ void ndBrainAgentDDPG<statesDim, actionDim>::Step()
ndBrainFixSizeVector<statesDim> observations;

GetObservation(&observations[0]);
m_policy->MakePrediction(observations, actions);
m_actor->MakePrediction(observations, actions);
ApplyActions(&actions[0]);
}

22 changes: 11 additions & 11 deletions newton-4.00/sdk/dBrain/ndBrainAgentDDPG_Trainer.h
@@ -112,7 +112,7 @@ class ndBrainAgentDDPG_Trainer: public ndBrainAgent, public ndBrainThreadPool

void CalculateQvalue(const ndBrainVector& state, const ndBrainVector& actions);

ndBrain m_policy;
ndBrain m_actor;
ndBrain m_critic;
ndBrain m_targetActor;
ndBrain m_targetCritic;
@@ -143,7 +143,7 @@ template<ndInt32 statesDim, ndInt32 actionDim>
template<ndInt32 statesDim, ndInt32 actionDim>
ndBrainAgentDDPG_Trainer<statesDim, actionDim>::ndBrainAgentDDPG_Trainer(const HyperParameters& hyperParameters)
:ndBrainAgent()
,m_policy()
,m_actor()
,m_critic()
,m_targetActor()
,m_targetCritic()
@@ -181,7 +181,7 @@ ndBrainAgentDDPG_Trainer<statesDim, actionDim>::ndBrainAgentDDPG_Trainer(const H
layers.PushBack(new ndBrainLayerActivationTanh(actionDim));
for (ndInt32 i = 0; i < layers.GetCount(); ++i)
{
m_policy.AddLayer(layers[i]);
m_actor.AddLayer(layers[i]);
m_targetActor.AddLayer(layers[i]->Clone());
}

@@ -203,16 +203,16 @@
}

ndAssert(m_critic.GetOutputSize() == 1);
ndAssert(m_critic.GetInputSize() == (m_policy.GetInputSize() + m_policy.GetOutputSize()));
ndAssert(!strcmp((m_policy[m_policy.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));
ndAssert(m_critic.GetInputSize() == (m_actor.GetInputSize() + m_actor.GetOutputSize()));
ndAssert(!strcmp((m_actor[m_actor.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));

m_actorTrainers.SetCount(0);
m_criticTrainers.SetCount(0);
SetThreadCount(hyperParameters.m_threadsCount);

for (ndInt32 i = 0; i < m_bashBufferSize; ++i)
{
m_actorTrainers.PushBack(new ndBrainTrainer(&m_policy));
m_actorTrainers.PushBack(new ndBrainTrainer(&m_actor));
m_criticTrainers.PushBack(new ndBrainTrainer(&m_critic));
}

@@ -247,10 +247,10 @@ bool ndBrainAgentDDPG_Trainer<statesDim, actionDim>::IsTrainer() const
template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::InitWeights()
{
m_policy.InitWeights();
m_actor.InitWeights();
m_critic.InitWeights();

m_targetActor.CopyFrom(m_policy);
m_targetActor.CopyFrom(m_actor);
m_targetCritic.CopyFrom(m_critic);
}

@@ -436,7 +436,7 @@ void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::BackPropagateActor(const nd

ParallelExecute(PropagateBash);
m_actorOptimizer->Update(this, m_actorTrainers, -m_actorLearnRate);
m_targetActor.SoftCopy(m_policy, m_softTargetFactor);
m_targetActor.SoftCopy(m_actor, m_softTargetFactor);
}

template<ndInt32 statesDim, ndInt32 actionDim>
@@ -455,7 +455,7 @@ void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::BackPropagate()
template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::Save(ndBrainSave* const loadSave)
{
loadSave->Save(&m_policy);
loadSave->Save(&m_actor);
}

template<ndInt32 statesDim, ndInt32 actionDim>
@@ -510,7 +510,7 @@
void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::Step()
{
GetObservation(&m_currentTransition.m_observation[0]);
m_policy.MakePrediction(m_currentTransition.m_observation, m_currentTransition.m_action);
m_actor.MakePrediction(m_currentTransition.m_observation, m_currentTransition.m_action);

// explore environment
SelectAction(&m_currentTransition.m_action[0]);