diff --git a/src/relaqs/api/utils.py b/src/relaqs/api/utils.py index 00b01d62..4a156426 100644 --- a/src/relaqs/api/utils.py +++ b/src/relaqs/api/utils.py @@ -204,7 +204,7 @@ def run_noisy_one_qubit_experiment(gate, n_training_iterations=1, noise_file=" # ---------------------> Configure algorithm and Environment <------------------------- alg_config = DDPGConfig() alg_config.framework("torch") - alg_config.environment("my_env", env_config=env_config) + alg_config.environment(NoisySingleQubitEnv, env_config=env_config) alg_config.rollouts(batch_mode="complete_episodes") alg_config.callbacks(GateSynthesisCallbacks) alg_config.train_batch_size = NoisySingleQubitEnv.get_default_env_config()["steps_per_Haar"] diff --git a/src/relaqs/environments/single_qubit_env.py b/src/relaqs/environments/single_qubit_env.py index e83177ac..7e434c92 100644 --- a/src/relaqs/environments/single_qubit_env.py +++ b/src/relaqs/environments/single_qubit_env.py @@ -30,6 +30,7 @@ def __init__(self, env_config): self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(env_config["observation_space_size"],)) self.action_space = gym.spaces.Box(low=np.array([-1, -1, -1]), high=np.array([1, 1, 1])) self.delta = env_config["delta"] # detuning + self.detuning = 0 self.U_target = env_config["U_target"] self.U_initial = env_config["U_initial"] # future todo, can make random initial state self.U = env_config["U_initial"].copy() @@ -70,6 +71,10 @@ def compute_reward(self, fidelity): return (-3 * np.log10(1.0 - fidelity) + np.log10(1.0 - self.prev_fidelity)) + (3 * fidelity - self.prev_fidelity) def hamiltonian(self, delta, alpha, gamma_magnitude, gamma_phase): + print("delta: ", delta) + print("alpha: ", alpha) + print("gamma_magnitude: ", gamma_magnitude) + print("gamma_phase: ", gamma_phase) return (delta + alpha) * Z + gamma_magnitude * (np.cos(gamma_phase) * X + np.sin(gamma_phase) * Y) def reset(self, *, seed=None, options=None): @@ -87,7 +92,7 @@ def reset(self, *, seed=None, options=None): return starting_observeration, info def hamiltonian_update(self, alpha, gamma_magnitude, gamma_phase): - H = self.hamiltonian(self.delta, alpha, gamma_magnitude, gamma_phase) + H = self.hamiltonian(self.detuning, alpha, gamma_magnitude, gamma_phase) self.H_array.append(H) def H_tot_upate(self, num_time_bins): diff --git a/tests/relaqs/environments/test_gate_synth_env_rllib.py b/tests/relaqs/environments/test_gate_synth_env_rllib.py index 7d7c2be4..a7940de1 100644 --- a/tests/relaqs/environments/test_gate_synth_env_rllib.py +++ b/tests/relaqs/environments/test_gate_synth_env_rllib.py @@ -7,6 +7,7 @@ from relaqs.environments.noisy_two_qubit_env import NoisyTwoQubitEnv from relaqs.environments.noisy_single_qubit_env import NoisySingleQubitEnv from relaqs.environments.single_qubit_env import SingleQubitEnv +from relaqs.api.utils import run from relaqs.api.utils import (return_env_from_alg, run_noisless_one_qubit_experiment, @@ -111,6 +112,7 @@ def test_noisy_training(gate_to_train, number_of_training_iterations): n_training_iterations=n_training_iterations, noise_file=noise_file ) + env = return_env_from_alg(alg) sr = SaveResults(env, alg) save_dir = sr.save_results() @@ -125,7 +127,7 @@ def test_noiseless_training(gate_to_train, number_of_training_iterations): n_train_iterations= number_of_training_iterations alg, _ = run_noisless_one_qubit_experiment(gate_to_train, n_training_iterations=n_train_iterations - ) + ) env = return_env_from_alg(alg) sr = SaveResults(env, alg) save_dir = sr.save_results()