Commit 034493f7 authored by hazrmard's avatar hazrmard
Browse files

bugfix w/ policy evaluations

parent 501e3484
This diff is collapsed.
......@@ -268,15 +268,16 @@
%% Cell type:code id: tags:
``` python
tanks = TanksFactory(n = n_tanks, e = n_engines, **nominal_config)
env = TanksPhysicalEnv(tanks, tstep=tstep)
timesteps = 50000 #50000 # max timesteps in one episode
timesteps = 30000 #50000 # max timesteps in one episode
ppo_params = dict(
state_dim = env.observation_space.shape[0],
action_dim = 6,
policy = ActorCriticBinary,
epochs = 5, # update policy for K epochs
lr = 0.02, # learning rate
n_latent_var = 64, # number of variables in hidden layer
betas = (0.9, 0.999),
gamma = 0.99, # discount factor
......@@ -509,12 +510,12 @@
trial_params = dict(
alpha_inner = 1e-3,
alpha_outer = 1e-2,
n_inner = 4,
n_outer = 2,
data_model = True,
post_steps = 300
data_model = False,
post_steps = 30000
)
pth = './bin/populated_vs_empty_library/'
os.makedirs(pth, exist_ok=True)
tanks_ = TanksFactory(n = n_tanks, e = n_engines, **nominal_config)
......
......@@ -75,7 +75,7 @@ class ActorCriticBinary(nn.Module):
dist = Bernoulli(action_probs)
action_logprobs = dist.log_prob(action).sum(-1)
dist_entropy = dist.entropy().sum(-1) # TODO, sum entropy over variables
dist_entropy = dist.entropy()
state_value = self.value_layer(state)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment