Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// Critic update: one optimizer step on the mean squared error between the
// critic's output for (states, actions) and the bootstrapped target.
this.criticOptimizer.minimize(() => {
  // Target: rewards + gamma * qTargetsNext * (1 - dones).
  // The (1 - dones) factor zeroes the bootstrapped term wherever dones is 1.
  const discountedNext = tf.mul(gamma, qTargetsNext);
  const notDone = tf.sub(1, dones);
  const qTargets = tf.add(rewards, tf.mul(discountedNext, notDone));

  // Critic output for the sampled (state, action) inputs.
  const qExpected = this.critic.predict([states, actions]);

  // NOTE: no gradient-norm clipping is applied to the critic here.
  return tf.losses.meanSquaredError(qExpected, qTargets);
});
// Actor update
// Soft update of one layer's weights: each new weight is
//   tau * sWeights[i] + (1 - tau) * tWeights[i]
// (presumably s = source/local network, t = target network — confirm at the
// call site).
// The blend is built inside tf.tidy so the two intermediate products created
// per weight are released immediately; only the blended tensors returned from
// the callback survive. `res` itself is left undisposed, as before, in case
// later code still reads it.
const res = tf.tidy(() =>
  sWeights.map((s, i) =>
    tf.mul(s, tau).add(tf.mul(tWeights[i], 1 - tau))));
layer.setWeights(res);
// Choose an action for a single state. Everything created inside tf.tidy
// except the returned tensor is disposed when the callback finishes.
const action = tf.tidy(() => {
  // Wrap the state in an outer array before predicting, then squeeze the
  // actor's output.
  const stateBatch = tf.tensor([state]);
  const actorOutput = tf.squeeze(this.actor.predict(stateBatch));

  // Outside of training the actor's output is used unchanged.
  if (!train) {
    return actorOutput;
  }

  // Exploration: blend the actor output with sampled noise (passed through
  // the `softmax` helper defined elsewhere), weighted by epsilon:
  //   (1 - epsilon) * actorOutput + epsilon * noise
  const noise = softmax(this.noise.sample());
  const exploitTerm = tf.mul(actorOutput, 1 - this.epsilon);
  const exploreTerm = tf.mul(noise, this.epsilon);
  return tf.add(exploitTerm, exploreTerm);
});
// Read the selected action's values back asynchronously.
const data = await action.data();