# stateless_cartpole_ppo.py — RLlib example script (43 lines, 1.25 KB).
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.connectors.env_to_module import MeanStdFilter
from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
from ray.rllib.examples.envs.classes.stateless_cartpole import StatelessCartPole
from ray.rllib.utils.test_utils import add_rllib_example_script_args
# Build the standard RLlib example-script argument parser. The two defaults
# are consumed by the experiment runner as stopping criteria defaults:
# a 2M-timestep budget and a target reward of 350.0.
parser = add_rllib_example_script_args(
    default_reward=350.0,
    default_timesteps=2000000,
)
# Override a couple of parser defaults for this particular example.
parser.set_defaults(num_env_runners=3, enable_new_api_stack=True)
# Register any custom command line options on `parser` here, then (if
# needed) read their values from `args` when assembling `config` below.
args = parser.parse_args()
# Assemble the PPO configuration. `AlgorithmConfig` setters are fluent
# (each returns the config object), so the calls below are applied one at
# a time instead of as a single chained expression.
config = PPOConfig()
config = config.environment(StatelessCartPole)
config = config.env_runners(
    # Normalize observations with a running mean/std filter in the
    # env-to-module connector pipeline.
    env_to_module_connector=lambda _env: MeanStdFilter(),
)
config = config.training(
    # Scale the base learning rate by sqrt(number of learners), defaulting
    # to a single learner when `args.num_learners` is 0/None.
    lr=0.0003 * ((args.num_learners or 1) ** 0.5),
    num_epochs=6,
    vf_loss_coeff=0.05,
)
config = config.rl_module(
    model_config=DefaultModelConfig(
        # LSTM so the policy can integrate observation history — presumably
        # needed because StatelessCartPole hides part of the state (hence
        # the name); separate (non-shared) value-function layers.
        use_lstm=True,
        max_seq_len=20,
        vf_share_layers=False,
    ),
)
if __name__ == "__main__":
    # Deferred import: the runner helper is only needed when executing this
    # file as a script (not when importing `config` from elsewhere).
    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment
    # Launch the experiment with the parsed CLI `args` and the `config`
    # assembled above.
    run_rllib_example_script_experiment(config, args)