
Install

pip install -U "ray[rllib]" "gymnasium[classic-control]"
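
To check that the install worked, you can start a local Ray instance and print the version (a quick sanity check; ray.init() with no arguments starts Ray on your machine):

import ray

ray.init()  # start a local Ray instance
print(ray.__version__)
ray.shutdown()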

A minimal training run

from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("CartPole-v1")      # the Gymnasium env to train in
    .env_runners(num_env_runners=2)  # parallel workers collecting rollouts
    .training(lr=1e-3, train_batch_size=4000)  # learning rate and per-iteration batch size
)
algo = config.build()

for i in range(20):
    result = algo.train()
    mean_return = result["env_runners"]["episode_return_mean"]
    print(f"iter {i}: return = {mean_return:.1f}")

Save and load checkpoints

checkpoint = algo.save("/tmp/ppo-cartpole")

# later, in a new process:
from ray.rllib.algorithms.algorithm import Algorithm
algo = Algorithm.from_checkpoint(checkpoint)
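
The restored algorithm carries its weights and configuration, so you can continue training or evaluate it right away:

result = algo.train()  # picks up from the restored state
print(result["env_runners"]["episode_return_mean"])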

Evaluate a policy

import gymnasium as gym

env = gym.make("CartPole-v1")
obs, _ = env.reset()
done = False
total = 0
while not done:
    action = algo.compute_single_action(obs)  # query the trained policy
    obs, reward, terminated, truncated, _ = env.step(action)
    total += reward
    done = terminated or truncated
print("episode return:", total)

Try a different algorithm

from ray.rllib.algorithms.dqn import DQNConfig

config = DQNConfig().environment("CartPole-v1")
algo = config.build()
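
The training loop is algorithm-agnostic: algo.train() returns the same result structure, so the loop from the PPO example works unchanged:

for i in range(20):
    result = algo.train()
    print(f"iter {i}: return = {result['env_runners']['episode_return_mean']:.1f}")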

Run with Ray Tune

Use Tune to search over hyperparameters:

from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("CartPole-v1")
    .training(lr=tune.loguniform(1e-5, 1e-2))
)

tuner = tune.Tuner(
    "PPO",
    param_space=config,
    tune_config=tune.TuneConfig(
        num_samples=10,
        metric="env_runners/episode_return_mean",
        mode="max",
    ),
)
results = tuner.fit()
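
tuner.fit() returns a ResultGrid. Because metric and mode are already set in TuneConfig above, get_best_result() can pick the best trial without repeating them (a sketch of how you might inspect it):

best = results.get_best_result()
print("best config:", best.config)
print("best return:", best.metrics["env_runners"]["episode_return_mean"])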

Next steps

Algorithms

Pick the right algorithm for your problem.

Environments

Wrap your own simulator.