diff --git a/_sources/tutorials/installation/index.rst.txt b/_sources/tutorials/installation/index.rst.txt
index 000f97a..633d77c 100644
--- a/_sources/tutorials/installation/index.rst.txt
+++ b/_sources/tutorials/installation/index.rst.txt
@@ -5,7 +5,7 @@ GenerativeRL can be installed using pip:
 
 .. code-block:: console
 
-   $ pip install grl
+   $ pip install GenerativeRL
 
 You can also install the latest development version from GitHub:
 
diff --git a/searchindex.js b/searchindex.js
index 3739b5a..3fa4a72 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["api_doc/agents/index", "api_doc/algorithms/index", "api_doc/datasets/index", "api_doc/generative_models/index", "api_doc/neural_network/index", "api_doc/numerical_methods/index", "api_doc/rl_modules/index", "api_doc/utils/index", "concepts/index", "index", "tutorials/installation/index", "tutorials/quick_start/index", "user_guide/evaluating_agents", "user_guide/index", "user_guide/installation", "user_guide/training_agents", "user_guide/training_generative_models"], "filenames": ["api_doc/agents/index.rst", "api_doc/algorithms/index.rst", "api_doc/datasets/index.rst", "api_doc/generative_models/index.rst", "api_doc/neural_network/index.rst", "api_doc/numerical_methods/index.rst", "api_doc/rl_modules/index.rst", "api_doc/utils/index.rst", "concepts/index.rst", "index.rst", "tutorials/installation/index.rst", "tutorials/quick_start/index.rst", "user_guide/evaluating_agents.rst", "user_guide/index.rst", "user_guide/installation.rst", "user_guide/training_agents.rst", "user_guide/training_generative_models.rst"], "titles": ["grl.agents", "grl.algorithms", "grl.datasets", "grl.generative_models", "grl.neural_network", "grl.numerical_methods", "grl.rl_modules", "grl.utils", "Concepts", "GenerativeRL Documentation", "Installation", "Quick Start", "How to evaluate RL agents performance", "User Guide", "How to install GenerativeRL and its dependencies", "How to train and deploy reinforcement learning agents", "How to train generative models"], "terms": {"class": [0, 1, 2, 3, 4, 5, 6, 8, 12, 15, 16], "config": [0, 1, 3, 5, 6, 11, 12, 15, 16], "model": [0, 1, 3, 4, 5, 9, 13, 15], "sourc": [0, 1, 2, 3, 4, 5, 6, 7, 8], "overview": [0, 1, 2, 3, 4, 5, 6, 7], "The": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 16], "qgpo": [0, 1, 2, 11, 15], "algorithm": [0, 2, 8, 9, 11, 12, 15], "interfac": [0, 1, 2, 3, 4, 5, 6, 8, 11], "__init__": [0, 1, 2, 3, 4, 5, 6, 16], "action": [0, 1, 2, 6, 8, 12], "initi": [0, 1, 2, 3, 4, 5, 6, 8, 11, 16], "paramet": [0, 1, 2, 3, 4, 5, 6, 7, 8], "easydict": [0, 1, 3, 5, 6, 16], "configur": [0, 1, 3, 5, 6, 8, 11, 15, 16], "union": [0, 1, 3, 4, 5, 6], "torch": [0, 1, 3, 4, 5, 6], "nn": [0, 1, 3, 5, 6, 16], "modul": [0, 1, 3, 5, 6, 12, 15, 16], "moduledict": [0, 1, 3], "act": [0, 11, 12, 15], "ob": 0, "return_as_torch_tensor": 0, "fals": [0, 1, 3, 4, 5, 6, 7], "given": [0, 1, 3, 5, 6, 8], "an": [0, 1, 3, 4, 5, 8, 11, 12, 15], "observ": [0, 6, 8, 11, 12, 15], "return": [0, 1, 3, 4, 5, 6, 7, 12, 15, 16], "np": 0, "ndarrai": 0, "tensor": [0, 1, 3, 4, 5, 6, 8], "dict": [0, 1, 6, 16], "bool": [0, 1, 3, 4, 5, 6, 7], "whether": [0, 1, 3, 4, 5, 6, 7, 8, 14], "type": [0, 1, 3, 4, 5, 6, 7, 8, 16], "srpo": [0, 1], "train": [0, 1, 2, 3, 6, 8, 9, 11, 13], "gener": [0, 1, 2, 3, 5, 7, 9, 13], "polici": [0, 1, 2, 3, 6, 8, 11], "thi": [0, 1, 3, 4, 6, 8, 9, 11, 16], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16], "design": [0, 3, 9], "us": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "gmpgalgorithm": [0, 9], "gmpoalgorithm": [0, 9], "numpi": 0, "arrai": 0, "critic": [1, 8], "network": [1, 4, 6, 8, 13], "forward": [1, 3, 4, 6, 8, 16], "compute_double_q": [1, 6], "state": [1, 3, 5, 6, 11], "none": [1, 3, 4, 5, 6, 7, 16], "output": [1, 3, 4, 5, 6, 16], "two": [1, 3, 6], "q": [1, 6, 11], "tensordict": [1, 3, 4, 5, 6], "input": [1, 3, 4, 5, 6, 12, 15, 16], "first": [1, 5, 6, 11], "q2": [1, 6], "second": [1, 6], "q1": [1, 6], "q_loss": 1, "reward": [1, 8, 11, 12, 15], "next_stat": 1, "done": [1, 11, 12, 15], "fake_next_act": 1, "discount_factor": 1, "1": [1, 3, 4, 5, 14, 16], "0": [1, 3, 4, 5, 14, 16], "calcul": [1, 6], "loss": [1, 3, 6, 16], "next": 1, "fake": [1, 2], "float": [1, 3, 4, 5], "discount": 1, "factor": [1, 4, 5], "sampl": [1, 2, 3, 5, 8, 16], "behaviour_policy_sampl": 1, "compute_q": 1, "behaviour_policy_loss": 1, "energy_guidance_loss": [1, 3], "intern": [1, 6], "share": [1, 6], "both": [1, 3, 6, 16], "scriptmodul": [1, 6], "behaviour": [1, 2], "batch_siz": [1, 3], "solver_config": [1, 3], "t_span": [1, 3, 5], "which": [1, 2, 3, 5, 6, 8, 12, 14, 16], "condit": [1, 3, 4, 5, 6, 16], "od": [1, 8, 9], "solver": [1, 3, 5, 16], "time": [1, 3, 5, 8, 16], "span": [1, 3], "sde": [1, 8, 9, 16], "valu": [1, 3, 5, 6, 7, 8], "energi": [1, 2, 3], "guidanc": [1, 3, 4], "guidance_scal": [1, 3], "scale": [1, 3, 4, 5, 6, 16], "simul": [1, 6, 12], "dataset": [1, 8, 9, 11, 15], "guid": [1, 3, 11], "optim": [1, 3, 8, 11], "offlin": [1, 3, 8], "reinforc": [1, 3, 9, 12, 13, 14], "learn": [1, 3, 4, 9, 12, 13, 14], "base": [1, 2, 3, 5, 8], "diffus": [1, 3, 4, 5, 8, 9, 11, 16], "deploi": [1, 8, 9, 11, 12, 13], "must": 1, "contain": 1, "follow": [1, 5, 8, 14, 15, 16], "kei": [1, 8], "deploy": [1, 8, 11], "object": [1, 8, 13], "environ": [1, 2, 6, 8, 11, 12, 14, 15], "qgpodataset": [1, 9], "qgpoagent": [1, 9], "A": [1, 6, 11, 16], "weight": [1, 3, 4, 8, 16], "bia": 1, "run": [1, 6, 8, 12, 14], "creat": [1, 8, 11, 15, 16], "automat": [1, 8], "when": 1, "function": [1, 3, 4, 5, 6, 8, 11, 16], "call": [1, 11, 16], "v_loss": [1, 6], "srpo_actor_loss": 1, "srpoagent": [1, 9], "gmpo": 1, "includ": [1, 8, 11, 16], "optin": 1, "policy_optimization_loss_by_advantage_weighted_regress": 1, "policy_optimization_loss_by_advantage_weighted_regression_softmax": 1, "maximum_likelihood": [1, 3], "with_grad": [1, 3], "size": [1, 3, 4], "int": [1, 3, 4, 5, 6, 7], "tupl": [1, 3, 4], "list": [1, 3, 4, 6, 13], "batch": [1, 3, 4], "gradient": [1, 3, 8], "beta": [1, 3, 5], "weight_clamp": 1, "100": 1, "fake_act": 1, "seed": [1, 7, 8], "gpg": 1, "gpdataset": [1, 9], "random": [1, 6, 7, 8], "gpo": 1, "in_support_ql_loss": 1, "gmpg": 1, "env_id": [2, 6, 11, 12, 15], "contrast": [2, 3], "predict": [2, 3, 5], "need": [2, 6, 8, 11, 15], "true": [2, 3, 4, 5, 7], "from": [2, 3, 5, 8, 10, 11, 15, 16], "support": [2, 3, 5, 11, 16], "__getitem__": 2, "__len__": 2, "method": [2, 3, 5, 6, 8, 11, 12, 15, 16], "str": [2, 3, 5, 6], "id": [2, 6], "d4rl": [2, 14], "sometim": 2, "data": [2, 3, 4, 5, 8, 16], "augment": [2, 8], "variou": [3, 8], "continu": [3, 8], "path": [3, 5, 8, 13], "comput": [3, 5, 8], "score": [3, 8, 16], "veloc": [3, 16], "It": [3, 5, 6, 14], "can": [3, 5, 8, 10, 11, 12, 14, 15, 16], "via": 3, "nois": [3, 5, 16], "match": [3, 8, 16], "flow": [3, 5, 8, 9, 16], "ar": [3, 8, 15, 16], "score_funct": 3, "score_matching_loss": [3, 16], "velocity_funct": 3, "flow_matching_loss": [3, 16], "data_prediction_funct": [3, 5], "t": [3, 4, 5, 8, 16], "x": [3, 4, 5, 16], "frac": [3, 5, 16], "sigma": [3, 4, 5, 8, 16], "x_t": [3, 8, 16], "2": [3, 4, 5, 14, 16], "nabla_": [3, 16], "log": [3, 5, 8, 11, 16], "p_": 3, "theta": [3, 16], "": [3, 5, 11], "treetensor": 3, "dpo_loss": 3, "ref_dm": 3, "process": [3, 5, 6, 8, 11, 16], "direct": [3, 8], "dpo": 3, "develop": [3, 10], "featur": 3, "recommend": [3, 5], "averag": 3, "across": [3, 8], "forward_sampl": 3, "note": [3, 14], "revers": [3, 8, 16], "thu": 3, "form": [3, 5, 16], "rather": 3, "encod": 3, "latent": [3, 4], "space": [3, 5], "forward_sample_process": 3, "all": [3, 4], "intermedi": [3, 5], "log_prob": 3, "using_hutchinson_trace_estim": 3, "probabl": [3, 4, 5], "noise_funct": [3, 5, 16], "x_0": [3, 16], "final": [3, 4, 5], "provid": [3, 7, 8, 9, 11, 15, 16], "gaussian": [3, 5, 16], "distribut": [3, 8, 16], "result": [3, 8], "shape": [3, 4, 5, 16], "where": [3, 5, 16], "number": [3, 4, 5, 6, 11], "step": [3, 5, 6, 11, 12, 14, 15], "b": 3, "could": 3, "scalar": [3, 5], "b1": 3, "b2": 3, "n": [3, 4, 5, 11], "d": [3, 5, 16], "dimens": [3, 4, 8], "d1": 3, "d2": 3, "extra": 3, "If": [3, 5, 7], "sample_forward_process": 3, "repeat": 3, "same": [3, 5, 16], "sample_forward_process_with_fixed_x": 3, "fixed_x": 3, "fixed_mask": 3, "fix": [3, 8], "mask": 3, "sample_with_fixed_x": 3, "sample_with_log_prob": 3, "likelihood": [3, 8, 16], "weighting_schem": 3, "uncondit": [3, 4], "scheme": 3, "vanilla": 3, "maximum": [3, 8, 16], "estim": [3, 8, 16], "refer": [3, 11, 13, 15], "paper": 3, "more": [3, 5, 11, 13, 15], "detail": [3, 5, 11, 13], "lambda": [3, 5, 16], "denot": [3, 16], "g": [3, 5, 8, 14, 16], "numer": [3, 8], "stabil": 3, "we": [3, 5, 8, 11, 16], "mont": 3, "carlo": 3, "approxim": [3, 5, 6], "integr": [3, 5, 8], "p": [3, 5, 14, 16], "balanc": 3, "mse": 3, "through": [3, 4, 8, 11], "stochast": [3, 5, 8, 13], "differenti": [3, 5, 8, 16], "equat": [3, 5, 8, 16], "v_": [3, 16], "energy_model": 3, "text": [3, 16], "e": [3, 5, 8, 14, 16], "c": [3, 4, 14, 16], "sim": 3, "exp": 3, "mathcal": [3, 5, 16], "z": 3, "sample_without_energy_guid": 3, "score_function_with_energy_guid": 3, "data_prediction_function_with_energy_guid": 3, "cep": 3, "propos": 3, "exact": 3, "noise_function_with_energy_guid": 3, "nose": 3, "nabla": 3, "sample_with_fixed_x_without_energy_guid": 3, "without": [3, 5], "independ": [3, 8, 16], "get_typ": 3, "x0": [3, 5], "x1": 3, "flow_matching_loss_with_mask": 3, "signal": [3, 8], "either": 3, "ha": [3, 5, 6, 12, 15, 16], "correspond": 3, "element": [3, 4, 5], "usual": [3, 16], "x_1": [3, 16], "log_prob_x_0": 3, "function_log_prob_x_0": 3, "callabl": [3, 5, 6], "hutchinson": 3, "trace": 3, "jacobian": 3, "drift": [3, 5, 8, 16], "faster": 3, "less": 3, "accur": [3, 8], "set": [3, 7, 16], "high": [3, 5, 8], "dimension": 3, "log_likelihood": 3, "optimal_transport_flow_matching_loss": 3, "transport": 3, "plan": 3, "sample_with_mask": 3, "sample_with_mask_forward_process": 3, "between": [3, 5, 8, 16], "flow_matching_loss_small_batch_ot_plan": 3, "small": [3, 6], "acceler": 3, "concaten": 4, "along": 4, "last": [4, 5, 6], "layer": [4, 16], "hidden_s": [4, 16], "output_s": 4, "activ": 4, "dropout": 4, "layernorm": 4, "final_activ": 4, "shrink": 4, "multi": 4, "perceptron": 4, "fulli": 4, "connect": 4, "fc1": 4, "act1": 4, "fcn": 4, "actn": 4, "out": 4, "hidden": 4, "channel": 4, "option": [4, 7], "zero": 4, "default": [4, 7, 15], "block": [4, 11], "shrinkag": 4, "kwarg": [4, 5], "pass": 4, "mlp": [4, 16], "keyword": 4, "argument": [4, 5], "output_dim": [4, 16], "t_dim": [4, 16], "input_dim": 4, "condition_dim": 4, "condition_hidden_dim": 4, "t_condition_hidden_dim": 4, "tempor": 4, "spatial": 4, "residu": 4, "multipl": 4, "temporalspatialresblock": 4, "input_s": 4, "32": 4, "patch_siz": 4, "in_channel": 4, "4": [4, 5], "1152": 4, "depth": 4, "28": 4, "num_head": 4, "16": 4, "mlp_ratio": 4, "class_dropout_prob": 4, "num_class": 4, "1000": 4, "learn_sigma": 4, "transform": [4, 8, 16], "backbon": [4, 16], "offici": 4, "implement": [4, 8, 12, 15], "github": [4, 10, 14], "repo": 4, "http": [4, 10, 14], "com": [4, 10, 14], "facebookresearch": 4, "blob": 4, "main": 4, "py": [4, 14], "patch": 4, "attent": 4, "head": 4, "respect": 4, "timestep": 4, "imag": [4, 8, 14], "represent": 4, "label": 4, "forward_with_cfg": 4, "cfg_scale": 4, "also": [4, 5, 8, 10, 15, 16], "classifi": [4, 8], "free": [4, 8], "initialize_weight": 4, "unpatchifi": 4, "img": 4, "h": 4, "w": [4, 16], "token_s": 4, "condition_embedd": 4, "1d": 4, "3d": 4, "inform": [4, 6, 11, 13, 15], "origin": 4, "video": [4, 8], "alia": 4, "patch_block_s": 4, "10": 4, "convolv": 4, "each": [4, 6], "token": 4, "total_patch": 4, "ordinari": [5, 8], "defin": [5, 8, 15, 16], "dx": 5, "f": [5, 8, 16], "dt": [5, 8, 16], "term": 5, "dw": 5, "wiener": [5, 16], "order": 5, "devic": [5, 16], "atol": 5, "1e": 5, "05": 5, "rtol": 5, "dpm_solver": 5, "singlestep": 5, "solver_typ": 5, "skip_typ": 5, "time_uniform": 5, "denois": [5, 8, 16], "dpm": 5, "should": 5, "3": [5, 14], "absolut": 5, "toler": 5, "adapt": [5, 8], "rel": 5, "total": 5, "evalu": [5, 6, 8, 9, 11, 13, 15], "nfe": 5, "multistep": 5, "singlestep_fix": 5, "taylor": 5, "slightli": 5, "impact": 5, "perform": [5, 9, 13, 15], "logsnr": 5, "time_quadrat": 5, "diffusion_process": 5, "save_intermedi": 5, "diffusionprocess": 5, "t_start": 5, "solut": 5, "t_end": 5, "x_end": 5, "ode_solv": 5, "euler": [5, 8], "01": 5, "librari": [5, 8, 9, 11, 14, 16], "torchdyn": [5, 8, 16], "torchdiffeq": [5, 8], "current": [5, 16], "addit": [5, 14], "For": [5, 8, 11, 13, 14, 15, 16], "exampl": [5, 8, 11, 13, 14, 16], "trajectori": 5, "len": [5, 16], "sde_solv": 5, "sde_noise_typ": 5, "diagon": 5, "sde_typ": 5, "ito": 5, "001": 5, "torchsd": 5, "stratonovich": 5, "logqp": 5, "case": [5, 6, 11], "mu": 5, "written": 5, "mathrm": [5, 16], "w_": 5, "sqrt": 5, "covari": 5, "matrix": 5, "standard": [5, 16], "deviat": 5, "half": 5, "differ": [5, 8, 13], "vp": [5, 16], "int_": [5, 16], "linear": [5, 16], "todo": 5, "add": 5, "cosin": 5, "ve": 5, "opt": 5, "halflogsnr": 5, "inversehalflogsnr": 5, "invers": 5, "sinc": 5, "invert": 5, "beta_1": [5, 16], "beta_0": [5, 16], "d_covariance_dt": 5, "deriv": [5, 16], "d_log_scale_dt": 5, "d_scale_dt": 5, "d_std_dt": 5, "diffusion_squar": 5, "drift_coeffici": 5, "coeffici": [5, 16], "satisfi": 5, "log_scal": 5, "std": 5, "simpl": [6, 8, 11], "gym": [6, 8, 11, 12, 14, 15], "generativerl": [6, 8, 10, 12, 13, 15, 16], "collect": [6, 12], "episod": 6, "singl": [6, 8], "suitabl": 6, "experi": 6, "collect_episod": 6, "collect_step": 6, "accord": 6, "num_episod": 6, "num_step": 6, "sever": 6, "reset": [6, 11, 12, 15], "begin": 6, "No": 6, "histori": 6, "store": 6, "dictionari": [6, 8, 15], "random_polici": 6, "until": 6, "end": 6, "render_arg": 6, "resultswil": 6, "one": 6, "shot": 6, "mean": [6, 16], "bellman": 6, "backup": 6, "compute_double_v": 6, "v2": 6, "v1": 6, "v": [6, 16], "doubl": 6, "compute_mininum_v": 6, "minimum": 6, "minimum_v": 6, "compute_mininum_q": 6, "minimum_q": 6, "seed_valu": 7, "cudnn_determinist": 7, "cudnn_benchmark": 7, "make": [7, 8, 9, 11, 12, 14, 15], "cudnn": 7, "oper": 7, "determinist": 7, "enabl": [7, 8], "benchmark": 7, "convolut": 7, "framework": [8, 9], "consist": 8, "code": [8, 16], "api": [8, 11, 13, 15, 16], "user": [8, 12, 16], "friendli": 8, "rl": [8, 9, 13, 15], "agent": [8, 9, 11, 13], "In": [8, 11, 12, 15, 16], "section": [8, 11, 13, 15], "explor": 8, "core": 8, "discuss": 8, "underpin": 8, "how": [8, 9, 11, 13], "thei": 8, "leverag": 8, "address": 8, "complex": 8, "problem": [8, 9, 14], "field": [8, 16], "addition": 8, "explain": 8, "why": 8, "import": [8, 11, 12, 14, 15, 16], "what": 8, "uniqu": 8, "wide": 8, "rang": [8, 11, 12, 15, 16], "applic": [8, 16], "machin": 8, "new": [8, 11, 16], "typic": [8, 16], "most": 8, "unsupervis": 8, "techniqu": 8, "appli": 8, "task": 8, "audio": 8, "interpol": 8, "focus": 8, "dynam": 8, "These": [8, 16], "have": [8, 14], "capac": 8, "captur": 8, "demonstr": [8, 11], "promis": 8, "varieti": 8, "its": [8, 9, 11, 13, 15, 16], "variant": [8, 16], "qualiti": 8, "solv": [8, 9, 14], "dx_t": [8, 16], "dw_t": [8, 16], "unifi": [8, 12, 16], "howev": 8, "vari": 8, "definit": 8, "some": [8, 14], "under": [8, 12, 15], "common": 8, "while": [8, 15, 16], "other": [8, 11, 13, 15], "mai": 8, "requir": [8, 14], "specif": [8, 11, 15], "There": 8, "four": 8, "open": 8, "neural": [8, 13], "parameter": [8, 13], "certain": 8, "part": 8, "potenti": 8, "determin": 8, "procedur": 8, "fundament": 8, "maxim": 8, "pretrain": 8, "like": 8, "bridg": [8, 16], "fine": 8, "tune": 8, "advantag": 8, "regress": 8, "adjoint": 8, "involv": 8, "depend": [8, 9, 13], "maruyama": 8, "rung": 8, "kutta": 8, "offer": 8, "flexibl": [8, 11], "allow": 8, "custom": [8, 13], "extend": 8, "suit": [8, 11, 14], "instanc": [8, 11, 15], "easili": 8, "own": [8, 16], "architectur": [8, 16], "tailor": 8, "format": [8, 11], "decis": [8, 9], "interact": [8, 15], "receiv": 8, "penalti": 8, "cumul": 8, "take": [8, 12, 15, 16], "updat": 8, "categor": 8, "directli": [8, 16], "onlin": 8, "strategi": 8, "off": 8, "actor": 8, "research": 8, "improv": 8, "effici": 8, "synthet": 8, "decoupl": 8, "littl": 8, "modif": 8, "rank": 8, "least": 8, "pytorch": [8, 14], "unif": 8, "within": [8, 11], "simplic": 8, "intuit": 8, "extens": 8, "modular": 8, "built": 8, "mix": 8, "compon": [8, 11], "reproduc": 8, "ensur": 8, "checkpoint": 8, "possibl": 8, "minim": 8, "seek": 8, "extern": 8, "lightweight": 8, "instal": [8, 9, 13], "platform": 8, "compat": 8, "exist": 8, "work": [8, 16], "seamlessli": 8, "openai": [8, 11], "torchrl": 8, "python": [9, 14], "aim": 9, "combin": 9, "power": [9, 11], "capabl": 9, "quick": 9, "start": 9, "explan": 9, "principl": 9, "grl": [9, 10, 11, 12, 14, 15, 16], "gpagent": 9, "qgpocrit": 9, "qgpopolici": 9, "qgpoalgorithm": [9, 11, 15], "srpocrit": 9, "srpopolici": 9, "srpoalgorithm": 9, "gmpocrit": 9, "gmpopolici": 9, "gmpgcritic": 9, "gmpgpolici": 9, "qgpod4rldataset": 9, "gpd4rldataset": 9, "generative_model": [9, 16], "diffusionmodel": [9, 16], "energyconditionaldiffusionmodel": 9, "independentconditionalflowmodel": 9, "optimaltransportconditionalflowmodel": 9, "neural_network": [9, 16], "concatenatelay": 9, "multilayerperceptron": 9, "concatenatemlp": 9, "temporalspatialresidualnet": [9, 16], "dit": 9, "dit1d": 9, "dit2d": 9, "dit3d": 9, "numerical_method": [9, 16], "dpmsolver": 9, "odesolv": [9, 16], "sdesolv": 9, "gaussianconditionalprobabilitypath": [9, 16], "rl_modul": 9, "gymenvsimul": 9, "oneshotvaluefunct": 9, "vnetwork": 9, "doublevnetwork": 9, "qnetwork": 9, "doubleqnetwork": 9, "util": [9, 11], "set_se": 9, "pip": [10, 14], "you": [10, 11, 14, 15], "latest": 10, "version": [10, 14], "git": [10, 14], "opendilab": [10, 14], "easi": 11, "swiss": 11, "roll": 11, "colab": 11, "usag": [11, 13], "found": 11, "folder": 11, "grl_pipelin": [11, 15], "tutori": 11, "here": [11, 13, 16], "halfcheetah": 11, "diffusion_model": [11, 15, 16], "d4rl_halfcheetah_qgpo": [11, 15], "def": [11, 16], "qgpo_pipelin": 11, "env": [11, 12, 15], "_": [11, 12, 15, 16], "num_deploy_step": [11, 12, 15], "render": [11, 12, 15], "__name__": 11, "__main__": 11, "info": 11, "necessari": 11, "well": 11, "encapsul": 11, "after": [11, 14], "obtain": [11, 16], "loop": 11, "execut": 11, "specifi": 11, "print": 11, "consol": 11, "modifi": 11, "your": 11, "advanc": [11, 13], "pleas": [11, 13, 15], "document": [11, 13, 15], "9": 14, "higher": 14, "command": 14, "clone": 14, "cd": 14, "pybullet": 14, "mujoco": 14, "deepmind": 14, "control": 14, "etc": 14, "dm_control": 14, "setup": 14, "licens": 14, "special": 14, "23": 14, "anoth": 14, "thing": 14, "sudo": 14, "apt": 14, "get": 14, "libgl1": 14, "mesa": 14, "glx": 14, "libglib2": 14, "libsm6": 14, "libxext6": 14, "libxrend": 14, "dev": 14, "y": 14, "swig": 14, "gcc": 14, "local": 14, "dnsutil": 14, "cmake": 14, "build": 14, "essenti": 14, "libglew": 14, "libosmesa6": 14, "libglfw3": 14, "libsdl2": 14, "libglm": 14, "libfreetype6": 14, "patchelf": 14, "ffmpeg": 14, "mkdir": 14, "root": 14, "wget": 14, "org": 14, "download": 14, "mujoco210": 14, "linux": 14, "x86_64": 14, "tar": 14, "gz": 14, "o": 14, "xf": 14, "export": 14, "ld_library_path": 14, "mjpro210": 14, "bin": 14, "farama": 14, "foundat": 14, "lockfil": 14, "cython": 14, "check": 14, "success": 14, "everi": [15, 16], "hyperparamet": 15, "copi": 15, "trained_model": 15, "divers": 16, "describ": 16, "evolut": 16, "over": 16, "increment": 16, "probability_path": 16, "kind": 16, "varianc": 16, "preserv": 16, "gvp": 16, "usal": 16, "want": 16, "normal": 16, "target": 16, "By": 16, "fokker": 16, "planck": 16, "kolmogorov": 16, "fpk": 16, "hat": 16, "_t": 16, "s_": 16, "codebas": 16, "ddpm": 16, "compar": 16, "Or": 16, "nerual": 16, "therefor": 16, "intrinsicmodel": 16, "ani": 16, "cnn": 16, "u": 16, "net": 16, "x_size": 16, "alpha": 16, "arg": 16, "linear_vp_sd": 16, "20": 16, "t_encod": 16, "512": 16, "256": 16, "128": 16, "t_embedding_dim": 16, "register_modul": 16, "regist": 16, "so": 16, "mymodul": 16, "self": 16, "super": 16, "modulelist": 16, "append": 16, "relu": 16, "mle": 16, "onli": 16, "squar": 16, "error": 16, "l": 16, "dsm": 16, "mathbb": 16, "left": 16, "right": 16, "cfm": 16, "simpli": 16}, "objects": {"grl": [[0, 0, 0, "-", "agents"], [1, 0, 0, "-", "algorithms"], [2, 0, 0, "-", "datasets"], [3, 0, 0, "-", "generative_models"], [4, 0, 0, "-", "neural_network"], [5, 0, 0, "-", "numerical_methods"], [6, 0, 0, "-", "rl_modules"], [7, 0, 0, "-", "utils"]], "grl.agents": [[0, 1, 1, "", "GPAgent"], [0, 1, 1, "", "QGPOAgent"], [0, 1, 1, "", "SRPOAgent"]], "grl.agents.GPAgent": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "act"]], "grl.agents.QGPOAgent": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "act"]], "grl.agents.SRPOAgent": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "act"]], "grl.algorithms": [[1, 1, 1, "", "GMPGAlgorithm"], [1, 1, 1, "", "GMPGCritic"], [1, 1, 1, "", "GMPGPolicy"], [1, 1, 1, "", "GMPOAlgorithm"], [1, 1, 1, "", "GMPOCritic"], [1, 1, 1, "", "GMPOPolicy"], [1, 1, 1, "", "QGPOAlgorithm"], [1, 1, 1, "", "QGPOCritic"], [1, 1, 1, "", "QGPOPolicy"], [1, 1, 1, "", "SRPOAlgorithm"], [1, 1, 1, "", "SRPOCritic"], [1, 1, 1, "", "SRPOPolicy"]], "grl.algorithms.GMPGAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "train"]], "grl.algorithms.GMPGCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "compute_double_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "in_support_ql_loss"]], "grl.algorithms.GMPGPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "behaviour_policy_sample"], [1, 2, 1, "", "compute_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "sample"]], "grl.algorithms.GMPOAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "train"]], "grl.algorithms.GMPOCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "compute_double_q"], [1, 2, 1, "", "forward"]], "grl.algorithms.GMPOPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "behaviour_policy_sample"], [1, 2, 1, "", "compute_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "policy_optimization_loss_by_advantage_weighted_regression"], [1, 2, 1, "", "policy_optimization_loss_by_advantage_weighted_regression_softmax"], [1, 2, 1, "", "sample"]], "grl.algorithms.QGPOAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "deploy"], [1, 2, 1, "", "train"]], "grl.algorithms.QGPOCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "compute_double_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "q_loss"]], "grl.algorithms.QGPOPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "behaviour_policy_sample"], [1, 2, 1, "", "compute_q"], [1, 2, 1, "", "energy_guidance_loss"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "q_loss"], [1, 2, 1, "", "sample"]], "grl.algorithms.SRPOAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "deploy"], [1, 2, 1, "", "train"]], "grl.algorithms.SRPOCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "forward"]], "grl.algorithms.SRPOPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "sample"], [1, 2, 1, "", "srpo_actor_loss"]], "grl.datasets": [[2, 1, 1, "", "GPD4RLDataset"], [2, 1, 1, "", "GPDataset"], [2, 1, 1, "", "QGPOD4RLDataset"], [2, 1, 1, "", "QGPODataset"]], "grl.datasets.GPD4RLDataset": [[2, 2, 1, "", "__init__"]], "grl.datasets.GPDataset": [[2, 2, 1, "", "__init__"]], "grl.datasets.QGPOD4RLDataset": [[2, 2, 1, "", "__init__"]], "grl.datasets.QGPODataset": [[2, 2, 1, "", "__init__"]], "grl.generative_models": [[3, 1, 1, "", "DiffusionModel"], [3, 1, 1, "", "EnergyConditionalDiffusionModel"], [3, 1, 1, "", "IndependentConditionalFlowModel"], [3, 1, 1, "", "OptimalTransportConditionalFlowModel"]], "grl.generative_models.DiffusionModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "data_prediction_function"], [3, 2, 1, "", "dpo_loss"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "forward_sample"], [3, 2, 1, "", "forward_sample_process"], [3, 2, 1, "", "log_prob"], [3, 2, 1, "", "noise_function"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"], [3, 2, 1, "", "sample_forward_process_with_fixed_x"], [3, 2, 1, "", "sample_with_fixed_x"], [3, 2, 1, "", "sample_with_log_prob"], [3, 2, 1, "", "score_function"], [3, 2, 1, "", "score_matching_loss"], [3, 2, 1, "", "velocity_function"]], "grl.generative_models.EnergyConditionalDiffusionModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "data_prediction_function"], [3, 2, 1, "", "data_prediction_function_with_energy_guidance"], [3, 2, 1, "", "energy_guidance_loss"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "noise_function"], [3, 2, 1, "", "noise_function_with_energy_guidance"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"], [3, 2, 1, "", "sample_forward_process_with_fixed_x"], [3, 2, 1, "", "sample_with_fixed_x"], [3, 2, 1, "", "sample_with_fixed_x_without_energy_guidance"], [3, 2, 1, "", "sample_without_energy_guidance"], [3, 2, 1, "", "score_function"], [3, 2, 1, "", "score_function_with_energy_guidance"], [3, 2, 1, "", "score_matching_loss"], [3, 2, 1, "", "velocity_function"]], "grl.generative_models.IndependentConditionalFlowModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "flow_matching_loss_with_mask"], [3, 2, 1, "", "forward_sample"], [3, 2, 1, "", "forward_sample_process"], [3, 2, 1, "", "log_prob"], [3, 2, 1, "", "optimal_transport_flow_matching_loss"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"], [3, 2, 1, "", "sample_with_log_prob"], [3, 2, 1, "", "sample_with_mask"], [3, 2, 1, "", "sample_with_mask_forward_process"]], "grl.generative_models.OptimalTransportConditionalFlowModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "flow_matching_loss_small_batch_OT_plan"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"]], "grl.neural_network": [[4, 1, 1, "", "ConcatenateLayer"], [4, 1, 1, "", "ConcatenateMLP"], [4, 1, 1, "", "DiT"], [4, 1, 1, "", "DiT1D"], [4, 3, 1, "", "DiT2D"], [4, 1, 1, "", "DiT3D"], [4, 1, 1, "", "MultiLayerPerceptron"], [4, 1, 1, "", "TemporalSpatialResidualNet"]], "grl.neural_network.ConcatenateLayer": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.neural_network.ConcatenateMLP": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.neural_network.DiT": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"], [4, 2, 1, "", "forward_with_cfg"], [4, 2, 1, "", "initialize_weights"], [4, 2, 1, "", "unpatchify"]], "grl.neural_network.DiT1D": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"], [4, 2, 1, "", "initialize_weights"]], "grl.neural_network.DiT3D": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"], [4, 2, 1, "", "initialize_weights"], [4, 2, 1, "", "unpatchify"]], "grl.neural_network.MultiLayerPerceptron": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.neural_network.TemporalSpatialResidualNet": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.numerical_methods": [[5, 1, 1, "", "DPMSolver"], [5, 1, 1, "", "GaussianConditionalProbabilityPath"], [5, 1, 1, "", "ODE"], [5, 1, 1, "", "ODESolver"], [5, 1, 1, "", "SDE"], [5, 1, 1, "", "SDESolver"]], "grl.numerical_methods.DPMSolver": [[5, 2, 1, "", "__init__"], [5, 2, 1, "", "integrate"]], "grl.numerical_methods.GaussianConditionalProbabilityPath": [[5, 2, 1, "", "HalfLogSNR"], [5, 2, 1, "", "InverseHalfLogSNR"], [5, 2, 1, "", "__init__"], [5, 2, 1, "", "covariance"], [5, 2, 1, "", "d_covariance_dt"], [5, 2, 1, "", "d_log_scale_dt"], [5, 2, 1, "", "d_scale_dt"], [5, 2, 1, "", "d_std_dt"], [5, 2, 1, "", "diffusion"], [5, 2, 1, "", "diffusion_squared"], [5, 2, 1, "", "drift"], [5, 2, 1, "", "drift_coefficient"], [5, 2, 1, "", "log_scale"], [5, 2, 1, "", "scale"], [5, 2, 1, "", "std"]], "grl.numerical_methods.ODE": [[5, 2, 1, "", "__init__"]], "grl.numerical_methods.ODESolver": [[5, 2, 1, "", "__init__"], [5, 2, 1, "", "integrate"]], "grl.numerical_methods.SDE": [[5, 2, 1, "", "__init__"]], "grl.numerical_methods.SDESolver": [[5, 2, 1, "", "__init__"], [5, 2, 1, "", "integrate"]], "grl.rl_modules": [[6, 1, 1, "", "DoubleQNetwork"], [6, 1, 1, "", "DoubleVNetwork"], [6, 1, 1, "", "GymEnvSimulator"], [6, 1, 1, "", "OneShotValueFunction"], [6, 1, 1, "", "QNetwork"], [6, 1, 1, "", "VNetwork"]], "grl.rl_modules.DoubleQNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "compute_double_q"], [6, 2, 1, "", "compute_mininum_q"], [6, 2, 1, "", "forward"]], "grl.rl_modules.DoubleVNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "compute_double_v"], [6, 2, 1, "", "compute_mininum_v"], [6, 2, 1, "", "forward"]], "grl.rl_modules.GymEnvSimulator": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "collect_episodes"], [6, 2, 1, "", "collect_steps"], [6, 2, 1, "", "evaluate"]], "grl.rl_modules.OneShotValueFunction": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "compute_double_v"], [6, 2, 1, "", "forward"], [6, 2, 1, "", "v_loss"]], "grl.rl_modules.QNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "forward"]], "grl.rl_modules.VNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "forward"]], "grl.utils": [[7, 4, 1, "", "set_seed"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:attribute", "4": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "function", "Python function"]}, "titleterms": {"grl": [0, 1, 2, 3, 4, 5, 6, 7], "agent": [0, 12, 15], "qgpoagent": 0, "srpoagent": 0, "gpagent": 0, "algorithm": 1, "qgpocrit": 1, "qgpopolici": 1, "qgpoalgorithm": 1, "srpocrit": 1, "srpopolici": 1, "srpoalgorithm": 1, "gmpocrit": 1, "gmpopolici": 1, "gmpoalgorithm": 1, "gmpgcritic": 1, "gmpgpolici": 1, "gmpgalgorithm": 1, "dataset": 2, "qgpod4rldataset": 2, "qgpodataset": 2, "gpd4rldataset": 2, "gpdataset": 2, "generative_model": 3, "diffusionmodel": 3, "energyconditionaldiffusionmodel": 3, "independentconditionalflowmodel": 3, "optimaltransportconditionalflowmodel": 3, "neural_network": 4, "concatenatelay": 4, "multilayerperceptron": 4, "concatenatemlp": 4, "temporalspatialresidualnet": 4, "dit": 4, "dit1d": 4, "dit2d": 4, "dit3d": 4, "numerical_method": 5, "od": 5, "sde": 5, "dpmsolver": 5, "odesolv": 5, "sdesolv": 5, "gaussianconditionalprobabilitypath": 5, "rl_modul": 6, "gymenvsimul": 6, "oneshotvaluefunct": 6, "vnetwork": 6, "doublevnetwork": 6, "qnetwork": 6, "doubleqnetwork": 6, "util": 7, "set_se": 7, "concept": [8, 9], "overview": [8, 9], "gener": [8, 11, 16], "model": [8, 11, 16], "reinforc": [8, 11, 15], "learn": [8, 11, 15], "design": 8, "principl": 8, "generativerl": [9, 11, 14], "document": 9, "tutori": 9, "user": [9, 13], "guid": [9, 13], "api": 9, "instal": [10, 14], "quick": 11, "start": 11, "explan": 11, "how": [12, 14, 15, 16], "evalu": 12, "rl": 12, "perform": 12, "its": 14, "depend": 14, "train": [15, 16], "deploi": 15, "stochast": 16, "path": 16, "parameter": 16, "custom": 16, "neural": 16, "network": 16, "object": 16, "differ": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.todo": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"grl.agents": [[0, "module-grl.agents"]], "QGPOAgent": [[0, "qgpoagent"]], "SRPOAgent": [[0, "srpoagent"]], "GPAgent": [[0, "gpagent"]], "grl.algorithms": [[1, "module-grl.algorithms"]], "QGPOCritic": [[1, "qgpocritic"]], "QGPOPolicy": [[1, "qgpopolicy"]], "QGPOAlgorithm": [[1, "qgpoalgorithm"]], "SRPOCritic": [[1, "srpocritic"]], "SRPOPolicy": [[1, "srpopolicy"]], "SRPOAlgorithm": [[1, "srpoalgorithm"]], "GMPOCritic": [[1, "gmpocritic"]], "GMPOPolicy": [[1, "gmpopolicy"]], "GMPOAlgorithm": [[1, "gmpoalgorithm"]], "GMPGCritic": [[1, "gmpgcritic"]], "GMPGPolicy": [[1, "gmpgpolicy"]], "GMPGAlgorithm": [[1, "gmpgalgorithm"]], "grl.datasets": [[2, "module-grl.datasets"]], "QGPOD4RLDataset": [[2, "qgpod4rldataset"]], "QGPODataset": [[2, "qgpodataset"]], "GPD4RLDataset": [[2, "gpd4rldataset"]], "GPDataset": [[2, "gpdataset"]], "grl.generative_models": [[3, "module-grl.generative_models"]], "DiffusionModel": [[3, "diffusionmodel"]], "EnergyConditionalDiffusionModel": [[3, "energyconditionaldiffusionmodel"]], "IndependentConditionalFlowModel": [[3, "independentconditionalflowmodel"]], "OptimalTransportConditionalFlowModel": [[3, "optimaltransportconditionalflowmodel"]], "grl.neural_network": [[4, "module-grl.neural_network"]], "ConcatenateLayer": [[4, "concatenatelayer"]], "MultiLayerPerceptron": [[4, "multilayerperceptron"]], "ConcatenateMLP": [[4, "concatenatemlp"]], "TemporalSpatialResidualNet": [[4, "temporalspatialresidualnet"]], "DiT": [[4, "dit"]], "DiT1D": [[4, "dit1d"]], "DiT2D": [[4, "dit2d"]], "DiT3D": [[4, "dit3d"]], "grl.numerical_methods": [[5, "module-grl.numerical_methods"]], "ODE": [[5, "ode"]], "SDE": [[5, "sde"]], "DPMSolver": [[5, "dpmsolver"]], "ODESolver": [[5, "odesolver"]], "SDESolver": [[5, "sdesolver"]], "GaussianConditionalProbabilityPath": [[5, "gaussianconditionalprobabilitypath"]], "grl.rl_modules": [[6, "module-grl.rl_modules"]], "GymEnvSimulator": [[6, "gymenvsimulator"]], "OneShotValueFunction": [[6, "oneshotvaluefunction"]], "VNetwork": [[6, "vnetwork"]], "DoubleVNetwork": [[6, "doublevnetwork"]], "QNetwork": [[6, "qnetwork"]], "DoubleQNetwork": [[6, "doubleqnetwork"]], "grl.utils": [[7, "module-grl.utils"]], "set_seed": [[7, "set-seed"]], "Concepts": [[8, "concepts"], [9, null]], "Concepts Overview": [[8, "concepts-overview"]], "Generative Models": [[8, "generative-models"]], "Reinforcement Learning": [[8, "reinforcement-learning"], [11, "reinforcement-learning"]], "Design Principles": [[8, "design-principles"]], "GenerativeRL Documentation": [[9, "generativerl-documentation"]], "Overview": [[9, "overview"]], "Tutorials": [[9, null]], "User Guide": [[9, null], [13, "user-guide"], [13, null]], "API Documentation": [[9, null]], "Installation": [[10, "installation"]], "Quick Start": [[11, "quick-start"]], "Generative model in GenerativeRL": [[11, "generative-model-in-generativerl"]], "Explanation": [[11, "explanation"]], "How to evaluate RL agents performance": [[12, "how-to-evaluate-rl-agents-performance"]], "How to install GenerativeRL and its dependencies": [[14, "how-to-install-generativerl-and-its-dependencies"]], "How to train and deploy reinforcement learning agents": [[15, "how-to-train-and-deploy-reinforcement-learning-agents"]], "How to train generative models": [[16, "how-to-train-generative-models"]], "Stochastic path": [[16, "stochastic-path"]], "Model parameterization": [[16, "model-parameterization"]], "Customized neural network": [[16, "customized-neural-network"]], "Training objective for different generative models": [[16, "training-objective-for-different-generative-models"]]}, "indexentries": {"gpagent (class in grl.agents)": [[0, "grl.agents.GPAgent"]], "qgpoagent (class in grl.agents)": [[0, "grl.agents.QGPOAgent"]], "srpoagent (class in grl.agents)": [[0, "grl.agents.SRPOAgent"]], "__init__() (grl.agents.gpagent method)": [[0, "grl.agents.GPAgent.__init__"]], "__init__() (grl.agents.qgpoagent method)": [[0, "grl.agents.QGPOAgent.__init__"]], "__init__() (grl.agents.srpoagent method)": [[0, "grl.agents.SRPOAgent.__init__"]], "act() (grl.agents.gpagent method)": [[0, "grl.agents.GPAgent.act"]], "act() (grl.agents.qgpoagent method)": [[0, "grl.agents.QGPOAgent.act"]], "act() (grl.agents.srpoagent method)": [[0, "grl.agents.SRPOAgent.act"]], "grl.agents": [[0, "module-grl.agents"]], "module": [[0, "module-grl.agents"], [1, "module-grl.algorithms"], [2, "module-grl.datasets"], [3, "module-grl.generative_models"], [4, "module-grl.neural_network"], [5, "module-grl.numerical_methods"], [6, "module-grl.rl_modules"], [7, "module-grl.utils"]], "gmpgalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.GMPGAlgorithm"]], "gmpgcritic (class in grl.algorithms)": [[1, "grl.algorithms.GMPGCritic"]], "gmpgpolicy (class in grl.algorithms)": [[1, "grl.algorithms.GMPGPolicy"]], "gmpoalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.GMPOAlgorithm"]], "gmpocritic (class in grl.algorithms)": [[1, "grl.algorithms.GMPOCritic"]], "gmpopolicy (class in grl.algorithms)": [[1, "grl.algorithms.GMPOPolicy"]], "qgpoalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.QGPOAlgorithm"]], "qgpocritic (class in grl.algorithms)": [[1, "grl.algorithms.QGPOCritic"]], "qgpopolicy (class in grl.algorithms)": [[1, "grl.algorithms.QGPOPolicy"]], "srpoalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.SRPOAlgorithm"]], "srpocritic (class in grl.algorithms)": [[1, "grl.algorithms.SRPOCritic"]], "srpopolicy (class in grl.algorithms)": [[1, "grl.algorithms.SRPOPolicy"]], "__init__() (grl.algorithms.gmpgalgorithm method)": [[1, "grl.algorithms.GMPGAlgorithm.__init__"]], "__init__() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.__init__"]], "__init__() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.__init__"]], "__init__() (grl.algorithms.gmpoalgorithm method)": [[1, "grl.algorithms.GMPOAlgorithm.__init__"]], "__init__() (grl.algorithms.gmpocritic method)": [[1, "grl.algorithms.GMPOCritic.__init__"]], "__init__() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.__init__"]], "__init__() (grl.algorithms.qgpoalgorithm method)": [[1, "grl.algorithms.QGPOAlgorithm.__init__"]], "__init__() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.__init__"]], "__init__() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.__init__"]], "__init__() (grl.algorithms.srpoalgorithm method)": [[1, "grl.algorithms.SRPOAlgorithm.__init__"]], "__init__() (grl.algorithms.srpocritic method)": [[1, "grl.algorithms.SRPOCritic.__init__"]], "__init__() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.__init__"]], "behaviour_policy_loss() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.behaviour_policy_loss"]], "behaviour_policy_loss() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.behaviour_policy_loss"]], "behaviour_policy_loss() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.behaviour_policy_loss"]], "behaviour_policy_loss() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.behaviour_policy_loss"]], "behaviour_policy_sample() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.behaviour_policy_sample"]], "behaviour_policy_sample() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.behaviour_policy_sample"]], "behaviour_policy_sample() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.behaviour_policy_sample"]], "compute_double_q() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.compute_double_q"]], "compute_double_q() (grl.algorithms.gmpocritic method)": [[1, "grl.algorithms.GMPOCritic.compute_double_q"]], "compute_double_q() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.compute_double_q"]], "compute_q() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.compute_q"]], "compute_q() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.compute_q"]], "compute_q() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.compute_q"]], "deploy() (grl.algorithms.qgpoalgorithm method)": [[1, "grl.algorithms.QGPOAlgorithm.deploy"]], "deploy() (grl.algorithms.srpoalgorithm method)": [[1, "grl.algorithms.SRPOAlgorithm.deploy"]], "energy_guidance_loss() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.energy_guidance_loss"]], "forward() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.forward"]], "forward() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.forward"]], "forward() (grl.algorithms.gmpocritic method)": [[1, "grl.algorithms.GMPOCritic.forward"]], "forward() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.forward"]], "forward() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.forward"]], "forward() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.forward"]], "forward() (grl.algorithms.srpocritic method)": [[1, "grl.algorithms.SRPOCritic.forward"]], "forward() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.forward"]], "grl.algorithms": [[1, "module-grl.algorithms"]], "in_support_ql_loss() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.in_support_ql_loss"]], "policy_optimization_loss_by_advantage_weighted_regression() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.policy_optimization_loss_by_advantage_weighted_regression"]], "policy_optimization_loss_by_advantage_weighted_regression_softmax() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.policy_optimization_loss_by_advantage_weighted_regression_softmax"]], "q_loss() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.q_loss"]], "q_loss() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.q_loss"]], "sample() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.sample"]], "sample() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.sample"]], "sample() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.sample"]], "sample() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.sample"]], "srpo_actor_loss() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.srpo_actor_loss"]], "train() (grl.algorithms.gmpgalgorithm method)": [[1, "grl.algorithms.GMPGAlgorithm.train"]], "train() (grl.algorithms.gmpoalgorithm method)": [[1, "grl.algorithms.GMPOAlgorithm.train"]], "train() (grl.algorithms.qgpoalgorithm method)": [[1, "grl.algorithms.QGPOAlgorithm.train"]], "train() (grl.algorithms.srpoalgorithm method)": [[1, "grl.algorithms.SRPOAlgorithm.train"]], "gpd4rldataset (class in grl.datasets)": [[2, "grl.datasets.GPD4RLDataset"]], "gpdataset (class in grl.datasets)": [[2, "grl.datasets.GPDataset"]], "qgpod4rldataset (class in grl.datasets)": [[2, "grl.datasets.QGPOD4RLDataset"]], "qgpodataset (class in grl.datasets)": [[2, "grl.datasets.QGPODataset"]], "__init__() (grl.datasets.gpd4rldataset method)": [[2, "grl.datasets.GPD4RLDataset.__init__"]], "__init__() (grl.datasets.gpdataset method)": [[2, "grl.datasets.GPDataset.__init__"]], "__init__() (grl.datasets.qgpod4rldataset method)": [[2, "grl.datasets.QGPOD4RLDataset.__init__"]], "__init__() (grl.datasets.qgpodataset method)": [[2, "grl.datasets.QGPODataset.__init__"]], "grl.datasets": [[2, "module-grl.datasets"]], "diffusionmodel (class in grl.generative_models)": [[3, "grl.generative_models.DiffusionModel"]], "energyconditionaldiffusionmodel (class in grl.generative_models)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel"]], "independentconditionalflowmodel (class in grl.generative_models)": [[3, "grl.generative_models.IndependentConditionalFlowModel"]], "optimaltransportconditionalflowmodel (class in grl.generative_models)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel"]], "__init__() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.__init__"]], "__init__() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.__init__"]], "__init__() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.__init__"]], "__init__() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.__init__"]], "data_prediction_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.data_prediction_function"]], "data_prediction_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.data_prediction_function"]], "data_prediction_function_with_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.data_prediction_function_with_energy_guidance"]], "dpo_loss() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.dpo_loss"]], "energy_guidance_loss() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.energy_guidance_loss"]], "flow_matching_loss() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.flow_matching_loss"]], "flow_matching_loss() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.flow_matching_loss"]], "flow_matching_loss() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.flow_matching_loss"]], "flow_matching_loss() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.flow_matching_loss"]], "flow_matching_loss_small_batch_ot_plan() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.flow_matching_loss_small_batch_OT_plan"]], "flow_matching_loss_with_mask() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.flow_matching_loss_with_mask"]], "forward_sample() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.forward_sample"]], "forward_sample() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.forward_sample"]], "forward_sample_process() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.forward_sample_process"]], "forward_sample_process() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.forward_sample_process"]], "grl.generative_models": [[3, "module-grl.generative_models"]], "log_prob() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.log_prob"]], "log_prob() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.log_prob"]], "noise_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.noise_function"]], "noise_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.noise_function"]], "noise_function_with_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.noise_function_with_energy_guidance"]], "optimal_transport_flow_matching_loss() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.optimal_transport_flow_matching_loss"]], "sample() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample"]], "sample() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample"]], "sample() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample"]], "sample() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.sample"]], "sample_forward_process() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_forward_process"]], "sample_forward_process() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_forward_process"]], "sample_forward_process() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_forward_process"]], "sample_forward_process() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.sample_forward_process"]], "sample_forward_process_with_fixed_x() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_forward_process_with_fixed_x"]], "sample_forward_process_with_fixed_x() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_forward_process_with_fixed_x"]], "sample_with_fixed_x() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_with_fixed_x"]], "sample_with_fixed_x() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_with_fixed_x"]], "sample_with_fixed_x_without_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_with_fixed_x_without_energy_guidance"]], "sample_with_log_prob() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_with_log_prob"]], "sample_with_log_prob() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_with_log_prob"]], "sample_with_mask() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_with_mask"]], "sample_with_mask_forward_process() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_with_mask_forward_process"]], "sample_without_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_without_energy_guidance"]], "score_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.score_function"]], "score_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.score_function"]], "score_function_with_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.score_function_with_energy_guidance"]], "score_matching_loss() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.score_matching_loss"]], "score_matching_loss() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.score_matching_loss"]], "velocity_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.velocity_function"]], "velocity_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.velocity_function"]], "concatenatelayer (class in grl.neural_network)": [[4, "grl.neural_network.ConcatenateLayer"]], "concatenatemlp (class in grl.neural_network)": [[4, "grl.neural_network.ConcatenateMLP"]], "dit (class in grl.neural_network)": [[4, "grl.neural_network.DiT"]], "dit1d (class in grl.neural_network)": [[4, "grl.neural_network.DiT1D"]], "dit2d (in module grl.neural_network)": [[4, "grl.neural_network.DiT2D"]], "dit3d (class in grl.neural_network)": [[4, "grl.neural_network.DiT3D"]], "multilayerperceptron (class in grl.neural_network)": [[4, "grl.neural_network.MultiLayerPerceptron"]], "temporalspatialresidualnet (class in grl.neural_network)": [[4, "grl.neural_network.TemporalSpatialResidualNet"]], "__init__() (grl.neural_network.concatenatelayer method)": [[4, "grl.neural_network.ConcatenateLayer.__init__"]], "__init__() (grl.neural_network.concatenatemlp method)": [[4, "grl.neural_network.ConcatenateMLP.__init__"]], "__init__() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.__init__"]], "__init__() (grl.neural_network.dit1d method)": [[4, "grl.neural_network.DiT1D.__init__"]], "__init__() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.__init__"]], "__init__() (grl.neural_network.multilayerperceptron method)": [[4, "grl.neural_network.MultiLayerPerceptron.__init__"]], "__init__() (grl.neural_network.temporalspatialresidualnet method)": [[4, "grl.neural_network.TemporalSpatialResidualNet.__init__"]], "forward() (grl.neural_network.concatenatelayer method)": [[4, "grl.neural_network.ConcatenateLayer.forward"]], "forward() (grl.neural_network.concatenatemlp method)": [[4, "grl.neural_network.ConcatenateMLP.forward"]], "forward() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.forward"]], "forward() (grl.neural_network.dit1d method)": [[4, "grl.neural_network.DiT1D.forward"]], "forward() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.forward"]], "forward() (grl.neural_network.multilayerperceptron method)": [[4, "grl.neural_network.MultiLayerPerceptron.forward"]], "forward() (grl.neural_network.temporalspatialresidualnet method)": [[4, "grl.neural_network.TemporalSpatialResidualNet.forward"]], "forward_with_cfg() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.forward_with_cfg"]], "grl.neural_network": [[4, "module-grl.neural_network"]], "initialize_weights() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.initialize_weights"]], "initialize_weights() (grl.neural_network.dit1d method)": [[4, "grl.neural_network.DiT1D.initialize_weights"]], "initialize_weights() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.initialize_weights"]], "unpatchify() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.unpatchify"]], "unpatchify() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.unpatchify"]], "dpmsolver (class in grl.numerical_methods)": [[5, "grl.numerical_methods.DPMSolver"]], "gaussianconditionalprobabilitypath (class in grl.numerical_methods)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath"]], "halflogsnr() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.HalfLogSNR"]], "inversehalflogsnr() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.InverseHalfLogSNR"]], "ode (class in grl.numerical_methods)": [[5, "grl.numerical_methods.ODE"]], "odesolver (class in grl.numerical_methods)": [[5, "grl.numerical_methods.ODESolver"]], "sde (class in grl.numerical_methods)": [[5, "grl.numerical_methods.SDE"]], "sdesolver (class in grl.numerical_methods)": [[5, "grl.numerical_methods.SDESolver"]], "__init__() (grl.numerical_methods.dpmsolver method)": [[5, "grl.numerical_methods.DPMSolver.__init__"]], "__init__() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.__init__"]], "__init__() (grl.numerical_methods.ode method)": [[5, "grl.numerical_methods.ODE.__init__"]], "__init__() (grl.numerical_methods.odesolver method)": [[5, "grl.numerical_methods.ODESolver.__init__"]], "__init__() (grl.numerical_methods.sde method)": [[5, "grl.numerical_methods.SDE.__init__"]], "__init__() (grl.numerical_methods.sdesolver method)": [[5, "grl.numerical_methods.SDESolver.__init__"]], "covariance() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.covariance"]], "d_covariance_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_covariance_dt"]], "d_log_scale_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_log_scale_dt"]], "d_scale_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_scale_dt"]], "d_std_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_std_dt"]], "diffusion() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.diffusion"]], "diffusion_squared() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.diffusion_squared"]], "drift() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.drift"]], "drift_coefficient() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.drift_coefficient"]], "grl.numerical_methods": [[5, "module-grl.numerical_methods"]], "integrate() (grl.numerical_methods.dpmsolver method)": [[5, "grl.numerical_methods.DPMSolver.integrate"]], "integrate() (grl.numerical_methods.odesolver method)": [[5, "grl.numerical_methods.ODESolver.integrate"]], "integrate() (grl.numerical_methods.sdesolver method)": [[5, "grl.numerical_methods.SDESolver.integrate"]], "log_scale() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.log_scale"]], "scale() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.scale"]], "std() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.std"]], "doubleqnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.DoubleQNetwork"]], "doublevnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.DoubleVNetwork"]], "gymenvsimulator (class in grl.rl_modules)": [[6, "grl.rl_modules.GymEnvSimulator"]], "oneshotvaluefunction (class in grl.rl_modules)": [[6, "grl.rl_modules.OneShotValueFunction"]], "qnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.QNetwork"]], "vnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.VNetwork"]], "__init__() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.__init__"]], "__init__() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.__init__"]], "__init__() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.__init__"]], "__init__() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.__init__"]], "__init__() (grl.rl_modules.qnetwork method)": [[6, "grl.rl_modules.QNetwork.__init__"]], "__init__() (grl.rl_modules.vnetwork method)": [[6, "grl.rl_modules.VNetwork.__init__"]], "collect_episodes() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.collect_episodes"]], "collect_steps() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.collect_steps"]], "compute_double_q() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.compute_double_q"]], "compute_double_v() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.compute_double_v"]], "compute_double_v() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.compute_double_v"]], "compute_mininum_q() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.compute_mininum_q"]], "compute_mininum_v() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.compute_mininum_v"]], "evaluate() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.evaluate"]], "forward() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.forward"]], "forward() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.forward"]], "forward() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.forward"]], "forward() (grl.rl_modules.qnetwork method)": [[6, "grl.rl_modules.QNetwork.forward"]], "forward() (grl.rl_modules.vnetwork method)": [[6, "grl.rl_modules.VNetwork.forward"]], "grl.rl_modules": [[6, "module-grl.rl_modules"]], "v_loss() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.v_loss"]], "grl.utils": [[7, "module-grl.utils"]], "set_seed() (in module grl.utils)": [[7, "grl.utils.set_seed"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["api_doc/agents/index", "api_doc/algorithms/index", "api_doc/datasets/index", "api_doc/generative_models/index", "api_doc/neural_network/index", "api_doc/numerical_methods/index", "api_doc/rl_modules/index", "api_doc/utils/index", "concepts/index", "index", "tutorials/installation/index", "tutorials/quick_start/index", "user_guide/evaluating_agents", "user_guide/index", "user_guide/installation", "user_guide/training_agents", "user_guide/training_generative_models"], "filenames": ["api_doc/agents/index.rst", "api_doc/algorithms/index.rst", "api_doc/datasets/index.rst", "api_doc/generative_models/index.rst", "api_doc/neural_network/index.rst", "api_doc/numerical_methods/index.rst", "api_doc/rl_modules/index.rst", "api_doc/utils/index.rst", "concepts/index.rst", "index.rst", "tutorials/installation/index.rst", "tutorials/quick_start/index.rst", "user_guide/evaluating_agents.rst", "user_guide/index.rst", "user_guide/installation.rst", "user_guide/training_agents.rst", "user_guide/training_generative_models.rst"], "titles": ["grl.agents", "grl.algorithms", "grl.datasets", "grl.generative_models", "grl.neural_network", "grl.numerical_methods", "grl.rl_modules", "grl.utils", "Concepts", "GenerativeRL Documentation", "Installation", "Quick Start", "How to evaluate RL agents performance", "User Guide", "How to install GenerativeRL and its dependencies", "How to train and deploy reinforcement learning agents", "How to train generative models"], "terms": {"class": [0, 1, 2, 3, 4, 5, 6, 8, 12, 15, 16], "config": [0, 1, 3, 5, 6, 11, 12, 15, 16], "model": [0, 1, 3, 4, 5, 9, 13, 15], "sourc": [0, 1, 2, 3, 4, 5, 6, 7, 8], "overview": [0, 1, 2, 3, 4, 5, 6, 7], "The": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 16], "qgpo": [0, 1, 2, 11, 15], "algorithm": [0, 2, 8, 9, 11, 12, 15], "interfac": [0, 1, 2, 3, 4, 5, 6, 8, 11], "__init__": [0, 1, 2, 3, 4, 5, 6, 16], "action": [0, 1, 2, 6, 8, 12], "initi": [0, 1, 2, 3, 4, 5, 6, 8, 11, 16], "paramet": [0, 1, 2, 3, 4, 5, 6, 7, 8], "easydict": [0, 1, 3, 5, 6, 16], "configur": [0, 1, 3, 5, 6, 8, 11, 15, 16], "union": [0, 1, 3, 4, 5, 6], "torch": [0, 1, 3, 4, 5, 6], "nn": [0, 1, 3, 5, 6, 16], "modul": [0, 1, 3, 5, 6, 12, 15, 16], "moduledict": [0, 1, 3], "act": [0, 11, 12, 15], "ob": 0, "return_as_torch_tensor": 0, "fals": [0, 1, 3, 4, 5, 6, 7], "given": [0, 1, 3, 5, 6, 8], "an": [0, 1, 3, 4, 5, 8, 11, 12, 15], "observ": [0, 6, 8, 11, 12, 15], "return": [0, 1, 3, 4, 5, 6, 7, 12, 15, 16], "np": 0, "ndarrai": 0, "tensor": [0, 1, 3, 4, 5, 6, 8], "dict": [0, 1, 6, 16], "bool": [0, 1, 3, 4, 5, 6, 7], "whether": [0, 1, 3, 4, 5, 6, 7, 8, 14], "type": [0, 1, 3, 4, 5, 6, 7, 8, 16], "srpo": [0, 1], "train": [0, 1, 2, 3, 6, 8, 9, 11, 13], "gener": [0, 1, 2, 3, 5, 7, 9, 13], "polici": [0, 1, 2, 3, 6, 8, 11], "thi": [0, 1, 3, 4, 6, 8, 9, 11, 16], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16], "design": [0, 3, 9], "us": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "gmpgalgorithm": [0, 9], "gmpoalgorithm": [0, 9], "numpi": 0, "arrai": 0, "critic": [1, 8], "network": [1, 4, 6, 8, 13], "forward": [1, 3, 4, 6, 8, 16], "compute_double_q": [1, 6], "state": [1, 3, 5, 6, 11], "none": [1, 3, 4, 5, 6, 7, 16], "output": [1, 3, 4, 5, 6, 16], "two": [1, 3, 6], "q": [1, 6, 11], "tensordict": [1, 3, 4, 5, 6], "input": [1, 3, 4, 5, 6, 12, 15, 16], "first": [1, 5, 6, 11], "q2": [1, 6], "second": [1, 6], "q1": [1, 6], "q_loss": 1, "reward": [1, 8, 11, 12, 15], "next_stat": 1, "done": [1, 11, 12, 15], "fake_next_act": 1, "discount_factor": 1, "1": [1, 3, 4, 5, 14, 16], "0": [1, 3, 4, 5, 14, 16], "calcul": [1, 6], "loss": [1, 3, 6, 16], "next": 1, "fake": [1, 2], "float": [1, 3, 4, 5], "discount": 1, "factor": [1, 4, 5], "sampl": [1, 2, 3, 5, 8, 16], "behaviour_policy_sampl": 1, "compute_q": 1, "behaviour_policy_loss": 1, "energy_guidance_loss": [1, 3], "intern": [1, 6], "share": [1, 6], "both": [1, 3, 6, 16], "scriptmodul": [1, 6], "behaviour": [1, 2], "batch_siz": [1, 3], "solver_config": [1, 3], "t_span": [1, 3, 5], "which": [1, 2, 3, 5, 6, 8, 12, 14, 16], "condit": [1, 3, 4, 5, 6, 16], "od": [1, 8, 9], "solver": [1, 3, 5, 16], "time": [1, 3, 5, 8, 16], "span": [1, 3], "sde": [1, 8, 9, 16], "valu": [1, 3, 5, 6, 7, 8], "energi": [1, 2, 3], "guidanc": [1, 3, 4], "guidance_scal": [1, 3], "scale": [1, 3, 4, 5, 6, 16], "simul": [1, 6, 12], "dataset": [1, 8, 9, 11, 15], "guid": [1, 3, 11], "optim": [1, 3, 8, 11], "offlin": [1, 3, 8], "reinforc": [1, 3, 9, 12, 13, 14], "learn": [1, 3, 4, 9, 12, 13, 14], "base": [1, 2, 3, 5, 8], "diffus": [1, 3, 4, 5, 8, 9, 11, 16], "deploi": [1, 8, 9, 11, 12, 13], "must": 1, "contain": 1, "follow": [1, 5, 8, 14, 15, 16], "kei": [1, 8], "deploy": [1, 8, 11], "object": [1, 8, 13], "environ": [1, 2, 6, 8, 11, 12, 14, 15], "qgpodataset": [1, 9], "qgpoagent": [1, 9], "A": [1, 6, 11, 16], "weight": [1, 3, 4, 8, 16], "bia": 1, "run": [1, 6, 8, 12, 14], "creat": [1, 8, 11, 15, 16], "automat": [1, 8], "when": 1, "function": [1, 3, 4, 5, 6, 8, 11, 16], "call": [1, 11, 16], "v_loss": [1, 6], "srpo_actor_loss": 1, "srpoagent": [1, 9], "gmpo": 1, "includ": [1, 8, 11, 16], "optin": 1, "policy_optimization_loss_by_advantage_weighted_regress": 1, "policy_optimization_loss_by_advantage_weighted_regression_softmax": 1, "maximum_likelihood": [1, 3], "with_grad": [1, 3], "size": [1, 3, 4], "int": [1, 3, 4, 5, 6, 7], "tupl": [1, 3, 4], "list": [1, 3, 4, 6, 13], "batch": [1, 3, 4], "gradient": [1, 3, 8], "beta": [1, 3, 5], "weight_clamp": 1, "100": 1, "fake_act": 1, "seed": [1, 7, 8], "gpg": 1, "gpdataset": [1, 9], "random": [1, 6, 7, 8], "gpo": 1, "in_support_ql_loss": 1, "gmpg": 1, "env_id": [2, 6, 11, 12, 15], "contrast": [2, 3], "predict": [2, 3, 5], "need": [2, 6, 8, 11, 15], "true": [2, 3, 4, 5, 7], "from": [2, 3, 5, 8, 10, 11, 15, 16], "support": [2, 3, 5, 11, 16], "__getitem__": 2, "__len__": 2, "method": [2, 3, 5, 6, 8, 11, 12, 15, 16], "str": [2, 3, 5, 6], "id": [2, 6], "d4rl": [2, 14], "sometim": 2, "data": [2, 3, 4, 5, 8, 16], "augment": [2, 8], "variou": [3, 8], "continu": [3, 8], "path": [3, 5, 8, 13], "comput": [3, 5, 8], "score": [3, 8, 16], "veloc": [3, 16], "It": [3, 5, 6, 14], "can": [3, 5, 8, 10, 11, 12, 14, 15, 16], "via": 3, "nois": [3, 5, 16], "match": [3, 8, 16], "flow": [3, 5, 8, 9, 16], "ar": [3, 8, 15, 16], "score_funct": 3, "score_matching_loss": [3, 16], "velocity_funct": 3, "flow_matching_loss": [3, 16], "data_prediction_funct": [3, 5], "t": [3, 4, 5, 8, 16], "x": [3, 4, 5, 16], "frac": [3, 5, 16], "sigma": [3, 4, 5, 8, 16], "x_t": [3, 8, 16], "2": [3, 4, 5, 14, 16], "nabla_": [3, 16], "log": [3, 5, 8, 11, 16], "p_": 3, "theta": [3, 16], "": [3, 5, 11], "treetensor": 3, "dpo_loss": 3, "ref_dm": 3, "process": [3, 5, 6, 8, 11, 16], "direct": [3, 8], "dpo": 3, "develop": [3, 10], "featur": 3, "recommend": [3, 5], "averag": 3, "across": [3, 8], "forward_sampl": 3, "note": [3, 14], "revers": [3, 8, 16], "thu": 3, "form": [3, 5, 16], "rather": 3, "encod": 3, "latent": [3, 4], "space": [3, 5], "forward_sample_process": 3, "all": [3, 4], "intermedi": [3, 5], "log_prob": 3, "using_hutchinson_trace_estim": 3, "probabl": [3, 4, 5], "noise_funct": [3, 5, 16], "x_0": [3, 16], "final": [3, 4, 5], "provid": [3, 7, 8, 9, 11, 15, 16], "gaussian": [3, 5, 16], "distribut": [3, 8, 16], "result": [3, 8], "shape": [3, 4, 5, 16], "where": [3, 5, 16], "number": [3, 4, 5, 6, 11], "step": [3, 5, 6, 11, 12, 14, 15], "b": 3, "could": 3, "scalar": [3, 5], "b1": 3, "b2": 3, "n": [3, 4, 5, 11], "d": [3, 5, 16], "dimens": [3, 4, 8], "d1": 3, "d2": 3, "extra": 3, "If": [3, 5, 7], "sample_forward_process": 3, "repeat": 3, "same": [3, 5, 16], "sample_forward_process_with_fixed_x": 3, "fixed_x": 3, "fixed_mask": 3, "fix": [3, 8], "mask": 3, "sample_with_fixed_x": 3, "sample_with_log_prob": 3, "likelihood": [3, 8, 16], "weighting_schem": 3, "uncondit": [3, 4], "scheme": 3, "vanilla": 3, "maximum": [3, 8, 16], "estim": [3, 8, 16], "refer": [3, 11, 13, 15], "paper": 3, "more": [3, 5, 11, 13, 15], "detail": [3, 5, 11, 13], "lambda": [3, 5, 16], "denot": [3, 16], "g": [3, 5, 8, 14, 16], "numer": [3, 8], "stabil": 3, "we": [3, 5, 8, 11, 16], "mont": 3, "carlo": 3, "approxim": [3, 5, 6], "integr": [3, 5, 8], "p": [3, 5, 14, 16], "balanc": 3, "mse": 3, "through": [3, 4, 8, 11], "stochast": [3, 5, 8, 13], "differenti": [3, 5, 8, 16], "equat": [3, 5, 8, 16], "v_": [3, 16], "energy_model": 3, "text": [3, 16], "e": [3, 5, 8, 14, 16], "c": [3, 4, 14, 16], "sim": 3, "exp": 3, "mathcal": [3, 5, 16], "z": 3, "sample_without_energy_guid": 3, "score_function_with_energy_guid": 3, "data_prediction_function_with_energy_guid": 3, "cep": 3, "propos": 3, "exact": 3, "noise_function_with_energy_guid": 3, "nose": 3, "nabla": 3, "sample_with_fixed_x_without_energy_guid": 3, "without": [3, 5], "independ": [3, 8, 16], "get_typ": 3, "x0": [3, 5], "x1": 3, "flow_matching_loss_with_mask": 3, "signal": [3, 8], "either": 3, "ha": [3, 5, 6, 12, 15, 16], "correspond": 3, "element": [3, 4, 5], "usual": [3, 16], "x_1": [3, 16], "log_prob_x_0": 3, "function_log_prob_x_0": 3, "callabl": [3, 5, 6], "hutchinson": 3, "trace": 3, "jacobian": 3, "drift": [3, 5, 8, 16], "faster": 3, "less": 3, "accur": [3, 8], "set": [3, 7, 16], "high": [3, 5, 8], "dimension": 3, "log_likelihood": 3, "optimal_transport_flow_matching_loss": 3, "transport": 3, "plan": 3, "sample_with_mask": 3, "sample_with_mask_forward_process": 3, "between": [3, 5, 8, 16], "flow_matching_loss_small_batch_ot_plan": 3, "small": [3, 6], "acceler": 3, "concaten": 4, "along": 4, "last": [4, 5, 6], "layer": [4, 16], "hidden_s": [4, 16], "output_s": 4, "activ": 4, "dropout": 4, "layernorm": 4, "final_activ": 4, "shrink": 4, "multi": 4, "perceptron": 4, "fulli": 4, "connect": 4, "fc1": 4, "act1": 4, "fcn": 4, "actn": 4, "out": 4, "hidden": 4, "channel": 4, "option": [4, 7], "zero": 4, "default": [4, 7, 15], "block": [4, 11], "shrinkag": 4, "kwarg": [4, 5], "pass": 4, "mlp": [4, 16], "keyword": 4, "argument": [4, 5], "output_dim": [4, 16], "t_dim": [4, 16], "input_dim": 4, "condition_dim": 4, "condition_hidden_dim": 4, "t_condition_hidden_dim": 4, "tempor": 4, "spatial": 4, "residu": 4, "multipl": 4, "temporalspatialresblock": 4, "input_s": 4, "32": 4, "patch_siz": 4, "in_channel": 4, "4": [4, 5], "1152": 4, "depth": 4, "28": 4, "num_head": 4, "16": 4, "mlp_ratio": 4, "class_dropout_prob": 4, "num_class": 4, "1000": 4, "learn_sigma": 4, "transform": [4, 8, 16], "backbon": [4, 16], "offici": 4, "implement": [4, 8, 12, 15], "github": [4, 10, 14], "repo": 4, "http": [4, 10, 14], "com": [4, 10, 14], "facebookresearch": 4, "blob": 4, "main": 4, "py": [4, 14], "patch": 4, "attent": 4, "head": 4, "respect": 4, "timestep": 4, "imag": [4, 8, 14], "represent": 4, "label": 4, "forward_with_cfg": 4, "cfg_scale": 4, "also": [4, 5, 8, 10, 15, 16], "classifi": [4, 8], "free": [4, 8], "initialize_weight": 4, "unpatchifi": 4, "img": 4, "h": 4, "w": [4, 16], "token_s": 4, "condition_embedd": 4, "1d": 4, "3d": 4, "inform": [4, 6, 11, 13, 15], "origin": 4, "video": [4, 8], "alia": 4, "patch_block_s": 4, "10": 4, "convolv": 4, "each": [4, 6], "token": 4, "total_patch": 4, "ordinari": [5, 8], "defin": [5, 8, 15, 16], "dx": 5, "f": [5, 8, 16], "dt": [5, 8, 16], "term": 5, "dw": 5, "wiener": [5, 16], "order": 5, "devic": [5, 16], "atol": 5, "1e": 5, "05": 5, "rtol": 5, "dpm_solver": 5, "singlestep": 5, "solver_typ": 5, "skip_typ": 5, "time_uniform": 5, "denois": [5, 8, 16], "dpm": 5, "should": 5, "3": [5, 14], "absolut": 5, "toler": 5, "adapt": [5, 8], "rel": 5, "total": 5, "evalu": [5, 6, 8, 9, 11, 13, 15], "nfe": 5, "multistep": 5, "singlestep_fix": 5, "taylor": 5, "slightli": 5, "impact": 5, "perform": [5, 9, 13, 15], "logsnr": 5, "time_quadrat": 5, "diffusion_process": 5, "save_intermedi": 5, "diffusionprocess": 5, "t_start": 5, "solut": 5, "t_end": 5, "x_end": 5, "ode_solv": 5, "euler": [5, 8], "01": 5, "librari": [5, 8, 9, 11, 14, 16], "torchdyn": [5, 8, 16], "torchdiffeq": [5, 8], "current": [5, 16], "addit": [5, 14], "For": [5, 8, 11, 13, 14, 15, 16], "exampl": [5, 8, 11, 13, 14, 16], "trajectori": 5, "len": [5, 16], "sde_solv": 5, "sde_noise_typ": 5, "diagon": 5, "sde_typ": 5, "ito": 5, "001": 5, "torchsd": 5, "stratonovich": 5, "logqp": 5, "case": [5, 6, 11], "mu": 5, "written": 5, "mathrm": [5, 16], "w_": 5, "sqrt": 5, "covari": 5, "matrix": 5, "standard": [5, 16], "deviat": 5, "half": 5, "differ": [5, 8, 13], "vp": [5, 16], "int_": [5, 16], "linear": [5, 16], "todo": 5, "add": 5, "cosin": 5, "ve": 5, "opt": 5, "halflogsnr": 5, "inversehalflogsnr": 5, "invers": 5, "sinc": 5, "invert": 5, "beta_1": [5, 16], "beta_0": [5, 16], "d_covariance_dt": 5, "deriv": [5, 16], "d_log_scale_dt": 5, "d_scale_dt": 5, "d_std_dt": 5, "diffusion_squar": 5, "drift_coeffici": 5, "coeffici": [5, 16], "satisfi": 5, "log_scal": 5, "std": 5, "simpl": [6, 8, 11], "gym": [6, 8, 11, 12, 14, 15], "generativerl": [6, 8, 10, 12, 13, 15, 16], "collect": [6, 12], "episod": 6, "singl": [6, 8], "suitabl": 6, "experi": 6, "collect_episod": 6, "collect_step": 6, "accord": 6, "num_episod": 6, "num_step": 6, "sever": 6, "reset": [6, 11, 12, 15], "begin": 6, "No": 6, "histori": 6, "store": 6, "dictionari": [6, 8, 15], "random_polici": 6, "until": 6, "end": 6, "render_arg": 6, "resultswil": 6, "one": 6, "shot": 6, "mean": [6, 16], "bellman": 6, "backup": 6, "compute_double_v": 6, "v2": 6, "v1": 6, "v": [6, 16], "doubl": 6, "compute_mininum_v": 6, "minimum": 6, "minimum_v": 6, "compute_mininum_q": 6, "minimum_q": 6, "seed_valu": 7, "cudnn_determinist": 7, "cudnn_benchmark": 7, "make": [7, 8, 9, 11, 12, 14, 15], "cudnn": 7, "oper": 7, "determinist": 7, "enabl": [7, 8], "benchmark": 7, "convolut": 7, "framework": [8, 9], "consist": 8, "code": [8, 16], "api": [8, 11, 13, 15, 16], "user": [8, 12, 16], "friendli": 8, "rl": [8, 9, 13, 15], "agent": [8, 9, 11, 13], "In": [8, 11, 12, 15, 16], "section": [8, 11, 13, 15], "explor": 8, "core": 8, "discuss": 8, "underpin": 8, "how": [8, 9, 11, 13], "thei": 8, "leverag": 8, "address": 8, "complex": 8, "problem": [8, 9, 14], "field": [8, 16], "addition": 8, "explain": 8, "why": 8, "import": [8, 11, 12, 14, 15, 16], "what": 8, "uniqu": 8, "wide": 8, "rang": [8, 11, 12, 15, 16], "applic": [8, 16], "machin": 8, "new": [8, 11, 16], "typic": [8, 16], "most": 8, "unsupervis": 8, "techniqu": 8, "appli": 8, "task": 8, "audio": 8, "interpol": 8, "focus": 8, "dynam": 8, "These": [8, 16], "have": [8, 14], "capac": 8, "captur": 8, "demonstr": [8, 11], "promis": 8, "varieti": 8, "its": [8, 9, 11, 13, 15, 16], "variant": [8, 16], "qualiti": 8, "solv": [8, 9, 14], "dx_t": [8, 16], "dw_t": [8, 16], "unifi": [8, 12, 16], "howev": 8, "vari": 8, "definit": 8, "some": [8, 14], "under": [8, 12, 15], "common": 8, "while": [8, 15, 16], "other": [8, 11, 13, 15], "mai": 8, "requir": [8, 14], "specif": [8, 11, 15], "There": 8, "four": 8, "open": 8, "neural": [8, 13], "parameter": [8, 13], "certain": 8, "part": 8, "potenti": 8, "determin": 8, "procedur": 8, "fundament": 8, "maxim": 8, "pretrain": 8, "like": 8, "bridg": [8, 16], "fine": 8, "tune": 8, "advantag": 8, "regress": 8, "adjoint": 8, "involv": 8, "depend": [8, 9, 13], "maruyama": 8, "rung": 8, "kutta": 8, "offer": 8, "flexibl": [8, 11], "allow": 8, "custom": [8, 13], "extend": 8, "suit": [8, 11, 14], "instanc": [8, 11, 15], "easili": 8, "own": [8, 16], "architectur": [8, 16], "tailor": 8, "format": [8, 11], "decis": [8, 9], "interact": [8, 15], "receiv": 8, "penalti": 8, "cumul": 8, "take": [8, 12, 15, 16], "updat": 8, "categor": 8, "directli": [8, 16], "onlin": 8, "strategi": 8, "off": 8, "actor": 8, "research": 8, "improv": 8, "effici": 8, "synthet": 8, "decoupl": 8, "littl": 8, "modif": 8, "rank": 8, "least": 8, "pytorch": [8, 14], "unif": 8, "within": [8, 11], "simplic": 8, "intuit": 8, "extens": 8, "modular": 8, "built": 8, "mix": 8, "compon": [8, 11], "reproduc": 8, "ensur": 8, "checkpoint": 8, "possibl": 8, "minim": 8, "seek": 8, "extern": 8, "lightweight": 8, "instal": [8, 9, 13], "platform": 8, "compat": 8, "exist": 8, "work": [8, 16], "seamlessli": 8, "openai": [8, 11], "torchrl": 8, "python": [9, 14], "aim": 9, "combin": 9, "power": [9, 11], "capabl": 9, "quick": 9, "start": 9, "explan": 9, "principl": 9, "grl": [9, 11, 12, 14, 15, 16], "gpagent": 9, "qgpocrit": 9, "qgpopolici": 9, "qgpoalgorithm": [9, 11, 15], "srpocrit": 9, "srpopolici": 9, "srpoalgorithm": 9, "gmpocrit": 9, "gmpopolici": 9, "gmpgcritic": 9, "gmpgpolici": 9, "qgpod4rldataset": 9, "gpd4rldataset": 9, "generative_model": [9, 16], "diffusionmodel": [9, 16], "energyconditionaldiffusionmodel": 9, "independentconditionalflowmodel": 9, "optimaltransportconditionalflowmodel": 9, "neural_network": [9, 16], "concatenatelay": 9, "multilayerperceptron": 9, "concatenatemlp": 9, "temporalspatialresidualnet": [9, 16], "dit": 9, "dit1d": 9, "dit2d": 9, "dit3d": 9, "numerical_method": [9, 16], "dpmsolver": 9, "odesolv": [9, 16], "sdesolv": 9, "gaussianconditionalprobabilitypath": [9, 16], "rl_modul": 9, "gymenvsimul": 9, "oneshotvaluefunct": 9, "vnetwork": 9, "doublevnetwork": 9, "qnetwork": 9, "doubleqnetwork": 9, "util": [9, 11], "set_se": 9, "pip": [10, 14], "you": [10, 11, 14, 15], "latest": 10, "version": [10, 14], "git": [10, 14], "opendilab": [10, 14], "easi": 11, "swiss": 11, "roll": 11, "colab": 11, "usag": [11, 13], "found": 11, "folder": 11, "grl_pipelin": [11, 15], "tutori": 11, "here": [11, 13, 16], "halfcheetah": 11, "diffusion_model": [11, 15, 16], "d4rl_halfcheetah_qgpo": [11, 15], "def": [11, 16], "qgpo_pipelin": 11, "env": [11, 12, 15], "_": [11, 12, 15, 16], "num_deploy_step": [11, 12, 15], "render": [11, 12, 15], "__name__": 11, "__main__": 11, "info": 11, "necessari": 11, "well": 11, "encapsul": 11, "after": [11, 14], "obtain": [11, 16], "loop": 11, "execut": 11, "specifi": 11, "print": 11, "consol": 11, "modifi": 11, "your": 11, "advanc": [11, 13], "pleas": [11, 13, 15], "document": [11, 13, 15], "9": 14, "higher": 14, "command": 14, "clone": 14, "cd": 14, "pybullet": 14, "mujoco": 14, "deepmind": 14, "control": 14, "etc": 14, "dm_control": 14, "setup": 14, "licens": 14, "special": 14, "23": 14, "anoth": 14, "thing": 14, "sudo": 14, "apt": 14, "get": 14, "libgl1": 14, "mesa": 14, "glx": 14, "libglib2": 14, "libsm6": 14, "libxext6": 14, "libxrend": 14, "dev": 14, "y": 14, "swig": 14, "gcc": 14, "local": 14, "dnsutil": 14, "cmake": 14, "build": 14, "essenti": 14, "libglew": 14, "libosmesa6": 14, "libglfw3": 14, "libsdl2": 14, "libglm": 14, "libfreetype6": 14, "patchelf": 14, "ffmpeg": 14, "mkdir": 14, "root": 14, "wget": 14, "org": 14, "download": 14, "mujoco210": 14, "linux": 14, "x86_64": 14, "tar": 14, "gz": 14, "o": 14, "xf": 14, "export": 14, "ld_library_path": 14, "mjpro210": 14, "bin": 14, "farama": 14, "foundat": 14, "lockfil": 14, "cython": 14, "check": 14, "success": 14, "everi": [15, 16], "hyperparamet": 15, "copi": 15, "trained_model": 15, "divers": 16, "describ": 16, "evolut": 16, "over": 16, "increment": 16, "probability_path": 16, "kind": 16, "varianc": 16, "preserv": 16, "gvp": 16, "usal": 16, "want": 16, "normal": 16, "target": 16, "By": 16, "fokker": 16, "planck": 16, "kolmogorov": 16, "fpk": 16, "hat": 16, "_t": 16, "s_": 16, "codebas": 16, "ddpm": 16, "compar": 16, "Or": 16, "nerual": 16, "therefor": 16, "intrinsicmodel": 16, "ani": 16, "cnn": 16, "u": 16, "net": 16, "x_size": 16, "alpha": 16, "arg": 16, "linear_vp_sd": 16, "20": 16, "t_encod": 16, "512": 16, "256": 16, "128": 16, "t_embedding_dim": 16, "register_modul": 16, "regist": 16, "so": 16, "mymodul": 16, "self": 16, "super": 16, "modulelist": 16, "append": 16, "relu": 16, "mle": 16, "onli": 16, "squar": 16, "error": 16, "l": 16, "dsm": 16, "mathbb": 16, "left": 16, "right": 16, "cfm": 16, "simpli": 16}, "objects": {"grl": [[0, 0, 0, "-", "agents"], [1, 0, 0, "-", "algorithms"], [2, 0, 0, "-", "datasets"], [3, 0, 0, "-", "generative_models"], [4, 0, 0, "-", "neural_network"], [5, 0, 0, "-", "numerical_methods"], [6, 0, 0, "-", "rl_modules"], [7, 0, 0, "-", "utils"]], "grl.agents": [[0, 1, 1, "", "GPAgent"], [0, 1, 1, "", "QGPOAgent"], [0, 1, 1, "", "SRPOAgent"]], "grl.agents.GPAgent": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "act"]], "grl.agents.QGPOAgent": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "act"]], "grl.agents.SRPOAgent": [[0, 2, 1, "", "__init__"], [0, 2, 1, "", "act"]], "grl.algorithms": [[1, 1, 1, "", "GMPGAlgorithm"], [1, 1, 1, "", "GMPGCritic"], [1, 1, 1, "", "GMPGPolicy"], [1, 1, 1, "", "GMPOAlgorithm"], [1, 1, 1, "", "GMPOCritic"], [1, 1, 1, "", "GMPOPolicy"], [1, 1, 1, "", "QGPOAlgorithm"], [1, 1, 1, "", "QGPOCritic"], [1, 1, 1, "", "QGPOPolicy"], [1, 1, 1, "", "SRPOAlgorithm"], [1, 1, 1, "", "SRPOCritic"], [1, 1, 1, "", "SRPOPolicy"]], "grl.algorithms.GMPGAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "train"]], "grl.algorithms.GMPGCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "compute_double_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "in_support_ql_loss"]], "grl.algorithms.GMPGPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "behaviour_policy_sample"], [1, 2, 1, "", "compute_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "sample"]], "grl.algorithms.GMPOAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "train"]], "grl.algorithms.GMPOCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "compute_double_q"], [1, 2, 1, "", "forward"]], "grl.algorithms.GMPOPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "behaviour_policy_sample"], [1, 2, 1, "", "compute_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "policy_optimization_loss_by_advantage_weighted_regression"], [1, 2, 1, "", "policy_optimization_loss_by_advantage_weighted_regression_softmax"], [1, 2, 1, "", "sample"]], "grl.algorithms.QGPOAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "deploy"], [1, 2, 1, "", "train"]], "grl.algorithms.QGPOCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "compute_double_q"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "q_loss"]], "grl.algorithms.QGPOPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "behaviour_policy_sample"], [1, 2, 1, "", "compute_q"], [1, 2, 1, "", "energy_guidance_loss"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "q_loss"], [1, 2, 1, "", "sample"]], "grl.algorithms.SRPOAlgorithm": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "deploy"], [1, 2, 1, "", "train"]], "grl.algorithms.SRPOCritic": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "forward"]], "grl.algorithms.SRPOPolicy": [[1, 2, 1, "", "__init__"], [1, 2, 1, "", "behaviour_policy_loss"], [1, 2, 1, "", "forward"], [1, 2, 1, "", "sample"], [1, 2, 1, "", "srpo_actor_loss"]], "grl.datasets": [[2, 1, 1, "", "GPD4RLDataset"], [2, 1, 1, "", "GPDataset"], [2, 1, 1, "", "QGPOD4RLDataset"], [2, 1, 1, "", "QGPODataset"]], "grl.datasets.GPD4RLDataset": [[2, 2, 1, "", "__init__"]], "grl.datasets.GPDataset": [[2, 2, 1, "", "__init__"]], "grl.datasets.QGPOD4RLDataset": [[2, 2, 1, "", "__init__"]], "grl.datasets.QGPODataset": [[2, 2, 1, "", "__init__"]], "grl.generative_models": [[3, 1, 1, "", "DiffusionModel"], [3, 1, 1, "", "EnergyConditionalDiffusionModel"], [3, 1, 1, "", "IndependentConditionalFlowModel"], [3, 1, 1, "", "OptimalTransportConditionalFlowModel"]], "grl.generative_models.DiffusionModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "data_prediction_function"], [3, 2, 1, "", "dpo_loss"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "forward_sample"], [3, 2, 1, "", "forward_sample_process"], [3, 2, 1, "", "log_prob"], [3, 2, 1, "", "noise_function"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"], [3, 2, 1, "", "sample_forward_process_with_fixed_x"], [3, 2, 1, "", "sample_with_fixed_x"], [3, 2, 1, "", "sample_with_log_prob"], [3, 2, 1, "", "score_function"], [3, 2, 1, "", "score_matching_loss"], [3, 2, 1, "", "velocity_function"]], "grl.generative_models.EnergyConditionalDiffusionModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "data_prediction_function"], [3, 2, 1, "", "data_prediction_function_with_energy_guidance"], [3, 2, 1, "", "energy_guidance_loss"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "noise_function"], [3, 2, 1, "", "noise_function_with_energy_guidance"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"], [3, 2, 1, "", "sample_forward_process_with_fixed_x"], [3, 2, 1, "", "sample_with_fixed_x"], [3, 2, 1, "", "sample_with_fixed_x_without_energy_guidance"], [3, 2, 1, "", "sample_without_energy_guidance"], [3, 2, 1, "", "score_function"], [3, 2, 1, "", "score_function_with_energy_guidance"], [3, 2, 1, "", "score_matching_loss"], [3, 2, 1, "", "velocity_function"]], "grl.generative_models.IndependentConditionalFlowModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "flow_matching_loss_with_mask"], [3, 2, 1, "", "forward_sample"], [3, 2, 1, "", "forward_sample_process"], [3, 2, 1, "", "log_prob"], [3, 2, 1, "", "optimal_transport_flow_matching_loss"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"], [3, 2, 1, "", "sample_with_log_prob"], [3, 2, 1, "", "sample_with_mask"], [3, 2, 1, "", "sample_with_mask_forward_process"]], "grl.generative_models.OptimalTransportConditionalFlowModel": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "flow_matching_loss"], [3, 2, 1, "", "flow_matching_loss_small_batch_OT_plan"], [3, 2, 1, "", "sample"], [3, 2, 1, "", "sample_forward_process"]], "grl.neural_network": [[4, 1, 1, "", "ConcatenateLayer"], [4, 1, 1, "", "ConcatenateMLP"], [4, 1, 1, "", "DiT"], [4, 1, 1, "", "DiT1D"], [4, 3, 1, "", "DiT2D"], [4, 1, 1, "", "DiT3D"], [4, 1, 1, "", "MultiLayerPerceptron"], [4, 1, 1, "", "TemporalSpatialResidualNet"]], "grl.neural_network.ConcatenateLayer": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.neural_network.ConcatenateMLP": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.neural_network.DiT": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"], [4, 2, 1, "", "forward_with_cfg"], [4, 2, 1, "", "initialize_weights"], [4, 2, 1, "", "unpatchify"]], "grl.neural_network.DiT1D": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"], [4, 2, 1, "", "initialize_weights"]], "grl.neural_network.DiT3D": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"], [4, 2, 1, "", "initialize_weights"], [4, 2, 1, "", "unpatchify"]], "grl.neural_network.MultiLayerPerceptron": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.neural_network.TemporalSpatialResidualNet": [[4, 2, 1, "", "__init__"], [4, 2, 1, "", "forward"]], "grl.numerical_methods": [[5, 1, 1, "", "DPMSolver"], [5, 1, 1, "", "GaussianConditionalProbabilityPath"], [5, 1, 1, "", "ODE"], [5, 1, 1, "", "ODESolver"], [5, 1, 1, "", "SDE"], [5, 1, 1, "", "SDESolver"]], "grl.numerical_methods.DPMSolver": [[5, 2, 1, "", "__init__"], [5, 2, 1, "", "integrate"]], "grl.numerical_methods.GaussianConditionalProbabilityPath": [[5, 2, 1, "", "HalfLogSNR"], [5, 2, 1, "", "InverseHalfLogSNR"], [5, 2, 1, "", "__init__"], [5, 2, 1, "", "covariance"], [5, 2, 1, "", "d_covariance_dt"], [5, 2, 1, "", "d_log_scale_dt"], [5, 2, 1, "", "d_scale_dt"], [5, 2, 1, "", "d_std_dt"], [5, 2, 1, "", "diffusion"], [5, 2, 1, "", "diffusion_squared"], [5, 2, 1, "", "drift"], [5, 2, 1, "", "drift_coefficient"], [5, 2, 1, "", "log_scale"], [5, 2, 1, "", "scale"], [5, 2, 1, "", "std"]], "grl.numerical_methods.ODE": [[5, 2, 1, "", "__init__"]], "grl.numerical_methods.ODESolver": [[5, 2, 1, "", "__init__"], [5, 2, 1, "", "integrate"]], "grl.numerical_methods.SDE": [[5, 2, 1, "", "__init__"]], "grl.numerical_methods.SDESolver": [[5, 2, 1, "", "__init__"], [5, 2, 1, "", "integrate"]], "grl.rl_modules": [[6, 1, 1, "", "DoubleQNetwork"], [6, 1, 1, "", "DoubleVNetwork"], [6, 1, 1, "", "GymEnvSimulator"], [6, 1, 1, "", "OneShotValueFunction"], [6, 1, 1, "", "QNetwork"], [6, 1, 1, "", "VNetwork"]], "grl.rl_modules.DoubleQNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "compute_double_q"], [6, 2, 1, "", "compute_mininum_q"], [6, 2, 1, "", "forward"]], "grl.rl_modules.DoubleVNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "compute_double_v"], [6, 2, 1, "", "compute_mininum_v"], [6, 2, 1, "", "forward"]], "grl.rl_modules.GymEnvSimulator": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "collect_episodes"], [6, 2, 1, "", "collect_steps"], [6, 2, 1, "", "evaluate"]], "grl.rl_modules.OneShotValueFunction": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "compute_double_v"], [6, 2, 1, "", "forward"], [6, 2, 1, "", "v_loss"]], "grl.rl_modules.QNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "forward"]], "grl.rl_modules.VNetwork": [[6, 2, 1, "", "__init__"], [6, 2, 1, "", "forward"]], "grl.utils": [[7, 4, 1, "", "set_seed"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:attribute", "4": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "function", "Python function"]}, "titleterms": {"grl": [0, 1, 2, 3, 4, 5, 6, 7], "agent": [0, 12, 15], "qgpoagent": 0, "srpoagent": 0, "gpagent": 0, "algorithm": 1, "qgpocrit": 1, "qgpopolici": 1, "qgpoalgorithm": 1, "srpocrit": 1, "srpopolici": 1, "srpoalgorithm": 1, "gmpocrit": 1, "gmpopolici": 1, "gmpoalgorithm": 1, "gmpgcritic": 1, "gmpgpolici": 1, "gmpgalgorithm": 1, "dataset": 2, "qgpod4rldataset": 2, "qgpodataset": 2, "gpd4rldataset": 2, "gpdataset": 2, "generative_model": 3, "diffusionmodel": 3, "energyconditionaldiffusionmodel": 3, "independentconditionalflowmodel": 3, "optimaltransportconditionalflowmodel": 3, "neural_network": 4, "concatenatelay": 4, "multilayerperceptron": 4, "concatenatemlp": 4, "temporalspatialresidualnet": 4, "dit": 4, "dit1d": 4, "dit2d": 4, "dit3d": 4, "numerical_method": 5, "od": 5, "sde": 5, "dpmsolver": 5, "odesolv": 5, "sdesolv": 5, "gaussianconditionalprobabilitypath": 5, "rl_modul": 6, "gymenvsimul": 6, "oneshotvaluefunct": 6, "vnetwork": 6, "doublevnetwork": 6, "qnetwork": 6, "doubleqnetwork": 6, "util": 7, "set_se": 7, "concept": [8, 9], "overview": [8, 9], "gener": [8, 11, 16], "model": [8, 11, 16], "reinforc": [8, 11, 15], "learn": [8, 11, 15], "design": 8, "principl": 8, "generativerl": [9, 11, 14], "document": 9, "tutori": 9, "user": [9, 13], "guid": [9, 13], "api": 9, "instal": [10, 14], "quick": 11, "start": 11, "explan": 11, "how": [12, 14, 15, 16], "evalu": 12, "rl": 12, "perform": 12, "its": 14, "depend": 14, "train": [15, 16], "deploi": 15, "stochast": 16, "path": 16, "parameter": 16, "custom": 16, "neural": 16, "network": 16, "object": 16, "differ": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.todo": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"grl.agents": [[0, "module-grl.agents"]], "QGPOAgent": [[0, "qgpoagent"]], "SRPOAgent": [[0, "srpoagent"]], "GPAgent": [[0, "gpagent"]], "grl.algorithms": [[1, "module-grl.algorithms"]], "QGPOCritic": [[1, "qgpocritic"]], "QGPOPolicy": [[1, "qgpopolicy"]], "QGPOAlgorithm": [[1, "qgpoalgorithm"]], "SRPOCritic": [[1, "srpocritic"]], "SRPOPolicy": [[1, "srpopolicy"]], "SRPOAlgorithm": [[1, "srpoalgorithm"]], "GMPOCritic": [[1, "gmpocritic"]], "GMPOPolicy": [[1, "gmpopolicy"]], "GMPOAlgorithm": [[1, "gmpoalgorithm"]], "GMPGCritic": [[1, "gmpgcritic"]], "GMPGPolicy": [[1, "gmpgpolicy"]], "GMPGAlgorithm": [[1, "gmpgalgorithm"]], "grl.datasets": [[2, "module-grl.datasets"]], "QGPOD4RLDataset": [[2, "qgpod4rldataset"]], "QGPODataset": [[2, "qgpodataset"]], "GPD4RLDataset": [[2, "gpd4rldataset"]], "GPDataset": [[2, "gpdataset"]], "grl.generative_models": [[3, "module-grl.generative_models"]], "DiffusionModel": [[3, "diffusionmodel"]], "EnergyConditionalDiffusionModel": [[3, "energyconditionaldiffusionmodel"]], "IndependentConditionalFlowModel": [[3, "independentconditionalflowmodel"]], "OptimalTransportConditionalFlowModel": [[3, "optimaltransportconditionalflowmodel"]], "grl.neural_network": [[4, "module-grl.neural_network"]], "ConcatenateLayer": [[4, "concatenatelayer"]], "MultiLayerPerceptron": [[4, "multilayerperceptron"]], "ConcatenateMLP": [[4, "concatenatemlp"]], "TemporalSpatialResidualNet": [[4, "temporalspatialresidualnet"]], "DiT": [[4, "dit"]], "DiT1D": [[4, "dit1d"]], "DiT2D": [[4, "dit2d"]], "DiT3D": [[4, "dit3d"]], "grl.numerical_methods": [[5, "module-grl.numerical_methods"]], "ODE": [[5, "ode"]], "SDE": [[5, "sde"]], "DPMSolver": [[5, "dpmsolver"]], "ODESolver": [[5, "odesolver"]], "SDESolver": [[5, "sdesolver"]], "GaussianConditionalProbabilityPath": [[5, "gaussianconditionalprobabilitypath"]], "grl.rl_modules": [[6, "module-grl.rl_modules"]], "GymEnvSimulator": [[6, "gymenvsimulator"]], "OneShotValueFunction": [[6, "oneshotvaluefunction"]], "VNetwork": [[6, "vnetwork"]], "DoubleVNetwork": [[6, "doublevnetwork"]], "QNetwork": [[6, "qnetwork"]], "DoubleQNetwork": [[6, "doubleqnetwork"]], "grl.utils": [[7, "module-grl.utils"]], "set_seed": [[7, "set-seed"]], "Concepts": [[8, "concepts"], [9, null]], "Concepts Overview": [[8, "concepts-overview"]], "Generative Models": [[8, "generative-models"]], "Reinforcement Learning": [[8, "reinforcement-learning"], [11, "reinforcement-learning"]], "Design Principles": [[8, "design-principles"]], "GenerativeRL Documentation": [[9, "generativerl-documentation"]], "Overview": [[9, "overview"]], "Tutorials": [[9, null]], "User Guide": [[9, null], [13, "user-guide"], [13, null]], "API Documentation": [[9, null]], "Installation": [[10, "installation"]], "Quick Start": [[11, "quick-start"]], "Generative model in GenerativeRL": [[11, "generative-model-in-generativerl"]], "Explanation": [[11, "explanation"]], "How to evaluate RL agents performance": [[12, "how-to-evaluate-rl-agents-performance"]], "How to install GenerativeRL and its dependencies": [[14, "how-to-install-generativerl-and-its-dependencies"]], "How to train and deploy reinforcement learning agents": [[15, "how-to-train-and-deploy-reinforcement-learning-agents"]], "How to train generative models": [[16, "how-to-train-generative-models"]], "Stochastic path": [[16, "stochastic-path"]], "Model parameterization": [[16, "model-parameterization"]], "Customized neural network": [[16, "customized-neural-network"]], "Training objective for different generative models": [[16, "training-objective-for-different-generative-models"]]}, "indexentries": {"gpagent (class in grl.agents)": [[0, "grl.agents.GPAgent"]], "qgpoagent (class in grl.agents)": [[0, "grl.agents.QGPOAgent"]], "srpoagent (class in grl.agents)": [[0, "grl.agents.SRPOAgent"]], "__init__() (grl.agents.gpagent method)": [[0, "grl.agents.GPAgent.__init__"]], "__init__() (grl.agents.qgpoagent method)": [[0, "grl.agents.QGPOAgent.__init__"]], "__init__() (grl.agents.srpoagent method)": [[0, "grl.agents.SRPOAgent.__init__"]], "act() (grl.agents.gpagent method)": [[0, "grl.agents.GPAgent.act"]], "act() (grl.agents.qgpoagent method)": [[0, "grl.agents.QGPOAgent.act"]], "act() (grl.agents.srpoagent method)": [[0, "grl.agents.SRPOAgent.act"]], "grl.agents": [[0, "module-grl.agents"]], "module": [[0, "module-grl.agents"], [1, "module-grl.algorithms"], [2, "module-grl.datasets"], [3, "module-grl.generative_models"], [4, "module-grl.neural_network"], [5, "module-grl.numerical_methods"], [6, "module-grl.rl_modules"], [7, "module-grl.utils"]], "gmpgalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.GMPGAlgorithm"]], "gmpgcritic (class in grl.algorithms)": [[1, "grl.algorithms.GMPGCritic"]], "gmpgpolicy (class in grl.algorithms)": [[1, "grl.algorithms.GMPGPolicy"]], "gmpoalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.GMPOAlgorithm"]], "gmpocritic (class in grl.algorithms)": [[1, "grl.algorithms.GMPOCritic"]], "gmpopolicy (class in grl.algorithms)": [[1, "grl.algorithms.GMPOPolicy"]], "qgpoalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.QGPOAlgorithm"]], "qgpocritic (class in grl.algorithms)": [[1, "grl.algorithms.QGPOCritic"]], "qgpopolicy (class in grl.algorithms)": [[1, "grl.algorithms.QGPOPolicy"]], "srpoalgorithm (class in grl.algorithms)": [[1, "grl.algorithms.SRPOAlgorithm"]], "srpocritic (class in grl.algorithms)": [[1, "grl.algorithms.SRPOCritic"]], "srpopolicy (class in grl.algorithms)": [[1, "grl.algorithms.SRPOPolicy"]], "__init__() (grl.algorithms.gmpgalgorithm method)": [[1, "grl.algorithms.GMPGAlgorithm.__init__"]], "__init__() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.__init__"]], "__init__() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.__init__"]], "__init__() (grl.algorithms.gmpoalgorithm method)": [[1, "grl.algorithms.GMPOAlgorithm.__init__"]], "__init__() (grl.algorithms.gmpocritic method)": [[1, "grl.algorithms.GMPOCritic.__init__"]], "__init__() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.__init__"]], "__init__() (grl.algorithms.qgpoalgorithm method)": [[1, "grl.algorithms.QGPOAlgorithm.__init__"]], "__init__() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.__init__"]], "__init__() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.__init__"]], "__init__() (grl.algorithms.srpoalgorithm method)": [[1, "grl.algorithms.SRPOAlgorithm.__init__"]], "__init__() (grl.algorithms.srpocritic method)": [[1, "grl.algorithms.SRPOCritic.__init__"]], "__init__() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.__init__"]], "behaviour_policy_loss() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.behaviour_policy_loss"]], "behaviour_policy_loss() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.behaviour_policy_loss"]], "behaviour_policy_loss() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.behaviour_policy_loss"]], "behaviour_policy_loss() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.behaviour_policy_loss"]], "behaviour_policy_sample() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.behaviour_policy_sample"]], "behaviour_policy_sample() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.behaviour_policy_sample"]], "behaviour_policy_sample() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.behaviour_policy_sample"]], "compute_double_q() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.compute_double_q"]], "compute_double_q() (grl.algorithms.gmpocritic method)": [[1, "grl.algorithms.GMPOCritic.compute_double_q"]], "compute_double_q() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.compute_double_q"]], "compute_q() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.compute_q"]], "compute_q() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.compute_q"]], "compute_q() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.compute_q"]], "deploy() (grl.algorithms.qgpoalgorithm method)": [[1, "grl.algorithms.QGPOAlgorithm.deploy"]], "deploy() (grl.algorithms.srpoalgorithm method)": [[1, "grl.algorithms.SRPOAlgorithm.deploy"]], "energy_guidance_loss() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.energy_guidance_loss"]], "forward() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.forward"]], "forward() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.forward"]], "forward() (grl.algorithms.gmpocritic method)": [[1, "grl.algorithms.GMPOCritic.forward"]], "forward() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.forward"]], "forward() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.forward"]], "forward() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.forward"]], "forward() (grl.algorithms.srpocritic method)": [[1, "grl.algorithms.SRPOCritic.forward"]], "forward() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.forward"]], "grl.algorithms": [[1, "module-grl.algorithms"]], "in_support_ql_loss() (grl.algorithms.gmpgcritic method)": [[1, "grl.algorithms.GMPGCritic.in_support_ql_loss"]], "policy_optimization_loss_by_advantage_weighted_regression() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.policy_optimization_loss_by_advantage_weighted_regression"]], "policy_optimization_loss_by_advantage_weighted_regression_softmax() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.policy_optimization_loss_by_advantage_weighted_regression_softmax"]], "q_loss() (grl.algorithms.qgpocritic method)": [[1, "grl.algorithms.QGPOCritic.q_loss"]], "q_loss() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.q_loss"]], "sample() (grl.algorithms.gmpgpolicy method)": [[1, "grl.algorithms.GMPGPolicy.sample"]], "sample() (grl.algorithms.gmpopolicy method)": [[1, "grl.algorithms.GMPOPolicy.sample"]], "sample() (grl.algorithms.qgpopolicy method)": [[1, "grl.algorithms.QGPOPolicy.sample"]], "sample() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.sample"]], "srpo_actor_loss() (grl.algorithms.srpopolicy method)": [[1, "grl.algorithms.SRPOPolicy.srpo_actor_loss"]], "train() (grl.algorithms.gmpgalgorithm method)": [[1, "grl.algorithms.GMPGAlgorithm.train"]], "train() (grl.algorithms.gmpoalgorithm method)": [[1, "grl.algorithms.GMPOAlgorithm.train"]], "train() (grl.algorithms.qgpoalgorithm method)": [[1, "grl.algorithms.QGPOAlgorithm.train"]], "train() (grl.algorithms.srpoalgorithm method)": [[1, "grl.algorithms.SRPOAlgorithm.train"]], "gpd4rldataset (class in grl.datasets)": [[2, "grl.datasets.GPD4RLDataset"]], "gpdataset (class in grl.datasets)": [[2, "grl.datasets.GPDataset"]], "qgpod4rldataset (class in grl.datasets)": [[2, "grl.datasets.QGPOD4RLDataset"]], "qgpodataset (class in grl.datasets)": [[2, "grl.datasets.QGPODataset"]], "__init__() (grl.datasets.gpd4rldataset method)": [[2, "grl.datasets.GPD4RLDataset.__init__"]], "__init__() (grl.datasets.gpdataset method)": [[2, "grl.datasets.GPDataset.__init__"]], "__init__() (grl.datasets.qgpod4rldataset method)": [[2, "grl.datasets.QGPOD4RLDataset.__init__"]], "__init__() (grl.datasets.qgpodataset method)": [[2, "grl.datasets.QGPODataset.__init__"]], "grl.datasets": [[2, "module-grl.datasets"]], "diffusionmodel (class in grl.generative_models)": [[3, "grl.generative_models.DiffusionModel"]], "energyconditionaldiffusionmodel (class in grl.generative_models)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel"]], "independentconditionalflowmodel (class in grl.generative_models)": [[3, "grl.generative_models.IndependentConditionalFlowModel"]], "optimaltransportconditionalflowmodel (class in grl.generative_models)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel"]], "__init__() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.__init__"]], "__init__() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.__init__"]], "__init__() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.__init__"]], "__init__() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.__init__"]], "data_prediction_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.data_prediction_function"]], "data_prediction_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.data_prediction_function"]], "data_prediction_function_with_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.data_prediction_function_with_energy_guidance"]], "dpo_loss() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.dpo_loss"]], "energy_guidance_loss() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.energy_guidance_loss"]], "flow_matching_loss() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.flow_matching_loss"]], "flow_matching_loss() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.flow_matching_loss"]], "flow_matching_loss() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.flow_matching_loss"]], "flow_matching_loss() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.flow_matching_loss"]], "flow_matching_loss_small_batch_ot_plan() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.flow_matching_loss_small_batch_OT_plan"]], "flow_matching_loss_with_mask() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.flow_matching_loss_with_mask"]], "forward_sample() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.forward_sample"]], "forward_sample() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.forward_sample"]], "forward_sample_process() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.forward_sample_process"]], "forward_sample_process() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.forward_sample_process"]], "grl.generative_models": [[3, "module-grl.generative_models"]], "log_prob() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.log_prob"]], "log_prob() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.log_prob"]], "noise_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.noise_function"]], "noise_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.noise_function"]], "noise_function_with_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.noise_function_with_energy_guidance"]], "optimal_transport_flow_matching_loss() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.optimal_transport_flow_matching_loss"]], "sample() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample"]], "sample() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample"]], "sample() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample"]], "sample() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.sample"]], "sample_forward_process() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_forward_process"]], "sample_forward_process() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_forward_process"]], "sample_forward_process() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_forward_process"]], "sample_forward_process() (grl.generative_models.optimaltransportconditionalflowmodel method)": [[3, "grl.generative_models.OptimalTransportConditionalFlowModel.sample_forward_process"]], "sample_forward_process_with_fixed_x() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_forward_process_with_fixed_x"]], "sample_forward_process_with_fixed_x() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_forward_process_with_fixed_x"]], "sample_with_fixed_x() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_with_fixed_x"]], "sample_with_fixed_x() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_with_fixed_x"]], "sample_with_fixed_x_without_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_with_fixed_x_without_energy_guidance"]], "sample_with_log_prob() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.sample_with_log_prob"]], "sample_with_log_prob() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_with_log_prob"]], "sample_with_mask() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_with_mask"]], "sample_with_mask_forward_process() (grl.generative_models.independentconditionalflowmodel method)": [[3, "grl.generative_models.IndependentConditionalFlowModel.sample_with_mask_forward_process"]], "sample_without_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.sample_without_energy_guidance"]], "score_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.score_function"]], "score_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.score_function"]], "score_function_with_energy_guidance() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.score_function_with_energy_guidance"]], "score_matching_loss() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.score_matching_loss"]], "score_matching_loss() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.score_matching_loss"]], "velocity_function() (grl.generative_models.diffusionmodel method)": [[3, "grl.generative_models.DiffusionModel.velocity_function"]], "velocity_function() (grl.generative_models.energyconditionaldiffusionmodel method)": [[3, "grl.generative_models.EnergyConditionalDiffusionModel.velocity_function"]], "concatenatelayer (class in grl.neural_network)": [[4, "grl.neural_network.ConcatenateLayer"]], "concatenatemlp (class in grl.neural_network)": [[4, "grl.neural_network.ConcatenateMLP"]], "dit (class in grl.neural_network)": [[4, "grl.neural_network.DiT"]], "dit1d (class in grl.neural_network)": [[4, "grl.neural_network.DiT1D"]], "dit2d (in module grl.neural_network)": [[4, "grl.neural_network.DiT2D"]], "dit3d (class in grl.neural_network)": [[4, "grl.neural_network.DiT3D"]], "multilayerperceptron (class in grl.neural_network)": [[4, "grl.neural_network.MultiLayerPerceptron"]], "temporalspatialresidualnet (class in grl.neural_network)": [[4, "grl.neural_network.TemporalSpatialResidualNet"]], "__init__() (grl.neural_network.concatenatelayer method)": [[4, "grl.neural_network.ConcatenateLayer.__init__"]], "__init__() (grl.neural_network.concatenatemlp method)": [[4, "grl.neural_network.ConcatenateMLP.__init__"]], "__init__() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.__init__"]], "__init__() (grl.neural_network.dit1d method)": [[4, "grl.neural_network.DiT1D.__init__"]], "__init__() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.__init__"]], "__init__() (grl.neural_network.multilayerperceptron method)": [[4, "grl.neural_network.MultiLayerPerceptron.__init__"]], "__init__() (grl.neural_network.temporalspatialresidualnet method)": [[4, "grl.neural_network.TemporalSpatialResidualNet.__init__"]], "forward() (grl.neural_network.concatenatelayer method)": [[4, "grl.neural_network.ConcatenateLayer.forward"]], "forward() (grl.neural_network.concatenatemlp method)": [[4, "grl.neural_network.ConcatenateMLP.forward"]], "forward() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.forward"]], "forward() (grl.neural_network.dit1d method)": [[4, "grl.neural_network.DiT1D.forward"]], "forward() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.forward"]], "forward() (grl.neural_network.multilayerperceptron method)": [[4, "grl.neural_network.MultiLayerPerceptron.forward"]], "forward() (grl.neural_network.temporalspatialresidualnet method)": [[4, "grl.neural_network.TemporalSpatialResidualNet.forward"]], "forward_with_cfg() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.forward_with_cfg"]], "grl.neural_network": [[4, "module-grl.neural_network"]], "initialize_weights() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.initialize_weights"]], "initialize_weights() (grl.neural_network.dit1d method)": [[4, "grl.neural_network.DiT1D.initialize_weights"]], "initialize_weights() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.initialize_weights"]], "unpatchify() (grl.neural_network.dit method)": [[4, "grl.neural_network.DiT.unpatchify"]], "unpatchify() (grl.neural_network.dit3d method)": [[4, "grl.neural_network.DiT3D.unpatchify"]], "dpmsolver (class in grl.numerical_methods)": [[5, "grl.numerical_methods.DPMSolver"]], "gaussianconditionalprobabilitypath (class in grl.numerical_methods)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath"]], "halflogsnr() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.HalfLogSNR"]], "inversehalflogsnr() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.InverseHalfLogSNR"]], "ode (class in grl.numerical_methods)": [[5, "grl.numerical_methods.ODE"]], "odesolver (class in grl.numerical_methods)": [[5, "grl.numerical_methods.ODESolver"]], "sde (class in grl.numerical_methods)": [[5, "grl.numerical_methods.SDE"]], "sdesolver (class in grl.numerical_methods)": [[5, "grl.numerical_methods.SDESolver"]], "__init__() (grl.numerical_methods.dpmsolver method)": [[5, "grl.numerical_methods.DPMSolver.__init__"]], "__init__() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.__init__"]], "__init__() (grl.numerical_methods.ode method)": [[5, "grl.numerical_methods.ODE.__init__"]], "__init__() (grl.numerical_methods.odesolver method)": [[5, "grl.numerical_methods.ODESolver.__init__"]], "__init__() (grl.numerical_methods.sde method)": [[5, "grl.numerical_methods.SDE.__init__"]], "__init__() (grl.numerical_methods.sdesolver method)": [[5, "grl.numerical_methods.SDESolver.__init__"]], "covariance() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.covariance"]], "d_covariance_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_covariance_dt"]], "d_log_scale_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_log_scale_dt"]], "d_scale_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_scale_dt"]], "d_std_dt() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.d_std_dt"]], "diffusion() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.diffusion"]], "diffusion_squared() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.diffusion_squared"]], "drift() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.drift"]], "drift_coefficient() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.drift_coefficient"]], "grl.numerical_methods": [[5, "module-grl.numerical_methods"]], "integrate() (grl.numerical_methods.dpmsolver method)": [[5, "grl.numerical_methods.DPMSolver.integrate"]], "integrate() (grl.numerical_methods.odesolver method)": [[5, "grl.numerical_methods.ODESolver.integrate"]], "integrate() (grl.numerical_methods.sdesolver method)": [[5, "grl.numerical_methods.SDESolver.integrate"]], "log_scale() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.log_scale"]], "scale() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.scale"]], "std() (grl.numerical_methods.gaussianconditionalprobabilitypath method)": [[5, "grl.numerical_methods.GaussianConditionalProbabilityPath.std"]], "doubleqnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.DoubleQNetwork"]], "doublevnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.DoubleVNetwork"]], "gymenvsimulator (class in grl.rl_modules)": [[6, "grl.rl_modules.GymEnvSimulator"]], "oneshotvaluefunction (class in grl.rl_modules)": [[6, "grl.rl_modules.OneShotValueFunction"]], "qnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.QNetwork"]], "vnetwork (class in grl.rl_modules)": [[6, "grl.rl_modules.VNetwork"]], "__init__() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.__init__"]], "__init__() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.__init__"]], "__init__() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.__init__"]], "__init__() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.__init__"]], "__init__() (grl.rl_modules.qnetwork method)": [[6, "grl.rl_modules.QNetwork.__init__"]], "__init__() (grl.rl_modules.vnetwork method)": [[6, "grl.rl_modules.VNetwork.__init__"]], "collect_episodes() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.collect_episodes"]], "collect_steps() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.collect_steps"]], "compute_double_q() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.compute_double_q"]], "compute_double_v() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.compute_double_v"]], "compute_double_v() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.compute_double_v"]], "compute_mininum_q() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.compute_mininum_q"]], "compute_mininum_v() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.compute_mininum_v"]], "evaluate() (grl.rl_modules.gymenvsimulator method)": [[6, "grl.rl_modules.GymEnvSimulator.evaluate"]], "forward() (grl.rl_modules.doubleqnetwork method)": [[6, "grl.rl_modules.DoubleQNetwork.forward"]], "forward() (grl.rl_modules.doublevnetwork method)": [[6, "grl.rl_modules.DoubleVNetwork.forward"]], "forward() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.forward"]], "forward() (grl.rl_modules.qnetwork method)": [[6, "grl.rl_modules.QNetwork.forward"]], "forward() (grl.rl_modules.vnetwork method)": [[6, "grl.rl_modules.VNetwork.forward"]], "grl.rl_modules": [[6, "module-grl.rl_modules"]], "v_loss() (grl.rl_modules.oneshotvaluefunction method)": [[6, "grl.rl_modules.OneShotValueFunction.v_loss"]], "grl.utils": [[7, "module-grl.utils"]], "set_seed() (in module grl.utils)": [[7, "grl.utils.set_seed"]]}})
\ No newline at end of file
diff --git a/tutorials/installation/index.html b/tutorials/installation/index.html
index 16e3241..d1c512e 100644
--- a/tutorials/installation/index.html
+++ b/tutorials/installation/index.html
@@ -313,7 +313,7 @@
   <section id="installation">
 <h1>Installation<a class="headerlink" href="#installation" title="Permalink to this heading">¶</a></h1>
 <p>GenerativeRL can be installed using pip:</p>
-<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>grl
+<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>GenerativeRL
 </pre></div>
 </div>
 <p>You can also install the latest development version from GitHub:</p>