# isaacgymenvs/cfg/task/AllegroHandDextremeADR.yaml

# Task name; used to create the object
# (presumably the task/environment to instantiate — confirm against the consumer).
name: AllegroHandADR

# Relative interpolation: the value is read from `physics_engine` one level
# above this file's root node.
physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  # Falls back to 16384 through the `resolve_default` resolver applied to the
  # outer `num_envs` value (resolver is defined outside this file).
  numEnvs: ${resolve_default:16384,${...num_envs}}
  envSpacing: 0.75
  # Not used while resetTime > 0; the original note gives this as 8 s.
  # NOTE(review): 320 steps * controlFrequencyInv 2 * sim.dt (1/60 s) is about 10.7 s — confirm which is intended.
  episodeLength: 320
  # Max time until reset, in seconds, if a goal wasn't achieved. Overrides episodeLength when > 0.
  resetTime: 8
  enableDebugVis: False
  aggregateMode: 1

  clipObservations: 50.0
  clipActions: 1.0
  discreteActions: False

  stiffnessScale: 1.0
  forceLimitScale: 1.0

  useRelativeControl: False
  dofSpeedScale: 20.0

  use_capped_dof_control: False
  # 6.2832 is approximately 2*pi.
  max_dof_radians_per_second: 6.2832

  max_effort: 0.5

  num_success_hold_steps: 0

  actionsMovingAverage:
    range: [0.15, 0.2]
    schedule_steps: 1_000_000
    # schedule_steps: 300_000
    # Schedule every 500 steps for stability.
    schedule_freq: 500

  # 2 -> 30 Hz, 3 -> 20 Hz.
  controlFrequencyInv: 2

  cubeObsDelayProb: 0.3
  maxObjectSkipObs: 2

  # Action delay related
  actionDelayProbMax: 0.3
  actionLatencyMax: 15
  actionLatencyScheduledSteps: 2_000_000

  startPositionNoise: 0.01
  startRotationNoise: 0.0

  resetPositionNoise: 0.03
  resetPositionNoiseZ: 0.01
  resetRotationNoise: 0.0
  resetDofPosRandomInterval: 0.2
  resetDofVelRandomInterval: 0.0

  startObjectPoseDY: -0.15
  startObjectPoseDZ: 0.06

  # Random forces applied to the object
  forceScale: 2.0
  forceProbRange: [0.001, 0.1]
  forceDecay: 0.99
  forceDecayInterval: 0.08

  # Random adversarial perturbations
  random_network_adversary:
    enable: True
    # prob: 0.30
    # Sampling frequency of the adversary weights, in steps.
    weight_sample_freq: 1000

  random_cube_observation:
    enable: True
    prob: 0.3

  # reward -> dictionary
  distRewardScale: -10.0
  rotRewardScale: 1.0
  rotEps: 0.1
  actionPenaltyScale: -0.001
  # Alternative value noted by the original author: -0.01
  actionDeltaPenaltyScale: -0.2
  reachGoalBonus: 250
  fallDistance: 0.24
  fallPenalty: 0.0

  # Can be block, egg or pen.
  objectType: "block"
  # Can be "no_vel" or "full_state".
  observationType: "no_vel"
  asymmetric_observations: True
  successTolerance: 0.1
  printNumSuccesses: False
  maxConsecutiveSuccesses: 50

  # Asset files for the hand and for each supported object type.
  asset:
    assetFileName: "urdf/kuka_allegro_description/allegro_touch_sensor.urdf"
    assetFileNameBlock: "urdf/objects/cube_multicolor_allegro.urdf"
    assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml"
    assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml"

  # Set to True if you use camera sensors in the environment.
  enableCameraSensors: False

# Task-level settings: domain randomization and the `adr` section.
task:
  randomize: True
  randomization_params:
    frequency: 720   # Number of simulation steps between generating new randomizations

    # Randomization of simulation-wide parameters.
    sim_params:
      # Additive gaussian perturbation of gravity.
      # NOTE(review): for "gaussian" the two `range` entries are presumably
      # distribution parameters rather than lower/upper bounds — confirm.
      gravity:
        range: [0, 0.6]
        operation: "additive"
        distribution: "gaussian"

    # Per-actor randomization. Each leaf entry gives a `range`, an `operation`
    # ("scaling" or "additive") and a `distribution`.
    actor_params:
      hand:
        scale:
          range: [0.95, 1.05]
          operation: "scaling"
          distribution: "uniform"
          # Property will only be randomized once before simulation is started.
          # See Domain Randomization Documentation for more info.
          setup_only: True
          # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
          # schedule_steps: 30000
        color: True
        dof_properties:
          damping:
            range: [0.01, 20.0]
            operation: "scaling"
            distribution: "loguniform"
          stiffness:
            range: [0.01, 20.0]
            operation: "scaling"
            distribution: "loguniform"
            # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            # schedule_steps: 30000
          effort:
            range: [0.4, 10.0]
            operation: "scaling"
            distribution: "uniform"
          friction:
            range: [0.0, 10.0]
            operation: "scaling"
            distribution: "uniform"
          armature:
            range: [0.0, 10.0]
            operation: "scaling"
            distribution: "uniform"
          lower:
            # Previous (disabled) setting:
            # range: [0, 0.01]
            # operation: "additive"
            # distribution: "gaussian"
            range: [-5.0, 5.0]
            operation: "additive"
            distribution: "uniform"
          upper:
            # Previous (disabled) setting:
            # range: [0, 0.01]
            # operation: "additive"
            # distribution: "gaussian"
            range: [-5.0, 5.0]
            operation: "additive"
            distribution: "uniform"
            # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            # schedule_steps: 30000

        rigid_body_properties:
          mass:
            # range: [0.5, 2.0]
            # range: [0.5, 1.5]
            # Change when runtime API is available.
            range: [0.4, 1.6]
            operation: "scaling"
            distribution: "uniform"
            # False here: the property is not restricted to a single randomization
            # before simulation start (that is what True would select).
            # See Domain Randomization Documentation for more info.
            setup_only: False
            # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            # schedule_steps: 30000

        rigid_shape_properties:
          friction:
            num_buckets: 250
            # range: [0.2, 1.2] #[0.7, 1.3]
            range: [0.01, 2.0]
            operation: "scaling"
            distribution: "uniform"
            # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            # schedule_steps: 30000
          restitution:
            num_buckets: 100
            range: [0.0, 0.5]
            operation: "additive"
            distribution: "uniform"

      # Randomization of the manipulated object: scale, mass, and contact
      # friction/restitution.
      object:
        scale:
          range: [0.95, 1.05]
          operation: "scaling"
          distribution: "uniform"
          setup_only: True # Property will only be randomized once before simulation is started. See Domain Randomization Documentation for more info.
          # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
          # schedule_steps: 30000

        rigid_body_properties:
          mass:
            # range: [0.5, 1.5]
            range: [0.3, 1.7] # after fixing the API expand it even more
            operation: "scaling"
            distribution: "uniform"
            setup_only: False # False here: not restricted to a single randomization before simulation start (that is what True would select). See Domain Randomization Documentation for more info.
            # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            # schedule_steps: 30000

        rigid_shape_properties:
          friction:
            # Previous (disabled) setting:
            # num_buckets: 250
            # range: [0.2, 1.2] #[0.7, 1.3]
            # operation: "scaling"
            # distribution: "uniform"
            num_buckets: 250
            range: [0.01, 2.0]
            operation: "scaling"
            distribution: "uniform"
            # distribution: "loguniform"
            # schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            # schedule_steps: 30000
          restitution:
            num_buckets: 100
            range: [0.0, 0.5]
            operation: "additive"
            distribution: "uniform"

  # ADR settings: switches, boundary-evaluation parameters and thresholds.
  adr:

    use_adr: True

    # Set to False to stop updating the ADR ranges. Useful for evaluation or for training a base policy.
    update_adr_ranges: True
    clear_other_queues: False

    # If set, boundary sampling and performance eval will occur at (bound + delta) instead of at bound.
    adr_extended_boundary_sample: False

    # Fraction of workers dedicated to measuring performance at the ends of the ADR ranges, used to update the ranges.
    worker_adr_boundary_fraction: 0.4

    adr_queue_threshold_length: 256

    adr_objective_threshold_low: 5
    adr_objective_threshold_high: 20

    adr_rollout_perf_alpha: 0.99

    adr_load_from_checkpoint: False

    # Raw ADR params; more are added by the affine transforms code.
    # Each entry has an `init_range`, hard `limits`, and a step `delta` with its
    # `delta_style`. Where present, `range_path` names the matching `range` entry
    # under task.randomization_params.
    params:
      ### Hand properties
      hand_damping:
        range_path: actor_params.hand.dof_properties.damping.range
        init_range: [0.5, 2.0]
        limits: [0.01, 20.0]
        delta: 0.01
        delta_style: 'additive'

      # todo: double-check values. Do they multiply?
      hand_stiffness:
        range_path: actor_params.hand.dof_properties.stiffness.range
        init_range: [0.8, 1.2]
        limits: [0.01, 20.0]
        delta: 0.01
        delta_style: 'additive'
      hand_joint_friction:
        range_path: actor_params.hand.dof_properties.friction.range
        init_range: [0.8, 1.2]
        limits: [0.0, 10.0]
        delta: 0.01
        delta_style: 'additive'
      hand_armature:
        range_path: actor_params.hand.dof_properties.armature.range
        init_range: [0.8, 1.2]
        limits: [0.0, 10.0]
        delta: 0.01
        delta_style: 'additive'
      hand_effort:
        range_path: actor_params.hand.dof_properties.effort.range
        init_range: [0.9, 1.1]
        limits: [0.4, 10.0]
        delta: 0.01
        delta_style: 'additive'
      hand_lower:
        range_path: actor_params.hand.dof_properties.lower.range
        init_range: [0.0, 0.0]
        limits: [-5.0, 5.0]
        delta: 0.02
        delta_style: 'additive'
      hand_upper:
        range_path: actor_params.hand.dof_properties.upper.range
        init_range: [0.0, 0.0]
        limits: [-5.0, 5.0]
        delta: 0.02
        delta_style: 'additive'
      # todo randomize fingertips and hand parameters independently
      hand_mass:
        range_path: actor_params.hand.rigid_body_properties.mass.range
        init_range: [0.8, 1.2]
        limits: [0.01, 10.0]
        delta: 0.01
        delta_style: 'additive'
      # NOTE(review): the lower limit here is 0.1, while object_friction and the
      # hand friction `range` entry both use 0.01 — confirm this is intentional.
      hand_friction_fingertips:
        range_path: actor_params.hand.rigid_shape_properties.friction.range #.fingertips
        init_range: [0.9, 1.1]
        limits: [0.1, 2.0]
        delta: 0.01
        delta_style: 'additive'
      hand_restitution:
        range_path: actor_params.hand.rigid_shape_properties.restitution.range
        init_range: [0.0, 0.1]
        limits: [0.0, 1.0]
        delta: 0.01
        delta_style: 'additive'
      ### Object properties
      object_mass:
        range_path: actor_params.object.rigid_body_properties.mass.range
        init_range: [0.8, 1.2]
        limits: [0.01, 10.0]
        delta: 0.01
        delta_style: 'additive'
      object_friction:
        range_path: actor_params.object.rigid_shape_properties.friction.range
        init_range: [0.4, 0.8]
        limits: [0.01, 2.0]
        delta: 0.01
        delta_style: 'additive'
      object_restitution:
        range_path: actor_params.object.rigid_shape_properties.restitution.range
        init_range: [0.0, 0.1]
        limits: [0.0, 1.0]
        delta: 0.01
        delta_style: 'additive'
      
      # Observation Params

      # Chance of adding an additional delay on top of the inverse refresh rate
      # for cube pose.
      cube_obs_delay_prob:
        init_range: [0.0, 0.05]
        limits: [0.0, 0.7]
        delta: 0.01
        delta_style: 'additive'
      # Inverse refresh rate for cube pose (simulates camera).
      cube_pose_refresh_rate:
        init_range: [1.0, 1.0]
        limits: [1.0, 6.0]
        delta: 0.2
        delta_style: 'additive'
      # Action Params
      # Per-episode probability that there will be an extra, stochastic delay
      # on top of the previous delay per step.
      action_delay_prob:
        init_range: [0.0, 0.05]
        limits: [0.0, 0.7]
        delta: 0.01
        delta_style: 'additive'
      # Number of steps per environment that the action will be delayed for.
      action_latency:
        init_range: [0.0, 0.0]
        limits: [0, 60]
        delta: 0.1
        delta_style: 'additive'

      # Affine transformation params, encoding a transform ax + b + c applied to
      # an observation or action. For each group:
      #   *_scaling  -> params of coefficient a (sampled once per episode)
      #   *_additive -> params of coefficient b (sampled once per episode)
      #   *_white    -> params of coefficient c (sampled once per step)
      # ADR does not directly generate the distributions but rather sets the
      # stdev of gaussian noise on each (refer to OAI paper appendix on
      # randomisation).
      # NOTE(review): every *_scaling entry has init_range [0.0, 0.0] and
      # delta 0.0, so presumably its range never grows — confirm intentional.

      # Actions
      affine_action_scaling:
        init_range: [0.0, 0.0]
        limits: [0.0, 4.0]
        delta: 0.0
        delta_style: 'additive'
      affine_action_additive:
        init_range: [0.0, 0.04]
        limits: [0.0, 4.0]
        delta: 0.01
        delta_style: 'additive'
      affine_action_white:
        init_range: [0.0, 0.04]
        limits: [0.0, 4.0]
        delta: 0.01
        delta_style: 'additive'

      # Cube pose observation
      affine_cube_pose_scaling:
        init_range: [0.0, 0.0]
        limits: [0.0, 4.0]
        delta: 0.0
        delta_style: 'additive'
      affine_cube_pose_additive:
        init_range: [0.0, 0.04]
        limits: [0.0, 4.0]
        delta: 0.01
        delta_style: 'additive'
      affine_cube_pose_white:
        init_range: [0.0, 0.04]
        limits: [0.0, 4.0]
        delta: 0.01
        delta_style: 'additive'

      # DOF position observation
      affine_dof_pos_scaling:
        init_range: [0.0, 0.0]
        limits: [0.0, 4.0]
        delta: 0.0
        delta_style: 'additive'
      affine_dof_pos_additive:
        init_range: [0.0, 0.04]
        limits: [0.0, 4.0]
        delta: 0.01
        delta_style: 'additive'
      affine_dof_pos_white:
        init_range: [0.0, 0.04]
        limits: [0.0, 4.0]
        delta: 0.01
        delta_style: 'additive'

      # NOTE(review): presumably tied to env.random_network_adversary — confirm.
      rna_alpha:
        init_range: [0.0, 0.0]
        limits: [0.0, 1.0]
        delta: 0.01
        delta_style: 'additive'

# Simulation settings. Values written as ${...} are interpolations resolved
# from outside this file; `eq` and `contains` are custom resolvers that are
# not defined here.
sim:
  dt: 0.01667 # 1/60
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  # PhysX solver settings; thread count, solver type and subscene count are
  # interpolated from outer config values.
  physx:
    num_threads: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
    num_position_iterations: 8
    num_velocity_iterations: 0
    max_gpu_contact_pairs: 8388608 # 8*1024*1024
    num_subscenes: ${....num_subscenes}
    contact_offset: 0.002
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 1.0 # alternative value noted by the original author: 1000.0
    default_buffer_size_multiplier: 20.0
    contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)