[misc] fix reward model issue with TokenClassification model and support running particular steps instead of epochs #40

	# Workflow name as displayed in the GitHub Actions UI.
	name: dataset

	on:
	  # Trigger the workflow on push or pull request,
	  # but only for the main branch, and only when a Python source
	  # file or the dataset workflow definition itself has changed.
	  push:
	    branches:
	      - main
	    paths:
	      # `**` crosses directory separators, so this matches .py files
	      # at any depth in the repository.
	      - "**/*.py"
	      - .github/workflows/dataset.yml
	  pull_request:
	    branches:
	      - main
	    paths:
	      - "**/*.py"
	      - .github/workflows/dataset.yml

	jobs:
	  # Single job: installs the repository with its test extras, then runs
	  # the dataset tests followed by one ray test, on a self-hosted runner
	  # carrying the `gpu` label.
	  ray:
	    runs-on: [self-hosted, gpu]
	    steps:
	      # Action pinned to a full commit SHA; the trailing comment records
	      # the corresponding release tag.
	      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
	        with:
	          # 0 fetches the complete history instead of a shallow clone.
	          fetch-depth: 0
	      - name: Install the current repository
	        run: |
	          pip install -e .[test] --user
	      - name: Running dataset tests
	        run: |
	          # Clone the test data only when it is not already present in the
	          # runner's home directory.
	          [ ! -d "$HOME/verl-data" ] && git clone --depth 1 https://github.com/eric-haibin-lin/verl-data ~/verl-data
	          pytest -s -x tests/verl
	      - name: Running ray test using cupy (move it to L20 when dockerfile ready)
	        run: |
	          cd tests/ray
	          pytest -s -x test_rvdz.py

Provide feedback