Skip to content

Training data format versions

borg323 edited this page Mar 21, 2023 · 1 revision

The training data format generated by lc0 started at version 3 and was progressively updated to the current version 6. Earlier formats were generated by lczero.

Version 3 training data format:

// Version 3 training data record, laid out as a fixed-size struct with no
// padding between fields.
// Size breakdown: 4 (version) + 1858 * 4 (probabilities) + 104 * 8 (planes)
// + 8 single-byte fields = 8276 bytes.
struct V3TrainingData {
  uint32_t version;
  float probabilities[1858];
  uint64_t planes[104];
  uint8_t castling_us_ooo;
  uint8_t castling_us_oo;
  uint8_t castling_them_ooo;
  uint8_t castling_them_oo;
  // From v5 on this byte is named side_to_move_or_enpassant.
  uint8_t side_to_move;
  uint8_t rule50_count;
  // Unused move count field; from v5 on this byte holds invariance_info.
  uint8_t move_count;
  // Result stored as an int8_t; from v6 on this byte is an unused placeholder.
  int8_t result;
} PACKED_STRUCT;
// Fails the build if the struct size ever differs from the 8276 bytes above.
static_assert(sizeof(V3TrainingData) == 8276, "Wrong struct size");

Version 4 training data format difference from version 3:

@@ -35,7 +35,7 @@ namespace lczero {
 
 #pragma pack(push, 1)
 
-struct V3TrainingData {
+struct V4TrainingData {
   uint32_t version;
   float probabilities[1858];
   uint64_t planes[104];
@@ -47,8 +47,12 @@ struct V3TrainingData {
   uint8_t rule50_count;
   uint8_t move_count;
   int8_t result;
+  float root_q;
+  float best_q;
+  float root_d;
+  float best_d;
 } PACKED_STRUCT;
-static_assert(sizeof(V3TrainingData) == 8276, "Wrong struct size");
+static_assert(sizeof(V4TrainingData) == 8292, "Wrong struct size");
 
 #pragma pack(pop)

Version 5 training data format difference from version 4:

@@ -35,24 +37,33 @@ namespace lczero {
 
 #pragma pack(push, 1)
 
-struct V4TrainingData {
+struct V5TrainingData {
   uint32_t version;
+  uint32_t input_format;
   float probabilities[1858];
   uint64_t planes[104];
   uint8_t castling_us_ooo;
   uint8_t castling_us_oo;
   uint8_t castling_them_ooo;
   uint8_t castling_them_oo;
-  uint8_t side_to_move;
+  // For input type 3 contains enpassant column as a mask.
+  uint8_t side_to_move_or_enpassant;
   uint8_t rule50_count;
-  uint8_t move_count;
+  // For input type 3 contains a bit field indicating the transform that was
+  // used and the original side to move info.
+  // Side to move is in the top bit, transform in the lower bits.
+  // In versions prior to v5 this spot contained an unused move count field.
+  uint8_t invariance_info;
   int8_t result;
   float root_q;
   float best_q;
   float root_d;
   float best_d;
+  float root_m;      // In plies.
+  float best_m;      // In plies.
+  float plies_left;  // This is the training target for MLH.
 } PACKED_STRUCT;
-static_assert(sizeof(V4TrainingData) == 8292, "Wrong struct size");
+static_assert(sizeof(V5TrainingData) == 8308, "Wrong struct size");
 
 #pragma pack(pop)
 

Version 6 training data format difference from version 5:

@@ -37,7 +37,7 @@ namespace lczero {
 
 #pragma pack(push, 1)
 
-struct V5TrainingData {
+struct V6TrainingData {
   uint32_t version;
   uint32_t input_format;
   float probabilities[1858];
@@ -49,12 +49,19 @@ struct V5TrainingData {
   // For input type 3 contains enpassant column as a mask.
   uint8_t side_to_move_or_enpassant;
   uint8_t rule50_count;
-  // For input type 3 contains a bit field indicating the transform that was
-  // used and the original side to move info.
-  // Side to move is in the top bit, transform in the lower bits.
+  // Bitfield with the following allocation:
+  //  bit 7: side to move (input type 3)
+  //  bit 6: position marked for deletion by the rescorer (never set by lc0)
+  //  bit 5: game adjudicated (v6)
+  //  bit 4: max game length exceeded (v6)
+  //  bit 3: best_q is for proven best move (v6)
+  //  bit 2: transpose transform (input type 3)
+  //  bit 1: mirror transform (input type 3)
+  //  bit 0: flip transform (input type 3)
   // In versions prior to v5 this spot contained an unused move count field.
   uint8_t invariance_info;
-  int8_t result;
+  // In versions prior to v6 this spot contained the result as an int8_t.
+  uint8_t dummy;
   float root_q;
   float best_q;
   float root_d;
@@ -62,8 +69,24 @@ struct V5TrainingData {
   float root_m;      // In plies.
   float best_m;      // In plies.
   float plies_left;  // This is the training target for MLH.
+  float result_q;
+  float result_d;
+  float played_q;
+  float played_d;
+  float played_m;
+  // The following may be NaN if not found in cache.
+  float orig_q;      // For value repair.
+  float orig_d;
+  float orig_m;
+  uint32_t visits;
+  // Indices in the probabilities array.
+  uint16_t played_idx;
+  uint16_t best_idx;
+  // Kullback-Leibler divergence between visits and policy (denominator)
+  float policy_kld;
+  uint32_t reserved;
 } PACKED_STRUCT;
-static_assert(sizeof(V5TrainingData) == 8308, "Wrong struct size");
+static_assert(sizeof(V6TrainingData) == 8356, "Wrong struct size");
 
 #pragma pack(pop)
 

Current (version 6) training data format:

// Version 6 (current) training data record, laid out as a fixed-size struct
// with no padding between fields.
// Size breakdown: 2 * 4 (version, input_format) + 1858 * 4 (probabilities)
// + 104 * 8 (planes) + 8 single-byte fields + 16 * 4 (float fields, including
// policy_kld) + 4 (visits) + 2 * 2 (played_idx, best_idx) + 4 (reserved)
// = 8356 bytes.
struct V6TrainingData {
  uint32_t version;
  uint32_t input_format;
  float probabilities[1858];
  uint64_t planes[104];
  uint8_t castling_us_ooo;
  uint8_t castling_us_oo;
  uint8_t castling_them_ooo;
  uint8_t castling_them_oo;
  // For input type 3 contains enpassant column as a mask.
  uint8_t side_to_move_or_enpassant;
  uint8_t rule50_count;
  // Bitfield with the following allocation:
  //  bit 7: side to move (input type 3)
  //  bit 6: position marked for deletion by the rescorer (never set by lc0)
  //  bit 5: game adjudicated (v6)
  //  bit 4: max game length exceeded (v6)
  //  bit 3: best_q is for proven best move (v6)
  //  bit 2: transpose transform (input type 3)
  //  bit 1: mirror transform (input type 3)
  //  bit 0: flip transform (input type 3)
  // In versions prior to v5 this spot contained an unused move count field.
  uint8_t invariance_info;
  // In versions prior to v6 this spot contained the result as an int8_t.
  // The byte is kept so that the following fields stay at the same offsets.
  uint8_t dummy;
  float root_q;
  float best_q;
  float root_d;
  float best_d;
  float root_m;      // In plies.
  float best_m;      // In plies.
  float plies_left;  // This is the training target for MLH.
  // Fields from here to the end of the struct were added in v6.
  float result_q;
  float result_d;
  float played_q;
  float played_d;
  float played_m;
  // The following may be NaN if not found in cache.
  float orig_q;      // For value repair.
  float orig_d;
  float orig_m;
  uint32_t visits;
  // Indices in the probabilities array.
  uint16_t played_idx;
  uint16_t best_idx;
  // Kullback-Leibler divergence between visits and policy (denominator)
  float policy_kld;
  uint32_t reserved;
} PACKED_STRUCT;
// Fails the build if the struct size ever differs from the 8356 bytes above.
static_assert(sizeof(V6TrainingData) == 8356, "Wrong struct size");
Clone this wiki locally