Skip to content

Commit

Permalink
feat: Merge pull request #5 from Vadoola/main
Browse files Browse the repository at this point in the history
Early version of Kaitai Struct file for SBRegion.dat files
  • Loading branch information
hutcheb authored Apr 5, 2024
2 parents 6a6ae4d + 0860bb8 commit 1711fcf
Show file tree
Hide file tree
Showing 2 changed files with 373 additions and 0 deletions.
162 changes: 162 additions & 0 deletions resources/Kaitai Struct/comps.ksy
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
meta:
id: comps_dat
title: Comps.dat file extracted from an RSLogix 5000 ACD
file-extension: dat
license: CC0-1.0
ks-version: 0.9
endian: le
bit-endian: le
doc: |
Docs for Comps.dat of ACD File Format
doc-ref:
- URL Reference?
seq:
- id: magic_number_maybe
type: u4
repeat: expr
repeat-expr: 2
- id: header
type: header
- id: unknown_data_between_header_and_records
type: u1
repeat: expr
repeat-expr: header.region_pointer_offset - 28
- id: region_header
type: region_header
- id: unknown_betw_reghead_and_region
type: u1
repeat: expr
repeat-expr: region_header.pointer_records_region - header.region_pointer_offset - 22
- id: record_header
type: record_header
- id: unknown_between_head_and_rec
type: u1
repeat: expr
repeat-expr: 36 #self.pointer_records_region + self.record_header_length
- id: records
type: record
repeat: expr
#repeat-expr: 2 #self.header.no_records + self.header.no_records_table2
repeat-expr: header.num_records + header.num_records_table2

types:
header:
seq:
- id: total_length
type: u4
- id: region_pointer_offset
type: u4
- id: header_unknown_1
type: u4
- id: num_records
type: u4
- id: num_records_table2
type: u4
region_header:
seq:
- id: magic_num
contents: [0xfe, 0xfe]
- id: region_pointer_length
type: u4
- id: rec_header_unknown_1
type: u4
- id: rec_header_unknown_2
type: u4
- id: pointer_metadata_region
type: u4
- id: pointer_records_region
type: u4
record_header:
seq:
- id: magic_num
contents: [0xfe, 0xfe]
- id: record_header_length
type: u4
- id: unknown4
type: u4
- id: unknown5
type: u4
- id: record_format
type: u4
enum: record_format
enums:
record_format:
#there might be other types, but these are the 2 in the python file
#and I haven't dug too deep yet in my sample files
132: xfer_db
512: tag_record_i_presume
record:
seq:
#- id: header
# type: record_header
#- id: unknwon_between_head_and_ident
# type: u1
# repeat: expr
# repeat-expr: 36 #self.pointer_records_region + self.record_header_length
- id: identifier
type: u2
- id: length
type: u4
- id: data
type:
switch-on: identifier
cases:
0xFAFA: dat_record
0xFDFD: ptr_dat_record
dat_record:
seq:
- id: length
type: u4
- id: dat_rec_unknown_1
type: u4
- id: sequence_number
type: u2
- id: record_type
type: u2
#enum: ?
- id: dat_rec_unknown_2
type: u4
- id: obj_id
type: u4
- id: parent_id_offset
type: u4
- id: rec_text
type: str
size: 124
encoding: UTF-16LE
- id: unknown_remaining_rec_data
type: u1
repeat: expr
repeat-expr: length - 144
ptr_dat_record:
# Not sure what this is specifically, but it appears there aren't any in
# The sample program I'm testing with currently. Perhaps these are Aliases?
# I would have to check if this program has any aliases.
seq:
- id: ptr_rec_unknown_1
type: u4
repeat: expr
repeat-expr: 2
- id: sequence_number
type: u2
- id: record_type
type: u2
#enum: ?
- id: ptr_dat_rec_unknown_2
type: u4
- id: obj_id
type: u4
- id: parent_id_offset
type: u4
- id: rec_text
type: str
size: 124
encoding: UTF-16LE
# since I have no examples of this currently, I'm not sure if this is the
#end of the record or not. The python code stops reading bytes at this point
#but it also did for the data record and there is no length field in this one
#or perhaps there is since the python code jups over the first 8 bytes of
# the ptr record, and the first 4 are the length in the other one...
# I would need an example to try and test agains though.
# hell honestly these 2 record types are 99% identical.
# It's possible these could get merged, but for now I'll leave them seperate
211 changes: 211 additions & 0 deletions resources/Kaitai Struct/sbregion.ksy
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
meta:
id: sbregion
title: SBRegion.dat file extracted from an RSLogix 5000 ACD
file-extension: dat
license: CC0-1.0
ks-version: 0.9
endian: le
bit-endian: le
doc: |
Docs for sbregion.dat of ACD File Format
doc-ref:
- URL Reference?
seq:
- id: header
type: header
- id: unknown_data_between_header_and_region
type: u4
repeat: expr
repeat-expr: (header.region_pointer_offset / 4) - 7
- id: region_info
type: region_info
- id: unknown_data_between_header_and_rec_info
type: u1
repeat: expr
repeat-expr: region_info.pointer_records_region - header.region_pointer_offset - 22
- id: record_info
type: record_info
- id: records
type: record
repeat: expr
repeat-expr: header.num_of_records + header.table2_num_of_records
#repeat-expr: 1



types:
header:
seq:
- id: hd_unk_1
type: u4
- id: hd_unk_2
type: u4
- id: total_length
doc: Total Length of the file? (records?) in bytes
type: u4
- id: region_pointer_offset
type: u4
- id: header_unknown_1
type: u4
- id: num_of_records
type: u4
- id: table2_num_of_records
type: u4
region_info:
seq:
- id: region_magic_number
contents: [0xfe, 0xfe]
- id: region_pointer_length
type: u4
- id: region_unknown_1
type: u4
- id: region_unknown_2
type: u4
- id: pointer_metadata_region
type: u4
- id: pointer_records_region
type: u4
record_info:
seq:
- id: rec_magic_number
contents: [0xfe, 0xfe]
- id: record_info_length
type: u4
- id: record_info_unknown_1
type: u4
- id: record_info_unknown_2
type: u4
#In the python program there were only 2 known values here
#132 -> Cross Reference Database if I understand the exception comment correctly
#512 -> ...?rungs?
- id: record_format
type: u4
- id: record_info_unknown_remaining
type: u1
repeat: expr
repeat-expr: record_info_length - 18
record:
seq:
- id: rec_magic_num
contents: [0xfa, 0xfa]
- id: length
type: u4
- id: identifier
type: u4
- id: erroneous_6_bytes
type: u1
repeat: expr
repeat-expr: 6
# Lang Type values I've seen so far while testing (with a single ACD file)
# - REGION LE UID
# - the text for these come back as gibberish or a foreign language...must be binary not text
# - Rung NT
# - The text from what I've seen is clearly rung text
# - REGION NT
# - also looks like Rung text...not sure what the difference is here between this and Rung NT
# - REGION_MANGLED_SPECIFIER
# - So in the Rung text instead of tags it uses some sort of identifier surrounded by @ symbols
# - For example:
# - OTL(@d2e1f164@)
# - EQU(@361c9540@.@b7ca3c9b@[@475fa515@].@31a4059f@.@9d3d011d@,0)
# - In these region mangled specifiers it all just these identifiers
# - Such as
# - @8d186df7@
# - @080668b4@[2]
# - @2a060d86@.@f9255b24@
# - So I'm getting these are tag references of some sort...but how exactly they refer back to
# - a tag is unclear at the moment
# - REGION_REF_COUNT
# - In every single one of these the text field appeared to be empty (or had unprintable bytes)
# - REGION AST
# - the text for these come back as gibberish or a foreign language...must be binary not text
# - Doing a quick glance...it looks like it could have some UTF-8 in the data
- id: rec_type
type: str
size: 29
encoding: UTF-8
- id: rec_unknown_1
type: u1
repeat: expr
repeat-expr: 12
- id: rec_length
type: u4
# I'm not seeing any errors in the Katai IDE, but when I did a quick test
# with the python generated parser, I was getting some errors about some
# records text not being parsable as UTF-16
#- id: text
# type: str
# size: text_length
# encoding: UTF-16LE
#encoding: UTF-8
- id: rec_data
type:
switch-on: rec_type
cases:
'"REGION AST\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"': region_ast
'"REGION LE UID\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"': region_le_uid
'"REGION NT\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"': region_nt
'"REGION_MANGLED_SPECIFIER\0\0\0\0\0"': region_mangled_specifier
'"REGION_REF_COUNT\0\0\0\0\0\0\0\0\0\0\0\0\0"': region_ref_count
'"Rung NT\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"': rung_nt
_: unknown_rec

region_ast:
seq:
- id: data
type: str
size: _parent.rec_length
encoding: UTF-8
#Their is still some gibberish in here, but in UTF8 there is some legible
#text here (as opposed to East Asian characters). So it could be either
#UTF8 or Ascii mixed with some binary info?
region_le_uid:
seq:
- id: data
type: str
size: _parent.rec_length
encoding: UTF-16LE
#encoding: UTF-16BE
#encoding: UTF-8
#This doesn't appear to be UTF16 or UTF-8
#must just be some binary data
region_nt:
seq:
- id: data
type: str
size: _parent.rec_length
encoding: UTF-16LE
#this one I would say is probably safe to say is UTF-16LE
region_mangled_specifier:
seq:
- id: data
type: str
size: _parent.rec_length
encoding: UTF-16LE
#the sample file I'm working with only has 3 instances of this type
#using UTF-16LE and UTF-8 parse the same, but there are \0x00 chars
#in between all the letters...so it's probably safe to say this is
#UTF-16LE
region_ref_count:
seq:
- id: data
type: str
size: _parent.rec_length
encoding: UTF-16LE
#encoding: UTF-16BE
#encoding: UTF-8
#the sample file I'm working with only has 3 instances of this type
#all of them are 4 bytes, and don't parse to anything meaningfull
#in UTF8/16 or ascii. I'm guessing this is just an int or something
rung_nt:
seq:
- id: data
type: str
size: _parent.rec_length
encoding: UTF-16LE
unknown_rec:
seq:
- id: data
type: str
size: _parent.rec_length
encoding: UTF-16LE

0 comments on commit 1711fcf

Please sign in to comment.