-
-
Notifications
You must be signed in to change notification settings - Fork 73
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* basic bitvector * jacobsons start and refactor to uint for accurate machine words * confident that jacobson rank is working * reusing the incoming bitvector instead of copying everything for jacobson rank * access and bounds checking * just do uint64 for simplicity. bound checking and access * bit vector fixes, rsa good enough, wavelet start * Simple wavelet tree with access * wavelet fix access, add select, fix rsa bitvector select * got count working, but had to throw out jacobsons * rsa fixes and refactors * bwt locate * extract * doc BWT, refactor, and return a possible error during construction * add TODO about sorting and the nullChar * bwt examples * wavelet tree doc * wavelet tree explanation * doc and note for waveletTree * add bwt high level. move wavelet tree's some rsa bv docs * simplify bitvector, docs for bitvector and rsaBitvector * Cite Ben Langmead. --------- Co-authored-by: Willow Carretero Chavez <[email protected]> Co-authored-by: Timothy Stiles <[email protected]>
- Loading branch information
1 parent
27c7d28
commit 176dd5b
Showing
10 changed files
with
2,455 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package bwt | ||
|
||
import ( | ||
"fmt" | ||
"math" | ||
) | ||
|
||
const wordSize = 64 | ||
|
||
// bitvector is a sequence of 1's and 0's, i.e. an array of
// bits. The bits are packed into 64-bit words, which allows
// us to encode data in a memory efficient manner.
type bitvector struct {
	// bits holds the packed words that back the vector.
	bits []uint64
	// numberOfBits is the length of the vector, counted in bits.
	numberOfBits int
}
|
||
// newBitVector will return an initialized bitvector with | ||
// the specified number of zeroed bits. | ||
func newBitVector(initialNumberOfBits int) bitvector { | ||
capacity := getNumOfBitSetsNeededForNumOfBits(initialNumberOfBits) | ||
bits := make([]uint64, capacity) | ||
return bitvector{ | ||
bits: bits, | ||
numberOfBits: initialNumberOfBits, | ||
} | ||
} | ||
|
||
// getBitSet gets the while word as some offset from the | ||
// bitvector. Useful if you'd prefer to work with the | ||
// word rather than with individual bits. | ||
func (b bitvector) getBitSet(bitSetPos int) uint64 { | ||
return b.bits[bitSetPos] | ||
} | ||
|
||
// getBit returns the value of the bit at a given offset | ||
// True represents 1 | ||
// False represents 0 | ||
func (b bitvector) getBit(i int) bool { | ||
b.checkBounds(i) | ||
|
||
chunkStart := i / wordSize | ||
offset := i % wordSize | ||
|
||
return (b.bits[chunkStart] & (uint64(1) << (63 - offset))) != 0 | ||
} | ||
|
||
// setBit sets the value of the bit at a given offset | ||
// True represents 1 | ||
// False represents 0 | ||
func (b bitvector) setBit(i int, val bool) { | ||
b.checkBounds(i) | ||
|
||
chunkStart := i / wordSize | ||
offset := i % wordSize | ||
|
||
if val { | ||
b.bits[chunkStart] |= uint64(1) << (63 - offset) | ||
} else { | ||
b.bits[chunkStart] &= ^(uint64(1) << (63 - offset)) | ||
} | ||
} | ||
|
||
func (b bitvector) checkBounds(i int) { | ||
if i >= b.len() || i < 0 { | ||
msg := fmt.Sprintf("access of %d is out of bounds for bitvector with length %d", i, b.len()) | ||
panic(msg) | ||
} | ||
} | ||
|
||
func (b bitvector) len() int { | ||
return b.numberOfBits | ||
} | ||
|
||
func getNumOfBitSetsNeededForNumOfBits(n int) int { | ||
return int(math.Ceil(float64(n) / wordSize)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package bwt | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
// GetBitTestCase pairs a bit position with the value that
// getBit is expected to return for that position.
type GetBitTestCase struct {
	// position is the bit offset to query.
	position int
	// expected is the value the bit should have.
	expected bool
}
|
||
func TestBitVector(t *testing.T) { | ||
initialNumberOfBits := wordSize*10 + 1 | ||
|
||
bv := newBitVector(initialNumberOfBits) | ||
|
||
if bv.len() != initialNumberOfBits { | ||
t.Fatalf("expected len to be %d but got %d", initialNumberOfBits, bv.len()) | ||
} | ||
|
||
for i := 0; i < initialNumberOfBits; i++ { | ||
bv.setBit(i, true) | ||
} | ||
|
||
bv.setBit(3, false) | ||
bv.setBit(11, false) | ||
bv.setBit(13, false) | ||
bv.setBit(23, false) | ||
bv.setBit(24, false) | ||
bv.setBit(25, false) | ||
bv.setBit(42, false) | ||
bv.setBit(63, false) | ||
bv.setBit(64, false) | ||
bv.setBit(255, false) | ||
bv.setBit(256, false) | ||
|
||
getBitTestCases := []GetBitTestCase{ | ||
{0, true}, | ||
{1, true}, | ||
{2, true}, | ||
{3, false}, | ||
{4, true}, | ||
{7, true}, | ||
{8, true}, | ||
{9, true}, | ||
{10, true}, | ||
{11, false}, | ||
{12, true}, | ||
{13, false}, | ||
{23, false}, | ||
{24, false}, | ||
{25, false}, | ||
{42, false}, | ||
{15, true}, | ||
{16, true}, | ||
{62, true}, | ||
{63, false}, | ||
{64, false}, | ||
// Test past the first word | ||
{65, true}, | ||
{72, true}, | ||
{79, true}, | ||
{80, true}, | ||
{255, false}, | ||
{256, false}, | ||
{511, true}, | ||
{512, true}, | ||
} | ||
|
||
for _, v := range getBitTestCases { | ||
actual := bv.getBit(v.position) | ||
if actual != v.expected { | ||
t.Fatalf("expected %dth bit to be %t but got %t", v.position, v.expected, actual) | ||
} | ||
} | ||
} | ||
|
||
func TestBitVectorBoundPanic_GetBit_Lower(t *testing.T) { | ||
defer func() { _ = recover() }() | ||
|
||
initialNumberOfBits := wordSize*10 + 1 | ||
bv := newBitVector(initialNumberOfBits) | ||
bv.getBit(-1) | ||
|
||
t.Fatalf("expected get bit lower bound panic") | ||
} | ||
|
||
func TestBitVectorBoundPanic_GetBit_Upper(t *testing.T) { | ||
defer func() { _ = recover() }() | ||
initialNumberOfBits := wordSize*10 + 1 | ||
bv := newBitVector(initialNumberOfBits) | ||
bv.getBit(initialNumberOfBits) | ||
|
||
t.Fatalf("expected get bit upper bound panic") | ||
} | ||
|
||
func TestBitVectorBoundPanic_SetBit_Lower(t *testing.T) { | ||
defer func() { | ||
if r := recover(); r != nil { | ||
return | ||
} | ||
t.Fatalf("expected set bit lower bound panic") | ||
}() | ||
initialNumberOfBits := wordSize*10 + 1 | ||
bv := newBitVector(initialNumberOfBits) | ||
bv.setBit(-1, true) | ||
} | ||
|
||
func TestBitVectorBoundPanic_SetBit_Upper(t *testing.T) { | ||
defer func() { | ||
if r := recover(); r != nil { | ||
return | ||
} | ||
t.Fatalf("expected set bit upper bound panic") | ||
}() | ||
initialNumberOfBits := wordSize*10 + 1 | ||
bv := newBitVector(initialNumberOfBits) | ||
bv.setBit(initialNumberOfBits, true) | ||
} |
Oops, something went wrong.