Skip to content

Commit

Permalink
wrote crude wrapper around rawhash for signal mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
maximilianmordig committed Jun 3, 2024
1 parent dd0ac71 commit 95fd6ae
Show file tree
Hide file tree
Showing 9 changed files with 175 additions and 85 deletions.
10 changes: 5 additions & 5 deletions python_wrapper/example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -26,7 +26,7 @@
"True"
]
},
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -53,7 +53,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -71,7 +71,7 @@
"source": [
"# cppppy.\n",
"# dir()\n",
"cppyy.gbl.RawHashMapper(5, [\"hello\"])"
"cppyy.gbl.RawHashMapper(5, [b\"hello\"])"
]
},
{
Expand Down
44 changes: 44 additions & 0 deletions python_wrapper/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#%%
from pathlib import Path
import cppyy
import os

os.chdir("/home/mmordig/rawhash_project/rawhash2_new/build/extern")

header_file = Path("/home/mmordig/rawhash_project/rawhash2_new/src/rawhash_wrapper.hpp")
library_file = Path("/home/mmordig/rawhash_project/rawhash2_new/build/src/librawhash2_wrapper.so")
cppyy.include(str(header_file))
# cppyy.add_library_path("/home/mmordig/rawhash_project/rawhash2_new/build/extern/hdf5/lib")
cppyy.add_library_path("/home/mmordig/rawhash_project/rawhash2_new/build/src") # for shared libs
cppyy.load_library("librawhash2_wrapper")

# cppyy.add_library_path(str(library_file.parent))
# cppyy.load_library(str(library_file.name))
# cppyy.load_library(str(library_file))

# cppyy.gbl.RawHashMapper(5, [b"hello"])
args = ["my_program", "-x", "sensitive", "-t", "8", "-p", "/home/mmordig/rawhash_project/rawhash2_new/extern/kmer_models/legacy/legacy_r9.4_180mv_450bps_6mer/template_median68pA.model", "-d", "example_out/ref.ind", "/home/mmordig/rawhash_project/rawhash2_new/test/data/d2_ecoli_r94/ref.fa", ]
mapper = cppyy.gbl.RawHashMapper(len(args), args)
mapper.idx_info()
#%%
import numpy as np

# Create a numpy array
arr = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)

# Ensure the numpy array is contiguous
arr = np.ascontiguousarray(arr, dtype=np.float32)

import cppyy.ll
cppyy.set_debug()
# Call the C++ function with the numpy array
alignments = mapper.map(cppyy.ll.cast["float*"](arr.ctypes.data), len(arr))

def format_alignment(alignment):
# return alignment
return f"Alignment(contig: {alignment.contig}, start: {alignment.ref_start}, end: {alignment.ref_end}, strand: {alignment.is_pos_strand})"

for (i, alignment) in enumerate(alignments):
print(f"Alignment {i}: {format_alignment(alignment)}")
#%%
1
68 changes: 0 additions & 68 deletions src/rawhash_ruclient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,72 +140,4 @@ void RawHashDecisionMaker::mark_final_decision(ReadIdentifier const& read_ident,
channel_read_mapper.free_mappings();
}

// todo1: remove
// // map reads coming from readuntil
// void map_reads_from_ru() {
// // see map_worker_pipeline for what is freed and how (memory allocator)
// // reuse buffer and reg for each read
// ri_tbuf_t* b = ri_tbuf_init();
// ri_reg1_t* reg = (ri_reg1_t*)malloc(sizeof(ri_reg1_t));

// todo: cannot reuse reg0 due to write out

// todo: write_out_mappings_and_free(reg0, ri);
// }

// map reads from a single channel to a single reference
// class ChannelDataMapper {
// public:
// ChannelDataMapper(const ri_idx_t *ri, const ri_mapopt_t *opt): ri(ri), opt(opt) {
// b = ri_tbuf_init();
// }

// // called by thread, tries to map current read in channel
// void map_reads() {

// {
// ri_reg1_t* reg0 = (ri_reg1_t*)malloc(sizeof(ri_reg1_t)); // todo: calloc and push to queue
// // start new read
// init_reg1_t(reg0);

// uint32_t c_count = 0;
// double mean_sum = 0, std_dev_sum = 0;
// uint32_t n_events_sum = 0;
// double t = ri_realtime();

// // todo
// uint32_t l_chunk = 10;
// ri_sig_t* sig = NULL;
// uint32_t qlen = 100;

// // process chunks

// double mapping_time = ri_realtime() - t;
// #ifdef PROFILERH
// ri_maptime += mapping_time;
// #endif

// try_mapping_if_none_found(reg0, opt);
// compute_tag_and_mapping_info(c_count, c_count * l_chunk, reg0, opt, mapping_time, qlen, sig->name, ri);
// free_most_of_ri_reg1_t(b->km, reg0);
// km_destroy_and_recreate(&(b->km));

// // todo: write to queue instead which writes out reads
// write_out_mappings_and_free(reg0, ri);

// // wait until read has ended
// }

// }

// ~ChannelDataMapper() {
// // free buffer and reg
// ri_tbuf_destroy(b);
// }
// private:
// ri_tbuf_t* b = NULL; // thread-local buffer
// const ri_idx_t *ri; // reference index
// const ri_mapopt_t *opt; // mapping options
// };

#endif
12 changes: 6 additions & 6 deletions src/rawhash_ruclient.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ struct SingleChannelCalibration {
double offset;
};

struct Alignment {
char* contig;
uint32_t ref_start;
uint32_t ref_end;
bool pos_strand;
};
// struct Alignment {
// char* contig;
// uint32_t ref_start;
// uint32_t ref_end;
// bool pos_strand;
// };

typedef DecisionMaker::ChunkType ChunkType;

Expand Down
85 changes: 84 additions & 1 deletion src/rawhash_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

#include "rawhash_wrapper.hpp"
#include "rawhash.h"
#include "rmap.h"
#include "cli_parsing.cpp"

RawHashMapper::RawHashMapper(int argc, char *argv[]) {
std::cout << "RawHashMapper constructor" << std::endl;

// copied from main()
liftrlimit();
Expand Down Expand Up @@ -73,13 +75,94 @@ RawHashMapper::RawHashMapper(int argc, char *argv[]) {
if (ri_verbose >= 3) ri_idx_stat(ri);

_ri = ri;
_opt = new ri_mapopt_t;
*((ri_mapopt_t*)_opt) = opt; // copy constructor
}

RawHashMapper::~RawHashMapper() {
fprintf(stderr, "[M::%s] closing the index\n", __func__);
ri_idx_destroy((ri_idx_t*)_ri);
delete _opt;
}

void RawHashMapper::info() {
void RawHashMapper::idx_info() const {
fprintf(stderr, "[M::%s] index info\n", __func__); // todo
ri_idx_stat((ri_idx_t*)_ri);
}

std::vector<Alignment> RawHashMapper::map(float* raw_signal, int signal_length) {
// print
fprintf(stderr, "[M::%s] mapping\n", __func__);
for (int i = 0; i < signal_length; i++) {
fprintf(stderr, "%f ", raw_signal[i]);
}

ri_idx_t* ri = (ri_idx_t*)_ri;
pipeline_mt pipeline = {
// .n_processed = 0,
// .n_threads = 1,
// .n_fp = 0,
// .cur_fp = 0,
// .n_f = 0,
// .cur_f = 0,
// .mini_batch_size = 0,
.opt = (ri_mapopt_t*)_opt,
// .f = NULL,
// .fp = NULL,
.ri = ri,
// .fn = NULL,
// .su_nreads = 0,
// .su_nestimations = 0,
// .ab_count = 0,
// .su_cur = 0,
// .su_estimations = NULL,
// .su_c_estimations = NULL,
// .su_stop = 0,
.map_model = NULL
};
ri_tbuf_t *buf = ri_tbuf_init();
char* read_name = "read123";
ri_sig_t signal = {
.rid = 123,
.l_sig = signal_length,
.name = read_name,

.offset = 0, // todo
.sig = raw_signal,
};
ri_sig_t* signal_ptr = &signal; // since &&signal not working since &signal is temporary
ri_reg1_t reg0_noptr; // will be initialized
ri_reg1_t* reg0 = &reg0_noptr;
step_mt data = {
.p = &pipeline,
.n_sig = 1,
.sig = (ri_sig_t**)(&signal_ptr),
.reg = (ri_reg1_t**)(&reg0),
.buf = &buf
};
map_worker_for(&data, 0, 0);

// write out
write_out_mappings_to_stdout(reg0, ri);
std::vector<Alignment> alignments;
for (int m = 0; m < reg0->n_maps; ++m) {
if (reg0->maps[m].ref_id < ri->n_seq) {
// mapped
alignments.push_back(Alignment {
.contig = (ri->flag & RI_I_SIG_TARGET) ? ri->sig[reg0->maps[m].ref_id].name : ri->seq[reg0->maps[m].ref_id].name,
.ref_start = reg0->maps[m].fragment_start_position,
.ref_end = reg0->maps[m].fragment_start_position + reg0->maps[m].fragment_length,
.is_pos_strand = not reg0->maps[m].rev
});
}
}
// todo: dummy alignment
alignments.push_back(Alignment {
.contig = "fakealignment_chr123", .ref_start = 123, .ref_end = 456, .is_pos_strand = true
});
free_mappings_ri_reg1_t(reg0);

ri_tbuf_destroy(buf);

return alignments;
}
32 changes: 29 additions & 3 deletions src/rawhash_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,42 @@
#pragma once
#include <cstdint>
#include <vector>

struct Alignment {
const char* contig;
uint32_t ref_start;
uint32_t ref_end; // exclusive
bool is_pos_strand;
};

class RawHashMapper {
/**
* @brief Wrapper that can be used from other code (as an alternative to the CLI)
*
* This API is inefficient (no threading) and only for experimenting
* with RawHash from Python without having to load the index repeatedly.
*
* Also mixes malloc/free and new/delete
*/
public:
RawHashMapper(int argc, char *argv[]);
~RawHashMapper();
void info();

// private:
/*
* Map the reads
* Same as ri_map_file_frag -> map_worker_pipeline -> map_worker_for,
* except it reads from memory rather than the file
* and does not perform the pipeline step
*/
std::vector<Alignment> map(float* raw_signal, int signal_length);

/**
* Get info about the index
*/
void idx_info() const;

private:
// ri_idx_t *ri;
void *_ri;
void *_ri; // ri_idx_t*
void* _opt; // ri_mapopt_t*
};
2 changes: 1 addition & 1 deletion src/rmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ bool continue_mapping_with_new_chunk(const ri_idx_t *ri, // reference index


// map a single read to a reference, called in parallel
static void map_worker_for(void *_data,
void map_worker_for(void *_data,
long i,
int tid) // kt_for() callback
{
Expand Down
5 changes: 5 additions & 0 deletions src/rmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ bool continue_mapping_with_new_chunk(const ri_idx_t *ri, // reference index
double* std_dev_sum, // see mean_sum arg
uint32_t* n_events_sum // number of events in all seen chunks (not including current chunk)
);

// map a single read to a reference, called in parallel
void map_worker_for(void *_data,
long i,
int tid);
#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/wrapper_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

int main(int argc, char *argv[]) {
RawHashMapper mapper(argc, argv);
mapper.info();
mapper.idx_info();
return 0;
}

0 comments on commit 95fd6ae

Please sign in to comment.