Skip to content

Commit 2c77b3c

Browse files
committed
ont overlapping
1 parent 3067771 commit 2c77b3c

File tree

6 files changed

+170
-4
lines changed

6 files changed

+170
-4
lines changed

Assembly.cpp

+28-2
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,21 @@ void ha_ec(int64_t round, int num_pround, int des_idx, uint64_t *tot_b, uint64_t
10501050
}
10511051

10521052

1053+
int ha_ec_dbg(void)
1054+
{
1055+
int hom_cov, het_cov;
1056+
1057+
ha_idx = ha_pt_gen(&asm_opt, 0, 0, 0, &R_INF, &hom_cov, &het_cov); // build the index
1058+
asm_opt.hom_cov = hom_cov; asm_opt.het_cov = het_cov;
1059+
ha_opt_update_cov(&asm_opt, hom_cov);
1060+
1061+
cal_ec_r_dbg(asm_opt.thread_num, R_INF.total_reads);
1062+
1063+
ha_pt_destroy(ha_idx); ha_idx = NULL;
1064+
1065+
return 0;
1066+
}
1067+
10531068
void ha_overlap_and_correct(int round)
10541069
{
10551070
int i, hom_cov, het_cov, r_out = 0;
@@ -2003,8 +2018,7 @@ int ha_assemble_ovec(void)
20032018
ha_flt_tab = ha_idx = NULL;
20042019

20052020
// construct hash table for high occurrence k-mers
2006-
if (!(asm_opt.flag & HA_F_NO_KMER_FLT) && ha_flt_tab == NULL)
2007-
{
2021+
if (!(asm_opt.flag & HA_F_NO_KMER_FLT) && ha_flt_tab == NULL) {
20082022
ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0, 0);
20092023
ha_opt_update_cov(&asm_opt, hom_cov);
20102024
}
@@ -2026,6 +2040,18 @@ int ha_assemble_ovec(void)
20262040
return 0;
20272041
}
20282042

2043+
int ha_assemble_ovec_cc(void)
2044+
{
2045+
ha_idx = NULL;
2046+
2047+
ha_opt_reset_to_round(&asm_opt, 0); // this update asm_opt.roundID and a few other fields
2048+
ha_ec_dbg();
2049+
2050+
// Output_PAF0(R_INF.paf, "0");
2051+
destory_All_reads(&R_INF);
2052+
return 0;
2053+
}
2054+
20292055
int ha_assemble(void)
20302056
{
20312057
// debug_mc_g_t(MC_NAME);

Assembly.h

+1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,6 @@ ha_ovec_buf_t *ha_ovec_buf_init(void *km, int is_final, int save_ov, int is_ug);
5151
void ha_ovec_destroy(ha_ovec_buf_t *b);
5252
int64_t ha_ovec_mem(const ha_ovec_buf_t *b, int64_t *mem_a);
5353
int ha_assemble_ovec(void);
54+
int ha_ec_dbg(void);
5455

5556
#endif

CommandLines.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <pthread.h>
66
#include <stdint.h>
77

8-
#define HA_VERSION "0.24.0-r702"
8+
#define HA_VERSION "0.24.0-r703"
99

1010
#define VERBOSE 0
1111

ecovlp.cpp

+138
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,23 @@ typedef struct {
6363
uint8_t *cr;
6464
} ec_ovec_buf_t;
6565

66+
typedef struct {
67+
uint32_t n_thread, n_a, chunk_size, cn;
68+
FILE *fp;
69+
} cal_ec_r_dbg_t;
70+
71+
typedef struct {
72+
ma_hit_t *a;
73+
size_t n, m;
74+
asg16_v ec;
75+
} r_dbg_step_res_t;
76+
77+
typedef struct { // data structure for each step in kt_pipeline()
78+
ec_ovec_buf_t *buf;
79+
r_dbg_step_res_t *res;
80+
uint32_t si, ei;
81+
} cal_ec_r_dbg_step_t;
82+
6683
ec_ovec_buf_t* gen_ec_ovec_buf_t(uint32_t n);
6784
void destroy_ec_ovec_buf_t(ec_ovec_buf_t *p);
6885

@@ -3352,6 +3369,61 @@ static void worker_hap_ec(void *data, long i, int tid)
33523369
**/
33533370
}
33543371

3372+
static void worker_hap_ec_dbg_paf(void *data, long i, int tid)
3373+
{
3374+
ec_ovec_buf_t0 *b = &(((cal_ec_r_dbg_step_t*)data)->buf->a[tid]);
3375+
r_dbg_step_res_t *rr = &(((cal_ec_r_dbg_step_t*)data)->res[tid]);
3376+
uint32_t high_occ = asm_opt.hom_cov * (2.0 - HA_KMER_GOOD_RATIO);
3377+
uint32_t low_occ = asm_opt.hom_cov * HA_KMER_GOOD_RATIO;
3378+
overlap_region *aux_o = NULL; i += ((cal_ec_r_dbg_step_t*)data)->si;
3379+
3380+
// debug_retrive_bqual(D, &b->v8t, i, 256); return;
3381+
3382+
recover_UC_Read(&b->self_read, &R_INF, i);
3383+
3384+
h_ec_lchain(b->ab, i, b->self_read.seq, b->self_read.length, asm_opt.mz_win, asm_opt.k_mer_length, &R_INF, &b->olist, &b->clist, ((asm_opt.is_ont)?(0.05):(0.02)), asm_opt.max_n_chain, 1, NULL, NULL, &(b->sp), &high_occ, &low_occ, 1, 1, 3, 0.7, 2, 32, COV_W);///ONT high error
3385+
3386+
aux_o = fetch_aux_ovlp(&b->olist);///must be here
3387+
3388+
// stderr_phase_ovlp(&b->olist);
3389+
gen_hc_r_alin_ea(&b->olist, &b->clist, &R_INF, &b->self_read, &b->ovlp_read, &b->exz, aux_o, asm_opt.max_ov_diff_ec, (asm_opt.is_ont)?(WINDOW_OHC):(WINDOW_HC), i, E_KHIT, 1, &b->v16, &b->v64, &(R_INF.paf[i]), 0);
3390+
3391+
uint32_t k, m, tl; overlap_region *z; bit_extz_t ez; ma_hit_t *t;
3392+
for (k = 0; k < b->olist.length; k++) {
3393+
z = &(b->olist.list[k]);
3394+
if(!(z->w_list.n)) continue;
3395+
3396+
tl = Get_READ_LENGTH((R_INF), z->y_id);
3397+
for (m = 0; m < z->w_list.n; m++) {
3398+
if(is_ualn_win(z->w_list.a[m])) continue;
3399+
set_bit_extz_t(ez, (*z), m);
3400+
kv_pushp(ma_hit_t, *rr, &t);
3401+
3402+
t->qns = z->x_id; t->qns = t->qns << 32;
3403+
t->tn = z->y_id;
3404+
3405+
t->qns = t->qns | (uint64_t)(z->w_list.a[m].x_start);
3406+
t->qe = z->w_list.a[m].x_end + 1;
3407+
3408+
t->ts = z->w_list.a[m].y_start;
3409+
t->te = z->w_list.a[m].y_end + 1;
3410+
3411+
t->rev = z->y_pos_strand;
3412+
3413+
t->bl = rr->ec.n;
3414+
kv_resize(uint16_t, rr->ec, rr->ec.n + ez.cigar.n);
3415+
memcpy(rr->ec.a + rr->ec.n, ez.cigar.a, ez.cigar.n * sizeof((*(rr->ec.a))));
3416+
rr->ec.n += ez.cigar.n;
3417+
t->cc = rr->ec.n - t->bl;
3418+
3419+
if(t->rev) {
3420+
t->ts = tl - z->w_list.a[m].y_end - 1;
3421+
t->te = tl - z->w_list.a[m].y_start;
3422+
}
3423+
}
3424+
}
3425+
}
3426+
33553427
uint32_t adjust_exact_match(asg16_v *in, int64_t xs0, int64_t xe0, int64_t ys0, int64_t ye0, uint64_t *rxs, uint64_t *rxe, uint64_t *rys, uint64_t *rye, uint32_t rev)
33563428
{
33573429
*rxs = *rxe = *rys = *rye = 0;
@@ -6106,6 +6178,72 @@ void cal_ec_r(uint64_t n_thre, uint64_t round, uint64_t n_round, uint64_t n_a, u
61066178
// }
61076179
}
61086180

6181+
void print_ov_dbg_paf(FILE *fp, char *ref_str, char *ref_id, int32_t ref_id_n, char *qry_str, char *qry_id, int32_t qry_id_n, uint64_t rs, uint64_t re, uint64_t rl, uint64_t qs, uint64_t qe, uint64_t ql, uint64_t rev, bit_extz_t *ez, char *ezh)
6182+
{
6183+
uint64_t ci = 0; uint16_t c; uint32_t cl;
6184+
fprintf(fp, "%.*s\t%lu\t%lu\t%lu\t", qry_id_n, qry_id, ql, qs, qe);
6185+
fprintf(fp, "%c\t", "+-"[rev]);
6186+
fprintf(fp, "%.*s\t%lu\t%lu\t%lu\t", ref_id_n, ref_id, rl, rs, re);
6187+
fprintf(fp, "255\tcg:Z:");
6188+
while (ci < ez->cigar.n) {
6189+
ci = pop_trace(&(ez->cigar), ci, &c, &cl);
6190+
fprintf(fp, "%u%c", cl, ezh[c]);
6191+
}
6192+
fprintf(fp, "\n");
6193+
}
6194+
6195+
static void *worker_ov_dbg_pipeline(void *data, int step, void *in) // callback for kt_pipeline()
6196+
{
6197+
cal_ec_r_dbg_t *p = (cal_ec_r_dbg_t*)data; char cm[4]; cm[0] = 'M'; cm[1] = 'M'; cm[2] = 'I'; cm[3] = 'D';
6198+
// cal_ec_r_dbg_step_t
6199+
if (step == 0) { // step 1: read a block of sequences
6200+
cal_ec_r_dbg_step_t *s; CALLOC(s, 1);
6201+
s->si = p->cn; p->cn += p->chunk_size;
6202+
if(p->cn > p->n_a) p->cn = p->n_a; s->ei = p->cn;
6203+
if(s->si >= s->ei) free(s);
6204+
else return s;
6205+
} else if (step == 1) { // step 2: alignment
6206+
cal_ec_r_dbg_step_t *s = (cal_ec_r_dbg_step_t*)in;
6207+
s->buf = gen_ec_ovec_buf_t(p->n_thread); CALLOC(s->res, p->n_thread);
6208+
kt_for(p->n_thread, worker_hap_ec_dbg_paf, s, (s->ei - s->si));
6209+
destroy_ec_ovec_buf_t(s->buf);
6210+
return s;
6211+
} else if (step == 2) { // step 3: dump
6212+
cal_ec_r_dbg_step_t *s = (cal_ec_r_dbg_step_t*)in; uint64_t k, z; bit_extz_t ez; memset(&ez, 0, sizeof(ez));
6213+
// UC_Read qu; UC_Read tu; init_UC_Read(&qu); init_UC_Read(&tu);
6214+
for (k = 0; k < p->n_thread; k++) {
6215+
for (z = 0; z < s->res[k].n; z++) {
6216+
ez.cigar.a = s->res[k].ec.a + s->res[k].a[z].bl; ez.cigar.n = ez.cigar.m = s->res[k].a[z].cc;
6217+
6218+
// UC_Read_resize(qu, (s->res[k].a[z].qe - ((uint32_t)s->res[k].a[z].qns)));
6219+
// recover_UC_Read_sub_region(qu.seq, ((uint32_t)s->res[k].a[z].qns), (s->res[k].a[z].qe - ((uint32_t)s->res[k].a[z].qns)), 0, &R_INF, (s->res[k].a[z].qns>>32));
6220+
6221+
// UC_Read_resize(tu, (s->res[k].a[z].te - s->res[k].a[z].ts));
6222+
// recover_UC_Read_sub_region(tu.seq, s->res[k].a[z].ts, s->res[k].a[z].te - s->res[k].a[z].ts, 0, &R_INF, s->res[k].a[z].tn);
6223+
6224+
print_ov_dbg_paf(p->fp, NULL/**qu.seq**/, Get_NAME(R_INF, (s->res[k].a[z].qns>>32)), Get_NAME_LENGTH(R_INF, (s->res[k].a[z].qns>>32)),
6225+
NULL/**tu.seq**/, Get_NAME(R_INF, (s->res[k].a[z].tn)), Get_NAME_LENGTH(R_INF, (s->res[k].a[z].tn)), (uint32_t)s->res[k].a[z].qns, s->res[k].a[z].qe, Get_READ_LENGTH(R_INF, (s->res[k].a[z].qns>>32)), s->res[k].a[z].ts, s->res[k].a[z].te, Get_READ_LENGTH(R_INF, (s->res[k].a[z].tn)), s->res[k].a[z].rev, &ez, cm);
6226+
}
6227+
free(s->res[k].a); free(s->res[k].ec.a);
6228+
}
6229+
free(s->res); free(s); ///destory_UC_Read(&qu); destory_UC_Read(&tu);
6230+
}
6231+
return 0;
6232+
}
6233+
6234+
void cal_ec_r_dbg(uint64_t n_thre, uint64_t n_a)
6235+
{
6236+
char *paf = NULL; MALLOC(paf, (strlen(asm_opt.output_file_name)+64));
6237+
sprintf(paf, "%s.ovlp.paf", asm_opt.output_file_name);
6238+
6239+
cal_ec_r_dbg_t sl; memset(&sl, 0, sizeof(sl));
6240+
sl.n_thread = n_thre; sl.n_a = n_a; sl.chunk_size = 2000; sl.cn = 0; sl.fp = fopen(paf, "w");
6241+
6242+
kt_pipeline(3, worker_ov_dbg_pipeline, &sl, 3);
6243+
6244+
fclose(sl.fp); free(paf);
6245+
}
6246+
61096247
void destroy_cc_v(cc_v *z)
61106248
{
61116249
uint64_t k;

ecovlp.h

+1
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@ void cal_ov_r(uint64_t n_thre, uint64_t n_a, uint64_t new_idx);
1515
void handle_chemical_r(uint64_t n_thre, uint64_t n_a);
1616
void handle_chemical_arc(uint64_t n_thre, uint64_t n_a);
1717
uint8_t* gen_chemical_arc_rf(uint64_t n_thre, uint64_t n_a);
18+
void cal_ec_r_dbg(uint64_t n_thre, uint64_t n_a);
1819

1920
#endif

main.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ int main(int argc, char *argv[])
6262
// ed_band_cal_global_128bit((char*)"ACTTTTTT", 8, (char*)"AATTTT", 6, 3));
6363
// exit(1);
6464
if(asm_opt.sec_in) ret = ha_assemble_pair();
65-
else if(asm_opt.dbg_ovec_cal) ret = ha_assemble_ovec();
65+
else if(asm_opt.dbg_ovec_cal) ret = ha_ec_dbg();
6666
else ret = ha_assemble();
6767

6868
destory_opt(&asm_opt);

0 commit comments

Comments
 (0)