@@ -63,6 +63,23 @@ typedef struct {
63
63
uint8_t *cr;
64
64
} ec_ovec_buf_t ;
65
65
66
+ typedef struct {
67
+ uint32_t n_thread, n_a, chunk_size, cn;
68
+ FILE *fp;
69
+ } cal_ec_r_dbg_t ;
70
+
71
+ typedef struct {
72
+ ma_hit_t *a;
73
+ size_t n, m;
74
+ asg16_v ec;
75
+ } r_dbg_step_res_t ;
76
+
77
+ typedef struct { // data structure for each step in kt_pipeline()
78
+ ec_ovec_buf_t *buf;
79
+ r_dbg_step_res_t *res;
80
+ uint32_t si, ei;
81
+ } cal_ec_r_dbg_step_t ;
82
+
66
83
ec_ovec_buf_t * gen_ec_ovec_buf_t (uint32_t n);
67
84
void destroy_ec_ovec_buf_t (ec_ovec_buf_t *p);
68
85
@@ -3352,6 +3369,61 @@ static void worker_hap_ec(void *data, long i, int tid)
3352
3369
**/
3353
3370
}
3354
3371
3372
+ static void worker_hap_ec_dbg_paf (void *data, long i, int tid)
3373
+ {
3374
+ ec_ovec_buf_t0 *b = &(((cal_ec_r_dbg_step_t *)data)->buf ->a [tid]);
3375
+ r_dbg_step_res_t *rr = &(((cal_ec_r_dbg_step_t *)data)->res [tid]);
3376
+ uint32_t high_occ = asm_opt.hom_cov * (2.0 - HA_KMER_GOOD_RATIO);
3377
+ uint32_t low_occ = asm_opt.hom_cov * HA_KMER_GOOD_RATIO;
3378
+ overlap_region *aux_o = NULL ; i += ((cal_ec_r_dbg_step_t *)data)->si ;
3379
+
3380
+ // debug_retrive_bqual(D, &b->v8t, i, 256); return;
3381
+
3382
+ recover_UC_Read (&b->self_read , &R_INF, i);
3383
+
3384
+ h_ec_lchain (b->ab , i, b->self_read .seq , b->self_read .length , asm_opt.mz_win , asm_opt.k_mer_length , &R_INF, &b->olist , &b->clist , ((asm_opt.is_ont )?(0.05 ):(0.02 )), asm_opt.max_n_chain , 1 , NULL , NULL , &(b->sp ), &high_occ, &low_occ, 1 , 1 , 3 , 0.7 , 2 , 32 , COV_W);// /ONT high error
3385
+
3386
+ aux_o = fetch_aux_ovlp (&b->olist );// /must be here
3387
+
3388
+ // stderr_phase_ovlp(&b->olist);
3389
+ gen_hc_r_alin_ea (&b->olist , &b->clist , &R_INF, &b->self_read , &b->ovlp_read , &b->exz , aux_o, asm_opt.max_ov_diff_ec , (asm_opt.is_ont )?(WINDOW_OHC):(WINDOW_HC), i, E_KHIT, 1 , &b->v16 , &b->v64 , &(R_INF.paf [i]), 0 );
3390
+
3391
+ uint32_t k, m, tl; overlap_region *z; bit_extz_t ez; ma_hit_t *t;
3392
+ for (k = 0 ; k < b->olist .length ; k++) {
3393
+ z = &(b->olist .list [k]);
3394
+ if (!(z->w_list .n )) continue ;
3395
+
3396
+ tl = Get_READ_LENGTH ((R_INF), z->y_id );
3397
+ for (m = 0 ; m < z->w_list .n ; m++) {
3398
+ if (is_ualn_win (z->w_list .a [m])) continue ;
3399
+ set_bit_extz_t (ez, (*z), m);
3400
+ kv_pushp (ma_hit_t , *rr, &t);
3401
+
3402
+ t->qns = z->x_id ; t->qns = t->qns << 32 ;
3403
+ t->tn = z->y_id ;
3404
+
3405
+ t->qns = t->qns | (uint64_t )(z->w_list .a [m].x_start );
3406
+ t->qe = z->w_list .a [m].x_end + 1 ;
3407
+
3408
+ t->ts = z->w_list .a [m].y_start ;
3409
+ t->te = z->w_list .a [m].y_end + 1 ;
3410
+
3411
+ t->rev = z->y_pos_strand ;
3412
+
3413
+ t->bl = rr->ec .n ;
3414
+ kv_resize (uint16_t , rr->ec , rr->ec .n + ez.cigar .n );
3415
+ memcpy (rr->ec .a + rr->ec .n , ez.cigar .a , ez.cigar .n * sizeof ((*(rr->ec .a ))));
3416
+ rr->ec .n += ez.cigar .n ;
3417
+ t->cc = rr->ec .n - t->bl ;
3418
+
3419
+ if (t->rev ) {
3420
+ t->ts = tl - z->w_list .a [m].y_end - 1 ;
3421
+ t->te = tl - z->w_list .a [m].y_start ;
3422
+ }
3423
+ }
3424
+ }
3425
+ }
3426
+
3355
3427
uint32_t adjust_exact_match (asg16_v *in, int64_t xs0, int64_t xe0, int64_t ys0, int64_t ye0, uint64_t *rxs, uint64_t *rxe, uint64_t *rys, uint64_t *rye, uint32_t rev)
3356
3428
{
3357
3429
*rxs = *rxe = *rys = *rye = 0 ;
@@ -6106,6 +6178,72 @@ void cal_ec_r(uint64_t n_thre, uint64_t round, uint64_t n_round, uint64_t n_a, u
6106
6178
// }
6107
6179
}
6108
6180
6181
+ void print_ov_dbg_paf (FILE *fp, char *ref_str, char *ref_id, int32_t ref_id_n, char *qry_str, char *qry_id, int32_t qry_id_n, uint64_t rs, uint64_t re, uint64_t rl, uint64_t qs, uint64_t qe, uint64_t ql, uint64_t rev, bit_extz_t *ez, char *ezh)
6182
+ {
6183
+ uint64_t ci = 0 ; uint16_t c; uint32_t cl;
6184
+ fprintf (fp, " %.*s\t %lu\t %lu\t %lu\t " , qry_id_n, qry_id, ql, qs, qe);
6185
+ fprintf (fp, " %c\t " , " +-" [rev]);
6186
+ fprintf (fp, " %.*s\t %lu\t %lu\t %lu\t " , ref_id_n, ref_id, rl, rs, re);
6187
+ fprintf (fp, " 255\t cg:Z:" );
6188
+ while (ci < ez->cigar .n ) {
6189
+ ci = pop_trace (&(ez->cigar ), ci, &c, &cl);
6190
+ fprintf (fp, " %u%c" , cl, ezh[c]);
6191
+ }
6192
+ fprintf (fp, " \n " );
6193
+ }
6194
+
6195
+ static void *worker_ov_dbg_pipeline (void *data, int step, void *in) // callback for kt_pipeline()
6196
+ {
6197
+ cal_ec_r_dbg_t *p = (cal_ec_r_dbg_t *)data; char cm[4 ]; cm[0 ] = ' M' ; cm[1 ] = ' M' ; cm[2 ] = ' I' ; cm[3 ] = ' D' ;
6198
+ // cal_ec_r_dbg_step_t
6199
+ if (step == 0 ) { // step 1: read a block of sequences
6200
+ cal_ec_r_dbg_step_t *s; CALLOC (s, 1 );
6201
+ s->si = p->cn ; p->cn += p->chunk_size ;
6202
+ if (p->cn > p->n_a ) p->cn = p->n_a ; s->ei = p->cn ;
6203
+ if (s->si >= s->ei ) free (s);
6204
+ else return s;
6205
+ } else if (step == 1 ) { // step 2: alignment
6206
+ cal_ec_r_dbg_step_t *s = (cal_ec_r_dbg_step_t *)in;
6207
+ s->buf = gen_ec_ovec_buf_t (p->n_thread ); CALLOC (s->res , p->n_thread );
6208
+ kt_for (p->n_thread , worker_hap_ec_dbg_paf, s, (s->ei - s->si ));
6209
+ destroy_ec_ovec_buf_t (s->buf );
6210
+ return s;
6211
+ } else if (step == 2 ) { // step 3: dump
6212
+ cal_ec_r_dbg_step_t *s = (cal_ec_r_dbg_step_t *)in; uint64_t k, z; bit_extz_t ez; memset (&ez, 0 , sizeof (ez));
6213
+ // UC_Read qu; UC_Read tu; init_UC_Read(&qu); init_UC_Read(&tu);
6214
+ for (k = 0 ; k < p->n_thread ; k++) {
6215
+ for (z = 0 ; z < s->res [k].n ; z++) {
6216
+ ez.cigar .a = s->res [k].ec .a + s->res [k].a [z].bl ; ez.cigar .n = ez.cigar .m = s->res [k].a [z].cc ;
6217
+
6218
+ // UC_Read_resize(qu, (s->res[k].a[z].qe - ((uint32_t)s->res[k].a[z].qns)));
6219
+ // recover_UC_Read_sub_region(qu.seq, ((uint32_t)s->res[k].a[z].qns), (s->res[k].a[z].qe - ((uint32_t)s->res[k].a[z].qns)), 0, &R_INF, (s->res[k].a[z].qns>>32));
6220
+
6221
+ // UC_Read_resize(tu, (s->res[k].a[z].te - s->res[k].a[z].ts));
6222
+ // recover_UC_Read_sub_region(tu.seq, s->res[k].a[z].ts, s->res[k].a[z].te - s->res[k].a[z].ts, 0, &R_INF, s->res[k].a[z].tn);
6223
+
6224
+ print_ov_dbg_paf (p->fp , NULL /* *qu.seq**/ , Get_NAME (R_INF, (s->res [k].a [z].qns >>32 )), Get_NAME_LENGTH (R_INF, (s->res [k].a [z].qns >>32 )),
6225
+ NULL /* *tu.seq**/ , Get_NAME (R_INF, (s->res [k].a [z].tn )), Get_NAME_LENGTH (R_INF, (s->res [k].a [z].tn )), (uint32_t )s->res [k].a [z].qns , s->res [k].a [z].qe , Get_READ_LENGTH (R_INF, (s->res [k].a [z].qns >>32 )), s->res [k].a [z].ts , s->res [k].a [z].te , Get_READ_LENGTH (R_INF, (s->res [k].a [z].tn )), s->res [k].a [z].rev , &ez, cm);
6226
+ }
6227
+ free (s->res [k].a ); free (s->res [k].ec .a );
6228
+ }
6229
+ free (s->res ); free (s); // /destory_UC_Read(&qu); destory_UC_Read(&tu);
6230
+ }
6231
+ return 0 ;
6232
+ }
6233
+
6234
+ void cal_ec_r_dbg (uint64_t n_thre, uint64_t n_a)
6235
+ {
6236
+ char *paf = NULL ; MALLOC (paf, (strlen (asm_opt.output_file_name )+64 ));
6237
+ sprintf (paf, " %s.ovlp.paf" , asm_opt.output_file_name );
6238
+
6239
+ cal_ec_r_dbg_t sl; memset (&sl, 0 , sizeof (sl));
6240
+ sl.n_thread = n_thre; sl.n_a = n_a; sl.chunk_size = 2000 ; sl.cn = 0 ; sl.fp = fopen (paf, " w" );
6241
+
6242
+ kt_pipeline (3 , worker_ov_dbg_pipeline, &sl, 3 );
6243
+
6244
+ fclose (sl.fp ); free (paf);
6245
+ }
6246
+
6109
6247
void destroy_cc_v (cc_v *z)
6110
6248
{
6111
6249
uint64_t k;
0 commit comments