@@ -60,6 +60,7 @@ typedef struct {
60
60
typedef struct {
61
61
ec_ovec_buf_t0 *a;
62
62
uint32_t n, rev;
63
+ uint8_t *cr;
63
64
} ec_ovec_buf_t ;
64
65
65
66
ec_ovec_buf_t * gen_ec_ovec_buf_t (uint32_t n);
@@ -180,7 +181,7 @@ void destroy_ec_ovec_buf_t(ec_ovec_buf_t *p)
180
181
destroy_cns_gfa (&(z->cns ));
181
182
182
183
}
183
- free (p->a ); free (p);
184
+ free (p->a ); free (p-> cr ); free (p );
184
185
185
186
// fprintf(stderr, "[M::%s-chains] #->%lld\n", __func__, asm_opt.num_bases);
186
187
// fprintf(stderr, "[M::%s-passed-chains-0] #->%lld\n", __func__, asm_opt.num_corrected_bases);
@@ -3931,10 +3932,11 @@ uint32_t is_chemical_r(ma_hit_t_alloc *ov, asg64_v *idx, int64_t len, int64_t co
3931
3932
}
3932
3933
3933
3934
3934
- uint32_t is_chemical_r_adv (ma_hit_t_alloc *ov, asg64_v *idx, int64_t len, int64_t cov, int64_t cut_len, double dup_rate)
3935
+ uint32_t is_chemical_r_adv (ma_hit_t_alloc *ov, asg64_v *idx, int64_t len, int64_t cov, int64_t cut_len, double dup_rate, uint64_t is_del )
3935
3936
{
3936
3937
uint64_t k, s, e; int64_t dp, old_dp, st = 0 , ed, s0, e0 , rr, lt;
3937
3938
for (k = idx->n = 0 ; k < ov->length ; k++) {
3939
+ if (is_del && ov->buffer [k].del ) continue ;
3938
3940
s0 = (uint32_t )ov->buffer [k].qns ; e0 = ov->buffer [k].qe ;
3939
3941
if (s0 > 0 ) s0 += cut_len;
3940
3942
if (e0 < len) e0 -= cut_len;
@@ -3988,6 +3990,64 @@ uint32_t is_chemical_r_adv(ma_hit_t_alloc *ov, asg64_v *idx, int64_t len, int64_
3988
3990
return 0 ;
3989
3991
}
3990
3992
3993
+ int64_t cal_chemical_r_adv (ma_hit_t_alloc *ov, asg64_v *idx, int64_t len, int64_t cut_len, double dup_rate, uint64_t is_del)
3994
+ {
3995
+ uint64_t k, s, e; int64_t dp, old_dp, st = 0 , ed, s0, e0 , rr, lt, min_cov;
3996
+ for (k = idx->n = 0 ; k < ov->length ; k++) {
3997
+ if (is_del && ov->buffer [k].del ) continue ;
3998
+ s0 = (uint32_t )ov->buffer [k].qns ; e0 = ov->buffer [k].qe ;
3999
+ if (s0 > 0 ) s0 += cut_len;
4000
+ if (e0 < len) e0 -= cut_len;
4001
+ if (e0 <= s0) continue ;
4002
+ s = s0; e = e0 ;
4003
+
4004
+ lt = Get_READ_LENGTH ((R_INF), ov->buffer [k].tn );
4005
+ rr = (lt >= len)?(lt - len):(len - lt);
4006
+ if ((rr <= (len*dup_rate)) && (rr <= (lt*dup_rate)) && (ov->buffer [k].rev )) {
4007
+ dp = (ov->buffer [k].qe ) - ((uint32_t )ov->buffer [k].qns ); dp = len - dp;
4008
+ old_dp = ov->buffer [k].te - ov->buffer [k].ts ; old_dp = lt - old_dp;
4009
+ if ((dp <= (len*dup_rate)) && (old_dp <= (lt*dup_rate))) continue ;
4010
+ }
4011
+
4012
+ kv_push (uint64_t , (*idx), (s<<1 ));
4013
+ kv_push (uint64_t , (*idx), (e<<1 )|1 );
4014
+ }
4015
+
4016
+ radix_sort_ec64 (idx->a , idx->a + idx->n ); s0 = e0 = rr = -1 ; min_cov = INT64_MAX;
4017
+ for (k = 0 , dp = 0 , st = ed = 0 ; k < idx->n ; ++k) {
4018
+ old_dp = dp;
4019
+ // /if a[j] is qe
4020
+ if (idx->a [k]&1 ) --dp;
4021
+ else ++dp;
4022
+
4023
+ ed = idx->a [k]>>1 ;
4024
+ if (ed > st) {
4025
+ // if(ov->length && ((ov->buffer[0].qns>>32) == 5045637)) {
4026
+ // fprintf(stderr, "[M::%s]\tmd::[%ld,%ld)\tcov::%ld\tlen::%ld\tid::%lu\n", __func__, st, ed, old_dp, len, ov->buffer[0].qns>>32);
4027
+ // }
4028
+ if (old_dp <= min_cov) {
4029
+ // if(ov->length && (ov->buffer[0].qns>>32) == 22344) fprintf(stderr, "[M::%s]\tmd::[%ld,%ld)\tcov::%ld\tlen::%ld\n", __func__, st, ed, old_dp, len);
4030
+ min_cov = old_dp;
4031
+ }
4032
+ }
4033
+ st = ed;
4034
+ }
4035
+
4036
+
4037
+ ed = len; old_dp = dp;
4038
+ if (ed > st) {
4039
+ // if(ov->length && ((ov->buffer[0].qns>>32) == 5045637)) {
4040
+ // fprintf(stderr, "[M::%s]\tmd::[%ld,%ld)\tcov::%ld\tlen::%ld\tid::%lu\n", __func__, st, ed, old_dp, len, ov->buffer[0].qns>>32);
4041
+ // }
4042
+ if (old_dp <= min_cov) {
4043
+ // if(ov->length && (ov->buffer[0].qns>>32) == 22344) fprintf(stderr, "[M::%s]\tmd::[%ld,%ld)\tcov::%ld\tlen::%ld\n", __func__, st, ed, old_dp, len);
4044
+ min_cov = old_dp;
4045
+ }
4046
+ }
4047
+
4048
+ return min_cov;
4049
+ }
4050
+
3991
4051
void prt_dbg_rid_paf (ma_hit_t_alloc *ov, UC_Read *ra, asg8_v *qa)
3992
4052
{
3993
4053
if (!(ov->length )) return ;
@@ -4051,7 +4111,7 @@ static void worker_hap_dc_ec_chemical_r(void *data, long i, int tid)
4051
4111
if (b->cnt [1 ] == 0 ) {
4052
4112
// if(i == 6204620) prt_dbg_rid_paf(&(R_INF.paf[i]), &(b->self_read), &(b->v8q));
4053
4113
// if(is_chemical_r(&(R_INF.paf[i]), &b->v64, Get_READ_LENGTH((R_INF), i), 3, 16)) {
4054
- if (is_chemical_r_adv (&(R_INF.paf [i]), &b->v64 , Get_READ_LENGTH ((R_INF), i), asm_opt.chemical_cov , asm_opt.chemical_flank , 0.02 )) {
4114
+ if (is_chemical_r_adv (&(R_INF.paf [i]), &b->v64 , Get_READ_LENGTH ((R_INF), i), asm_opt.chemical_cov , asm_opt.chemical_flank , 0.02 , 0 )) {
4055
4115
// fprintf(stderr, "-um-[M::%s]\tqn::%u::%.*s\n\n", __func__, (uint32_t)(i), (int)Get_NAME_LENGTH(R_INF, i), Get_NAME((R_INF), i));
4056
4116
R_INF.paf [i].length = 0 ; b->cnt [0 ]++;
4057
4117
}
@@ -4075,11 +4135,56 @@ static void worker_hap_dc_ec_chemical_r(void *data, long i, int tid)
4075
4135
static void worker_hap_dc_ec_chemical_arc (void *data, long i, int tid)
4076
4136
{
4077
4137
ec_ovec_buf_t0 *b = &(((ec_ovec_buf_t *)data)->a [tid]);
4078
- ma_hit_t_alloc *paf = &(R_INF.paf [i]); uint64_t k;
4138
+ ma_hit_t_alloc *paf = &(R_INF.paf [i]), *rev; uint64_t k, z;
4139
+
4140
+ if (b->cnt [1 ] == 0 ) {
4141
+ if (is_chemical_r_adv (&(R_INF.paf [i]), &b->v64 , Get_READ_LENGTH ((R_INF), i), asm_opt.chemical_cov , asm_opt.chemical_flank , 0.02 , 1 )) {
4142
+ // fprintf(stderr, "-um-[M::%s]\tqn::%u::%.*s\n\n", __func__, (uint32_t)(i), (int)Get_NAME_LENGTH(R_INF, i), Get_NAME((R_INF), i));
4143
+ for (k = 0 ; k < paf->length ; k++) paf->buffer [k].del = 1 ; b->cnt [0 ]++;
4144
+ }
4145
+ } else if (b->cnt [1 ] == 1 ) {
4146
+ for (k = 0 ; k < paf->length ; k++) {
4147
+ if ((Get_qn (paf->buffer [k])) > (Get_tn (paf->buffer [k]))) continue ;
4148
+ rev = &(R_INF.paf [paf->buffer [k].tn ]);
4149
+ for (z = 0 ; z < rev->length ; z++) {
4150
+ if ((rev->buffer [z].tn == (Get_qn (paf->buffer [k])))) {
4151
+ if (paf->buffer [k].del != rev->buffer [z].del ) {
4152
+ paf->buffer [k].del = rev->buffer [z].del = 1 ;
4153
+ }
4154
+ }
4155
+ }
4156
+ }
4157
+ }
4079
4158
4080
- if (is_chemical_r_adv (&(R_INF.paf [i]), &b->v64 , Get_READ_LENGTH ((R_INF), i), asm_opt.chemical_cov , asm_opt.chemical_flank , 0.02 )) {
4081
- // fprintf(stderr, "-um-[M::%s]\tqn::%u::%.*s\n\n", __func__, (uint32_t)(i), (int)Get_NAME_LENGTH(R_INF, i), Get_NAME((R_INF), i));
4082
- for (k = 0 ; k < paf->length ; k++) paf->buffer [k].del = 1 ; b->cnt [0 ]++;
4159
+ refresh_ec_ovec_buf_t0 (b, REFRESH_N);
4160
+ }
4161
+
4162
+ static void worker_hap_dc_ec_chemical_arc_mark (void *data, long i, int tid)
4163
+ {
4164
+ ec_ovec_buf_t0 *b = &(((ec_ovec_buf_t *)data)->a [tid]);
4165
+ ma_hit_t_alloc *paf = &(R_INF.paf [i]), *rev; uint64_t k, z; int64_t cov, msk_cut = asm_opt.chemical_cov ;
4166
+ uint8_t *msk = ((ec_ovec_buf_t *)data)->cr ;
4167
+
4168
+ if (b->cnt [1 ] == 0 ) {
4169
+ msk[i] = (uint8_t )-1 ;
4170
+ cov = cal_chemical_r_adv (&(R_INF.paf [i]), &b->v64 , Get_READ_LENGTH ((R_INF), i), asm_opt.chemical_flank , 0.02 , 1 );
4171
+ if (cov <= msk_cut) msk[i] = cov;
4172
+ if (cov <= msk_cut/* *FORCE_CUT**/ ) {
4173
+ // fprintf(stderr, "-um-[M::%s]\tqn::%u::%.*s\n\n", __func__, (uint32_t)(i), (int)Get_NAME_LENGTH(R_INF, i), Get_NAME((R_INF), i));
4174
+ for (k = 0 ; k < paf->length ; k++) paf->buffer [k].del = 1 ; b->cnt [0 ]++;
4175
+ }
4176
+ } else if (b->cnt [1 ] == 1 ) {
4177
+ for (k = 0 ; k < paf->length ; k++) {
4178
+ if ((Get_qn (paf->buffer [k])) > (Get_tn (paf->buffer [k]))) continue ;
4179
+ rev = &(R_INF.paf [paf->buffer [k].tn ]);
4180
+ for (z = 0 ; z < rev->length ; z++) {
4181
+ if ((rev->buffer [z].tn == (Get_qn (paf->buffer [k])))) {
4182
+ if ((paf->buffer [k].del != rev->buffer [z].del ) || (msk[Get_qn (paf->buffer [k])] <= msk_cut/* *FORCE_CUT**/ ) || (msk[Get_tn (paf->buffer [k])] <= msk_cut/* *FORCE_CUT**/ )) {
4183
+ paf->buffer [k].del = rev->buffer [z].del = 1 ;
4184
+ }
4185
+ }
4186
+ }
4187
+ }
4083
4188
}
4084
4189
4085
4190
refresh_ec_ovec_buf_t0 (b, REFRESH_N);
@@ -6066,7 +6171,7 @@ void handle_chemical_r(uint64_t n_thre, uint64_t n_a)
6066
6171
6067
6172
kt_for (n_thre, worker_hap_dc_ec_chemical_r, b, n_a);
6068
6173
6069
- fprintf (stderr, " [M::%s] # chemical reads: %lu, # arcs:: %lu\n " , __func__, chem_n, dedup);
6174
+ fprintf (stderr, " [M::%s] # chimeric reads: %lu, # arcs:: %lu\n " , __func__, chem_n, dedup);
6070
6175
6071
6176
destroy_ec_ovec_buf_t (b);
6072
6177
}
@@ -6076,16 +6181,44 @@ void handle_chemical_arc(uint64_t n_thre, uint64_t n_a)
6076
6181
ec_ovec_buf_t *b = NULL ; uint64_t k, chem_n = 0 ;
6077
6182
b = gen_ec_ovec_buf_t (n_thre);
6078
6183
for (k = 0 ; k < n_thre; ++k) {
6079
- b->a [k].cnt [0 ] = 0 ;
6184
+ b->a [k].cnt [0 ] = 0 ; b-> a [k]. cnt [ 1 ] = 0 ;
6080
6185
}
6081
6186
6082
6187
kt_for (n_thre, worker_hap_dc_ec_chemical_arc, b, n_a);
6083
6188
6084
6189
for (k = 0 ; k < n_thre; ++k) {
6085
6190
chem_n += b->a [k].cnt [0 ];
6191
+ b->a [k].cnt [0 ] = 0 ; b->a [k].cnt [1 ] = 1 ;
6086
6192
}
6087
6193
6088
- fprintf (stderr, " [M::%s] # chemical reads: %lu\n " , __func__, chem_n);
6194
+ kt_for (n_thre, worker_hap_dc_ec_chemical_arc, b, n_a);
6195
+
6196
+ fprintf (stderr, " [M::%s] # chimeric reads: %lu\n " , __func__, chem_n);
6089
6197
6090
6198
destroy_ec_ovec_buf_t (b);
6199
+ }
6200
+
6201
+ uint8_t * gen_chemical_arc_rf (uint64_t n_thre, uint64_t n_a)
6202
+ {
6203
+ ec_ovec_buf_t *b = NULL ; uint64_t k, chem_n = 0 ; uint8_t *ra = NULL ;
6204
+ b = gen_ec_ovec_buf_t (n_thre);
6205
+ for (k = 0 ; k < n_thre; ++k) {
6206
+ b->a [k].cnt [0 ] = 0 ; b->a [k].cnt [1 ] = 0 ;
6207
+ }
6208
+ MALLOC (ra, n_a); // /memset(ra, -1, sizeof((*ra))*n_a);
6209
+ b->cr = ra;
6210
+
6211
+ kt_for (n_thre, worker_hap_dc_ec_chemical_arc_mark, b, n_a);
6212
+
6213
+ for (k = 0 ; k < n_thre; ++k) {
6214
+ chem_n += b->a [k].cnt [0 ];
6215
+ b->a [k].cnt [0 ] = 0 ; b->a [k].cnt [1 ] = 1 ;
6216
+ }
6217
+
6218
+ kt_for (n_thre, worker_hap_dc_ec_chemical_arc_mark, b, n_a);
6219
+
6220
+ fprintf (stderr, " [M::%s] # chimeric reads: %lu\n " , __func__, chem_n);
6221
+
6222
+ b->cr = NULL ; destroy_ec_ovec_buf_t (b);
6223
+ return ra;
6091
6224
}
0 commit comments