Skip to content

Commit 69b4ffb

Browse files
committed
Added missing reference genome parameters of ngs-bits tools
1 parent 5eb4c2a commit 69b4ffb

11 files changed

+25
-21
lines changed

src/NGS/an_spliceai.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ function annotate_spliceai_scores($in, $vcf_filtered, $out)
243243
$splice_env = get_path("splice_env", true);
244244
exec2("cut -f 2,4,5 -d'\t' {$splice_env}/lib/python3.10/site-packages/spliceai/annotations/".strtolower($build).".txt | sed 's/^/chr/' | sed '1d' > {$spliceai_regions}");
245245
$tmp2 = $parser->tempFile("_spliceai_filtered_regions.vcf");
246-
$parser->exec(get_path("ngs-bits")."/VcfFilter", "-reg {$spliceai_regions} -in {$tmp1} -out {$tmp2}", true);
246+
$parser->exec(get_path("ngs-bits")."/VcfFilter", "-reg {$spliceai_regions} -in {$tmp1} -out {$tmp2} -ref ".genome_fasta($build), true);
247247
$var_count = vcf_variant_count($tmp2);
248248
$parser->log("Variants after SpliceAI transcript regions filter: {$var_count}");
249249

src/NGS/an_vep.php

+6-5
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,14 @@ function annotation_file_path($rel_path, $is_optional=false)
6161
$local_data = get_path("local_data");
6262
$vep_data_path = "{$local_data}/".basename(get_path("vep_data"))."/"; //the data is copied to the local data folder by 'data_setup' to speed up annotations (and prevent hanging annotation jobs)
6363
$data_folder = get_path("data_folder");
64+
$genome = genome_fasta($build);
6465

6566
$args = array();
6667
$args[] = "-i $in --format vcf"; //input
6768
$args[] = "-o $vep_output --vcf --no_stats --force_overwrite"; //output
6869
$args[] = "--species homo_sapiens --assembly {$build}"; //species
6970
$args[] = "--fork {$threads}"; //speed (--buffer_size did not change run time when between 1000 and 20000)
70-
$args[] = "--offline --cache --dir_cache {$vep_data_path}/ --fasta ".genome_fasta($build); //paths to data
71+
$args[] = "--offline --cache --dir_cache {$vep_data_path}/ --fasta {$genome}"; //paths to data
7172
$args[] = "--transcript_version --domains --failed 1"; //annotation options
7273
$args[] = "--regulatory"; //regulatory features
7374
$fields[] = "BIOTYPE";
@@ -101,14 +102,14 @@ function annotation_file_path($rel_path, $is_optional=false)
101102
//add consequences (Ensembl)
102103
$gff = get_path("data_folder")."/dbs/Ensembl/Homo_sapiens.GRCh38.112.gff3";
103104
$vcf_output_consequence = $parser->tempFile("_consequence.vcf");
104-
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vep_output} -out {$vcf_output_consequence} -threads {$threads} -tag CSQ2 -gff {$gff}", true);
105+
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vep_output} -out {$vcf_output_consequence} -threads {$threads} -tag CSQ2 -gff {$gff} -ref {$genome}", true);
105106

106107
//add consequences (RefSeq)
107108
if($annotate_refseq_consequences)
108109
{
109110
$gff2 = get_path("data_folder")."/dbs/RefSeq/Homo_sapiens.GRCh38.p14.gff3";
110111
$vcf_output_refseq = $parser->tempFile("_refseq.vcf");
111-
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vcf_output_consequence} -out {$vcf_output_refseq} -threads {$threads} -tag CSQ_REFSEQ -gff {$gff2} -source refseq", true);
112+
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vcf_output_consequence} -out {$vcf_output_refseq} -threads {$threads} -tag CSQ_REFSEQ -gff {$gff2} -source refseq -ref {$genome}", true);
112113
$vcf_output_consequence = $vcf_output_refseq;
113114
}
114115

@@ -118,7 +119,7 @@ function annotation_file_path($rel_path, $is_optional=false)
118119

119120
//add MaxEntScan annotation
120121
$vcf_output_mes = $parser->tempFile("_mes.vcf");
121-
$parser->exec(get_path("ngs-bits")."/VcfAnnotateMaxEntScan", "-gff {$gff} -in {$vcf_output_phylop} -out {$vcf_output_mes} -ref ".genome_fasta($build)." -swa -threads {$threads} -min_score 0.0 -decimals 1", true);
122+
$parser->exec(get_path("ngs-bits")."/VcfAnnotateMaxEntScan", "-gff {$gff} -in {$vcf_output_phylop} -out {$vcf_output_mes} -ref {$genome} -swa -threads {$threads} -min_score 0.0 -decimals 1", true);
122123

123124
// create config file
124125
$config_file_path = $parser->tempFile(".config");
@@ -327,7 +328,7 @@ function annotation_file_path($rel_path, $is_optional=false)
327328
//check vcf file
328329
if($check_lines >= 0)
329330
{
330-
$parser->exec(get_path("ngs-bits")."VcfCheck", "-in $out -lines $check_lines -ref ".genome_fasta($build), true);
331+
$parser->exec(get_path("ngs-bits")."VcfCheck", "-in $out -lines $check_lines -ref {$genome}", true);
331332
}
332333

333334
?>

src/NGS/db_check_gender.php

+2-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@
7979
}
8080

8181
//determine gender from BAM
82-
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-in {$in} -method {$method} {$args} -build ".ngsbits_build($build), true);
82+
$genome = genome_fasta($build);
83+
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-in {$in} -method {$method} {$args} -build ".ngsbits_build($build)." -ref {$genome}", true);
8384
$gender2 = explode("\t", $stdout[1])[1];
8485
if (starts_with($gender2, "unknown"))
8586
{

src/NGS/merge_gvcf.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@
183183
$pipeline[] = array("zcat", $tmp_vcf);
184184

185185
//filter variants according to variant quality>5
186-
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5");
186+
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -ref $genome");
187187

188188
//split complex variants to primitives
189189
//this step has to be performed before vcfbreakmulti - otherwise mulitallelic variants that contain both 'hom' and 'het' genotypes fail - see NA12878 amplicon test chr2:215632236-215632276
@@ -193,7 +193,7 @@
193193
$pipeline[] = array(get_path("vcflib")."vcfbreakmulti", "");
194194

195195
//remove invalid variants
196-
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid");
196+
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid -ref $genome");
197197

198198
//normalize all variants and align INDELs to the left
199199
$pipeline[] = array(get_path("ngs-bits")."VcfLeftNormalize", "-stream -ref $genome");

src/NGS/vc_clair.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
$pipeline[] = array("zcat", "{$clair_vcf}");
9292

9393
//filter variants according to variant quality>5
94-
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid");
94+
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid -ref $genome");
9595

9696
//split complex variants to primitives
9797
//this step has to be performed before vcfbreakmulti - otherwise mulitallelic variants that contain both 'hom' and 'het' genotypes fail - see NA12878 amplicon test chr2:215632236-215632276
@@ -143,7 +143,7 @@
143143
//create/copy gvcf:
144144
$pipeline = array();
145145
$pipeline[] = array("zcat", $clair_gvcf);
146-
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid");
146+
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid -ref $genome");
147147
$pipeline[] = array("bgzip", "-c > {$out_gvcf}");
148148
$parser->execPipeline($pipeline, "gVCF post processing");
149149

src/NGS/vc_clair_trio.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@
9999
$pipeline[] = array("zcat", "{$clair_vcf}");
100100

101101
//filter variants according to variant quality>5
102-
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid");
102+
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid -ref $genome");
103103

104104
//split complex variants to primitives
105105
//this step has to be performed before vcfbreakmulti - otherwise mulitallelic variants that contain both 'hom' and 'het' genotypes fail - see NA12878 amplicon test chr2:215632236-215632276

src/NGS/vc_viral_load.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@
133133

134134
// mismatches
135135
$region = $fields[0] . ":" . ($fields[1]+1) . "-" . $fields[2];
136-
list($stdout) = $parser->exec(get_path("ngs-bits")."VcfFilter", "-in {$viral_vcf} -reg '{$region}'", false);
136+
list($stdout) = $parser->exec(get_path("ngs-bits")."VcfFilter", "-in {$viral_vcf} -reg '{$region}' -ref $genome", false);
137137
$variant_count = 0;
138138
foreach($stdout as $line)
139139
{

src/Pipelines/analyze.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@
393393
//filter by target region (extended by 200) and quality 5
394394
$target = $parser->tempFile("_roi_extended.bed");
395395
$parser->exec($ngsbits."BedExtend"," -in ".$sys['target_file']." -n 200 -out $target -fai ".$genome.".fai", true);
396-
$pipeline[] = array($ngsbits."VcfFilter", "-reg {$target} -qual 5 -filter_clear");
396+
$pipeline[] = array($ngsbits."VcfFilter", "-reg {$target} -qual 5 -filter_clear -ref $genome");
397397

398398
//split multi-allelic variants
399399
$pipeline[] = array(get_path("vcflib")."vcfbreakmulti", "");
@@ -480,7 +480,7 @@
480480
$pipeline[] = array("zcat", $dragen_output_vcf);
481481

482482
//filter by target region and quality 5
483-
$pipeline[] = array($ngsbits."VcfFilter", "-reg chrMT:1-16569 -qual 5 -filter_clear");
483+
$pipeline[] = array($ngsbits."VcfFilter", "-reg chrMT:1-16569 -qual 5 -filter_clear -ref $genome");
484484

485485
//split multi-allelic variants
486486
$pipeline[] = array(get_path("vcflib")."vcfbreakmulti", "");

src/Pipelines/somatic_dna.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ function($str) { return explode("\t", $str); },
279279
}
280280
else
281281
{
282-
$out = $parser->exec(get_path("ngs-bits") . "/SampleGender", "-in $t_bam -build ".ngsbits_build($sys['build'])." -method sry", true);
282+
$out = $parser->exec(get_path("ngs-bits") . "/SampleGender", "-in $t_bam -build ".ngsbits_build($sys['build'])." -method sry -ref {$ref_genome}", true);
283283
list(,,$cov_sry) = explode("\t", $out[0][1]);
284284

285285
if(is_numeric($cov_sry) && (float)$cov_sry >= 30)
@@ -739,7 +739,7 @@ function($str) { return explode("\t", $str); },
739739
{
740740
$ngsbits = get_path("ngs-bits");
741741
$pipeline = [
742-
["{$ngsbits}BedAnnotateGC", "-in ".$sys['target_file']." -clear -ref ".genome_fasta($sys['build'])],
742+
["{$ngsbits}BedAnnotateGC", "-in ".$sys['target_file']." -clear -ref {$ref_genome}"],
743743
["{$ngsbits}BedAnnotateGenes", "-out {$target_bed}"],
744744
];
745745
$parser->execPipeline($pipeline, "creating annotated BED file for ClinCNV");
@@ -755,7 +755,7 @@ function($str) { return explode("\t", $str); },
755755
{
756756
$pipeline = [
757757
[get_path("ngs-bits")."BedChunk", "-in ".$sys['target_file']." -n {$bin_size}"],
758-
[get_path("ngs-bits")."BedAnnotateGC", "-clear -ref ".genome_fasta($sys['build'])],
758+
[get_path("ngs-bits")."BedAnnotateGC", "-clear -ref {$ref_genome}"],
759759
[get_path("ngs-bits")."BedAnnotateGenes", "-out {$target_bed}"]
760760
];
761761
$parser->execPipeline($pipeline, "creating annotated BED file for ClinCNV");

src/Pipelines/trio.php

+3-2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)
149149

150150
//prepare multi-sample paramters
151151
$sys = load_system($system, $sample_c); //required in case the the system is unset
152+
$genome = genome_fasta($sys['build']);
152153
$args_multisample = [
153154
"-bams $c $f $m",
154155
"-status affected control control",
@@ -286,7 +287,7 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)
286287
print "Medelian errors: ".number_format(100.0*$vars_mendelian_error/$vars_high_depth, 2)."% (of {$vars_high_depth} high-depth, autosomal variants)\n";
287288

288289
//determine gender of child
289-
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($sys['build']), true);
290+
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($sys['build'])." -ref {$genome}", true);
290291
$gender_data = explode("\t", $stdout[1])[1];
291292
if ($gender_data!="male" && $gender_data!="female") $gender_data = "n/a";
292293
print "Gender of child (from data): {$gender_data}\n";
@@ -336,7 +337,7 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)
336337
if ($is_wgs_shallow)
337338
{
338339
//determine gender of child
339-
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method xy -in $c -build ".ngsbits_build($sys['build']), true);
340+
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method xy -in $c -build ".ngsbits_build($sys['build'])." -ref {$genome}", true);
340341
$gender_data = explode("\t", $stdout[1])[1];
341342
if ($gender_data!="male" && $gender_data!="female") $gender_data = "n/a";
342343
print "Gender of child (from data): {$gender_data}\n";

src/Pipelines/trio_longread.php

+2-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,8 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)
321321
print "Medelian errors: ".number_format(100.0*$vars_mendelian_error/$vars_high_depth, 2)."% (of {$vars_high_depth} high-depth, autosomal variants)\n";
322322

323323
//determine gender of child
324-
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($build), true);
324+
$genome = genome_fasta($build);
325+
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($build)." -ref {$genome}", true);
325326
$gender_data = explode("\t", $stdout[1])[1];
326327
if ($gender_data!="male" && $gender_data!="female") $gender_data = "n/a";
327328
print "Gender of child (from data): {$gender_data}\n";

0 commit comments

Comments
 (0)