-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathremove_pcr_duplicates.pl
executable file
·52 lines (40 loc) · 1.05 KB
/
remove_pcr_duplicates.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!perl -w
use warnings;
use strict;
# Remove PCR duplicates from a FASTQ file.
#
# Usage: remove_pcr_duplicates.pl <input.fastq> <output.fastq>
#
# The input is read as consecutive four-line records (header, sequence,
# separator, qualities). The first record seen for each distinct sequence
# prefix is copied to the output unchanged; later records with the same
# prefix are dropped. A one-line, tab-separated summary is written to
# "<output>.stats" with the fields:
#   input path, reads seen, reads removed, reads written, percent removed.
my $input  = $ARGV[0];
my $output = $ARGV[1];
die "Usage: $0 <input.fastq> <output.fastq>\n"
    unless defined $input && defined $output;

# Two reads count as duplicates when this many leading characters of their
# sequence lines (line terminator included for shorter reads) are identical.
my $key_length = 100;

my %reads;                  # sequence prefixes that were already written
my $original_reads = 0;     # complete records read from the input
my $removed_reads  = 0;     # records dropped as duplicates
my $written_reads  = 0;     # records copied to the output

# Three-argument open with lexical handles: the file names are taken
# literally, so characters such as '>' or '|' in a name cannot alter the
# open mode or start a command.
open(my $in_fh,  '<', $input)  or die "Can't open ${input}: $!\n";
open(my $out_fh, '>', $output) or die "Can't open ${output}: $!\n";

while (my $header_a = <$in_fh>) {
    my $read     = <$in_fh>;
    my $header_b = <$in_fh>;
    my $quals    = <$in_fh>;

    # The input ended in the middle of a record. Do not count or emit the
    # fragment; stay silent when it is only a trailing blank line.
    unless (defined $read && defined $header_b && defined $quals) {
        warn "Ignoring incomplete record at end of ${input}\n"
            if $header_a =~ /\S/;
        last;
    }

    $original_reads++;
    my $key = substr($read, 0, $key_length);

    if (exists $reads{$key}) {
        $removed_reads++;
        next;
    }

    $reads{$key} = 1;
    print {$out_fh} $header_a, $read, $header_b, $quals;
    $written_reads++;
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the close on the output handle is checked.
close($out_fh) or die "Can't close ${output}: $!\n";
close($in_fh);

# Percentage of reads removed; stays 0 for an empty input.
my $pc = 0;
if ($original_reads > 0) {
    $pc = (100 * $removed_reads) / $original_reads;
}

my $stat_path = $output . ".stats";
open(my $stat_fh, '>', $stat_path) or die "Can't open stat file: $!\n";
printf {$stat_fh} "%s\t%d\t%d\t%d\t%.2f\n",
    $input, $original_reads, $removed_reads, $written_reads, $pc;
close($stat_fh) or die "Can't close stat file: $!\n";