forked from cmungall/obo-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathobo-simple-merge.pl
executable file
·130 lines (111 loc) · 2.81 KB
/
obo-simple-merge.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/perl -w
use strict;
my %tag_h=();
my $regexp = '';
my $noheader;
my $negate;
my $count;
my $rmc;
my $keep_dupes;
while ($ARGV[0] =~ /^\-.+/) {
my $opt = shift @ARGV;
if ($opt eq '-h' || $opt eq '--help') {
print usage();
exit 0;
}
elsif ($opt eq '-c' || $opt eq '--count') {
$count = 1;
}
elsif ($opt eq '--noheader') {
$noheader = 1;
}
elsif ($opt eq '--keep-duplicates') {
$keep_dupes = 1;
}
elsif ($opt eq '--remove-comments') {
$rmc = 1;
}
}
my %th = ();
$/ = "\n\n";
my $n = 0;
while (@ARGV) {
my $f = pop @ARGV;
if ($f eq '-') {
*F=*STDIN;
}
else {
open(F,$f) || die $f;
}
my $hdr = 0;
while(<F>) {
if ($rmc) {
s/^\!*//;
s/\n\!*/\n/g;
}
if (!$hdr && $_ !~ /^\[/) {
print unless $noheader || $count;
$hdr = 1;
$noheader = 1; # show max 1 times
}
else {
if (/\nid: (\S+)/) {
my $id = $1;
#print STDERR "id: $id\n";
if ($th{$id}) {
if ($th{$id} eq $_) {
print STDERR "IDENTICAL: $id\n";
}
else {
if (compr($th{$id}) eq compr($_)) {
print STDERR "NEAR-IDENTICAL: $id\n";
}
else {
print STDERR "========================================\nDITCHING:\n\n$_ <<< >>>>\n\nUSING:\n\n$th{$id}";
if ($keep_dupes) {
my $n=1;
while ($th{"$id-duplicate-$n"}) {
$n++;
}
$th{"$id-duplicate-$n"} = $_;
}
}
}
}
else {
$th{$id} = $_;
}
}
else {
# no ID - this is allowed; e.g. Annotation stanzas
$th{$_} = $_;
}
}
}
}
foreach my $id (sort (keys %th)) {
#print "! $id\n";
print $th{$id};
}
exit 0;
sub compr {
my $s = shift;
$s =~ s/\s+//g;
$s;
}
sub scriptname {
my @p = split(/\//,$0);
pop @p;
}
sub usage {
my $sn = scriptname();
<<EOM;
$sn [--noheader] [--remove-comments] OBO-FILE1 OBO-FILE2 [OBO-FILE3...]
Merges multiple obo files together. Each stanza is treated as atomic,
and identified by its ID. No attempt is made to merge tags within a
stanza.
The last file specified on the command line has highest precedence.
A report is written on STDERR
See also: obo-merge-tags.pl
EOM
}