forked from cmungall/obo-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathobo-assign-ids.pl
executable file
·108 lines (94 loc) · 2.03 KB
/
obo-assign-ids.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/perl -w
use strict;
# ---------------------------------------------------------------------
# Command-line options
# ---------------------------------------------------------------------
my $noheader;   # set by --noheader; accepted, but nothing in this script reads it
my $idmatch;    # -m / --idmatch : pattern selecting the term IDs to renumber
my $idspace;    # -s / --idspace : ID-space prefix used when minting new IDs
my $idnum;      # -n / --idnum   : counter value for the first newly minted ID

# Consume leading options, leaving the input file names in @ARGV.
#  * @ARGV is checked first: once the arguments run out $ARGV[0] is undef
#    and matching against it would raise an "uninitialized value" warning.
#  * At least one character is required after the dash, so a bare "-"
#    stays in @ARGV as an input name instead of being discarded.
while (@ARGV && $ARGV[0] =~ /^-./) {
    my $opt = shift @ARGV;
    print STDERR "opt:$opt\n";    # trace of every option seen

    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-m' || $opt eq '--idmatch') {
        $idmatch = shift @ARGV;
        # A missing value leaves $idmatch undef; skip the trace rather than
        # interpolate undef.
        print STDERR "m=$idmatch\n" if defined $idmatch;
    }
    elsif ($opt eq '-s' || $opt eq '--idspace') {
        $idspace = shift @ARGV;
    }
    elsif ($opt eq '-n' || $opt eq '--idnum') {
        $idnum = shift @ARGV;
    }
    elsif ($opt eq '--noheader') {
        $noheader = 1;
    }
    else {
        # Still skipped, as before, but no longer silently: a misspelt
        # option would otherwise go unnoticed.
        warn "ignoring unrecognised option: $opt\n";
    }
}
# ---------------------------------------------------------------------
# Validate the required options
# ---------------------------------------------------------------------
# Both the ID pattern and the target ID space are mandatory.
die usage() unless $idmatch;
die usage() unless $idspace;

# No counter given: show the IDs already present in the target ID space so
# the user can choose a safe starting number, then stop.
# The input files are scanned directly in Perl rather than through a shell
# pipeline, so file names and the ID space are never interpreted by a shell
# and nothing is read from standard input when no file was named.
if (!defined $idnum || $idnum eq '') {
    my @existing;
    foreach my $file (@ARGV) {
        my $fh;
        if (!open($fh, '<', $file)) {
            warn "cannot read $file: $!\n";
            next;
        }
        while (my $line = <$fh>) {
            # $idspace is interpolated as a pattern here.
            push(@existing, $line) if $line =~ /^id: $idspace:0/;
        }
        close($fh);
    }
    print sort @existing;
    die "specify --idnum";
}

# The counter is formatted with %07d and incremented for each new ID, so it
# has to be a plain non-negative integer ("0" is allowed).
die "--idnum must be a non-negative integer, got '$idnum'\n"
    unless $idnum =~ /^\d+$/;
# ---------------------------------------------------------------------
# Pass 1: buffer the whole input and decide which term IDs get a new ID
# ---------------------------------------------------------------------
# %idmap : original ID => newly minted ID, or undef for an ID that matches
#          the pattern but already has the "<idspace>:<digits>" form.
# @lines : every input line, in order, kept for the output pass.
my %idmap = ();
my @lines = ();
while (defined(my $line = <>)) {
    push(@lines, $line);

    # Only "id:" lines introduce terms.
    next unless $line =~ /^id:\s*(\S+)/;
    my $term_id = $1;

    # Leave IDs alone unless they match the user-supplied pattern.
    next unless $term_id =~ /$idmatch/;

    # A new ID was already minted for this term on an earlier line.
    next if $idmap{$term_id};

    # Already-numeric IDs in the target space are recorded but not remapped;
    # everything else takes the next counter value, zero-padded to 7 digits.
    $idmap{$term_id} =
        ($term_id =~ /^$idspace:\d+$/)
        ? undef
        : sprintf("$idspace:%07d", $idnum++);
}
# ---------------------------------------------------------------------
# Pass 2: print the buffered lines with the new IDs substituted
# ---------------------------------------------------------------------
foreach my $line (@lines) {
    if ($line =~ /^id:\s*(\S+)/) {
        my $old_id = $1;
        if ($idmap{$old_id}) {
            # Remapped term: emit the new ID and keep the old one as alt_id.
            print "id: $idmap{$old_id}\n";
            print "alt_id: $old_id\n";
        }
        else {
            print $line;
        }
        next;
    }

    # Any other line: rewrite references to remapped IDs.
    # Each "<idspace>:..." token is looked up on its own. Previously only
    # the first token on the line was looked up and its replacement was
    # written over every token on the line, so a line mentioning several
    # IDs ended up with them all pointing to the same term, and a line
    # whose first token was unmapped was never rewritten at all.
    # NOTE: a token runs up to the next whitespace, so a reference directly
    # followed by punctuation (e.g. "ID," or "ID]") is not found in %idmap
    # and is left unchanged.
    $line =~ s/($idspace:\S+)/$idmap{$1} ? $idmap{$1} : $1/ge;
    print $line;
}
exit 0;
# compr($text)
# Return a copy of $text with all whitespace removed.
# The caller's value is not modified.
sub compr {
    my ($text) = @_;
    (my $squeezed = $text) =~ s/\s+//g;
    return $squeezed;
}
# scriptname()
# Return the last "/"-separated component of $0, i.e. the name this script
# was invoked under without any leading directories.
sub scriptname {
    my @segments = split m{/}, $0;
    my $basename = pop @segments;
    return $basename;
}
# usage()
# Return the help text, with the invoked script name (from scriptname())
# as its first word.
sub usage {
    my $program = scriptname();
    my $text = <<"EOM";
$program --idmatch REGEXP --idspace IDSPACE --idnum COUNTER [OBO FILES]
Assigns OBO-style identifiers to terms
EOM
    return $text;
}