forked from cmungall/obo-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmesh2obo.pl
executable file
·127 lines (111 loc) · 2.28 KB
/
mesh2obo.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/perl -w
use strict;
# ---------------------------------------------------------------------
# Command-line options.
#
# Leading arguments that look like options are consumed here; whatever
# remains in @ARGV afterwards is treated as the list of input files.
# ---------------------------------------------------------------------
my $regexp = '';    # record filter; the empty default selects every record
my $noheader;       # true: do not echo the leading chunk of each input
my $negate;         # true: select records that do NOT match $regexp
my $count;          # true: only report how many records were selected

# The @ARGV guard avoids an "uninitialized value" warning when the script
# is run without arguments.  A lone "-" does not match the pattern, so it
# stays in @ARGV as an input name.
while (@ARGV && $ARGV[0] =~ /^-.+/) {
    my $opt = shift @ARGV;

    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-r' || $opt eq '--regexp') {
        die "$opt requires an argument\n" unless @ARGV;
        $regexp = shift @ARGV;
    }
    elsif ($opt eq '--regexp-file') {
        die "$opt requires an argument\n" unless @ARGV;
        my $f = shift @ARGV;

        open(my $fh, '<', $f) or die "cannot open $f: $!\n";
        my @or = ();
        while (my $line = <$fh>) {
            chomp $line;
            # A blank line would become an empty alternative, which
            # matches everything and defeats the filter.
            next if $line eq '';
            push(@or, $line);
        }
        close($fh);

        # NOTE(review): the "id: " prefix is OBO stanza syntax, but the
        # filter is applied to the raw input records, whose identifier is
        # parsed from a "UI = ..." line -- confirm whether this prefix is
        # what is intended for MeSH input.
        $regexp = sprintf('id: (%s)', join('|', @or));
    }
    elsif ($opt eq '-c' || $opt eq '--count') {
        $count = 1;
    }
    elsif ($opt eq '--noheader') {
        $noheader = 1;
    }
    elsif ($opt eq '-v' || $opt eq '--neg') {
        $negate = 1;
    }
    else {
        # Unknown options are still skipped, but no longer silently.
        warn "ignoring unknown option: $opt\n";
    }
}
# ---------------------------------------------------------------------
# Main loop: read every input, select records, convert them.
# ---------------------------------------------------------------------

# Each read returns the text up to and including the next "*NEWRECORD"
# marker, so one read yields one record.
$/ = "*NEWRECORD";

# Compile the filter once.  The (?:...) wrapper matters: a pattern that
# interpolates to the empty string is Perl's "empty pattern", which reuses
# the last successfully matched regex instead of matching everything.
my $filter = qr/(?:$regexp)/;

my $n = 0;    # number of records selected across all inputs

while (@ARGV) {
    # shift (not pop) so that inputs are processed in command-line order.
    my $f = shift @ARGV;

    my $fh;
    if ($f eq '-') {
        $fh = \*STDIN;
    }
    else {
        open($fh, '<', $f) or die "cannot open $f: $!\n";
    }

    # The first chunk of each input that does not start with "[" is
    # treated as a header: it is echoed verbatim (unless suppressed) and
    # never converted.
    # NOTE(review): for input that begins with the "*NEWRECORD" marker this
    # chunk is just the marker itself -- confirm that echoing it is wanted.
    my $hdr = 0;
    while (<$fh>) {
        if (!$hdr && $_ !~ /^\[/) {
            print unless $noheader || $count;
            $hdr = 1;
        }
        else {
            my $selected = ($_ =~ $filter) ? 1 : 0;
            $selected = !$selected if $negate;
            if ($selected) {
                $n++;
                m2obo($_) unless $count;
            }
        }
    }

    close($fh) unless $f eq '-';
}

if ($count) {
    print "$n\n";
}
exit 0;
# Convert one input record into an OBO [Term] stanza printed on the
# currently selected output handle.
#
# Argument:
#   $s - text of one record.  Lines of the form "KEY = value" are
#        collected (keys are lower-cased, repeated keys keep every value
#        in input order); all other lines are ignored.
#
# Output mapping:
#   first UI      -> id:
#   first MH      -> name:     (omitted when the record has no MH)
#   every ENTRY   -> synonym:  (text before the first "|", RELATED scope)
#   every MN      -> xref:
#   first MS      -> def:
#
# A record with no "KEY = value" line at all is skipped silently; a record
# with fields but no UI is skipped with a warning, since a stanza without
# an id is not usable.  Returns nothing.
sub m2obo {
    my $s = shift;
    return unless defined $s;

    my %h = ();
    foreach my $line (split(/\n/, $s)) {
        # Trailing whitespace is trimmed so CRLF input does not leave a
        # "\r" at the end of ids and names.
        if ($line =~ /^(\S+)\s*=\s*(.*?)\s*$/) {
            push(@{ $h{lc($1)} }, $2);
        }
    }
    return unless %h;

    my $id = $h{ui} ? $h{ui}->[0] : undef;
    unless (defined $id && $id ne '') {
        warn "skipping record without a UI field\n";
        return;
    }

    print "[Term]\n";
    print "id: $id\n";

    my $name = $h{mh} ? $h{mh}->[0] : undef;
    print "name: $name\n" if defined $name && $name ne '';

    foreach my $entry (@{ $h{entry} || [] }) {
        # Only the term itself is kept; the "|"-separated attributes that
        # follow it are dropped.
        (my $syn = $entry) =~ s/\|.*//;
        next if $syn eq '';
        printf "synonym: \"%s\" RELATED []\n", _obo_escape($syn);
    }

    print "xref: $_\n" foreach @{ $h{mn} || [] };

    my $def = $h{ms} ? $h{ms}->[0] : undef;
    if (defined $def && $def ne '') {
        printf "def: \"%s\" []\n", _obo_escape($def);
    }

    print "\n";
    return;
}

# Escape text for use inside a double-quoted OBO value.  Backslashes are
# doubled first so the backslashes added for quotes are not re-escaped.
sub _obo_escape {
    my $text = shift;
    $text =~ s/\\/\\\\/g;
    $text =~ s/"/\\"/g;
    return $text;
}
# Return the last "/"-separated component of $0, i.e. the name this
# script was invoked under without any leading directories.
sub scriptname {
    my @components = split m{/}, $0;
    return $components[-1];
}
# Build the help text shown for -h / --help.
#
# Returns the text as one string (the caller prints it).  The synopsis
# lists the options the option parser actually accepts; the earlier text
# described a different tool and advertised a non-existent "--r" option.
sub usage {
    my $sn = scriptname();
    return <<EOM;
$sn [-h|--help] [-c|--count] [--noheader] [-v|--neg] [-r|--regexp REGULAR-EXPRESSION] [--regexp-file FILE] MESH-FILE...

Converts records from MeSH ASCII files (records separated by *NEWRECORD)
into OBO [Term] stanzas on standard output. Use - as the file name to
read standard input.

Options:
  -r, --regexp RE     only convert records that match RE
  --regexp-file FILE  build the filter from the lines of FILE
  -v, --neg           select the records that do NOT match
  -c, --count         print the number of selected records only
  --noheader          do not echo the leading chunk of each input
  -h, --help          show this message

Example:

  $sn -r 'MH = .*Neoplasms' mesh.bin

EOM
}