Skip to content

Commit 5e529df

Browse files
committed
Add made command
1 parent 16e1888 commit 5e529df

File tree

2 files changed

+359
-0
lines changed

2 files changed

+359
-0
lines changed

psamm/commands/made.py

+358
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
# This file is part of PSAMM.
2+
#
3+
# PSAMM is free software: you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License as published by
5+
# the Free Software Foundation, either version 3 of the License, or
6+
# (at your option) any later version.
7+
#
8+
# PSAMM is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU General Public License
14+
# along with PSAMM. If not, see <http://www.gnu.org/licenses/>.
15+
#
16+
# Copyright 2014-2017 Jon Lund Steffensen <[email protected]>
17+
# Copyright 2016 Keith Dufault-Thompson <[email protected]>
18+
# Copyright 2016 Julie Cuddigan <[email protected]>
19+
20+
"""Metabolic Adjustment by Differential Expression (MADE) command."""
21+
22+
from __future__ import unicode_literals
23+
24+
import time
25+
import logging
26+
import math
27+
import csv
28+
29+
from six import iteritems
30+
31+
from ..command import SolverCommandMixin, MetabolicMixin, Command
32+
from ..util import MaybeRelative
33+
from ..lpsolver import lp
34+
from ..expression import boolean
35+
36+
# Module-level logging
37+
logger = logging.getLogger(__name__)
38+
39+
40+
class MadeFluxBalance(MetabolicMixin, SolverCommandMixin, Command):
41+
"""Run MADE flux balance analysis on the model.
42+
43+
Args:
44+
gene_var1 = Dictionary, key:value = gene expression objects:their new
45+
variable id, first set
46+
gene_var2 = Dictionary; key:value = gene expression objects:their new
47+
variable id, second set
48+
var_ineqvar1 = xi; Dictionary, key:value = new variable ids:their
49+
defined inequality variable, first set
50+
var_ineqvar2 = xi+1; Dictionary, key:value = new variable ids:their
51+
defined inequality variable, second set
52+
gene_pval = Dictionary, key:value = gene ID:gene fold change
53+
probability (pvalue)
54+
gene_diff = Dictionary, key:value = gene ID: binary up/down/constant
55+
regulation values
56+
gvdict = Dictionary, key:value = gene ID:defined variable ids from both
57+
sets (each key has 2 values)
58+
problem = Flux balance problem
59+
"""
60+
61+
@classmethod
62+
def init_parser(cls, parser):
63+
parser.add_argument(
64+
'--flux-threshold',
65+
help='Enter maximum objective flux as a decimal or percent',
66+
type=MaybeRelative, default=MaybeRelative('100%'))
67+
parser.add_argument(
68+
'--transc-file', help='Enter path to transcriptomic data file',
69+
metavar='FILE')
70+
parser.add_argument('--fva', help='Enable FVA', action='store_true')
71+
super(MadeFluxBalance, cls).init_parser(parser)
72+
73+
def run(self):
74+
"""Run MADE implementation."""
75+
gene_dict = self.get_gene_dict()
76+
77+
biomass_fun = self._model.biomass_reaction
78+
79+
# Create problem instance
80+
solver = self._get_solver(integer=True)
81+
prob = solver.create_problem()
82+
v_0 = prob.namespace()
83+
v_1 = prob.namespace()
84+
85+
# Define flux variables
86+
for reaction_id in self._mm.reactions:
87+
lower, upper = self._mm.limits[reaction_id]
88+
v_0.define([reaction_id], lower=lower, upper=upper)
89+
v_1.define([reaction_id], lower=lower, upper=upper)
90+
91+
# Create mass balance constraints for both conditions
92+
massbalance_0_lhs = {compound: 0 for compound in self._mm.compounds}
93+
massbalance_1_lhs = {compound: 0 for compound in self._mm.compounds}
94+
for (compound, reaction_id), value in iteritems(self._mm.matrix):
95+
massbalance_0_lhs[compound] += v_0(reaction_id) * value
96+
massbalance_1_lhs[compound] += v_1(reaction_id) * value
97+
for _, lhs in iteritems(massbalance_0_lhs):
98+
prob.add_linear_constraints(lhs == 0)
99+
for _, lhs in iteritems(massbalance_1_lhs):
100+
prob.add_linear_constraints(lhs == 0)
101+
102+
start_time = time.time()
103+
104+
# Set biomass flux threshold
105+
flux_threshold = self._args.flux_threshold
106+
if flux_threshold.relative:
107+
prob.set_objective(v_0(biomass_fun))
108+
result = prob.solve(lp.ObjectiveSense.Maximize)
109+
if not result:
110+
raise Exception('Failed to solve FBA')
111+
flux_threshold.reference = result.get_value(v_0(biomass_fun))
112+
113+
prob.add_linear_constraints(v_0(biomass_fun) >= float(flux_threshold))
114+
prob.add_linear_constraints(v_1(biomass_fun) >= float(flux_threshold))
115+
116+
gene_term_0 = prob.namespace()
117+
gene_term_1 = prob.namespace()
118+
119+
reaction_0 = prob.namespace(
120+
self._mm.reactions, types=lp.VariableType.Binary)
121+
reaction_1 = prob.namespace(
122+
self._mm.reactions, types=lp.VariableType.Binary)
123+
124+
gene_term_dict_0 = {}
125+
gene_term_dict_1 = {}
126+
for rxn_id, exp in sorted(iteritems(gene_dict)):
127+
create_gpr_constraints(
128+
prob, rxn_id, exp, reaction_0(rxn_id), gene_term_0,
129+
gene_term_dict_0)
130+
create_gpr_constraints(
131+
prob, rxn_id, exp, reaction_1(rxn_id), gene_term_1,
132+
gene_term_dict_1)
133+
134+
if self._args.transc_file is not None:
135+
con1, con2, gene_pval, gene_diff = idc(
136+
open_file(self._args.transc_file))
137+
138+
add_final_constraints(self._mm, prob, v_0, reaction_0)
139+
add_final_constraints(self._mm, prob, v_1, reaction_1)
140+
result = make_obj_fun(
141+
prob, gene_diff, gene_pval, gene_term_0, gene_term_1)
142+
143+
# Run FBA
144+
for reaction_id in sorted(self._mm.reactions):
145+
print('{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
146+
reaction_id, result.get_value(v_0(reaction_id)),
147+
result.get_value(v_1(reaction_id)),
148+
result.get_value(reaction_0(reaction_id)) > 0.5,
149+
result.get_value(reaction_1(reaction_id)) > 0.5,
150+
self._mm.get_reaction(reaction_id),
151+
gene_dict.get(reaction_id, '')))
152+
153+
logger.info('Solving took {:.2f} seconds'.format(
154+
time.time() - start_time))
155+
156+
def get_gene_dict(self):
157+
"""Using the reaction file called inside of the model file, it returns
158+
a dictionary with reaction IDs as keys and their associated
159+
gene-protein reaction (GPR) logic (i.e. (gene 1 and gene 2) or gene 3)
160+
as values of type str.
161+
"""
162+
gene_dict = {}
163+
for reaction in self._model.parse_reactions():
164+
if reaction.genes is not None:
165+
gene_dict[reaction.id] = boolean.Expression(reaction.genes)
166+
167+
return gene_dict
168+
169+
170+
def make_obj_fun(prob, gene_diff, gene_pval, gene_term_0, gene_term_1):
171+
"""Constructs the MADE objective funtion from dictionaries of LP variables.
172+
173+
Objective function consists of the summation of three functions dependent
174+
on the up/down regulation of gene expression between conditions. The
175+
functions contain a weighting function, and the difference between the
176+
binary representations of condition 1 and condition 2.
177+
"""
178+
i_vars = 0.0 # Increasing gene expression
179+
d_vars = 0.0 # Decreasing gene expression
180+
c_vars = 0.0 # Constant gene expression
181+
182+
def weight(p):
183+
return -math.log10(p)
184+
185+
for gene in gene_pval:
186+
# Comment by Julie/Matt?: Limitation of math.log()
187+
wp = max(2.2204460492e-16, gene_pval[gene])
188+
189+
x_0 = gene_term_0(boolean.Variable(gene))
190+
x_1 = gene_term_1(boolean.Variable(gene))
191+
# x_delta = x_0 XOR X_1
192+
prob.define(('xor', gene), types=lp.VariableType.Binary)
193+
x_delta = prob.var(('xor', gene))
194+
prob.add_linear_constraints(
195+
x_delta <= x_0 + x_1,
196+
x_delta >= x_0 - x_1, x_delta >= x_1 - x_0,
197+
x_delta <= 2 - x_0 - x_1)
198+
199+
if gene_diff[gene] == 1:
200+
i_vars += weight(wp) * (x_1 - x_0)
201+
elif gene_diff[gene] == -1:
202+
d_vars += weight(wp) * (x_0 - x_1)
203+
elif gene_diff[gene] == 0:
204+
c_vars += weight(wp) * x_delta
205+
206+
objective = i_vars + d_vars - c_vars
207+
208+
prob.set_objective(objective)
209+
result = prob.solve(lp.ObjectiveSense.Maximize)
210+
if not result:
211+
raise Exception('Unable to solve: ' + result.status)
212+
213+
obj_value = result.get_value(objective)
214+
logger.info('Objective: {}'.format(obj_value))
215+
# prob.add_linear_constraints(objective == obj_value)
216+
217+
return result
218+
219+
220+
def create_gpr_constraints(prob, rxn_id, exp, reaction_var, gene_term,
221+
gene_term_dict):
222+
"""Opens all gene-logic containers, defines content, outputs the linear
223+
inequalities by calling bool_ineqs(). Sorts data into dictionaries that
224+
are used in other functions. Is recursive. No output.
225+
226+
Args:
227+
exp_obj: All of the expression objects (genes, AND, OR)
228+
var_gen: Counter used for relabeling the genes and arguments as
229+
variables
230+
new_var_id: Variable ID, also includes original reaction ID for first
231+
layer
232+
"""
233+
next_terms = iter([exp.root])
234+
current_type = None
235+
variable = reaction_var
236+
arguments = []
237+
stack = []
238+
while True:
239+
try:
240+
term = next(next_terms)
241+
except StopIteration:
242+
term = None
243+
244+
if term in gene_term_dict:
245+
arguments.append(gene_term_dict[term])
246+
elif isinstance(term, boolean.Variable):
247+
gene_term.define([term], types=lp.VariableType.Binary)
248+
term_var = gene_term(term)
249+
gene_term_dict[term] = term_var
250+
arguments.append(term_var)
251+
elif isinstance(term, (boolean.And, boolean.Or)):
252+
stack.append((current_type, next_terms, variable, arguments))
253+
current_type = term.__class__
254+
next_terms = iter(term)
255+
arguments = []
256+
gene_term.define([term], types=lp.VariableType.Binary)
257+
variable = gene_term(term)
258+
gene_term_dict[term] = variable
259+
else:
260+
# End of term
261+
if current_type is None:
262+
prob.add_linear_constraints(variable == arguments[0])
263+
break
264+
elif current_type == boolean.And:
265+
prob.add_linear_constraints(
266+
*and_constraints(variable, arguments))
267+
elif current_type == boolean.Or:
268+
prob.add_linear_constraints(
269+
*or_constraints(variable, arguments))
270+
271+
term_var = variable
272+
current_type, next_terms, variable, arguments = stack.pop()
273+
arguments.append(term_var)
274+
275+
276+
def and_constraints(var, arguments):
277+
"""Create constraints for boolean AND.
278+
279+
Creates constraints for: var = And(arguments) where var and each argument
280+
is a binary variable.
281+
"""
282+
var_sum = 0
283+
for arg in arguments:
284+
yield var <= arg
285+
var_sum += arg
286+
yield var >= var_sum - (len(arguments) - 1)
287+
288+
289+
def or_constraints(var, arguments):
290+
"""Create constraints for boolean OR.
291+
292+
Creates constraints for: var = Or(arguments) where var and each argument
293+
is a binary variable.
294+
"""
295+
var_sum = 0
296+
for arg in arguments:
297+
yield var >= arg
298+
var_sum += arg
299+
yield var <= var_sum
300+
301+
302+
def open_file(path):
303+
"""Returns the contents of model file in a tuple of dictionaries.
304+
File Form: tsv format, FOUR Columns: (1) Gene name, (2) Condition 1 Data,
305+
(3) Condition 2 Data, (4) P-value of the fold change for transition 1->2.
306+
"""
307+
file1 = open(path)
308+
con1_dict = {}
309+
con2_dict = {}
310+
pval_dict = {}
311+
312+
file1.readline()
313+
for row in csv.reader(file1, delimiter=str('\t')):
314+
con1_dict[row[0]] = float(row[1])
315+
con2_dict[row[0]] = float(row[2])
316+
if float(row[3]) == float(0.0):
317+
pval_dict[row[0]] = 1e-400
318+
else:
319+
pval_dict[row[0]] = float(row[3])
320+
321+
return con1_dict, con2_dict, pval_dict
322+
323+
324+
def idc(dicts):
325+
"""Used for accessing the list of dictionaries created in open_file()
326+
Creates a dictionary for the gene ID and a value = [-1, 0, +1]
327+
corresponding to decreasing, constant, and inreasing expression between the
328+
conditions.
329+
"""
330+
con1 = dicts[0]
331+
con2 = dicts[1]
332+
pval = dicts[2]
333+
diff = {}
334+
335+
for key in con1:
336+
if con2[key] == con1[key]:
337+
diff[key] = 0
338+
elif con2[key] > con1[key]:
339+
diff[key] = 1
340+
else:
341+
diff[key] = -1
342+
343+
return con1, con2, pval, diff
344+
345+
346+
def add_final_constraints(mm, prob, v, z):
347+
"""Takes the metabolic model, the LP Problem, and the binary
348+
dictionaries of each condition. Adds constraints connecting flux
349+
variables, reactions, and their flux bounds.
350+
"""
351+
for rxn in mm.reactions:
352+
vmin = mm.limits[rxn].lower
353+
vmax = mm.limits[rxn].upper
354+
flux_var = v(rxn)
355+
active = z(rxn)
356+
357+
prob.add_linear_constraints(
358+
active*vmax >= flux_var, flux_var >= active*vmin)

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
'gapcheck = psamm.commands.gapcheck:GapCheckCommand',
6464
'gapfill = psamm.commands.gapfill:GapFillCommand',
6565
'genedelete = psamm.commands.genedelete:GeneDeletionCommand',
66+
'made = psamm.commands.made:MadeFluxBalance',
6667
'masscheck = psamm.commands.masscheck:MassConsistencyCommand',
6768
'randomsparse = psamm.commands.randomsparse:RandomSparseNetworkCommand',
6869
'robustness = psamm.commands.robustness:RobustnessCommand',

0 commit comments

Comments
 (0)