-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfree_energy.py
212 lines (167 loc) · 7.98 KB
/
free_energy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
##!/Users/martinmccullagh/anaconda/bin/python
# NOTE: will have to point to version of python and have MDAnalysis library
# USAGE: dist_windows.py [config file]
# CONFIG FILE FORMAT:
# psffile = [psf, prmtop, gro, or pdb file]
# dcdfile = [traj file in format trr, dcd, etc]
# atom_sel_1 = [CHARMM style atom selection for first group of atoms]
# atom_sel_2 = [CHARMM style atom selection for second group of atoms]
# dist_max = [maximum distance]
# dist_min = [minimum distance]
# dist_delta = [distance bin size]
# load libraries
import sys
import os
import numpy as np
import math
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
#################################################################################################################
############################################## SUBROUTINEs ##############################################
#################################################################################################################
# define subroutines
def ParseConfigFile(cfg_file):
global dist_min,dist_max,dist_delta,output_hist_name,equilib,counts_out,low_end,r_free_out,u_free_out,pmf_vert
f = open(cfg_file)
for line in f:
# first remove comments
if '#' in line:
line, comment = line.split('#',1)
if '=' in line:
option, value = line.split('=',1)
option = option.strip()
value = value.strip()
print "Option:", option, " Value:", value
# check value
if option.lower()=='dist_min':
dist_min = float(value)
elif option.lower()=='dist_max':
dist_max = float(value)
elif option.lower()=='dist_delta':
dist_delta = float(value)
elif option.lower()=='hist_out':
output_hist_name = value
elif option.lower()=='r_free_out':
r_free_out = value
elif option.lower()=='u_free_out':
u_free_out = value
elif option.lower()=='equilibration_steps':
equilib = int(value)
elif option.lower()=='bin_counts_outfile':
counts_out = value
elif option.lower()=='ignore_bins_below':
low_end = int(value)
elif option.lower()=='wham_vertical_correction':
pmf_vert = value
else :
print "Option:", option, " is not recognized"
f.close()
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
#################################################################################################################
############################################## MAIN PROGRAM #############################################
#################################################################################################################
ParseConfigFile(sys.argv[1])
k = 0.001987 # kCal K^-1 mol^-1
T = 298.0 # K
kT = k*T
wham_spring_constant = 20.0 # wham spring constant is 2x the AMBER spring constant
restraint_const = 0.5*wham_spring_constant/(kT) ## 1/2 * wham_spring_constant * 1/(kT) [in kcal/mol]
total_bins = int((dist_max - dist_min)/dist_delta) # distance of bin is bin# * dist_delta + dist_min
edges = np.zeros(total_bins+1)
for j in range(total_bins+1):
edges[j] = dist_min + j*dist_delta
half_bins = np.zeros(total_bins)
for k in range(total_bins):
half_bins[k] = edges[k] + dist_delta/2.0 # Mid point of each bin.
count = 0
window_value = []
for dirs, subdirs, files in os.walk('./'):
if dirs[2:4] == "us":
window = float(dirs[4:])
window_string = dirs[4:]
if count == 0 or window < window_min:
window_min = window
if count == 0 or window > window_max:
window_max = window
check_file = dirs + "/PDI_com.job0." + window_string + ".sort.colvars.traj"
print "Checking " + check_file
if os.path.isfile(check_file):
# open file and create histogram
datalist = np.loadtxt(check_file)
### Start binning algorithm ###
nColumns = datalist.shape[1]
nSteps = datalist.shape[0] # rows
post_equil_steps = nSteps - equilib # This way I could subtract the 5 ns of equilibration here.
# print 'Number of steps: %d, number of steps after equilibration: %d' %(nSteps,post_equil_steps) # convert steps to time so its easier to understand how much time is being taken out
weighting_constant = nSteps*dist_delta*4.0*np.pi
restraint_position = window/10.0 # window put into angstroms
# print 'weighting_constant, %f' %weighting_constant
# x_min = np.ndarray.min(datalist[equilib:,1]) # the minimum collective variable value.
# x_max = np.ndarray.max(datalist[equilib:,1]) # the maximum collective variable value.
# bins = int((x_max - x_min)/dist_delta) # divide distance the dat file spans into bins of dist_delta size.
# print 'Window %d has %d bins' %(window,bins)
weighted_counts = np.zeros(total_bins)
counts = np.zeros(total_bins) # will be the number of points in each bin.
for j in range(equilib,nSteps): # loop inclusive of equilib, exclusive of nSteps.
index = int((datalist[j][1] - dist_min)/dist_delta) # index data as nearest int steps from dist_min. This makes every window's histogram span the entire range and be positioned correctly wrt other windows
if index not in range(total_bins):
print 'Timestep %d is trying to be binned in index %d...' %(j,index)
if index == total_bins: # Data points binned into the last bin don't have a bin to go to so we move them to the last actual bin. Index of -1 is last index. -2 would be second to last...
weighted_counts[-1] += np.exp(restraint_const * (datalist[j][1] - restraint_position)**2)/(weighting_constant * datalist[j][1]**2) # nSteps*dist_delta ---> normalizes the probability density; 4.0*np.pi*datalist[j][1]**2 ---> volume correction
counts[-1] += 1
else:
weighted_counts[index] += np.exp(restraint_const * (datalist[j][1] - restraint_position)**2)/(weighting_constant * datalist[j][1]**2) # nSteps*dist_delta ---> normalizes the probability density; 4.0*np.pi*datalist[j][1]**2 ---> volume correction
counts[index] += 1
for j in range(total_bins):
if counts[j] <= low_end: # Ignore bins with few points
weighted_counts[j] = 0
############## Calculate Restricted Free Energy ###############
#r_free_energy = np.zeros(total_bins)
#for j in range(total_bins):
# r_free_energy[j] = -kT*np.log(weighted_counts[j])
#min_fe = np.ndarray.min(r_free_energy)
#r_free_energy -= min_fe # Make the FE have its minimum at zero
############## Calculate Unrestricted Free Energy ###############
u_free_energy = np.zeros(total_bins)
restraint_position = window/10.0 # window put into angstroms
#print 'restraint position for window # %d: %f' %(window,restraint_position)
for j in range(total_bins): # first two u_free_energy calculations are okay if restraint isn't weighted in weighted_counts
#u_free_energy[j] = r_free_energy[j] - kT * (restraint_const * (half_bins[j] - restraint_position)**2)
#u_free_energy[j] = -kT * np.log(weighted_counts[j] * np.exp(restraint_const * (half_bins[j] - restraint_position)**2))
u_free_energy[j] = -kT * np.log(weighted_counts[j])
##############################
if count == 0:
temp_hist = weighted_counts
bin_counts = counts
#temp_r_free = r_free_energy
temp_u_free = u_free_energy
else:
temp_hist = np.column_stack((temp_hist,weighted_counts))
bin_counts = np.column_stack((bin_counts,counts))
#temp_r_free = np.column_stack((temp_r_free,r_free_energy))
temp_u_free = np.column_stack((temp_u_free,u_free_energy))
##############################
count += 1
window_value.append(window)
col_var = np.zeros(total_bins)
for t in range(total_bins):
col_var[t] = dist_min + t*dist_delta
end_hist = np.column_stack((col_var,temp_hist))
np.savetxt(output_hist_name,end_hist)
end_counts = np.column_stack((col_var,bin_counts))
np.savetxt(counts_out,end_counts)
##end_r_free = np.column_stack((col_var,temp_r_free))
##np.savetxt(r_free_out,end_r_free)
pmf_vert_correction = np.loadtxt(pmf_vert)
print temp_u_free.shape
for j in range(temp_u_free.shape[1]):
for i in range(temp_u_free.shape[0]):
temp_u_free[i][j] += pmf_vert_correction[j]
end_u_free = np.column_stack((col_var,temp_u_free))
np.savetxt(u_free_out,end_u_free)