Skip to content

Commit e36b7e0

Browse files
authoredDec 9, 2018
Create calculate_prob_cdf.py
1 parent 98651af commit e36b7e0

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed
 

‎calculate_prob_cdf.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import glob
import math
import os

import numpy as np
import pandas as pd
from sklearn import preprocessing
6+
7+
def probability(genaticMaps):
    """Convert genetic distance in centiMorgans to a recombination probability.

    Evaluates ``(1 - exp(-cM / 50)) / 2`` on the ``"cM"`` entry of the
    argument.

    Parameters:
        genaticMaps: Any mapping-like object indexable by ``"cM"``: a single
            row (``pandas.Series`` or ``dict``) holding one numeric value, or
            a whole ``pandas.DataFrame`` whose ``"cM"`` column is numeric.

    Returns:
        The recombination probability: a scalar when ``"cM"`` is a scalar,
        otherwise an element-wise result with the same shape as the column.

    Raises:
        KeyError: If the argument has no ``"cM"`` entry.
    """
    # np.exp (unlike math.exp) broadcasts, so the same function serves both
    # row-wise ``DataFrame.apply(..., axis=1)`` and whole-column calls.
    prob = (1.0 - np.exp(-genaticMaps["cM"] / 50)) / 2.0
    return prob
11+
12+
13+
# Read every tab-separated genetic map (*.txt) in the current directory and
# write a table of normalized recombination probabilities and their CDF.
output_dir = "./genetic_map"
# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before the first write.
os.makedirs(output_dir, exist_ok=True)

for filename in glob.glob('*.txt'):
    genaticMaps = pd.read_csv(filename, sep="\t")

    # 1. Convert the cM to probability of recombination (one value per row)
    genaticMaps["Recmb_Prob"] = genaticMaps.apply(probability, axis=1)

    # 2. Normalize the probability (L1: divide by the sum of absolute values).
    # Done directly with pandas because sklearn's preprocessing.normalize
    # rejects 1-D input; a zero norm is replaced by 1 so an all-zero column
    # stays all-zero instead of turning into NaN.
    l1_norm = genaticMaps["Recmb_Prob"].abs().sum()
    if l1_norm == 0:
        l1_norm = 1.0

    cdf_table = pd.DataFrame({
        "chr": genaticMaps["chr"],
        "pos": genaticMaps["pos"],
        "prob": genaticMaps["Recmb_Prob"] / l1_norm,
    })

    # 3. Calculate CDF as the running sum of the normalized probabilities
    cdf_table["cdf"] = cdf_table["prob"].cumsum()

    # Save the final result to a new file (columns: chr, pos, prob, cdf)
    cdf_table.to_csv(
        os.path.join(output_dir, "CDF_" + filename), sep="\t", index=False
    )

0 commit comments

Comments
 (0)