5
5
import pandas
6
6
import scipy .sparse
7
7
8
- from hetmatpy .matrix import metaedge_to_adjacency_matrix
9
8
import hetmatpy .degree_weight
9
+ from hetmatpy .matrix import metaedge_to_adjacency_matrix
10
10
11
11
12
12
def degrees_to_degree_to_ind (degrees ):
@@ -18,16 +18,22 @@ def degrees_to_degree_to_ind(degrees):
18
18
19
19
def metapath_to_degree_dicts(graph, metapath):
    """
    Build the source-side and target-side degree groupings for a metapath.

    The metapath is resolved through ``graph.metagraph.get_metapath``. The
    adjacency matrix of its first metaedge is summed along axis 1 to give the
    source degrees, and the adjacency matrix of its last metaedge is summed
    along axis 0 to give the target degrees. Each flattened degree sequence
    is then handed to ``degrees_to_degree_to_ind``.

    Returns
    -------
    tuple
        ``(source_degree_to_ind, target_degree_to_ind)``, i.e. the two
        results of ``degrees_to_degree_to_ind`` (presumably mappings from a
        degree to the indices of the nodes having that degree -- confirm
        against that helper).
    """
    resolved_metapath = graph.metagraph.get_metapath(metapath)
    first_metaedge = resolved_metapath[0]
    last_metaedge = resolved_metapath[-1]

    # Only the matrix (third element of the returned triple) is needed here.
    _, _, first_adjacency = metaedge_to_adjacency_matrix(
        graph, first_metaedge, dense_threshold=0.7
    )
    _, _, last_adjacency = metaedge_to_adjacency_matrix(
        graph, last_metaedge, dense_threshold=0.7
    )

    # Row sums for the source side, column sums for the target side;
    # ``.flat`` gives a 1-D iterator over the summed values.
    row_sums = first_adjacency.sum(axis=1).flat
    col_sums = last_adjacency.sum(axis=0).flat

    return (
        degrees_to_degree_to_ind(row_sums),
        degrees_to_degree_to_ind(col_sums),
    )
28
32
29
33
30
- def generate_degree_group_stats (source_degree_to_ind , target_degree_to_ind , matrix , scale = False , scaler = 1 ):
34
+ def generate_degree_group_stats (
35
+ source_degree_to_ind , target_degree_to_ind , matrix , scale = False , scaler = 1
36
+ ):
31
37
"""
32
38
Yield dictionaries with degree grouped stats
33
39
"""
@@ -41,31 +47,37 @@ def generate_degree_group_stats(source_degree_to_ind, target_degree_to_ind, matr
41
47
# row_matrix = scipy.sparse.csc_matrix(row_matrix)
42
48
for target_degree , col_inds in target_degree_to_ind .items ():
43
49
row = {
44
- ' source_degree' : source_degree ,
45
- ' target_degree' : target_degree ,
50
+ " source_degree" : source_degree ,
51
+ " target_degree" : target_degree ,
46
52
}
47
- row ['n' ] = len (row_inds ) * len (col_inds )
53
+ row ["n" ] = len (row_inds ) * len (col_inds )
48
54
if source_degree == 0 or target_degree == 0 :
49
- row [' sum' ] = 0
50
- row [' nnz' ] = 0
51
- row [' sum_of_squares' ] = 0
55
+ row [" sum" ] = 0
56
+ row [" nnz" ] = 0
57
+ row [" sum_of_squares" ] = 0
52
58
yield row
53
59
continue
54
60
55
61
slice_matrix = row_matrix [:, col_inds ]
56
- values = slice_matrix .data if scipy .sparse .issparse (slice_matrix ) else slice_matrix
62
+ values = (
63
+ slice_matrix .data
64
+ if scipy .sparse .issparse (slice_matrix )
65
+ else slice_matrix
66
+ )
57
67
if scale :
58
68
values = numpy .arcsinh (values / scaler )
59
- row [' sum' ] = values .sum ()
60
- row [' sum_of_squares' ] = (values ** 2 ).sum ()
69
+ row [" sum" ] = values .sum ()
70
+ row [" sum_of_squares" ] = (values ** 2 ).sum ()
61
71
if scipy .sparse .issparse (slice_matrix ):
62
- row [' nnz' ] = slice_matrix .nnz
72
+ row [" nnz" ] = slice_matrix .nnz
63
73
else :
64
- row [' nnz' ] = numpy .count_nonzero (slice_matrix )
74
+ row [" nnz" ] = numpy .count_nonzero (slice_matrix )
65
75
yield row
66
76
67
77
68
- def dwpc_to_degrees (graph , metapath , damping = 0.5 , ignore_zeros = False , ignore_redundant = True ):
78
+ def dwpc_to_degrees (
79
+ graph , metapath , damping = 0.5 , ignore_zeros = False , ignore_redundant = True
80
+ ):
69
81
"""
70
82
Yield a description of each cell in a DWPC matrix adding source and target
71
83
node degree info as well as the corresponding path count.
@@ -78,26 +90,32 @@ def dwpc_to_degrees(graph, metapath, damping=0.5, ignore_zeros=False, ignore_red
78
90
the same DWPC.
79
91
"""
80
92
metapath = graph .metagraph .get_metapath (metapath )
81
- _ , _ , source_adj_mat = metaedge_to_adjacency_matrix (graph , metapath [0 ], dense_threshold = 0.7 )
82
- _ , _ , target_adj_mat = metaedge_to_adjacency_matrix (graph , metapath [- 1 ], dense_threshold = 0.7 )
93
+ _ , _ , source_adj_mat = metaedge_to_adjacency_matrix (
94
+ graph , metapath [0 ], dense_threshold = 0.7
95
+ )
96
+ _ , _ , target_adj_mat = metaedge_to_adjacency_matrix (
97
+ graph , metapath [- 1 ], dense_threshold = 0.7
98
+ )
83
99
source_degrees = source_adj_mat .sum (axis = 1 ).flat
84
100
target_degrees = target_adj_mat .sum (axis = 0 ).flat
85
101
del source_adj_mat , target_adj_mat
86
102
87
- source_path = graph .get_nodes_path (metapath .source (), file_format = ' tsv' )
88
- source_node_df = pandas .read_csv (source_path , sep = ' \t ' )
89
- source_node_names = list (source_node_df [' name' ])
103
+ source_path = graph .get_nodes_path (metapath .source (), file_format = " tsv" )
104
+ source_node_df = pandas .read_csv (source_path , sep = " \t " )
105
+ source_node_names = list (source_node_df [" name" ])
90
106
91
- target_path = graph .get_nodes_path (metapath .target (), file_format = ' tsv' )
92
- target_node_df = pandas .read_csv (target_path , sep = ' \t ' )
93
- target_node_names = list (target_node_df [' name' ])
107
+ target_path = graph .get_nodes_path (metapath .target (), file_format = " tsv" )
108
+ target_node_df = pandas .read_csv (target_path , sep = " \t " )
109
+ target_node_names = list (target_node_df [" name" ])
94
110
95
- row_names , col_names , dwpc_matrix = graph .read_path_counts (metapath , 'dwpc' , damping )
111
+ row_names , col_names , dwpc_matrix = graph .read_path_counts (
112
+ metapath , "dwpc" , damping
113
+ )
96
114
dwpc_matrix = numpy .arcsinh (dwpc_matrix / dwpc_matrix .mean ())
97
115
if scipy .sparse .issparse (dwpc_matrix ):
98
116
dwpc_matrix = dwpc_matrix .toarray ()
99
117
100
- _ , _ , path_count = graph .read_path_counts (metapath , ' dwpc' , 0.0 )
118
+ _ , _ , path_count = graph .read_path_counts (metapath , " dwpc" , 0.0 )
101
119
if scipy .sparse .issparse (path_count ):
102
120
path_count = path_count .toarray ()
103
121
@@ -110,14 +128,14 @@ def dwpc_to_degrees(graph, metapath, damping=0.5, ignore_zeros=False, ignore_red
110
128
if ignore_zeros and dwpc_value == 0 :
111
129
continue
112
130
row = {
113
- ' source_id' : row_names [row_ind ],
114
- ' target_id' : col_names [col_ind ],
115
- ' source_name' : source_node_names [row_ind ],
116
- ' target_name' : target_node_names [col_ind ],
117
- ' source_degree' : source_degrees [row_ind ],
118
- ' target_degree' : target_degrees [col_ind ],
119
- ' path_count' : path_count [row_ind , col_ind ],
120
- ' dwpc' : dwpc_value ,
131
+ " source_id" : row_names [row_ind ],
132
+ " target_id" : col_names [col_ind ],
133
+ " source_name" : source_node_names [row_ind ],
134
+ " target_name" : target_node_names [col_ind ],
135
+ " source_degree" : source_degrees [row_ind ],
136
+ " target_degree" : target_degrees [col_ind ],
137
+ " path_count" : path_count [row_ind , col_ind ],
138
+ " dwpc" : dwpc_value ,
121
139
}
122
140
yield collections .OrderedDict (row )
123
141
@@ -127,13 +145,19 @@ def single_permutation_degree_group(permuted_hetmat, metapath, dwpc_mean, dampin
127
145
Compute degree-grouped permutations for a single permuted_hetmat,
128
146
for one metapath.
129
147
"""
130
- _ , _ , matrix = hetmatpy .degree_weight .dwpc (permuted_hetmat , metapath , damping = damping , dense_threshold = 0.7 )
131
- source_deg_to_ind , target_deg_to_ind = hetmatpy .degree_group .metapath_to_degree_dicts (permuted_hetmat , metapath )
148
+ _ , _ , matrix = hetmatpy .degree_weight .dwpc (
149
+ permuted_hetmat , metapath , damping = damping , dense_threshold = 0.7
150
+ )
151
+ (
152
+ source_deg_to_ind ,
153
+ target_deg_to_ind ,
154
+ ) = hetmatpy .degree_group .metapath_to_degree_dicts (permuted_hetmat , metapath )
132
155
row_generator = hetmatpy .degree_group .generate_degree_group_stats (
133
- source_deg_to_ind , target_deg_to_ind , matrix , scale = True , scaler = dwpc_mean )
156
+ source_deg_to_ind , target_deg_to_ind , matrix , scale = True , scaler = dwpc_mean
157
+ )
134
158
degree_grouped_df = (
135
159
pandas .DataFrame (row_generator )
136
- .set_index ([' source_degree' , ' target_degree' ])
160
+ .set_index ([" source_degree" , " target_degree" ])
137
161
.assign (n_perms = 1 )
138
162
)
139
163
return degree_grouped_df
0 commit comments