I am trying to implement straightness centrality, as defined in equation six of this document (http://udsu-strath.com/wp-content/uploads/2010/06/PA2006_06new1.pdf): a node's centrality is the sum, over all other nodes, of the Euclidean distance divided by the graph (network) distance between that node and each other node, multiplied by 1/(N-1).
I have a graph of about 60,000 nodes; I tried iterating through it and using dask to parallelize it, but parallelism only divides the constant factor — it doesn't reduce the big-O complexity.
I have it implemented by storing the x/y coordinates of each node in a dictionary keyed by node number, so they can be accessed easily, but I am not very skilled at solving this sort of algorithm-optimisation problem.
Libraries like graph-tool have OpenMP support, but I am not sure how to write an algorithm that leverages it.
This is my current attempt
#too slow
def euclidean_dist(x1, y1, x2, y2):
    """Return the Euclidean distance between points (x1, y1) and (x2, y2).

    Uses math.hypot, which is implemented in C and avoids the
    intermediate overflow/underflow that sqrt(dx**2 + dy**2) can hit
    for very large or very small coordinates.
    """
    return math.hypot(x1 - x2, y1 - y2)
def bravo(target, n, vertID_dict_scattered, network_dist):
    """Return the straightness term for one (source, target) pair.

    Parameters
    ----------
    target, n : hashable
        Keys into ``vertID_dict_scattered`` (target and source vertex ids).
    vertID_dict_scattered : mapping
        Maps vertex id -> (x, y) coordinate pair.
    network_dist : float
        Shortest-path (graph) distance from ``n`` to ``target``.

    Returns the Euclidean distance divided by the network distance.
    A non-positive / zero network distance contributes 0.0 instead of
    raising ZeroDivisionError (an unreachable target with distance
    ``inf`` already contributes 0 naturally).
    """
    if network_dist <= 0:
        return 0.0
    src = vertID_dict_scattered[n]
    dst = vertID_dict_scattered[target]
    return math.hypot(src[0] - dst[0], src[1] - dst[1]) / network_dist
@dask.delayed
def alpha(G, n, vertID_dict_scattered):
    """Delayed task: straightness centrality of a single vertex *n*.

    Implements equation 6 of Porta et al.:
    C_S(n) = 1/(N-1) * sum over t != n of d_Euclidean(n, t) / d_network(n, t).

    Returns the pair ``(n, straightness)`` so results can be matched
    back to their vertex after a distributed gather.
    """
    # One single-source shortest-path pass yields the network distance to
    # every target at once, weighted by the "mm_len" edge length property.
    sp = gt.shortest_distance(G, n, weights=G.edge_properties["mm_len"])
    distances = sp.get_array()
    num_nodes = len(vertID_dict_scattered)
    # BUG FIX: graph_tool.Graph does not implement __len__, so the original
    # `len(G) > 1` check raised TypeError; use num_vertices() instead.
    if len(distances) == 0 or G.num_vertices() <= 1:
        return n, 0
    straightness = 0.0
    for target, network_dist in enumerate(distances):
        if n != target:
            straightness += bravo(target, n, vertID_dict_scattered, network_dist)
    # Normalise by 1/(N-1) per equation 6.
    return n, straightness * (1.0 / (num_nodes - 1.0))
def split_list(lst, chunk_size):
    """Split *lst* into consecutive chunks of at most *chunk_size* items.

    The final chunk is shorter when ``len(lst)`` is not a multiple of
    ``chunk_size``.  An empty list yields an empty list of chunks.

    BUG FIX: the original return line ended with a stray ``\\`` line
    continuation, which glued it onto the following ``def`` and made
    the file a syntax error.
    """
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]
def straightness_centrality(G, vertID_dict):
    """Compute straightness centrality for every vertex of *G*.

    Fans the per-vertex work out to a dask.distributed cluster in
    chunks of 32 vertices.  Relies on a module-level ``client``
    (dask.distributed.Client) — TODO confirm it is created before this
    is called.

    Parameters
    ----------
    G : graph_tool.Graph
        The network; must carry the "mm_len" edge property used by alpha().
    vertID_dict : dict
        Maps vertex id -> (x, y) coordinates.

    Returns a list of per-chunk result lists; each element is the
    ``(vertex, straightness)`` pair produced by alpha().

    NOTE: the original function carried a commented-out duplicate of
    this body that appended *ungathered* futures; the dead code has
    been removed.
    """
    chunked_list = split_list(list(G.iter_vertices()), 32)
    # Scatter the two large shared inputs once, so each task receives a
    # lightweight future instead of re-serialising the graph per task.
    G_scattered = client.scatter(G)
    vertID_dict_scattered = client.scatter(vertID_dict)
    results = []
    for chunk in chunked_list:
        delayed_objs = [alpha(G_scattered, n, vertID_dict_scattered) for n in chunk]
        futures = client.compute(delayed_objs)
        # gather blocks until the chunk finishes, bounding cluster memory.
        results.append(client.gather(futures))
    return results
# Script entry point: gtG (the graph-tool graph) and vertID_dict
# ({vertex_id: (x, y)}) are presumably built earlier in the session,
# along with the dask `client` — TODO confirm before running.
straightness_df = straightness_centrality(gtG, vertID_dict)