Graph view layer¶
An in-memory graph backend mapped onto BIOPAX datasets, so that they can be queried and manipulated in memory.
In [1]:
import networkx as nx
import operator
import matplotlib.pyplot as plt
from biopax_explorer.graph import view
In [2]:
import logging

# Set the root logger's level to INFO so that INFO-level records are no longer
# rejected by the root logger's level check.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
In [3]:
# Instantiate the model-level graph layer and export it as GraphML.
# `modelfile` is read back with networkx in the following cell.
modelfile = 'data/output/graphlayer.graphml'
gl = view.GraphModelLayer()
gl.write_graphml(modelfile)
Graph layer on the BIOPAX model¶
In [4]:
# Reload the GraphML file written in the previous cell.
graph_all = nx.read_graphml(modelfile)

# Map every node id to the value of its 'name' attribute.
print("model entities in graph")
labels = {node: data['name'] for node, data in graph_all.nodes(data=True)}
print(labels)

# Keep only the nodes whose 'name' is one of the entity names listed in `st`.
print("we select a subset of the entities")
st = [
    'PhysicalEntity', 'Protein', 'Complex', 'ComplexAssembly', 'EntityReference',
    'Control', 'Transport', 'Catalysis', 'MolecularInteraction', 'ProteinReference',
]
selected_nodes = [
    node
    for node, attrs in graph_all.nodes(data=True)
    if attrs['name'] in st
]

# Draw the induced sub graph on a circular layout.
print("we draw the sub graph")
G = graph_all.subgraph(selected_nodes)
pos = nx.circular_layout(G)
nx.draw(
    G,
    pos,
    with_labels=True,
    node_size=155,
    node_color="skyblue",
    node_shape="o",
    alpha=0.5,
    linewidths=1,
    font_size=5,
    font_color="grey",
    font_weight="bold",
    width=0.5,
    edge_color="grey",
    arrows=True,
)

# Second label pass: draw the 'name' of each selected node in blue
# (nx.draw above already drew the raw node ids because with_labels=True).
labels = {node: data['name'] for node, data in G.nodes(data=True)}
print(labels)
nx.draw_networkx_labels(G, pos, labels, font_size=10, alpha=0.9, font_color='blue')
plt.show()
model entities in graph {'1': 'EntityReference', '2': 'EntityFeature', '3': 'EntityReferenceTypeVocabulary', '4': 'Evidence', '5': 'Xref', '6': 'PathwayStep', '7': 'Interaction', '8': 'Degradation', '9': 'Provenance', '10': 'InteractionVocabulary', '11': 'Entity', '12': 'PhysicalEntity', '13': 'Stoichiometry', '14': 'CellVocabulary', '15': 'ModificationFeature', '16': 'SequenceLocation', '17': 'SequenceRegionVocabulary', '18': 'SequenceModificationVocabulary', '19': 'DnaRegion', '20': 'CellularLocationVocabulary', '21': 'Pathway', '22': 'BioSource', '23': 'TemplateReaction', '24': 'TransportWithBiochemicalReaction', '25': 'DeltaG', '26': 'KPrime', '27': 'TemplateReactionRegulation', '28': 'TissueVocabulary', '29': 'Conversion', '30': 'PhenotypeVocabulary', '31': 'CovalentBindingFeature', '32': 'BindingFeature', '33': 'SequenceInterval', '34': 'SequenceSite', '35': 'ChemicalStructure', '36': 'RnaRegionReference', '37': 'DnaRegionReference', '38': 'RnaRegion', '39': 'ProteinReference', '40': 'EvidenceCodeVocabulary', '41': 'BiochemicalPathwayStep', '42': 'PublicationXref', '43': 'MolecularInteraction', '44': 'Catalysis', '45': 'Dna', '46': 'SmallMolecule', '47': 'ControlledVocabulary', '48': 'Gene', '49': 'RelationshipTypeVocabulary', '50': 'GeneticInteraction', '51': 'Score', '52': 'ExperimentalFormVocabulary', '53': 'Protein', '54': 'DnaReference', '55': 'SmallMoleculeReference', '56': 'Complex', '57': 'Transport', '58': 'Rna', '59': 'ExperimentalForm', '60': 'RnaReference', '61': 'ComplexAssembly', '62': 'BiochemicalReaction', '63': 'RelationshipXref', '64': 'FragmentFeature', '65': 'Modulation', '66': 'Control', '67': 'UnificationXref', '68': 'UtilityClass'} we select a subset of the entities we draw the sub graph {'43': 'MolecularInteraction', '12': 'PhysicalEntity', '39': 'ProteinReference', '57': 'Transport', '44': 'Catalysis', '53': 'Protein', '61': 'ComplexAssembly', '1': 'EntityReference', '56': 'Complex', '66': 'Control'}
Graph layer on a BIOPAX dataset¶
In [5]:
# Ask the factory (configured with the "NX" backend) for a dataset graph layer.
# Note: this rebinds `gl`, which previously held the model-level layer.
factory = view.Factory(back="NX")
gl = factory.graphDatasetLayer()
print(type(gl))
<class 'biopax_explorer.graph.view.GraphDatasetLayerNX'>
In [6]:
from biopax_explorer.biopax.utils import gen_utils as gu

# --- configuration -----------------------------------------------------------
dataset = "g6p"
db = "http://db:3030"
# Query URL template. The "%s" placeholder is NOT filled in here: the template
# is handed to populate_domain_instance together with the dataset name
# (presumably substituted there -- confirm against the library).
dburl = db + "/%s/query"
maxct = 10                        # maximum number of instances used to seed the graph
st = ['Protein', 'SmallMolecule']  # class names of the instances to keep

# --- load the domain instances of the dataset ---------------------------------
gl.model_instance_dict = gl.mpop.populate_domain_instance(dburl, dataset, gu.prefix(), gu.domain())
print("-----")

# --- pick the first `maxct` instances whose class name is listed in `st` -------
collec = []
gl.g = gl.newGraph()
for inst in gl.model_instance_dict.values():
    # Stop scanning as soon as enough instances were collected; the remaining
    # entries could not contribute anything.
    if len(collec) >= maxct:
        break
    if inst.__class__.__name__ in st:
        collec.append(inst)
        print("uri:", inst.pk)
ct = len(collec)  # number of instances actually selected

# --- build the graph from the selection and export it --------------------------
gl.build(collec)
print("number of nodes:", len(gl.g.nodes))
gl.write_graphml('data/output/datasetlayer.graphml')
gl.write_gexf('data/output/datasetlayer.gexf')
print("-----")
----- uri: http://www.reactome.org/biopax/56/71387#SmallMolecule100 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule101 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule105 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule106 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule119 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule21 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule25 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule27 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule28 uri: http://www.reactome.org/biopax/56/71387#SmallMolecule31 number of nodes: 33 -----
In [7]:
print(gl.g)
G = gl.g
pos = nx.circular_layout(G)

labels = {}   # node id -> text drawn next to the node
colors = []   # one colour per node, in node iteration order
for node, ndata in G.nodes(data=True):
    print("-->", node)
    tp = ndata["ctype"]
    # SmallMolecule nodes are drawn grey, every other class sky blue.
    colors.append('grey' if tp == 'SmallMolecule' else 'skyblue')

    # Fallback label: class name followed by the node id. It is replaced below
    # when the node carries a usable human-readable name.
    labels[node] = tp + str(node)
    for k in ["name", "displayName"]:
        value = ndata.get(k)
        # Attribute values may be missing, None or empty; values containing
        # "http:" are URIs rather than readable names. Skip all of those
        # instead of failing on string concatenation.
        if isinstance(value, str) and value != '' and "http:" not in value:
            labels[node] = value + " (" + tp + ")"
            break

# First pass draws nodes, edges and the raw node ids (with_labels=True) ...
nx.draw(
    G,
    pos,
    with_labels=True,
    node_size=155,
    node_color=colors,
    node_shape="o",
    alpha=0.5,
    linewidths=1,
    font_size=5,
    font_color="grey",
    font_weight="bold",
    width=0.5,
    edge_color="grey",
    arrows=True,
)
# ... second pass draws the readable labels computed above in blue.
nx.draw_networkx_labels(G, pos, labels, font_size=6, alpha=0.9, font_color='blue')
plt.show()
Graph with 33 nodes and 40 edges --> 1 --> 2 --> 3 --> 4 --> 5 --> 6 --> 7 --> 8 --> 9 --> 10 --> 11 --> 12 --> 13 --> 14 --> 15 --> 16 --> 17 --> 18 --> 19 --> 20 --> 21 --> 22 --> 23 --> 24 --> 25 --> 26 --> 27 --> 28 --> 29 --> 30 --> 31 --> 32 --> 33
In [ ]:
In [ ]:
In [8]:
# Shortest paths between two BIOPAX entities, addressed by their URI.

# Step 1: take the 'uri' attribute of the first and of the second node
# (in node iteration order) as the two end points.
uri1 = ""
uri2 = ""
ix = 0
for ix, (n, d) in enumerate(gl.g.nodes(data=True), start=1):
    if ix == 1:
        uri1 = d['uri']
        print(uri1)
    elif ix == 2:
        uri2 = d['uri']
        print(uri2)

# Step 2: resolve each URI back to the node ids carrying it.
nl1 = gl.selectNodeByAttributeValue("uri", uri1)
nl2 = gl.selectNodeByAttributeValue("uri", uri2)
print(gl.g)
print(nl1)
print(nl2)

# Step 3: list the paths between the first match of each URI.
# The last argument (10) is presumably the maximum number of paths -- confirm
# against k_shortest_paths.
print("paths:")
for path in gl.k_shortest_paths(nl1[0], nl2[0], 10):
    print(path)
print("==========")
http://www.reactome.org/biopax/56/71387#SmallMolecule100 http://www.reactome.org/biopax/56/71387#Provenance1 Graph with 33 nodes and 40 edges [1] [2] paths: [1, 2] [1, 4, 6, 2] ==========
In [ ]:
In [9]:
# Preview the first 10 edges visited by a breadth-first and by a depth-first
# edge traversal, both starting from the first node selected in the previous cell.
print("graph traversal")

print("---------Breadth First Search----------")
ct = 0
for ct, e in enumerate(nx.edge_bfs(gl.g, source=nl1[0], orientation=None), start=1):
    print(e)
    if ct == 10:
        break

print("---------Depth First Search----------")
ct = 0
for ct, e in enumerate(nx.edge_dfs(gl.g, source=nl1[0], orientation=None), start=1):
    print(e)
    if ct == 10:
        break
graph traversal ---------Breadth First Search---------- (1, 2) (1, 3) (1, 4) (1, 5) (2, 6) (2, 9) (2, 13) (2, 16) (2, 19) (2, 22) ---------Depth First Search---------- (1, 2) (2, 6) (6, 7) (6, 4) (4, 1) (1, 3) (1, 5) (6, 8) (2, 9) (9, 10)
In [10]:
# Filter graph edges by the attributes of their end nodes (work in progress).
def filter_by_node_att_val(g):
    """Walk the edges of ``g`` and look up the attributes of each source node.

    This is an unfinished placeholder for a node-attribute based edge filter:
    it applies no criterion, removes nothing from ``g`` and returns ``None``.

    Parameters
    ----------
    g : graph
        Object exposing ``edges(data=True)`` (yielding ``(source, target,
        attributes)`` triples) and a ``nodes`` mapping indexable by node id.

    Returns
    -------
    None
    """
    # Snapshot the edges so that a future implementation can remove edges from
    # ``g`` while looping without invalidating the iteration.
    for src, _tgt, _edge_attrs in list(g.edges(data=True)):
        # Index the node mapping to get the attributes of the source node.
        # (Calling ``g.nodes(src)`` would pass the node id as the ``data``
        # argument of the node view instead of looking the node up.)
        _src_attrs = g.nodes[src]
        # TODO: decide from _src_attrs / _edge_attrs whether the edge is kept,
        # e.g. g.remove_edge(src, _tgt) when the criterion fails.
    return None
# NOTE(review): filter_by_node_att_val does not return a graph, so `ng` is None
# after this call; nothing later in the notebook reads it.
ng=filter_by_node_att_val(gl.g)
In [11]:
# Comparison operators handed to filter_graph; any binary predicate from the
# `operator` module can be used (eq, ne, ge, gt, lt, le).
op_edge = operator.lt
op_node = operator.ne

# NOTE(review): the recorded output still lists the 'AMP' and 'Mg2+' nodes and
# all 40 edges -- confirm how filter_graph combines op_node with a list-valued
# node_val.
final_view = gl.filter_graph(
    edge_att="weight",
    edge_val=15,
    node_att='displayName',
    node_val=['AMP', 'Mg2+'],
    op_edge=op_edge,
    op_node=op_node,
)
print(final_view)

# Preview: the loops stop once the counter exceeds 10, i.e. after 11 entries.
print("-------nodes------")
ct = 0
for ct, (n, attv) in enumerate(final_view.nodes(data=True), start=1):
    print(n, attv)
    if ct > 10:
        break

print("------edges------")
ct = 0
for ct, (n1, n2, attv) in enumerate(final_view.edges(data=True), start=1):
    print(n1, n2, attv)
    if ct > 10:
        break
Graph with 33 nodes and 40 edges -------nodes------ 1 {'name': 'AMP', 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule', 'ctype': 'SmallMolecule', 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100', 'availability': '', 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100http://www.reactome.org/biopax/56/71387#BiochemicalReaction104@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@204@1485', 'displayName': 'AMP', 'standardName': 'AMP'} 2 {'name': 'Reactome', 'pk': 'http://www.reactome.org/biopax/56/71387#Provenance1', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#Provenance', 'ctype': 'Provenance', 'uri': 'http://www.reactome.org/biopax/56/71387#Provenance1', 'availability': None, 'comment': 'http://www.reactome.org', 'displayName': 'Reactome', 'standardName': ''} 3 {'name': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_109275', 'pk': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_109275', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref', 'ctype': 'UnificationXref', 'uri': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_109275', 'availability': None, 'comment': 'Database identifier. 
Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser?DB=gk_current&ID=109275', 'displayName': None, 'standardName': None} 4 {'name': 'http://localhost:3030/g6p/CellularLocationVocabulary_b761180db8a874567188c589b45cab17', 'pk': 'http://localhost:3030/g6p/CellularLocationVocabulary_b761180db8a874567188c589b45cab17', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#CellularLocationVocabulary', 'ctype': 'CellularLocationVocabulary', 'uri': 'http://localhost:3030/g6p/CellularLocationVocabulary_b761180db8a874567188c589b45cab17', 'availability': None, 'comment': '', 'displayName': None, 'standardName': None} 5 {'name': 'http://identifiers.org/chebi/CHEBI:16027', 'pk': 'http://identifiers.org/chebi/CHEBI:16027', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#EntityReference', 'ctype': 'EntityReference', 'uri': 'http://identifiers.org/chebi/CHEBI:16027', 'availability': None, 'comment': '', 'displayName': '', 'standardName': ''} 6 {'name': 'Mg2+', 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule101', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule', 'ctype': 'SmallMolecule', 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule101', 'availability': '', 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule101http://www.reactome.org/biopax/56/71387#Complex122@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@66@1399', 'displayName': 'Mg2+', 'standardName': ''} 7 {'name': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_114632', 'pk': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_114632', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref', 'ctype': 'UnificationXref', 'uri': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_114632', 'availability': None, 'comment': 'Database 
identifier. Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser?DB=gk_current&ID=114632', 'displayName': None, 'standardName': None} 8 {'name': 'http://identifiers.org/chebi/CHEBI:18420', 'pk': 'http://identifiers.org/chebi/CHEBI:18420', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#EntityReference', 'ctype': 'EntityReference', 'uri': 'http://identifiers.org/chebi/CHEBI:18420', 'availability': None, 'comment': '', 'displayName': '', 'standardName': ''} 9 {'name': 'TPNH', 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule105', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule', 'ctype': 'SmallMolecule', 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule105', 'availability': '', 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule105@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@553@1095', 'displayName': 'TPNH', 'standardName': 'TPNH'} 10 {'name': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_29364', 'pk': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_29364', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref', 'ctype': 'UnificationXref', 'uri': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_29364', 'availability': None, 'comment': 'Database identifier. 
Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser?DB=gk_current&ID=29364', 'displayName': None, 'standardName': None} 11 {'name': 'http://localhost:3030/g6p/CellularLocationVocabulary_d7b7576a6ce6c6961a2de2e8c0608786', 'pk': 'http://localhost:3030/g6p/CellularLocationVocabulary_d7b7576a6ce6c6961a2de2e8c0608786', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#CellularLocationVocabulary', 'ctype': 'CellularLocationVocabulary', 'uri': 'http://localhost:3030/g6p/CellularLocationVocabulary_d7b7576a6ce6c6961a2de2e8c0608786', 'availability': None, 'comment': '', 'displayName': None, 'standardName': None} ------edges------ 1 2 {'weight': 10, 'name': 'dataSource'} 1 3 {'weight': 10, 'name': 'xref'} 1 4 {'weight': 10, 'name': 'cellularLocation'} 1 5 {'weight': 10, 'name': 'entityReference'} 2 6 {'weight': 10, 'name': 'dataSource'} 2 9 {'weight': 10, 'name': 'dataSource'} 2 13 {'weight': 10, 'name': 'dataSource'} 2 16 {'weight': 10, 'name': 'dataSource'} 2 19 {'weight': 10, 'name': 'dataSource'} 2 22 {'weight': 10, 'name': 'dataSource'} 2 25 {'weight': 10, 'name': 'dataSource'}
In [12]:
# Print the attribute names carried by the first node only (the loop is left
# right after its first iteration; nothing is printed for an empty graph).
for n, d in gl.g.nodes(data=True):
    attribute_names = list(d)
    print(attribute_names)
    break
['name', 'pk', 'rdf_type', 'ctype', 'uri', 'availability', 'comment', 'displayName', 'standardName']
In [13]:
# Reverse index of the node attributes. As the recorded output shows, the
# 'displayName' entry maps each display name to the list of node ids having it.
rina = gl.revIndexNodeAttValues()
rina['displayName']
Out[13]:
{'AMP': [1], 'Reactome': [2], '': [5, 8, 12, 15, 18, 21, 24, 27, 30, 33], 'Mg2+': [6], 'TPNH': [9], 'TPN': [13], '6-Phospho-D-glucono-1,5-lactone': [16], 'Glc': [19], 'ATP': [22], 'G6P': [25], 'ADP': [28], 'Fru(6)P': [31]}
In [14]:
# Forward index of the node attributes: look up the attribute dictionary of
# one node by its id.
nid = 1
ina = gl.indexNodeAttValues()
ina[nid]
Out[14]:
{'name': 'AMP', 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule', 'ctype': 'SmallMolecule', 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100', 'availability': '', 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100http://www.reactome.org/biopax/56/71387#BiochemicalReaction104@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@204@1485', 'displayName': 'AMP', 'standardName': 'AMP'}
In [ ]:
In [ ]:
In [ ]: