Graph view layer¶

An in-memory graph backend mapped to BioPAX datasets, so that the data can be explored and manipulated in memory.

In [1]:
import networkx as nx
import operator
import matplotlib.pyplot as plt
from biopax_explorer.graph import view
In [2]:
import logging

# Lower the root logger threshold to INFO so that INFO-level records
# are no longer filtered out by the root logger.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
In [3]:
# Target file for the GraphML export of the model-level graph.
modelfile = 'data/output/graphlayer.graphml'

# Create the model graph layer and serialise it to GraphML.
gl = view.GraphModelLayer()
gl.write_graphml(modelfile)
Graph layer on the BIOPAX model¶
In [4]:
# Load the model-level graph back from the GraphML file referenced by `modelfile`.
graph_all = nx.read_graphml(modelfile)

print("model entities in graph")
# Node id -> value of the node's 'name' attribute, for every node in the model graph.
labels = {node_id: attrs['name'] for node_id, attrs in graph_all.nodes(data=True)}
print(labels)

print("we select a subset of the entities")
st = [
    'PhysicalEntity', 'Protein', 'Complex', 'ComplexAssembly', 'EntityReference',
    'Control', 'Transport', 'Catalysis', 'MolecularInteraction', 'ProteinReference',
]
# Keep only the ids of nodes whose 'name' is one of the selected entity names.
selected_nodes = []
for node_id, attrs in graph_all.nodes(data=True):
    if attrs['name'] in st:
        selected_nodes.append(node_id)

print("we draw the sub graph")
G = graph_all.subgraph(selected_nodes)
pos = nx.circular_layout(G)

# First pass: nodes and edges (with_labels=True also draws the raw node ids).
draw_options = dict(
    with_labels=True,
    node_size=155,
    node_color="skyblue",
    node_shape="o",
    alpha=0.5,
    linewidths=1,
    font_size=5,
    font_color="grey",
    font_weight="bold",
    width=0.5,
    edge_color="grey",
    arrows=True,
)
nx.draw(G, pos, **draw_options)

# Second pass: overlay the entity names of the selected nodes.
labels = {node_id: attrs['name'] for node_id, attrs in G.nodes(data=True)}
print(labels)
nx.draw_networkx_labels(G, pos, labels, font_size=10, alpha=0.9, font_color='blue')

plt.show()
model entities in graph
{'1': 'EntityReference', '2': 'EntityFeature', '3': 'EntityReferenceTypeVocabulary', '4': 'Evidence', '5': 'Xref', '6': 'PathwayStep', '7': 'Interaction', '8': 'Degradation', '9': 'Provenance', '10': 'InteractionVocabulary', '11': 'Entity', '12': 'PhysicalEntity', '13': 'Stoichiometry', '14': 'CellVocabulary', '15': 'ModificationFeature', '16': 'SequenceLocation', '17': 'SequenceRegionVocabulary', '18': 'SequenceModificationVocabulary', '19': 'DnaRegion', '20': 'CellularLocationVocabulary', '21': 'Pathway', '22': 'BioSource', '23': 'TemplateReaction', '24': 'TransportWithBiochemicalReaction', '25': 'DeltaG', '26': 'KPrime', '27': 'TemplateReactionRegulation', '28': 'TissueVocabulary', '29': 'Conversion', '30': 'PhenotypeVocabulary', '31': 'CovalentBindingFeature', '32': 'BindingFeature', '33': 'SequenceInterval', '34': 'SequenceSite', '35': 'ChemicalStructure', '36': 'RnaRegionReference', '37': 'DnaRegionReference', '38': 'RnaRegion', '39': 'ProteinReference', '40': 'EvidenceCodeVocabulary', '41': 'BiochemicalPathwayStep', '42': 'PublicationXref', '43': 'MolecularInteraction', '44': 'Catalysis', '45': 'Dna', '46': 'SmallMolecule', '47': 'ControlledVocabulary', '48': 'Gene', '49': 'RelationshipTypeVocabulary', '50': 'GeneticInteraction', '51': 'Score', '52': 'ExperimentalFormVocabulary', '53': 'Protein', '54': 'DnaReference', '55': 'SmallMoleculeReference', '56': 'Complex', '57': 'Transport', '58': 'Rna', '59': 'ExperimentalForm', '60': 'RnaReference', '61': 'ComplexAssembly', '62': 'BiochemicalReaction', '63': 'RelationshipXref', '64': 'FragmentFeature', '65': 'Modulation', '66': 'Control', '67': 'UnificationXref', '68': 'UtilityClass'}
we select a subset of the entities
we draw the sub graph
{'43': 'MolecularInteraction', '12': 'PhysicalEntity', '39': 'ProteinReference', '57': 'Transport', '44': 'Catalysis', '53': 'Protein', '61': 'ComplexAssembly', '1': 'EntityReference', '56': 'Complex', '66': 'Control'}
No description has been provided for this image
Graph layer on a BIOPAX dataset¶
In [5]:
 
# Ask the factory for a dataset graph layer using the "NX" backend,
# then show which concrete class was returned.
layer_factory = view.Factory(back="NX")
gl = layer_factory.graphDatasetLayer()
print(type(gl))
<class 'biopax_explorer.graph.view.GraphDatasetLayerNX'>
In [6]:
 

from biopax_explorer.biopax.utils import gen_utils as gu

# Dataset name and triple-store base URL used for this demo.
dataset = "g6p"
db = "http://db:3030"

# Query endpoint template; the %s placeholder is presumably filled with the
# dataset name by populate_domain_instance -- confirm against the library.
dburl = db + "/%s/query"
gl.model_instance_dict = gl.mpop.populate_domain_instance(
    dburl, dataset, gu.prefix(), gu.domain()
)

# NOTE(review): gl.populate_dataset(db, dataset) was left commented out in the
# original cell; it looks like an alternative loading path -- confirm.
print("-----")

ct = 0                             # number of matching instances seen so far
collec = []                        # instances handed to gl.build()
maxct = 10                         # keep at most this many matching instances
st = ['Protein', 'SmallMolecule']  # class names of the instances to keep
gl.g = gl.newGraph()

for pk in gl.model_instance_dict.keys():
    inst = gl.model_instance_dict[pk]
    if inst.__class__.__name__ not in st:
        continue
    ct = ct + 1
    if ct > maxct:
        # Matches beyond the limit are still counted but not collected.
        continue
    collec.append(inst)
    print("uri:", inst.pk)

# Build the graph from the collected instances and export it in two formats.
gl.build(collec)
print("number of nodes:", len(gl.g.nodes))
gl.write_graphml('data/output/datasetlayer.graphml')
gl.write_gexf('data/output/datasetlayer.gexf')
print("-----")
-----
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule100
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule101
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule105
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule106
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule119
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule21
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule25
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule27
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule28
uri: http://www.reactome.org/biopax/56/71387#SmallMolecule31
number of nodes: 33
-----
In [7]:
print(gl.g)
G = gl.g

pos = nx.circular_layout(G)


def _node_label(node, ndata):
    """Return the display label for one node of the dataset graph.

    The first of the 'name' / 'displayName' attributes that is present,
    not None, not empty and does not yield a label containing "http:" is
    used, suffixed with the node's 'ctype' in parentheses. When neither
    attribute qualifies, the label falls back to the ctype followed by
    the node id.

    Parameters:
        node: node identifier as yielded by G.nodes(data=True).
        ndata: attribute dictionary of that node; must contain 'ctype'.

    Returns:
        str: the label text for the node.
    """
    tp = ndata["ctype"]
    for key in ("name", "displayName"):
        value = ndata.get(key)
        # None values would make the string concatenation below fail,
        # and empty strings carry no information: skip both.
        if value is None or value == '':
            continue
        candidate = str(value) + " (" + tp + ")"
        # URI-like values are unreadable on a plot; try the next attribute.
        if "http:" not in candidate:
            return candidate
    return tp + str(node)


labels = {}
colors = []
for node, ndata in G.nodes(data=True):
    print("-->", node)
    # Small molecules are drawn grey, every other entity type sky blue.
    colors.append('grey' if ndata["ctype"] == 'SmallMolecule' else 'skyblue')
    labels[node] = _node_label(node, ndata)

# First pass: nodes and edges (with_labels=True also draws the raw node ids).
nx.draw(G, pos, with_labels=True, node_size=155, node_color=colors, node_shape="o",
        alpha=0.5, linewidths=1, font_size=5, font_color="grey", font_weight="bold",
        width=0.5, edge_color="grey", arrows=True)

# Second pass: overlay the human-readable labels computed above.
nx.draw_networkx_labels(G, pos, labels, font_size=6, alpha=0.9, font_color='blue')

plt.show()
Graph with 33 nodes and 40 edges
--> 1
--> 2
--> 3
--> 4
--> 5
--> 6
--> 7
--> 8
--> 9
--> 10
--> 11
--> 12
--> 13
--> 14
--> 15
--> 16
--> 17
--> 18
--> 19
--> 20
--> 21
--> 22
--> 23
--> 24
--> 25
--> 26
--> 27
--> 28
--> 29
--> 30
--> 31
--> 32
--> 33
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [8]:
# Compute shortest paths between two BioPAX entities (graph nodes), addressed by URI.
uri1 = ""
uri2 = ""
# First pick two entity URIs: the 'uri' attribute of the first and of the
# second node yielded by the graph.
for ix, (n, d) in enumerate(gl.g.nodes(data=True), start=1):
    if ix == 1:
        uri1 = d['uri']
        print(uri1)
    elif ix == 2:
        uri2 = d['uri']
        print(uri2)

# Resolve each URI back to the matching node(s); the first match of each
# result is used as a path endpoint below.
nl1 = gl.selectNodeByAttributeValue("uri", uri1)
nl2 = gl.selectNodeByAttributeValue("uri", uri2)
print(gl.g)
print(nl1)
print(nl2)

print("paths:")
# The third argument (10) is presumably the maximum number of paths -- confirm.
for path in gl.k_shortest_paths(nl1[0], nl2[0], 10):
    print(path)
print("==========")
 
http://www.reactome.org/biopax/56/71387#SmallMolecule100
http://www.reactome.org/biopax/56/71387#Provenance1
Graph with 33 nodes and 40 edges
[1]
[2]
paths:
[1, 2]
[1, 4, 6, 2]
==========
In [ ]:
 
In [9]:
def _print_first_edges(edge_iter, limit=10):
    """Print edges from ``edge_iter``, stopping once ``limit`` edges were printed."""
    for count, edge in enumerate(edge_iter, start=1):
        print(edge)
        if count == limit:
            break


print("graph traversal")

# Both traversals start from the first node selected for uri1.
print("---------Breadth First Search----------")
_print_first_edges(nx.edge_bfs(gl.g, source=nl1[0], orientation=None))

print("---------Depth First Search----------")
_print_first_edges(nx.edge_dfs(gl.g, source=nl1[0], orientation=None))
graph traversal
---------Breadth First Search----------
(1, 2)
(1, 3)
(1, 4)
(1, 5)
(2, 6)
(2, 9)
(2, 13)
(2, 16)
(2, 19)
(2, 22)
---------Depth First Search----------
(1, 2)
(2, 6)
(6, 7)
(6, 4)
(4, 1)
(1, 3)
(1, 5)
(6, 8)
(2, 9)
(9, 10)
In [10]:
# filter graph edge and node by nodes 
#filter  -- by edges
def filter_by_node_att_val(g):
    """Placeholder for a node-attribute based edge filter (not implemented yet).

    Walks every edge of ``g`` and looks up the attribute dictionary of the
    edge's source node. Nothing is removed, collected or returned yet.

    Parameters:
        g: graph object exposing ``edges(data=True)`` (yielding tuples whose
           first item is the source node) and an indexable ``nodes`` view.

    Returns:
        None. The graph is left unchanged.
    """
    # Snapshot the edges so that a future implementation can remove edges
    # from g while iterating.
    for e in list(g.edges(data=True)):
        # Index the node view to get the source node's attributes; calling
        # g.nodes(x) would treat x as a data/attribute selector instead.
        _src_attrs = g.nodes[e[0]]
        # TODO: compare _src_attrs against the requested attribute/value and
        # drop the edge when it does not match.
    return None

# filter_by_node_att_val has no value-returning statement, so ng is bound to None here.
ng=filter_by_node_att_val(gl.g)
In [11]:
# Comparison operators handed to filter_graph for the edge test and the node
# test. Other candidates from the operator module: eq, ne, le, ge, gt, lt.
op_edge = operator.lt
op_node = operator.ne

final_view = gl.filter_graph(
    edge_att="weight",
    edge_val=15,
    node_att='displayName',
    node_val=['AMP', 'Mg2+'],
    op_edge=op_edge,
    op_node=op_node,
)
# NOTE(review): the original cell also kept commented-out edge-only
# (edge_att/edge_val) and node-only (node_att/node_val) variants; presumably
# those arguments are optional -- confirm against filter_graph.

print(final_view)


def _print_head(rows, limit=11):
    """Print each row's items separated by spaces, for at most ``limit`` rows."""
    for shown, row in enumerate(rows, start=1):
        print(*row)
        if shown >= limit:
            break


print("-------nodes------")
_print_head(final_view.nodes(data=True))

print("------edges------")
_print_head(final_view.edges(data=True))
Graph with 33 nodes and 40 edges
-------nodes------
1 {'name': 'AMP', 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule', 'ctype': 'SmallMolecule', 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100', 'availability': '', 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100http://www.reactome.org/biopax/56/71387#BiochemicalReaction104@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@204@1485', 'displayName': 'AMP', 'standardName': 'AMP'}
2 {'name': 'Reactome', 'pk': 'http://www.reactome.org/biopax/56/71387#Provenance1', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#Provenance', 'ctype': 'Provenance', 'uri': 'http://www.reactome.org/biopax/56/71387#Provenance1', 'availability': None, 'comment': 'http://www.reactome.org', 'displayName': 'Reactome', 'standardName': ''}
3 {'name': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_109275', 'pk': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_109275', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref', 'ctype': 'UnificationXref', 'uri': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_109275', 'availability': None, 'comment': 'Database identifier. Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser?DB=gk_current&ID=109275', 'displayName': None, 'standardName': None}
4 {'name': 'http://localhost:3030/g6p/CellularLocationVocabulary_b761180db8a874567188c589b45cab17', 'pk': 'http://localhost:3030/g6p/CellularLocationVocabulary_b761180db8a874567188c589b45cab17', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#CellularLocationVocabulary', 'ctype': 'CellularLocationVocabulary', 'uri': 'http://localhost:3030/g6p/CellularLocationVocabulary_b761180db8a874567188c589b45cab17', 'availability': None, 'comment': '', 'displayName': None, 'standardName': None}
5 {'name': 'http://identifiers.org/chebi/CHEBI:16027', 'pk': 'http://identifiers.org/chebi/CHEBI:16027', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#EntityReference', 'ctype': 'EntityReference', 'uri': 'http://identifiers.org/chebi/CHEBI:16027', 'availability': None, 'comment': '', 'displayName': '', 'standardName': ''}
6 {'name': 'Mg2+', 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule101', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule', 'ctype': 'SmallMolecule', 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule101', 'availability': '', 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule101http://www.reactome.org/biopax/56/71387#Complex122@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@66@1399', 'displayName': 'Mg2+', 'standardName': ''}
7 {'name': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_114632', 'pk': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_114632', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref', 'ctype': 'UnificationXref', 'uri': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_114632', 'availability': None, 'comment': 'Database identifier. Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser?DB=gk_current&ID=114632', 'displayName': None, 'standardName': None}
8 {'name': 'http://identifiers.org/chebi/CHEBI:18420', 'pk': 'http://identifiers.org/chebi/CHEBI:18420', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#EntityReference', 'ctype': 'EntityReference', 'uri': 'http://identifiers.org/chebi/CHEBI:18420', 'availability': None, 'comment': '', 'displayName': '', 'standardName': ''}
9 {'name': 'TPNH', 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule105', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule', 'ctype': 'SmallMolecule', 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule105', 'availability': '', 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule105@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@553@1095', 'displayName': 'TPNH', 'standardName': 'TPNH'}
10 {'name': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_29364', 'pk': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_29364', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref', 'ctype': 'UnificationXref', 'uri': 'http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_29364', 'availability': None, 'comment': 'Database identifier. Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser?DB=gk_current&ID=29364', 'displayName': None, 'standardName': None}
11 {'name': 'http://localhost:3030/g6p/CellularLocationVocabulary_d7b7576a6ce6c6961a2de2e8c0608786', 'pk': 'http://localhost:3030/g6p/CellularLocationVocabulary_d7b7576a6ce6c6961a2de2e8c0608786', 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#CellularLocationVocabulary', 'ctype': 'CellularLocationVocabulary', 'uri': 'http://localhost:3030/g6p/CellularLocationVocabulary_d7b7576a6ce6c6961a2de2e8c0608786', 'availability': None, 'comment': '', 'displayName': None, 'standardName': None}
------edges------
1 2 {'weight': 10, 'name': 'dataSource'}
1 3 {'weight': 10, 'name': 'xref'}
1 4 {'weight': 10, 'name': 'cellularLocation'}
1 5 {'weight': 10, 'name': 'entityReference'}
2 6 {'weight': 10, 'name': 'dataSource'}
2 9 {'weight': 10, 'name': 'dataSource'}
2 13 {'weight': 10, 'name': 'dataSource'}
2 16 {'weight': 10, 'name': 'dataSource'}
2 19 {'weight': 10, 'name': 'dataSource'}
2 22 {'weight': 10, 'name': 'dataSource'}
2 25 {'weight': 10, 'name': 'dataSource'}
In [12]:
# Show the attribute names carried by the first node of the dataset graph
# (nothing is printed when the graph has no nodes).
first_entry = next(iter(gl.g.nodes(data=True)), None)
if first_entry is not None:
    print(list(first_entry[1].keys()))
['name', 'pk', 'rdf_type', 'ctype', 'uri', 'availability', 'comment', 'displayName', 'standardName']
In [13]:
# Build the reverse index of node attribute values from the dataset graph layer.
# Judging by the displayed result, rina[attribute][value] is the list of node
# ids carrying that value (e.g. 'AMP' -> [1]) -- confirm against the library.
rina=gl.revIndexNodeAttValues()
# Bare last expression: the notebook displays the index for 'displayName'.
rina['displayName']
Out[13]:
{'AMP': [1],
 'Reactome': [2],
 '': [5, 8, 12, 15, 18, 21, 24, 27, 30, 33],
 'Mg2+': [6],
 'TPNH': [9],
 'TPN': [13],
 '6-Phospho-D-glucono-1,5-lactone': [16],
 'Glc': [19],
 'ATP': [22],
 'G6P': [25],
 'ADP': [28],
 'Fru(6)P': [31]}
In [14]:
# Build the forward index from the dataset graph layer. Judging by the
# displayed result, ina[node_id] is that node's attribute dictionary
# -- confirm against the library.
ina=gl.indexNodeAttValues()

# Node id to inspect; the bare last expression displays its attributes.
nid=1
ina[nid]
Out[14]:
{'name': 'AMP',
 'pk': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100',
 'rdf_type': 'http://www.biopax.org/release/biopax-level3.owl#SmallMolecule',
 'ctype': 'SmallMolecule',
 'uri': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100',
 'availability': '',
 'comment': 'http://www.reactome.org/biopax/56/71387#SmallMolecule100http://www.reactome.org/biopax/56/71387#BiochemicalReaction104@Layout@http://www.reactome.org/biopax/56/71387#Pathway6@204@1485',
 'displayName': 'AMP',
 'standardName': 'AMP'}
In [ ]:
 
In [ ]:
 
In [ ]: