Introduction to the object-oriented Pattern syntax.¶
In [1]:
import logging
# Raise the root logger to INFO so informational log records are emitted.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
In [2]:
# import and utilities
from biopax_explorer.pattern.pattern import PatternExecutor, Pattern
from biopax_explorer.query import EntityNode
from biopax_explorer.biopax import *
import json
In [3]:
from biopax_explorer.pattern.view import writePatternGraphView
from IPython.display import Image
Generating a Query from a Pattern construct using the BIOPAX model Entities¶
We construct a pattern and display its graphical representation¶
In [4]:
# Two nodes: a Protein labelled "P" and an EntityReference labelled "E".
p = Pattern()
prot = EntityNode("P", Protein())
entityReference = EntityNode("E", EntityReference())

# Constrain P on its displayName, then link P to E through `entityReference`.
prot.whereAttribute("displayName", "ADPGK", "CONTAINS")
prot.connectedWith(entityReference, "entityReference")
p.define(prot, entityReference)

# Write the pattern graph twice: once as SVG, once with the default format.
svg_path = "data/output/test_pattern_view.svg"
writePatternGraphView(p, svg_path, "svg")
png_path = "data/output/test_pattern_view.png"
writePatternGraphView(p, png_path)

# Last expression of the cell: display the PNG inline.
Image(png_path)
Out[4]:
Executing the Pattern on a dataset loaded from a file¶
In [5]:
# Run the pattern `p` against a dataset read from a local file.
pe = PatternExecutor()
pe.datasetFile("data/input/export_all_g6p.xml")

# executePattern returns lightweight rows (PK instances) rather than full
# entities, to keep memory use and query time low.
result = pe.executePattern(p)
firstresult = result[:10]

print("""#--------glucokinase related proteins in the dataset----------""")
print("""#--------uris (identifiers)----------""")

# Lazily walk the first rows and keep only entities bound to the 'P' node.
labelled_proteins = (
    entity
    for entity_row in firstresult
    for entity in entity_row
    if entity.meta_label == 'P'
)
for protein in labelled_proteins:
    print(protein.pk)
#--------glucokinase related proteins in the dataset---------- #--------uris (identifiers)---------- http://www.reactome.org/biopax/56/71387#Protein186
From a SPARQL endpoint, local or remote (Fuseki)¶
In [6]:
# Banner describing the pattern built below. The constraint is on the
# `displayName` attribute (see whereAttribute), so the text says so.
print("""
#-------------------------------------------------------------------------------------
a Pattern that represents all Protein 'P' with an EntityReference 'E'
, 'P' must have a displayName containing a specific keyword.
#-------------------------------------------------------------------------------------
""")
# Protein node "P" linked to EntityReference node "E" via `entityReference`.
p=Pattern()
prot = EntityNode("P", Protein())
entityReference=EntityNode("E", EntityReference())
prot.connectedWith(entityReference, "entityReference")
# Keep only proteins whose displayName contains "FRK".
prot.whereAttribute("displayName", "FRK","CONTAINS")
p.define(prot,entityReference)
#------------------------------------------------------------------------------------- a Pattern that represents all Protein 'P' with an EntityReference 'E' , 'P' must have a comment containing a specific keyword. #-------------------------------------------------------------------------------------
In [7]:
# Names of the two RDF datasets held by the triple store.
datasetN = "netpath"
datasetP = "panther"
# Base URL of the SPARQL server; swap the two `db` lines to target the online instance instead.
db = "http://db:3030" # with local triple store deployed with docker-compose
#db = "https://rdf-ds.genouest.org" # using an online default triple store
# One executor per dataset, both built from the same `db` URL.
# NOTE(review): peP is not used by any later cell in this notebook — confirm it is still needed.
peN = PatternExecutor(db,datasetN)
peP = PatternExecutor(db,datasetP)
In [8]:
print("""
#-------------------------------------------------------------------------------------
We query a dataset with the Pattern. The dataset is an RDF dataset stored in a triple store, local or remote.
defined by 2 parameters 'db' (url) and 'dataset'
#-------------------------------------------------------------------------------------
""")
print("""execution""")

# Cap the number of results: good practice while a Pattern is still being tested.
peN.maxCount(100)
result = peN.executePattern(p)
firstresult = result[:5]

print("""#------------------""")
# `i` numbers entities continuously across rows; it stays 0 when nothing matched.
i = 0
flattened_entities = (entity for entity_row in firstresult for entity in entity_row)
for i, entity in enumerate(flattened_entities, start=1):
    print("%s uri:%s" % (i, entity.pk))
    # Entities labelled 'P' or 'E' are the nodes declared in the Pattern;
    # anything else is a neighbour pulled in alongside them.
    is_core = entity.meta_label in ('P', 'E')
    if not is_core:
        print(" linked entity: neighbour from a core entity %s" % (type(entity),))
    else:
        print(" core entity: referenced in Pattern %s" % (type(entity),))
print("""#------------------""")
#------------------------------------------------------------------------------------- We query a dataset with the Pattern. The dataset is an RDF dataset stored in a triple store, local or remote. defined by 2 parameters 'db' (url) and 'dataset' #------------------------------------------------------------------------------------- execution #------------------ 1 uri:http://pathwaycommons.org/pc12/#Protein_fe9aa3569b03eb22514796f21db8dea3 core entity: referenced in Pattern <class 'rdfobj.mapper.PK'> 2 uri:http://identifiers.org/uniprot/P42685 core entity: referenced in Pattern <class 'rdfobj.mapper.PK'> 3 uri:http://pathwaycommons.org/pc12/#Protein_e327253efbb4440eb6664fc6e69627c4 core entity: referenced in Pattern <class 'rdfobj.mapper.PK'> 4 uri:http://identifiers.org/uniprot/P42685 core entity: referenced in Pattern <class 'rdfobj.mapper.PK'> #------------------
Display the SPARQL Query used in background¶
In [9]:
print("""
#-------------------------------------------------------------------------------------
For flexibility and learning, the internal generated SPARQL queries can be displayed-----------------------------------
They can be reused in other context
""")

# An executor built with no arguments is enough to generate the query text for `p`.
query_generator = PatternExecutor()
querylist = query_generator.queries(p)

# Print a header followed by the query text for each generated query.
for generated_query in querylist:
    print("#---generated sparql query---\n\n", generated_query, sep="\n")
#------------------------------------------------------------------------------------- For flexibility and learning, the internal generated SPARQL queries can be displayed----------------------------------- They can be reused in other context #---generated sparql query--- prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> prefix bi: <http://www.biopax.org/release/biopax-level3.owl#> select ?s2 ?o2 ?entityReferencen0E ?s2__rdft ?o2__rdft where { ?s2 ?entityReferencen0E ?o2 . ?s2 rdf:type ?s2__rdft . ?o2 rdf:type ?o2__rdft . { ?o2 a bi:EntityReference } UNION { ?o2 a bi:RnaRegionReference } UNION { ?o2 a bi:SmallMoleculeReference } UNION { ?o2 a bi:DnaReference } UNION { ?o2 a bi:DnaRegionReference } UNION { ?o2 a bi:ProteinReference } UNION { ?o2 a bi:RnaReference } . { ?s2 bi:displayName ?displayName }. { ?s2 a bi:Protein } FILTER ( ?entityReferencen0E = bi:entityReference ) . FILTER ( CONTAINS(?displayName,'FRK') ) . }
In [10]:
#----dev-----------
import importlib
def define_instance_from_name(module,class_name):
    """Instantiate `class_name` from `module` with no constructor arguments.

    `module` may be an already-imported module object or a module name
    (str); a name is imported first. A trace line is printed on every call.
    Returns the new instance.
    """
    print("==%s define_instance_from_name==%s" %(module,class_name))
    resolved = importlib.import_module(module) if isinstance(module, str) else module
    return getattr(resolved, class_name)()
def define_module_from_name( module_name):
    """Import `module_name` and bind it in this namespace under that name.

    The module is stored in `globals()` keyed by the `module_name` string
    (the full dotted string for a dotted name), so it can be looked up by
    name afterwards. The imported module is also returned.

    Raises ImportError (from importlib) when the module cannot be imported.
    """
    module = importlib.import_module(module_name)
    # Key by the name string: keying by the module object itself would leave
    # the module unreachable by name.
    globals()[module_name] = module
    return module
# Build a Catalysis instance purely from its module and class names, then show its repr.
inst = define_instance_from_name(module="biopax", class_name="Catalysis")
print(inst)
#----------
==biopax define_instance_from_name==Catalysis Catalysis(pk=None, pop_state=None, exhausted=None, meta_label=None, rdf_type=http://www.biopax.org/release/biopax-level3.owl#Catalysis, _dataSource=None, _evidence=None, _xref=None, _availability=None, _comment=None, _displayName=None, _name=None, _standardName=None, _interactionType=None, _participant=None, _controlled=None, _controller=None, _controlType=None, _cofactor=None, _catalysisDirection=None)
Create a pattern with connected entities and get the fully populated entities
In [11]:
# Pattern: a Control node linked to any Entity through its `controlled` relation.
p = Pattern()
controlled = EntityNode("CONTROLLED", Entity())
interaction = EntityNode("CONTROL", Control())
interaction.connectedWith(controlled, "controlled")
p.define(interaction, controlled)

print("""execution""")
# fetchEntities populates real BIOPAX model instances (attributes and relations);
# max_count caps the number of results to avoid overload.
result = peN.fetchEntities(p, level=1, max_count=1)
print("""#------------------""")

i = 0
for entity_row in result:
    for entity in entity_row:
        i += 1
        # meta_label is the label given to the EntityNode when the pattern was
        # built; it tells the nodes apart even when several share a class.
        label = entity.meta_label
        if label == 'CONTROLLED':
            print("%s CONTROLLED entity: referenced in Pattern %s" % (i, type(entity)))
            print(" uri:%s" % (entity.pk,))
        elif label == 'CONTROL':
            print("%s CONTROL entity: referenced in Pattern %s" % (i, type(entity)))
            print(" uri:%s" % (entity.pk,))
execution #------------------ 1 CONTROL entity: referenced in Pattern <class 'biopax.catalysis.Catalysis'> uri:http://pathwaycommons.org/pc12/#Catalysis_84b80f728dd3f89bea7345778bb4eb94 2 CONTROLLED entity: referenced in Pattern <class 'biopax.biochemicalreaction.BiochemicalReaction'> uri:http://pathwaycommons.org/pc12/#BiochemicalReaction_b892991189fbca87ef55e70584793cc0
Search for an entity by its URI, with JSON serialization
In [12]:
# URI of the Control instance to look up.
uri="http://pathwaycommons.org/pc12/#Catalysis_84b80f728dd3f89bea7345778bb4eb94"

# Pattern: the Control with that URI plus its `controlled` and `controller` entities.
p=Pattern()
controlled = EntityNode("CONTROLLED", Entity())
controller = EntityNode("CONTROLLER", Entity())
interaction=EntityNode("CONTROL", Control())
interaction.has_uri(uri)
interaction.connectedWith(controlled, "controlled")
interaction.connectedWith(controller, "controller")
p.define(interaction,controlled,controller)

print("""execution""")
# fetchEntities populates real BIOPAX model instances with full attributes and relations
result = peN.fetchEntities(p, level=1, max_count=10)
print("""#------------------""")

i=0
# Start from an empty list (not None) so a later cell can iterate over it
# even when no controller is found.
selected_entity_list=[]
for entity_row in result:
    for entity in entity_row:
        i=i+1
        print("%s uri:%s" %(i,entity.pk))
        if isinstance(entity, Control):
            # Use a distinct name so the `controller` EntityNode above is not overwritten.
            controllers=entity.get_controller()
            if controllers is not None:
                for ctrl in controllers:
                    print("controller:",ctrl.to_json())
                # Keep the controllers of the most recently seen Control for the next cell.
                selected_entity_list=controllers
execution #------------------ 1 uri:http://pathwaycommons.org/pc12/#Catalysis_84b80f728dd3f89bea7345778bb4eb94 controller: { "uri": "http://pathwaycommons.org/pc12/#Protein_79856ff790bab5a19b49069093957a14", "dataSource": null, "evidence": null, "xref": null, "availability": null, "comment": null, "displayName": null, "name": null, "standardName": null, "__class__": "Entity" } 2 uri:http://pathwaycommons.org/pc12/#BiochemicalReaction_b892991189fbca87ef55e70584793cc0 3 uri:http://pathwaycommons.org/pc12/#Protein_79856ff790bab5a19b49069093957a14
In [13]:
from biopax_explorer.biopax.doc import helper
from biopax_explorer.biopax.utils import gen_utils as gu
Search for an entity by its URI, using the entity getters
In [14]:
# For every entity selected earlier, build a one-node pattern pinned to its
# URI and fetch it back as a populated instance.
for selected_entity in selected_entity_list:
    p = Pattern()
    my_entity = EntityNode("ENTITY", selected_entity)
    my_entity.has_uri(selected_entity.pk)
    p.define(my_entity)

    print("""execution""")
    result = peN.fetchEntities(p, level=1, max_count=10)
    print("""#------------------""")

    i = 0
    for entity_row in result:
        for entity in entity_row:
            i += 1
            # URI line followed by one empty line.
            print("%s uri:%s" % (i, entity.pk), end="\n\n")

            print("""#--------attributes using helper and getters----------""")
            print(entity.get_displayName())
            # gu.entityToString: utility function that displays the attributes.
            print(gu.entityToString(entity, helper))

            print("""#--------json----------""")
            print(entity.to_json())
execution #------------------ 1 uri:http://pathwaycommons.org/pc12/#Protein_79856ff790bab5a19b49069093957a14 #--------attributes using helper and getters---------- JAK3 uri/pk: http://pathwaycommons.org/pc12/#Protein_79856ff790bab5a19b49069093957a14 class:Protein dataSource : (Provenance, http://pathwaycommons.org/pc12/#Provenance_0cfa90c3ddb627e2e7f3af3d5bd9497d) comment : REPLACED http://pathwaycommons.org/pc12/Protein_79856ff790bab5a19b49069093957a14 displayName : JAK3 name : JAK3__10090 entityReference : [<biopax.entityreference.EntityReference object at 0x7f0ee9297880>] #--------json---------- { "uri": "http://pathwaycommons.org/pc12/#Protein_79856ff790bab5a19b49069093957a14", "dataSource": { "__uri__": "http://pathwaycommons.org/pc12/#Provenance_0cfa90c3ddb627e2e7f3af3d5bd9497d", "comment": null, "xref": null, "displayName": null, "name": null, "standardName": null }, "evidence": null, "xref": null, "availability": null, "comment": "REPLACED http://pathwaycommons.org/pc12/Protein_79856ff790bab5a19b49069093957a14", "displayName": "JAK3", "name": "JAK3__10090", "standardName": null, "cellularLocation": null, "feature": null, "memberPhysicalEntity": null, "notFeature": null, "entityReference": [ { "__class__": "EntityReference", "uri": "http://identifiers.org/uniprot/P52333" } ], "__class__": "Protein" }
In [ ]: