How to create a BIOPAX file from BIOPAX entities¶

In [1]:
from biopax_explorer.graph.serializer import  BPSerializer
from biopax_explorer.biopax import Entity,Catalysis,Xref,PhysicalEntity,Protein,SmallMolecule
import textwrap
In [2]:
# TODO: remove entities without a pk from the dict at the end

We create a list of BIOPAX entities programmatically¶

In [3]:
# Build a few BioPAX entities by hand.
# Warning: entity primary keys (pk / uri) must be unique.
entity1 = Entity()
entity1.xref = Xref()

protein1 = Protein(pk="1235")

catalysis1 = Catalysis(pk="56464")
catalysis1.xref = Xref()
catalysis1.set_catalysisDirection('LEFT-TO-RIGHT')
catalysis1.set_cofactor(PhysicalEntity(pk="7897987987"))
catalysis1.set_controller([protein1])
# This SmallMolecule is created without a pk, so its uri is null in the
# JSON dump printed below.
catalysis1.set_controlled([SmallMolecule()])

# Dump the catalysis as JSON to inspect what was built.
print(catalysis1.to_json())

# The collection handed to the serializer holds only the catalysis.
collec = [catalysis1]
{
  "uri": "56464",
  "dataSource": null,
  "evidence": null,
  "xref": null,
  "availability": null,
  "comment": null,
  "displayName": null,
  "name": null,
  "standardName": null,
  "interactionType": null,
  "participant": null,
  "controlled": [
    {
      "__uri__": null,
      "dataSource": null,
      "evidence": null,
      "xref": null,
      "availability": null,
      "comment": null,
      "displayName": null,
      "name": null,
      "standardName": null,
      "cellularLocation": null,
      "feature": null,
      "memberPhysicalEntity": null,
      "notFeature": null,
      "entityReference": null
    }
  ],
  "controller": [
    {
      "__class__": "Protein",
      "uri": "1235"
    }
  ],
  "controlType": null,
  "cofactor": {
    "__class__": "PhysicalEntity",
    "uri": "7897987987"
  },
  "catalysisDirection": "LEFT-TO-RIGHT",
  "__class__": "Catalysis"
}

We use the BPSerializer utility class¶

In [4]:
# User namespace passed to the serializer; judging by the triples printed
# below, it is prefixed to each entity pk to form the subject URI.
userns = "http://mynamespace.org/"
bps = BPSerializer(userns, collec)

# List every entry the serializer collected, as "<pk> <class name>".
# NOTE(review): the rendered output also shows a `None` key (the
# SmallMolecule built without a pk) under this heading, although the next
# message reports it as not saved -- confirm against BPSerializer.
print(" the following entities will  be saved : ")
for pk, ent in bps.collect_entity_dict.items():
    print(pk, ent.__class__.__name__)

# Number of entities that have no uri / pk.
void_count = len(bps.collect_void_uri)
print("no uri /pk for %s  entities(will not be saved) :" %(void_count)  )

# Convert the collection to an RDF graph and print each triple.
graph = bps.toRDFGraph()
for triple in graph:
    subj, pred, obj = triple
    print(f"    s: {subj}, p: {pred}, o: {obj}")

    
 the following entities will  be saved : 
56464 Catalysis
1235 Protein
7897987987 PhysicalEntity
None SmallMolecule
no uri /pk for 1  entities(will not be saved) :
    s: http://mynamespace.org/1235, p: http://www.w3.org/1999/02/22-rdf-syntax-ns#type, o: http://www.biopax.org/release/biopax-level3.owl#Protein
    s: , p: http://www.w3.org/2002/07/owl#imports, o: http://www.biopax.org/release/biopax-level3.owl#
    s: http://mynamespace.org/56464, p: http://www.w3.org/1999/02/22-rdf-syntax-ns#type, o: http://www.biopax.org/release/biopax-level3.owl#Catalysis
    s: http://mynamespace.org/56464, p: http://www.biopax.org/release/biopax-level3.owl#cofactor, o: http://mynamespace.org/7897987987
    s: http://mynamespace.org/7897987987, p: http://www.w3.org/1999/02/22-rdf-syntax-ns#type, o: http://www.biopax.org/release/biopax-level3.owl#PhysicalEntity
    s: http://mynamespace.org/56464, p: http://www.biopax.org/release/biopax-level3.owl#controller, o: http://mynamespace.org/1235
    s: http://mynamespace.org/56464, p: http://www.biopax.org/release/biopax-level3.owl#catalysisDirection, o: LEFT-TO-RIGHT
    s: , p: http://www.w3.org/1999/02/22-rdf-syntax-ns#type, o: http://www.w3.org/2002/07/owl#Ontology

Output to a BIOPAX RDF/XML file¶

In [5]:
# Destination of the generated RDF/XML document.
ofile = "data/output/constructed_biopax.xml"

print("writing output to rdf/xml file %s " %(ofile))
bps.write(ofile)

print("#-------------------------")
# Preview only: textwrap.shorten collapses whitespace and truncates the
# text to at most `width` characters, ending with a placeholder.
xml_text = bps.rdf_xml()
print(textwrap.shorten(xml_text, width=1000))
writing output to rdf/xml file data/output/constructed_biopax.xml 
#-------------------------
<?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:bp="http://www.biopax.org/release/biopax-level3.owl#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" > <rdf:Description rdf:about="http://mynamespace.org/1235"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#Protein"/> </rdf:Description> <rdf:Description rdf:about=""> <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#Ontology"/> <owl:imports rdf:resource="http://www.biopax.org/release/biopax-level3.owl#"/> </rdf:Description> <rdf:Description rdf:about="http://mynamespace.org/56464"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#Catalysis"/> <bp:catalysisDirection>LEFT-TO-RIGHT</bp:catalysisDirection> <bp:controller rdf:resource="http://mynamespace.org/1235"/> <bp:cofactor rdf:resource="http://mynamespace.org/7897987987"/> </rdf:Description> <rdf:Description rdf:about="http://mynamespace.org/7897987987"> <rdf:type [...]

Run a pattern query on a triple store, then save the resulting entity collection to a new BioPAX XML file¶

In [6]:
from biopax_explorer.graph.serializer import  BPSerializer
from biopax_explorer.pattern.rack import Rack
from biopax_explorer.pattern.pattern import PatternExecutor
 
 

# Query settings.
dataset = "netpath"
db = "http://db:3030" # with local triple store deployed with docker-compose
exfile = "data/output/export_query_from_pattern_%s.xml" % (dataset)

r = Rack()
pe = PatternExecutor(db, dataset)
p = r.inComplexWith()

# First extract entity references only, for quick query and memory
# optimization.
resultref = pe.executePattern(p, by_reference=True, max_count=30)
# NOTE(review): the count printed here is presumably capped by max_count.
print("-- %s results in this dataset --" %(len(resultref)))

# Flatten the result rows into a single list of references.
reflist = [eref for ref_row in resultref for eref in ref_row]

# Then populate only the selected entities, and flatten them the same way.
resultfull = pe.pump(reflist, level=1)
collec = [entity for row in resultfull for entity in row]

# Empty user namespace: the subjects in the rendered output are already
# absolute URIs (presumably taken as-is from the store -- confirm).
userns = ""
bps = BPSerializer(userns, collec)

graph = bps.toRDFGraph()

print("writing output to rdf/xml file %s " %(exfile))
bps.write(exfile)
print("#-------------------------")
print(textwrap.shorten(bps.rdf_xml(), width=2000))

 
-- 30 results in this dataset --
writing output to rdf/xml file data/output/export_query_from_pattern_netpath.xml 
#-------------------------
<?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:bp="http://www.biopax.org/release/biopax-level3.owl#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" > <rdf:Description rdf:about="http://pathwaycommons.org/pc12/#UnificationXref_uniprot_knowledgebase_P21580"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#Xref"/> </rdf:Description> <rdf:Description rdf:about="http://pathwaycommons.org/pc12/#Complex_151ed4c4ece14ff69a6d51679cb7ead7"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#Complex"/> <bp:comment>REPLACED http://pathwaycommons.org/pc12/Complex_151ed4c4ece14ff69a6d51679cb7ead7</bp:comment> <bp:displayName>Complex</bp:displayName> <bp:name>PRLR,JAK2</bp:name> <bp:dataSource rdf:resource="http://pathwaycommons.org/pc12/#Provenance_0cfa90c3ddb627e2e7f3af3d5bd9497d"/> <bp:cellularLocation rdf:resource="http://pathwaycommons.org/pc12/#CellularLocationVocabulary_dc68fffeee0259e0d3bd7a3f6d0cc067"/> <bp:component rdf:resource="http://pathwaycommons.org/pc12/#Protein_b8221ae4aa1927b358e88d6eb6621650"/> </rdf:Description> <rdf:Description rdf:about="http://identifiers.org/uniprot/O60674"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#EntityReference"/> </rdf:Description> <rdf:Description rdf:about="http://pathwaycommons.org/pc12/#Protein_26f69726eb79e168d2f8dfa8c32611d9"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#Protein"/> <bp:comment>REPLACED http://pathwaycommons.org/pc12/Protein_26f69726eb79e168d2f8dfa8c32611d9</bp:comment> <bp:displayName>TRAF2</bp:displayName> <bp:name>TRAF2__9606</bp:name> <bp:dataSource rdf:resource="http://pathwaycommons.org/pc12/#Provenance_0cfa90c3ddb627e2e7f3af3d5bd9497d"/> <bp:entityReference rdf:resource="http://identifiers.org/uniprot/Q12933"/> </rdf:Description> <rdf:Description rdf:about="http://pathwaycommons.org/pc12/#Complex_9695d8dca18d787a873ffda9d7430f9f"> <rdf:type [...]
In [ ]: