In [1]:
import os,pathlib
import textwrap
import networkx as nx
In [2]:
from biopax_explorer.biopax.utils import gen_utils as gu
from biopax_explorer.biopax import *
from biopax_explorer.query import client as cl
In [3]:
# Export every triple of the "g6p" dataset from the triple store to an RDF/XML file.
print("export rdf_xml from triples store")

# --- connection settings ---------------------------------------------------
dataset = "g6p"
db = "http://db:3030"
credentials = "admin"
# URI to avoid in the export (artefact of the local triple-store deployment).
unwanted_subject_uri = "http://localhost:3030/%s/data" % (dataset)

# --- output file: <parent of the working directory>/input/export_all_<dataset>.xml
expath = pathlib.Path.cwd().resolve().parent
exfile1 = "%s/input/export_all_%s.xml" % (expath, dataset)

# --- load all triples into the client's graph ------------------------------
sc = cl.BIOPAXStoreClient(db, dataset, credentials, unwanted_subject_uri)
g = sc.store_to_graph()

# --- preview (shortened to 1000 characters), then write the file -----------
print("#############RDF XML #########################")
rdf_xml_preview = textwrap.shorten(sc.rdf_xml_string(), width=1000)
print(rdf_xml_preview)

sc.save_graph_as_rdf_xml(exfile1)
print(
    "##############################################",
    "all triples exported to %s" % (exfile1),
    "##############################################",
    sep="\n",
)
export rdf_xml from triples store http://db:3030/g6p/query biopax http://www.biopax.org/release/biopax-level3.owl# http://localhost:3030/g6p/data 1000 #############RDF XML ######################### <?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:ns1="http://www.biopax.org/release/biopax-level3.owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" > <rdf:Description rdf:about="http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_R-ALL-114632"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#UnificationXref"/> <ns1:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Reactome stable identifier. Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser_st_id?ST_ID=R-ALL-114632.2</ns1:comment> <ns1:id rdf:datatype="http://www.w3.org/2001/XMLSchema#string">R-ALL-114632</ns1:id> <ns1:db rdf:datatype="http://www.w3.org/2001/XMLSchema#string">reactome</ns1:db> <ns1:idVersion rdf:datatype="http://www.w3.org/2001/XMLSchema#string">2</ns1:idVersion> </rdf:Description> <rdf:Description rdf:about="http://www.reactome.org/biopax/56/71387#SmallMolecule98"> <rdf:type [...] ############################################## all triples exported to /work/input/export_all_g6p.xml ##############################################
In [4]:
# Export only the triples selected by custom SPARQL queries to an RDF/XML file.
# Relies on `expath`, `dataset`, `db`, `credentials` and `unwanted_subject_uri`
# defined by the full-export cell.
exfile2b="%s/input/export_query_%s.xml" % (expath,dataset)
sc=cl.BIOPAXStoreClient(db,dataset,credentials,unwanted_subject_uri)
############################################## only selected triples
# Use the variable names s, p, o only, or pass labels=["s","p","o"] for other variable names.

# q1: bp:memberEntityReference triples whose object is a bp:ProteinReference and
# whose subject is one of the listed *Reference classes.
q1="""
prefix bp: <http://www.biopax.org/release/biopax-level3.owl#>
select ?s ?p ?o
where {
?s ?p ?o.
{ ?o a bp:ProteinReference } .
{ ?s a bp:RnaRegionReference } UNION { ?s a bp:SmallMoleculeReference } UNION { ?s a bp:ProteinReference } UNION { ?s a bp:RnaReference } UNION { ?s a bp:DnaRegionReference } UNION { ?s a bp:DnaReference }
FILTER ( ?p = bp:memberEntityReference ).
}
"""
# q2: bp:controller triples whose object is one of the listed physical-entity
# classes and whose subject is one of the listed interaction/control classes.
q2="""
prefix bp: <http://www.biopax.org/release/biopax-level3.owl#>
select ?s ?p ?o
where {
?s ?p ?o.
{ ?o a bp:Protein } UNION { ?o a bp:Complex } UNION { ?o a bp:RnaRegion } UNION { ?o a bp:Dna } UNION { ?o a bp:Rna } UNION { ?o a bp:DnaRegion } UNION { ?o a bp:SmallMolecule } .
{ ?s a bp:GeneticInteraction } UNION { ?s a bp:Conversion } UNION { ?s a bp:ComplexAssembly } UNION { ?s a bp:Degradation } UNION { ?s a bp:Transport } UNION { ?s a bp:BiochemicalReaction } UNION { ?s a bp:TransportWithBiochemicalReaction } UNION { ?s a bp:MolecularInteraction } UNION { ?s a bp:TemplateReaction } UNION { ?s a bp:Control } UNION { ?s a bp:Catalysis } UNION { ?s a bp:Modulation } UNION { ?s a bp:TemplateReactionRegulation }
FILTER ( ?p = bp:controller ).
}
"""
sc.custom_query_list_append(q1)
sc.custom_query_list_append(q2)
# extension values: 0, 1, 2
#   0: only the s,p,o triples returned by the queries
#   1: case 0 + all triples with s=s
#   2: case 1 + s=o
extension=2
g=sc.store_custom_query_to_graph(extension)
sc.save_graph_as_rdf_xml(exfile2b)
print("##############################################")
# Message fixed: the queries are SPARQL (was misspelled "parql").
print("selected triples from SPARQL queries exported to %s " %(exfile2b))
print("##############################################")
############################################## selected triples from parql queries exported to /work/input/export_query_g6p.xml ##############################################
In [5]:
import requests
from requests.auth import HTTPBasicAuth
######### Create a dataset in Fuseki if it does not exist #########
## Needs the admin role + credentials.
# Fuseki server configuration
FUSEKI_URL ="http://db:3030"
DATASET_NAME = 'test'
# Credentials can be supplied through the FUSEKI_USERNAME / FUSEKI_PASSWORD
# environment variables so real passwords never have to be written into the
# notebook. The fallbacks are the demo values that were previously hard-coded.
USERNAME = os.environ.get('FUSEKI_USERNAME', 'admin')
PASSWORD = os.environ.get('FUSEKI_PASSWORD', 'admin')
def dataset_exists(fuseki_url, dataset_name, auth, timeout=10):
    """Check if a dataset exists on the Fuseki server.

    Args:
        fuseki_url: Base URL of the server (e.g. ``"http://db:3030"``); a
            trailing slash is tolerated.
        dataset_name: Dataset name, with or without the leading ``/``.
        auth: Passed unchanged to ``requests.get`` as ``auth``.
        timeout: Seconds passed to ``requests.get`` so an unreachable server
            cannot block the notebook forever.

    Returns:
        True if an entry of the ``datasets`` list in the JSON answer has
        ``ds.name`` equal to ``/<dataset_name>``, otherwise False.

    Raises:
        RuntimeError: If the server answers with a status code other than 200.
    """
    base_url = fuseki_url.rstrip('/')
    response = requests.get(f"{base_url}/$/datasets", auth=auth, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to retrieve datasets. Status code: {response.status_code}")

    wanted_name = "/" + dataset_name.lstrip('/')
    datasets = response.json().get('datasets', [])
    # .get() so that an entry without 'ds.name' is skipped instead of raising KeyError.
    return any(ds.get('ds.name') == wanted_name for ds in datasets)
def create_dataset(fuseki_url, dataset_name, auth, db_type='mem', timeout=10):
    """Create a dataset on the Fuseki server if it doesn't exist.

    Prints a message saying whether the dataset was created or already existed.

    Args:
        fuseki_url: Base URL of the server (e.g. ``"http://db:3030"``); a
            trailing slash is tolerated for the creation request.
        dataset_name: Name sent as ``dbName``.
        auth: Passed unchanged to ``dataset_exists`` and ``requests.post``.
        db_type: Value sent as ``dbType``; ``'mem'`` for in-memory (default),
            ``'tdb'`` for TDB.
        timeout: Seconds passed to ``requests.post`` so an unreachable server
            cannot block the notebook forever.

    Raises:
        RuntimeError: If the creation request is answered with a status code
            other than 200.
    """
    if dataset_exists(fuseki_url, dataset_name, auth):
        print(f"Dataset '{dataset_name}' already exists.")
        return

    base_url = fuseki_url.rstrip('/')
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    data = {'dbName': dataset_name, 'dbType': db_type}
    response = requests.post(
        f"{base_url}/$/datasets",
        headers=headers,
        data=data,
        auth=auth,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(f"Failed to create dataset. Status code: {response.status_code}")
    print(f"Dataset '{dataset_name}' created successfully.")
# Run the dataset creation with authentication.
# HTTP Basic credentials are built from the USERNAME / PASSWORD constants of this cell.
auth = HTTPBasicAuth(USERNAME, PASSWORD)
# Prints whether DATASET_NAME was created or already existed; raises if the
# server answers with a non-200 status.
create_dataset(FUSEKI_URL, DATASET_NAME, auth)
2024-08-19 13:50:29,647 - DEBUG - Starting new HTTP connection (1): db:3030 2024-08-19 13:50:29,655 - DEBUG - http://db:3030 "GET /$/datasets HTTP/11" 200 414
Dataset 'test' already exists.
In [6]:
#########################
## Delete data in the triple store by URI.
# NOTE: this cell rebinds the globals `dataset`, `db`, `credentials` and `sc`;
# the cells that follow use this `sc` (the "test" dataset client).
dataset="test"
db="http://db:3030"
# NOTE(review): presumably [username, password] - confirm against BIOPAXStoreClient.
credentials=["admin", "admin"] # update the password as necessary
sc=cl.BIOPAXStoreClient(db,dataset,credentials)
# Prefix, namespace and prefixed identifier handed to the delete call.
prefix="eos"
domain="http://www.gruppomcr.com/2020/06/eos-ontology-meets#"
uri_id="eos:uritoberemoved2"
sc.delete_from_store_by_uri_id(uri_id,prefix,domain)
# deletion done
In [7]:
"""
Creating or updating entities in a BioPAX RDF dataset:
an insertion example.
"""
# Build a RelationshipTypeVocabulary and a RelationshipXref that points to it,
# print the xref as JSON, then insert it through the store client.
voc=RelationshipTypeVocabulary(
pk="http://localhost:3030/g6p/RelationshipTypeVocabulary_ac7de6f2f302971b64781fc96cc97c86" ,
comment="no_comment")
rel=RelationshipXref(pk="http://www.reactome.org/biopax/56/71387#RelationshipXref90")
rel.set_comment("Database 'x' identifier. Use this URL to connect to the web page of this instance in Reactome")
rel.set_relationshipType(voc)
rel.set_db("database1")
print( rel.to_json() )
# NOTE(review): `sc` is whichever client was created last (the "test" dataset
# client when the cells run in order) while the pk URIs mention g6p - confirm
# that this is the intended target dataset.
sc.insert_instance(rel)
print(" insert done")
{ "uri": "http://www.reactome.org/biopax/56/71387#RelationshipXref90", "comment": "Database 'x' identifier. Use this URL to connect to the web page of this instance in Reactome", "db": "database1", "dbVersion": null, "id": null, "idVersion": null, "relationshipType": { "__uri__": "http://localhost:3030/g6p/RelationshipTypeVocabulary_ac7de6f2f302971b64781fc96cc97c86", "comment": "no_comment", "xref": null, "term": null }, "__class__": "RelationshipXref" } insert done
In [8]:
# Change the `db` value of the xref built in the insertion cell and push the
# modified instance to the store (update if present, insert otherwise, going
# by the method name).
rel.set_db("database2")
sc.update_or_insert_instance(rel)
print("update done")
update done
####################
In [9]:
from datetime import datetime

# Rebuild the vocabulary entry under its existing URI with a comment carrying
# today's date (YYYY-MM-DD), then send it to the store.
ndate = datetime.today().date().isoformat()
voc = RelationshipTypeVocabulary(
    pk="http://localhost:3030/g6p/RelationshipTypeVocabulary_ac7de6f2f302971b64781fc96cc97c86",
    comment="has been updated on %s " % (ndate),
)
sc.update_or_insert_instance(voc)
In [10]:
# Ask the current client for its "select all" SPARQL query and display it.
# The resulting `query` string is reused by the next cell, which executes it.
query=sc.select_all_query()
print(query)
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX biopax: <http://www.biopax.org/release/biopax-level3.owl#> SELECT ?s ?p ?o WHERE { ?s ?p ?o FILTER(?s != <None>) } LIMIT 1000 OFFSET 0
In [11]:
# Run the previously built `query` against the "g6p" dataset (no credentials)
# and show the first 11 result rows.
dataset = "g6p"
db = "http://db:3030"
credentials = None
sc = cl.BIOPAXStoreClient(db, dataset, credentials)

res = sc.execute(query)

i = 0  # number of rows printed; stays 0 when there are no results
for i, tp in enumerate(res, start=1):
    print(tp)
    if i > 10:
        break
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://www.biopax.org/release/biopax-level3.owl#ProteinReference'] ['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#xref', 'http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6'] ['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#organism', 'http://identifiers.org/taxonomy/9606'] ['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#displayName', 'ADPGK'] ['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#name', 'PSEC0260'] ['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#name', 'UniProt:Q9BRR6 ADPGK'] ['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#comment', 'CATALYTIC ACTIVITY ADP + D-glucose = AMP + D-glucose 6-phosphate.'] ['http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref'] ['http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#id', 'Q9BRR6'] ['http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#db', 'uniprot knowledgebase'] ['http://identifiers.org/taxonomy/9606', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://www.biopax.org/release/biopax-level3.owl#BioSource']
In [12]:
# Load the RDF/XML file written by the custom-query export cell back into the
# client's graph and preview it (shortened to 1000 characters).
# The path is taken from `exfile2b`, computed by that export cell, instead of a
# hard-coded absolute "/work/input/..." path that only exists in one deployment.
rdfxmlfile=exfile2b
sc.file_to_graph(rdfxmlfile)
print("#############RDF XML #########################")
print(textwrap.shorten(sc.rdf_xml_string(), width=1000))
#############RDF XML ######################### <?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:ns1="http://www.biopax.org/release/biopax-level3.owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" > <rdf:Description rdf:about="http://www.reactome.org/biopax/56/71387#Protein67"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#Protein"/> <ns1:xref rdf:resource="http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_R-HSA-3006348"/> <ns1:xref rdf:resource="http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_3006348"/> <ns1:displayName rdf:datatype="http://www.w3.org/2001/XMLSchema#string">SLC37A4</ns1:displayName> <ns1:name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">G6PT1_HUMAN</ns1:name> <ns1:name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Glucose-6-phosphate translocase</ns1:name> <ns1:comment [...]
In [13]:
# Convert the client's current graph to a networkx graph and report basic facts.
print("generate the related graph data structure using networkx")
print("######################################")
nx_graph = sc.nxgraph()

# (label, networkx function) pairs, evaluated and printed in this order.
graph_facts = (
    ("Number of nodes:", nx.number_of_nodes),
    ("Number of edges:", nx.number_of_edges),
    ("Is directed:", nx.is_directed),
    ("Is connected:", nx.is_connected),
)
for label, fact in graph_facts:
    print(label, fact(nx_graph))
generate the related graph data structure using networkx ###################################### Number of nodes: 111 Number of edges: 134 Is directed: False Is connected: True
--
In [ ]:
In [ ]: