Insert, update and delete within a BioPAX RDF dataset¶

using a triple store as backend¶

file and graph processing¶

In [1]:
import os,pathlib
import textwrap
import networkx as nx
In [2]:
from biopax_explorer.biopax.utils import gen_utils as gu
from biopax_explorer.biopax  import *
from biopax_explorer.query import client as cl
In [3]:
print("export rdf_xml from triples store")

# --- connection settings ---------------------------------------------------
dataset = "g6p"
db = "http://db:3030"
credentials = "admin"
# URI handed to the client as "unwanted subject" (artefact of a local
# triple-store deployment, per the original author's note).
unwanted_subject_uri = "http://localhost:3030/%s/data" % (dataset)

# --- output location: <parent of the working directory>/input/ --------------
expath = pathlib.Path().resolve().parent.absolute()
exfile1 = "%s/input/export_all_%s.xml" % (expath, dataset)

sc = cl.BIOPAXStoreClient(db, dataset, credentials, unwanted_subject_uri)

# Load the store content into the client's graph (all triples).
g = sc.store_to_graph()

banner = "##############################################"

print("#############RDF XML #########################")
# Only a preview is printed: textwrap.shorten collapses whitespace and
# truncates the serialisation to 1000 characters.
rdf_xml_preview = textwrap.shorten(sc.rdf_xml_string(), width=1000)
print(rdf_xml_preview)

sc.save_graph_as_rdf_xml(exfile1)
print(banner)
print("all triples exported to %s" % (exfile1))
print(banner)
export rdf_xml from triples store
http://db:3030/g6p/query biopax http://www.biopax.org/release/biopax-level3.owl# http://localhost:3030/g6p/data 1000
#############RDF XML #########################
<?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:ns1="http://www.biopax.org/release/biopax-level3.owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" > <rdf:Description rdf:about="http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_R-ALL-114632"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#UnificationXref"/> <ns1:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Reactome stable identifier. Use this URL to connect to the web page of this instance in Reactome: http://www.reactome.org/cgi-bin/eventbrowser_st_id?ST_ID=R-ALL-114632.2</ns1:comment> <ns1:id rdf:datatype="http://www.w3.org/2001/XMLSchema#string">R-ALL-114632</ns1:id> <ns1:db rdf:datatype="http://www.w3.org/2001/XMLSchema#string">reactome</ns1:db> <ns1:idVersion rdf:datatype="http://www.w3.org/2001/XMLSchema#string">2</ns1:idVersion> </rdf:Description> <rdf:Description rdf:about="http://www.reactome.org/biopax/56/71387#SmallMolecule98"> <rdf:type [...]
##############################################
all triples exported to /work/input/export_all_g6p.xml
##############################################
In [4]:
# Destination file for the query-driven export.
exfile2b = "%s/input/export_query_%s.xml" % (expath, dataset)
sc = cl.BIOPAXStoreClient(db, dataset, credentials, unwanted_subject_uri)

# Only the triples selected by the custom queries below are exported.
# The queries must project the variables ?s ?p ?o; for other variable names
# the original note says to pass labels=["s","p","o"]-style parameters.

# q1: bp:memberEntityReference links whose object is a ProteinReference and
# whose subject is one of the listed *Reference classes.
q1 = """

prefix bp: <http://www.biopax.org/release/biopax-level3.owl#>
select ?s ?p ?o
 where {
 ?s ?p ?o.
 { ?o a bp:ProteinReference } .

 { ?s a bp:RnaRegionReference }  UNION  { ?s a bp:SmallMoleculeReference }  UNION  { ?s a bp:ProteinReference }  UNION  { ?s a bp:RnaReference }  UNION  { ?s a bp:DnaRegionReference }  UNION  { ?s a bp:DnaReference } 
FILTER (  ?p =  bp:memberEntityReference  ).

}

"""
sc.custom_query_list_append(q1)

# q2: bp:controller links from the listed interaction/control classes to the
# listed physical-entity classes.
q2 = """

prefix bp: <http://www.biopax.org/release/biopax-level3.owl#>
select ?s ?p ?o
 where {
 ?s ?p ?o.
 { ?o a bp:Protein }  UNION  { ?o a bp:Complex }  UNION  { ?o a bp:RnaRegion }  UNION  { ?o a bp:Dna }  UNION  { ?o a bp:Rna }  UNION  { ?o a bp:DnaRegion }  UNION  { ?o a bp:SmallMolecule } .

 { ?s a bp:GeneticInteraction }  UNION  { ?s a bp:Conversion }  UNION  { ?s a bp:ComplexAssembly }  UNION  { ?s a bp:Degradation }  UNION  { ?s a bp:Transport }  UNION  { ?s a bp:BiochemicalReaction }  UNION  { ?s a bp:TransportWithBiochemicalReaction }  UNION  { ?s a bp:MolecularInteraction }  UNION  { ?s a bp:TemplateReaction }  UNION  { ?s a bp:Control }  UNION  { ?s a bp:Catalysis }  UNION  { ?s a bp:Modulation }  UNION  { ?s a bp:TemplateReactionRegulation } 
FILTER (  ?p =  bp:controller  ).

}
"""
sc.custom_query_list_append(q2)

# extension values (author's description):
#   0 -> only the s,p,o triples returned by the queries
#   1 -> case 0 plus all triples with s=s
#   2 -> case 1 plus s=o
extension = 2
g = sc.store_custom_query_to_graph(extension)

sc.save_graph_as_rdf_xml(exfile2b)

banner = "##############################################"
print(banner)
print("selected triples from parql queries exported to %s " % (exfile2b))
print(banner)
 
##############################################
selected triples from parql queries exported to /work/input/export_query_g6p.xml 
##############################################
In [5]:
import requests
from requests.auth import HTTPBasicAuth
######### create a dataset in Fuseki if it does not exist #########
## needs the admin role + credentials
# Fuseki server configuration
FUSEKI_URL = "http://db:3030"
DATASET_NAME = 'test'
# Credentials are read from the environment when available so real passwords
# never have to be committed with the notebook; the defaults keep the demo
# deployment (admin/admin) working unchanged.
USERNAME = os.environ.get('FUSEKI_USERNAME', 'admin')
PASSWORD = os.environ.get('FUSEKI_PASSWORD', 'admin')

def dataset_exists(fuseki_url, dataset_name, auth, timeout=10):
    """Check if a dataset exists on the Fuseki server.

    Args:
        fuseki_url: Base URL of the Fuseki server (a trailing "/" is tolerated).
        dataset_name: Dataset name without the leading "/".
        auth: Authentication object forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so that an
            unreachable server cannot block the notebook indefinitely.

    Returns:
        True if the dataset listing contains an entry whose ``ds.name`` is
        ``/<dataset_name>``, False otherwise.

    Raises:
        RuntimeError: If the listing request does not return HTTP 200.
    """
    admin_endpoint = fuseki_url.rstrip('/') + "/$/datasets"
    response = requests.get(admin_endpoint, auth=auth, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to retrieve datasets. Status code: {response.status_code}")

    wanted = f"/{dataset_name}"
    datasets = response.json().get('datasets', [])
    # .get() instead of [] so an entry without 'ds.name' is skipped, not fatal.
    return any(ds.get('ds.name') == wanted for ds in datasets)

def create_dataset(fuseki_url, dataset_name, auth, timeout=10):
    """Create a dataset on the Fuseki server if it doesn't exist.

    Args:
        fuseki_url: Base URL of the Fuseki server (a trailing "/" is tolerated).
        dataset_name: Name of the dataset to create.
        auth: Authentication object forwarded to the HTTP calls.
        timeout: Seconds to wait for the creation request before giving up.

    Returns:
        None. The outcome is reported with ``print``.

    Raises:
        RuntimeError: If the creation request does not return HTTP 200.
        Exception: Whatever ``dataset_exists`` raises when the listing fails.
    """
    if dataset_exists(fuseki_url, dataset_name, auth):
        print(f"Dataset '{dataset_name}' already exists.")
        return

    admin_endpoint = fuseki_url.rstrip('/') + "/$/datasets"
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    data = {'dbName': dataset_name, 'dbType': 'mem'}  # 'mem' for in-memory; use 'tdb' for TDB
    # An explicit timeout keeps an unreachable server from hanging the notebook.
    response = requests.post(admin_endpoint, headers=headers, data=data, auth=auth, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to create dataset. Status code: {response.status_code}")
    print(f"Dataset '{dataset_name}' created successfully.")

# Build the HTTP basic-auth credentials once, then make sure the dataset exists.
auth = HTTPBasicAuth(username=USERNAME, password=PASSWORD)
create_dataset(
    fuseki_url=FUSEKI_URL,
    dataset_name=DATASET_NAME,
    auth=auth,
)
2024-08-19 13:50:29,647 - DEBUG - Starting new HTTP connection (1): db:3030
2024-08-19 13:50:29,655 - DEBUG - http://db:3030 "GET /$/datasets HTTP/11" 200 414
Dataset 'test' already exists.
In [6]:
#########################
## delete data in the triple store by URI

dataset = "test"
db = "http://db:3030"
# Credentials are taken from the environment when set, so the real password
# does not have to be written into the notebook; the defaults match the demo
# deployment (admin/admin).
credentials = [
    os.environ.get("FUSEKI_USERNAME", "admin"),
    os.environ.get("FUSEKI_PASSWORD", "admin"),
]

sc = cl.BIOPAXStoreClient(db, dataset, credentials)

# Prefix, namespace and prefixed identifier of the resource to remove.
prefix = "eos"
domain = "http://www.gruppomcr.com/2020/06/eos-ontology-meets#"
uri_id = "eos:uritoberemoved2"

sc.delete_from_store_by_uri_id(uri_id, prefix, domain)
# deletion done
In [7]:
 


 
"""
creating or updating entities in a BIOPAX rdf dataset
an insertion example

"""
   
    
# Vocabulary term that is attached to the cross-reference built below.
voc = RelationshipTypeVocabulary(
    pk="http://localhost:3030/g6p/RelationshipTypeVocabulary_ac7de6f2f302971b64781fc96cc97c86",
    comment="no_comment",
)

# Cross-reference instance: filled through its setters, shown as JSON, then
# sent to the store.
rel = RelationshipXref(pk="http://www.reactome.org/biopax/56/71387#RelationshipXref90")
rel.set_comment("Database 'x' identifier. Use this URL to connect to the web page of this instance in Reactome")
rel.set_relationshipType(voc)
rel.set_db("database1")

rel_json = rel.to_json()
print(rel_json)

sc.insert_instance(rel)

print(" insert done")
{
  "uri": "http://www.reactome.org/biopax/56/71387#RelationshipXref90",
  "comment": "Database 'x' identifier. Use this URL to connect to the web page of this instance in Reactome",
  "db": "database1",
  "dbVersion": null,
  "id": null,
  "idVersion": null,
  "relationshipType": {
    "__uri__": "http://localhost:3030/g6p/RelationshipTypeVocabulary_ac7de6f2f302971b64781fc96cc97c86",
    "comment": "no_comment",
    "xref": null,
    "term": null
  },
  "__class__": "RelationshipXref"
}
 insert done
In [8]:
# Change one attribute on the in-memory instance, then push it to the store
# through the client's update-or-insert call.
updated_db = "database2"
rel.set_db(updated_db)
sc.update_or_insert_instance(rel)

print("update   done")
update   done

####################

In [9]:
from datetime import datetime

# Today's date formatted as YYYY-MM-DD; it is embedded in the comment below.
today = datetime.today()
ndate = today.strftime('%Y-%m-%d')

# Same primary key as the vocabulary term used earlier, with a new comment.
voc = RelationshipTypeVocabulary(
    pk="http://localhost:3030/g6p/RelationshipTypeVocabulary_ac7de6f2f302971b64781fc96cc97c86",
    comment="has been updated on %s " % (ndate),
)

sc.update_or_insert_instance(voc)
In [10]:
# Ask the client for its "select all" SPARQL text and display it; the same
# string is executed in a later cell.
query = sc.select_all_query()
print(query)

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX biopax: <http://www.biopax.org/release/biopax-level3.owl#>

SELECT ?s ?p ?o
WHERE {
  ?s ?p ?o
   FILTER(?s != <None>)
}
LIMIT 1000
OFFSET 0
    
  
In [11]:
# (alternative left by the author, not executed) store_to_graph( sc.wrapper,query)

# Reconnect to the g6p dataset, this time without credentials.
dataset = "g6p"
db = "http://db:3030"
credentials = None
sc = cl.BIOPAXStoreClient(db, dataset, credentials)

# Run the select-all query and print only the first 11 result rows.
res = sc.execute(query)
for i, tp in enumerate(res, start=1):
    print(tp)
    if i > 10:
        break
        
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://www.biopax.org/release/biopax-level3.owl#ProteinReference']
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#xref', 'http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6']
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#organism', 'http://identifiers.org/taxonomy/9606']
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#displayName', 'ADPGK']
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#name', 'PSEC0260']
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#name', 'UniProt:Q9BRR6 ADPGK']
['http://identifiers.org/uniprot/Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#comment', 'CATALYTIC ACTIVITY ADP + D-glucose = AMP + D-glucose 6-phosphate.']
['http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://www.biopax.org/release/biopax-level3.owl#UnificationXref']
['http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#id', 'Q9BRR6']
['http://www.reactome.org/biopax/56/71387#UnificationXref_uniprot_knowledgebase_Q9BRR6', 'http://www.biopax.org/release/biopax-level3.owl#db', 'uniprot knowledgebase']
['http://identifiers.org/taxonomy/9606', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://www.biopax.org/release/biopax-level3.owl#BioSource']
In [12]:
# Re-load the RDF/XML file written by the custom-query export cell.
# The path is derived from the working directory (parent directory + "input"),
# the same way the export cells build theirs, instead of the absolute
# "/work/input/..." location that only exists in the original container.
rdfxmlfile = str(pathlib.Path().resolve().parent / "input" / "export_query_g6p.xml")
sc.file_to_graph(rdfxmlfile)

print("#############RDF XML #########################")
# Preview only: whitespace is collapsed and the text is cut to 1000 characters.
print(textwrap.shorten(sc.rdf_xml_string(), width=1000))
#############RDF XML #########################
<?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:ns1="http://www.biopax.org/release/biopax-level3.owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" > <rdf:Description rdf:about="http://www.reactome.org/biopax/56/71387#Protein67"> <rdf:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#Protein"/> <ns1:xref rdf:resource="http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_R-HSA-3006348"/> <ns1:xref rdf:resource="http://www.reactome.org/biopax/56/71387#UnificationXref_reactome_database_id_release_56_3006348"/> <ns1:displayName rdf:datatype="http://www.w3.org/2001/XMLSchema#string">SLC37A4</ns1:displayName> <ns1:name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">G6PT1_HUMAN</ns1:name> <ns1:name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Glucose-6-phosphate translocase</ns1:name> <ns1:comment [...]
In [13]:
print("generate the related  graph data structure using networkx")
print("######################################")

# Convert the client's current graph to a networkx graph and report a few
# basic properties, one line per metric.
nx_graph = sc.nxgraph()
graph_metrics = (
    ("Number of nodes:", nx.number_of_nodes),
    ("Number of edges:", nx.number_of_edges),
    ("Is directed:", nx.is_directed),
    ("Is connected:", nx.is_connected),
)
for label, metric in graph_metrics:
    print(label, metric(nx_graph))

 
    
generate the related  graph data structure using networkx
######################################
Number of nodes: 111
Number of edges: 134
Is directed: False
Is connected: True

--

In [ ]:

In [ ]: