BIOPAX dataset Validation¶

In [1]:
from biopax_explorer.biopax import *
from biopax_explorer.validation import *
from biopax_explorer.pattern.pattern import PatternExecutor, Pattern
from biopax_explorer.query import  EntityNode

Using a generic Validator to validate a BIOPAX dataset¶

In [2]:
# Two Protein entities. Note that both are given the same pk ('http://uri1');
# the names differ.
entity1 = Protein(pk='http://uri1')
entity1.name = 'p1'
entity2 = Protein(pk='http://uri1')
entity2.name = 'p2'

# The collection handed to the validator.
ent_collection = [entity1, entity2]

# Validation configuration: outer key is a class name, each inner key maps to
# a list of rule names. How each rule is interpreted is up to Validator.
cfg = {
    'Protein': {
        'pk': ['notNull', 'unique', 'string'],
        'name': ['notNull', 'unique'],
        'class': ['PhysicalEntity'],
    },
}

# Generic validator driven by the configuration above.
va = Validator(cfg)

# validate() returns a pair; only the second item is printed here.
errors, err_d = va.validate(ent_collection)
print("validation results:")
print(err_d)
validation results:
{'http://uri1': ['Protein.pk must be unique.']}

Using a customized Validator to validate a BIOPAX dataset¶

In [3]:
# A custom validator: subclass Validator and add a rule method to it.

class CustomValidator(Validator):
    """Validator subclass that adds a custom URL rule for the ``pk`` attribute."""

    def custom_url_validation(self, entity):
        """Check the ``pk`` attribute of ``entity``.

        Returns ``[False, message]`` when ``pk`` is None, when
        ``self.uri_validator`` gives a result equal to False for it, or when
        it contains "localhost". Returns ``[True, None]`` otherwise, including
        when ``entity`` has no ``pk`` attribute at all.
        """
        attribute = "pk"
        if attribute not in dir(entity):
            return [True, None]

        attv = self.getattr(entity, attribute)
        # The three checks run in this order and short-circuit, so the
        # substring test is only reached for values that passed the first two.
        # The validator result is compared with == False, so only a result
        # equal to False rejects the value.
        rejected = (
            attv is None
            or self.uri_validator(attv) == False
            or "localhost" in attv
        )
        if rejected:
            msg = f"Custom validation failed for {entity.__class__.__name__} attribute:{attribute}, value:{attv} not allowed."
            return [False, msg]
        return [True, None]
In [4]:
# One entity whose pk is an ordinary URI and one whose pk contains "localhost".
entity1 = Protein(pk='http://domain1/p1')
entity1.name = 'p1'
entity2 = Protein(pk='http://localhost/protein123')
entity2.name = 'p2'
ent_collection = [entity1, entity2]

# Attach the custom rule (by method name) to 'pk' under the 'Entity' key.
cfg = {
    'Entity': {
        'pk': ['custom_url_validation'],
    },
}

cva = CustomValidator(cfg)

# validate() returns a pair; only the second item is printed here.
errors, err_d = cva.validate(ent_collection)

print(err_d)
{'http://localhost/protein123': ['Custom validation failed for Protein attribute:pk, value:http://localhost/protein123 not allowed.']}

Using a Validator to validate a Pattern query result¶

In [5]:
# Rules applied under the 'Entity' key: pk gets three rules, the other three
# attributes must simply be non-null.
cfg = {
    'Entity': {
        'pk': ['notNull', 'unique', 'string'],
        'xref': ['notNull'],
        'cellularLocation': ['notNull'],
        'evidence': ['notNull'],
    },
}

# Generic validator driven by the configuration above.
va = Validator(cfg)

# Dataset name and triple-store endpoint
# (local triple store deployed with docker-compose).
datasetN = "netpath"
db = "http://db:3030"

# Pattern executor for that endpoint and dataset.
peN = PatternExecutor(db, datasetN)

def simplePattern():
    """Build and return a Pattern made of a Protein node and an EntityReference node.

    The Protein node is labelled "P" and the EntityReference node "E". The
    Protein node is connected to the reference node through "entityReference"
    and carries a CONTAINS filter on its "comment" attribute with the value
    "Protein_9a".
    """
    pattern = Pattern()

    protein_node = EntityNode("P", Protein())
    reference_node = EntityNode("E", EntityReference())

    protein_node.connectedWith(reference_node, "entityReference")
    protein_node.whereAttribute("comment", "Protein_9a", "CONTAINS")

    pattern.define(protein_node, reference_node)
    return pattern
    
p = simplePattern()

# Run the pattern query, then validate each returned entity list separately.
res_P2 = peN.fetchEntities(p, level=1, max_count=1)
print("validating an pattern query result subset using meta_label\n")
for entitylist in res_P2:
    tobevalidated = []
    for entity in entitylist:
        # Keep only entities whose meta_label is "P" (the label given to the
        # Protein node in simplePattern).
        if entity.meta_label not in ("P",):
            continue

        print("uri: ",entity.pk)
        print("  cellular location: ",entity.get_cellularLocation())
        print("  xref:",entity.get_xref())
        print("  evidence:",entity.get_evidence())
        print("")

        tobevalidated.append(entity)

    # validate() returns a pair; the second item is reported when non-empty.
    errors, err_d = va.validate(tobevalidated)
    if len(err_d) > 0:
        print("================")
        print("VALIDATION ERROR")
        print(err_d)
        print("================")
validating an pattern query result subset using meta_label

uri:  http://pathwaycommons.org/pc12/#Protein_9a138a025b1ef814f572a11f8e164d2e
  cellular location:  CellularLocationVocabulary(pk=http://pathwaycommons.org/pc12/#CellularLocationVocabulary_dc68fffeee0259e0d3bd7a3f6d0cc067, pop_state=1, exhausted=None, meta_label=None, rdf_type=http://www.biopax.org/release/biopax-level3.owl#CellularLocationVocabulary, _comment=None, _xref=None, _term=None)
  xref: Xref(pk=http://pathwaycommons.org/pc12/#RelationshipXref_4eb6f3f2-a79f-46ad-826b-f3017c43eea5RelationshipXref_PSI-MI_MI_0217, pop_state=1, exhausted=None, meta_label=None, rdf_type=http://www.biopax.org/release/biopax-level3.owl#Xref, _comment=None, _db=None, _dbVersion=None, _id=None, _idVersion=None)
  evidence: None

================
VALIDATION ERROR
{'http://pathwaycommons.org/pc12/#Protein_9a138a025b1ef814f572a11f8e164d2e': ['Protein.evidence cannot be null.']}
================
In [ ]:
 
In [ ]: