# !/usr/bin/python3
"""
Contains necessary objects for creating a Directed Acyclic Graph (DAG) object to represent Open Biomedical Ontologies
(OBO).
"""
import re
class OboGraph(object):
    """A pythonic graph of a generic Open Biomedical Ontology (OBO) directed acyclic graph (DAG).

    Nodes and edges are kept in plain lists (`node_list`, `edge_list`) and are additionally indexed by node ID
    (`id_index`), by every word of a node's name and definition (`vocab_index`) and by relationship ID
    (`relationship_index`). The `orphans` and `leaves` sets are computed lazily and recomputed after any change made
    through the methods of this class.
    """

    def __init__(self, namespace_filter=None, allowed_relationships=None):
        """`OboGraph` initializer. Leave `namespace_filter` and `allowed_relationships` as :py:obj:`None` to create the
        entire ontology graph. Otherwise, provide filters to limit what information is pulled into the graph.

        :param str namespace_filter: Specify the namespace of a sub-ontology namespace, if one is available for the ontology.
        :param list allowed_relationships: Specify a list of relationships to utilize in the graph, other relationships will be ignored. The given collection is copied, it is never modified.
        """
        self.namespace_filter = namespace_filter
        # Work on a private copy so that adding 'is_a' below does not change the caller's own list.
        self.allowed_relationships = list(allowed_relationships) if allowed_relationships else allowed_relationships
        self.word_split = re.compile(r"[\w\'\-]+")  # tokenizer shared by the vocabulary index and keyword searches
        self.node_list = list()
        self.edge_list = list()
        self.id_index = dict()  # node ID -> node object
        self.vocab_index = dict()  # word -> set of nodes whose name or definition contains the word
        self.relationship_index = dict()  # relationship ID -> relationship object
        self.used_relationship_set = set()
        self.relationship_count = dict()  # relationship ID -> number of edges added with that relationship
        self.root_nodes = list()
        self._orphans = None
        self._leaves = None
        self._modified = True  # True whenever the cached orphan/leaf sets are out of date

        if self.allowed_relationships and 'is_a' not in self.allowed_relationships:
            print("WARNING: 'is_a' is a required relationship type within OBO ontologies.\nAdding 'is_a' to the allowed_relationships list.")
            self.allowed_relationships.append('is_a')

    @property
    def orphans(self):
        """:py:obj:`property` defining a set of nodes in the graph which have no parents. When the graph is modified,
        calls :func:`_update_graph` to repopulate the sets of orphan and leaf nodes.

        :return: Set of 'orphan' :class:`gocats.dag.AbstractNode` objects.
        :rtype: :py:class:`set`
        """
        if self._modified:
            self._update_graph()
        return self._orphans

    @property
    def leaves(self):
        """:py:obj:`property` defining a set of nodes in the graph which have no children. When the graph is modified,
        calls :func:`_update_graph` to repopulate the sets of orphan and leaf nodes.

        :return: Set of 'leaf' :class:`gocats.dag.AbstractNode` objects.
        :rtype: :py:class:`set`
        """
        if self._modified:
            self._update_graph()
        return self._leaves

    def valid_node(self, node):
        """Defines condition of a valid node. Node is valid if it is not obsolete and is contained within the given
        ontology namespace constraint (any namespace is accepted when no namespace filter is set).

        :param node: A :class:`gocats.dag.AbstractNode` object
        :return: True if node is valid, False otherwise
        :rtype: :py:obj:`True` or :py:obj:`False`
        """
        if node.obsolete:
            return False
        return bool(not self.namespace_filter or node.namespace == self.namespace_filter)

    def valid_edge(self, edge):
        """Defines condition of a valid edge. Edge is valid if it is within the list of allowed edges and connects two
        nodes that are both contained in the graph in question.

        :param edge: A :class:`gocats.dag.AbstractEdge` object
        :return: True if edge is valid, False otherwise
        :rtype: :py:obj:`True` or :py:obj:`False`
        """
        both_nodes_known = edge.parent_node.id in self.id_index and edge.child_node.id in self.id_index
        if not both_nodes_known:
            return False
        return bool(not self.allowed_relationships or edge.relationship_id in self.allowed_relationships)

    def _update_graph(self):
        """Repopulates graph orphans and leaves sets and marks the cached sets as up to date.

        Orphans are non-obsolete nodes without parents that are not listed in `root_nodes`; leaves are non-obsolete
        nodes that have at least one parent and no children.

        :return: None
        :rtype: :py:obj:`None`
        """
        self._orphans = {
            node for node in self.node_list
            if not node.obsolete and not node.parent_node_set and node not in self.root_nodes
        }
        self._leaves = {
            node for node in self.node_list
            if not node.obsolete and not node.child_node_set and node.parent_node_set
        }
        self._modified = False

    def add_node(self, node):
        """Adds a node object to the graph, adds an object pointer to the vocabulary index to reference nodes to every
        word in the node name and definition. Sets modification state to :py:obj:`True`.

        :param node: A :class:`gocats.dag.AbstractNode` object.
        :return: None
        :rtype: :py:obj:`None`
        """
        self.node_list.append(node)
        self.id_index[node.id] = node
        for word in self.word_split.findall(node.name + " " + node.definition):
            self.vocab_index.setdefault(word, set()).add(node)
        self._modified = True

    def remove_node(self, node):
        """Removes a node from the graph and deletes node references from all entries in the vocabulary index. Sets
        modification state to :py:obj:`True`. Does nothing when the node is not part of the graph.

        References to the node held by the remaining nodes (parent/child sets and attached edges) are dropped as well.
        NOTE(review): edges touching the removed node are left in `edge_list`, as before; confirm whether callers
        expect them to be purged too.

        :param node: A :class:`gocats.dag.AbstractNode` object.
        :return: None
        :rtype: :py:obj:`None`
        """
        if node not in self.node_list:
            return

        for graph_node in self.node_list:
            graph_node.parent_node_set.discard(node)
            graph_node.child_node_set.discard(node)
            # Collect first, then remove: a set must not change size while it is being iterated.
            stale_edges = [edge for edge in graph_node.edges if node is edge.parent_node or node is edge.child_node]
            graph_node.edges.difference_update(stale_edges)

        # A word may occur several times in the text; each index entry only needs to be visited once.
        for word in set(self.word_split.findall(node.name + " " + node.definition)):
            indexed_nodes = self.vocab_index.get(word)
            if indexed_nodes is None:
                continue
            indexed_nodes.discard(node)
            if not indexed_nodes:
                del self.vocab_index[word]

        del self.id_index[node.id]
        self.node_list.remove(node)
        self._modified = True

    def add_edge(self, edge):
        """Adds an edge object to the graph, and counts the edge relationship type. Sets modification state to
        :py:obj:`True`.

        :param edge: A :class:`gocats.dag.AbstractEdge` object.
        :return: None
        :rtype: :py:obj:`None`
        """
        self.edge_list.append(edge)
        self.relationship_count[edge.relationship_id] = self.relationship_count.get(edge.relationship_id, 0) + 1
        self._modified = True

    def remove_edge(self, edge):
        """Removes an edge object from the graph, and removes references to that edge from the node objects involved.
        Sets modification state to :py:obj:`True`.

        :param edge: A :class:`gocats.dag.AbstractEdge` object.
        :return: None
        :rtype: :py:obj:`None`
        :raises KeyError: If either end of the edge is not in the graph's ID index.
        :raises ValueError: If the edge is not in the graph's edge list.
        """
        self.id_index[edge.parent_id].remove_edge(edge)
        self.id_index[edge.child_id].remove_edge(edge)
        self.edge_list.remove(edge)
        self._modified = True

    def add_relationship(self, relationship):
        """Adds a :class:`gocats.dag.AbstractRelationship` object to the graph's relationship index, referenced by
        that relationship's ID. Sets modification state to :py:obj:`True`.

        :param relationship: A :class:`gocats.dag.AbstractRelationship` object.
        :return: None
        :rtype: :py:obj:`None`
        """
        self.relationship_index[relationship.id] = relationship
        self._modified = True

    def instantiate_valid_edges(self):
        """Add all edge references to their respective nodes and vice versa if both nodes of the edge are in the graph.
        This is carried out by :func:`AbstractEdge.connect_nodes`. Also adds :class:`gocats.dag.AbstractRelationship`
        object reference to each edge. If both nodes are not in the graph, the edge is deleted from the graph. Sets
        modification state to :py:obj:`True`.

        :return: None
        :rtype: :py:obj:`None`
        :raises KeyError: If an edge between two known nodes uses a relationship ID missing from the relationship index.
        """
        connected_edges = []
        for edge in self.edge_list:
            first_id, second_id = edge.node_pair_id
            if first_id not in self.id_index or second_id not in self.id_index:
                continue  # at least one end is missing from the graph: the edge is dropped below
            edge.relationship = self.relationship_index[edge.relationship_id]
            edge.connect_nodes((self.id_index[first_id], self.id_index[second_id]), self.allowed_relationships)
            connected_edges.append(edge)
        # Replace the contents in place so that existing references to edge_list stay valid.
        self.edge_list[:] = connected_edges
        self._modified = True

    def node_depth(self, sample_node):
        """Returns an integer representing how many nodes are between the given node and the root node of the graph
        (depth level).

        A root node has depth 0. For any other node the parent generations are walked upward, one level per step,
        until a generation containing a root node is reached or no parents are left; a non-root node without parents
        therefore yields 1.

        :param sample_node: A :class:`gocats.dag.AbstractNode` object.
        :return: Depth level.
        :rtype: :py:obj:`int`
        """
        if sample_node in self.root_nodes:
            return 0
        depth = 1
        root_node_set = set(self.root_nodes)
        parent_set = sample_node.parent_node_set
        while parent_set:
            if parent_set & root_node_set:  # a root node is among the current generation of parents
                break
            depth += 1
            parent_set = set().union(*[parent.parent_node_set for parent in parent_set])
        return depth

    def filter_nodes(self, search_string_list):
        """Returns the node objects that contain vocabulary matching the keywords provided in the search string
        list. Nodes are selected by searching through the vocabulary index.

        :param search_string_list: A :py:obj:`list` of search strings provided in the keyword_file provided to :func:`gocats.gocats.create_subgraphs`.
        :return: A set of :class:`gocats.dag.AbstractNode` objects, or a list of them when a namespace filter is set. Empty when no search word occurs in the vocabulary index.
        :rtype: :py:obj:`set` or :py:obj:`list`
        """
        search_words = {
            word
            for search_string in search_string_list
            for word in self.word_split.findall(search_string)
        }
        matching_node_sets = [self.vocab_index[word] for word in search_words if word in self.vocab_index]
        # Starting from an empty set keeps the union valid when nothing matched at all.
        filtered_nodes = set().union(*matching_node_sets)
        if self.namespace_filter:
            filtered_nodes = [node for node in filtered_nodes if node.namespace == self.namespace_filter]
        return filtered_nodes

    def filter_edges(self, filtered_nodes):
        """Returns a list of edges in the graph that connect the nodes provided in the filtered nodes list.

        :param filtered_nodes: Collection of filtered nodes provided by :func:`filter_nodes`.
        :return: A list of :class:`gocats.dag.AbstractEdge` objects.
        :rtype: :py:obj:`list`
        """
        node_lookup = set(filtered_nodes)  # one-off conversion gives constant-time membership tests per edge
        filtered_edges = [
            edge for edge in self.edge_list
            if edge.parent_node in node_lookup and edge.child_node in node_lookup
        ]
        if self.allowed_relationships:
            filtered_edges = [edge for edge in filtered_edges if edge.relationship_id in self.allowed_relationships]
        return filtered_edges

    def nodes_between(self, start_node, end_node):
        """Returns a set of nodes that occur along all paths between the start node and the end node. If no paths exist,
        an empty set is returned.

        The result is the intersection of the start node's ancestors with the end node's descendants.

        :param start_node: :class:`gocats.dag.AbstractNode` object to start the paths.
        :param end_node: :class:`gocats.dag.AbstractNode` object to end the paths.
        :return: A set of :class:`gocats.dag.AbstractNode` objects if there is at least one path between the parameters, an empty set otherwise.
        :rtype: :py:obj:`set`
        """
        if start_node.ancestors and end_node.descendants:
            return start_node.ancestors.intersection(end_node.descendants)
        return set()
class AbstractNode(object):
    """A node containing all basic properties of an OBO node. The parsing object, :class:`gocats.ontologyparser.OboParser`
    currently has direct access to data members (id, name, definition, namespace, edges, and obsolete) so that
    information from the database file can be added to the object.

    Ancestor and descendant sets are evaluated lazily and cached; the cache of a node is invalidated when an edge is
    added to or removed from that node.
    NOTE(review): only the node itself is flagged as modified, nodes further up or down the graph keep their cached
    sets - confirm that edges are not changed after ancestors/descendants have been queried.
    """

    def __init__(self):
        """`AbstractNode` initializer
        """
        self.id = str()
        self.name = str()
        self.definition = str()
        self.namespace = str()
        self.edges = set()
        self.parent_node_set = set()
        self.child_node_set = set()
        self.obsolete = False
        self._modified = True  # True whenever the cached ancestor/descendant sets are out of date
        self._descendants = None
        self._ancestors = None
        # Will add new sets for equivalence, actor/actee, ordinal, etc

    @property
    def descendants(self):
        """:py:obj:`property` defining a set of nodes in the graph that are recursively reverse of a node with a
        scoping-type relationship. When the node is modified, calls :func:`gocats.dag.AbstractNode._update_node` to
        repopulate the sets of descendants and ancestors. This represents a "lazy" evaluation of node descendants.

        :return: Set of :class:`gocats.dag.AbstractNode` objects
        :rtype: :py:class:`set`
        """
        if self._modified:
            self._update_node()
        return self._descendants

    @property
    def ancestors(self):
        """:py:obj:`property` defining a set of nodes in the graph that are recursively forward of a node with a
        scoping-type relationship. When the node is modified, calls :func:`gocats.dag.AbstractNode._update_node` to
        repopulate the sets of descendants and ancestors. This represents a "lazy" evaluation of node ancestors.

        :return: Set of :class:`gocats.dag.AbstractNode` objects
        :rtype: :py:class:`set`
        """
        if self._modified:
            self._update_node()
        return self._ancestors

    def _update_node(self):
        """Repopulates ancestor and descendant sets for a node. Sets modification state to :py:obj:`False`.

        :return: None
        :rtype: :py:obj:`None`
        """
        self._update_descendants()
        self._update_ancestors()
        self._modified = False

    def add_edge(self, edge, allowed_relationships):
        """Adds a given :class:`gocats.dag.AbstractEdge` to the :class:`gocats.dag.AbstractNode` object. The edge is
        always stored in `edges`; the node on its other end is only recorded as a parent or child when there is no
        relationship filter or the edge's relationship is allowed by the filter. Sets modification state to
        :py:obj:`True`.

        :param edge: The :class:`gocats.dag.AbstractEdge` object to attach.
        :param allowed_relationships: Collection of allowed relationship IDs, or a false value to allow all of them.
        :return: None
        :rtype: :py:obj:`None`
        """
        # TODO: Need to capture non-parent/child relationship types, such as actor/actee and equivalence
        # FIXME: Should we add edges that represent non-allowed relationships?
        self.edges.add(edge)
        if not allowed_relationships or edge.relationship_id in allowed_relationships:
            if edge.child_id == self.id:
                self.parent_node_set.add(edge.parent_node)
            elif edge.parent_id == self.id:
                self.child_node_set.add(edge.child_node)
        self._modified = True

    def remove_edge(self, edge):
        """Removes a given :class:`gocats.dag.AbstractEdge` from the :class:`gocats.dag.AbstractNode` object. Also
        removes parent or child node references that the edge referenced. Sets modification state to :py:obj:`True`.

        Removal is tolerant: an edge whose relationship was filtered out when it was added never registered a parent
        or child, so missing references are skipped instead of raising.

        :param edge: The :class:`gocats.dag.AbstractEdge` object to detach.
        :return: None
        :rtype: :py:obj:`None`
        """
        if edge.child_id == self.id:
            self.parent_node_set.discard(edge.parent_node)
        elif edge.parent_id == self.id:
            self.child_node_set.discard(edge.child_node)
        self.edges.discard(edge)
        self._modified = True

    def _update_descendants(self):
        """Used for the lazy evaluation of graph descendants of the current :class:`gocats.dag.AbstractNode` object.
        Iterates through node children until the bottom of the graph is reached and stores every node encountered
        across all paths. When a visited child already holds an up-to-date descendant set, that set is reused instead
        of walking below the child again.

        :return: None
        :rtype: :py:obj:`None`
        """
        descendant_set = set()
        pending = list(self.child_node_set)
        while pending:
            child = pending.pop()
            if child in descendant_set:
                continue
            descendant_set.add(child)
            if not child._modified and child._descendants is not None:
                descendant_set.update(child._descendants)
            else:
                pending.extend(child.child_node_set - descendant_set)
        self._descendants = descendant_set

    def _update_ancestors(self):
        """Used for the lazy evaluation of graph ancestors of the current :class:`gocats.dag.AbstractNode` object.
        Iterates through node parents until the top of the graph is reached and stores every node encountered across
        all paths. When a visited parent already holds an up-to-date ancestor set, that set is reused instead of
        walking above the parent again.

        :return: None
        :rtype: :py:obj:`None`
        """
        ancestors_set = set()
        pending = list(self.parent_node_set)
        while pending:
            parent = pending.pop()
            if parent in ancestors_set:
                continue
            ancestors_set.add(parent)
            if not parent._modified and parent._ancestors is not None:
                ancestors_set.update(parent._ancestors)
            else:
                pending.extend(parent.parent_node_set - ancestors_set)
        self._ancestors = ancestors_set
class AbstractEdge(object):
    """An OBO edge which links two ontology term nodes and contains a relationship type describing how the two nodes are
    related.
    """

    def __init__(self, node1_id, node2_id, relationship_id, node_pair=None):
        """`AbstractEdge` initializer. Node pair refers to a :py:obj:`tuple` of :class:`gocats.dag.AbstractNode` objects that are
        connected by the edge. Defaults to :py:obj:`None` and is later populated.

        :param str node1_id: The ID of the first term referenced from the ontology file's relationship line.
        :param str node2_id: The ID of the second term referenced from the ontology file's relationship line.
        :param str relationship_id: The ID of the relationship in the ontology file's relationship line.
        :param tuple node_pair: Default-:py:obj:`None`, provide a :py:obj:`tuple` containing two :class:`gocats.dag.AbstractNode` objects if they are already created and able to be referenced.
        """
        self.node_pair_id = (node1_id, node2_id)
        self.node_pair = node_pair
        self.relationship_id = relationship_id
        self.relationship = None  # relationship object, assigned after construction

    @property
    def json_edge(self):
        """:py:obj:`property` which returns a tuple where position 0 is a unique string representation of the edge made by combining the ID of the reverse node and the ID of the forward node and where position 1 is a list of two node IDs: the reverse and forward node.

        :return: :py:obj:`tuple` of a unique :class:`AbstractEdge` ID and a list of that edge object's reverse and forward node IDs, respectively. Returns an empty :py:obj:`str` at a position for which there are no forward or reverse nodes in the graph.
        :rtype: :py:obj:`tuple`
        """
        reverse_node = self.reverse_node
        forward_node = self.forward_node
        # Either node is None until the edge is connected with a directional relationship.
        reverse_node_id = reverse_node.id if reverse_node is not None else str()
        forward_node_id = forward_node.id if forward_node is not None else str()
        return (str(reverse_node_id + forward_node_id), [reverse_node_id, forward_node_id])

    @property
    def parent_id(self):
        """:py:obj:`property` defining the ID of the node forward of the current :class:`gocats.dag.AbstractEdge`
        object.

        :return: :py:obj:`str` ID of the forward node in the node_pair associated with the edge if the edge's relationship is assigned, :py:obj:`None` otherwise.
        :rtype: :py:obj:`str` or :py:obj:`None`
        """
        if self.relationship:
            return self.relationship.forward(self.node_pair_id)
        return None

    @property
    def child_id(self):
        """:py:obj:`property` defining the ID of the node reverse of the current :class:`gocats.dag.AbstractEdge` object.

        :return: :py:obj:`str` ID of the reverse node in the node_pair associated with the edge if the edge's relationship is assigned, :py:obj:`None` otherwise.
        :rtype: :py:obj:`str` or :py:obj:`None`
        """
        if self.relationship:
            return self.relationship.reverse(self.node_pair_id)
        return None

    @property
    def forward_node(self):
        """:py:obj:`property` defining the :class:`gocats.dag.AbstractNode` object forward of the current
        :class:`gocats.dag.AbstractEdge` object.

        :return: :class:`gocats.dag.AbstractNode` object of the forward node in the node_pair associated with the edge if the edge's relationship is assigned, the node_pair is assigned, and the relationship is a :class:`gocats.dag.DirectionalRelationship`; :py:obj:`None` otherwise.
        :rtype: :class:`gocats.dag.AbstractNode` or :py:obj:`None`
        """
        if self.node_pair and self.relationship and isinstance(self.relationship, DirectionalRelationship):
            return self.relationship.forward(self.node_pair)
        return None

    @property
    def reverse_node(self):
        """:py:obj:`property` defining the :class:`gocats.dag.AbstractNode` object reverse of the current
        :class:`gocats.dag.AbstractEdge` object.

        :return: :class:`gocats.dag.AbstractNode` object of the reverse node in the node_pair associated with the edge if the edge's relationship is assigned, the node_pair is assigned, and the relationship is a :class:`gocats.dag.DirectionalRelationship`; :py:obj:`None` otherwise.
        :rtype: :class:`gocats.dag.AbstractNode` or :py:obj:`None`
        """
        if self.node_pair and self.relationship and isinstance(self.relationship, DirectionalRelationship):
            return self.relationship.reverse(self.node_pair)
        return None

    @property
    def parent_node(self):
        """:py:obj:`property` defining the :class:`gocats.dag.AbstractNode` object forward of the current
        :class:`gocats.dag.AbstractEdge` object. This designation will be unique to scoping-type relationships, although
        this is **not yet specified**.

        :return: :class:`gocats.dag.AbstractNode` object of the forward node in the node_pair associated with the edge if both the edge's relationship and the node_pair are assigned, :py:obj:`None` otherwise.
        :rtype: :class:`gocats.dag.AbstractNode` or :py:obj:`None`
        """
        if self.relationship and self.node_pair:
            return self.relationship.forward(self.node_pair)
        return None

    @property
    def child_node(self):
        """:py:obj:`property` defining the :class:`gocats.dag.AbstractNode` object reverse of the current
        :class:`gocats.dag.AbstractEdge` object. This designation will be unique to scoping-type relationships, although
        this is **not yet specified**.

        :return: :class:`gocats.dag.AbstractNode` object of the reverse node in the node_pair associated with the edge if both the edge's relationship and the node_pair are assigned, :py:obj:`None` otherwise.
        :rtype: :class:`gocats.dag.AbstractNode` or :py:obj:`None`
        """
        if self.relationship and self.node_pair:
            return self.relationship.reverse(self.node_pair)
        return None

    # Will finish these later
    @property
    def actor_node(self):
        """**not yet implemented**

        :return: None
        :rtype: :py:obj:`None`
        """
        return None

    @property
    def recipient_node(self):
        """**not yet implemented**

        :return: None
        :rtype: :py:obj:`None`
        """
        return None

    @property
    def ordinal_prior_node(self):
        """**not yet implemented**

        :return: None
        :rtype: :py:obj:`None`
        """
        return None

    @property
    def ordinal_post_node(self):
        """**not yet implemented**

        :return: None
        :rtype: :py:obj:`None`
        """
        return None

    def other_node(self, node):
        """**not yet implemented**

        A regular method rather than a property, because a property getter cannot receive the `node` argument.

        :param node: A :class:`gocats.dag.AbstractNode` object; currently unused.
        :return: None
        :rtype: :py:obj:`None`
        """
        return None

    def connect_nodes(self, node_pair, allowed_relationships):
        """Adds the current edge object to the :class:`gocats.dag.AbstractNode` objects that are connected by the edge.
        Populates the node_pair with :class:`gocats.dag.AbstractNode` objects.

        :param tuple node_pair: The two :class:`gocats.dag.AbstractNode` objects connected by the edge, in the order of the edge's node IDs.
        :param allowed_relationships: Relationship filter handed on to :func:`AbstractNode.add_edge` of both nodes.
        :return: None
        :rtype: :py:obj:`None`
        """
        self.node_pair = node_pair
        first_node, second_node = node_pair[0], node_pair[1]
        first_node.add_edge(self, allowed_relationships)
        second_node.add_edge(self, allowed_relationships)
class AbstractRelationship(object):
    """A relationship as defined by a [typedef] stanza in an OBO ontology and augmented by GOcats to better interpret
    semantic correspondence.
    """

    def __init__(self):
        """`AbstractRelationship` initializer. All fields start out as empty strings and are filled in afterwards.
        """
        self.id = str()
        self.name = str()
        self.category = str()  # TODO: change category to correspondence_classes DO everywhere.
class DirectionalRelationship(AbstractRelationship):
    """A singly-directional relationship edge connecting two nodes in the graph. The two nodes are designated 'forward'
    and 'reverse.' The 'forward' node semantically succeeds the 'reverse' node in a way that depends on the context of
    the type of relationship describing the edge to which it is applied.
    """

    def __init__(self):
        """`DirectionalRelationship` initializer.
        """
        super().__init__()
        self.inverse_relationship_id = None
        self.inverse_relationship = None
        self.direction = 1  # Defaults as toward node2 (node2 is the 'forward' node)

    def forward(self, pair):
        """Returns the forward node in a node pair that semantically succeeds the other and is independent of the
        directionality of the edge. Default position is the second position [1].

        :param tuple pair: A pair of :class:`gocats.dag.AbstractNode` objects (or of their IDs).
        :return: The forward element of the pair as determined by the pre-defined semantic directionality of the relationship.
        """
        return pair[self.direction]

    def reverse(self, pair):
        """Returns the reverse node in a node pair that semantically precedes the other and is independent of the
        directionality of the edge. Default position is the first position [0].

        :param tuple pair: A pair of :class:`gocats.dag.AbstractNode` objects (or of their IDs).
        :return: The reverse element of the pair as determined by the pre-defined semantic directionality of the relationship.
        """
        # The reverse position is the one that is not the forward position: 1 -> 0 and 0 -> 1.
        return pair[(self.direction + 1) % 2]
class NonDirectionalRelationship(AbstractRelationship):
    """A non-directional relationship whose edge directionality is either non-existent or semantically irrelevant.
    """

    def __init__(self):
        """`NonDirectionalRelationship` initializer. Runs the base initializer so that the instance carries the
        `id`, `name` and `category` fields of every relationship.
        """
        super().__init__()