# !/usr/bin/python3
"""
A subgraph object of an OBOGraph object.
"""
from .dag import OboGraph, AbstractNode
import re
class SubGraph(OboGraph):
    """A subgraph of a provided supergraph with node contents."""

    def __init__(self, super_graph, namespace_filter=None, allowed_relationships=None):
        """SubGraph initializer. Creates a subgraph object of :class:`gocats.dag.OboGraph`. Leave `namespace_filter`
        and `allowed_relationships` as :py:obj:`None` to create the entire ontology graph. Otherwise, provide filters
        to limit what information is pulled into the subgraph.

        :param obj super_graph: A supergraph object i.e. :class:`gocats.godag.GoGraph`.
        :param str namespace_filter: Specify the namespace of a sub-ontology namespace, if one is available for the ontology.
        :param list allowed_relationships: Specify a list of relationships to utilize in the graph, other relationships will be ignored.
        :raises Exception: If the supergraph has a namespace filter that differs from `namespace_filter`, or if
            `allowed_relationships` contains a relationship missing from the supergraph's allowed relationships.
        """
        self.super_graph = super_graph

        super_namespace = self.super_graph.namespace_filter
        if super_namespace and super_namespace != namespace_filter:
            # The message must be formatted *before* it is handed to Exception; an Exception instance has no
            # ``format`` method.
            raise Exception(
                (
                    "Unless a namespace_filter is not specified for a parent_graph, a subgraph's "
                    "namespace_filter must not differ from its parent graph's namespace_filter.\n"
                    "subgraph namespace_filter = {}, supergraph namespace_filter = {}"
                ).format(namespace_filter, super_namespace)
            )

        super_relationships = self.super_graph.allowed_relationships
        if super_relationships and allowed_relationships and any(
            relationship not in super_relationships for relationship in allowed_relationships
        ):
            raise Exception(
                (
                    "Unless an allowed_relationships list is not specified for a parent graph, a subgraph's "
                    "allowed_relationships list must be a subset of, or exactly, its parent graph's "
                    "allowed_relationships list.\n"
                    "subgraph allowed_relationships = {}, supergraph allowed_relationships = {}"
                ).format(allowed_relationships, super_relationships)
            )

        super().__init__(namespace_filter, allowed_relationships)
        self.seeded_size = None  # The number of nodes filtered in the keyword search, used for informational purposes only.
        self.category_node = None
        self._root_id_mapping = None
        self._root_node_mapping = None
        self._content_mapping = None
        self._subgraph_finalized = False

    def _require_category_node(self):
        """Return the subgraph's category node, refusing to continue when none has been set.

        :return: The current :class:`gocats.subdag.CategoryNode`.
        :raises Exception: If :attr:`category_node` is not set.
        """
        if not self.category_node:
            raise Exception("Mapping failed: category node not identified.")
        return self.category_node

    @property
    def root_id_mapping(self):
        """Property describing a mapping :py:obj:`dict` that relates the ID of every descendant of the subgraph's
        :class:`gocats.subdag.CategoryNode` (and the category node's own ID) to the category node's ID. The mapping
        is rebuilt when the graph is flagged as modified or when no mapping has been built yet.

        :return: :py:obj:`dict` of node IDs mapped to the ID of the :class:`gocats.subdag.CategoryNode`.
        :rtype: :py:obj:`dict`
        :raises Exception: If no category node has been identified.
        """
        category_node = self._require_category_node()
        if self._modified or self._root_id_mapping is None:
            mapping = {node.id: category_node.id for node in category_node.descendants}
            mapping[category_node.id] = category_node.id
            self._root_id_mapping = mapping
        return self._root_id_mapping

    @property
    def root_node_mapping(self):
        """Property describing a mapping :py:obj:`dict` that relates every descendant node object of the subgraph's
        :class:`gocats.subdag.CategoryNode` (and the category node itself) to the category node object. The mapping
        is rebuilt when the graph is flagged as modified or when no mapping has been built yet.

        :return: :py:obj:`dict` of node objects mapped to the :class:`gocats.subdag.CategoryNode` object.
        :rtype: :py:obj:`dict`
        :raises Exception: If no category node has been identified.
        """
        category_node = self._require_category_node()
        if self._modified or self._root_node_mapping is None:
            mapping = {node: category_node for node in category_node.descendants}
            mapping[category_node] = category_node
            self._root_node_mapping = mapping
        return self._root_node_mapping

    @property
    def content_mapping(self):
        """Property describing a mapping :py:obj:`dict` that relates the ID of the subgraph's
        :class:`gocats.subdag.CategoryNode` to a :py:obj:`list` of the IDs of its descendants. The mapping is
        rebuilt when the graph is flagged as modified or when no mapping has been built yet.

        :return: :py:obj:`dict` with the category node ID mapped to a :py:obj:`list` of descendant node IDs.
        :rtype: :py:obj:`dict`
        :raises Exception: If no category node has been identified.
        """
        category_node = self._require_category_node()
        if self._modified or self._content_mapping is None:
            self._content_mapping = {category_node.id: [node.id for node in category_node.descendants]}
        return self._content_mapping

    def subnode(self, super_node):
        """Returns the :class:`gocats.subdag.SubGraphNode` for a supergraph node. Calls :func:`add_node` first when
        the node's ID is not yet indexed in the subgraph.

        :param super_node: A node object from the supergraph i.e. :class:`gocats.godag.GoGraphNode`.
        :return: A :class:`gocats.subdag.SubGraphNode` object.
        :raises KeyError: If the node is still not indexed after :func:`add_node` (i.e. it was not accepted).
        """
        if super_node.id not in self.id_index:
            self.add_node(super_node)
        return self.id_index[super_node.id]

    def add_node(self, super_node):
        """Converts a supergraph node into a :class:`gocats.subdag.SubGraphNode` and adds this node to the subgraph
        when it is not already indexed and passes :func:`valid_node`. Sets modification state to :py:obj:`True`.

        :param obj super_node: A node object from the supergraph i.e. :class:`gocats.godag.GoGraphNode`.
        :return: None
        :rtype: :py:obj:`None`
        """
        if super_node.id not in self.id_index:
            subgraph_node = SubGraphNode(super_node, self.allowed_relationships)
            if self.valid_node(subgraph_node):
                super().add_node(subgraph_node)
        self._modified = True

    # TODO: Rename/reconsider this (needs to be similar to instantiate_valid_edges)
    def connect_subnodes(self):
        """Analogous to :func:`gocats.dag.instantiate_valid_edges` and :func:`gocats.dag.AbstractEdge.connect_nodes`.
        Updates child and parent node sets for each :class:`gocats.subdag.SubGraphNode` in the
        :class:`gocats.subdag.SubGraph`, restricted to nodes present in the subgraph. Passes every supergraph edge
        whose two end nodes are both in the subgraph to :func:`add_edge`, counts instances of relationship IDs and
        sets modification state to :py:obj:`True`.

        :return: None
        :rtype: :py:obj:`None`
        """
        id_index = self.id_index
        for subnode in self.node_list:
            super_node = subnode.super_node
            subnode.update_children(
                [id_index[child.id] for child in super_node.child_node_set if child.id in id_index]
            )
            subnode.update_parents(
                [id_index[parent.id] for parent in super_node.parent_node_set if parent.id in id_index]
            )
            # Count how often each relationship type is used in the subgraph and add the edges to the subgraph.
            for edge in super_node.edges:
                if edge.forward_node.id in id_index and edge.reverse_node.id in id_index:
                    self.add_edge(edge)
                    relationship_id = edge.relationship.id
                    self.relationship_count[relationship_id] = self.relationship_count.get(relationship_id, 0) + 1
        self._modified = True

    def greedily_extend_subgraph(self):
        """Extends a seeded subgraph to include all supergraph descendants of the category node's children. Searches
        through the supergraph to add new SubGraphNode objects and then reconnects the subgraph nodes.

        :return: None
        :rtype: :py:obj:`None`
        """
        possible_graph_extension_nodes = set()
        for node in self.category_node.child_node_set:
            # May want to implement this as a generator somehow for efficiency.
            possible_graph_extension_nodes.update(node.super_node.descendants)
        for super_node in possible_graph_extension_nodes:
            if super_node.id not in self.id_index:
                self.add_node(super_node)
        self.connect_subnodes()

    def conservatively_extend_subgraph(self):
        """**Not currently in use.** Needs to be updated to handle CategoryNode.
        Extends a seeded subgraph to include only nodes in the supergraph that occur along paths between nodes in the
        subgraph. Searches through the supergraph to add new node objects.

        :return: None
        :rtype: :py:obj:`None`
        """
        # NOTE(review): ``self.representative_node`` is not assigned anywhere in this class; confirm it exists on
        # the parent class before enabling this method.
        graph_extension_nodes = set()
        for subleaf in self.leaves:
            start_node = self.super_graph.id_index[subleaf.id]
            end_node = self.super_graph.id_index[self.representative_node.id]
            graph_extension_nodes.update(self.nodes_between(start_node, end_node))
        for super_node in graph_extension_nodes:
            if super_node.id not in self.id_index and self.valid_node(super_node):
                self.add_node(super_node)
        self.connect_subnodes()

    def remove_orphan_paths(self):
        """**Not currently in use.** Needs to be updated to handle CategoryNode.
        Removes nodes and their descendants from the subgraph which do not root to the
        category-representative node.

        :return: None
        :rtype: :py:obj:`None`
        """
        # NOTE(review): ``self.representative_node`` is not assigned anywhere in this class; confirm it exists on
        # the parent class before enabling this method.
        for orphan in self.orphans:
            orphaned_descendants = orphan.descendants - self.representative_node.descendants
            if orphaned_descendants:
                for descendant in orphaned_descendants:
                    self.remove_node(descendant)
            self.remove_node(orphan)

    @staticmethod
    def find_representative_nodes(subgraph, search_string_list):
        """Compiles a list of candidate :class:`gocats.subdag.SubGraphNode` objects from the
        :class:`gocats.subdag.SubGraph` object based on a list of search strings matching strings in the names of the
        nodes (using regular expressions; a match directly preceded or followed by a hyphen is rejected). Leaves and
        obsolete nodes are never candidates. Returns a list containing the single candidate node with the highest
        number of descendants when possible, returns the sole node if the subgraph only contains one node, returns a
        list of all seeded nodes when there are no candidates, or aborts if the subgraph is empty.

        :param subgraph: A :class:`gocats.subdag.SubGraph` object.
        :param search_string_list: A :py:obj:`list` of search term :py:obj:`str` entries. Each entry is inserted into a regular expression unescaped.
        :return: A list of one or more candidate term :class:`gocats.subgraph.SubGraphNode` chosen as the subgraph's representative ontology term(s).
        :raises Exception: If the subgraph contains no nodes.
        """
        if not subgraph.node_list:
            raise Exception("Subgraph did not seed any nodes from the supergraph! Aborting.")
        if len(subgraph.node_list) == 1:
            # Only one node was seeded, so it is the representative by default.
            return [subgraph.node_list[0]]

        def name_matches(node):
            # Raw strings keep the backslashes literal for the regex engine without invalid-escape warnings.
            return any(
                re.search(r'(?<!\-)' + search_string + r'(?!\-)', node.name)
                for search_string in search_string_list
            )

        candidates = [
            node for node in subgraph.node_list
            if name_matches(node) and node not in subgraph.leaves and not node.obsolete
        ]
        if candidates:
            # On ties the earliest candidate wins, as ``max`` returns the first maximal item.
            return [max(candidates, key=lambda node: len(node.descendants))]
        return list(subgraph.node_list)

    @staticmethod
    def from_filtered_graph(super_graph, subgraph_name, keyword_list, namespace_filter=None, allowed_relationships=None, extension='greedy'):
        """Staticmethod for extracting a subgraph from the supergraph by selecting nodes that contain vocabulary in the
        supplied keyword list. Leave `namespace_filter` and `allowed_relationships` as :py:obj:`None` to create the
        entire ontology graph. Otherwise, provide filters to limit what information is pulled into the subgraph.
        Graph `extension` variable defaults to 'greedy' which calls :func:`greedily_extend_subgraph` to add nodes to the
        subgraph after instantiation. Conversely, 'conservative' may be used to call
        :func:`conservatively_extend_subgraph` for this function. Any other value leaves the seeded subgraph
        unextended.

        :param obj super_graph: A supergraph object i.e. :class:`gocats.godag.GoGraph`.
        :param str subgraph_name: The name of the subgraph being created; will be used as the name of the :class:`gocats.subdag.CategoryNode`.
        :param keyword_list: A :py:obj:`list` of :py:obj:`str` entries used to query the supergraph for concepts to be extracted into subgraphs. Entries are lower-cased before use.
        :param str namespace_filter: Specify the namespace of a sub-ontology namespace, if one is available for the ontology.
        :param list allowed_relationships: Specify a list of relationships to utilize in the graph, other relationships will be ignored.
        :param str extension: Specify 'greedy' or 'conservative' to determine how subgraphs will be extended after creation (defaults to greedy).
        :return: A :class:`gocats.subdag.SubGraph` object.
        """
        subgraph = SubGraph(super_graph, namespace_filter, allowed_relationships)

        # Seed the subgraph with the supergraph nodes matching the keywords.
        keyword_list = [word.lower() for word in keyword_list]
        filtered_nodes = super_graph.filter_nodes(keyword_list)
        subgraph.seeded_size = len(filtered_nodes)
        for super_node in filtered_nodes:
            subgraph.add_node(super_node)
        subgraph.connect_subnodes()

        # Pick the representative node(s) and hang them under a single category node.
        representative_nodes = subgraph.find_representative_nodes(subgraph, keyword_list)
        subgraph.category_node = CategoryNode(subgraph_name, representative_nodes, namespace_filter)
        subgraph.root_nodes.extend(representative_nodes)

        if extension == 'greedy':
            subgraph.greedily_extend_subgraph()
        elif extension == 'conservative':
            subgraph.conservatively_extend_subgraph()

        # NOTE(review): this set is never used afterwards. It is retained because reading ``orphans`` and
        # ``descendants`` may refresh cached state on the graph/nodes - confirm before removing.
        subgraph_orphans_descendants = set()
        for orphan in subgraph.orphans:
            for node in orphan.descendants:
                subgraph_orphans_descendants.add(node)
        subgraph_orphans_descendants.update([orphan for orphan in subgraph.orphans])

        subgraph._subgraph_finalized = True
        return subgraph
class SubGraphNode(AbstractNode):
    """An instance of a node within a subgraph of an OBO ontology (supergraph)
    """

    def __init__(self, super_node=None, allowed_relationships=None):
        """SubGraphNode initializer. Inherits from :class:`gocats.dag.AbstractNode` and contains a reference to the
        supergraph node it represents e.g. :class:`gocats.godag.GoGraphNode`. The parent initializer is not called;
        ID, name, definition, namespace and obsolete state are read through to the supergraph node by properties.

        :param super_node: A node from the `supergraph`.
        :param allowed_relationships: **Not currently used** Used to specify a list of allowable relationships evaluated between nodes.
        """
        self.allowed_relationships = allowed_relationships
        self.super_node = super_node
        self.parent_node_set = set()
        self.child_node_set = set()
        self._modified = True
        self._descendants = None
        self._ancestors = None

    # TODO: add in update_parent_node_set and update_child_node_set with a _modified switch !!!!
    @property
    def super_edges(self):
        """:py:obj:`property` describing the set of edges referenced in the supergraph node, filtered to only those
        edges whose parent node ID belongs to one of this node's subgraph parents and whose child node ID belongs to
        one of this node's subgraph children.

        :return: A set of :class:`gocats.subgraph.SubGraphNode` edges that were copied from the supergraph node.
        :rtype: :py:obj:`set`
        """
        # Build the ID lookups once instead of once per edge.
        parent_ids = {node.id for node in self.parent_node_set}
        child_ids = {node.id for node in self.child_node_set}
        # NOTE(review): requiring BOTH ends to be among this node's parents/children looks very restrictive for
        # the node's own edges - confirm whether ``or`` was intended.
        return {
            edge for edge in self.super_node.edges
            if edge.parent_node.id in parent_ids and edge.child_node.id in child_ids
        }

    @property
    def id(self):
        """:py:obj:`property` describing the ID of the supernode

        :return: The ID of a supernode e.g. :class:`gocats.godag.GoGraphNode`
        :rtype: :py:obj:`str`
        """
        return self.super_node.id

    @property
    def name(self):
        """:py:obj:`property` describing the name of the supernode

        :return: The name of a supernode e.g. :class:`gocats.godag.GoGraphNode`
        :rtype: :py:obj:`str`
        """
        return self.super_node.name

    @property
    def definition(self):
        """:py:obj:`property` describing the definition of the supernode

        :return: The definition of a supernode e.g. :class:`gocats.godag.GoGraphNode`
        :rtype: :py:obj:`str`
        """
        return self.super_node.definition

    @property
    def namespace(self):
        """:py:obj:`property` describing the namespace of the supernode

        :return: A namespace of a supernode e.g. :class:`gocats.godag.GoGraphNode`
        :rtype: :py:obj:`str`
        """
        return self.super_node.namespace

    @property
    def obsolete(self):
        """:py:obj:`property` describing whether or not supernode is marked as obsolete.

        :return: :py:obj:`True` or :py:obj:`False`
        """
        return self.super_node.obsolete

    def update_parents(self, parent_set):
        """Updates the parent_node_set with a set of new parents provided. Sets modification state to :py:obj:`True`.

        :param parent_set: An iterable of parent nodes to be added to this object's parent_node_set.
        :return: None
        :rtype: :py:obj:`None`
        """
        self.parent_node_set.update(parent_set)
        self._modified = True

    def update_children(self, child_set):
        """Updates the child_node_set with a set of new children provided. Sets modification state to :py:obj:`True`.

        :param child_set: An iterable of child nodes to be added to this object's child_node_set.
        :return: None
        :rtype: :py:obj:`None`
        """
        self.child_node_set.update(child_set)
        self._modified = True
class CategoryNode(AbstractNode):
    """A special node added to the subgraph which contains all representative nodes identified
    and serves as the single representative of the subgraph which represents a concept.
    """

    def __init__(self, category_name, representative_node_list, namespace_filter=None):
        """CategoryNode initializer. The representative nodes become the children of this node; the node itself has
        no parents and is never obsolete.

        :param str category_name: The name of the category; also used as the node ID unless there is exactly one representative node.
        :param representative_node_list: An iterable of representative nodes to place directly under this node.
        :param namespace_filter: The namespace filter of the owning subgraph, if any.
        """
        self.parent_node_set = set()
        self.obsolete = False
        self._modified = True
        self._descendants = None
        self._ancestors = None
        self.namespace_filter = namespace_filter  # Needed to make the node valid.
        if self.namespace_filter:
            # NOTE(review): indexing with [0] yields the first character when a plain string is supplied -
            # confirm whether the filter is expected to be a sequence of namespaces here.
            self.namespace = namespace_filter[0]
            # Misspelled legacy attribute kept so any existing reader keeps working.
            self.namesapce = self.namespace
        self.name = category_name
        self.child_node_set = set(representative_node_list)

    @property
    def id(self):
        """:py:obj:`property` describing the ID of the category node.

        :return: The ID of the only child node when there is exactly one child, otherwise the category name.
        """
        if len(self.child_node_set) == 1:
            return next(iter(self.child_node_set)).id
        return self.name