Source code for CoRe.ncip

import os,sys
import numpy as np
import math
import networkx as nx
import pandas as pd
from scipy import sparse
import scipy.stats as st
from CoRe.cypher_commands import command_set
from tqdm import tqdm
import random as rand
#import BA_C.BA as BA

[docs]class ncip:
    def __init__(self,_property=None):
        self.edge_data = {}
        self.edge_data['input'] = []
        self.edge_data['output'] = []
        self.edge_data['reaction'] = []
        self.edge_data['name'] = []
        #self.edge_data['category'] = []
        self.edge_data['schemaClass'] = []
        self.edge_data['module'] = []

        self.property = _property

        self.node_data = {}
        self.node_data['node'] = []
        self.node_data['name'] = []
        self.node_data['genome-encoded'] = []
        self.node_data['schemaClass'] = []
        self.node_data['reference gene'] = []
        self.edge_log = []

[docs]    def load_data(self,_edge_data,_node_data):
        self.edge_data = _edge_data
        self.node_data = _node_data

[docs]    def load_graph(self,filename):
        self.G_d = nx.read_gml(filename)

[docs]    def construct_edges(self,inputs,outputs,neglect_ids=[],neglect_class=[]):
        """
        Converts reactions into a local graph topology.
        If a reaction is: A + B -> C + D + E,
        then A and B are two input nodes to three separate output nodes C, D, and E.
        If the reaction is chemically balanced then the edges are bidirectional,
        else edges are unidirectional.

        Parameters
        ----------
        inputs: neo4j result
            Result object from neo4j containing Reactome physical entities nodes that are inputs to an event.

        outputs: neo4j result
            object from neo4j containing Reactome physical entities nodes that are outputs to an event.

        neglect_ids: list
            stId of Reactome physical entities that should be neglected from the network.

        neglect_class: list
            schemaClass of Reactome physical entities that should be neglected from the network.

        Returns
        -------
        edge_nodes: dict
            Dict containing the fields ['inputs','outputs','input_names','output_names'].
            'inputs' and 'outputs' for the input and output node property values of an edge.
            'input names' and 'output_names' are tuples (displayName,schemaClass) with values from
            the Reactome Data Schema, https://reactome.org/content/schema/PhysicalEntity.
        """

        self.edge_nodes = {}
        self.edge_nodes['inputs'] = []            # Property value as input node id
        self.edge_nodes['outputs'] = []           # Property value as output node id
        self.edge_nodes['input_names'] = []       # Physical entity name of input nodes
        self.edge_nodes['output_names'] = []      # Physical entity name of output nodes

        node_in_names = []
        node_out_names = []

        for res_in in inputs:
            node_in = res_in[self.property]

            if node_in not in neglect_ids and res_in['schemaClass'] not in neglect_class:

                for res_out in outputs:
                    node_out = res_out[self.property]

                    if node_out not in neglect_ids  and res_out['schemaClass'] not in neglect_class:
                        self.edge_nodes['inputs'].append(node_in)
                        self.edge_nodes['outputs'].append(node_out)
                        self.edge_nodes['input_names'].append((res_in['displayName'],res_in['schemaClass']))
                        self.edge_nodes['output_names'].append((res_out['displayName'],res_out['schemaClass']))

[docs]    def construct_exo_edges(self,inputs,neglect_ids=[],neglect_class=[]):
        """
        Converts reactions into a local graph topology.
        If a reaction is: A + B -> C + D + E,
        then A and B are two input nodes to three separate output nodes C, D, and E.
        If the reaction is chemically balanced then the edges are bidirectional,
        else edges are unidirectional.

        Parameters
        ----------
        inputs: neo4j result
            Result object from neo4j containing Reactome physical entities nodes that are inputs to an event.

        neglect_ids: list
            stId of Reactome physical entities that should be neglected from the network.

        neglect_class: list
            schemaClass of Reactome physical entities that should be neglected from the network.

        Returns
        -------
        edge_nodes: dict
            Dict containing the fields ['inputs','outputs','input_names','output_names'].
            'inputs' and 'outputs' for the input and output node property values of an edge.
            'input names' and 'output_names' are tuples (displayName,schemaClass) with values from
            the Reactome Data Schema, https://reactome.org/content/schema/PhysicalEntity.
        """

        self.edge_nodes = {}
        self.edge_nodes['inputs'] = []            # Property value as input node id
        self.edge_nodes['outputs'] = []           # Property value as output node id
        self.edge_nodes['input_names'] = []       # Physical entity name of input nodes
        self.edge_nodes['output_names'] = []      # Physical entity name of output nodes

        node_in_names = []
        node_out_names = []

        for res_in in inputs:
            node_in = res_in[self.property]

            if node_in not in neglect_ids and res_in['schemaClass'] not in neglect_class:
                self.edge_nodes['inputs'].append(node_in)
                self.edge_nodes['outputs'].append(node_in)
                self.edge_nodes['input_names'].append((res_in['displayName'],res_in['schemaClass']))
                self.edge_nodes['output_names'].append((res_in['displayName'],res_in['schemaClass']))

[docs]    def add_edges(self,session,reaction_info):
        """
        Adds information about an edge to the edge_data and node_data.

        Parameters
        ----------
        session: neo4j session
            To query the neo4j reactome database

        command_set: dict
            Set of commands to query the neo4j reactome database.

        reaction_info: list
            Row entry of the ReactionLikeEvents dataframe containing stId, category, schemaClass, and name.
        """

        ref_gene_info = {}
        ref_gene_keys = []
        species_name = {}

        for i in range(0,len(self.edge_nodes['inputs'])):
            if self.edge_nodes['input_names'][i][1]=='EntityWithAccessionedSequence':
                if self.edge_nodes['inputs'][i] not in ref_gene_keys:
                    ref_gene_command = command_set['reference_gene'].replace('#',self.edge_nodes['inputs'][i])
                    result = session.run(ref_gene_command)
                    refgene_inputs = [res[0] for res in result]

                    spec_command = "MATCH(n:EntityWithAccessionedSequence {stId:'"+ self.edge_nodes['inputs'][i] +"'}) RETURN n.speciesName"
                    result = session.run(spec_command)
                    spec_name = [res[0] for res in result][0]

                    species_name[self.edge_nodes['inputs'][i]] = spec_name

                    try:
                        gene_name = refgene_inputs[0]['geneName'][0]
                    except TypeError:
                        gene_name = refgene_inputs[0]['name'][0]

                    edge_log_name_in = gene_name

                    ref_gene_keys.append(self.edge_nodes['inputs'][i])
                    ref_gene_info[self.edge_nodes['inputs'][i]] = (edge_log_name_in,gene_name)

            if self.edge_nodes['output_names'][i][1]=='EntityWithAccessionedSequence':
                if self.edge_nodes['outputs'][i] not in ref_gene_keys:
                    ref_gene_command = command_set['reference_gene'].replace('#',self.edge_nodes['outputs'][i])
                    result = session.run(ref_gene_command)
                    refgene_outputs = [res[0] for res in result]

                    spec_command = "MATCH(n:EntityWithAccessionedSequence {stId:'"+ self.edge_nodes['outputs'][i] +"'}) RETURN n.speciesName"
                    result = session.run(spec_command)
                    spec_name = [res[0] for res in result][0]

                    species_name[self.edge_nodes['outputs'][i]] = spec_name

                    try:
                        gene_name = refgene_outputs[0]['geneName'][0]
                    except TypeError:
                        gene_name = refgene_outputs[0]['name'][0]

                    edge_log_name_out = gene_name

                    ref_gene_keys.append(self.edge_nodes['outputs'][i])
                    ref_gene_info[self.edge_nodes['outputs'][i]] = (edge_log_name_out,gene_name)

        #print('Reference gene search completed')

        for i in range(0,len(self.edge_nodes['inputs'])):
            edge_log_name_in = self.edge_nodes['inputs'][i]
            edge_log_name_out = self.edge_nodes['outputs'][i]

            if self.edge_nodes['input_names'][i][1]=='EntityWithAccessionedSequence':
                try:
                    edge_log_name_in = ref_gene_info[self.edge_nodes['inputs'][i]][0]
                except KeyError:
                    pass

            if self.edge_nodes['output_names'][i][1]=='EntityWithAccessionedSequence':
                try:
                    edge_log_name_out = ref_gene_info[self.edge_nodes['outputs'][i]][0]
                except KeyError:
                    pass

            edge_tag = edge_log_name_in+','+edge_log_name_out

            if edge_tag in self.edge_log:
                pass
            else:
                self.edge_log.append(edge_tag)

                self.edge_data['input'].append(edge_log_name_in)
                self.edge_data['output'].append(edge_log_name_out)
                self.edge_data['reaction'].append(reaction_info['reaction'])
                #self.edge_data['category'].append(reaction_info['category'])
                self.edge_data['schemaClass'].append(reaction_info['schemaClass'])
                self.edge_data['name'].append(reaction_info['name'])
                self.edge_data['module'].append(reaction_info['module'])

                if self.edge_data['input'][-1] not in self.node_data['node']:
                    self.node_data['node'].append(self.edge_data['input'][-1])
                    self.node_data['name'].append(self.edge_nodes['input_names'][i][0])
                    self.node_data['schemaClass'].append(self.edge_nodes['input_names'][i][1])

                    if self.edge_nodes['input_names'][i][1]=='EntityWithAccessionedSequence':
                        if species_name[self.edge_nodes['inputs'][i]]=='Homo sapiens':
                            self.node_data['reference gene'].append(ref_gene_info[self.edge_nodes['inputs'][i]][1])
                            self.node_data['genome-encoded'].append(True)
                        else:
                            self.node_data['genome-encoded'].append(False)
                            self.node_data['reference gene'].append('None')
                    else:
                        self.node_data['genome-encoded'].append(False)
                        self.node_data['reference gene'].append('None')
                        #node_data['reference stId'].append('None')

                if self.edge_data['output'][-1] not in self.node_data['node']:
                    self.node_data['node'].append(self.edge_data['output'][-1])
                    self.node_data['name'].append(self.edge_nodes['output_names'][i][0])
                    self.node_data['schemaClass'].append(self.edge_nodes['output_names'][i][1])

                    if self.edge_nodes['output_names'][i][1]=='EntityWithAccessionedSequence':
                        if species_name[self.edge_nodes['outputs'][i]]=='Homo sapiens':
                            self.node_data['reference gene'].append(ref_gene_info[self.edge_nodes['outputs'][i]][1])
                            self.node_data['genome-encoded'].append(True)
                        else:
                            self.node_data['genome-encoded'].append(False)
                            self.node_data['reference gene'].append('None')
                    else:
                        self.node_data['genome-encoded'].append(False)
                        self.node_data['reference gene'].append('None')

[docs]    def make_graph(self):
        """
        Creates a networkx graph using the connecitivity data and assigns a color value to each edge
        depending the subpathway they belong to.
        """
        self.G_d = nx.DiGraph()
        edge_category = {}
        edge_names = {}
        edge_model = {}
        edge_module = {}
        edge_reaction = {}

        check1, check2 = 0, 0

        #for n1,n2,prop,sc,name in zip(edge_data['input'],edge_data['output'],edge_data['category'],edge_data['schemaClass'],edge_data['name']):
        for i in tqdm(range(0,len(self.edge_data['input']))):
            #if self.edge_data['input'][i]!=self.edge_data['output'][i]:
            if self.G_d.has_edge(self.edge_data['input'][i],self.edge_data['output'][i]):
                pass
            else:
                #if self.edge_data['input'][i]!=self.edge_data['output'][i]:
                #if (self.edge_data['input'][i]==self.edge_data['output'][i]):
                #    pass
                #else:
                self.G_d.add_edge(self.edge_data['input'][i],self.edge_data['output'][i])

                edge_names[(self.edge_data['input'][i],self.edge_data['output'][i])] = self.edge_data['name'][i]

                if 'binding' in self.edge_data['name'][i] or 'Binding' in self.edge_data['name'][i]:
                    category_name = 'binding'
                elif 'Negative' in self.edge_data['schemaClass'][i] or 'nhibit' in self.edge_data['name'][i]:
                    category_name = 'negative regulation'
                elif 'Positive' in self.edge_data['schemaClass'][i] or 'egulation' in self.edge_data['name'][i]:
                    category_name = 'positive regulation'
                elif 'phorylat' in self.edge_data['name'][i]:
                    category_name = 'phosphorylation'
                elif 'Expression'  in self.edge_data['name'][i]:
                    category_name = 'expression'
                elif self.edge_data['name'][i]=='PPI':
                    category_name = 'PPI'
                else:
                    category_name = 'other'

                edge_category[(self.edge_data['input'][i],self.edge_data['output'][i])] = category_name
                edge_module[(self.edge_data['input'][i],self.edge_data['output'][i])] = self.edge_data['module'][i]
                edge_reaction[(self.edge_data['input'][i],self.edge_data['output'][i])] = self.edge_data['reaction'][i]


                if 'nhibit' in self.edge_data['name'][i] or 'Negative' in self.edge_data['schemaClass'][i]:
                #if 'Exocytosis' in edge_data['name'][i] or 'Inhibit' in edge_data['name'][i] or 'Negative' in edge_data['schemaClass'][i]:
                    edge_model[(self.edge_data['input'][i],self.edge_data['output'][i])] = 'down'
                else:
                    edge_model[(self.edge_data['input'][i],self.edge_data['output'][i])] = 'up'

                #if True==False:
                #if edge_data['category'][i]=='binding' or edge_data['category'][i]=='dissociation' or edge_data['category'][i]=='PPI':
                #if prop!='PositiveRegulation' and prop!='NegativeRegulation':
                if self.edge_data['schemaClass'][i]=='Reaction' or self.edge_data['schemaClass'][i]=='PPI':# and 'Exocytosis' not in edge_data['name'][i]:
                    self.G_d.add_edge(self.edge_data['output'][i],self.edge_data['input'][i])

                    edge_names[(self.edge_data['output'][i],self.edge_data['input'][i])] = self.edge_data['name'][i]
                    edge_category[(self.edge_data['output'][i],self.edge_data['input'][i])] = category_name
                    edge_model[(self.edge_data['output'][i],self.edge_data['input'][i])] = edge_model[(self.edge_data['input'][i],self.edge_data['output'][i])]
                    edge_module[(self.edge_data['output'][i],self.edge_data['input'][i])] = self.edge_data['module'][i]
                    edge_reaction[(self.edge_data['output'][i],self.edge_data['input'][i])] = self.edge_data['reaction'][i]

        nx.set_edge_attributes(self.G_d, edge_category, "category")
        nx.set_edge_attributes(self.G_d, edge_names, "name")
        nx.set_edge_attributes(self.G_d, edge_model, "channel")
        nx.set_edge_attributes(self.G_d, edge_module, "module")
        nx.set_edge_attributes(self.G_d, edge_reaction, "reaction")

        node_names = {}
        node_genes = {}
        node_w_seq = {}
        node_types = {}

        for index,row in self.node_data.iterrows():
            id = row['node']

            node_names[id] = row['name']
            node_genes[id] = row['reference gene']
            node_w_seq[id] = row['genome-encoded']
            node_types[id] = row['schemaClass']

        nx.set_node_attributes(self.G_d, node_names, "name")
        nx.set_node_attributes(self.G_d, node_genes, "gene")
        nx.set_node_attributes(self.G_d, node_w_seq, "sequenced")
        nx.set_node_attributes(self.G_d, node_types, "class")

[docs]    def state_entropy(self,state,sources,reference_state=[]):
        """
        Computes the entropy of the network state with respect to the maximum entropy state.
        For binary valued network nodes, the state is: {P(v=1),P(v=0)}, where v=1 means the physical
        entity at high concentration (or copy number). Similary, v=0 means the physical entity is
        at low concentration (or copy number). The total number of nodes in the network is n and the
        number of states for each node is m.

        Nodes that act as sources are neglected from the entropy calculation, to determine only
        the effect of the information propagation through the network.

        Parameters
        ----------
        state: array_like
            A [mn,1] numpy array.

        sources: list
            Node numbers that are sources in the network.

        Returns
        -------
        H: float
            Entropy of the network state in bits.
        """

        H = 0.0

        maxEnt_state = (1.0/float(self.code_length))*np.ones(shape=(self.code_length,))

        if len(reference_state)==0:
            for i in range(0,int(state.shape[0]/self.code_length)):
                if i not in sources:
                    H += st.entropy(state[self.code_length*i:self.code_length*(i+1),0],maxEnt_state,base=2)
                    #print(state[self.code_length*i:self.code_length*(i+1),0])
        else:
            for i in range(0,int(state.shape[0]/self.code_length)):
                if i not in sources:
                    this_entropy = st.entropy(state[self.code_length*i:self.code_length*(i+1),0],reference_state[self.code_length*i:self.code_length*(i+1),0],base=2)
                    if this_entropy!=np.inf:
                        H += this_entropy
                    #print(state[self.code_length*i:self.code_length*(i+1),0])

        return H

[docs]    def entropy_drop_and_gain(self,state_1,state_2,reference_state,sources):
        """
        Computes the difference between two Kullback-Leibeler divergences, D_KL(state_1||reference_state), and
        D_KL(state_2||reference_state), for each unconstrained node in the network.

        Parameters
        ----------
        state: array_like
            A [mn,1] numpy array.

        sources: list
            Node numbers that are sources in the network.

        Returns
        -------
        drop: float
            Sum of all difference in the relative entropy values for all the nodes in the network where, state_2 > state_1.

        gain: float
            Sum of all difference in the relative entropy values for all the nodes in the network where, state_1 <= state_2.
        """

        H_drop = 0.0
        H_gain = 0.0

        maxEnt_state = (1.0/float(self.code_length))*np.ones(shape=(self.code_length,))

        for i in range(0,int(state_1.shape[0]/self.code_length)):
            if i not in sources:
                this_entropy_1 = st.entropy(state_1[self.code_length*i:self.code_length*(i+1),0],reference_state[self.code_length*i:self.code_length*(i+1),0],base=2)

                this_entropy_2 = st.entropy(state_2[self.code_length*i:self.code_length*(i+1),0],reference_state[self.code_length*i:self.code_length*(i+1),0],base=2)

                if this_entropy_1!=np.inf and this_entropy_2!=np.inf:
                    if this_entropy_1>=this_entropy_2:
                        H_gain += this_entropy_1 - this_entropy_2
                    else:
                        H_drop += this_entropy_1 - this_entropy_2
                    #print(state[self.code_length*i:self.code_length*(i+1),0])

        return H_drop, H_gain

[docs]    def get_final_state_old(self,X,sources):
        """
        Computes the final state of the network using the initial state X and the information transfer
        matrix C_dense. The state of the sources are fixed during the information transfer. The number of
        nodes in the network is n and the number of states for each node is m.

        Parameters
        ----------
        X: array_like
            A [mn,1] numpy array.

        sources: list
            Node numbers that are sources in the network, whose probabilistic state, e.g. {P[v=1],P[v=0]}
            has to be fixed during information propagation.

        Returns
        -------
        X: array_like
            Stationary state of the network.

        steps: int
            Number of steps it took to reach the stationary state of the network.
        """
        tol = 1.0
        steps = 0

        while tol>1e-10:
            X_old = np.array(X, copy=True)
            X = self.C_sparse.dot(X)

            for s in sources:
                X[2*s:2*(s+1),:] = X_old[2*s:2*(s+1),:]

            tol = np.linalg.norm(X-X_old,1)

            steps += 1

        return X, steps

[docs]    def get_final_state(self,X,sources):
        """
        Computes the final state of the network using the initial state X and the information transfer
        matrix C_dense. The state of the sources are fixed during the information transfer. The number of
        nodes in the network is n and the number of states for each node is m.

        Parameters
        ----------
        X: array_like
            A [mn,1] numpy array.

        sources: list
            Node numbers that are sources in the network, whose probabilistic state, e.g. {P[v=1],P[v=0]}
            has to be fixed during information propagation.

        Returns
        -------
        X: array_like
            Stationary state of the network.

        steps: int
            Number of steps it took to reach the stationary state of the network.
        """

        self.apply_BC(sources)

        tol = 1.0
        steps = 0

        while tol>1e-4:
            X_old = np.array(X, copy=True)
            X = self.C_sparse_with_BC.dot(X)

            tol = np.linalg.norm(X-X_old,1)

            steps += 1

        return X, steps

[docs]    def apply_BC(self,sources):
        """
        Computes the final state of the network using the initial state X and the information transfer
        matrix C_dense. The state of the sources are fixed during the information transfer. The number of
        nodes in the network is n and the number of states for each node is m.

        Parameters
        ----------

        sources: list
            Node numbers that are sources in the network, whose probabilistic state, e.g. {P[v=1],P[v=0]}
            has to be fixed during information propagation.

        """

        C_dense = self.C_sparse.todense()

        for s in sources:
            C_dense[2*s:2*(s+1),:] = 0
            C_dense[2*s:2*(s+1),2*s:2*(s+1)] = np.identity(2)

        # Sparse matrix after imposing additional boundary conditions
        # Stored as a class variable for nullspace, etc. calculations
        self.C_sparse_with_BC = sparse.csr_matrix(C_dense)

[docs]    def construct_C(self,rho=0,h=1,neglect_modules=[]):
        """
        Creates the information transition matrix for the graph using the adjacency matrix and
        interference due to multiple information sources.

        Parameters
        ----------
        G_d: networkx DiGraph
            Graph of a Reactome TopLevelPathway consisting of ReactionLikeEvents and regulations.

        rho: float
            Communication error in each edge.

        Returns
        -------
        C: numpy matrix
            Information transition matrix of the network.
        """

        self.code_length = int(2**h)
        m_rho = rho/(self.code_length - 1)

        up_regulation, down_regulation = np.eye((self.code_length)), np.rot90(np.eye((self.code_length)))

        up_regulation *= (1 - rho)*up_regulation
        down_regulation *= (1 - rho)*down_regulation

        up_regulation += m_rho*(np.ones((self.code_length,self.code_length)) - np.eye(self.code_length))
        down_regulation += m_rho*(np.ones((self.code_length,self.code_length)) - np.rot90(np.eye(self.code_length)))

        null_communicator = (1.0/float(self.code_length))*np.ones((self.code_length,self.code_length))

        # bao = BA()
        # bao.set_response(up_regulation)
        # print(bao.get_CC())
        #
        # bao.set_response(down_regulation)
        # print(bao.get_CC())

        n = len(self.G_d.nodes())
        C = np.zeros(shape=(self.code_length*n,self.code_length*n))
        A = np.transpose(nx.to_numpy_array(self.G_d))

        node_list = list(self.G_d.nodes)

        node_idx = {}

        for i in range(0,len(node_list)):
            node_idx[node_list[i]] = i

        # for n1 in range(0,A.shape[0]):
        #     in_degree = self.G_d.in_degree(node_list[n1])
        #     for n2 in range(0,A.shape[0]):
        #         if A[n1,n2]>0:
        #             if self.G_d.get_edge_data(node_list[n2],node_list[n1])['channel']=='down':
        #                 C[self.code_length*n1:self.code_length*(n1+1),self.code_length*n2:self.code_length*(n2+1)] = (1.0/in_degree)*down_regulation
        #             else:
        #                 C[self.code_length*n1:self.code_length*(n1+1),self.code_length*n2:self.code_length*(n2+1)] = (1.0/in_degree)*up_regulation

        for e in self.G_d.edges():
            i, j = node_idx[e[0]], node_idx[e[1]]

            if 'PPI' in self.G_d.get_edge_data(e[0],e[1])['category']:
                w = 1.0
            else:
                w = 1.0

            if self.G_d.get_edge_data(e[0],e[1])['channel']=='down':
                C[self.code_length*j:self.code_length*(j+1),self.code_length*i:self.code_length*(i+1)] = w*down_regulation
            else:
                C[self.code_length*j:self.code_length*(j+1),self.code_length*i:self.code_length*(i+1)] = w*up_regulation

            if self.G_d.get_edge_data(e[0],e[1])['module'] in neglect_modules:
                C[self.code_length*j:self.code_length*(j+1),self.code_length*i:self.code_length*(i+1)] = w*null_communicator

        for i in range(0,C.shape[0]):
            total_row = np.sum(C[i,:])

            if total_row>0.0:
                C[i,:] *= 1.0/total_row

        # Apply bounday condition
        for n in range(0,len(node_list)):
            if self.G_d.in_degree(node_list[n])==0:
                C[self.code_length*n:self.code_length*(n+1),self.code_length*n:self.code_length*(n+1)] = np.identity(self.code_length)

        self.C_sparse = sparse.csr_matrix(C)
        self.up_regulation = up_regulation
        self.down_regulation = down_regulation

[docs]    def disconnect_nodes(self,classname,sources):
        """
        Removes nodes of the specified class from the network communication (or transition) matrix.

        Parameters
        ----------
        classname: string
            Class of the nodes to be disconnected from the network communication matrix.

        sources: list
            List of source nodes which remains to be connected to the communication matrix.
        """

        nodes = list(self.G_d.nodes)

        C_dense = self.C_sparse.todense()

        for n in self.G_d.nodes(data=True):
            if classname==n[1]['class'] and n[0] not in sources:
                i = nodes.index(n[0])

                C_dense[:,self.code_length*i:self.code_length*(i+1)] = 0
                C_dense[self.code_length*i:self.code_length*(i+1),self.code_length*i:self.code_length*(i+1)] = np.identity(self.code_length)

        for i in range(0,C_dense.shape[0]):
            s = np.sum(C_dense[i,:])
            if s>0.0:
                C_dense[i,:] *= 1.0/np.sum(C_dense[i,:])
            else:
                C_dense[i,i] = 1.0

        self.C_sparse = sparse.csr_matrix(C_dense)

[docs]    def disconnect_drug_nodes(self,skip='None'):
        """
        Removes nodes of the specified class from the network communication (or transition) matrix.

        Parameters
        ----------
        classname: string
            Class of the nodes to be disconnected from the network communication matrix.

        sources: list
            List of source nodes which remains to be connected to the communication matrix.
        """

        drugs = []
        drug_ids = []

        nodes = list(self.G_d.nodes)

        all_nodes = self.G_d.nodes(data=True)

        for n in self.G_d.nodes(data=True):
            if 'Drug' in n[1]['class'] and skip not in n[1]['name']:
                drugs.append(n[1]['name'].split(' [')[0])
                drug_ids.append(n[0])

        total_edges_list = []
        total_edges_to_delete = 0
        total_deletions = []

        for e in self.G_d.edges:
            if e[0] in drug_ids:
                total_edges_to_delete += 1
                total_edges_list.append(self.G_d[e[0]][e[1]]['reaction'])
                total_deletions.append((nodes.index(e[0]),nodes.index(e[1])))

        C_dense = self.C_sparse.todense()

        for deletion in total_deletions:
            i,j = deletion[0], deletion[1]

            C_dense[self.code_length*j:self.code_length*(j+1),self.code_length*i:self.code_length*(i+1)] = np.zeros(shape=(self.code_length,self.code_length,))

        for i in range(0,C_dense.shape[0]):
            s = np.sum(C_dense[i,:])
            if s>0.0:
                C_dense[i,:] *= 1.0/np.sum(C_dense[i,:])
            else:
                C_dense[i,i] = 1.0

        self.C_sparse = sparse.csr_matrix(C_dense)

[docs]    def save_edges_and_nodes(self,pathway_nametag,network_type):
        """
        Saves the network nodes and edges as csv and pickled files.

        Parameters
        ----------
        pathway_name: string
            Name of the Reactome top level pathway.

        network_type: string
            Additional description about the network.
        """
        df_e = pd.DataFrame(self.edge_data)

        df_e.to_pickle(pathway_nametag+'_'+network_type+'-edges.pkl')
        df_e.to_csv(pathway_nametag+'_'+network_type+'-edges.csv',index=None)

        df_n = pd.DataFrame(self.node_data)
        df_n.to_pickle(pathway_nametag+'_'+network_type+'-nodes.pkl')
        df_n.to_csv(pathway_nametag+'_'+network_type+'-nodes.csv',index=None)

[docs]    def save_network(self,pathway_nametag,network_type):
        """
        Saves the network as a gml file.

        Parameters
        ----------
        pathway_name: string
            Name of the Reactome top level pathway.

        network_type: string
            Additional description about the network.
        """

        print("Communication network "+pathway_nametag+"-"+network_type+".gml created.")

        nx.write_gml(self.G_d,pathway_nametag+"-"+network_type+".gml")

[docs]    def initialize_network_state(self,additional_sources,source_state):
        """
        Saves the network as a gml file.

        Parameters
        ----------
        pathway_name: string
            Name of the Reactome top level pathway.

        network_type: string
            Additional description about the network.
        """

        network_state = (1.0/float(self.code_length))*np.ones(shape=(C.shape[0],1))
        network_sources = []

        node_list = self.G_d.nodes

        for n in additional_sources:
            try:
                i = node_list.index(n)

                network_state[self.code_length*i:self.code_length*(i+1),0] = source_state

                network_sources.append(i)

            except ValueError:
                pass

[docs]    def write_nodes_and_edges(self,filetag=''):
        ef = open(filetag+'-edges.tsv','w')
        print('Input\tOutput\tReactionName\tChannel\tReactionId',file=ef)

        for e in self.G_d.edges(data=True):
            outstring = e[0]+'\t'+e[1]+'\t'+e[2]['name']+'\t'+e[2]['channel']+'\t'+e[2]['reaction']
            print(outstring,file=ef)

        ef.close()

        nf = open(filetag+'-nodes.tsv','w')
        print('Node\tNodeName\tClass\tSequenced',file=nf)

        for n in self.G_d.nodes(data=True):
            if n[1]['sequenced']==0:
                outstring = n[0]+'\t'+n[1]['name']+'\t'+n[1]['class']+'\t'+str(n[1]['sequenced'])
            else:
                outstring = n[0]+'\t'+n[0]+'\t'+n[1]['class']+'\t'+str(n[1]['sequenced'])

            print(outstring,file=nf)

        nf.close()

[docs]    def save_nullspace(self,outfile):
        """
        Computes and saves the nullspace for the communication matrix.

        Parameters
        ----------
        outfile: string
            Name of the file to store the nullspace. The numpy array is saved in csv format. 
        """

        C_dense = self.temp_C_sparse.todense()
        C_diff = C_dense - np.eye(C_dense.shape[0])
        C_diff_sparse = sp.sparse.csr_matrix(C_diff)

        try:
            ns = sp.linalg.null_space(C_diff)
            np.savetxt(outfile+'-nullspace.csv',ns,delimiter=",")
        except sp.linalg.LinAlgError:
            print('SVD did not converge.')
            pass