0

I previously created many instances of a Python class A. I would now like to add a new method, plotting_in_PC_space_with_coloring_option() (its purpose is to plot some of the data stored in the object), to class A, and then call plotting_in_PC_space_with_coloring_option() on those existing instances.

An example is:

import copy
import numpy as np
from math import *
from pybrain.structure import *
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets.supervised import SupervisedDataSet
import pickle
import neural_network_related


class A(object):
    """Autoencoder neural network used for simulation.

    The inputs are the cosines and sines of backbone dihedral angles computed
    from Cartesian coordinates.  The layer sizes are given by ``_node_num``;
    the activations of the 2-unit middle layer are stored as the "principal
    components" (``_PCs``).

    todo:
        - find boundary
        - get_angles_from_coefficients
    """

    def __init__(self,
                 index,  # the index of the current network
                 list_of_coor_data_files,  # accept multiple files of training data
                 energy_expression_file,  # input, output files
                 preprocessing_settings=None,
                 connection_between_layers=None, connection_with_bias_layers=None,
                 PCs=None,  # principal components
                 ):
        """Load the training data and store the (optional) trained state.

        Every file in ``list_of_coor_data_files`` is read immediately and
        converted into cos/sin features, which are accumulated in
        ``self._data_set``.
        """
        self._index = index
        self._list_of_coor_data_files = list_of_coor_data_files
        self._energy_expression_file = energy_expression_file

        self._data_set = []
        for item in list_of_coor_data_files:
            self._data_set.extend(self.get_many_cossin_from_coordiantes_in_file(item))

        self._preprocessing_settings = preprocessing_settings
        self._connection_between_layers = connection_between_layers
        self._connection_with_bias_layers = connection_with_bias_layers
        self._node_num = [8, 15, 2, 15, 8]
        self._PCs = PCs

    def save_into_file(self, filename=None):
        """Pickle this object into ``filename``.

        When ``filename`` is None the file is named after the network index
        (``network_<index>.pkl``).
        """
        if filename is None:
            filename = "network_%s.pkl" % str(self._index)  # by default naming with its index

        with open(filename, 'wb') as my_file:
            pickle.dump(self, my_file, pickle.HIGHEST_PROTOCOL)

        return

    def get_cossin_from_a_coordinate(self, a_coordinate):
        """Return cos and sin of the dihedral angles of one configuration.

        ``a_coordinate`` is a flat sequence ``x0, y0, z0, x1, y1, z1, ...``.
        For ``n`` points there are ``n - 3`` dihedral angles; the result is a
        plain list holding all cosines followed by all sines.  Fewer than four
        points give an empty list.  A length that is not a multiple of 3 makes
        the reshape raise ``ValueError``.
        """
        # floor division keeps the count an int on Python 3 as well
        num_of_coordinates = len(a_coordinate) // 3
        points = np.array(a_coordinate, dtype=float).reshape(num_of_coordinates, 3)
        if num_of_coordinates < 4:
            return []  # no dihedral angle can be formed

        diff_coordinates = points[1:] - points[:-1]  # bond vectors
        normal_vectors = np.cross(diff_coordinates[:-1], diff_coordinates[1:])
        lengths = np.sqrt(np.sum(normal_vectors * normal_vectors, axis=1))
        normal_vectors_normalized = normal_vectors / lengths[:, np.newaxis]

        normal_vectors_normalized_1 = normal_vectors_normalized[:-1]
        normal_vectors_normalized_2 = normal_vectors_normalized[1:]
        # these are bond vectors in the middle (remove the first and last one),
        # they should be perpendicular to adjacent normal vectors
        diff_coordinates_mid = diff_coordinates[1:-1]

        cos_of_angles = np.sum(normal_vectors_normalized_1 * normal_vectors_normalized_2, axis=1)
        sin_of_angles_vec = np.cross(normal_vectors_normalized_1, normal_vectors_normalized_2)
        # The cross product of two adjacent normals is (anti)parallel to the
        # middle bond, so the sign of their dot product gives the sign of the
        # sine.  Comparing the sums of the components instead breaks down when
        # the components of the bond vector add up to zero.
        signs = np.sign(np.sum(sin_of_angles_vec * diff_coordinates_mid, axis=1))
        sin_of_angles = np.sqrt(np.sum(sin_of_angles_vec * sin_of_angles_vec, axis=1)) * signs

        return cos_of_angles.tolist() + sin_of_angles.tolist()

    def get_many_cossin_from_coordinates(self, coordinates):
        """Apply ``get_cossin_from_a_coordinate`` to every row; return a list."""
        # a real list (not a lazy ``map`` object) so callers can index/concatenate it
        return [self.get_cossin_from_a_coordinate(item) for item in coordinates]

    def get_many_cossin_from_coordiantes_in_file(self, filename):
        """Read coordinates with ``np.loadtxt`` and convert each row to cos/sin."""
        coordinates = np.loadtxt(filename)
        if coordinates.size == 0:
            return []
        # a file with a single row is loaded as a 1-D array; treat it as one configuration
        return self.get_many_cossin_from_coordinates(np.atleast_2d(coordinates))

    def mapminmax(self, my_list):  # for preprocessing in network
        """Linearly rescale ``my_list`` so that its min/max map to -1/+1.

        Returns ``(result_array, (mul_factor, offset))`` where
        ``result = (x - offset) * mul_factor``.  When all values are equal the
        range is zero; ``mul_factor`` is then set to 1.0 so every value maps
        to 0 instead of dividing by zero.
        """
        my_min = min(my_list)
        my_max = max(my_list)
        value_range = my_max - my_min
        mul_factor = 2.0 / value_range if value_range != 0 else 1.0
        offset = (my_min + my_max) / 2.0
        result_list = (np.asarray(my_list, dtype=float) - offset) * mul_factor
        return (result_list, (mul_factor, offset))  # also return the parameters for processing

    def get_mapminmax_preprocess_result_and_coeff(self, data=None):
        """Apply ``mapminmax`` to every column of ``data``.

        ``data`` defaults to ``self._data_set``.  Returns
        ``(processed_data, params)`` where ``processed_data`` has the same
        layout as ``data`` and ``params`` holds one ``(mul_factor, offset)``
        pair per column.
        """
        if data is None:
            data = self._data_set
        columns = np.transpose(np.array(data))
        result = []
        params = []
        for item in columns:
            temp_result, preprocess_params = self.mapminmax(item)
            result.append(temp_result)
            params.append(preprocess_params)
        return (np.transpose(np.array(result)), params)

    def mapminmax_preprocess_using_coeff(self, input_data=None, preprocessing_settings=None):
        """Rescale ``input_data`` with previously computed coefficients.

        ``preprocessing_settings`` is a sequence of ``(mul_factor, offset)``
        pairs (one per feature) and defaults to
        ``self._preprocessing_settings``.  ``input_data`` defaults to
        ``self._data_set``, consistent with the other methods of this class.
        Returns a list with one rescaled array per sample.
        """
        if input_data is None:
            input_data = self._data_set
        if preprocessing_settings is None:
            preprocessing_settings = self._preprocessing_settings

        # row 0: multiplication factors, row 1: offsets
        temp_setttings = np.transpose(np.array(preprocessing_settings))
        mul_factors = temp_setttings[0]
        offsets = temp_setttings[1]

        return [np.multiply(np.asarray(item) - offsets, mul_factors) for item in input_data]

    def get_expression_of_network(self, connection_between_layers=None, connection_with_bias_layers=None):
        """Return the first two layers of the network as an expression string.

        Each statement defines one quantity; statements are emitted so that a
        quantity appears before the quantities it depends on (the original
        author notes that the order matters in OpenMM).  The connections
        default to the ones stored on the object; the preprocessing
        coefficients are always read from ``self._preprocessing_settings``.
        """
        if connection_between_layers is None:
            connection_between_layers = self._connection_between_layers
        if connection_with_bias_layers is None:
            connection_with_bias_layers = self._connection_with_bias_layers

        node_num = self._node_num
        expression = ""
        # first part: network
        for i in range(2):
            expression = '\n' + expression
            mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
            bias_coef = connection_with_bias_layers[i].params
            for j in range(np.size(mul_coef, 0)):
                weighted_inputs = ''.join(
                    ' %f * layer_%d_unit_%d +' % (mul_coef[j, k], i, k)
                    for k in range(np.size(mul_coef, 1)))
                temp_expression = ('layer_%d_unit_%d = tanh( ' % (i + 1, j)
                                   + weighted_inputs
                                   + ' %f);\n' % (bias_coef[j]))
                # prepend: order of expressions matter in OpenMM
                expression = temp_expression + expression

        # second part: definition of inputs
        index_of_backbone_atoms = [2, 5, 7, 9, 15, 17, 19]
        num_of_dihedrals = len(index_of_backbone_atoms) - 3
        settings = self._preprocessing_settings
        for i in range(num_of_dihedrals):
            index_of_coss = i
            index_of_sins = i + num_of_dihedrals  # sines are stored after all cosines
            for unit in (index_of_coss, index_of_sins):
                expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
                    (unit, unit, settings[unit][1], settings[unit][0])
            expression += 'raw_layer_0_unit_%d = cos(dihedral_angle_%d);\n' % (index_of_coss, i)
            expression += 'raw_layer_0_unit_%d = sin(dihedral_angle_%d);\n' % (index_of_sins, i)
            expression += 'dihedral_angle_%d = dihedral(p%d, p%d, p%d, p%d);\n' % \
                (i, index_of_backbone_atoms[i], index_of_backbone_atoms[i + 1],
                 index_of_backbone_atoms[i + 2], index_of_backbone_atoms[i + 3])

        return expression

    def write_expression_into_file(self, out_file=None):
        """Write ``get_expression_of_network()`` to ``out_file``.

        ``out_file`` defaults to ``self._energy_expression_file``.
        """
        if out_file is None:
            out_file = self._energy_expression_file

        expression = self.get_expression_of_network()
        with open(out_file, 'w') as f_out:
            f_out.write(expression)
        return

    def get_mid_result(self, input_data=None, connection_between_layers=None, connection_with_bias_layers=None):
        """Propagate samples through the network and return every layer output.

        ``input_data`` (default ``self._data_set``) is rescaled with
        ``self._preprocessing_settings`` first.  The result holds, for each
        sample, a list with one entry per connection: lists of floats for the
        tanh layers and an array for the final linear layer.
        """
        if input_data is None:
            input_data = self._data_set
        if connection_between_layers is None:
            connection_between_layers = self._connection_between_layers
        if connection_with_bias_layers is None:
            connection_with_bias_layers = self._connection_with_bias_layers

        node_num = self._node_num
        num_of_connections = len(node_num) - 1
        mid_result = []

        # first need to do preprocessing
        for item in self.mapminmax_preprocess_using_coeff(input_data, self._preprocessing_settings):
            # a fresh list per sample, so no deep copy is needed afterwards
            temp_mid_result = []
            previous_result = item
            for i in range(num_of_connections):
                mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
                bias_coef = connection_with_bias_layers[i].params
                current_result = np.dot(mul_coef, previous_result) + bias_coef
                # the last output layer is a linear layer, while others are tanh layers
                if i != num_of_connections - 1:
                    current_result = [tanh(value) for value in current_result]
                temp_mid_result.append(current_result)
                previous_result = current_result

            mid_result.append(temp_mid_result)
        return mid_result

    def get_PC_and_save_it_to_network(self):
        """Compute the PCs of the training data and store them in ``_PCs``.

        The PCs are the outputs of the second connection (index 1) returned by
        ``get_mid_result``.
        """
        mid_result = self.get_mid_result()
        self._PCs = [item[1] for item in mid_result]
        return

    def train(self):
        """Build the network with pybrain, train it and keep its connections.

        Also recomputes ``self._preprocessing_settings`` from the training
        data.
        """
        ####################### set up autoencoder begin #######################
        node_num = self._node_num

        in_layer = LinearLayer(node_num[0], "IL")
        hidden_layers = [TanhLayer(node_num[1], "HL1"), TanhLayer(node_num[2], "HL2"), TanhLayer(node_num[3], "HL3")]
        bias_layers = [BiasUnit("B1"), BiasUnit("B2"), BiasUnit("B3"), BiasUnit("B4")]
        out_layer = LinearLayer(node_num[4], "OL")

        layer_list = [in_layer] + hidden_layers + [out_layer]

        molecule_net = FeedForwardNetwork()

        molecule_net.addInputModule(in_layer)
        for item in (hidden_layers + bias_layers):
            molecule_net.addModule(item)

        molecule_net.addOutputModule(out_layer)

        # real lists: a ``range`` object cannot be assigned to by index on Python 3
        connection_between_layers = []
        connection_with_bias_layers = []

        for i in range(len(layer_list) - 1):
            between_layers = FullConnection(layer_list[i], layer_list[i + 1])
            with_bias_layer = FullConnection(bias_layers[i], layer_list[i + 1])
            molecule_net.addConnection(between_layers)  # connect two neighbor layers
            molecule_net.addConnection(with_bias_layer)
            connection_between_layers.append(between_layers)
            connection_with_bias_layers.append(with_bias_layer)

        molecule_net.sortModules()  # this is some internal initialization process to make this module usable

        ####################### set up autoencoder end #######################

        trainer = BackpropTrainer(molecule_net, learningrate=0.002, momentum=0.4, verbose=False, weightdecay=0.1, lrdecay=1)
        data_set = SupervisedDataSet(node_num[0], node_num[4])

        sincos = self._data_set
        (sincos_after_process, self._preprocessing_settings) = self.get_mapminmax_preprocess_result_and_coeff(data=sincos)
        for item in sincos_after_process:  # is it needed?
            data_set.addSample(item, item)  # autoencoder: the target equals the input

        trainer.trainUntilConvergence(data_set, maxEpochs=50)

        self._connection_between_layers = connection_between_layers
        self._connection_with_bias_layers = connection_with_bias_layers

        print("Done!\n")
        return

    def create_sge_files_for_simulation(self, potential_centers=None):
        """Pass ``potential_centers`` to ``neural_network_related.create_sge_files``.

        ``potential_centers`` defaults to ``self.get_boundary_points()``.
        """
        if potential_centers is None:
            potential_centers = self.get_boundary_points()

        neural_network_related.create_sge_files(potential_centers)
        return

    def get_boundary_points(self, list_of_points=None, num_of_bins=5):
        """Return centers of the empty grid cells that border occupied cells.

        The 2-D points (default ``self._PCs``) are binned into a
        ``num_of_bins`` x ``num_of_bins`` histogram which is then surrounded
        by one ring of empty cells.  Every empty cell with at least one
        occupied cell directly above, below, left or right of it contributes
        its center (rounded to two decimals) to the result.
        """
        if list_of_points is None:
            list_of_points = self._PCs

        x = [item[0] for item in list_of_points]
        y = [item[1] for item in list_of_points]

        hist, edges_0, edges_1 = np.histogram2d(x, y, bins=[num_of_bins, num_of_bins])
        # add a set of zeros around this region
        hist_matrix = np.pad((hist != 0).astype(int), 1, mode='constant')

        # number of neighbors occupied with some points
        sum_of_neighbors = np.zeros(np.shape(hist_matrix))
        sum_of_neighbors[1:, :] += hist_matrix[:-1, :]
        sum_of_neighbors[:, 1:] += hist_matrix[:, :-1]
        sum_of_neighbors[:-1, :] += hist_matrix[1:, :]
        sum_of_neighbors[:, :-1] += hist_matrix[:, 1:]

        bin_width_0 = edges_0[1] - edges_0[0]
        bin_width_1 = edges_1[1] - edges_1[0]
        # multiply by 0.5 since we want the center of the grid; index 0 is the padding cell
        min_coor_in_PC_space_0 = edges_0[0] - 0.5 * bin_width_0
        min_coor_in_PC_space_1 = edges_1[0] - 0.5 * bin_width_1

        # no points in this block but there are points in neighboring blocks
        is_boundary = (hist_matrix == 0) & (sum_of_neighbors != 0)

        potential_centers = []
        for i, j in zip(*np.nonzero(is_boundary)):  # row-major order
            potential_centers.append([round(min_coor_in_PC_space_0 + int(i) * bin_width_0, 2),
                                      round(min_coor_in_PC_space_1 + int(j) * bin_width_1, 2)])

        return potential_centers

    # this function is added after those old objects of A were created
    def plotting_in_PC_space_with_coloring_option(self,
                                                  list_of_coordinate_files_for_plotting=None,  # accept multiple files
                                                  color_option='pure'):
        """Scatter-plot points in PC space.

        By default the PCs of the training data (``self._PCs``) are plotted;
        when ``list_of_coordinate_files_for_plotting`` is given, the PCs are
        computed from those coordinate files instead.

        ``color_option`` is ``'pure'`` (all points red) or ``'step'`` (points
        colored by their position in the sequence); any other value raises
        ``ValueError``.

        Objects restored from pickle files can use this method as long as they
        are unpickled against this updated class definition: pickle stores the
        instance attributes, not the code of the methods.
        """
        if color_option not in ('pure', 'step'):
            raise ValueError("color_option must be 'pure' or 'step', got %r" % (color_option,))

        if list_of_coordinate_files_for_plotting is None:
            PCs_to_plot = self._PCs
        else:
            temp_sincos = []
            for item in list_of_coordinate_files_for_plotting:
                temp_sincos += self.get_many_cossin_from_coordiantes_in_file(item)

            temp_mid_result = self.get_mid_result(input_data=temp_sincos)
            PCs_to_plot = [item[1] for item in temp_mid_result]

        x = [item[0] for item in PCs_to_plot]
        y = [item[1] for item in PCs_to_plot]

        # coloring
        if color_option == 'pure':
            coloring = 'red'
        else:  # 'step'
            coloring = list(range(len(x)))

        # ``plt`` is not imported at the top of this file; import it where it
        # is needed so the method works on its own
        import matplotlib.pyplot as plt

        fig, ax = plt.subplots()
        ax.scatter(x, y, c=coloring)
        ax.set_xlabel("PC1")
        ax.set_ylabel("PC2")

        plt.show()
        return

But it seems that plotting_in_PC_space_with_coloring_option() was not bound to those old objects. Is there any way to fix this? (I do not want to recreate these objects, since creating them involves CPU-intensive calculation and would take a very long time.)

Thanks!

12
  • 1
    Can you give us an example code of what you have tried please. Commented Oct 30, 2015 at 15:08
  • 1
    Could you show us the code you used when you first tried to bind f() Commented Oct 30, 2015 at 15:09
  • @DeliriousMistakes Dang, 29 sec ninja much? Commented Oct 30, 2015 at 15:09
  • Yeah, I like it when people help me as soon as possible, so I try to return a favor when I can :) Commented Oct 30, 2015 at 15:11
  • @DeliriousMistakes I just updated the question:) Commented Oct 30, 2015 at 15:18

1 Answer 1

3

Something like this:

class A:
    def q(self):
        print(1)


# this instance is created BEFORE the new method exists
a = A()


def f(self):
    print(2)


# Attach f to the class itself (same effect as ``A.f = f``).  Attribute lookup
# on an instance falls back to its class, so instances that already exist can
# call the new method too.
setattr(A, 'f', f)

a.f()  # prints 2

This is called a monkey patch.

Sign up to request clarification or add additional context in comments.

4 Comments

How is setattr(A, 'f', f) different from A.f = f?
According to docs.python.org/2/library/functions.html#setattr, setattr(x, 'foobar', 123) is equivalent to x.foobar = 123.
I meant that rhetorically, as in why not use the much more readable A.f = f?
@Wei Chen the monkey patch method seems like the solution. This thread might help: Stack Overflow monkey patch thread

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.