I have a dataset generated as follows:
import numpy as np
dataset = np.random.normal(50,10,100)
Record the min and max of this dataset and let the interval [min, max] be the root node, which therefore contains all 100 points. Let [min, min + d) and [max - d, max], where d = (max - min)/2, be its left and right children respectively, and keep splitting each child in the same way. Stop splitting a node once it contains 5 or fewer points of the dataset. How can I find the number of points in each node?
I have already constructed the tree skeleton, with nodes numbered 1, 2, 3, ... from top to bottom and left to right, and now I want to attach the data to each node. I have also written the functions that split an interval. All that is missing is a recursive algorithm to complete it. How do I combine these pieces?
import numpy as np
# Sample of 100 draws from a normal distribution (mean 50, standard deviation 10).
dataset = np.random.normal(loc=50, scale=10, size=100)
Function for the left child:
def split_L(l, data=None):
    """Return the points of ``l`` that fall in the left child interval.

    The left child of the interval ``[min(l), max(l)]`` is
    ``[min(l), min(l) + d)`` with ``d = (max(l) - min(l)) / 2``.

    Args:
        l: Sequence of points belonging to the current node.
        data: Points to count against ``l``.  Defaults to the module-level
            ``dataset`` when omitted, which is what the original code used.

    Returns:
        A list with the points of ``l`` that are strictly below
        ``min(l) + d``, or ``None`` when the node must not be split:
        either ``l`` is empty or at most 5 points of ``data`` occur in ``l``.

    Side effects:
        Prints the max, min, half-width ``d`` and the point count of the
        node (nothing is printed for an empty ``l``).
    """
    # An empty node has no min/max; treat it as a leaf instead of crashing.
    if len(l) == 0:
        return None
    if data is None:
        data = dataset

    # Compute the bounds once instead of on every loop iteration.
    lo = min(l)
    hi = max(l)
    d = (hi - lo) / 2
    print('max=', hi)
    print('min=', lo)
    print('d=', d)

    # Set membership keeps the count linear in the size of ``data``.
    members = set(l)
    j = sum(1 for point in data if point in members)
    print('Number of points=', j)

    # Nodes with 5 or fewer points are leaves and are not split further.
    if j <= 5:
        return None

    cut = lo + d
    return [k for k in l if k < cut]
Function for the right child:
def split_R(l, data=None):
    """Return the points of ``l`` that fall in the right child interval.

    The right child of the interval ``[min(l), max(l)]`` is
    ``[max(l) - d, max(l)]`` with ``d = (max(l) - min(l)) / 2``.

    Args:
        l: Sequence of points belonging to the current node.
        data: Points to count against ``l``.  Defaults to the module-level
            ``dataset`` when omitted, which is what the original code used.

    Returns:
        A list with the points of ``l`` that are at or above the midpoint,
        or ``None`` when the node must not be split: either ``l`` is empty
        or at most 5 points of ``data`` occur in ``l``.

    Side effects:
        Prints the max, min, half-width ``d`` and the point count of the
        node (nothing is printed for an empty ``l``).
    """
    # An empty node has no min/max; treat it as a leaf instead of crashing.
    if len(l) == 0:
        return None
    if data is None:
        data = dataset

    # Compute the bounds once instead of on every loop iteration.
    lo = min(l)
    hi = max(l)
    d = (hi - lo) / 2
    print('max=', hi)
    print('min=', lo)
    print('d=', d)

    # Set membership keeps the count linear in the size of ``data``.
    members = set(l)
    j = sum(1 for point in data if point in members)
    print('Number of points=', j)

    # Nodes with 5 or fewer points are leaves and are not split further.
    if j <= 5:
        return None

    # ``max - d`` equals ``min + d`` mathematically; the ``min + d`` form is
    # the cut expression split_L uses, so the two halves partition the points
    # exactly even when floating-point rounding makes the two forms differ.
    cut = lo + d
    return [k for k in l if k >= cut]
Code for the tree:
class Node:
    """A binary-tree node holding a payload and links to its two children."""

    def __init__(self, data):
        self.data = data
        self.left = None
        self.right = None


class Tree:
    """Complete binary tree filled level by level, left to right.

    ``self.list`` holds every node in insertion order, so the node at
    1-based position ``p`` is ``self.list[p - 1]`` and its parent is the
    node at 1-based position ``p // 2``.
    """

    def __init__(self):
        self.list = [self.root_node()]

    def root_node(self):
        """Create and return the root node, which carries the label 1."""
        return Node(1)

    def add_node(self, data):
        """Append a node carrying ``data`` in the next free slot of the tree.

        The new node becomes the left child of its parent when its 1-based
        position is even and the right child when it is odd.
        """
        new_node = Node(data)
        self.list.append(new_node)
        position = len(self.list)  # 1-based position of the new node
        # ``self.list`` is 0-based, hence the ``- 1``: without it the lookup
        # lands one node past the parent, so the root never gets children
        # and the second node ends up as its own left child.
        parent = self.list[position // 2 - 1]
        if position % 2 == 0:
            parent.left = new_node
        else:
            parent.right = new_node

    def output_tree(self):
        """Print the payload of every node in insertion (level) order."""
        for node in self.list:
            print(node.data)
if __name__ == '__main__':
    # Build a tree whose nodes are labelled 1..99 (the root already carries
    # label 1) and print the labels in insertion order.
    tree = Tree()
    for label in range(2, 100):
        tree.add_node(label)
    tree.output_tree()