# Lekcja – Wyszukiwanie danych

from time import time, sleep

class Node:
    """Binary-search-tree node holding one record.

    ``id`` is the key the tree functions in this file compare on, ``data`` is
    the payload, ``left``/``right`` are the child links and ``depth`` is
    assigned later by the code that attaches the node to a tree.
    """

    def __init__(self, id, data):
        self.id, self.data = id, data
        # Links and depth start empty; tree-building code fills them in.
        self.left = self.right = self.depth = None

    def __repr__(self):
        # Rendered as "<id/depth>".
        return "<{}/{}>".format(self.id, self.depth)


def insert(parent, child, depth=0):
    """Insert ``child`` into the BST rooted at ``parent`` and return the root.

    Nodes are ordered by ``id``. A child whose ``id`` is already present in
    the tree is ignored. The descent is a loop rather than recursion so that
    a degenerate (list-like) tree, such as one built from already-sorted
    input, cannot exhaust Python's recursion limit.

    Args:
        parent: Root of the tree, or ``None`` for an empty tree.
        child: Node to attach; its ``depth`` is set when it is attached.
        depth: Depth given to ``child`` when ``parent`` is ``None``. It is
            not used otherwise; the depth is derived from the node the child
            is attached to.

    Returns:
        ``child`` if the tree was empty, otherwise ``parent``.
    """
    if parent is None:
        child.depth = depth
        return child

    current = parent
    while True:
        if current.id == child.id:
            # Duplicate key: leave the tree unchanged.
            break
        if child.id < current.id:
            if current.left is None:
                child.depth = current.depth + 1
                current.left = child
                break
            current = current.left
        else:
            if current.right is None:
                child.depth = current.depth + 1
                current.right = child
                break
            current = current.right
    return parent


def load_stock_file(file_name):
    """Load a stock CSV file into a binary search tree keyed by its first column.

    The first line of the file is treated as a header and skipped. For every
    remaining row a ``Node`` is built from the first two columns and inserted
    into the tree. Rows are parsed with the ``csv`` module, so quoted fields
    that contain commas stay in one column; rows with fewer than two columns
    (for example blank lines) are skipped.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        The root node of the tree, or ``None`` when the file has no data rows.
    """
    import csv  # local import keeps this function self-contained

    bst = None
    # ``with`` closes the file even if parsing or insertion raises.
    with open(file_name, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        for row in reader:
            if len(row) < 2:
                continue
            bst = insert(bst, Node(row[0], row[1]))
    return bst

# https://www.nasdaq.com/market-activity/stocks/screener?exchange=NASDAQ&render=download
# Path of the stock CSV; presumably downloaded from the URL above (confirm).
stock_file = './data/stock.csv'

# Build the search tree when the script runs and report completion.
bst = load_stock_file(stock_file)
print('Done')


def max_depth(node):
    """Return the number of nodes on the longest downward path from ``node``.

    An empty subtree (``None``) has depth 0.
    """
    if node is None:
        return 0
    deepest_left = max_depth(node.left)
    deepest_right = max_depth(node.right)
    return 1 + max(deepest_left, deepest_right)


# Height of the whole tree, then of each of the root's subtrees. The second
# line reports depths (max_depth results), so it is labelled as such.
print(f"Max depth: {max_depth(bst)}")
print(f"Balance info: depth on left: {max_depth(bst.left)} depth on right: {max_depth(bst.right)}")


def count(node, delay=0):
    """Return the number of nodes in the subtree rooted at ``node``.

    ``sleep(delay)`` is called once per visit, including visits to empty
    (``None``) children, before anything else happens.
    """
    sleep(delay)
    if node is None:
        return 0
    nodes_left = count(node.left, delay)
    nodes_right = count(node.right, delay)
    return 1 + nodes_left + nodes_right

# Total node count, then the node count of each of the root's subtrees.
print(f"Count: {count(bst)}")
print(f"Balance info: nodes on left: {count(bst.left)} nodes on right: {count(bst.right)}")

# start_time = time()
# count(bst.left, 0.001)
# print(f"Operation on left {time() - start_time} s")

# start_time = time()
# count(bst.right, 0.001)
# print(f"Operation on right {time() - start_time} s")

def get_ordered_list(parent, nodes_list):
    """Append every node of the tree rooted at ``parent`` to ``nodes_list``.

    Nodes are appended in in-order sequence (left subtree, node, right
    subtree). The traversal uses an explicit stack instead of recursion so
    that a degenerate, list-like tree does not exceed Python's recursion
    limit.

    Args:
        parent: Root of the tree to traverse, or ``None`` for an empty tree.
        nodes_list: List that receives the nodes; it is modified in place.

    Returns:
        None.
    """
    pending = []  # ancestors whose left subtree is currently being explored
    current = parent
    while pending or current is not None:
        # Walk as far left as possible, remembering the path.
        while current is not None:
            pending.append(current)
            current = current.left
        current = pending.pop()
        nodes_list.append(current)
        current = current.right

# nodes_list = []
# get_ordered_list(bst, nodes_list)

# for node in nodes_list:
#     print(node)

def build_bst_partially(nodes_list, first_idx, last_idx, depth=0):
    """Build a subtree from ``nodes_list[first_idx:last_idx + 1]``.

    The middle element of the index range becomes the subtree root; the
    elements before it form its left subtree and those after it the right
    subtree, each built one level deeper. Existing ``left``, ``right`` and
    ``depth`` values on the nodes are overwritten.

    Returns the subtree root, or ``None`` when the index range is empty.
    """
    if last_idx < first_idx:
        return None

    pivot = (first_idx + last_idx) // 2
    root = nodes_list[pivot]
    child_depth = depth + 1
    root.left = build_bst_partially(nodes_list, first_idx, pivot - 1, child_depth)
    root.right = build_bst_partially(nodes_list, pivot + 1, last_idx, child_depth)
    root.depth = depth
    return root


def rebuild_bst(bst):
    """Return the root of a rebuilt tree containing the nodes of ``bst``.

    The nodes are first collected with ``get_ordered_list`` and the tree is
    then rebuilt over the whole list with ``build_bst_partially``.
    """
    ordered_nodes = []
    get_ordered_list(bst, ordered_nodes)
    return build_bst_partially(ordered_nodes, 0, len(ordered_nodes) - 1)

# Rebuild the tree, then print the same statistics as before the rebuild.
# The second line reports depths (max_depth results), so it is labelled as such.
bst = rebuild_bst(bst)
print(f"Max depth: {max_depth(bst)}")
print(f"Balance info: depth on left: {max_depth(bst.left)} depth on right: {max_depth(bst.right)}")
print(f"Count: {count(bst)}")
print(f"Balance info: nodes on left: {count(bst.left)} nodes on right: {count(bst.right)}")

def find(parent, id, verbose=False):
    """Search the BST rooted at ``parent`` for the node whose ``id`` equals ``id``.

    The descent is a loop rather than recursion, so a very deep tree cannot
    exceed Python's recursion limit.

    Args:
        parent: Root of the tree to search, or ``None`` for an empty tree.
        id: Key to look for.
        verbose: When true, print every visited position (including the
            final ``None`` on a miss) as ``Analyzing <node>``.

    Returns:
        The matching node, or ``None`` when the key is not in the tree.
    """
    current = parent
    while True:
        if verbose:
            print(f"Analyzing {current}")
        if current is None or current.id == id:
            return current
        current = current.left if id < current.id else current.right

# find() returns None on a miss, so each result is checked before its
# attributes are read; a missing ticker prints a message instead of raising
# AttributeError.
microsoft = find(bst, 'MSFT', True)
if microsoft:
    print(microsoft.id, microsoft.data)
else:
    print('404 - not found')

transmedics = find(bst, 'TMDX', True)
if transmedics:
    print(transmedics.id, transmedics.data)
else:
    print('404 - not found')

# The raw result is printed here, so a miss shows up as None.
mars = find(bst, 'MARS', True)
print(mars)

# Lab

class City:
    """Binary-search-tree node describing one city.

    ``id`` is the key the tree functions in this file compare on; ``country``
    and ``population`` are the payload. ``left``/``right`` are the child
    links and ``depth`` is assigned later by the tree-building code.
    """

    def __init__(self, id, country, population):
        self.id, self.country, self.population = id, country, population
        # Links and depth start empty; tree-building code fills them in.
        self.left = self.right = self.depth = None

    def __repr__(self):
        # Rendered as "<id/depth>".
        return "<{}/{}>".format(self.id, self.depth)


def insert(parent, child, depth=0):
    """Insert ``child`` into the BST rooted at ``parent`` and return the root.

    Nodes are ordered by ``id``; a child whose ``id`` is already in the tree
    is ignored. The tree is walked with a loop instead of recursion so that
    a degenerate (list-like) tree cannot exhaust Python's recursion limit.

    Args:
        parent: Root of the tree, or ``None`` for an empty tree.
        child: Node to attach; its ``depth`` is set when it is attached.
        depth: Depth given to ``child`` when ``parent`` is ``None``. It is
            not used otherwise; the depth is derived from the node the child
            is attached to.

    Returns:
        ``child`` if the tree was empty, otherwise ``parent``.
    """
    if parent is None:
        child.depth = depth
        return child

    node = parent
    while True:
        if node.id == child.id:
            # Duplicate key: leave the tree unchanged.
            return parent
        if child.id < node.id:
            if node.left is None:
                child.depth = node.depth + 1
                node.left = child
                return parent
            node = node.left
        else:
            if node.right is None:
                child.depth = node.depth + 1
                node.right = child
                return parent
            node = node.right


def load_file(file_name):
    """Load a cities CSV file (UTF-8) into a binary search tree of ``City`` nodes.

    The first line is treated as a header and skipped. For each remaining
    row, column 1 becomes the city id, column 4 the country and column 9 the
    population. Id and country have double quotes, single quotes and
    backticks removed and are upper-cased; a population that is not a valid
    integer is stored as -1. Rows are parsed with the ``csv`` module, so
    commas inside quoted fields do not shift the columns; rows with fewer
    than ten columns (for example blank lines) are skipped.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        The root node of the tree, or ``None`` when the file has no data rows.
    """
    import csv  # local import keeps this function self-contained

    def _clean(text):
        # Strip quote-like characters and normalise case for use as a key.
        return text.replace('"', '').replace("'", '').replace("`", '').upper()

    bst = None
    # ``with`` closes the file even if parsing or insertion raises.
    with open(file_name, 'r', encoding="utf-8", newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        for row in reader:
            if len(row) < 10:
                continue

            city_ascii = _clean(row[1].strip())
            country = _clean(row[4])
            try:
                population = int(row[9].replace('"', ''))
            except ValueError:
                population = -1

            bst = insert(bst, City(city_ascii, country, population))
    return bst


# https://www.kaggle.com/juanmah/world-cities
# NOTE(review): the variable is still called `stock_file` although it now
# points at the cities CSV.
stock_file = './data/worldcities.csv'
# Build the search tree when the script runs and report completion.
bst = load_file(stock_file)
print('Done')


def max_depth(node):
    """Return how many nodes lie on the longest path from ``node`` downwards.

    ``None`` (an empty subtree) yields 0.
    """
    if node is None:
        return 0
    left_height = max_depth(node.left)
    right_height = max_depth(node.right)
    taller = max(left_height + 1, right_height + 1)
    return taller

def get_ordered_list(parent, nodes_list):
    """Append the nodes of the tree rooted at ``parent`` to ``nodes_list`` in order.

    Nodes are visited in in-order sequence (left subtree, node, right
    subtree). An explicit stack replaces recursion so that a degenerate,
    list-like tree does not exceed Python's recursion limit.

    Args:
        parent: Root of the tree to traverse, or ``None`` for an empty tree.
        nodes_list: List that receives the nodes; it is modified in place.

    Returns:
        None.
    """
    stack = []  # nodes whose left subtree is being explored
    node = parent
    while stack or node is not None:
        if node is not None:
            # Keep descending left, remembering the path.
            stack.append(node)
            node = node.left
        else:
            node = stack.pop()
            nodes_list.append(node)
            node = node.right

def build_bst_partially(nodes_list, first_idx, last_idx, depth=0):
    """Build a subtree out of ``nodes_list[first_idx:last_idx + 1]``.

    The element in the middle of the index range becomes the subtree root,
    with the elements before it as its left subtree and the elements after
    it as its right subtree, both one level deeper. The nodes' ``left``,
    ``right`` and ``depth`` attributes are overwritten.

    Returns the subtree root, or ``None`` for an empty index range.
    """
    if not first_idx <= last_idx:
        return None

    centre = (first_idx + last_idx) // 2
    subtree_root = nodes_list[centre]
    next_depth = depth + 1
    subtree_root.left = build_bst_partially(
        nodes_list, first_idx, centre - 1, next_depth)
    subtree_root.right = build_bst_partially(
        nodes_list, centre + 1, last_idx, next_depth)
    subtree_root.depth = depth
    return subtree_root

def rebuild_bst(bst):
    """Return the root of a rebuilt tree holding the same nodes as ``bst``.

    Collects the nodes with ``get_ordered_list`` and rebuilds the tree over
    the full list with ``build_bst_partially``.
    """
    collected = []
    get_ordered_list(bst, collected)
    lowest, highest = 0, len(collected) - 1
    return build_bst_partially(collected, lowest, highest)

# Rebuild the tree loaded above before running the lookups below.
bst = rebuild_bst(bst)

def find(parent, id, verbose=False):
    """Return the node with key ``id`` from the BST rooted at ``parent``.

    The tree is walked with a loop instead of recursion, so a very deep tree
    cannot exceed Python's recursion limit.

    Args:
        parent: Root of the tree to search, or ``None`` for an empty tree.
        id: Key to look for.
        verbose: When true, print every visited position (including the
            final ``None`` on a miss) as ``Analyzing <node>``.

    Returns:
        The matching node, or ``None`` when the key is not present.
    """
    node = parent
    while True:
        if verbose:
            print(f"Analyzing {node}")
        if node is None:
            return None
        if node.id == id:
            return node
        # Smaller keys live in the left subtree, larger ones in the right.
        node = node.left if id < node.id else node.right

# Look up each city and print its id and population, or a not-found message.
# One loop replaces three copies of the same lookup/print code; `c` still
# holds the result of the last lookup afterwards.
for city_name in ('BERLIN', 'WROCLAW', 'VINA TUSCA'):
    c = find(bst, city_name, False)
    if c:
        print(c.id, c.population)
    else:
        print('404 - not found')