diff --git a/Python_files/.DS_Store b/Python_files/.DS_Store deleted file mode 100644 index 135f4d6bc8ebd17c73548a793eaaaa62d35e63c3..0000000000000000000000000000000000000000 Binary files a/Python_files/.DS_Store and /dev/null differ diff --git a/Python_files/ex1/.DS_Store b/Python_files/ex1/.DS_Store deleted file mode 100644 index 1e16d7855794467073f47100738f30843226ea27..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/.DS_Store and /dev/null differ diff --git a/Python_files/ex1/Exercise-1.pdf b/Python_files/ex1/Exercise-1.pdf deleted file mode 100644 index f24f9910d9142104337553ec5bef913377845c7e..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/Exercise-1.pdf and /dev/null differ diff --git a/Python_files/ex1/Slides-Graph-Kernels.pdf b/Python_files/ex1/Slides-Graph-Kernels.pdf deleted file mode 100644 index feb2b923c98de978ad49e127aad2a629ce76e42a..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/Slides-Graph-Kernels.pdf and /dev/null differ diff --git a/Python_files/ex1/Untitled.ipynb b/Python_files/ex1/Untitled.ipynb deleted file mode 100644 index 739c8f6e2be96d492d519da8892011f738a85488..0000000000000000000000000000000000000000 --- a/Python_files/ex1/Untitled.ipynb +++ /dev/null @@ -1,189 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "flying-twist", - "metadata": {}, - "outputs": [], - "source": [ - "from gurobipy import *\n", - "import networkx as nx" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "noble-kazakhstan", - "metadata": {}, - "outputs": [], - "source": [ - "a = [7, 4, 6, 4, 5, 4, 3, 4, 6, 7]\n", - "\n", - "# profits\n", - "p = [5, 4, 4, 6, 4, 7, 4, 5, 7, 3]\n", - "\n", - "# knapsack capacity\n", - "b = 20\n", - "\n", - "G=nx.DiGraph()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "unlikely-charity", - "metadata": {}, - "outputs": [], - "source": [ - "def solve(a, p, b):\n", - " nitems = 
len(p)\n", - " items = range(nitems)\n", - "\n", - " # Do not change the following line!\n", - " vertices = [(c, i) for i in range(nitems+1) for c in range(b+2)]\n", - " \n", - " G.add_nodes_from(vertices)\n", - " \n", - " arcs = []\n", - " for i in range(1, len(p) + 1):\n", - " for c in range(b - a[i - 1] + 1):\n", - " arcs.append(((c,i-1),(c+a[i-1],i),p[i-1]))\n", - "\n", - " for i in range(1, len(p) + 1):\n", - " for c in range(b + 1):\n", - " arcs.append(((c,i-1),(c,i),0))\n", - "\n", - " for i in range(len(p) + 1):\n", - " for c in range(b):\n", - " arcs.append(((c,i),(c+1,i),1))\n", - " G.add_weighted_edges_from(arcs)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "conceptual-equation", - "metadata": {}, - "outputs": [], - "source": [ - "solve(a,p,b)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "governmental-chain", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(9, 1), (2, 1), (3, 0)]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(nx.neighbors(G,(2,0)))" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "optical-modification", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(G.predecessors((0,0)))" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "successful-reserve", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'tuple' object is not callable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-38-9eb58f29ea08>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m 
\u001b[0mtup\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: 'tuple' object is not callable" - ] - } - ], - "source": [ - "tup=(1,2,3,4)\n", - "tup(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "romance-barrel", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(14, 2), (10, 2), (11, 1)]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(nx.neighbors(G,(10,1)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "judicial-health", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/.DS_Store b/Python_files/ex1/datasets/.DS_Store deleted file mode 100644 index ff04d33e0071c56ba96d764facc20d7927f842db..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/.DS_Store and /dev/null differ diff --git a/Python_files/ex1/datasets/.ipynb_checkpoints/EX4_GL1_0426-checkpoint.ipynb b/Python_files/ex1/datasets/.ipynb_checkpoints/EX4_GL1_0426-checkpoint.ipynb deleted file mode 100644 index 
b70092ee581b0ca473f4c0e3c54382b9daf8920e..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/.ipynb_checkpoints/EX4_GL1_0426-checkpoint.ipynb +++ /dev/null @@ -1,3231 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "235289b2", - "metadata": {}, - "source": [ - "# EX 4" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3a7d45a0", - "metadata": {}, - "outputs": [], - "source": [ - "#needed for EX 3\n", - "import pickle\n", - "import networkx as nx\n", - "import matplotlib.pyplot as plt\n", - "from collections import Counter, defaultdict\n", - "import scipy.sparse as sp\n", - "import numpy as np\n", - "from multiset import FrozenMultiset\n", - "import random" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8c73bfa8", - "metadata": {}, - "outputs": [], - "source": [ - "# additionally needed for EX 4\n", - "\n", - "from sklearn.svm import SVC\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.model_selection import cross_val_score" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "11c6f5f6", - "metadata": {}, - "outputs": [], - "source": [ - "DD = pickle.load(open(\"datasets/DD/data.pkl\", \"rb\"))\n", - "ENZ = pickle.load(open(\"datasets/ENZYMES/data.pkl\", \"rb\"))\n", - "NCI= pickle.load(open(\"datasets/NCI1/data.pkl\", \"rb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "11bf36a7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "284.3166383701188 1178\n", - "32.63333333333333 600\n", - "29.8654501216545 4110\n" - ] - } - ], - "source": [ - "for data in [DD, ENZ, NCI]:\n", - " print(np.mean([len(graph.nodes()) for graph in data]), len(data))" - 
] - }, - { - "cell_type": "markdown", - "id": "7921f1bc", - "metadata": {}, - "source": [ - "# Kernels" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "4e66ec3a", - "metadata": {}, - "outputs": [], - "source": [ - "def wl_kernel(orig_graphs, labelname=None, rounds=4):\n", - " graphs = [graph.copy() for graph in orig_graphs]\n", - " \n", - " ##### COLOR REFINEMENT ############\n", - " idx_counter = 0\n", - " coldict = dict() #save all colors in a dictionary (keys: hash values, values: index in the final histograms)\n", - " \n", - " #initial colors: if there is a initial color scheme, use it in round 1\n", - " if labelname:\n", - " for graph in graphs:\n", - " init_labels = nx.get_node_attributes(graph, labelname) #dict {node: label}\n", - " hash_labels = {key: hash(value) for key,value in init_labels.items()} #hash label values so that they are the same for all coming graphs and rounds\n", - " colors = list(set(hash_labels.values())) #list of the different colors in this graph\n", - " for hashcol in colors:\n", - " #check if colors already have been saved in coldict and save them if not\n", - " if hashcol not in coldict.keys():\n", - " coldict[hashcol] = idx_counter\n", - " idx_counter += 1\n", - " #change from hashed colors to final colors which will be used afterwards\n", - " new_labels = {key: coldict[hashvalue] for key,hashvalue in hash_labels.items()}\n", - " nx.set_node_attributes(graph, new_labels, str(0))\n", - " # no initial color scheme -> every node gets same color\n", - " else:\n", - " for graph in graphs:\n", - " nx.set_node_attributes(graph, 0, str(0))\n", - " #save color in coldict and increment idx_counter (which counts total number of colors)\n", - " coldict[0] = idx_counter #here: 0\n", - " idx_counter += 1\n", - " \n", - " #next rounds of color refinement\n", - " for k in range(1, rounds+1):\n", - " for graph in graphs:\n", - " #attribute dictionaries\n", - " attrs_last_round = nx.get_node_attributes(graph, str(k-1)) 
#dictionary with nodes as keys and corresponding attributes of last round as values\n", - " attrs_this_round = dict() #where you save attributes of this round\n", - " \n", - " #compute current color of each node\n", - " for node in graph.nodes():\n", - " #get colors of neighbors and hash them together with the node's color\n", - " colset = FrozenMultiset(attrs_last_round.get(neighbor) for neighbor in list(graph[node]))\n", - " hashcol = hash((attrs_last_round.get(node), colset))\n", - " #if hash produces a new color:\n", - " if hashcol not in coldict.keys():\n", - " coldict[hashcol] = idx_counter\n", - " idx_counter += 1\n", - " attrs_this_round[node] = coldict[hashcol]\n", - " #save current colors of the graph as node attributes\n", - " nx.set_node_attributes(graph, attrs_this_round, name=str(k))\n", - "\n", - " \n", - " ####### CONSTRUCT FEATURE VECTORS ###############\n", - " f_vecs = list() #where feature vectors will be stored\n", - " for graph in graphs:\n", - " c = Counter()\n", - " for k in range(rounds):\n", - " #count number of colors that appeared in each round, i.e. 
c = {0:302, 1:4} if color 0 appeared 302 times and color 1 4 times\n", - " c.update(nx.get_node_attributes(graph, str(k)).values()) \n", - " #create feature vectore as sparse matrix in format 1 x idx_counter\n", - " data = np.array(list(c.values()))\n", - " col = np.array(list(c.keys()))\n", - " row = np.zeros(len(col)) #only one row so far\n", - " f_vec = sp.coo_matrix((data, (row,col)), shape=(1, idx_counter)) #feature vector with histogram entries \n", - " f_vecs.append(f_vec)\n", - "\n", - " return graphs, f_vecs" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "61618352", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " print(\"mean of number of nodes:\", l)\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l+1):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " #print(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "id": "68101110", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphs, ENZ_feat = wl_kernel(ENZ, labelname=\"node_label\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "57da0c1a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(ENZ_feat) == len(ENZ_graphs)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "4692d7cf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[<1x25858 sparse matrix of 
type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 158 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 4 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 33 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 125 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 4 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - 
" \twith 17 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 12 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 19 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 39 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 11 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 16 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 80 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 47 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 80 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 7 stored elements in COOrdinate format>,\n", - " 
<1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 124 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 37 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 116 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 108 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 110 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 158 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 146 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 38 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type 
'<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 4 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 109 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 32 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 27 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 19 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " 
\twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 113 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 124 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 128 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 128 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 69 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 79 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 79 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", 
- " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 17 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type 
'<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 113 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 125 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 110 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 121 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 113 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 164 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 104 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 117 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 120 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 104 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 122 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 37 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - 
" \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 20 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 153 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 15 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 151 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 69 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 19 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 27 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 16 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 15 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - 
" <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 116 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 16 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type 
'<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 116 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 125 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - 
" \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 32 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 33 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 27 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 109 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 79 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 80 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 151 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 69 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 109 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 131 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 137 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 163 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 112 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", 
- " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 39 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 38 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 117 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 104 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 131 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 108 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 133 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 130 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of 
type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ENZ_feat" - ] - }, - { - "cell_type": "markdown", - "id": "644af059", - "metadata": {}, - "source": [ - "## Gram Matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "0b493d38", - "metadata": {}, - "outputs": [], - "source": [ - "m_ENZ = sp.vstack(ENZ_feat, format=\"csr\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "f4a09179", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<600x600 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 358808 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_ENZ = m_ENZ@np.transpose(m_ENZ)\n", - "gram_ENZ" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "93916ae1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mean of number of nodes: 32\n" - ] - } - ], - "source": [ - "ENZ_features = closed_kernel(ENZ)\n", - "#ENZ_features" - ] - }, - { - "cell_type": "markdown", - "id": "8d9b7f36", - "metadata": {}, - "source": [ - "# SVM" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "id": "5ed85c44", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "e21667af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "number of labels for each graphlist\n", - "NZ: 6\n", - "DD: 2\n", - "NCI: 2\n" - ] - } - ], - "source": [ - "print(\"number of labels for each graphlist\")\n", - "ENZ_target = [g.graph['label'] for g in ENZ]\n", - "print(\"NZ:\", 
len(set(ENZ_target)))\n", - "DD_target = [g.graph['label'] for g in DD]\n", - "print(\"DD:\", len(set(DD_target)))\n", - "NCI_target = [g.graph['label'] for g in NCI]\n", - "print(\"NCI:\", len(set(NCI_target)))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a6e97d61", - "metadata": {}, - "outputs": [], - "source": [ - "def graph_svm(feature_vecs, target_vec):\n", - " num_labels = len(set(target_vec))\n", - " feature_mat = np.vstack(feature_vecs)\n", - " gram_mat = feature_mat @ np.transpose(feature_mat)\n", - " print(gram_mat)\n", - " clf = SVC(kernel='precomputed')\n", - " clf.fit(gram_mat, target_vec)\n", - " return clf\n", - " " - ] - }, - { - "cell_type": "raw", - "id": "ddae8881", - "metadata": {}, - "source": [ - "clf = graph_svm(NCI_features, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b741d0d6", - "metadata": {}, - "outputs": [], - "source": [ - "predicted = clf.predict(feature_vecs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ddb9ede7", - "metadata": {}, - "outputs": [], - "source": [ - "print(classification_report(NCI_target, predicted)) " - ] - }, - { - "cell_type": "raw", - "id": "949955ff", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "#this time, we train an SVM classifier\n", - "classifier = SVC(C=1, kernel='linear', gamma = 'auto')\n", - "classifier.fit(NCI_features, NCI_target)\n", - "\n", - "targetFeature_predict = classifier.predict(NCI_features)\n" - ] - }, - { - "cell_type": "raw", - "id": "1edca273", - "metadata": {}, - "source": [ - "\n", - "classifier.decision_function(NCI_features)\n", - "print('Accuracy: \\n', classifier.score(NCI_features,ENZ_target))\n", - "print('Classification report: \\n')\n", - "print(classification_report(NCI_target, targetFeature_predict)) " - ] - }, - { - "cell_type": "markdown", - "id": "eba78df1", - "metadata": {}, - "source": [ - "# new" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": 
"662f913c", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphs, DD_feat = wl_kernel(DD, labelname=\"node_label\")" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "69764da9", - "metadata": {}, - "outputs": [], - "source": [ - "m_DD = sp.vstack(DD_feat)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "67109f61", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[<1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 996 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1072 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 581 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 341 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 541 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 10716 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 3755 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1525 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1151 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 457 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 955 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 727 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 962 
stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 847 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 689 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1369 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 788 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 682 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 473 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1073 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 15374 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 2107 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1037 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 223 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 782 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1313 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1109 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 794 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1195 stored elements in 
COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 744 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1690 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 538 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1325 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 678 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1052 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 895 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1612 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 870 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 808 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 674 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1358 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1874 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 665 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1202 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 322 stored elements in COOrdinate format>,\n", 
- " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1490 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 667 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1340 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 643 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 740 stored elements in COOrdinate format>]" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "DD_feat[:50]" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "87481bd2", - "metadata": {}, - "outputs": [], - "source": [ - "DD_target = [g.graph['label'] for g in DD]" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "20f87621", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<1254664x1178 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 1017230 stored elements in COOrdinate format>" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m_DD.transpose()" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "22b17bc1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "matrix([[8298, 6960, 3678, ..., 1007, 943, 2839],\n", - " [6960, 8926, 3879, ..., 1070, 987, 2922],\n", - " [3678, 3879, 2868, ..., 587, 596, 1578],\n", - " ...,\n", - " [1007, 1070, 587, ..., 428, 185, 663],\n", - " [ 943, 987, 596, ..., 185, 376, 575],\n", - " [2839, 2922, 1578, ..., 663, 575, 3894]], dtype=int32)" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_DD = m_DD.dot(m_DD.transpose()).todense()\n", - "gram_DD" - ] - }, - { - "cell_type": "code", 
- "execution_count": 85, - "id": "6d169fcf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 6])" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a = np.array([[1,2,3],[4,5,6],[7,8,9]])\n", - "a[[0,1],[0,2]]" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "e088af54", - "metadata": {}, - "outputs": [], - "source": [ - "samplelist = np.arange(len(DD_target)).tolist()\n", - "sample_idx = random.sample(samplelist, 200)\n", - "#sample_idx" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "id": "44369d23", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(200, 200)" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.asarray(gram_DD)[sample_idx][:, sample_idx].shape" - ] - }, - { - "cell_type": "raw", - "id": "65f1df55", - "metadata": {}, - "source": [ - "smaller_matrix = np.asarray(gram_DD)[sample_idx][:, sample_idx]\n", - "clf = SVC(kernel='precomputed')\n", - "clf.fit(smaller_matrix, np.array(DD_target)[sample_idx])" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "id": "d8a3f178", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(kernel='precomputed')" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf = SVC(kernel='precomputed')\n", - "clf.fit(np.array(gram_DD), np.array(DD_target))" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "id": "09032ff9", - "metadata": {}, - "outputs": [], - "source": [ - "targetFeature_predict = clf.predict(smaller_matrix)" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "id": "8324d609", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 1.00 1.00 1.00 115\n", - " 2 1.00 1.00 1.00 85\n", 
- "\n", - " accuracy 1.00 200\n", - " macro avg 1.00 1.00 1.00 200\n", - "weighted avg 1.00 1.00 1.00 200\n", - "\n" - ] - } - ], - "source": [ - "print(classification_report( np.array(DD_target)[sample_idx], targetFeature_predict)) " - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "id": "ac404f21", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 1.00 1.00 1.00 691\n", - " 2 1.00 1.00 1.00 487\n", - "\n", - " accuracy 1.00 1178\n", - " macro avg 1.00 1.00 1.00 1178\n", - "weighted avg 1.00 1.00 1.00 1178\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:593: FutureWarning: np.matrix usage is deprecated in 1.0 and will raise a TypeError in 1.2. Please convert to a numpy array with np.asarray. For more information see: https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "new_sample_idx = random.sample(samplelist, 200)\n", - "new_matrix = np.asarray(gram_DD)[new_sample_idx][:, new_sample_idx]\n", - "print(classification_report( np.array(DD_target), clf.predict(gram_DD))) " - ] - }, - { - "cell_type": "raw", - "id": "f28c2d7e", - "metadata": {}, - "source": [ - "from sklearn import datasets, linear_model\n", - "from sklearn.model_selection import cross_val_score\n", - "diabetes = datasets.load_diabetes()\n", - "X = diabetes.data[:150]\n", - "y = diabetes.target[:150]\n", - "lasso = linear_model.Lasso()\n", - "print(cross_val_score(lasso, X, y, cv=3))" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "id": "4f1f60f0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.7877661886136462\n" - ] - } - ], - "source": [ - "\n", - "accuracy = cross_val_score(clf, np.array(gram_DD), np.array(DD_target), cv=10)\n", - 
"print(np.mean(accuracy))" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "id": "45829b72", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " m_feat = sp.vstack(feat_vecs)\n", - " gram_matrix = m_feat.dot(m_feat.transpose()).todense()\n", - " \n", - " if n_classes <= 2:\n", - " clf = SVC(kernel='precomputed')\n", - " #clf.fit(np.array(gram_matrix), np.array(target_vec))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(target_vec), cv=10)\n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " Y = label_binarize(target_vec, classes=classes)\n", - " print(Y)\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', class_weight=\"balanced\"))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), Y, cv=10)\n", - " print(accuracy)\n", - " print(np.mean(accuracy))\n", - " \n", - " return np.mean(accuracy)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f0e4847f", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphs, DD_feat = wl_kernel(DD, labelname=\"node_label\")\n", - "DD_target = [g.graph['label'] for g in DD]" - ] - }, - { - "cell_type": "code", - "execution_count": 157, - "id": "8ecabd06", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7877661886136462" - ] - }, - "execution_count": 157, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed(DD_feat, DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "1b7a4661", - "metadata": {}, - "outputs": [], - "source": [ - "NCI_graphs, NCI_feat = wl_kernel(NCI, labelname=\"node_label\")\n", - "NCI_target = [g.graph['label'] for g in NCI]" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "37c6c528", - "metadata": {}, - "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "0.783698296836983\n" - ] - } - ], - "source": [ - "svm_precomputed(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f9614220", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphs, ENZ_feat = wl_kernel(ENZ, labelname=\"node_label\")\n", - "ENZ_target = [g.graph['label'] for g in ENZ]" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "id": "92cce688", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "[[0 0 0 0 0 1]\n", - " [0 0 0 0 0 1]\n", - " [0 0 0 0 0 1]\n", - " ...\n", - " [0 0 0 1 0 0]\n", - " [0 0 0 1 0 0]\n", - " [0 0 0 1 0 0]]\n", - "[0.03333333 0.1 0.03333333 0.01666667 0. 0.01666667\n", - " 0.03333333 0. 0.01666667 0.03333333]\n", - "0.028333333333333332\n" - ] - }, - { - "data": { - "text/plain": [ - "0.028333333333333332" - ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "58dc2da2", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_gridSearchCV(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " m_feat = sp.vstack(feat_vecs)\n", - " gram_matrix = m_feat.dot(m_feat.transpose()).todense()\n", - " \n", - " if n_classes <= 2:\n", - " param_grid = {'C': [0.001,0.01,0.1,1],\n", - " 'class_weight':['balanced',None]} \n", - " grid = GridSearchCV(SVC(kernel=\"precomputed\"), param_grid, cv = 5, verbose = 3) \n", - " grid.fit(np.array(gram_matrix), np.array(target_vec))\n", - " print(grid.best_params_) \n", - " clf = SVC(kernel='precomputed', **grid.best_params_)\n", - " clf.fit(np.array(gram_matrix), np.array(target_vec))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(target_vec), cv=10)\n", - " 
\n", - " return np.mean(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 175, - "id": "9d7c627b", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.717 total time= 0.2s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.754 total time= 0.2s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.783 total time= 0.2s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.765 total time= 0.2s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.719 total time= 0.2s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.717 total time= 0.1s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.754 total time= 0.1s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.785 total time= 0.1s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.766 total time= 0.1s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.719 total time= 0.1s\n", - "[CV 1/5] END .....C=0.01, class_weight=balanced;, score=0.780 total time= 0.2s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.783 total time= 0.2s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.832 total time= 0.2s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.809 total time= 0.3s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.786 total time= 0.2s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.780 total time= 0.2s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.783 total time= 0.2s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.832 total time= 0.2s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.809 total time= 0.2s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, 
score=0.787 total time= 0.3s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.794 total time= 0.7s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.787 total time= 0.7s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.828 total time= 0.7s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.799 total time= 0.6s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.802 total time= 0.6s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.794 total time= 0.6s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.787 total time= 0.7s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.828 total time= 0.6s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.799 total time= 0.7s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.802 total time= 0.7s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.774 total time= 0.9s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.779 total time= 1.0s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.787 total time= 1.0s\n", - "[CV 4/5] END ........C=1, class_weight=balanced;, score=0.776 total time= 0.9s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.785 total time= 0.8s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.774 total time= 1.0s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.779 total time= 1.0s\n", - "[CV 3/5] END ............C=1, class_weight=None;, score=0.787 total time= 0.9s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.776 total time= 0.9s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.785 total time= 0.8s\n", - "{'C': 0.1, 'class_weight': 'balanced'}\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.802919708029197,\n", - " array([0.81995134, 0.74939173, 0.84184915, 0.76155718, 0.81751825,\n", - " 0.8296837 , 0.79562044, 0.81751825, 0.83211679, 
0.76399027]))" - ] - }, - "execution_count": 175, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridSearchCV(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "e12789e7", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_tt(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " X_train, X_test, y_train, y_test = train_test_split(feat_vecs, target_vec, test_size=0.2, random_state=4)\n", - " train_feat = sp.vstack(X_train)\n", - " test_feat = sp.vstack(X_test)\n", - " gram_matrix = train_feat.dot(train_feat.transpose()).todense()\n", - " gram_test = train_feat.dot(test_feat.transpose()).todense().T\n", - "\n", - " if n_classes <= 2:\n", - " clf = SVC(kernel='precomputed')\n", - " clf.fit(np.array(gram_matrix), np.array(y_train))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train), cv=10)\n", - " print(classification_report(y_test, clf.predict(np.array(gram_test)))) \n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', class_weight=\"balanced\"))\n", - " clf.fit(np.array(gram_matrix), np.array(y_train_bin))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), y_train_bin, cv=10)\n", - " print(classification_report(y_test_bin, clf.predict(np.array(gram_test)))) \n", - " \n", - " return np.mean(accuracy), np.std(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "71efe919", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 0.85 0.85 0.85 138\n", - " 2 0.79 0.79 0.79 98\n", - "\n", - " 
accuracy 0.82 236\n", - " macro avg 0.82 0.82 0.82 236\n", - "weighted avg 0.82 0.82 0.82 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7886898096304591,\n", - " 0.04572144929229968,\n", - " array([0.81052632, 0.82105263, 0.76595745, 0.79787234, 0.80851064,\n", - " 0.80851064, 0.82978723, 0.68085106, 0.82978723, 0.73404255]))" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(DD_feat, DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 250, - "id": "cdbffc48", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 0 0.79 0.83 0.81 385\n", - " 1 0.84 0.80 0.82 437\n", - "\n", - " accuracy 0.81 822\n", - " macro avg 0.81 0.81 0.81 822\n", - "weighted avg 0.81 0.81 0.81 822\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.8150928534361332,\n", - " array([0.79331307, 0.81458967, 0.80547112, 0.81458967, 0.81458967,\n", - " 0.82370821, 0.79027356, 0.83890578, 0.8445122 , 0.81097561]))" - ] - }, - "execution_count": 250, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "68d9fa1d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - " precision recall f1-score support\n", - "\n", - " 0 0.83 0.24 0.37 21\n", - " 1 0.73 0.31 0.43 26\n", - " 2 0.62 0.43 0.51 23\n", - " 3 0.90 0.50 0.64 18\n", - " 4 0.40 0.13 0.20 15\n", - " 5 1.00 0.29 0.45 17\n", - "\n", - " micro avg 0.74 0.33 0.45 120\n", - " macro avg 0.75 0.32 0.44 120\n", - "weighted avg 0.75 0.33 0.44 120\n", - " samples avg 0.32 0.33 0.32 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.29375000000000007,\n", - " 0.044145041875868936,\n", - " array([0.27083333, 0.27083333, 0.29166667, 0.29166667, 0.29166667,\n", - " 0.375 , 0.35416667, 0.27083333, 0.20833333, 0.3125 ]))" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "a5cc2d4e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mean of number of nodes: 284\n" - ] - } - ], - "source": [ - "DD_ck = closed_kernel(DD)" - ] - }, - { - "cell_type": "raw", - "id": "28a4e982", - "metadata": {}, - "source": [ - "train_feat = np.vstack(DD_ck)\n", - "\n", - "gram_matrix = train_feat.dot(train_feat.transpose()).todense()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "ebd1e4c3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 0.85 0.85 0.85 138\n", - " 2 0.79 0.79 0.79 98\n", - "\n", - " accuracy 0.82 236\n", - " macro avg 0.82 0.82 0.82 236\n", - "weighted avg 0.82 0.82 0.82 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7886898096304591,\n", - " 0.04572144929229968,\n", - " array([0.81052632, 0.82105263, 0.76595745, 0.79787234, 0.80851064,\n", - " 0.80851064, 0.82978723, 0.68085106, 0.82978723, 0.73404255]))" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(DD_feat, DD_target)" - ] - }, - { - 
"cell_type": "code", - "execution_count": 77, - "id": "99fc680f", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_gridsearch_tt(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " X_train, X_test, y_train, y_test = train_test_split(feat_vecs, target_vec, test_size=0.2, random_state=4)\n", - " #print(\"after traintestsplit\", X_train.shape, X_test.shape)\n", - " train_feat = sp.vstack(X_train)\n", - " test_feat = sp.vstack(X_test)\n", - " #print(\"after vstack\", train_feat.shape, test_feat.shape)\n", - " gram_matrix = train_feat.dot(train_feat.transpose()).todense()\n", - " gram_test = train_feat.dot(test_feat.transpose()).todense().T\n", - " #print(\"after multiplication\", gram_matrix.shape, gram_test.shape)\n", - "\n", - " if n_classes <= 2:\n", - " param_grid = {'C': [0.001,0.01,0.1,1],\n", - " 'class_weight':['balanced',None]} \n", - " grid = GridSearchCV(SVC(kernel=\"precomputed\"), param_grid, cv = 5, verbose = 3) \n", - " grid.fit(np.array(gram_matrix), np.array(y_train))\n", - " print(grid.best_params_) \n", - " clf = SVC(kernel='precomputed', **grid.best_params_)\n", - " clf.fit(np.array(gram_matrix), np.array(y_train))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train), cv=10)\n", - " print(classification_report(y_test, clf.predict(np.array(gram_test))))\n", - " \n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " \n", - " param_grid = {'estimator__C': [0.001,0.01, 0.05, 0.1,1], \n", - " 'estimator__class_weight':['balanced',None]} \n", - " test_clf = OneVsRestClassifier(SVC(kernel=\"precomputed\"))\n", - " #print(test_clf.get_params().keys())\n", - " grid = GridSearchCV(test_clf, param_grid, cv = 5, verbose = 3) \n", - " grid.fit(np.array(gram_matrix), 
np.array(y_train_bin))\n", - " print(grid.best_params_) \n", - " best_params = {\"C\": grid.best_params_[\"estimator__C\"], \"class_weight\": grid.best_params_[\"estimator__class_weight\"]}\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', **best_params))\n", - " clf.fit(np.array(gram_matrix), np.array(y_train_bin))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train_bin), cv=10)\n", - " print(classification_report(y_test_bin, clf.predict(np.array(gram_test))))\n", - " \n", - " return np.mean(accuracy), np.std(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "a34a0181", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.021 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.052 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.021 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.031 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.042 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.260 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, 
estimator__class_weight=balanced;, score=0.375 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.375 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.333 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.104 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.156 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.146 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.156 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.083 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.271 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.323 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.229 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END 
estimator__C=0.1, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.208 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.240 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "{'estimator__C': 0.01, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.83 0.24 
0.37 21\n", - " 1 0.60 0.35 0.44 26\n", - " 2 0.55 0.48 0.51 23\n", - " 3 0.69 0.50 0.58 18\n", - " 4 0.28 0.33 0.30 15\n", - " 5 0.53 0.47 0.50 17\n", - "\n", - " micro avg 0.54 0.39 0.45 120\n", - " macro avg 0.58 0.39 0.45 120\n", - "weighted avg 0.60 0.39 0.45 120\n", - " samples avg 0.37 0.39 0.38 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.33541666666666664,\n", - " 0.07820135157968108,\n", - " array([0.25 , 0.29166667, 0.3125 , 0.35416667, 0.35416667,\n", - " 0.41666667, 0.45833333, 0.3125 , 0.1875 , 0.41666667]))" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "markdown", - "id": "4394d988", - "metadata": {}, - "source": [ - "# Graphlet Kernel" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "2591e8de", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - 
"dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - 
"dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "0918ee67", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": 
"47414d42", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphlet = graphlet_kernel(DD)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "1075cb04", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphlet = graphlet_kernel(ENZ)" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "6f797e29", - "metadata": {}, - "outputs": [], - "source": [ - "NCI_graphlet = graphlet_kernel(NCI)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "f29f121f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<1178x34 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 6861 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sp.vstack(sp.csr_matrix(np.array(DD_graphlet)))" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "a54a6b4a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.730 total time= 0.0s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.757 total time= 0.0s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.739 total time= 0.0s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.734 total time= 0.0s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.741 total time= 0.0s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.778 total time= 0.0s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.707 total time= 0.0s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.745 total time= 0.0s\n", - "[CV 1/5] END 
.....C=0.01, class_weight=balanced;, score=0.735 total time= 0.0s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.746 total time= 0.0s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.739 total time= 0.0s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.745 total time= 0.0s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.735 total time= 0.0s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.783 total time= 0.0s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.777 total time= 0.0s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.702 total time= 0.0s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, score=0.734 total time= 0.0s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.730 total time= 0.3s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.757 total time= 0.3s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.782 total time= 0.2s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.739 total time= 0.2s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.734 total time= 0.3s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.725 total time= 0.4s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.783 total time= 0.3s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.771 total time= 0.3s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.702 total time= 0.4s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.734 total time= 0.4s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.725 total time= 4.2s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.741 total time= 3.1s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.787 total time= 5.2s\n", - "[CV 4/5] END ........C=1, 
class_weight=balanced;, score=0.734 total time= 2.6s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.734 total time= 3.6s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.735 total time= 5.9s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.783 total time= 4.5s\n", - "[CV 3/5] END ............C=1, class_weight=None;, score=0.750 total time= 3.8s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.707 total time= 4.2s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.723 total time= 4.5s\n", - "{'C': 0.001, 'class_weight': None}\n", - " precision recall f1-score support\n", - "\n", - " 1 0.73 0.90 0.81 138\n", - " 2 0.79 0.54 0.64 98\n", - "\n", - " accuracy 0.75 236\n", - " macro avg 0.76 0.72 0.73 236\n", - "weighted avg 0.76 0.75 0.74 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7463045912653976,\n", - " 0.04357951487182796,\n", - " array([0.72631579, 0.74736842, 0.74468085, 0.78723404, 0.78723404,\n", - " 0.78723404, 0.76595745, 0.63829787, 0.76595745, 0.71276596]))" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(DD_graphlet)), DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "ee7d1e5a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.073 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, 
estimator__class_weight=balanced;, score=0.031 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.021 total time= 0.8s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.7s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.021 total time= 0.6s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.5s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.031 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.042 total time= 0.2s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.031 total time= 0.3s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.021 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.042 total time= 0.3s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 3.3s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.031 total time= 5.3s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.021 total time= 5.1s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.031 total time= 4.8s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.010 total time= 4.3s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.021 total time= 2.2s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.031 total time= 2.1s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.021 total time= 1.8s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.031 total time= 1.8s\n", - "[CV 
5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.052 total time= 1.9s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 12.1s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.021 total time= 29.5s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.010 total time= 21.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.031 total time= 29.2s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.010 total time= 21.5s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.021 total time= 4.7s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.042 total time= 5.3s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.031 total time= 3.7s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.052 total time= 4.3s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.062 total time= 5.3s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 28.1s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 58.2s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 41.3s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.021 total time= 52.9s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 33.9s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.031 total time= 1.0min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.052 total time= 47.8s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.031 total time= 53.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.021 total time= 
47.4s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.021 total time= 46.1s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 3.2min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 8.7min\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 6.5min\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.021 total time= 8.8min\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.021 total time= 4.3min\n", - "{'estimator__C': 0.1, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.16 0.57 0.25 21\n", - " 1 0.30 0.35 0.32 26\n", - " 2 0.32 0.52 0.40 23\n", - " 3 0.18 0.72 0.29 18\n", - " 4 0.17 0.33 0.23 15\n", - " 5 0.26 0.47 0.33 17\n", - "\n", - " micro avg 0.21 0.49 0.30 120\n", - " macro avg 0.23 0.49 0.30 120\n", - "weighted avg 0.24 0.49 0.31 120\n", - " samples avg 0.24 0.49 0.31 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. 
Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.03958333333333333,\n", - " 0.025430324199445214,\n", - " array([0.02083333, 0.02083333, 0.04166667, 0.02083333, 0.02083333,\n", - " 0.0625 , 0.02083333, 0.04166667, 0.10416667, 0.04166667]))" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(ENZ_graphlet)), ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "id": "5c8e8ce2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.632 total time= 0.3s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.612 total time= 0.4s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.637 total time= 0.3s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.604 total time= 0.3s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.642 total time= 0.3s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.635 total time= 0.3s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.617 total time= 0.2s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.622 total time= 0.2s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.604 total time= 0.2s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.635 total time= 0.2s\n", - "[CV 1/5] END .....C=0.01, class_weight=balanced;, score=0.626 total time= 0.7s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.619 total time= 0.7s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.635 total time= 0.7s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.607 total time= 
0.8s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.642 total time= 0.7s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.625 total time= 0.8s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.617 total time= 0.7s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.626 total time= 0.8s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.606 total time= 0.8s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, score=0.639 total time= 0.7s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.629 total time= 3.4s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.622 total time= 3.7s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.640 total time= 4.0s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.607 total time= 3.8s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.641 total time= 3.8s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.628 total time= 4.9s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.620 total time= 4.1s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.631 total time= 4.0s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.607 total time= 4.3s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.636 total time= 3.7s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.629 total time= 31.3s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.622 total time= 35.6s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.637 total time= 28.5s\n", - "[CV 4/5] END ........C=1, class_weight=balanced;, score=0.604 total time= 36.2s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.639 total time= 30.0s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.631 total time= 33.8s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.619 total time= 33.0s\n", - 
"[CV 3/5] END ............C=1, class_weight=None;, score=0.620 total time= 30.1s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.609 total time= 34.0s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.635 total time= 28.7s\n", - "{'C': 0.1, 'class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.60 0.64 0.62 385\n", - " 1 0.66 0.62 0.64 437\n", - "\n", - " accuracy 0.63 822\n", - " macro avg 0.63 0.63 0.63 822\n", - "weighted avg 0.63 0.63 0.63 822\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.6286511231373713,\n", - " 0.01745021029215421,\n", - " array([0.64741641, 0.61702128, 0.61094225, 0.6443769 , 0.65653495,\n", - " 0.63221884, 0.59878419, 0.61702128, 0.6402439 , 0.62195122]))" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(NCI_graphlet)), NCI_target)" - ] - }, - { - "cell_type": "markdown", - "id": "de37ea00", - "metadata": {}, - "source": [ - "# closed walk" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "bda0b15d", - "metadata": {}, - "outputs": [], - "source": [ - "max_enz = max([max(point) for point in ENZ_features])/1000\n", - "ENZ_normalized = np.array(np.array(ENZ_features)/max_enz, dtype=float)" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "id": "25ed0e82", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=balanced;, 
score=0.000 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.2s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.4s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.3s\n", - "[CV 4/5] END estimator__C=0.05, 
estimator__class_weight=balanced;, score=0.000 total time= 0.2s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 13.4s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 1.7s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 5.3s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 6.9s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 8.7s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.2s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.1s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.010 total time= 0.1s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 20.1s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 17.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 15.1s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 29.6s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 27.3s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.6s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 0.6s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 0.6s\n", - "[CV 4/5] END 
estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.3s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.2s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.6min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.1min\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 47.5s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.6min\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 52.6s\n", - "{'estimator__C': 0.001, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.18 1.00 0.30 21\n", - " 1 0.26 1.00 0.41 26\n", - " 2 0.00 0.00 0.00 23\n", - " 3 0.50 0.06 0.10 18\n", - " 4 0.12 0.87 0.20 15\n", - " 5 0.00 0.00 0.00 17\n", - "\n", - " micro avg 0.18 0.51 0.27 120\n", - " macro avg 0.18 0.49 0.17 120\n", - "weighted avg 0.18 0.51 0.18 120\n", - " samples avg 0.20 0.51 0.28 120\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.0, 0.0, array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(ENZ_normalized), ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "be57620b", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff 
--git a/Python_files/ex1/datasets/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Python_files/ex1/datasets/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index bb9a810f0ae63a97f0c529d9d1b7ab05b5ddf479..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,35 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "palestinian-quarter", - "metadata": {}, - "outputs": [], - "source": [ - "hello" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/.DS_Store b/Python_files/ex1/datasets/DD/.DS_Store deleted file mode 100644 index 765cdd1cf89e2916c73e1e8f3cbf370041187a44..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/DD/.DS_Store and /dev/null differ diff --git a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1-checkpoint.ipynb b/Python_files/ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1-checkpoint.ipynb deleted file mode 100644 index 4833343bd4390afdeb6f6d8d2e965b475495b0c4..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1-checkpoint.ipynb +++ /dev/null @@ -1,635 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 15, - "id": "surgical-christmas", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection 
import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "waiting-paste", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "acute-accessory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "civil-crash", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - 
"dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - 
"dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "precious-lounge", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "alternate-minister", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the 
folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "tight-defensive", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "radio-armenia", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pleased-pressing", - "metadata": {}, - "outputs": [], - "source": [ - "output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "arabic-ivory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "promotional-conclusion", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "exterior-packing", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(480, 34) (480,)\n", - "(120, 34) (120,)\n" - ] - } - ], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data1)\n", - "target=[g.graph['label'] for g in data1]\n", - "\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.2)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "pregnant-capacity", - "metadata": {}, - "outputs": [], - "source": [ - "clf = svm.SVC(kernel='linear', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "abroad-attribute", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "heard-screening", - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "modular-hunger", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "right-research", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "synthetic-spring", - "metadata": {}, - "outputs": [], - "source": [ - "accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "nearby-recommendation", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.03460563266237011" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.mean()\n", - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "satellite-fiber", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "satellite-meaning", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 
1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "charged-french", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "affiliated-lambda", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "immune-samba", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "small-sword", - "metadata": {}, - "outputs": [], - 
"source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - " if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", - " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "necessary-rates", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.018750000000000003 0.014583333333333332\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Classification metrics can't handle a mix of multilabel-indicator and multiclass targets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-17-57e877d3e78a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msvc_evaluation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-16-ec47bb244c24>\u001b[0m in \u001b[0;36msvc_evaluation\u001b[0;34m(features, targets)\u001b[0m\n\u001b[1;32m 15\u001b[0m 
\u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclassification_report\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36mclassification_report\u001b[0;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[1;32m 1964\u001b[0m \"\"\"\n\u001b[1;32m 1965\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1966\u001b[0;31m \u001b[0my_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1967\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1968\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m raise ValueError(\"Classification metrics can't handle 
a mix of {0} \"\n\u001b[0m\u001b[1;32m 93\u001b[0m \"and {1} targets\".format(type_true, type_pred))\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "quick-auction", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "mighty-mortgage", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "recognized-ensemble", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "classical-chapel", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - 
"language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1_new-checkpoint.ipynb b/Python_files/ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1_new-checkpoint.ipynb deleted file mode 100644 index 4833343bd4390afdeb6f6d8d2e965b475495b0c4..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1_new-checkpoint.ipynb +++ /dev/null @@ -1,635 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 15, - "id": "surgical-christmas", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "waiting-paste", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "acute-accessory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 3, - 
"id": "civil-crash", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - 
"dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "precious-lounge", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def 
count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "alternate-minister", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "tight-defensive", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "radio-armenia", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pleased-pressing", - "metadata": {}, - "outputs": [], - "source": [ - "output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "arabic-ivory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "promotional-conclusion", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "exterior-packing", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(480, 34) (480,)\n", - "(120, 34) (120,)\n" - ] - } - ], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data1)\n", - "target=[g.graph['label'] for g in data1]\n", - "\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.2)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "pregnant-capacity", - "metadata": {}, - "outputs": [], - "source": [ - "clf = svm.SVC(kernel='linear', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "abroad-attribute", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "heard-screening", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "modular-hunger", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "right-research", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "synthetic-spring", - "metadata": {}, - "outputs": [], - "source": [ - 
"accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "nearby-recommendation", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.03460563266237011" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.mean()\n", - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "satellite-fiber", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "satellite-meaning", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "charged-french", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = 
int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "affiliated-lambda", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "immune-samba", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "small-sword", - "metadata": {}, - "outputs": [], - "source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - " if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", - " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " 
print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "necessary-rates", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.018750000000000003 0.014583333333333332\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Classification metrics can't handle a mix of multilabel-indicator and multiclass targets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-17-57e877d3e78a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msvc_evaluation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-16-ec47bb244c24>\u001b[0m in \u001b[0;36msvc_evaluation\u001b[0;34m(features, targets)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclassification_report\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36mclassification_report\u001b[0;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[1;32m 1964\u001b[0m \"\"\"\n\u001b[1;32m 1965\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1966\u001b[0;31m 
\u001b[0my_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1967\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1968\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m raise ValueError(\"Classification metrics can't handle a mix of {0} \"\n\u001b[0m\u001b[1;32m 93\u001b[0m \"and {1} targets\".format(type_true, type_pred))\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "quick-auction", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - 
"source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "mighty-mortgage", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "recognized-ensemble", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "classical-chapel", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Python_files/ex1/datasets/DD/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index 96319576d15788953cae1cf3b3cea4a36c3f2885..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,304 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 398, - 
"id": "noted-complaint", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn import svm" - ] - }, - { - "cell_type": "code", - "execution_count": 399, - "id": "sticky-fraction", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "amended-india", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 400, - "id": "chinese-chrome", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - 
"dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - 
"dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 401, - "id": "distributed-chuck", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(temp.values())\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(temp.values())\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 402, - "id": "consecutive-portal", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in 
feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "floating-approval", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "handy-attitude", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "crude-myanmar", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "statewide-crossing", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "id": "varied-cologne", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/Untitled1-checkpoint.ipynb b/Python_files/ex1/datasets/DD/.ipynb_checkpoints/Untitled1-checkpoint.ipynb deleted file mode 100644 index 363fcab7ed6e9634e198cf5555ceb88932c9a245..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/.ipynb_checkpoints/Untitled1-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/EX2_GL1.ipynb b/Python_files/ex1/datasets/DD/EX2_GL1.ipynb deleted file mode 100644 index c2dfdc7475419ab98e65947b6547f7b263c77872..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/EX2_GL1.ipynb +++ /dev/null @@ -1,639 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 15, - "id": "fitting-toyota", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": 
"code", - "execution_count": 2, - "id": "limited-dance", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "revised-calendar", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "swiss-population", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - 
"dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - 
"dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "convenient-initial", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "structured-delivery", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "piano-johns", - "metadata": 
{}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "treated-flashing", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "obvious-marriage", - "metadata": {}, - "outputs": [], - "source": [ - "output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "statewide-extra", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "hired-feature", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "architectural-inflation", - "metadata": {}, - "outputs": [], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data1)\n", - "target=[g.graph['label'] for g in data1]\n", - "\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.2)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "expired-barcelona", - "metadata": {}, - "outputs": [], - "source": [ - "clf = svm.SVC(kernel='linear', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "shaped-worse", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "prospective-upper", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "controlling-stock", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - 
"id": "turned-calvin", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "featured-training", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'clf' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-20-fc545aec8df4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0maccuracy\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgram_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'clf' is not defined" - ] - } - ], - "source": [ - "accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "apart-lancaster", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'accuracy' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-18-3bcce742fcca>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'accuracy' is not defined" - ] - } - ], - "source": [ - "print(accuracy.mean())\n", - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "about-nicholas", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "modular-johnston", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": 
"code", - "execution_count": 62, - "id": "everyday-digest", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "short-native", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "satisfactory-facility", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "forbidden-triumph", - "metadata": {}, - "outputs": [], - "source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - " if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', 
class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", - " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "spoken-oasis", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.018750000000000003 0.014583333333333332\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Classification metrics can't handle a mix of multilabel-indicator and multiclass targets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-17-57e877d3e78a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msvc_evaluation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-16-ec47bb244c24>\u001b[0m in \u001b[0;36msvc_evaluation\u001b[0;34m(features, targets)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclassification_report\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36mclassification_report\u001b[0;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[1;32m 1964\u001b[0m \"\"\"\n\u001b[1;32m 1965\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1966\u001b[0;31m \u001b[0my_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1967\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1968\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m raise ValueError(\"Classification metrics can't handle a mix of {0} \"\n\u001b[0m\u001b[1;32m 93\u001b[0m \"and {1} targets\".format(type_true, type_pred))\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "prompt-draft", - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "coordinated-counter", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "listed-turning", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "boolean-mailman", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/EX2_GL1_new.ipynb 
b/Python_files/ex1/datasets/DD/EX2_GL1_new.ipynb deleted file mode 100644 index 133cc9f4c39d36247cf1f5c9a42411112169af21..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/EX2_GL1_new.ipynb +++ /dev/null @@ -1,599 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 34, - "id": "numeric-glucose", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "import argparse\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "pursuant-atlantic", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "designing-religion", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "nasty-calculator", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - 
"dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - 
"dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "departmental-missouri", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j 
in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "comprehensive-shareware", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "existing-grain", - "metadata": {}, - "outputs": [], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data)\n", - "target=[g.graph['label'] for g in data]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "apparent-snowboard", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1060, 34) (1060,)\n", - "(118, 34) (118,)\n" - ] - } - ], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.1)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "civic-contents", - "metadata": {}, - "outputs": [], - "source": [ - "clf = SVC(kernel='precomputed', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "micro-money", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "brutal-deployment", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "dietary-practice", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": 
"parliamentary-vietnamese", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "fixed-residence", - "metadata": {}, - "outputs": [], - "source": [ - "accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "critical-stanford", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7481132075471698" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.mean()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "behavioral-saying", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.04525352813287346" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "leading-invalid", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "filled-trash", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 
1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "generic-argument", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "exact-freeze", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "funny-omaha", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "appropriate-guest", - "metadata": {}, - "outputs": [], - "source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - 
" if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", - " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "portable-calibration", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.74736842 0.75789474 0.72340426 0.74468085 0.79787234 0.70212766\n", - " 0.72340426 0.78723404 0.76595745 0.73404255]\n", - " precision recall f1-score support\n", - "\n", - " 1 0.72 0.89 0.79 137\n", - " 2 0.77 0.52 0.62 99\n", - "\n", - " accuracy 0.73 236\n", - " macro avg 0.75 0.70 0.71 236\n", - "weighted avg 0.74 0.73 0.72 236\n", - "\n" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "native-grade", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "mechanical-twist", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - 
"0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "clean-blackjack", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "signal-clause", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/Untitled.ipynb b/Python_files/ex1/datasets/DD/Untitled.ipynb deleted file mode 100644 index 65beca8ba1a2d59014a9bb8c93a5b889ae6242b6..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/Untitled.ipynb +++ /dev/null @@ -1,313 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 406, - "id": "moral-brazil", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn 
import datasets\n", - "from sklearn import svm\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": 407, - "id": "played-studio", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "destroyed-sociology", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 400, - "id": "intelligent-sword", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - 
"dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - 
"dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "played-yugoslavia", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 401, - "id": "skilled-whale", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(temp.values())\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(temp.values())\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 402, - "id": "substantial-charge", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', 
C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "agricultural-bookmark", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "present-springfield", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "athletic-colombia", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "sorted-tuesday", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "optional-horse", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - 
"language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/Untitled1.ipynb b/Python_files/ex1/datasets/DD/Untitled1.ipynb deleted file mode 100644 index 11abc48d1e747cba7fba15e207c40b4183d93b14..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/DD/Untitled1.ipynb +++ /dev/null @@ -1,236 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "id": "welsh-chorus", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn import svm\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "favorite-andrews", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "boxed-fortune", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - 
"dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - 
"dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "treated-mitchell", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if sorted(dict(k.degree()).values())==sorted(dict(g.degree()).values()):\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - "\n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes 
is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "automated-period", - "metadata": {}, - "outputs": [], - "source": [ - "out=graphlet_kernel(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "trained-arrow", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4110" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(out)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "registered-patent", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 0, 0, 0, 0]" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g=nx.empty_graph(5)\n", - "sorted(dict(g.degree()).values())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pointed-preview", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex1/datasets/DD/data.pkl b/Python_files/ex1/datasets/DD/data.pkl deleted file mode 100644 index 546de71a1d97226a0a1a3e0f89a34671520934f9..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/DD/data.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/DD/data1.pkl b/Python_files/ex1/datasets/DD/data1.pkl deleted file 
mode 100644 index e4e7190c25ffe6ac0636afceb46b30d55557319f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/DD/data1.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/DD/data2.pkl b/Python_files/ex1/datasets/DD/data2.pkl deleted file mode 100644 index 6c72a48da0903e887c68acff31d5715f0072cd6c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/DD/data2.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/ENZYMES/.DS_Store b/Python_files/ex1/datasets/ENZYMES/.DS_Store deleted file mode 100644 index 14f68d85f90870714ff4c194f0d8aac3137a8fb0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ENZYMES/.DS_Store and /dev/null differ diff --git a/Python_files/ex1/datasets/ENZYMES/data.pkl b/Python_files/ex1/datasets/ENZYMES/data.pkl deleted file mode 100644 index e4e7190c25ffe6ac0636afceb46b30d55557319f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ENZYMES/data.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/NCI1/.DS_Store b/Python_files/ex1/datasets/NCI1/.DS_Store deleted file mode 100644 index 2ebdc99225f67b4c77a99cda1e323630bf220956..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/NCI1/.DS_Store and /dev/null differ diff --git a/Python_files/ex1/datasets/NCI1/data2.pkl b/Python_files/ex1/datasets/NCI1/data2.pkl deleted file mode 100644 index 6c72a48da0903e887c68acff31d5715f0072cd6c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/NCI1/data2.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/.DS_Store b/Python_files/ex1/datasets/ex1/.DS_Store deleted file mode 100644 index 8e627a99d8e9cfc8ea2709922ec67eda81057390..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/.DS_Store and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/README.md 
b/Python_files/ex1/datasets/ex1/README.md deleted file mode 100644 index 50cca315745698f7e4ab01c7dbb70190ba0f132e..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/ex1/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# Graph Learning -This readme is used to describe how to run our code and the results we obtained in each exercise. - -## Structure of the repository - -The repository contains five different files: \ -\ - 1. closed_walk_kernel.py: This file contains the Closed Walk Kernel of exercise 1. \ - 2. graphlet_kernel.py: This file contains the Graphlet Kernel of exercise 2. \ - 3. wl_kernel.py: This file contains the Weisfeiler-Leman-Kernel of exercise 3.\ - 4. svm_function.py: This file contains the Support Vector Machine of exercise 4.\ - 5. arg_code_ex1.py: This is the main code where the defined kernels and functions (1.-5.) are imported and called. - -## How to run the script - -This script uses argparse. \ -\ -To run the script it is necessary to call the file 'arg_code_ex1.py'. It is required to choose the kernel and the dataset of interest. The arguments '-k' and '-P' are implemented to adress the kernels and paths respectively. The kernel of interest can be chosen with the filename (without filename extension), while the dataset can be chosen with the pathname. \ -Further, there is an additional optional argument '-eval' which runs the Support Vector Machine. 
- - -#### Example for running the script - -The following command should be run in the terminal to call the Closed Walk Kernel with the dataset 'Enzymes' (if the folder "datasets" is in the same location as the python file): \ -\ - -- python arg_code_ex1.py -k closed_walk_kernel -P datasets/ENZYMES/data.pkl - -The following command should be run in the terminal to call the Closed Walk Kernel with the dataset 'Enzymes' and then perform graph classification with a Support Vector Machine: \ -\ - -- python arg_code_ex1.py -k closed_walk_kernel -P datasets/ENZYMES/data.pkl -eval svm - -## Notes on the Exercises - -### Ex.1: Choice of maximal length l - -Our goal was to find a variable which takes into account the size of the respective graphs. After having considered several graph characteristics, like the diameter, minimum or maximum number of nodes, we chose l to be the mean number of nodes of the respective graph collections, because the other attributes are either too high (maximum number of nodes), too low (minimum number of nodes) or gave an infinite path length because the datasets contain graphs which are not connected (diameter). Our choice ensures a suitable balance between information and complexity. This yields the results DD: 284, ENZYMES: 32 and NCI: 29 (rounded down). - -### Ex.4: Train/Test split and Gram Matrix - -In order to ensure reliable and independent results for the SVM, we performed a train-test-split of the graph data (train: 80%, test: 20%). We then trained a classifier on the training data and computed the 10-fold cross-validation on the training data. The testing data was evaluated separately. - -As the feature vectors of the WL-kernel are very large and sparse, we used the option "kernel=precomputed" in the SVM and used the gram matrix of the feature vectors as input. For the other two kernels, we used the "raw" feature vectors as input to the SVM. 
- -## Results - -### DD - -| | Closed Walk Kernel | Graphlet Kernel | WL-Kernel | -|-----------------------------|--------------------|------------------|-----------| -|**train data mean accuracy** | 0.593 | 0.744 | 0.789 | -|**train data standard dev.** | 0.004 | 0.021 | 0.046 | -|**test data accuracy** | 0.559 | 0.707 | **0.822** | - - -### ENZYMES - -| | Closed Walk Kernel | Graphlet Kernel | WL-Kernel | -|-----------------------------|--------------------|------------------|-----------| -|**train data mean accuracy** | 0.187 | 0.2625 | 0.517 | -|**train data standard dev.** | 0.034 | 0.053 | 0.084 | -|**test data accuracy** | 0.142 | 0.175 | **0.492** | - - -### NCI1 - -| | Closed Walk Kernel | Graphlet Kernel | WL-Kernel | -|-----------------------------|--------------------|------------------|-----------| -|**train data mean accuracy** | 0.510 | 0.610 | 0.815 | -|**train data standard dev.** | 0.061 | 0.021 | 0.016 | -|**test data accuracy** | 0.533 | 0.658 | **0.813** | - - -Our results show that the Weisfeiler-Leman-Kernel performs best throughout all datasets. This is not surprising, as the WL-Kernel is the most sophisticated kernel we have used so far. Compared to the paper *Weisfeiler-Lehman Graph Kernels*, our WL-kernel achieved equal accuracy as the paper on all three datasets respectively (paper NCI1: 82.19 (± 0.18), DD: 79.78 (±0.36), ENZYMES 46.42 (±1.35)). - -It is also interesting to note that a (3-)Graphlet Kernel appeared in the paper as a reference Kernel, but both in our calculations and in the paper, the WL-Kernel always outperformed the Graphlet Kernel (albeit sometimes only slightly). The closed walk, on the other hand, does not have enough explanatory power to achieve competitive results, so it also does not appear in the paper. 
diff --git a/Python_files/ex1/datasets/ex1/__pycache__/closed_walk_kernel.cpython-38.pyc b/Python_files/ex1/datasets/ex1/__pycache__/closed_walk_kernel.cpython-38.pyc deleted file mode 100644 index 2b68a2376201b5479854f530bd6f7436e4f103c0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/__pycache__/closed_walk_kernel.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/__pycache__/graphlet_kernel.cpython-38.pyc b/Python_files/ex1/datasets/ex1/__pycache__/graphlet_kernel.cpython-38.pyc deleted file mode 100644 index 2ce7793147a23b064fdfdba182b1f0ac5b9afbd3..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/__pycache__/graphlet_kernel.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/__pycache__/svm_function.cpython-38.pyc b/Python_files/ex1/datasets/ex1/__pycache__/svm_function.cpython-38.pyc deleted file mode 100644 index 9403fc51e4794e9b812b8431a49a2e59d2621147..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/__pycache__/svm_function.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/__pycache__/wl_kernel.cpython-38.pyc b/Python_files/ex1/datasets/ex1/__pycache__/wl_kernel.cpython-38.pyc deleted file mode 100644 index aaa32618b530795d117e7f8b0fff5ed5a03832bf..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/__pycache__/wl_kernel.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/arg_code_ex1.py b/Python_files/ex1/datasets/ex1/arg_code_ex1.py deleted file mode 100644 index bf1fbd3b2bafe2fd243e1f9939844d432ec46a61..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/ex1/arg_code_ex1.py +++ /dev/null @@ -1,59 +0,0 @@ -import pickle -import argparse - -""" -The following code is the main code where the defined kernels and functions are imported and called. 
-""" - -# import defined kernels and functions -from closed_walk_kernel import closed_walk_kernel -from graphlet_kernel import graphlet_kernel -from wl_kernel import wl_kernel -from svm_function import svm_precomputed_tt, svm_linear_tt,svm_tt - -# Create arguments which are needed for the command line -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--kernel', required = True, help='Choose the kernel of interest') -parser.add_argument('-P', '--path', required = True, help='Choose the path of the dataset of interest') -parser.add_argument('-eval', '--svm', help='Call if you want to make use of SVM') -args = parser.parse_args() - -# load the data -with open(args.path, 'rb') as file: - data = pickle.load(file) - -# 'react' if this file is called -# run the chosen kernel -# If SVM is called then run it -if __name__ == '__main__': - print("Computing Kernel") - if args.kernel == 'closed_walk_kernel': - feature_vectors = closed_walk_kernel(data) - - elif args.kernel == 'graphlet_kernel': - feature_vectors = graphlet_kernel(data) - - elif args.kernel == 'wl_kernel': - feature_vectors = wl_kernel(data) - - else: - raise Exception("Chosen kernel does not exist :S") - - if args.svm == 'svm': - print("Computing SVM") - target_label = [g.graph['label'] for g in data] - if args.kernel == 'wl_kernel': - svm_precomputed_tt(feature_vectors, target_label) - elif args.kernel == 'closed_walk_kernel': - svm_linear_tt(feature_vectors, target_label) - else: - svm_tt(feature_vectors, target_label) - - - - - - - - - diff --git a/Python_files/ex1/datasets/ex1/closed_walk_kernel.py b/Python_files/ex1/datasets/ex1/closed_walk_kernel.py deleted file mode 100644 index 3fab98b69b02702db22e006e16e905376bc20cdd..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/ex1/closed_walk_kernel.py +++ /dev/null @@ -1,34 +0,0 @@ -import networkx as nx -import numpy as np - -# Exercise 1 -def closed_walk_kernel(graph_list): - """Implemention of the Closed Walk Kernel. 
- - Keyword argument: - graph_list -- Dataset representing list of graphs - - Key idea: - Computation through eigenvalues with help of the eigenvalue decomposition. - - Returns: List of Histograms (one histogram for every graph of the dataset) """ - - # Compute the mean number of nodes over all graphs - l = int(np.mean([len(g.nodes) for g in graph_list])) - print("mean of number of nodes:", l) - - - # Compute the histogram of closed walks of different length up to the mean number of nodes - feature_vectors = [] - for graph in graph_list: - number = [] - A = nx.adjacency_matrix(graph) - A =A.todense() - lambdas = np.linalg.eigvalsh(A) - for j in range(1, l+1): - power_lambdas= [x**(j) for x in lambdas ] - sum_lambdas=int(np.round(sum(power_lambdas))) - number.append(sum_lambdas) - feature_vectors.append(number) - - return feature_vectors diff --git a/Python_files/ex1/datasets/ex1/data.pkl b/Python_files/ex1/datasets/ex1/data.pkl deleted file mode 100644 index 546de71a1d97226a0a1a3e0f89a34671520934f9..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/data.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/data1.pkl b/Python_files/ex1/datasets/ex1/data1.pkl deleted file mode 100644 index e4e7190c25ffe6ac0636afceb46b30d55557319f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/data1.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/data2.pkl b/Python_files/ex1/datasets/ex1/data2.pkl deleted file mode 100644 index 6c72a48da0903e887c68acff31d5715f0072cd6c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex1/datasets/ex1/data2.pkl and /dev/null differ diff --git a/Python_files/ex1/datasets/ex1/graphlet_kernel.py b/Python_files/ex1/datasets/ex1/graphlet_kernel.py deleted file mode 100644 index bbf7e26ad0e2f7bd01b171232be99236ab2d3e70..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/ex1/graphlet_kernel.py +++ /dev/null @@ 
-1,116 +0,0 @@ -import networkx as nx -import numpy as np -import random - - - -#Exercise 2 - - -def graphlet_kernel(data): - ''' - Count the number of randomly sampled graphlets of a graph for all graphs in the dataset. - - Key idea: Sample graphlets with five nodes a thousand times from a given graph and store the number of isomorphic types in a histogram - - input:dataset - output:list of counter for all the graphs in the dataset - ''' - -#create all the non-isoorphic graphs with 5 nodes and store it in a list called 'dic' - g0=nx.empty_graph(5) - dic=[0]*34 - dic[0]=nx.create_empty_copy(g0) - dic[0].add_edges_from([(0,1)]) - dic[1]=nx.create_empty_copy(g0) - dic[1].add_edges_from([(0,1),(0,2)]) - dic[2]=nx.create_empty_copy(g0) - dic[2].add_edges_from([(0,1),(2,3)]) - dic[3]=nx.create_empty_copy(g0) - dic[3].add_edges_from([(0,1),(0,2),(0,3)]) - dic[4]=nx.create_empty_copy(g0) - dic[4].add_edges_from([(0,1),(0,2),(3,4)]) - dic[5]=nx.create_empty_copy(g0) - dic[5].add_edges_from([(0,1),(1,2),(2,3)]) - dic[6]=nx.create_empty_copy(g0) - dic[6].add_edges_from([(0,1),(0,2),(1,2)]) - dic[7]=nx.create_empty_copy(g0) - dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)]) - dic[8]=nx.create_empty_copy(g0) - dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)]) - dic[9]=nx.create_empty_copy(g0) - dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)]) - dic[10]=nx.create_empty_copy(g0) - dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)]) - dic[11]=nx.create_empty_copy(g0) - dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)]) - dic[12]=nx.create_empty_copy(g0) - dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)]) - dic[13]=nx.create_empty_copy(g0) - dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)]) - dic[14]=nx.create_empty_copy(g0) - dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)]) - dic[15]=nx.create_empty_copy(g0) - dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)]) - dic[16]=nx.create_empty_copy(g0) - dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)]) - 
dic[17]=nx.create_empty_copy(g0) - dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)]) - dic[18]=nx.create_empty_copy(g0) - dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)]) - dic[19]=nx.create_empty_copy(g0) - dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)]) - dic[20]=nx.create_empty_copy(g0) - dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)]) - dic[21]=nx.create_empty_copy(g0) - dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)]) - dic[22]=nx.create_empty_copy(g0) - dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)]) - dic[23]=nx.create_empty_copy(g0) - dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)]) - dic[24]=nx.create_empty_copy(g0) - dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)]) - dic[25]=nx.create_empty_copy(g0) - dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[26]=nx.create_empty_copy(g0) - dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)]) - dic[27]=nx.create_empty_copy(g0) - dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)]) - dic[28]=nx.create_empty_copy(g0) - dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)]) - dic[29]=nx.create_empty_copy(g0) - dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)]) - dic[30]=nx.create_empty_copy(g0) - dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[31]=nx.create_empty_copy(g0) - dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)]) - dic[32]=nx.create_empty_copy(g0) - dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)]) - dic.insert(0,nx.create_empty_copy(g0)) - -#make a initial dict called 'vektor' to count the non-isoorphic graphs - vektor=dict(zip(dic,[0]*34)) - output=[] - -#to check which graphlet the induced subgraph is isomorphic to,and plus 1. 
-#input:the induced subgraph - - def count_graphlet(g): - for k,v in temp.items(): - if nx.is_isomorphic(k,g): - temp[k]+=1 - break - -#iterate over all graphs in the dataset - for graph in data: - temp=vektor.copy() -#if the number of nodes of the gragh is less than 5,then output a vektor with zeros,because can't be isomorphic graph. - if len(graph.nodes())<5: - output.append(list(temp.values())) - else: -#if the number of nodes is more than 5,randomly sample 1000 times. - for j in range(1000): - temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5)) - count_graphlet(temp_subgraph) - output.append(list(temp.values())) - return output diff --git a/Python_files/ex1/datasets/ex1/svm_function.py b/Python_files/ex1/datasets/ex1/svm_function.py deleted file mode 100644 index 3147a1d8984b53dcc0a890550782cd4784cf97d1..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/ex1/svm_function.py +++ /dev/null @@ -1,59 +0,0 @@ -import scipy.sparse as sp -import numpy as np - - -from sklearn.svm import SVC, LinearSVC -from sklearn.metrics import accuracy_score -from sklearn.model_selection import train_test_split, cross_val_score - -# Exercise 4 - -''' -This file defines the Support Vector Machines which will train on the datasets. - -Key Idea: -Make use of 10-fold cross validation to measure the accuracy of each kernel on each dataset. 
Further, choose 80% of the dataset as trainin -''' - -#SVC with 'linear' kernel -#input:features and targets calculated from previous kernel -#output:mean and deviation accuracy of validation data and accuracy of test data -def svm_tt(features,targets): - X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2) - clf = SVC(kernel='linear', C=1) - clf.fit(X_train,y_train) - accuracy= cross_val_score(clf,X_train, y_train, cv=10 ) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print ("Accuracy of test data:", accuracy_score(y_test,clf.predict(X_test))) - -#LinearSVC -#input:features and targets calculated from previous kernel -#output:mean and deviation accuracy of validation data and accuracy of test data -def svm_linear_tt(features,targets): - X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2) - clf = LinearSVC(C=1) - clf.fit(X_train,y_train) - accuracy= cross_val_score(clf,X_train, y_train, cv=10 ) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print ("Accuracy of test data:", accuracy_score(y_test,clf.predict(X_test))) - -#SVC with 'precomputed' kernel -#input:features and targets calculated from previous kernel -#output:mean and deviation accuracy of validation data and accuracy of test data -def svm_precomputed_tt(feat_vecs, target_vec): - X_train, X_test, y_train, y_test = train_test_split(feat_vecs, target_vec, test_size=0.2, random_state=4) -#in order to use the 'precomputed' kernel,first calculate the gram_matrix - train_feat = sp.vstack(X_train) - test_feat = sp.vstack(X_test) - gram_matrix = train_feat.dot(train_feat.transpose()).todense() - gram_test = train_feat.dot(test_feat.transpose()).todense().T - - clf = SVC(kernel='precomputed') - clf.fit(np.array(gram_matrix), np.array(y_train)) - 
accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train), cv=10) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print("Accuracy of test data:", accuracy_score(y_test, clf.predict(np.array(gram_test)))) - diff --git a/Python_files/ex1/datasets/ex1/wl_kernel.py b/Python_files/ex1/datasets/ex1/wl_kernel.py deleted file mode 100644 index ea12b41e29f0861ebea111e820da90916a3a561c..0000000000000000000000000000000000000000 --- a/Python_files/ex1/datasets/ex1/wl_kernel.py +++ /dev/null @@ -1,96 +0,0 @@ -import networkx as nx -from collections import Counter -import scipy.sparse as sp -import numpy as np -from multiset import FrozenMultiset - - -# Exercise 3: Weisfeiler-Leman-Kernel - -def wl_kernel(orig_graphs, labelname="node_label", rounds=4): - ''' - Implementation of the Weisfeiler-Leman-Kernel - - Keyword Arguments - orig_graphs: original list of graphs - label_name: initial node labels/colors (can be None, default value: "node_label") - rounds: number of rounds of color refinement - - return: f_vecs -> list of histograms, one for each graph - (each histogram: sparse coo-matrix of shape (1, total_number_of_colors)) - - Key ideas: - - store the colors as node attributes of the respective graphs - - use a hash function to compute new colors, but assign each new hashcolor to an integer - color (starting from 0) and store the pairs in a dictionary (keys: hashcolors, values: - respective integer colors) - - use integer colors as indices in the final histograms (e.g. 
the number of occurences of - color 4 is stored at fvecs[4]) - ''' - - #copy graphs because they are modified later - graphs = [graph.copy() for graph in orig_graphs] - - ##### COLOR REFINEMENT ############ - idx_counter = 0 - coldict = dict() #save all colors in a dictionary (keys: hash values, values: index in the final histograms) - - #initial colors: if there is a initial color scheme, use it in round 0 - if labelname: - for graph in graphs: - init_labels = nx.get_node_attributes(graph, labelname) #dict {node: label} - hash_labels = {key: hash(value) for key,value in init_labels.items()} #hash label values (-hashcolors) so that they are the same for all coming graphs and rounds - colors = list(set(hash_labels.values())) #list of the different colors in this graph - for hashcol in colors: - #check if colors already have been saved in coldict and save them if not - if hashcol not in coldict.keys(): - coldict[hashcol] = idx_counter - idx_counter += 1 #counts total number of colors - #change from hashed colors to final integer colors which will be used afterwards - new_labels = {key: coldict[hashvalue] for key,hashvalue in hash_labels.items()} - nx.set_node_attributes(graph, new_labels, str(0)) - # no initial color scheme -> every node gets same color - else: - for graph in graphs: - nx.set_node_attributes(graph, 0, str(0)) - #save color in coldict and increment idx_counter (which counts total number of colors) - coldict[0] = idx_counter #here: 0 - idx_counter += 1 - - #next rounds of color refinement - for k in range(1, rounds+1): - for graph in graphs: - #attribute dictionaries - attrs_last_round = nx.get_node_attributes(graph, str(k-1)) #dictionary with nodes as keys and corresponding attributes of last round as values - attrs_this_round = dict() #where you save attributes of this round - - #compute current color of each node - for node in graph.nodes(): - #get colors of neighbors and hash them together with the node's color - colset = 
FrozenMultiset(attrs_last_round.get(neighbor) for neighbor in list(graph[node])) - hashcol = hash((attrs_last_round.get(node), colset)) - #if hash produces a new color: - if hashcol not in coldict.keys(): - coldict[hashcol] = idx_counter - idx_counter += 1 - attrs_this_round[node] = coldict[hashcol] - #save current colors of the graph as node attributes - nx.set_node_attributes(graph, attrs_this_round, name=str(k)) - - - ####### CONSTRUCT FEATURE VECTORS ############### - f_vecs = list() #where feature vectors (histograms) will be stored - for graph in graphs: - c = Counter() - for k in range(rounds): - #count number of colors that appeared in each round, - #e.g. c = {0:302, 1:4} if color 0 appeared 302 times and color 1 appeared 4 times - c.update(nx.get_node_attributes(graph, str(k)).values()) - #create feature vector as sparse matrix in format 1 x idx_counter - data = np.array(list(c.values())) - col = np.array(list(c.keys())) - row = np.zeros(len(col)) #only one row for each histogram - f_vec = sp.coo_matrix((data, (row,col)), shape=(1, idx_counter)) #feature vector with histogram entries - f_vecs.append(f_vec) - - return f_vecs diff --git a/Python_files/ex1/graphlet_kernel.py b/Python_files/ex1/graphlet_kernel.py deleted file mode 100644 index 33f3fafe544e5257149317550f2dcb04ac4966ca..0000000000000000000000000000000000000000 --- a/Python_files/ex1/graphlet_kernel.py +++ /dev/null @@ -1,112 +0,0 @@ -import networkx as nx -import numpy as np -import random - - - -#Exercise 2 - - -#count the number of graphlets of a graph for all graphs in the dataset -#input:dataset -#output:list of counter for all the graphs in the dataset - -def graphlet_kernel(data): - -#create all the non-isoorphic graphs with 5 nodes and store it in a list called 'dic' - g0=nx.empty_graph(5) - dic=[0]*34 - dic[0]=nx.create_empty_copy(g0) - dic[0].add_edges_from([(0,1)]) - dic[1]=nx.create_empty_copy(g0) - dic[1].add_edges_from([(0,1),(0,2)]) - dic[2]=nx.create_empty_copy(g0) - 
dic[2].add_edges_from([(0,1),(2,3)]) - dic[3]=nx.create_empty_copy(g0) - dic[3].add_edges_from([(0,1),(0,2),(0,3)]) - dic[4]=nx.create_empty_copy(g0) - dic[4].add_edges_from([(0,1),(0,2),(3,4)]) - dic[5]=nx.create_empty_copy(g0) - dic[5].add_edges_from([(0,1),(1,2),(2,3)]) - dic[6]=nx.create_empty_copy(g0) - dic[6].add_edges_from([(0,1),(0,2),(1,2)]) - dic[7]=nx.create_empty_copy(g0) - dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)]) - dic[8]=nx.create_empty_copy(g0) - dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)]) - dic[9]=nx.create_empty_copy(g0) - dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)]) - dic[10]=nx.create_empty_copy(g0) - dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)]) - dic[11]=nx.create_empty_copy(g0) - dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)]) - dic[12]=nx.create_empty_copy(g0) - dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)]) - dic[13]=nx.create_empty_copy(g0) - dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)]) - dic[14]=nx.create_empty_copy(g0) - dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)]) - dic[15]=nx.create_empty_copy(g0) - dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)]) - dic[16]=nx.create_empty_copy(g0) - dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)]) - dic[17]=nx.create_empty_copy(g0) - dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)]) - dic[18]=nx.create_empty_copy(g0) - dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)]) - dic[19]=nx.create_empty_copy(g0) - dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)]) - dic[20]=nx.create_empty_copy(g0) - dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)]) - dic[21]=nx.create_empty_copy(g0) - dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)]) - dic[22]=nx.create_empty_copy(g0) - dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)]) - dic[23]=nx.create_empty_copy(g0) - dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)]) - dic[24]=nx.create_empty_copy(g0) - 
dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)]) - dic[25]=nx.create_empty_copy(g0) - dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[26]=nx.create_empty_copy(g0) - dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)]) - dic[27]=nx.create_empty_copy(g0) - dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)]) - dic[28]=nx.create_empty_copy(g0) - dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)]) - dic[29]=nx.create_empty_copy(g0) - dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)]) - dic[30]=nx.create_empty_copy(g0) - dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[31]=nx.create_empty_copy(g0) - dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)]) - dic[32]=nx.create_empty_copy(g0) - dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)]) - dic.insert(0,nx.create_empty_copy(g0)) - -#make a initial dict called 'vektor' to count the non-isoorphic graphs - vektor=dict(zip(dic,[0]*34)) - output=[] - -#to check which graphlet the induced subgraph is isomorphic to,and plus 1. -#input:the induced subgraph - - def count_graphlet(g): - for k,v in temp.items(): - if nx.is_isomorphic(k,g): - temp[k]+=1 - break - -#iterate over all graphs in the dataset - for graph in data: - temp=vektor.copy() -#if the number of nodes of the gragh is less than 5,then output a vektor with zeros,because can't be isomorphic graph. - if len(graph.nodes())<5: - output.append(list(temp.values())) - else: -#if the number of nodes is more than 5,randomly sample 1000 times. 
- for j in range(1000): - temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5)) - count_graphlet(temp_subgraph) - output.append(list(temp.values())) - return output \ No newline at end of file diff --git a/Python_files/ex2/.DS_Store b/Python_files/ex2/.DS_Store deleted file mode 100644 index c27f73e9226ffed81561eebf133394f02cb0b422..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/.gitkeep b/Python_files/ex2/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/Python_files/ex2/.idea/.gitignore b/Python_files/ex2/.idea/.gitignore deleted file mode 100644 index 26d33521af10bcc7fd8cea344038eaaeb78d0ef5..0000000000000000000000000000000000000000 --- a/Python_files/ex2/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml diff --git a/Python_files/ex2/.idea/ex2.iml b/Python_files/ex2/.idea/ex2.iml deleted file mode 100644 index 1bd56c7206633c917eadd9dca9ce3c3dd5e84c37..0000000000000000000000000000000000000000 --- a/Python_files/ex2/.idea/ex2.iml +++ /dev/null @@ -1,12 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> - <component name="NewModuleRootManager"> - <content url="file://$MODULE_DIR$" /> - <orderEntry type="jdk" jdkName="Python 3.9 (torch)" jdkType="Python SDK" /> - <orderEntry type="sourceFolder" forTests="false" /> - </component> - <component name="PyDocumentationSettings"> - <option name="format" value="PLAIN" /> - <option name="myDocStringFormat" value="Plain" /> - </component> -</module> \ No newline at end of file diff --git a/Python_files/ex2/.idea/inspectionProfiles/profiles_settings.xml b/Python_files/ex2/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da2d6447d11dfe32bfb846c3d5b199fc99..0000000000000000000000000000000000000000 --- 
a/Python_files/ex2/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ -<component name="InspectionProjectProfileManager"> - <settings> - <option name="USE_PROJECT_PROFILE" value="false" /> - <version value="1.0" /> - </settings> -</component> \ No newline at end of file diff --git a/Python_files/ex2/.idea/misc.xml b/Python_files/ex2/.idea/misc.xml deleted file mode 100644 index 47afc9b110075450a7c1bff984683adfac6804c6..0000000000000000000000000000000000000000 --- a/Python_files/ex2/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (torch)" project-jdk-type="Python SDK" /> -</project> \ No newline at end of file diff --git a/Python_files/ex2/.idea/modules.xml b/Python_files/ex2/.idea/modules.xml deleted file mode 100644 index baa420177d1be449b45f4a0dd5a14ca8fb383143..0000000000000000000000000000000000000000 --- a/Python_files/ex2/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/ex2.iml" filepath="$PROJECT_DIR$/.idea/ex2.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/Python_files/ex2/.idea/vcs.xml b/Python_files/ex2/.idea/vcs.xml deleted file mode 100644 index 6c0b8635858dc7ad44b93df54b762707ce49eefc..0000000000000000000000000000000000000000 --- a/Python_files/ex2/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="VcsDirectoryMappings"> - <mapping directory="$PROJECT_DIR$/.." 
vcs="Git" /> - </component> -</project> \ No newline at end of file diff --git a/Python_files/ex2/Ex1/.DS_Store b/Python_files/ex2/Ex1/.DS_Store deleted file mode 100644 index 1cfe287f10aa6ba4e5aa30bc251fc856498c5d94..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/Ex1/Exercise-1.pdf b/Python_files/ex2/Ex1/Exercise-1.pdf deleted file mode 100644 index f24f9910d9142104337553ec5bef913377845c7e..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/Exercise-1.pdf and /dev/null differ diff --git a/Python_files/ex2/Ex1/Slides-Graph-Kernels.pdf b/Python_files/ex2/Ex1/Slides-Graph-Kernels.pdf deleted file mode 100644 index feb2b923c98de978ad49e127aad2a629ce76e42a..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/Slides-Graph-Kernels.pdf and /dev/null differ diff --git a/Python_files/ex2/Ex1/Untitled.ipynb b/Python_files/ex2/Ex1/Untitled.ipynb deleted file mode 100644 index 739c8f6e2be96d492d519da8892011f738a85488..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/Untitled.ipynb +++ /dev/null @@ -1,189 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "flying-twist", - "metadata": {}, - "outputs": [], - "source": [ - "from gurobipy import *\n", - "import networkx as nx" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "noble-kazakhstan", - "metadata": {}, - "outputs": [], - "source": [ - "a = [7, 4, 6, 4, 5, 4, 3, 4, 6, 7]\n", - "\n", - "# profits\n", - "p = [5, 4, 4, 6, 4, 7, 4, 5, 7, 3]\n", - "\n", - "# knapsack capacity\n", - "b = 20\n", - "\n", - "G=nx.DiGraph()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "unlikely-charity", - "metadata": {}, - "outputs": [], - "source": [ - "def solve(a, p, b):\n", - " nitems = len(p)\n", - " items = range(nitems)\n", - "\n", - " # Do not change the following line!\n", - " vertices = [(c, i) for i in 
range(nitems+1) for c in range(b+2)]\n", - " \n", - " G.add_nodes_from(vertices)\n", - " \n", - " arcs = []\n", - " for i in range(1, len(p) + 1):\n", - " for c in range(b - a[i - 1] + 1):\n", - " arcs.append(((c,i-1),(c+a[i-1],i),p[i-1]))\n", - "\n", - " for i in range(1, len(p) + 1):\n", - " for c in range(b + 1):\n", - " arcs.append(((c,i-1),(c,i),0))\n", - "\n", - " for i in range(len(p) + 1):\n", - " for c in range(b):\n", - " arcs.append(((c,i),(c+1,i),1))\n", - " G.add_weighted_edges_from(arcs)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "conceptual-equation", - "metadata": {}, - "outputs": [], - "source": [ - "solve(a,p,b)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "governmental-chain", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(9, 1), (2, 1), (3, 0)]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(nx.neighbors(G,(2,0)))" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "optical-modification", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(G.predecessors((0,0)))" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "successful-reserve", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'tuple' object is not callable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-38-9eb58f29ea08>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m 
\u001b[0mtup\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: 'tuple' object is not callable" - ] - } - ], - "source": [ - "tup=(1,2,3,4)\n", - "tup(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "romance-barrel", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(14, 2), (10, 2), (11, 1)]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(nx.neighbors(G,(10,1)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "judicial-health", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/.DS_Store b/Python_files/ex2/Ex1/datasets/.DS_Store deleted file mode 100644 index 1c3325ebc49330c837884acbffbba05bb907a9b5..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/.ipynb_checkpoints/EX4_GL1_0426-checkpoint.ipynb b/Python_files/ex2/Ex1/datasets/.ipynb_checkpoints/EX4_GL1_0426-checkpoint.ipynb deleted file mode 100644 index 
b70092ee581b0ca473f4c0e3c54382b9daf8920e..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/.ipynb_checkpoints/EX4_GL1_0426-checkpoint.ipynb +++ /dev/null @@ -1,3231 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "235289b2", - "metadata": {}, - "source": [ - "# EX 4" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3a7d45a0", - "metadata": {}, - "outputs": [], - "source": [ - "#needed for EX 3\n", - "import pickle\n", - "import networkx as nx\n", - "import matplotlib.pyplot as plt\n", - "from collections import Counter, defaultdict\n", - "import scipy.sparse as sp\n", - "import numpy as np\n", - "from multiset import FrozenMultiset\n", - "import random" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8c73bfa8", - "metadata": {}, - "outputs": [], - "source": [ - "# additionally needed for EX 4\n", - "\n", - "from sklearn.svm import SVC\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.model_selection import cross_val_score" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "11c6f5f6", - "metadata": {}, - "outputs": [], - "source": [ - "DD = pickle.load(open(\"datasets/DD/data.pkl\", \"rb\"))\n", - "ENZ = pickle.load(open(\"datasets/ENZYMES/data.pkl\", \"rb\"))\n", - "NCI= pickle.load(open(\"datasets/NCI1/data.pkl\", \"rb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "11bf36a7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "284.3166383701188 1178\n", - "32.63333333333333 600\n", - "29.8654501216545 4110\n" - ] - } - ], - "source": [ - "for data in [DD, ENZ, NCI]:\n", - " print(np.mean([len(graph.nodes()) for graph in data]), 
len(data))" - ] - }, - { - "cell_type": "markdown", - "id": "7921f1bc", - "metadata": {}, - "source": [ - "# Kernels" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "4e66ec3a", - "metadata": {}, - "outputs": [], - "source": [ - "def wl_kernel(orig_graphs, labelname=None, rounds=4):\n", - " graphs = [graph.copy() for graph in orig_graphs]\n", - " \n", - " ##### COLOR REFINEMENT ############\n", - " idx_counter = 0\n", - " coldict = dict() #save all colors in a dictionary (keys: hash values, values: index in the final histograms)\n", - " \n", - " #initial colors: if there is a initial color scheme, use it in round 1\n", - " if labelname:\n", - " for graph in graphs:\n", - " init_labels = nx.get_node_attributes(graph, labelname) #dict {node: label}\n", - " hash_labels = {key: hash(value) for key,value in init_labels.items()} #hash label values so that they are the same for all coming graphs and rounds\n", - " colors = list(set(hash_labels.values())) #list of the different colors in this graph\n", - " for hashcol in colors:\n", - " #check if colors already have been saved in coldict and save them if not\n", - " if hashcol not in coldict.keys():\n", - " coldict[hashcol] = idx_counter\n", - " idx_counter += 1\n", - " #change from hashed colors to final colors which will be used afterwards\n", - " new_labels = {key: coldict[hashvalue] for key,hashvalue in hash_labels.items()}\n", - " nx.set_node_attributes(graph, new_labels, str(0))\n", - " # no initial color scheme -> every node gets same color\n", - " else:\n", - " for graph in graphs:\n", - " nx.set_node_attributes(graph, 0, str(0))\n", - " #save color in coldict and increment idx_counter (which counts total number of colors)\n", - " coldict[0] = idx_counter #here: 0\n", - " idx_counter += 1\n", - " \n", - " #next rounds of color refinement\n", - " for k in range(1, rounds+1):\n", - " for graph in graphs:\n", - " #attribute dictionaries\n", - " attrs_last_round = nx.get_node_attributes(graph, 
str(k-1)) #dictionary with nodes as keys and corresponding attributes of last round as values\n", - " attrs_this_round = dict() #where you save attributes of this round\n", - " \n", - " #compute current color of each node\n", - " for node in graph.nodes():\n", - " #get colors of neighbors and hash them together with the node's color\n", - " colset = FrozenMultiset(attrs_last_round.get(neighbor) for neighbor in list(graph[node]))\n", - " hashcol = hash((attrs_last_round.get(node), colset))\n", - " #if hash produces a new color:\n", - " if hashcol not in coldict.keys():\n", - " coldict[hashcol] = idx_counter\n", - " idx_counter += 1\n", - " attrs_this_round[node] = coldict[hashcol]\n", - " #save current colors of the graph as node attributes\n", - " nx.set_node_attributes(graph, attrs_this_round, name=str(k))\n", - "\n", - " \n", - " ####### CONSTRUCT FEATURE VECTORS ###############\n", - " f_vecs = list() #where feature vectors will be stored\n", - " for graph in graphs:\n", - " c = Counter()\n", - " for k in range(rounds):\n", - " #count number of colors that appeared in each round, i.e. 
c = {0:302, 1:4} if color 0 appeared 302 times and color 1 4 times\n", - " c.update(nx.get_node_attributes(graph, str(k)).values()) \n", - " #create feature vectore as sparse matrix in format 1 x idx_counter\n", - " data = np.array(list(c.values()))\n", - " col = np.array(list(c.keys()))\n", - " row = np.zeros(len(col)) #only one row so far\n", - " f_vec = sp.coo_matrix((data, (row,col)), shape=(1, idx_counter)) #feature vector with histogram entries \n", - " f_vecs.append(f_vec)\n", - "\n", - " return graphs, f_vecs" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "61618352", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " print(\"mean of number of nodes:\", l)\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l+1):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " #print(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "id": "68101110", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphs, ENZ_feat = wl_kernel(ENZ, labelname=\"node_label\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "57da0c1a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(ENZ_feat) == len(ENZ_graphs)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "4692d7cf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[<1x25858 sparse matrix of 
type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 158 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 4 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 33 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 125 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 4 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - 
" \twith 17 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 12 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 19 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 39 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 11 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 16 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 80 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 47 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 80 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 7 stored elements in COOrdinate format>,\n", - " 
<1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 124 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 37 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 116 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 108 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 110 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 158 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 146 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 38 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type 
'<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 4 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 109 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 32 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 27 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 19 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 26 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " 
\twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 113 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 124 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 128 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 128 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 69 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 79 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 79 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", 
- " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 17 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type 
'<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 113 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 125 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 110 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 86 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 121 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 113 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 164 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 93 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 104 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 117 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 120 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 104 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 122 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 37 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - 
" \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 20 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 153 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 15 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 151 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 66 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 69 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 50 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 81 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 19 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 27 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 16 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 115 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 15 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 92 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - 
" <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 116 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 74 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 101 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 25 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 16 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type 
'<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 58 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 44 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 28 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 116 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 111 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 45 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 35 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 125 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - 
" \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 41 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 32 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 33 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 30 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 31 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 59 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 23 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 34 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 40 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 43 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 27 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse 
matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 109 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 42 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 29 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 60 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 79 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 80 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 91 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in 
COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 151 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 52 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 71 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 69 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 95 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 109 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 73 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 131 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 137 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 56 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 163 stored elements in COOrdinate 
format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 107 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 85 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 94 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 98 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 96 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 119 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 72 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 102 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 106 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 53 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 
'numpy.int32'>'\n", - " \twith 54 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 89 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 77 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 112 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 105 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 70 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 65 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 57 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 64 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 76 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 46 stored elements in COOrdinate format>,\n", 
- " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 39 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 36 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 38 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 83 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 97 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 82 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 88 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 87 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 99 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 61 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 63 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 78 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 90 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 62 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 
stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 49 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 75 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 55 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 51 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 48 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 67 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 68 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 117 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 84 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 104 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 131 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 100 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 108 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 133 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 130 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of 
type '<class 'numpy.int32'>'\n", - " \twith 123 stored elements in COOrdinate format>,\n", - " <1x25858 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 103 stored elements in COOrdinate format>]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ENZ_feat" - ] - }, - { - "cell_type": "markdown", - "id": "644af059", - "metadata": {}, - "source": [ - "## Gram Matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "0b493d38", - "metadata": {}, - "outputs": [], - "source": [ - "m_ENZ = sp.vstack(ENZ_feat, format=\"csr\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "f4a09179", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<600x600 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 358808 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_ENZ = m_ENZ@np.transpose(m_ENZ)\n", - "gram_ENZ" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "93916ae1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mean of number of nodes: 32\n" - ] - } - ], - "source": [ - "ENZ_features = closed_kernel(ENZ)\n", - "#ENZ_features" - ] - }, - { - "cell_type": "markdown", - "id": "8d9b7f36", - "metadata": {}, - "source": [ - "# SVM" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "id": "5ed85c44", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "e21667af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "number of labels for each graphlist\n", - "NZ: 6\n", - "DD: 2\n", - "NCI: 2\n" - ] - } - ], - "source": [ - "print(\"number of labels for each graphlist\")\n", - "ENZ_target = [g.graph['label'] for g in ENZ]\n", - "print(\"NZ:\", 
len(set(ENZ_target)))\n", - "DD_target = [g.graph['label'] for g in DD]\n", - "print(\"DD:\", len(set(DD_target)))\n", - "NCI_target = [g.graph['label'] for g in NCI]\n", - "print(\"NCI:\", len(set(NCI_target)))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a6e97d61", - "metadata": {}, - "outputs": [], - "source": [ - "def graph_svm(feature_vecs, target_vec):\n", - " num_labels = len(set(target_vec))\n", - " feature_mat = np.vstack(feature_vecs)\n", - " gram_mat = feature_mat @ np.transpose(feature_mat)\n", - " print(gram_mat)\n", - " clf = SVC(kernel='precomputed')\n", - " clf.fit(gram_mat, target_vec)\n", - " return clf\n", - " " - ] - }, - { - "cell_type": "raw", - "id": "ddae8881", - "metadata": {}, - "source": [ - "clf = graph_svm(NCI_features, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b741d0d6", - "metadata": {}, - "outputs": [], - "source": [ - "predicted = clf.predict(feature_vecs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ddb9ede7", - "metadata": {}, - "outputs": [], - "source": [ - "print(classification_report(NCI_target, predicted)) " - ] - }, - { - "cell_type": "raw", - "id": "949955ff", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "#this time, we train an SVM classifier\n", - "classifier = SVC(C=1, kernel='linear', gamma = 'auto')\n", - "classifier.fit(NCI_features, NCI_target)\n", - "\n", - "targetFeature_predict = classifier.predict(NCI_features)\n" - ] - }, - { - "cell_type": "raw", - "id": "1edca273", - "metadata": {}, - "source": [ - "\n", - "classifier.decision_function(NCI_features)\n", - "print('Accuracy: \\n', classifier.score(NCI_features,ENZ_target))\n", - "print('Classification report: \\n')\n", - "print(classification_report(NCI_target, targetFeature_predict)) " - ] - }, - { - "cell_type": "markdown", - "id": "eba78df1", - "metadata": {}, - "source": [ - "# new" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": 
"662f913c", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphs, DD_feat = wl_kernel(DD, labelname=\"node_label\")" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "69764da9", - "metadata": {}, - "outputs": [], - "source": [ - "m_DD = sp.vstack(DD_feat)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "67109f61", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[<1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 996 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1072 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 581 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 341 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 541 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 10716 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 3755 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1525 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1151 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 457 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 955 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 727 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 962 
stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 847 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 689 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1369 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 788 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 682 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 473 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1073 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 15374 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 2107 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1037 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 223 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 782 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1313 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1109 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 794 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1195 stored elements in 
COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 744 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1690 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 538 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1325 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 678 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1052 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 895 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1612 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 870 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 808 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 674 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1358 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1874 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 665 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1202 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 322 stored elements in COOrdinate format>,\n", 
- " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1490 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 667 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 1340 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 643 stored elements in COOrdinate format>,\n", - " <1x1254664 sparse matrix of type '<class 'numpy.int32'>'\n", - " \twith 740 stored elements in COOrdinate format>]" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "DD_feat[:50]" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "87481bd2", - "metadata": {}, - "outputs": [], - "source": [ - "DD_target = [g.graph['label'] for g in DD]" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "20f87621", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<1254664x1178 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 1017230 stored elements in COOrdinate format>" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m_DD.transpose()" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "22b17bc1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "matrix([[8298, 6960, 3678, ..., 1007, 943, 2839],\n", - " [6960, 8926, 3879, ..., 1070, 987, 2922],\n", - " [3678, 3879, 2868, ..., 587, 596, 1578],\n", - " ...,\n", - " [1007, 1070, 587, ..., 428, 185, 663],\n", - " [ 943, 987, 596, ..., 185, 376, 575],\n", - " [2839, 2922, 1578, ..., 663, 575, 3894]], dtype=int32)" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_DD = m_DD.dot(m_DD.transpose()).todense()\n", - "gram_DD" - ] - }, - { - "cell_type": "code", 
- "execution_count": 85, - "id": "6d169fcf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 6])" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a = np.array([[1,2,3],[4,5,6],[7,8,9]])\n", - "a[[0,1],[0,2]]" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "e088af54", - "metadata": {}, - "outputs": [], - "source": [ - "samplelist = np.arange(len(DD_target)).tolist()\n", - "sample_idx = random.sample(samplelist, 200)\n", - "#sample_idx" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "id": "44369d23", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(200, 200)" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.asarray(gram_DD)[sample_idx][:, sample_idx].shape" - ] - }, - { - "cell_type": "raw", - "id": "65f1df55", - "metadata": {}, - "source": [ - "smaller_matrix = np.asarray(gram_DD)[sample_idx][:, sample_idx]\n", - "clf = SVC(kernel='precomputed')\n", - "clf.fit(smaller_matrix, np.array(DD_target)[sample_idx])" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "id": "d8a3f178", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(kernel='precomputed')" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf = SVC(kernel='precomputed')\n", - "clf.fit(np.array(gram_DD), np.array(DD_target))" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "id": "09032ff9", - "metadata": {}, - "outputs": [], - "source": [ - "targetFeature_predict = clf.predict(smaller_matrix)" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "id": "8324d609", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 1.00 1.00 1.00 115\n", - " 2 1.00 1.00 1.00 85\n", 
- "\n", - " accuracy 1.00 200\n", - " macro avg 1.00 1.00 1.00 200\n", - "weighted avg 1.00 1.00 1.00 200\n", - "\n" - ] - } - ], - "source": [ - "print(classification_report( np.array(DD_target)[sample_idx], targetFeature_predict)) " - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "id": "ac404f21", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 1.00 1.00 1.00 691\n", - " 2 1.00 1.00 1.00 487\n", - "\n", - " accuracy 1.00 1178\n", - " macro avg 1.00 1.00 1.00 1178\n", - "weighted avg 1.00 1.00 1.00 1178\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:593: FutureWarning: np.matrix usage is deprecated in 1.0 and will raise a TypeError in 1.2. Please convert to a numpy array with np.asarray. For more information see: https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "new_sample_idx = random.sample(samplelist, 200)\n", - "new_matrix = np.asarray(gram_DD)[new_sample_idx][:, new_sample_idx]\n", - "print(classification_report( np.array(DD_target), clf.predict(gram_DD))) " - ] - }, - { - "cell_type": "raw", - "id": "f28c2d7e", - "metadata": {}, - "source": [ - "from sklearn import datasets, linear_model\n", - "from sklearn.model_selection import cross_val_score\n", - "diabetes = datasets.load_diabetes()\n", - "X = diabetes.data[:150]\n", - "y = diabetes.target[:150]\n", - "lasso = linear_model.Lasso()\n", - "print(cross_val_score(lasso, X, y, cv=3))" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "id": "4f1f60f0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.7877661886136462\n" - ] - } - ], - "source": [ - "\n", - "accuracy = cross_val_score(clf, np.array(gram_DD), np.array(DD_target), cv=10)\n", - 
"print(np.mean(accuracy))" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "id": "45829b72", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " m_feat = sp.vstack(feat_vecs)\n", - " gram_matrix = m_feat.dot(m_feat.transpose()).todense()\n", - " \n", - " if n_classes <= 2:\n", - " clf = SVC(kernel='precomputed')\n", - " #clf.fit(np.array(gram_matrix), np.array(target_vec))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(target_vec), cv=10)\n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " Y = label_binarize(target_vec, classes=classes)\n", - " print(Y)\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', class_weight=\"balanced\"))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), Y, cv=10)\n", - " print(accuracy)\n", - " print(np.mean(accuracy))\n", - " \n", - " return np.mean(accuracy)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f0e4847f", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphs, DD_feat = wl_kernel(DD, labelname=\"node_label\")\n", - "DD_target = [g.graph['label'] for g in DD]" - ] - }, - { - "cell_type": "code", - "execution_count": 157, - "id": "8ecabd06", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7877661886136462" - ] - }, - "execution_count": 157, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed(DD_feat, DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "1b7a4661", - "metadata": {}, - "outputs": [], - "source": [ - "NCI_graphs, NCI_feat = wl_kernel(NCI, labelname=\"node_label\")\n", - "NCI_target = [g.graph['label'] for g in NCI]" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "37c6c528", - "metadata": {}, - "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "0.783698296836983\n" - ] - } - ], - "source": [ - "svm_precomputed(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f9614220", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphs, ENZ_feat = wl_kernel(ENZ, labelname=\"node_label\")\n", - "ENZ_target = [g.graph['label'] for g in ENZ]" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "id": "92cce688", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "[[0 0 0 0 0 1]\n", - " [0 0 0 0 0 1]\n", - " [0 0 0 0 0 1]\n", - " ...\n", - " [0 0 0 1 0 0]\n", - " [0 0 0 1 0 0]\n", - " [0 0 0 1 0 0]]\n", - "[0.03333333 0.1 0.03333333 0.01666667 0. 0.01666667\n", - " 0.03333333 0. 0.01666667 0.03333333]\n", - "0.028333333333333332\n" - ] - }, - { - "data": { - "text/plain": [ - "0.028333333333333332" - ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "58dc2da2", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_gridSearchCV(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " m_feat = sp.vstack(feat_vecs)\n", - " gram_matrix = m_feat.dot(m_feat.transpose()).todense()\n", - " \n", - " if n_classes <= 2:\n", - " param_grid = {'C': [0.001,0.01,0.1,1],\n", - " 'class_weight':['balanced',None]} \n", - " grid = GridSearchCV(SVC(kernel=\"precomputed\"), param_grid, cv = 5, verbose = 3) \n", - " grid.fit(np.array(gram_matrix), np.array(target_vec))\n", - " print(grid.best_params_) \n", - " clf = SVC(kernel='precomputed', **grid.best_params_)\n", - " clf.fit(np.array(gram_matrix), np.array(target_vec))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(target_vec), cv=10)\n", - " 
\n", - " return np.mean(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 175, - "id": "9d7c627b", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.717 total time= 0.2s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.754 total time= 0.2s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.783 total time= 0.2s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.765 total time= 0.2s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.719 total time= 0.2s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.717 total time= 0.1s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.754 total time= 0.1s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.785 total time= 0.1s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.766 total time= 0.1s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.719 total time= 0.1s\n", - "[CV 1/5] END .....C=0.01, class_weight=balanced;, score=0.780 total time= 0.2s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.783 total time= 0.2s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.832 total time= 0.2s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.809 total time= 0.3s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.786 total time= 0.2s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.780 total time= 0.2s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.783 total time= 0.2s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.832 total time= 0.2s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.809 total time= 0.2s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, 
score=0.787 total time= 0.3s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.794 total time= 0.7s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.787 total time= 0.7s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.828 total time= 0.7s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.799 total time= 0.6s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.802 total time= 0.6s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.794 total time= 0.6s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.787 total time= 0.7s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.828 total time= 0.6s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.799 total time= 0.7s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.802 total time= 0.7s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.774 total time= 0.9s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.779 total time= 1.0s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.787 total time= 1.0s\n", - "[CV 4/5] END ........C=1, class_weight=balanced;, score=0.776 total time= 0.9s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.785 total time= 0.8s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.774 total time= 1.0s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.779 total time= 1.0s\n", - "[CV 3/5] END ............C=1, class_weight=None;, score=0.787 total time= 0.9s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.776 total time= 0.9s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.785 total time= 0.8s\n", - "{'C': 0.1, 'class_weight': 'balanced'}\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.802919708029197,\n", - " array([0.81995134, 0.74939173, 0.84184915, 0.76155718, 0.81751825,\n", - " 0.8296837 , 0.79562044, 0.81751825, 0.83211679, 
0.76399027]))" - ] - }, - "execution_count": 175, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridSearchCV(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "e12789e7", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_tt(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " X_train, X_test, y_train, y_test = train_test_split(feat_vecs, target_vec, test_size=0.2, random_state=4)\n", - " train_feat = sp.vstack(X_train)\n", - " test_feat = sp.vstack(X_test)\n", - " gram_matrix = train_feat.dot(train_feat.transpose()).todense()\n", - " gram_test = train_feat.dot(test_feat.transpose()).todense().T\n", - "\n", - " if n_classes <= 2:\n", - " clf = SVC(kernel='precomputed')\n", - " clf.fit(np.array(gram_matrix), np.array(y_train))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train), cv=10)\n", - " print(classification_report(y_test, clf.predict(np.array(gram_test)))) \n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', class_weight=\"balanced\"))\n", - " clf.fit(np.array(gram_matrix), np.array(y_train_bin))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), y_train_bin, cv=10)\n", - " print(classification_report(y_test_bin, clf.predict(np.array(gram_test)))) \n", - " \n", - " return np.mean(accuracy), np.std(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "71efe919", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 0.85 0.85 0.85 138\n", - " 2 0.79 0.79 0.79 98\n", - "\n", - " 
accuracy 0.82 236\n", - " macro avg 0.82 0.82 0.82 236\n", - "weighted avg 0.82 0.82 0.82 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7886898096304591,\n", - " 0.04572144929229968,\n", - " array([0.81052632, 0.82105263, 0.76595745, 0.79787234, 0.80851064,\n", - " 0.80851064, 0.82978723, 0.68085106, 0.82978723, 0.73404255]))" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(DD_feat, DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 250, - "id": "cdbffc48", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 0 0.79 0.83 0.81 385\n", - " 1 0.84 0.80 0.82 437\n", - "\n", - " accuracy 0.81 822\n", - " macro avg 0.81 0.81 0.81 822\n", - "weighted avg 0.81 0.81 0.81 822\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.8150928534361332,\n", - " array([0.79331307, 0.81458967, 0.80547112, 0.81458967, 0.81458967,\n", - " 0.82370821, 0.79027356, 0.83890578, 0.8445122 , 0.81097561]))" - ] - }, - "execution_count": 250, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "68d9fa1d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - " precision recall f1-score support\n", - "\n", - " 0 0.83 0.24 0.37 21\n", - " 1 0.73 0.31 0.43 26\n", - " 2 0.62 0.43 0.51 23\n", - " 3 0.90 0.50 0.64 18\n", - " 4 0.40 0.13 0.20 15\n", - " 5 1.00 0.29 0.45 17\n", - "\n", - " micro avg 0.74 0.33 0.45 120\n", - " macro avg 0.75 0.32 0.44 120\n", - "weighted avg 0.75 0.33 0.44 120\n", - " samples avg 0.32 0.33 0.32 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.29375000000000007,\n", - " 0.044145041875868936,\n", - " array([0.27083333, 0.27083333, 0.29166667, 0.29166667, 0.29166667,\n", - " 0.375 , 0.35416667, 0.27083333, 0.20833333, 0.3125 ]))" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "a5cc2d4e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mean of number of nodes: 284\n" - ] - } - ], - "source": [ - "DD_ck = closed_kernel(DD)" - ] - }, - { - "cell_type": "raw", - "id": "28a4e982", - "metadata": {}, - "source": [ - "train_feat = np.vstack(DD_ck)\n", - "\n", - "gram_matrix = train_feat.dot(train_feat.transpose()).todense()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "ebd1e4c3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 0.85 0.85 0.85 138\n", - " 2 0.79 0.79 0.79 98\n", - "\n", - " accuracy 0.82 236\n", - " macro avg 0.82 0.82 0.82 236\n", - "weighted avg 0.82 0.82 0.82 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7886898096304591,\n", - " 0.04572144929229968,\n", - " array([0.81052632, 0.82105263, 0.76595745, 0.79787234, 0.80851064,\n", - " 0.80851064, 0.82978723, 0.68085106, 0.82978723, 0.73404255]))" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(DD_feat, DD_target)" - ] - }, - { - 
def svm_precomputed_gridsearch_tt(feat_vecs, target_vec):
    """Grid-search, train and evaluate a precomputed-kernel SVM.

    The data is split 80/20 into train and test parts, a linear-kernel Gram
    matrix is computed from the feature rows, and ``C`` / ``class_weight`` are
    tuned with a 5-fold ``GridSearchCV`` on the training part.

    Parameters
    ----------
    feat_vecs
        Feature vectors, one entry per sample. Anything ``train_test_split``
        can split and ``sp.vstack`` can stack (this notebook calls it with a
        ``scipy.sparse`` CSR matrix).
    target_vec
        Class label of every sample, aligned with ``feat_vecs``.

    Returns
    -------
    tuple
        ``(mean, std, scores)`` of a 10-fold cross-validation of the best
        parameter setting on the training part of the split.

    Side effects
    ------------
    Prints the grid-search progress (``verbose=3``), the best parameters and
    a ``classification_report`` for the held-out test split.

    Notes
    -----
    With more than two classes the labels are passed to
    ``OneVsRestClassifier`` as a plain 1-D vector.  The previous version
    binarised them first, which makes ``OneVsRestClassifier`` solve a
    *multilabel* problem (samples may get no label at all) and makes the grid
    search optimise subset accuracy instead of multiclass accuracy.
    """
    n_classes = len(set(target_vec))
    X_train, X_test, y_train, y_test = train_test_split(
        feat_vecs, target_vec, test_size=0.2, random_state=4
    )
    train_feat = sp.vstack(X_train)
    test_feat = sp.vstack(X_test)

    # Linear-kernel Gram matrices as plain ndarrays.  ``gram_test`` has one
    # row per test sample and one column per training sample.
    gram_train = train_feat.dot(train_feat.transpose()).toarray()
    gram_test = test_feat.dot(train_feat.transpose()).toarray()
    y_train = np.asarray(y_train)

    if n_classes <= 2:
        estimator = SVC(kernel="precomputed")
        param_grid = {
            'C': [0.001, 0.01, 0.1, 1],
            'class_weight': ['balanced', None],
        }
    else:
        print("multiclass SVM (one-vs-rest)")
        estimator = OneVsRestClassifier(SVC(kernel="precomputed"))
        # The ``estimator__`` prefix routes the parameters to the wrapped SVC.
        param_grid = {
            'estimator__C': [0.001, 0.01, 0.05, 0.1, 1],
            'estimator__class_weight': ['balanced', None],
        }

    grid = GridSearchCV(estimator, param_grid, cv=5, verbose=3)
    grid.fit(gram_train, y_train)
    print(grid.best_params_)

    # With the default ``refit=True`` the search has already refitted the best
    # parameter setting on the whole training set, so there is no need to
    # rebuild and refit the model by hand.
    clf = grid.best_estimator_
    accuracy = cross_val_score(clf, gram_train, y_train, cv=10)
    print(classification_report(y_test, clf.predict(gram_test)))

    return np.mean(accuracy), np.std(accuracy), accuracy
estimator__class_weight=balanced;, score=0.375 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.375 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.333 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.104 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.156 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.146 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.156 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.083 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.271 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.323 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.229 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END 
estimator__C=0.1, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.208 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.240 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "{'estimator__C': 0.01, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.83 0.24 
# Number of nodes of every graphlet template.
_GRAPHLET_SIZE = 5

# Edge lists of the 33 non-empty graphlet templates on nodes 0..4, ordered by
# number of edges.  Together with the edgeless graph this gives 34 templates,
# the number of isomorphism classes of simple graphs on 5 nodes.
# NOTE(review): the edge counts per size (1, 2, 4, 6, 6, 6, 4, 2, 1, 1) match
# the known class counts; pairwise non-isomorphism was not machine-checked.
_GRAPHLET_EDGES = [
    # 1 edge
    [(0, 1)],
    # 2 edges
    [(0, 1), (0, 2)],
    [(0, 1), (2, 3)],
    # 3 edges
    [(0, 1), (0, 2), (0, 3)],
    [(0, 1), (0, 2), (3, 4)],
    [(0, 1), (1, 2), (2, 3)],
    [(0, 1), (0, 2), (1, 2)],
    # 4 edges
    [(0, 1), (0, 2), (0, 3), (0, 4)],
    [(0, 1), (0, 2), (1, 3), (2, 3)],
    [(0, 1), (0, 2), (0, 3), (3, 4)],
    [(0, 1), (0, 2), (0, 3), (2, 3)],
    [(0, 1), (1, 2), (2, 3), (3, 4)],
    [(0, 1), (0, 2), (1, 2), (3, 4)],
    # 5 edges
    [(0, 1), (0, 2), (0, 3), (2, 4), (3, 4)],
    [(0, 1), (0, 2), (1, 2), (1, 3), (2, 4)],
    [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2)],
    [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)],
    [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)],
    [(0, 1), (0, 2), (2, 3), (2, 4), (3, 4)],
    # 6 edges
    [(0, 1), (0, 2), (3, 1), (3, 2), (4, 1), (4, 2)],
    [(0, 1), (0, 2), (1, 2), (1, 3), (2, 4), (3, 4)],
    [(0, 1), (0, 2), (0, 3), (1, 2), (2, 3), (2, 4)],
    [(0, 1), (0, 2), (0, 3), (2, 3), (2, 4), (3, 4)],
    [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (3, 4)],
    [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)],
    # 7 edges
    [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (1, 4), (2, 4)],
    [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (3, 4)],
    [(0, 1), (0, 2), (0, 3), (1, 2), (2, 3), (1, 4), (3, 4)],
    [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3), (1, 4)],
    # 8 edges
    [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (3, 4), (4, 1)],
    [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3), (1, 4), (2, 4)],
    # 9 edges
    [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4)],
    # 10 edges (complete graph)
    [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4),
     (3, 4)],
]


def _build_graphlets(edge_lists, n_nodes=_GRAPHLET_SIZE):
    """Return the edgeless graph followed by one graph per edge list."""
    templates = [nx.empty_graph(n_nodes)]
    for edges in edge_lists:
        template = nx.empty_graph(n_nodes)
        template.add_edges_from(edges)
        templates.append(template)
    return templates


# Edgeless 5-node graph, kept as a module-level name for other cells.
g0 = nx.empty_graph(_GRAPHLET_SIZE)

# All graphlet templates; index 0 is the edgeless graph.  The list now holds
# exactly the 34 graphs (the earlier construction left a stray integer ``0``
# at the end that only went unnoticed because ``zip`` truncated it away).
dic = _build_graphlets(_GRAPHLET_EDGES)


def graphlet_kernel(data, n_samples=1000):
    """Compute sampled 5-node graphlet count vectors for a list of graphs.

    For every graph, ``n_samples`` random 5-node subsets are drawn, the
    induced subgraph of each subset is matched against the templates in
    ``dic`` and the counter of the first isomorphic template is incremented.

    Parameters
    ----------
    data
        Iterable of networkx graphs.
    n_samples
        Number of random 5-node subsets drawn per graph (default 1000, the
        value that used to be hard-coded).

    Returns
    -------
    list of list of int
        One count vector per input graph, with one entry per template in
        ``dic`` (same order).  Graphs with fewer than 5 nodes get an all-zero
        vector.

    Notes
    -----
    Sampling uses the global ``random`` module state; seed it beforehand for
    reproducible feature vectors.
    """
    output = []
    for graph in data:
        counts = [0] * len(dic)
        # Materialise the node view once per graph: ``random.sample`` needs a
        # real sequence (passing a non-sequence raises TypeError on
        # Python >= 3.11), and this avoids rebuilding it for every sample.
        nodes = list(graph.nodes())

        # Graphs with fewer than 5 nodes cannot contain a 5-node graphlet and
        # keep their all-zero vector.
        if len(nodes) >= _GRAPHLET_SIZE:
            for _ in range(n_samples):
                subgraph = graph.subgraph(random.sample(nodes, _GRAPHLET_SIZE))
                for idx, template in enumerate(dic):
                    if nx.is_isomorphic(template, subgraph):
                        counts[idx] += 1
                        break

        output.append(counts)
    return output
"47414d42", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphlet = graphlet_kernel(DD)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "1075cb04", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphlet = graphlet_kernel(ENZ)" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "6f797e29", - "metadata": {}, - "outputs": [], - "source": [ - "NCI_graphlet = graphlet_kernel(NCI)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "f29f121f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<1178x34 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 6861 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sp.vstack(sp.csr_matrix(np.array(DD_graphlet)))" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "a54a6b4a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.730 total time= 0.0s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.757 total time= 0.0s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.739 total time= 0.0s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.734 total time= 0.0s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.741 total time= 0.0s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.778 total time= 0.0s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.707 total time= 0.0s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.745 total time= 0.0s\n", - "[CV 1/5] END 
.....C=0.01, class_weight=balanced;, score=0.735 total time= 0.0s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.746 total time= 0.0s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.739 total time= 0.0s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.745 total time= 0.0s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.735 total time= 0.0s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.783 total time= 0.0s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.777 total time= 0.0s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.702 total time= 0.0s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, score=0.734 total time= 0.0s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.730 total time= 0.3s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.757 total time= 0.3s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.782 total time= 0.2s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.739 total time= 0.2s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.734 total time= 0.3s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.725 total time= 0.4s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.783 total time= 0.3s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.771 total time= 0.3s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.702 total time= 0.4s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.734 total time= 0.4s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.725 total time= 4.2s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.741 total time= 3.1s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.787 total time= 5.2s\n", - "[CV 4/5] END ........C=1, 
class_weight=balanced;, score=0.734 total time= 2.6s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.734 total time= 3.6s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.735 total time= 5.9s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.783 total time= 4.5s\n", - "[CV 3/5] END ............C=1, class_weight=None;, score=0.750 total time= 3.8s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.707 total time= 4.2s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.723 total time= 4.5s\n", - "{'C': 0.001, 'class_weight': None}\n", - " precision recall f1-score support\n", - "\n", - " 1 0.73 0.90 0.81 138\n", - " 2 0.79 0.54 0.64 98\n", - "\n", - " accuracy 0.75 236\n", - " macro avg 0.76 0.72 0.73 236\n", - "weighted avg 0.76 0.75 0.74 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7463045912653976,\n", - " 0.04357951487182796,\n", - " array([0.72631579, 0.74736842, 0.74468085, 0.78723404, 0.78723404,\n", - " 0.78723404, 0.76595745, 0.63829787, 0.76595745, 0.71276596]))" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(DD_graphlet)), DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "ee7d1e5a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.073 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, 
estimator__class_weight=balanced;, score=0.031 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.021 total time= 0.8s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.7s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.021 total time= 0.6s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.5s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.031 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.042 total time= 0.2s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.031 total time= 0.3s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.021 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.042 total time= 0.3s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 3.3s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.031 total time= 5.3s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.021 total time= 5.1s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.031 total time= 4.8s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.010 total time= 4.3s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.021 total time= 2.2s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.031 total time= 2.1s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.021 total time= 1.8s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.031 total time= 1.8s\n", - "[CV 
5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.052 total time= 1.9s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 12.1s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.021 total time= 29.5s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.010 total time= 21.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.031 total time= 29.2s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.010 total time= 21.5s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.021 total time= 4.7s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.042 total time= 5.3s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.031 total time= 3.7s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.052 total time= 4.3s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.062 total time= 5.3s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 28.1s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 58.2s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 41.3s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.021 total time= 52.9s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 33.9s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.031 total time= 1.0min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.052 total time= 47.8s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.031 total time= 53.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.021 total time= 
47.4s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.021 total time= 46.1s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 3.2min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 8.7min\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 6.5min\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.021 total time= 8.8min\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.021 total time= 4.3min\n", - "{'estimator__C': 0.1, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.16 0.57 0.25 21\n", - " 1 0.30 0.35 0.32 26\n", - " 2 0.32 0.52 0.40 23\n", - " 3 0.18 0.72 0.29 18\n", - " 4 0.17 0.33 0.23 15\n", - " 5 0.26 0.47 0.33 17\n", - "\n", - " micro avg 0.21 0.49 0.30 120\n", - " macro avg 0.23 0.49 0.30 120\n", - "weighted avg 0.24 0.49 0.31 120\n", - " samples avg 0.24 0.49 0.31 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. 
Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.03958333333333333,\n", - " 0.025430324199445214,\n", - " array([0.02083333, 0.02083333, 0.04166667, 0.02083333, 0.02083333,\n", - " 0.0625 , 0.02083333, 0.04166667, 0.10416667, 0.04166667]))" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(ENZ_graphlet)), ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "id": "5c8e8ce2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.632 total time= 0.3s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.612 total time= 0.4s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.637 total time= 0.3s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.604 total time= 0.3s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.642 total time= 0.3s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.635 total time= 0.3s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.617 total time= 0.2s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.622 total time= 0.2s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.604 total time= 0.2s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.635 total time= 0.2s\n", - "[CV 1/5] END .....C=0.01, class_weight=balanced;, score=0.626 total time= 0.7s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.619 total time= 0.7s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.635 total time= 0.7s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.607 total time= 
0.8s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.642 total time= 0.7s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.625 total time= 0.8s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.617 total time= 0.7s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.626 total time= 0.8s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.606 total time= 0.8s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, score=0.639 total time= 0.7s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.629 total time= 3.4s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.622 total time= 3.7s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.640 total time= 4.0s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.607 total time= 3.8s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.641 total time= 3.8s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.628 total time= 4.9s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.620 total time= 4.1s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.631 total time= 4.0s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.607 total time= 4.3s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.636 total time= 3.7s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.629 total time= 31.3s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.622 total time= 35.6s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.637 total time= 28.5s\n", - "[CV 4/5] END ........C=1, class_weight=balanced;, score=0.604 total time= 36.2s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.639 total time= 30.0s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.631 total time= 33.8s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.619 total time= 33.0s\n", - 
"[CV 3/5] END ............C=1, class_weight=None;, score=0.620 total time= 30.1s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.609 total time= 34.0s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.635 total time= 28.7s\n", - "{'C': 0.1, 'class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.60 0.64 0.62 385\n", - " 1 0.66 0.62 0.64 437\n", - "\n", - " accuracy 0.63 822\n", - " macro avg 0.63 0.63 0.63 822\n", - "weighted avg 0.63 0.63 0.63 822\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.6286511231373713,\n", - " 0.01745021029215421,\n", - " array([0.64741641, 0.61702128, 0.61094225, 0.6443769 , 0.65653495,\n", - " 0.63221884, 0.59878419, 0.61702128, 0.6402439 , 0.62195122]))" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(NCI_graphlet)), NCI_target)" - ] - }, - { - "cell_type": "markdown", - "id": "de37ea00", - "metadata": {}, - "source": [ - "# closed walk" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "bda0b15d", - "metadata": {}, - "outputs": [], - "source": [ - "max_enz = max([max(point) for point in ENZ_features])/1000\n", - "ENZ_normalized = np.array(np.array(ENZ_features)/max_enz, dtype=float)" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "id": "25ed0e82", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=balanced;, 
score=0.000 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.2s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.4s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.3s\n", - "[CV 4/5] END estimator__C=0.05, 
estimator__class_weight=balanced;, score=0.000 total time= 0.2s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 13.4s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 1.7s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 5.3s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 6.9s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 8.7s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.2s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.1s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.010 total time= 0.1s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 20.1s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 17.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 15.1s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 29.6s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 27.3s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.6s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 0.6s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 0.6s\n", - "[CV 4/5] END 
estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.3s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.2s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.6min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.1min\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 47.5s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.6min\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 52.6s\n", - "{'estimator__C': 0.001, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.18 1.00 0.30 21\n", - " 1 0.26 1.00 0.41 26\n", - " 2 0.00 0.00 0.00 23\n", - " 3 0.50 0.06 0.10 18\n", - " 4 0.12 0.87 0.20 15\n", - " 5 0.00 0.00 0.00 17\n", - "\n", - " micro avg 0.18 0.51 0.27 120\n", - " macro avg 0.18 0.49 0.17 120\n", - "weighted avg 0.18 0.51 0.18 120\n", - " samples avg 0.20 0.51 0.28 120\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.0, 0.0, array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(ENZ_normalized), ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "be57620b", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff 
--git a/Python_files/ex2/Ex1/datasets/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Python_files/ex2/Ex1/datasets/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index bb9a810f0ae63a97f0c529d9d1b7ab05b5ddf479..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,35 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "palestinian-quarter", - "metadata": {}, - "outputs": [], - "source": [ - "hello" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/.DS_Store b/Python_files/ex2/Ex1/datasets/DD/.DS_Store deleted file mode 100644 index 765cdd1cf89e2916c73e1e8f3cbf370041187a44..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/DD/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1-checkpoint.ipynb b/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1-checkpoint.ipynb deleted file mode 100644 index 4833343bd4390afdeb6f6d8d2e965b475495b0c4..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1-checkpoint.ipynb +++ /dev/null @@ -1,635 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 15, - "id": "surgical-christmas", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as 
iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "waiting-paste", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "acute-accessory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "civil-crash", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - 
"dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - 
"dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "precious-lounge", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "alternate-minister", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold 
Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "tight-defensive", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "radio-armenia", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pleased-pressing", - "metadata": {}, - "outputs": [], - "source": [ - "output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "arabic-ivory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "promotional-conclusion", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "exterior-packing", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(480, 34) (480,)\n", - "(120, 34) (120,)\n" - ] - } - ], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data1)\n", - "target=[g.graph['label'] for g in data1]\n", - "\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.2)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "pregnant-capacity", - "metadata": {}, - "outputs": [], - "source": [ - "clf = svm.SVC(kernel='linear', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "abroad-attribute", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, 
- { - "cell_type": "code", - "execution_count": 46, - "id": "heard-screening", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "modular-hunger", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "right-research", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "synthetic-spring", - "metadata": {}, - "outputs": [], - "source": [ - "accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "nearby-recommendation", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.03460563266237011" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.mean()\n", - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "satellite-fiber", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "satellite-meaning", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 
1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "charged-french", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "affiliated-lambda", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "immune-samba", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - 
"execution_count": 16, - "id": "small-sword", - "metadata": {}, - "outputs": [], - "source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - " if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", - " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "necessary-rates", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.018750000000000003 0.014583333333333332\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Classification metrics can't handle a mix of multilabel-indicator and multiclass targets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-17-57e877d3e78a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msvc_evaluation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-16-ec47bb244c24>\u001b[0m in 
\u001b[0;36msvc_evaluation\u001b[0;34m(features, targets)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclassification_report\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m 
\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36mclassification_report\u001b[0;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[1;32m 1964\u001b[0m \"\"\"\n\u001b[1;32m 1965\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1966\u001b[0;31m \u001b[0my_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1967\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1968\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m 
\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m raise ValueError(\"Classification metrics can't handle a mix of {0} \"\n\u001b[0m\u001b[1;32m 93\u001b[0m \"and {1} targets\".format(type_true, type_pred))\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "quick-auction", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "mighty-mortgage", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "recognized-ensemble", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", 
- "execution_count": null, - "id": "classical-chapel", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1_new-checkpoint.ipynb b/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1_new-checkpoint.ipynb deleted file mode 100644 index 4833343bd4390afdeb6f6d8d2e965b475495b0c4..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/EX2_GL1_new-checkpoint.ipynb +++ /dev/null @@ -1,635 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 15, - "id": "surgical-christmas", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "waiting-paste", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - 
{ - "cell_type": "code", - "execution_count": null, - "id": "acute-accessory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "civil-crash", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - 
"dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "precious-lounge", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - 
"#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "alternate-minister", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "tight-defensive", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "radio-armenia", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pleased-pressing", - "metadata": {}, - "outputs": [], - "source": [ - "output" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "arabic-ivory", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "promotional-conclusion", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "exterior-packing", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(480, 34) (480,)\n", - "(120, 34) (120,)\n" - ] - } - ], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data1)\n", - "target=[g.graph['label'] for g in data1]\n", - "\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.2)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "pregnant-capacity", - "metadata": {}, - "outputs": [], - "source": [ - "clf = svm.SVC(kernel='linear', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "abroad-attribute", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "heard-screening", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "modular-hunger", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "right-research", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - 
}, - { - "cell_type": "code", - "execution_count": 39, - "id": "synthetic-spring", - "metadata": {}, - "outputs": [], - "source": [ - "accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "nearby-recommendation", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.03460563266237011" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.mean()\n", - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "satellite-fiber", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "satellite-meaning", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": 
"charged-french", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "affiliated-lambda", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "immune-samba", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "small-sword", - "metadata": {}, - "outputs": [], - "source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - " if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", 
- " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "necessary-rates", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.018750000000000003 0.014583333333333332\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Classification metrics can't handle a mix of multilabel-indicator and multiclass targets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-17-57e877d3e78a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msvc_evaluation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-16-ec47bb244c24>\u001b[0m in \u001b[0;36msvc_evaluation\u001b[0;34m(features, targets)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclassification_report\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36mclassification_report\u001b[0;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[1;32m 1964\u001b[0m \"\"\"\n\u001b[1;32m 1965\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1966\u001b[0;31m \u001b[0my_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1967\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1968\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m raise ValueError(\"Classification metrics can't handle a mix of {0} \"\n\u001b[0m\u001b[1;32m 93\u001b[0m \"and {1} targets\".format(type_true, type_pred))\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "quick-auction", - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "mighty-mortgage", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "recognized-ensemble", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "classical-chapel", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/Untitled-checkpoint.ipynb 
b/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index 96319576d15788953cae1cf3b3cea4a36c3f2885..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,304 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 398, - "id": "noted-complaint", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn import svm" - ] - }, - { - "cell_type": "code", - "execution_count": 399, - "id": "sticky-fraction", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "amended-india", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 400, - "id": "chinese-chrome", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - 
"dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - 
"dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 401, - "id": "distributed-chuck", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(temp.values())\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(temp.values())\n", - " return output" - ] 
- }, - { - "cell_type": "code", - "execution_count": 402, - "id": "consecutive-portal", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "floating-approval", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "handy-attitude", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "crude-myanmar", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "statewide-crossing", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - 
"0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "varied-cologne", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/Untitled1-checkpoint.ipynb b/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/Untitled1-checkpoint.ipynb deleted file mode 100644 index 363fcab7ed6e9634e198cf5555ceb88932c9a245..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/.ipynb_checkpoints/Untitled1-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/EX2_GL1.ipynb b/Python_files/ex2/Ex1/datasets/DD/EX2_GL1.ipynb deleted file mode 100644 index c2dfdc7475419ab98e65947b6547f7b263c77872..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/EX2_GL1.ipynb +++ /dev/null @@ -1,639 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 15, - "id": "fitting-toyota", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection 
import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "limited-dance", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "revised-calendar", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "swiss-population", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - 
"dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - 
"dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "convenient-initial", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "structured-delivery", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all 
the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "piano-johns", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "treated-flashing", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "obvious-marriage", - "metadata": {}, - "outputs": [], - "source": [ - "output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "statewide-extra", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "hired-feature", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "architectural-inflation", - "metadata": {}, - "outputs": [], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data1)\n", - "target=[g.graph['label'] for g in data1]\n", - "\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.2)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "expired-barcelona", - "metadata": {}, - "outputs": [], - "source": [ - "clf = svm.SVC(kernel='linear', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "shaped-worse", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "prospective-upper", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - 
"execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "controlling-stock", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "turned-calvin", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "featured-training", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'clf' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-20-fc545aec8df4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0maccuracy\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgram_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'clf' is not defined" - ] - } - ], - "source": [ - "accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "apart-lancaster", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'accuracy' is not defined", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-18-3bcce742fcca>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'accuracy' is not defined" - ] - } - ], - "source": [ - "print(accuracy.mean())\n", - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "about-nicholas", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "modular-johnston", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 
2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "everyday-digest", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "short-native", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "satisfactory-facility", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "forbidden-triumph", - "metadata": {}, - "outputs": [], - "source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - " if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, 
cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", - " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "spoken-oasis", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.018750000000000003 0.014583333333333332\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Classification metrics can't handle a mix of multilabel-indicator and multiclass targets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-17-57e877d3e78a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msvc_evaluation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-16-ec47bb244c24>\u001b[0m in \u001b[0;36msvc_evaluation\u001b[0;34m(features, targets)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_bin\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maccuracy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclassification_report\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test_bin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36mclassification_report\u001b[0;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[1;32m 1964\u001b[0m \"\"\"\n\u001b[1;32m 1965\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1966\u001b[0;31m \u001b[0my_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1967\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1968\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m raise ValueError(\"Classification metrics can't handle a mix of {0} \"\n\u001b[0m\u001b[1;32m 93\u001b[0m \"and {1} targets\".format(type_true, type_pred))\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Classification metrics can't handle a mix of multilabel-indicator 
and multiclass targets" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "prompt-draft", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "coordinated-counter", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "listed-turning", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "boolean-mailman", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": 
"ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/EX2_GL1_new.ipynb b/Python_files/ex2/Ex1/datasets/DD/EX2_GL1_new.ipynb deleted file mode 100644 index 133cc9f4c39d36247cf1f5c9a42411112169af21..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/EX2_GL1_new.ipynb +++ /dev/null @@ -1,599 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 34, - "id": "numeric-glucose", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "import argparse\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "pursuant-atlantic", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "designing-religion", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "nasty-calculator", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - 
"dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - 
"dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "departmental-missouri", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less 
than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "comprehensive-shareware", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "existing-grain", - "metadata": {}, - "outputs": [], - "source": [ - "#create training and testing vars\n", - "output=graphlet_kernel(data)\n", - "target=[g.graph['label'] for g in data]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "apparent-snowboard", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1060, 34) (1060,)\n", - "(118, 34) (118,)\n" - ] - } - ], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(np.array(output),np.array(target), test_size=0.1)\n", - "print(X_train.shape, y_train.shape)\n", - "print(X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "civic-contents", - "metadata": {}, - "outputs": [], - "source": [ - "clf = SVC(kernel='precomputed', C=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "micro-money", - "metadata": {}, - "outputs": [], - "source": [ - "gram_train= np.dot(X_train,X_train.T)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "brutal-deployment", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(C=1, kernel='precomputed')" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.fit(gram_train,y_train)" - ] - }, - { - "cell_type": "code", - 
"execution_count": 23, - "id": "dietary-practice", - "metadata": {}, - "outputs": [], - "source": [ - "gram_test=np.dot(X_train,X_test.T).T" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "parliamentary-vietnamese", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "list" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "fixed-residence", - "metadata": {}, - "outputs": [], - "source": [ - "accuracy= cross_val_score(clf,gram_train, y_train, cv=10 )" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "critical-stanford", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7481132075471698" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.mean()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "behavioral-saying", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.04525352813287346" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy.std()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "leading-invalid", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(942, 942)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_train.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "filled-trash", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1,\n", - " 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,\n", - " 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 
1, 2, 1,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1,\n", - " 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2,\n", - " 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1,\n", - " 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1,\n", - " 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.predict(gram_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "generic-argument", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " \n", - " # eigenvalues and eigenvectors\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " \n", - " #power_lambdas=np.rint(power_lambdas)\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "exact-freeze", - "metadata": {}, - "outputs": [], - "source": [ - "output_dd=closed_kernel(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "funny-omaha", - "metadata": {}, - "outputs": [], - "source": [ - "output_nci=closed_kernel(data2)\n", - "target_nci=[g.graph['label'] for g in data2]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "appropriate-guest", - "metadata": {}, - "outputs": [], 
- "source": [ - "def svc_evaluation(features,targets):\n", - " classes = list(set(targets))\n", - " X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2)\n", - " if len(classes)<=2:\n", - " clf = SVC(kernel='linear', C=1)\n", - " clf.fit(X_train,y_train)\n", - " accuracy= cross_val_score(clf,X_train, y_train, cv=10 )\n", - " print (accuracy)\n", - " print (classification_report(y_test,clf.predict(X_test)))\n", - " else:\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='linear', class_weight=\"balanced\"))\n", - " clf.fit(X_train,y_train)\n", - " accuracy = cross_val_score(clf,X_train, y_train_bin, cv=10)\n", - " print(accuracy.mean(),accuracy.std())\n", - " print(classification_report(y_test_bin, clf.predict(X_test)))" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "portable-calibration", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.74736842 0.75789474 0.72340426 0.74468085 0.79787234 0.70212766\n", - " 0.72340426 0.78723404 0.76595745 0.73404255]\n", - " precision recall f1-score support\n", - "\n", - " 1 0.72 0.89 0.79 137\n", - " 2 0.77 0.52 0.62 99\n", - "\n", - " accuracy 0.73 236\n", - " macro avg 0.75 0.70 0.71 236\n", - "weighted avg 0.74 0.73 0.72 236\n", - "\n" - ] - } - ], - "source": [ - "svc_evaluation(output,target)" - ] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "native-grade", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, 
- { - "cell_type": "code", - "execution_count": 404, - "id": "mechanical-twist", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "clean-blackjack", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "signal-clause", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/Untitled.ipynb b/Python_files/ex2/Ex1/datasets/DD/Untitled.ipynb deleted file mode 100644 index 65beca8ba1a2d59014a9bb8c93a5b889ae6242b6..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/Untitled.ipynb +++ /dev/null @@ -1,313 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 406, - "id": "moral-brazil", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as 
nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn import svm\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": 407, - "id": "played-studio", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "destroyed-sociology", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 400, - "id": "intelligent-sword", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - 
"dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - 
"dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "played-yugoslavia", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 401, - "id": "skilled-whale", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - " \n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(temp.values())\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(temp.values())\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 402, - "id": "substantial-charge", - "metadata": {}, - "outputs": [], - "source": [ - "def ten_fold(data):\n", - " target=[g.graph['label'] for g in data]\n", - " feature=graphlet_kernel(data)\n", - " \n", - "#SVM function can only handle list\n", - " 
to_list_feature=[list(i) for i in feature]\n", - " \n", - "#ten_fold Cross-validation\n", - " kf=KFold(n_splits=10)\n", - " \n", - "#interate all the folds\n", - " for i,j in kf.split(to_list_feature,target):\n", - " clf = svm.SVC(kernel='linear', C=1).fit(np.array(to_list_feature)[i],np.array(target)[i])\n", - " score=clf.score(np.array(to_list_feature)[j],np.array(target)[j])\n", - " print (score)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "agricultural-bookmark", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 403, - "id": "present-springfield", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8220338983050848\n", - "0.8389830508474576\n", - "0.9067796610169492\n", - "0.7627118644067796\n", - "0.8135593220338984\n", - "0.8559322033898306\n", - "0.5508474576271186\n", - "0.3983050847457627\n", - "0.47863247863247865\n", - "0.49572649572649574\n" - ] - } - ], - "source": [ - "ten_fold(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 404, - "id": "athletic-colombia", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.13333333333333333\n", - "0.05\n", - "0.05\n", - "0.06666666666666667\n", - "0.03333333333333333\n", - "0.016666666666666666\n", - "0.08333333333333333\n", - "0.05\n", - "0.06666666666666667\n", - "0.0\n" - ] - } - ], - "source": [ - "ten_fold(data1)" - ] - }, - { - "cell_type": "code", - "execution_count": 405, - "id": "sorted-tuesday", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.35523114355231145\n", - "0.38686131386861317\n", - "0.35766423357664234\n", - "0.5352798053527981\n", - "0.4306569343065693\n", - "0.44038929440389296\n", - "0.3260340632603406\n", - "0.3722627737226277\n", - "0.29927007299270075\n", - "0.5304136253041363\n" - ] - } - ], - "source": [ - "ten_fold(data2)" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "optional-horse", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/Untitled1.ipynb b/Python_files/ex2/Ex1/datasets/DD/Untitled1.ipynb deleted file mode 100644 index 11abc48d1e747cba7fba15e207c40b4183d93b14..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/DD/Untitled1.ipynb +++ /dev/null @@ -1,236 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "id": "welsh-chorus", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg\n", - "import random\n", - "import networkx.algorithms.isomorphism as iso\n", - "from sklearn.model_selection import train_test_split,KFold\n", - "from sklearn import datasets\n", - "from sklearn import svm\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "favorite-andrews", - "metadata": {}, - "outputs": [], - "source": [ - "with open('data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "with open('data1.pkl','rb') as f1:\n", - " data1 = pickle.load(f1)\n", - "with open('data2.pkl','rb') as f2:\n", - " data2 = pickle.load(f2)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "boxed-fortune", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - 
"dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - "dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - 
"dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "treated-mitchell", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - "\n", - " \n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if sorted(dict(k.degree()).values())==sorted(dict(g.degree()).values()):\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - "\n", - "#iterate over all 
graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "automated-period", - "metadata": {}, - "outputs": [], - "source": [ - "out=graphlet_kernel(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "trained-arrow", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4110" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(out)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "registered-patent", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 0, 0, 0, 0]" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g=nx.empty_graph(5)\n", - "sorted(dict(g.degree()).values())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pointed-preview", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/DD/data.pkl b/Python_files/ex2/Ex1/datasets/DD/data.pkl deleted file mode 100644 
index 546de71a1d97226a0a1a3e0f89a34671520934f9..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/DD/data.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/DD/data1.pkl b/Python_files/ex2/Ex1/datasets/DD/data1.pkl deleted file mode 100644 index e4e7190c25ffe6ac0636afceb46b30d55557319f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/DD/data1.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/DD/data2.pkl b/Python_files/ex2/Ex1/datasets/DD/data2.pkl deleted file mode 100644 index 6c72a48da0903e887c68acff31d5715f0072cd6c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/DD/data2.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ENZYMES/.DS_Store b/Python_files/ex2/Ex1/datasets/ENZYMES/.DS_Store deleted file mode 100644 index 14f68d85f90870714ff4c194f0d8aac3137a8fb0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ENZYMES/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ENZYMES/data.pkl b/Python_files/ex2/Ex1/datasets/ENZYMES/data.pkl deleted file mode 100644 index e4e7190c25ffe6ac0636afceb46b30d55557319f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ENZYMES/data.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/EX4_GL1_0426.ipynb b/Python_files/ex2/Ex1/datasets/EX4_GL1_0426.ipynb deleted file mode 100644 index c98558c23a5d61c2620fae11fbcf6b6c5ef1ac0f..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/EX4_GL1_0426.ipynb +++ /dev/null @@ -1,1911 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "german-appeal", - "metadata": {}, - "source": [ - "# EX 4" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "oriented-nicaragua", - "metadata": {}, - "outputs": [], - "source": [ - "#needed for EX 3\n", - "import pickle\n", - "import 
networkx as nx\n", - "import matplotlib.pyplot as plt\n", - "from collections import Counter, defaultdict\n", - "import scipy.sparse as sp\n", - "import numpy as np\n", - "from multiset import FrozenMultiset\n", - "import random" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "prompt-delaware", - "metadata": {}, - "outputs": [], - "source": [ - "# additionally needed for EX 4\n", - "\n", - "from sklearn.svm import SVC\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.preprocessing import label_binarize\n", - "from sklearn.multiclass import OneVsRestClassifier\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.model_selection import cross_val_score" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "nuclear-extraction", - "metadata": {}, - "outputs": [], - "source": [ - "DD = pickle.load(open(\"datasets/DD/data.pkl\", \"rb\"))\n", - "ENZ = pickle.load(open(\"datasets/ENZYMES/data.pkl\", \"rb\"))\n", - "NCI= pickle.load(open(\"datasets/NCI1/data.pkl\", \"rb\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "contained-detection", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "284.3166383701188 1178\n", - "32.63333333333333 600\n", - "29.8654501216545 4110\n" - ] - } - ], - "source": [ - "for data in [DD, ENZ, NCI]:\n", - " print(np.mean([len(graph.nodes()) for graph in data]), len(data))" - ] - }, - { - "cell_type": "markdown", - "id": "comparable-guyana", - "metadata": {}, - "source": [ - "# Kernels" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "curious-geology", - "metadata": {}, - "outputs": [], - "source": [ - "def wl_kernel(orig_graphs, labelname=None, rounds=4):\n", - " graphs = [graph.copy() for graph in orig_graphs]\n", - " \n", - " ##### COLOR REFINEMENT ############\n", - " idx_counter = 0\n", - " coldict = 
dict() #save all colors in a dictionary (keys: hash values, values: index in the final histograms)\n", - " \n", - " #initial colors: if there is a initial color scheme, use it in round 1\n", - " if labelname:\n", - " for graph in graphs:\n", - " init_labels = nx.get_node_attributes(graph, labelname) #dict {node: label}\n", - " hash_labels = {key: hash(value) for key,value in init_labels.items()} #hash label values so that they are the same for all coming graphs and rounds\n", - " colors = list(set(hash_labels.values())) #list of the different colors in this graph\n", - " for hashcol in colors:\n", - " #check if colors already have been saved in coldict and save them if not\n", - " if hashcol not in coldict.keys():\n", - " coldict[hashcol] = idx_counter\n", - " idx_counter += 1\n", - " #change from hashed colors to final colors which will be used afterwards\n", - " new_labels = {key: coldict[hashvalue] for key,hashvalue in hash_labels.items()}\n", - " nx.set_node_attributes(graph, new_labels, str(0))\n", - " # no initial color scheme -> every node gets same color\n", - " else:\n", - " for graph in graphs:\n", - " nx.set_node_attributes(graph, 0, str(0))\n", - " #save color in coldict and increment idx_counter (which counts total number of colors)\n", - " coldict[0] = idx_counter #here: 0\n", - " idx_counter += 1\n", - " \n", - " #next rounds of color refinement\n", - " for k in range(1, rounds+1):\n", - " for graph in graphs:\n", - " #attribute dictionaries\n", - " attrs_last_round = nx.get_node_attributes(graph, str(k-1)) #dictionary with nodes as keys and corresponding attributes of last round as values\n", - " attrs_this_round = dict() #where you save attributes of this round\n", - " \n", - " #compute current color of each node\n", - " for node in graph.nodes():\n", - " #get colors of neighbors and hash them together with the node's color\n", - " colset = FrozenMultiset(attrs_last_round.get(neighbor) for neighbor in list(graph[node]))\n", - " hashcol = 
hash((attrs_last_round.get(node), colset))\n", - " #if hash produces a new color:\n", - " if hashcol not in coldict.keys():\n", - " coldict[hashcol] = idx_counter\n", - " idx_counter += 1\n", - " attrs_this_round[node] = coldict[hashcol]\n", - " #save current colors of the graph as node attributes\n", - " nx.set_node_attributes(graph, attrs_this_round, name=str(k))\n", - "\n", - " \n", - " ####### CONSTRUCT FEATURE VECTORS ###############\n", - " f_vecs = list() #where feature vectors will be stored\n", - " for graph in graphs:\n", - " c = Counter()\n", - " for k in range(rounds):\n", - " #count number of colors that appeared in each round, i.e. c = {0:302, 1:4} if color 0 appeared 302 times and color 1 4 times\n", - " c.update(nx.get_node_attributes(graph, str(k)).values()) \n", - " #create feature vectore as sparse matrix in format 1 x idx_counter\n", - " data = np.array(list(c.values()))\n", - " col = np.array(list(c.keys()))\n", - " row = np.zeros(len(col)) #only one row so far\n", - " f_vec = sp.coo_matrix((data, (row,col)), shape=(1, idx_counter)) #feature vector with histogram entries \n", - " f_vecs.append(f_vec)\n", - "\n", - " return graphs, f_vecs" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "owned-vinyl", - "metadata": {}, - "outputs": [], - "source": [ - "def closed_kernel(graph_list):\n", - "\n", - " l = int(np.mean([len(g.nodes) for g in graph_list])) #list comprehension\n", - " print(\"mean of number of nodes:\", l)\n", - " feature_vectors = []\n", - " \n", - " for graph in graph_list:\n", - " number = []\n", - " A = nx.adjacency_matrix(graph) # sparse matrix\n", - " A =A.todense() # dense matrix\n", - " lambdas = np.linalg.eigvalsh(A)\n", - " for j in range(1, l+1):\n", - " power_lambdas= [x**(j) for x in lambdas ]\n", - " sum_lambdas=int(np.round(sum(power_lambdas)))\n", - " number.append(sum_lambdas) \n", - " feature_vectors.append(number)\n", - " #print(number)\n", - " \n", - " return feature_vectors" - ] - }, - { - 
"cell_type": "code", - "execution_count": 85, - "id": "correct-endorsement", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphs, ENZ_feat = wl_kernel(ENZ, labelname=\"node_label\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "hearing-reputation", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(ENZ_feat) == len(ENZ_graphs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "verbal-brisbane", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_feat" - ] - }, - { - "cell_type": "markdown", - "id": "understood-blond", - "metadata": {}, - "source": [ - "## Gram Matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "faced-kentucky", - "metadata": {}, - "outputs": [], - "source": [ - "m_ENZ = sp.vstack(ENZ_feat, format=\"csr\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "successful-execution", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<600x600 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 358808 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_ENZ = m_ENZ@np.transpose(m_ENZ)\n", - "gram_ENZ" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "shared-adaptation", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mean of number of nodes: 32\n" - ] - } - ], - "source": [ - "ENZ_features = closed_kernel(ENZ)\n", - "#ENZ_features" - ] - }, - { - "cell_type": "markdown", - "id": "federal-segment", - "metadata": {}, - "source": [ - "# SVM" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "id": "offshore-palmer", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", 
- "execution_count": 15, - "id": "informal-luther", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "number of labels for each graphlist\n", - "NZ: 6\n", - "DD: 2\n", - "NCI: 2\n" - ] - } - ], - "source": [ - "print(\"number of labels for each graphlist\")\n", - "ENZ_target = [g.graph['label'] for g in ENZ]\n", - "print(\"NZ:\", len(set(ENZ_target)))\n", - "DD_target = [g.graph['label'] for g in DD]\n", - "print(\"DD:\", len(set(DD_target)))\n", - "NCI_target = [g.graph['label'] for g in NCI]\n", - "print(\"NCI:\", len(set(NCI_target)))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "adjustable-contrast", - "metadata": {}, - "outputs": [], - "source": [ - "def graph_svm(feature_vecs, target_vec):\n", - " num_labels = len(set(target_vec))\n", - " feature_mat = np.vstack(feature_vecs)\n", - " gram_mat = feature_mat @ np.transpose(feature_mat)\n", - " print(gram_mat)\n", - " clf = SVC(kernel='precomputed')\n", - " clf.fit(gram_mat, target_vec)\n", - " return clf\n", - " " - ] - }, - { - "cell_type": "raw", - "id": "motivated-birmingham", - "metadata": {}, - "source": [ - "clf = graph_svm(NCI_features, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "important-concrete", - "metadata": {}, - "outputs": [], - "source": [ - "predicted = clf.predict(feature_vecs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "detected-multiple", - "metadata": {}, - "outputs": [], - "source": [ - "print(classification_report(NCI_target, predicted)) " - ] - }, - { - "cell_type": "raw", - "id": "institutional-chaos", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "#this time, we train an SVM classifier\n", - "classifier = SVC(C=1, kernel='linear', gamma = 'auto')\n", - "classifier.fit(NCI_features, NCI_target)\n", - "\n", - "targetFeature_predict = classifier.predict(NCI_features)\n" - ] - }, - { - "cell_type": "raw", - "id": "inappropriate-score", 
- "metadata": {}, - "source": [ - "\n", - "classifier.decision_function(NCI_features)\n", - "print('Accuracy: \\n', classifier.score(NCI_features,ENZ_target))\n", - "print('Classification report: \\n')\n", - "print(classification_report(NCI_target, targetFeature_predict)) " - ] - }, - { - "cell_type": "markdown", - "id": "identical-catch", - "metadata": {}, - "source": [ - "# new" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "passive-change", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphs, DD_feat = wl_kernel(DD, labelname=\"node_label\")" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "boxed-medicaid", - "metadata": {}, - "outputs": [], - "source": [ - "m_DD = sp.vstack(DD_feat)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dated-ground", - "metadata": {}, - "outputs": [], - "source": [ - "DD_feat[:50]" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "guilty-england", - "metadata": {}, - "outputs": [], - "source": [ - "DD_target = [g.graph['label'] for g in DD]" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "romantic-terminal", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<1254664x1178 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 1017230 stored elements in COOrdinate format>" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m_DD.transpose()" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "wrapped-neighbor", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "matrix([[8298, 6960, 3678, ..., 1007, 943, 2839],\n", - " [6960, 8926, 3879, ..., 1070, 987, 2922],\n", - " [3678, 3879, 2868, ..., 587, 596, 1578],\n", - " ...,\n", - " [1007, 1070, 587, ..., 428, 185, 663],\n", - " [ 943, 987, 596, ..., 185, 376, 575],\n", - " [2839, 2922, 1578, ..., 663, 575, 3894]], dtype=int32)" - ] - }, - 
"execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gram_DD = m_DD.dot(m_DD.transpose()).todense()\n", - "gram_DD" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "id": "western-delight", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 6])" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a = np.array([[1,2,3],[4,5,6],[7,8,9]])\n", - "a[[0,1],[0,2]]" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "processed-onion", - "metadata": {}, - "outputs": [], - "source": [ - "samplelist = np.arange(len(DD_target)).tolist()\n", - "sample_idx = random.sample(samplelist, 200)\n", - "#sample_idx" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "id": "approved-capital", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(200, 200)" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.asarray(gram_DD)[sample_idx][:, sample_idx].shape" - ] - }, - { - "cell_type": "raw", - "id": "organic-egyptian", - "metadata": {}, - "source": [ - "smaller_matrix = np.asarray(gram_DD)[sample_idx][:, sample_idx]\n", - "clf = SVC(kernel='precomputed')\n", - "clf.fit(smaller_matrix, np.array(DD_target)[sample_idx])" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "id": "recognized-nigeria", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVC(kernel='precomputed')" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf = SVC(kernel='precomputed')\n", - "clf.fit(np.array(gram_DD), np.array(DD_target))" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "id": "outside-nation", - "metadata": {}, - "outputs": [], - "source": [ - "targetFeature_predict = clf.predict(smaller_matrix)" - ] - }, - { - "cell_type": "code", - 
"execution_count": 124, - "id": "unable-holder", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 1.00 1.00 1.00 115\n", - " 2 1.00 1.00 1.00 85\n", - "\n", - " accuracy 1.00 200\n", - " macro avg 1.00 1.00 1.00 200\n", - "weighted avg 1.00 1.00 1.00 200\n", - "\n" - ] - } - ], - "source": [ - "print(classification_report( np.array(DD_target)[sample_idx], targetFeature_predict)) " - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "id": "combined-banner", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 1.00 1.00 1.00 691\n", - " 2 1.00 1.00 1.00 487\n", - "\n", - " accuracy 1.00 1178\n", - " macro avg 1.00 1.00 1.00 1178\n", - "weighted avg 1.00 1.00 1.00 1178\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:593: FutureWarning: np.matrix usage is deprecated in 1.0 and will raise a TypeError in 1.2. Please convert to a numpy array with np.asarray. 
For more information see: https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "new_sample_idx = random.sample(samplelist, 200)\n", - "new_matrix = np.asarray(gram_DD)[new_sample_idx][:, new_sample_idx]\n", - "print(classification_report( np.array(DD_target), clf.predict(gram_DD))) " - ] - }, - { - "cell_type": "raw", - "id": "strange-longer", - "metadata": {}, - "source": [ - "from sklearn import datasets, linear_model\n", - "from sklearn.model_selection import cross_val_score\n", - "diabetes = datasets.load_diabetes()\n", - "X = diabetes.data[:150]\n", - "y = diabetes.target[:150]\n", - "lasso = linear_model.Lasso()\n", - "print(cross_val_score(lasso, X, y, cv=3))" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "id": "sealed-conservation", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.7877661886136462\n" - ] - } - ], - "source": [ - "\n", - "accuracy = cross_val_score(clf, np.array(gram_DD), np.array(DD_target), cv=10)\n", - "print(np.mean(accuracy))" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "id": "helpful-creature", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " m_feat = sp.vstack(feat_vecs)\n", - " gram_matrix = m_feat.dot(m_feat.transpose()).todense()\n", - " \n", - " if n_classes <= 2:\n", - " clf = SVC(kernel='precomputed')\n", - " #clf.fit(np.array(gram_matrix), np.array(target_vec))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(target_vec), cv=10)\n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " Y = label_binarize(target_vec, classes=classes)\n", - " print(Y)\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', class_weight=\"balanced\"))\n", - " accuracy = 
cross_val_score(clf, np.array(gram_matrix), Y, cv=10)\n", - " print(accuracy)\n", - " print(np.mean(accuracy))\n", - " \n", - " return np.mean(accuracy)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "molecular-passport", - "metadata": {}, - "outputs": [], - "source": [ - "DD_graphs, DD_feat = wl_kernel(DD, labelname=\"node_label\")\n", - "DD_target = [g.graph['label'] for g in DD]" - ] - }, - { - "cell_type": "code", - "execution_count": 157, - "id": "former-infrared", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7877661886136462" - ] - }, - "execution_count": 157, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed(DD_feat, DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "noted-photograph", - "metadata": {}, - "outputs": [], - "source": [ - "NCI_graphs, NCI_feat = wl_kernel(NCI, labelname=\"node_label\")\n", - "NCI_target = [g.graph['label'] for g in NCI]" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "sublime-staff", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.783698296836983\n" - ] - } - ], - "source": [ - "svm_precomputed(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "empty-appearance", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphs, ENZ_feat = wl_kernel(ENZ, labelname=\"node_label\")\n", - "ENZ_target = [g.graph['label'] for g in ENZ]" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "id": "removable-harrison", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "[[0 0 0 0 0 1]\n", - " [0 0 0 0 0 1]\n", - " [0 0 0 0 0 1]\n", - " ...\n", - " [0 0 0 1 0 0]\n", - " [0 0 0 1 0 0]\n", - " [0 0 0 1 0 0]]\n", - "[0.03333333 0.1 0.03333333 0.01666667 0. 0.01666667\n", - " 0.03333333 0. 
0.01666667 0.03333333]\n", - "0.028333333333333332\n" - ] - }, - { - "data": { - "text/plain": [ - "0.028333333333333332" - ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "developmental-colombia", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_gridSearchCV(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " m_feat = sp.vstack(feat_vecs)\n", - " gram_matrix = m_feat.dot(m_feat.transpose()).todense()\n", - " \n", - " if n_classes <= 2:\n", - " param_grid = {'C': [0.001,0.01,0.1,1],\n", - " 'class_weight':['balanced',None]} \n", - " grid = GridSearchCV(SVC(kernel=\"precomputed\"), param_grid, cv = 5, verbose = 3) \n", - " grid.fit(np.array(gram_matrix), np.array(target_vec))\n", - " print(grid.best_params_) \n", - " clf = SVC(kernel='precomputed', **grid.best_params_)\n", - " clf.fit(np.array(gram_matrix), np.array(target_vec))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(target_vec), cv=10)\n", - " \n", - " return np.mean(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 175, - "id": "photographic-scanning", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.717 total time= 0.2s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.754 total time= 0.2s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.783 total time= 0.2s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.765 total time= 0.2s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.719 total time= 0.2s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, 
score=0.717 total time= 0.1s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.754 total time= 0.1s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.785 total time= 0.1s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.766 total time= 0.1s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.719 total time= 0.1s\n", - "[CV 1/5] END .....C=0.01, class_weight=balanced;, score=0.780 total time= 0.2s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.783 total time= 0.2s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.832 total time= 0.2s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.809 total time= 0.3s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.786 total time= 0.2s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.780 total time= 0.2s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.783 total time= 0.2s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.832 total time= 0.2s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.809 total time= 0.2s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, score=0.787 total time= 0.3s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.794 total time= 0.7s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.787 total time= 0.7s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.828 total time= 0.7s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.799 total time= 0.6s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.802 total time= 0.6s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.794 total time= 0.6s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.787 total time= 0.7s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.828 total time= 0.6s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.799 total 
time= 0.7s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.802 total time= 0.7s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.774 total time= 0.9s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.779 total time= 1.0s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.787 total time= 1.0s\n", - "[CV 4/5] END ........C=1, class_weight=balanced;, score=0.776 total time= 0.9s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.785 total time= 0.8s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.774 total time= 1.0s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.779 total time= 1.0s\n", - "[CV 3/5] END ............C=1, class_weight=None;, score=0.787 total time= 0.9s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.776 total time= 0.9s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.785 total time= 0.8s\n", - "{'C': 0.1, 'class_weight': 'balanced'}\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.802919708029197,\n", - " array([0.81995134, 0.74939173, 0.84184915, 0.76155718, 0.81751825,\n", - " 0.8296837 , 0.79562044, 0.81751825, 0.83211679, 0.76399027]))" - ] - }, - "execution_count": 175, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridSearchCV(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "saved-tactics", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_tt(feat_vecs, target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " X_train, X_test, y_train, y_test = train_test_split(feat_vecs, target_vec, test_size=0.2, random_state=4)\n", - " train_feat = sp.vstack(X_train)\n", - " test_feat = sp.vstack(X_test)\n", - " gram_matrix = train_feat.dot(train_feat.transpose()).todense()\n", - " gram_test = train_feat.dot(test_feat.transpose()).todense().T\n", - "\n", - " if 
n_classes <= 2:\n", - " clf = SVC(kernel='precomputed')\n", - " clf.fit(np.array(gram_matrix), np.array(y_train))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train), cv=10)\n", - " print(classification_report(y_test, clf.predict(np.array(gram_test)))) \n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', class_weight=\"balanced\"))\n", - " clf.fit(np.array(gram_matrix), np.array(y_train_bin))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), y_train_bin, cv=10)\n", - " print(classification_report(y_test_bin, clf.predict(np.array(gram_test)))) \n", - " \n", - " return np.mean(accuracy), np.std(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "settled-engine", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 0.85 0.85 0.85 138\n", - " 2 0.79 0.79 0.79 98\n", - "\n", - " accuracy 0.82 236\n", - " macro avg 0.82 0.82 0.82 236\n", - "weighted avg 0.82 0.82 0.82 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7886898096304591,\n", - " 0.04572144929229968,\n", - " array([0.81052632, 0.82105263, 0.76595745, 0.79787234, 0.80851064,\n", - " 0.80851064, 0.82978723, 0.68085106, 0.82978723, 0.73404255]))" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(DD_feat, DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 250, - "id": "several-liver", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 0 0.79 0.83 0.81 385\n", - " 1 0.84 0.80 0.82 
437\n", - "\n", - " accuracy 0.81 822\n", - " macro avg 0.81 0.81 0.81 822\n", - "weighted avg 0.81 0.81 0.81 822\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.8150928534361332,\n", - " array([0.79331307, 0.81458967, 0.80547112, 0.81458967, 0.81458967,\n", - " 0.82370821, 0.79027356, 0.83890578, 0.8445122 , 0.81097561]))" - ] - }, - "execution_count": 250, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(NCI_feat, NCI_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "ideal-integer", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - " precision recall f1-score support\n", - "\n", - " 0 0.83 0.24 0.37 21\n", - " 1 0.73 0.31 0.43 26\n", - " 2 0.62 0.43 0.51 23\n", - " 3 0.90 0.50 0.64 18\n", - " 4 0.40 0.13 0.20 15\n", - " 5 1.00 0.29 0.45 17\n", - "\n", - " micro avg 0.74 0.33 0.45 120\n", - " macro avg 0.75 0.32 0.44 120\n", - "weighted avg 0.75 0.33 0.44 120\n", - " samples avg 0.32 0.33 0.32 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. 
Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.29375000000000007,\n", - " 0.044145041875868936,\n", - " array([0.27083333, 0.27083333, 0.29166667, 0.29166667, 0.29166667,\n", - " 0.375 , 0.35416667, 0.27083333, 0.20833333, 0.3125 ]))" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "interstate-huntington", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mean of number of nodes: 284\n" - ] - } - ], - "source": [ - "DD_ck = closed_kernel(DD)" - ] - }, - { - "cell_type": "raw", - "id": "oriental-lucas", - "metadata": {}, - "source": [ - "train_feat = np.vstack(DD_ck)\n", - "\n", - "gram_matrix = train_feat.dot(train_feat.transpose()).todense()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "rough-organization", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 1 0.85 0.85 0.85 138\n", - " 2 0.79 0.79 0.79 98\n", - "\n", - " accuracy 0.82 236\n", - " macro avg 0.82 0.82 0.82 236\n", - "weighted avg 0.82 0.82 0.82 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7886898096304591,\n", - " 0.04572144929229968,\n", - " array([0.81052632, 0.82105263, 0.76595745, 0.79787234, 0.80851064,\n", - " 0.80851064, 0.82978723, 0.68085106, 0.82978723, 0.73404255]))" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_tt(DD_feat, DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "rising-teaching", - "metadata": {}, - "outputs": [], - "source": [ - "def svm_precomputed_gridsearch_tt(feat_vecs, 
target_vec):\n", - " classes = list(set((target_vec)))\n", - " n_classes = len(classes)\n", - " X_train, X_test, y_train, y_test = train_test_split(feat_vecs, target_vec, test_size=0.2, random_state=4)\n", - " #print(\"after traintestsplit\", X_train.shape, X_test.shape)\n", - " train_feat = sp.vstack(X_train)\n", - " test_feat = sp.vstack(X_test)\n", - " #print(\"after vstack\", train_feat.shape, test_feat.shape)\n", - " gram_matrix = train_feat.dot(train_feat.transpose()).todense()\n", - " gram_test = train_feat.dot(test_feat.transpose()).todense().T\n", - " #print(\"after multiplication\", gram_matrix.shape, gram_test.shape)\n", - "\n", - " if n_classes <= 2:\n", - " param_grid = {'C': [0.001,0.01,0.1,1],\n", - " 'class_weight':['balanced',None]} \n", - " grid = GridSearchCV(SVC(kernel=\"precomputed\"), param_grid, cv = 5, verbose = 3) \n", - " grid.fit(np.array(gram_matrix), np.array(y_train))\n", - " print(grid.best_params_) \n", - " clf = SVC(kernel='precomputed', **grid.best_params_)\n", - " clf.fit(np.array(gram_matrix), np.array(y_train))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train), cv=10)\n", - " print(classification_report(y_test, clf.predict(np.array(gram_test))))\n", - " \n", - " \n", - " else:\n", - " print(\"multilabel SVM\")\n", - " # Use label_binarize to be multi-label like settings\n", - " y_train_bin = label_binarize(y_train, classes=classes)\n", - " y_test_bin = label_binarize(y_test, classes=classes)\n", - " \n", - " param_grid = {'estimator__C': [0.001,0.01, 0.05, 0.1,1], \n", - " 'estimator__class_weight':['balanced',None]} \n", - " test_clf = OneVsRestClassifier(SVC(kernel=\"precomputed\"))\n", - " #print(test_clf.get_params().keys())\n", - " grid = GridSearchCV(test_clf, param_grid, cv = 5, verbose = 3) \n", - " grid.fit(np.array(gram_matrix), np.array(y_train_bin))\n", - " print(grid.best_params_) \n", - " best_params = {\"C\": grid.best_params_[\"estimator__C\"], \"class_weight\": 
grid.best_params_[\"estimator__class_weight\"]}\n", - " clf = OneVsRestClassifier(SVC(kernel='precomputed', **best_params))\n", - " clf.fit(np.array(gram_matrix), np.array(y_train_bin))\n", - " accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train_bin), cv=10)\n", - " print(classification_report(y_test_bin, clf.predict(np.array(gram_test))))\n", - " \n", - " return np.mean(accuracy), np.std(accuracy), accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "simplified-innocent", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.021 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.052 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.021 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.031 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.042 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.260 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.375 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, 
estimator__class_weight=balanced;, score=0.375 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.333 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.104 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.156 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.146 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.156 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.083 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.271 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.323 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.229 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END 
estimator__C=0.1, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.208 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.240 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.250 total time= 0.0s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.219 total time= 0.0s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.302 total time= 0.0s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.312 total time= 0.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.250 total time= 0.0s\n", - "{'estimator__C': 0.01, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.83 0.24 0.37 21\n", - " 1 0.60 0.35 0.44 26\n", - " 2 0.55 0.48 0.51 23\n", - " 3 0.69 0.50 0.58 18\n", - " 4 
0.28 0.33 0.30 15\n", - " 5 0.53 0.47 0.50 17\n", - "\n", - " micro avg 0.54 0.39 0.45 120\n", - " macro avg 0.58 0.39 0.45 120\n", - "weighted avg 0.60 0.39 0.45 120\n", - " samples avg 0.37 0.39 0.38 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.33541666666666664,\n", - " 0.07820135157968108,\n", - " array([0.25 , 0.29166667, 0.3125 , 0.35416667, 0.35416667,\n", - " 0.41666667, 0.45833333, 0.3125 , 0.1875 , 0.41666667]))" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(ENZ_feat, ENZ_target)" - ] - }, - { - "cell_type": "markdown", - "id": "mature-corpus", - "metadata": {}, - "source": [ - "# Graphlet Kernel" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "mighty-entrance", - "metadata": {}, - "outputs": [], - "source": [ - "#make all the non-isomorphic graphs with 5 nodes\n", - "g0=nx.empty_graph(5)\n", - "dic=[0]*34\n", - "dic[0]=nx.create_empty_copy(g0)\n", - "dic[0].add_edges_from([(0,1)])\n", - "dic[1]=nx.create_empty_copy(g0)\n", - "dic[1].add_edges_from([(0,1),(0,2)])\n", - "dic[2]=nx.create_empty_copy(g0)\n", - "dic[2].add_edges_from([(0,1),(2,3)])\n", - "dic[3]=nx.create_empty_copy(g0)\n", - "dic[3].add_edges_from([(0,1),(0,2),(0,3)])\n", - "dic[4]=nx.create_empty_copy(g0)\n", - "dic[4].add_edges_from([(0,1),(0,2),(3,4)])\n", - "dic[5]=nx.create_empty_copy(g0)\n", - "dic[5].add_edges_from([(0,1),(1,2),(2,3)])\n", - "dic[6]=nx.create_empty_copy(g0)\n", - "dic[6].add_edges_from([(0,1),(0,2),(1,2)])\n", - 
"dic[7]=nx.create_empty_copy(g0)\n", - "dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)])\n", - "dic[8]=nx.create_empty_copy(g0)\n", - "dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)])\n", - "dic[9]=nx.create_empty_copy(g0)\n", - "dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)])\n", - "dic[10]=nx.create_empty_copy(g0)\n", - "dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)])\n", - "dic[11]=nx.create_empty_copy(g0)\n", - "dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)])\n", - "dic[12]=nx.create_empty_copy(g0)\n", - "dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)])\n", - "dic[13]=nx.create_empty_copy(g0)\n", - "dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)])\n", - "dic[14]=nx.create_empty_copy(g0)\n", - "dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)])\n", - "dic[15]=nx.create_empty_copy(g0)\n", - "dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)])\n", - "dic[16]=nx.create_empty_copy(g0)\n", - "dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)])\n", - "dic[17]=nx.create_empty_copy(g0)\n", - "dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)])\n", - "dic[18]=nx.create_empty_copy(g0)\n", - "dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)])\n", - "dic[19]=nx.create_empty_copy(g0)\n", - "dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)])\n", - "dic[20]=nx.create_empty_copy(g0)\n", - "dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)])\n", - "dic[21]=nx.create_empty_copy(g0)\n", - "dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)])\n", - "dic[22]=nx.create_empty_copy(g0)\n", - "dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)])\n", - "dic[23]=nx.create_empty_copy(g0)\n", - "dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)])\n", - "dic[24]=nx.create_empty_copy(g0)\n", - "dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)])\n", - "dic[25]=nx.create_empty_copy(g0)\n", - "dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - 
"dic[26]=nx.create_empty_copy(g0)\n", - "dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)])\n", - "dic[27]=nx.create_empty_copy(g0)\n", - "dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)])\n", - "dic[28]=nx.create_empty_copy(g0)\n", - "dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)])\n", - "dic[29]=nx.create_empty_copy(g0)\n", - "dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)])\n", - "dic[30]=nx.create_empty_copy(g0)\n", - "dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)])\n", - "dic[31]=nx.create_empty_copy(g0)\n", - "dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)])\n", - "dic[32]=nx.create_empty_copy(g0)\n", - "dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)])\n", - "dic.insert(0,nx.create_empty_copy(g0))" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "equipped-semiconductor", - "metadata": {}, - "outputs": [], - "source": [ - "def graphlet_kernel(data):\n", - "#make a dict for counting \n", - " vektor=dict(zip(dic,[0]*34))\n", - " output=[]\n", - " \n", - "#count the Graphlets\n", - " def count_graphlet(g):\n", - " for k,v in temp.items():\n", - " if nx.is_isomorphic(k,g):\n", - " temp[k]+=1\n", - " break\n", - " \n", - "#iterate over all graphs \n", - " for graph in data:\n", - " temp=vektor.copy()\n", - "#if the number of nodes is less than 5,then output a vektor with zeros.\n", - " if len(graph.nodes())<5:\n", - " output.append(list(temp.values()))\n", - " else:\n", - "#if the number of nodes is more than 5,randomly sample 1000 times.\n", - " for j in range(1000):\n", - " temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5))\n", - " count_graphlet(temp_subgraph)\n", - " output.append(list(temp.values()))\n", - " return output" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "august-guarantee", - "metadata": {}, - "outputs": [], - "source": [ 
- "DD_graphlet = graphlet_kernel(DD)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "excess-niagara", - "metadata": {}, - "outputs": [], - "source": [ - "ENZ_graphlet = graphlet_kernel(ENZ)" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "ecological-berlin", - "metadata": {}, - "outputs": [], - "source": [ - "NCI_graphlet = graphlet_kernel(NCI)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "acceptable-gender", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<1178x34 sparse matrix of type '<class 'numpy.intc'>'\n", - "\twith 6861 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sp.vstack(sp.csr_matrix(np.array(DD_graphlet)))" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "contained-excuse", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.730 total time= 0.0s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.757 total time= 0.0s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.739 total time= 0.0s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.734 total time= 0.0s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.741 total time= 0.0s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.778 total time= 0.0s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.707 total time= 0.0s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.745 total time= 0.0s\n", - "[CV 1/5] END .....C=0.01, 
class_weight=balanced;, score=0.735 total time= 0.0s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.746 total time= 0.0s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.793 total time= 0.0s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.739 total time= 0.0s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.745 total time= 0.0s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.735 total time= 0.0s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.783 total time= 0.0s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.777 total time= 0.0s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.702 total time= 0.0s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, score=0.734 total time= 0.0s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.730 total time= 0.3s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.757 total time= 0.3s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.782 total time= 0.2s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.739 total time= 0.2s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.734 total time= 0.3s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.725 total time= 0.4s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.783 total time= 0.3s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.771 total time= 0.3s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.702 total time= 0.4s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.734 total time= 0.4s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.725 total time= 4.2s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.741 total time= 3.1s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.787 total time= 5.2s\n", - "[CV 4/5] END ........C=1, 
class_weight=balanced;, score=0.734 total time= 2.6s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.734 total time= 3.6s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.735 total time= 5.9s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.783 total time= 4.5s\n", - "[CV 3/5] END ............C=1, class_weight=None;, score=0.750 total time= 3.8s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.707 total time= 4.2s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.723 total time= 4.5s\n", - "{'C': 0.001, 'class_weight': None}\n", - " precision recall f1-score support\n", - "\n", - " 1 0.73 0.90 0.81 138\n", - " 2 0.79 0.54 0.64 98\n", - "\n", - " accuracy 0.75 236\n", - " macro avg 0.76 0.72 0.73 236\n", - "weighted avg 0.76 0.75 0.74 236\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.7463045912653976,\n", - " 0.04357951487182796,\n", - " array([0.72631579, 0.74736842, 0.74468085, 0.78723404, 0.78723404,\n", - " 0.78723404, 0.76595745, 0.63829787, 0.76595745, 0.71276596]))" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(DD_graphlet)), DD_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "appreciated-cycle", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.073 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 5/5] END 
estimator__C=0.001, estimator__class_weight=balanced;, score=0.031 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.021 total time= 0.8s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.010 total time= 0.7s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.021 total time= 0.6s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.5s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.031 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.042 total time= 0.2s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.031 total time= 0.3s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.021 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.042 total time= 0.3s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 3.3s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.031 total time= 5.3s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.021 total time= 5.1s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.031 total time= 4.8s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.010 total time= 4.3s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.021 total time= 2.2s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.031 total time= 2.1s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.021 total time= 1.8s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.031 total 
time= 1.8s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.052 total time= 1.9s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 12.1s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.021 total time= 29.5s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.010 total time= 21.0s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.031 total time= 29.2s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.010 total time= 21.5s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.021 total time= 4.7s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.042 total time= 5.3s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.031 total time= 3.7s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.052 total time= 4.3s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.062 total time= 5.3s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 28.1s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 58.2s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 41.3s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.021 total time= 52.9s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.010 total time= 33.9s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.031 total time= 1.0min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.052 total time= 47.8s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.031 total time= 53.0s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=balanced;, 
score=0.021 total time= 47.4s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.021 total time= 46.1s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 3.2min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 8.7min\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.010 total time= 6.5min\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.021 total time= 8.8min\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.021 total time= 4.3min\n", - "{'estimator__C': 0.1, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.16 0.57 0.25 21\n", - " 1 0.30 0.35 0.32 26\n", - " 2 0.32 0.52 0.40 23\n", - " 3 0.18 0.72 0.29 18\n", - " 4 0.17 0.33 0.23 15\n", - " 5 0.26 0.47 0.33 17\n", - "\n", - " micro avg 0.21 0.49 0.30 120\n", - " macro avg 0.23 0.49 0.30 120\n", - "weighted avg 0.24 0.49 0.31 120\n", - " samples avg 0.24 0.49 0.31 120\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Lea\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. 
Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.03958333333333333,\n", - " 0.025430324199445214,\n", - " array([0.02083333, 0.02083333, 0.04166667, 0.02083333, 0.02083333,\n", - " 0.0625 , 0.02083333, 0.04166667, 0.10416667, 0.04166667]))" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(ENZ_graphlet)), ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "id": "hearing-lightweight", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n", - "[CV 1/5] END ....C=0.001, class_weight=balanced;, score=0.632 total time= 0.3s\n", - "[CV 2/5] END ....C=0.001, class_weight=balanced;, score=0.612 total time= 0.4s\n", - "[CV 3/5] END ....C=0.001, class_weight=balanced;, score=0.637 total time= 0.3s\n", - "[CV 4/5] END ....C=0.001, class_weight=balanced;, score=0.604 total time= 0.3s\n", - "[CV 5/5] END ....C=0.001, class_weight=balanced;, score=0.642 total time= 0.3s\n", - "[CV 1/5] END ........C=0.001, class_weight=None;, score=0.635 total time= 0.3s\n", - "[CV 2/5] END ........C=0.001, class_weight=None;, score=0.617 total time= 0.2s\n", - "[CV 3/5] END ........C=0.001, class_weight=None;, score=0.622 total time= 0.2s\n", - "[CV 4/5] END ........C=0.001, class_weight=None;, score=0.604 total time= 0.2s\n", - "[CV 5/5] END ........C=0.001, class_weight=None;, score=0.635 total time= 0.2s\n", - "[CV 1/5] END .....C=0.01, class_weight=balanced;, score=0.626 total time= 0.7s\n", - "[CV 2/5] END .....C=0.01, class_weight=balanced;, score=0.619 total time= 0.7s\n", - "[CV 3/5] END .....C=0.01, class_weight=balanced;, score=0.635 total time= 0.7s\n", - "[CV 4/5] END .....C=0.01, class_weight=balanced;, score=0.607 total 
time= 0.8s\n", - "[CV 5/5] END .....C=0.01, class_weight=balanced;, score=0.642 total time= 0.7s\n", - "[CV 1/5] END .........C=0.01, class_weight=None;, score=0.625 total time= 0.8s\n", - "[CV 2/5] END .........C=0.01, class_weight=None;, score=0.617 total time= 0.7s\n", - "[CV 3/5] END .........C=0.01, class_weight=None;, score=0.626 total time= 0.8s\n", - "[CV 4/5] END .........C=0.01, class_weight=None;, score=0.606 total time= 0.8s\n", - "[CV 5/5] END .........C=0.01, class_weight=None;, score=0.639 total time= 0.7s\n", - "[CV 1/5] END ......C=0.1, class_weight=balanced;, score=0.629 total time= 3.4s\n", - "[CV 2/5] END ......C=0.1, class_weight=balanced;, score=0.622 total time= 3.7s\n", - "[CV 3/5] END ......C=0.1, class_weight=balanced;, score=0.640 total time= 4.0s\n", - "[CV 4/5] END ......C=0.1, class_weight=balanced;, score=0.607 total time= 3.8s\n", - "[CV 5/5] END ......C=0.1, class_weight=balanced;, score=0.641 total time= 3.8s\n", - "[CV 1/5] END ..........C=0.1, class_weight=None;, score=0.628 total time= 4.9s\n", - "[CV 2/5] END ..........C=0.1, class_weight=None;, score=0.620 total time= 4.1s\n", - "[CV 3/5] END ..........C=0.1, class_weight=None;, score=0.631 total time= 4.0s\n", - "[CV 4/5] END ..........C=0.1, class_weight=None;, score=0.607 total time= 4.3s\n", - "[CV 5/5] END ..........C=0.1, class_weight=None;, score=0.636 total time= 3.7s\n", - "[CV 1/5] END ........C=1, class_weight=balanced;, score=0.629 total time= 31.3s\n", - "[CV 2/5] END ........C=1, class_weight=balanced;, score=0.622 total time= 35.6s\n", - "[CV 3/5] END ........C=1, class_weight=balanced;, score=0.637 total time= 28.5s\n", - "[CV 4/5] END ........C=1, class_weight=balanced;, score=0.604 total time= 36.2s\n", - "[CV 5/5] END ........C=1, class_weight=balanced;, score=0.639 total time= 30.0s\n", - "[CV 1/5] END ............C=1, class_weight=None;, score=0.631 total time= 33.8s\n", - "[CV 2/5] END ............C=1, class_weight=None;, score=0.619 total time= 33.0s\n", 
- "[CV 3/5] END ............C=1, class_weight=None;, score=0.620 total time= 30.1s\n", - "[CV 4/5] END ............C=1, class_weight=None;, score=0.609 total time= 34.0s\n", - "[CV 5/5] END ............C=1, class_weight=None;, score=0.635 total time= 28.7s\n", - "{'C': 0.1, 'class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.60 0.64 0.62 385\n", - " 1 0.66 0.62 0.64 437\n", - "\n", - " accuracy 0.63 822\n", - " macro avg 0.63 0.63 0.63 822\n", - "weighted avg 0.63 0.63 0.63 822\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.6286511231373713,\n", - " 0.01745021029215421,\n", - " array([0.64741641, 0.61702128, 0.61094225, 0.6443769 , 0.65653495,\n", - " 0.63221884, 0.59878419, 0.61702128, 0.6402439 , 0.62195122]))" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(np.array(NCI_graphlet)), NCI_target)" - ] - }, - { - "cell_type": "markdown", - "id": "increased-delivery", - "metadata": {}, - "source": [ - "# closed walk" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "dense-excess", - "metadata": {}, - "outputs": [], - "source": [ - "max_enz = max([max(point) for point in ENZ_features])/1000\n", - "ENZ_normalized = np.array(np.array(ENZ_features)/max_enz, dtype=float)" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "id": "coupled-married", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "multilabel SVM\n", - "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.001, 
estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 2/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 3/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.2s\n", - "[CV 4/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.001, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 2/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.3s\n", - "[CV 3/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.4s\n", - "[CV 4/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 5/5] END estimator__C=0.01, estimator__class_weight=None;, score=0.000 total time= 0.6s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.3s\n", - "[CV 
4/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.000 total time= 0.2s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=balanced;, score=0.010 total time= 0.0s\n", - "[CV 1/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 13.4s\n", - "[CV 2/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 1.7s\n", - "[CV 3/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 5.3s\n", - "[CV 4/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 6.9s\n", - "[CV 5/5] END estimator__C=0.05, estimator__class_weight=None;, score=0.000 total time= 8.7s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.2s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.1s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.0s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.000 total time= 0.3s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=balanced;, score=0.010 total time= 0.1s\n", - "[CV 1/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 20.1s\n", - "[CV 2/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 17.0s\n", - "[CV 3/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 15.1s\n", - "[CV 4/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 29.6s\n", - "[CV 5/5] END estimator__C=0.1, estimator__class_weight=None;, score=0.000 total time= 27.3s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.6s\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 0.6s\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 0.6s\n", 
- "[CV 4/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.3s\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=balanced;, score=0.000 total time= 1.2s\n", - "[CV 1/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.6min\n", - "[CV 2/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.1min\n", - "[CV 3/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 47.5s\n", - "[CV 4/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 1.6min\n", - "[CV 5/5] END estimator__C=1, estimator__class_weight=None;, score=0.000 total time= 52.6s\n", - "{'estimator__C': 0.001, 'estimator__class_weight': 'balanced'}\n", - " precision recall f1-score support\n", - "\n", - " 0 0.18 1.00 0.30 21\n", - " 1 0.26 1.00 0.41 26\n", - " 2 0.00 0.00 0.00 23\n", - " 3 0.50 0.06 0.10 18\n", - " 4 0.12 0.87 0.20 15\n", - " 5 0.00 0.00 0.00 17\n", - "\n", - " micro avg 0.18 0.51 0.27 120\n", - " macro avg 0.18 0.49 0.17 120\n", - "weighted avg 0.18 0.51 0.18 120\n", - " samples avg 0.20 0.51 0.28 120\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.0, 0.0, array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "svm_precomputed_gridsearch_tt(sp.csr_matrix(ENZ_normalized), ENZ_target)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "humanitarian-situation", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/NCI1/.DS_Store b/Python_files/ex2/Ex1/datasets/NCI1/.DS_Store deleted file mode 100644 index 2ebdc99225f67b4c77a99cda1e323630bf220956..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/NCI1/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/NCI1/data2.pkl b/Python_files/ex2/Ex1/datasets/NCI1/data2.pkl deleted file mode 100644 index 6c72a48da0903e887c68acff31d5715f0072cd6c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/NCI1/data2.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/Untitled.ipynb b/Python_files/ex2/Ex1/datasets/Untitled.ipynb deleted file mode 100644 index 7b87ea73a6cbd9cd62fd69e918bad5fe57ad376f..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/Untitled.ipynb +++ /dev/null @@ -1,78 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 74, - "id": "fifteen-merchandise", - "metadata": {}, - "outputs": [], - "source": [ - "import _pickle as pickle\n", - "import networkx as nx\n", - "import numpy as np\n", - "import argparse as arg" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "effective-brave", - "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'DD/data.pkl'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-78-60aedb0a50f5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'DD/data.pkl'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m 
\u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'DD/data.pkl'" - ] - } - ], - "source": [ - "with open('DD/data.pkl','rb') as f:\n", - " data = pickle.load(f)\n", - "len(data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "spectacular-edwards", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "young-riverside", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/Ex1/datasets/ex1/.DS_Store b/Python_files/ex2/Ex1/datasets/ex1/.DS_Store deleted file mode 100644 index 8e627a99d8e9cfc8ea2709922ec67eda81057390..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/README.md b/Python_files/ex2/Ex1/datasets/ex1/README.md deleted file mode 100644 index 50cca315745698f7e4ab01c7dbb70190ba0f132e..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/ex1/README.md 
+++ /dev/null @@ -1,75 +0,0 @@ -# Graph Learning -This readme is used to describe how to run our code and the results we obtained in each exercise. - -## Structure of the repository - -The repository contains five different files: \ -\ - 1. closed_walk_kernel.py: This file contains the Closed Walk Kernel of exercise 1. \ - 2. graphlet_kernel.py: This file contains the Graphlet Kernel of exercise 2. \ - 3. wl_kernel.py: This file contains the Weisfeiler-Leman-Kernel of exercise 3.\ - 4. svm_function.py: This file contains the Support Vector Machine of exercise 4.\ - 5. arg_code_ex1.py: This is the main code where the defined kernels and functions (1.-4.) are imported and called. - -## How to run the script - -This script uses argparse. \ -\ -To run the script it is necessary to call the file 'arg_code_ex1.py'. It is required to choose the kernel and the dataset of interest. The arguments '-k' and '-P' are implemented to address the kernels and paths respectively. The kernel of interest can be chosen with the filename (without filename extension), while the dataset can be chosen with the pathname. \ -Further, there is an additional optional argument '-eval' which, when given the value 'svm', runs the Support Vector Machine. - - -#### Example for running the script - -The following command should be run in the terminal to call the Closed Walk Kernel with the dataset 'Enzymes' (if the folder "datasets" is in the same location as the python file): \ -\ - -- python arg_code_ex1.py -k closed_walk_kernel -P datasets/ENZYMES/data.pkl - -The following command should be run in the terminal to call the Closed Walk Kernel with the dataset 'Enzymes' and then perform graph classification with a Support Vector Machine: \ -\ - -- python arg_code_ex1.py -k closed_walk_kernel -P datasets/ENZYMES/data.pkl -eval svm - -## Notes on the Exercises - -### Ex.1: Choice of maximal length l - -Our goal was to find a variable which takes into account the size of the respective graphs. 
After having considered several graph characteristics, like the diameter, minimum or maximum number of nodes, we chose l to be the mean number of nodes of the respective graph collections, because the other attributes are either too high (maximum number of nodes), too low (minimum number of nodes) or give an infinite path length because the datasets contain graphs which are not connected (diameter). Our choice ensures a suitable balance between information and complexity. This yields the following values of l: DD: 284, ENZYMES: 32 and NCI1: 29 (rounded down). - -### Ex.4: Train/Test split and Gram Matrix - -In order to ensure reliable and independent results for the SVM, we performed a train-test-split of the graph data (train: 80%, test: 20%). We then trained a classifier on the training data and evaluated it with 10-fold cross-validation on the training data. The testing data was evaluated separately. - -As the feature vectors of the WL-kernel are very large and sparse, we used the option "kernel=precomputed" in the SVM and used the gram matrix of the feature vectors as input. For the other two kernels, we used the "raw" feature vectors as input to the SVM. 
- -## Results - -### DD - -| | Closed Walk Kernel | Graphlet Kernel | WL-Kernel | -|-----------------------------|--------------------|------------------|-----------| -|**train data mean accuracy** | 0.593 | 0.744 | 0.789 | -|**train data standard dev.** | 0.004 | 0.021 | 0.046 | -|**test data accuracy** | 0.559 | 0.707 | **0.822** | - - -### ENZYMES - -| | Closed Walk Kernel | Graphlet Kernel | WL-Kernel | -|-----------------------------|--------------------|------------------|-----------| -|**train data mean accuracy** | 0.187 | 0.2625 | 0.517 | -|**train data standard dev.** | 0.034 | 0.053 | 0.084 | -|**test data accuracy** | 0.142 | 0.175 | **0.492** | - - -### NCI1 - -| | Closed Walk Kernel | Graphlet Kernel | WL-Kernel | -|-----------------------------|--------------------|------------------|-----------| -|**train data mean accuracy** | 0.510 | 0.610 | 0.815 | -|**train data standard dev.** | 0.061 | 0.021 | 0.016 | -|**test data accuracy** | 0.533 | 0.658 | **0.813** | - - -Our results show that the Weisfeiler-Leman-Kernel performs best throughout all datasets. This is not surprising, as the WL-Kernel is the most sophisticated kernel we have used so far. Compared to the paper *Weisfeiler-Lehman Graph Kernels*, our WL-kernel achieved accuracy comparable to the paper's on all three datasets (paper NCI1: 82.19 (± 0.18), DD: 79.78 (±0.36), ENZYMES 46.42 (±1.35)). - -It is also interesting to note that a (3-)Graphlet Kernel appeared in the paper as a reference Kernel, but both in our calculations and in the paper, the WL-Kernel always outperformed the Graphlet Kernel (albeit sometimes only slightly). The Closed Walk Kernel, on the other hand, does not have enough explanatory power to achieve competitive results, so it also does not appear in the paper. 
diff --git a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/closed_walk_kernel.cpython-38.pyc b/Python_files/ex2/Ex1/datasets/ex1/__pycache__/closed_walk_kernel.cpython-38.pyc deleted file mode 100644 index 2b68a2376201b5479854f530bd6f7436e4f103c0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/closed_walk_kernel.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/graphlet_kernel.cpython-38.pyc b/Python_files/ex2/Ex1/datasets/ex1/__pycache__/graphlet_kernel.cpython-38.pyc deleted file mode 100644 index 2ce7793147a23b064fdfdba182b1f0ac5b9afbd3..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/graphlet_kernel.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/svm_function.cpython-38.pyc b/Python_files/ex2/Ex1/datasets/ex1/__pycache__/svm_function.cpython-38.pyc deleted file mode 100644 index 9403fc51e4794e9b812b8431a49a2e59d2621147..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/svm_function.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/wl_kernel.cpython-38.pyc b/Python_files/ex2/Ex1/datasets/ex1/__pycache__/wl_kernel.cpython-38.pyc deleted file mode 100644 index aaa32618b530795d117e7f8b0fff5ed5a03832bf..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/__pycache__/wl_kernel.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/arg_code_ex1.py b/Python_files/ex2/Ex1/datasets/ex1/arg_code_ex1.py deleted file mode 100644 index bf1fbd3b2bafe2fd243e1f9939844d432ec46a61..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/ex1/arg_code_ex1.py +++ /dev/null @@ -1,59 +0,0 @@ -import pickle -import argparse - -""" -The following code is the main code where the defined kernels and functions 
are imported and called. -""" - -# import defined kernels and functions -from closed_walk_kernel import closed_walk_kernel -from graphlet_kernel import graphlet_kernel -from wl_kernel import wl_kernel -from svm_function import svm_precomputed_tt, svm_linear_tt,svm_tt - -# Create arguments which are needed for the command line -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--kernel', required = True, help='Choose the kernel of interest') -parser.add_argument('-P', '--path', required = True, help='Choose the path of the dataset of interest') -parser.add_argument('-eval', '--svm', help='Call if you want to make use of SVM') -args = parser.parse_args() - -# load the data -with open(args.path, 'rb') as file: - data = pickle.load(file) - -# 'react' if this file is called -# run the chosen kernel -# If SVM is called then run it -if __name__ == '__main__': - print("Computing Kernel") - if args.kernel == 'closed_walk_kernel': - feature_vectors = closed_walk_kernel(data) - - elif args.kernel == 'graphlet_kernel': - feature_vectors = graphlet_kernel(data) - - elif args.kernel == 'wl_kernel': - feature_vectors = wl_kernel(data) - - else: - raise Exception("Chosen kernel does not exist :S") - - if args.svm == 'svm': - print("Computing SVM") - target_label = [g.graph['label'] for g in data] - if args.kernel == 'wl_kernel': - svm_precomputed_tt(feature_vectors, target_label) - elif args.kernel == 'closed_walk_kernel': - svm_linear_tt(feature_vectors, target_label) - else: - svm_tt(feature_vectors, target_label) - - - - - - - - - diff --git a/Python_files/ex2/Ex1/datasets/ex1/closed_walk_kernel.py b/Python_files/ex2/Ex1/datasets/ex1/closed_walk_kernel.py deleted file mode 100644 index 3fab98b69b02702db22e006e16e905376bc20cdd..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/ex1/closed_walk_kernel.py +++ /dev/null @@ -1,34 +0,0 @@ -import networkx as nx -import numpy as np - -# Exercise 1 -def closed_walk_kernel(graph_list): - 
"""Implemention of the Closed Walk Kernel. - - Keyword argument: - graph_list -- Dataset representing list of graphs - - Key idea: - Computation through eigenvalues with help of the eigenvalue decomposition. - - Returns: List of Histograms (one histogram for every graph of the dataset) """ - - # Compute the mean number of nodes over all graphs - l = int(np.mean([len(g.nodes) for g in graph_list])) - print("mean of number of nodes:", l) - - - # Compute the histogram of closed walks of different length up to the mean number of nodes - feature_vectors = [] - for graph in graph_list: - number = [] - A = nx.adjacency_matrix(graph) - A =A.todense() - lambdas = np.linalg.eigvalsh(A) - for j in range(1, l+1): - power_lambdas= [x**(j) for x in lambdas ] - sum_lambdas=int(np.round(sum(power_lambdas))) - number.append(sum_lambdas) - feature_vectors.append(number) - - return feature_vectors diff --git a/Python_files/ex2/Ex1/datasets/ex1/data.pkl b/Python_files/ex2/Ex1/datasets/ex1/data.pkl deleted file mode 100644 index 546de71a1d97226a0a1a3e0f89a34671520934f9..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/data.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/data1.pkl b/Python_files/ex2/Ex1/datasets/ex1/data1.pkl deleted file mode 100644 index e4e7190c25ffe6ac0636afceb46b30d55557319f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/data1.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/data2.pkl b/Python_files/ex2/Ex1/datasets/ex1/data2.pkl deleted file mode 100644 index 6c72a48da0903e887c68acff31d5715f0072cd6c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/Ex1/datasets/ex1/data2.pkl and /dev/null differ diff --git a/Python_files/ex2/Ex1/datasets/ex1/graphlet_kernel.py b/Python_files/ex2/Ex1/datasets/ex1/graphlet_kernel.py deleted file mode 100644 index 
bbf7e26ad0e2f7bd01b171232be99236ab2d3e70..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/ex1/graphlet_kernel.py +++ /dev/null @@ -1,116 +0,0 @@ -import networkx as nx -import numpy as np -import random - - - -#Exercise 2 - - -def graphlet_kernel(data): - ''' - Count the number of randomly sampled graphlets of a graph for all graphs in the dataset. - - Key idea: Sample graphlets with five nodes a thousand times from a given graph and store the number of isomorphic types in a histogram - - input:dataset - output:list of counter for all the graphs in the dataset - ''' - -#create all the non-isoorphic graphs with 5 nodes and store it in a list called 'dic' - g0=nx.empty_graph(5) - dic=[0]*34 - dic[0]=nx.create_empty_copy(g0) - dic[0].add_edges_from([(0,1)]) - dic[1]=nx.create_empty_copy(g0) - dic[1].add_edges_from([(0,1),(0,2)]) - dic[2]=nx.create_empty_copy(g0) - dic[2].add_edges_from([(0,1),(2,3)]) - dic[3]=nx.create_empty_copy(g0) - dic[3].add_edges_from([(0,1),(0,2),(0,3)]) - dic[4]=nx.create_empty_copy(g0) - dic[4].add_edges_from([(0,1),(0,2),(3,4)]) - dic[5]=nx.create_empty_copy(g0) - dic[5].add_edges_from([(0,1),(1,2),(2,3)]) - dic[6]=nx.create_empty_copy(g0) - dic[6].add_edges_from([(0,1),(0,2),(1,2)]) - dic[7]=nx.create_empty_copy(g0) - dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)]) - dic[8]=nx.create_empty_copy(g0) - dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)]) - dic[9]=nx.create_empty_copy(g0) - dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)]) - dic[10]=nx.create_empty_copy(g0) - dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)]) - dic[11]=nx.create_empty_copy(g0) - dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)]) - dic[12]=nx.create_empty_copy(g0) - dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)]) - dic[13]=nx.create_empty_copy(g0) - dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)]) - dic[14]=nx.create_empty_copy(g0) - dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)]) - dic[15]=nx.create_empty_copy(g0) - 
dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)]) - dic[16]=nx.create_empty_copy(g0) - dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)]) - dic[17]=nx.create_empty_copy(g0) - dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)]) - dic[18]=nx.create_empty_copy(g0) - dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)]) - dic[19]=nx.create_empty_copy(g0) - dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)]) - dic[20]=nx.create_empty_copy(g0) - dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)]) - dic[21]=nx.create_empty_copy(g0) - dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)]) - dic[22]=nx.create_empty_copy(g0) - dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)]) - dic[23]=nx.create_empty_copy(g0) - dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)]) - dic[24]=nx.create_empty_copy(g0) - dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)]) - dic[25]=nx.create_empty_copy(g0) - dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[26]=nx.create_empty_copy(g0) - dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)]) - dic[27]=nx.create_empty_copy(g0) - dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)]) - dic[28]=nx.create_empty_copy(g0) - dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)]) - dic[29]=nx.create_empty_copy(g0) - dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)]) - dic[30]=nx.create_empty_copy(g0) - dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[31]=nx.create_empty_copy(g0) - dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)]) - dic[32]=nx.create_empty_copy(g0) - dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)]) - dic.insert(0,nx.create_empty_copy(g0)) - -#make a initial dict called 'vektor' to count the non-isoorphic graphs - vektor=dict(zip(dic,[0]*34)) - output=[] - -#to check which graphlet 
the induced subgraph is isomorphic to,and plus 1. -#input:the induced subgraph - - def count_graphlet(g): - for k,v in temp.items(): - if nx.is_isomorphic(k,g): - temp[k]+=1 - break - -#iterate over all graphs in the dataset - for graph in data: - temp=vektor.copy() -#if the number of nodes of the gragh is less than 5,then output a vektor with zeros,because can't be isomorphic graph. - if len(graph.nodes())<5: - output.append(list(temp.values())) - else: -#if the number of nodes is more than 5,randomly sample 1000 times. - for j in range(1000): - temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5)) - count_graphlet(temp_subgraph) - output.append(list(temp.values())) - return output diff --git a/Python_files/ex2/Ex1/datasets/ex1/svm_function.py b/Python_files/ex2/Ex1/datasets/ex1/svm_function.py deleted file mode 100644 index 3147a1d8984b53dcc0a890550782cd4784cf97d1..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/ex1/svm_function.py +++ /dev/null @@ -1,59 +0,0 @@ -import scipy.sparse as sp -import numpy as np - - -from sklearn.svm import SVC, LinearSVC -from sklearn.metrics import accuracy_score -from sklearn.model_selection import train_test_split, cross_val_score - -# Exercise 4 - -''' -This file defines the Support Vector Machines which will train on the datasets. - -Key Idea: -Make use of 10-fold cross validation to measure the accuracy of each kernel on each dataset. 
Further, choose 80% of the dataset as trainin -''' - -#SVC with 'linear' kernel -#input:features and targets calculated from previous kernel -#output:mean and deviation accuracy of validation data and accuracy of test data -def svm_tt(features,targets): - X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2) - clf = SVC(kernel='linear', C=1) - clf.fit(X_train,y_train) - accuracy= cross_val_score(clf,X_train, y_train, cv=10 ) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print ("Accuracy of test data:", accuracy_score(y_test,clf.predict(X_test))) - -#LinearSVC -#input:features and targets calculated from previous kernel -#output:mean and deviation accuracy of validation data and accuracy of test data -def svm_linear_tt(features,targets): - X_train, X_test, y_train, y_test = train_test_split(np.array(features),np.array(targets), test_size=0.2) - clf = LinearSVC(C=1) - clf.fit(X_train,y_train) - accuracy= cross_val_score(clf,X_train, y_train, cv=10 ) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print ("Accuracy of test data:", accuracy_score(y_test,clf.predict(X_test))) - -#SVC with 'precomputed' kernel -#input:features and targets calculated from previous kernel -#output:mean and deviation accuracy of validation data and accuracy of test data -def svm_precomputed_tt(feat_vecs, target_vec): - X_train, X_test, y_train, y_test = train_test_split(feat_vecs, target_vec, test_size=0.2, random_state=4) -#in order to use the 'precomputed' kernel,first calculate the gram_matrix - train_feat = sp.vstack(X_train) - test_feat = sp.vstack(X_test) - gram_matrix = train_feat.dot(train_feat.transpose()).todense() - gram_test = train_feat.dot(test_feat.transpose()).todense().T - - clf = SVC(kernel='precomputed') - clf.fit(np.array(gram_matrix), np.array(y_train)) - 
accuracy = cross_val_score(clf, np.array(gram_matrix), np.array(y_train), cv=10) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print("Accuracy of test data:", accuracy_score(y_test, clf.predict(np.array(gram_test)))) - diff --git a/Python_files/ex2/Ex1/datasets/ex1/wl_kernel.py b/Python_files/ex2/Ex1/datasets/ex1/wl_kernel.py deleted file mode 100644 index ea12b41e29f0861ebea111e820da90916a3a561c..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/datasets/ex1/wl_kernel.py +++ /dev/null @@ -1,96 +0,0 @@ -import networkx as nx -from collections import Counter -import scipy.sparse as sp -import numpy as np -from multiset import FrozenMultiset - - -# Exercise 3: Weisfeiler-Leman-Kernel - -def wl_kernel(orig_graphs, labelname="node_label", rounds=4): - ''' - Implementation of the Weisfeiler-Leman-Kernel - - Keyword Arguments - orig_graphs: original list of graphs - label_name: initial node labels/colors (can be None, default value: "node_label") - rounds: number of rounds of color refinement - - return: f_vecs -> list of histograms, one for each graph - (each histogram: sparse coo-matrix of shape (1, total_number_of_colors)) - - Key ideas: - - store the colors as node attributes of the respective graphs - - use a hash function to compute new colors, but assign each new hashcolor to an integer - color (starting from 0) and store the pairs in a dictionary (keys: hashcolors, values: - respective integer colors) - - use integer colors as indices in the final histograms (e.g. 
the number of occurences of - color 4 is stored at fvecs[4]) - ''' - - #copy graphs because they are modified later - graphs = [graph.copy() for graph in orig_graphs] - - ##### COLOR REFINEMENT ############ - idx_counter = 0 - coldict = dict() #save all colors in a dictionary (keys: hash values, values: index in the final histograms) - - #initial colors: if there is a initial color scheme, use it in round 0 - if labelname: - for graph in graphs: - init_labels = nx.get_node_attributes(graph, labelname) #dict {node: label} - hash_labels = {key: hash(value) for key,value in init_labels.items()} #hash label values (-hashcolors) so that they are the same for all coming graphs and rounds - colors = list(set(hash_labels.values())) #list of the different colors in this graph - for hashcol in colors: - #check if colors already have been saved in coldict and save them if not - if hashcol not in coldict.keys(): - coldict[hashcol] = idx_counter - idx_counter += 1 #counts total number of colors - #change from hashed colors to final integer colors which will be used afterwards - new_labels = {key: coldict[hashvalue] for key,hashvalue in hash_labels.items()} - nx.set_node_attributes(graph, new_labels, str(0)) - # no initial color scheme -> every node gets same color - else: - for graph in graphs: - nx.set_node_attributes(graph, 0, str(0)) - #save color in coldict and increment idx_counter (which counts total number of colors) - coldict[0] = idx_counter #here: 0 - idx_counter += 1 - - #next rounds of color refinement - for k in range(1, rounds+1): - for graph in graphs: - #attribute dictionaries - attrs_last_round = nx.get_node_attributes(graph, str(k-1)) #dictionary with nodes as keys and corresponding attributes of last round as values - attrs_this_round = dict() #where you save attributes of this round - - #compute current color of each node - for node in graph.nodes(): - #get colors of neighbors and hash them together with the node's color - colset = 
FrozenMultiset(attrs_last_round.get(neighbor) for neighbor in list(graph[node])) - hashcol = hash((attrs_last_round.get(node), colset)) - #if hash produces a new color: - if hashcol not in coldict.keys(): - coldict[hashcol] = idx_counter - idx_counter += 1 - attrs_this_round[node] = coldict[hashcol] - #save current colors of the graph as node attributes - nx.set_node_attributes(graph, attrs_this_round, name=str(k)) - - - ####### CONSTRUCT FEATURE VECTORS ############### - f_vecs = list() #where feature vectors (histograms) will be stored - for graph in graphs: - c = Counter() - for k in range(rounds): - #count number of colors that appeared in each round, - #e.g. c = {0:302, 1:4} if color 0 appeared 302 times and color 1 appeared 4 times - c.update(nx.get_node_attributes(graph, str(k)).values()) - #create feature vector as sparse matrix in format 1 x idx_counter - data = np.array(list(c.values())) - col = np.array(list(c.keys())) - row = np.zeros(len(col)) #only one row for each histogram - f_vec = sp.coo_matrix((data, (row,col)), shape=(1, idx_counter)) #feature vector with histogram entries - f_vecs.append(f_vec) - - return f_vecs diff --git a/Python_files/ex2/Ex1/graphlet_kernel.py b/Python_files/ex2/Ex1/graphlet_kernel.py deleted file mode 100644 index 33f3fafe544e5257149317550f2dcb04ac4966ca..0000000000000000000000000000000000000000 --- a/Python_files/ex2/Ex1/graphlet_kernel.py +++ /dev/null @@ -1,112 +0,0 @@ -import networkx as nx -import numpy as np -import random - - - -#Exercise 2 - - -#count the number of graphlets of a graph for all graphs in the dataset -#input:dataset -#output:list of counter for all the graphs in the dataset - -def graphlet_kernel(data): - -#create all the non-isoorphic graphs with 5 nodes and store it in a list called 'dic' - g0=nx.empty_graph(5) - dic=[0]*34 - dic[0]=nx.create_empty_copy(g0) - dic[0].add_edges_from([(0,1)]) - dic[1]=nx.create_empty_copy(g0) - dic[1].add_edges_from([(0,1),(0,2)]) - dic[2]=nx.create_empty_copy(g0) - 
dic[2].add_edges_from([(0,1),(2,3)]) - dic[3]=nx.create_empty_copy(g0) - dic[3].add_edges_from([(0,1),(0,2),(0,3)]) - dic[4]=nx.create_empty_copy(g0) - dic[4].add_edges_from([(0,1),(0,2),(3,4)]) - dic[5]=nx.create_empty_copy(g0) - dic[5].add_edges_from([(0,1),(1,2),(2,3)]) - dic[6]=nx.create_empty_copy(g0) - dic[6].add_edges_from([(0,1),(0,2),(1,2)]) - dic[7]=nx.create_empty_copy(g0) - dic[7].add_edges_from([(0,1),(0,2),(0,3),(0,4)]) - dic[8]=nx.create_empty_copy(g0) - dic[8].add_edges_from([(0,1),(0,2),(1,3),(2,3)]) - dic[9]=nx.create_empty_copy(g0) - dic[9].add_edges_from([(0,1),(0,2),(0,3),(3,4)]) - dic[10]=nx.create_empty_copy(g0) - dic[10].add_edges_from([(0,1),(0,2),(0,3),(2,3)]) - dic[11]=nx.create_empty_copy(g0) - dic[11].add_edges_from([(0,1),(1,2),(2,3),(3,4)]) - dic[12]=nx.create_empty_copy(g0) - dic[12].add_edges_from([(0,1),(0,2),(1,2),(3,4)]) - dic[13]=nx.create_empty_copy(g0) - dic[13].add_edges_from([(0,1),(0,2),(0,3),(2,4),(3,4)]) - dic[14]=nx.create_empty_copy(g0) - dic[14].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4)]) - dic[15]=nx.create_empty_copy(g0) - dic[15].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2)]) - dic[16]=nx.create_empty_copy(g0) - dic[16].add_edges_from([(0,1),(1,2),(2,3),(3,4),(4,0)]) - dic[17]=nx.create_empty_copy(g0) - dic[17].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3)]) - dic[18]=nx.create_empty_copy(g0) - dic[18].add_edges_from([(0,1),(0,2),(2,3),(2,4),(3,4)]) - dic[19]=nx.create_empty_copy(g0) - dic[19].add_edges_from([(0,1),(0,2),(3,1),(3,2),(4,1),(4,2)]) - dic[20]=nx.create_empty_copy(g0) - dic[20].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,4),(3,4)]) - dic[21]=nx.create_empty_copy(g0) - dic[21].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(2,4)]) - dic[22]=nx.create_empty_copy(g0) - dic[22].add_edges_from([(0,1),(0,2),(0,3),(2,3),(2,4),(3,4)]) - dic[23]=nx.create_empty_copy(g0) - dic[23].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(3,4)]) - dic[24]=nx.create_empty_copy(g0) - 
dic[24].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)]) - dic[25]=nx.create_empty_copy(g0) - dic[25].add_edges_from([(0,1),(0,2),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[26]=nx.create_empty_copy(g0) - dic[26].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4)]) - dic[27]=nx.create_empty_copy(g0) - dic[27].add_edges_from([(0,1),(0,2),(0,3),(1,2),(2,3),(1,4),(3,4)]) - dic[28]=nx.create_empty_copy(g0) - dic[28].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4)]) - dic[29]=nx.create_empty_copy(g0) - dic[29].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(2,3),(3,4),(4,1)]) - dic[30]=nx.create_empty_copy(g0) - dic[30].add_edges_from([(0,1),(0,2),(0,3),(1,2),(1,3),(2,3),(1,4),(2,4)]) - dic[31]=nx.create_empty_copy(g0) - dic[31].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4)]) - dic[32]=nx.create_empty_copy(g0) - dic[32].add_edges_from([(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)]) - dic.insert(0,nx.create_empty_copy(g0)) - -#make a initial dict called 'vektor' to count the non-isoorphic graphs - vektor=dict(zip(dic,[0]*34)) - output=[] - -#to check which graphlet the induced subgraph is isomorphic to,and plus 1. -#input:the induced subgraph - - def count_graphlet(g): - for k,v in temp.items(): - if nx.is_isomorphic(k,g): - temp[k]+=1 - break - -#iterate over all graphs in the dataset - for graph in data: - temp=vektor.copy() -#if the number of nodes of the gragh is less than 5,then output a vektor with zeros,because can't be isomorphic graph. - if len(graph.nodes())<5: - output.append(list(temp.values())) - else: -#if the number of nodes is more than 5,randomly sample 1000 times. 
- for j in range(1000): - temp_subgraph=graph.subgraph(random.sample(graph.nodes(),5)) - count_graphlet(temp_subgraph) - output.append(list(temp.values())) - return output \ No newline at end of file diff --git a/Python_files/ex2/GCN_modul.py b/Python_files/ex2/GCN_modul.py deleted file mode 100644 index d79de4a787fe930cfff6e2f8f91a6d1980252ee0..0000000000000000000000000000000000000000 --- a/Python_files/ex2/GCN_modul.py +++ /dev/null @@ -1,44 +0,0 @@ -import torch - -#basic torch module -class GCN_Layer(torch.nn.Module): - - def __init__(self, dim_in, dim_out, num_vertices, is_linear=False): - """ - Initializes a GCN Layer. - - :dim_in: input dimension - :dim_out: output dimension - :num_vertices: number of vertices of the graphs (2nd dimension of batched adjecency matrix) - :is_linear: GCN Layer applies Relu if is_linear=False - """ - super(GCN_Layer, self).__init__() - self.is_linear = is_linear - - #use Kaiming Init when using ReLU - self.W = torch.nn.Parameter(torch.zeros(dim_in, dim_out)) - torch.nn.init.kaiming_normal_(self.W) - - #layer which performs batch normalization - self.m = torch.nn.BatchNorm1d(num_vertices) - - - - def forward(self, A, H): - """ - Forward pass for a GCN Layer. - - :A: Adjecency matrix - :H: vertex embedding of last layer - :return: vertex embedding of this layer - """ - # linear transformation on input - x = torch.matmul(A, H) - # batch normalization - x = self.m(x) - - y = torch.matmul(x, self.W) - # apply activation - if not self.is_linear: - y = torch.relu(y) - return y diff --git a/Python_files/ex2/README.md b/Python_files/ex2/README.md deleted file mode 100644 index df7c75bbeb174eab01be57368ff6a102f3f780fd..0000000000000000000000000000000000000000 --- a/Python_files/ex2/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# Graph Learning -This readme is used to describe how to run our code and the results we obtained in each exercise. 
- -## Structure of the repository of exercise 2 - TODO - -The repository contains several different files: \ -\ - 0. main.py: Entry point that runs the other modules. \ - 1. data_utils.py: This file was given and not changed by us. \ - 2. normalized_adj.py: This file contains the adjacency normalization computation of exercise 1. \ - 3. GCN_modul.py: This file implements the GCN layer described in exercise 2.\ - 4. graph_level_GCN.py: This file implements the graph-level GCN described in exercise 3.\ - 5. load_data.py: This file loads all the necessary data and puts it in the right format to train the GCN. \ - 6. train_graph_GCN.py: This file contains a function with the training loop and an evaluation function for the graph-level GCN. \ - 7. load_data_node_level.py: This file loads all the necessary data for the node-level classification and puts it in the right format to train the GCN. \ - 8. node_level_GCN.py: This file implements the node-level GCN described in exercise 4.\ - 9. train_node_level.py: This file contains a function with the training loop and an evaluation function for the node-level GCN. \ - 10. adj_matrix.py: This file contains the adjacency normalization computation of exercise 1. \ -## How to run the script - TODO - -This script uses argparse. \ -\ -To run the script it is necessary to call the file 'main.py'. -It is required to choose the following arguments: - - -#### Example for running the script - TODO - -The following command should be run in the terminal to call ... 
(if the folder "datasets" is in the same location as the python file): \ -\ - -- python main.py -p1 datasets/ENZYMES/data.pkl -l graph - -The following command should be run in the terminal to call ...: \ -\ - -- python main.py -p1 datasets/Citeseer_Train/data.pkl -p2 datasets/Citeseer_Eval/data.pkl -l node - -## Results - - - -### Graph-Level GCN - -| | ENZYMES | NCI1 | -|-----------------------------|--------------------|------------------| -|**train data mean accuracy** | 0.6027 | 0.7620 | -|**train data standard dev.** | 0.023 | 0.020 | -|**test data accuracy** | 0.4251 | 0.7326 | -|**test data standard dev.** | 0.023 | 0.020 | - - -### Node-Level GCN - - -| | Cora | Citeseer | -|-----------------------------|--------------------|------------------| -|**train data mean accuracy** | 0.8014 | 0.8313 | -|**train data standard dev.** | 0.072 | 0.047 | -|**test data accuracy** | 0.5168 | 0.4787 | -|**test data standard dev.** | 0.072 | 0.047 | - diff --git a/Python_files/ex2/__pycache__/GCN_modul.cpython-38.pyc b/Python_files/ex2/__pycache__/GCN_modul.cpython-38.pyc deleted file mode 100644 index aeff575693fff01e6f87f1d3ae4d77c233a5361d..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/GCN_modul.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/GCN_modul.cpython-39.pyc b/Python_files/ex2/__pycache__/GCN_modul.cpython-39.pyc deleted file mode 100644 index 214ba8a8a6c64bebb2ce9573c1dda2017949f857..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/GCN_modul.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/adj_matrix.cpython-38.pyc b/Python_files/ex2/__pycache__/adj_matrix.cpython-38.pyc deleted file mode 100644 index aefc71b26018e9406575ed324ae30af1f0d0914b..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/adj_matrix.cpython-38.pyc and /dev/null differ diff --git 
a/Python_files/ex2/__pycache__/adj_matrix.cpython-39.pyc b/Python_files/ex2/__pycache__/adj_matrix.cpython-39.pyc deleted file mode 100644 index 73a6a18422528d2410616e0f724ca8da9e86e3a6..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/adj_matrix.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/data_utils.cpython-38.pyc b/Python_files/ex2/__pycache__/data_utils.cpython-38.pyc deleted file mode 100644 index a70a57a705a61c4ef380cd9cf7a615a4ef42f7d2..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/data_utils.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/data_utils.cpython-39.pyc b/Python_files/ex2/__pycache__/data_utils.cpython-39.pyc deleted file mode 100644 index cce0b9b8fdbc714c71bab6a81aca4089886797f0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/data_utils.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/graph_level_GCN.cpython-38.pyc b/Python_files/ex2/__pycache__/graph_level_GCN.cpython-38.pyc deleted file mode 100644 index 63faa80ac330e9426362fc8e565266099a194ef5..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/graph_level_GCN.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/load_data.cpython-38.pyc b/Python_files/ex2/__pycache__/load_data.cpython-38.pyc deleted file mode 100644 index 132908adda509423e7da6900de7649506ea4db4c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/load_data.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/load_data_node_level.cpython-38.pyc b/Python_files/ex2/__pycache__/load_data_node_level.cpython-38.pyc deleted file mode 100644 index d7aa0d7800f4c17fc4ccdc72e7daceb681d8b2d2..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/load_data_node_level.cpython-38.pyc and 
/dev/null differ diff --git a/Python_files/ex2/__pycache__/load_data_node_level.cpython-39.pyc b/Python_files/ex2/__pycache__/load_data_node_level.cpython-39.pyc deleted file mode 100644 index ceb557df964f5bd3b80442bbb904f09542d5e07d..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/load_data_node_level.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/node_level_GCN.cpython-38.pyc b/Python_files/ex2/__pycache__/node_level_GCN.cpython-38.pyc deleted file mode 100644 index a7d66b7d017287616bf60a363e21dd62255b3ba1..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/node_level_GCN.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/node_level_GCN.cpython-39.pyc b/Python_files/ex2/__pycache__/node_level_GCN.cpython-39.pyc deleted file mode 100644 index e2d808abe638ec72a5846b67a712fa5e2d6078a9..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/node_level_GCN.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/normalized_adj.cpython-38.pyc b/Python_files/ex2/__pycache__/normalized_adj.cpython-38.pyc deleted file mode 100644 index d340d1fc15c47680d4bcb755445f7d5b8797a490..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/normalized_adj.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/train_graph_GCN.cpython-38.pyc b/Python_files/ex2/__pycache__/train_graph_GCN.cpython-38.pyc deleted file mode 100644 index 2704116dd4f5822dd6da061be4d50fa086b250ad..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/__pycache__/train_graph_GCN.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/__pycache__/train_node_level.cpython-38.pyc b/Python_files/ex2/__pycache__/train_node_level.cpython-38.pyc deleted file mode 100644 index cbddb292d643155a504a3e707adc1134fd052196..0000000000000000000000000000000000000000 
Binary files a/Python_files/ex2/__pycache__/train_node_level.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/adj_matrix.py b/Python_files/ex2/adj_matrix.py deleted file mode 100644 index 29b6616e76347718ac3e68056b51f2bbdbb86cff..0000000000000000000000000000000000000000 --- a/Python_files/ex2/adj_matrix.py +++ /dev/null @@ -1,37 +0,0 @@ -import math -import numpy as np - -from data_utils import get_padded_node_labels, get_padded_node_attributes, get_padded_adjacency - -def norm_adj_matrix(graph_list): - - """ - Compute the normalized adjacency matrix of every graph in the list. - """ - - # get padded adj - pad_adj_matrix=get_padded_adjacency(graph_list) - - print(pad_adj_matrix[0][0][1]) - - # get the node degrees of each graph. - node_deg=[] - for i in range(len(pad_adj_matrix)): - - a= pad_adj_matrix[i].sum(axis=1)+1 - node_deg.append(a) - - # Get the normalized adjacency matrices and add them in a list - list=[] - for k in range(len(pad_adj_matrix)): - norm_matrix = np.zeros((len(pad_adj_matrix[k]),len(pad_adj_matrix[k]))) - for i in range(len(pad_adj_matrix[k])): - for j in range(len(pad_adj_matrix[k])): - if pad_adj_matrix[k][i][j] !=0: - norm_matrix[i][j] = 1/math.sqrt(node_deg[k][i]*node_deg[k][j]) - elif i == j: - norm_matrix[i][j] = 1/math.sqrt(node_deg[k][i]*node_deg[k][i]) - # else: - # norm_matrix[i][j] = 0 - list.append(norm_matrix) - return list \ No newline at end of file diff --git a/Python_files/ex2/data_utils.py b/Python_files/ex2/data_utils.py deleted file mode 100644 index 26f6c670de65bc25e96bcb126792a403b8250c1d..0000000000000000000000000000000000000000 --- a/Python_files/ex2/data_utils.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 -import networkx as nx -import numpy as np - - -def get_adjacency_matrix(G): - """ - :param G: A networkx graph - :return: The adjacency matrix as a dense numpy array - """ - A = nx.to_numpy_array(G) - return A - - -def get_graph_label(G): - """ - :param G: A networkx graph - :return: The 
def get_padded_node_labels(graphs):
    """
    Computes a 3D Tensor X with shape (k, n, l) that stacks the node labels of all graphs.
    Here, k = |graphs|, n = max(|V|) and l = max_label - min_label + 1, i.e. the size of
    the label range over all graphs (labels missing inside that range still get a column).
    Node labels are encoded as l-dimensional one-hot vectors; rows of padded nodes stay zero.

    An empty graph list yields an array of shape (0, 0, 0); graphs without any
    node yield an array of shape (k, 0, 0).

    :param graphs: A list of networkx graphs
    :return: Numpy float32 array X
    """
    n_samples = len(graphs)
    if n_samples == 0:
        # np.hstack / np.max cannot reduce over nothing.
        return np.zeros((0, 0, 0), dtype=np.float32)

    node_labels = [get_node_labels(g) for g in graphs]
    # One label per node, so the longest label vector is the largest graph.
    max_size = max(len(labels) for labels in node_labels)

    all_labels = np.hstack(node_labels)
    if all_labels.size == 0:
        return np.zeros((n_samples, max_size, 0), dtype=np.float32)

    min_label = all_labels.min()
    label_count = int(all_labels.max() - min_label + 1)

    X = np.zeros((n_samples, max_size, label_count), dtype=np.float32)
    for i, labels in enumerate(node_labels):
        # Shift by min_label so the smallest label maps to column 0.
        X[i, np.arange(len(labels)), labels - min_label] = 1.0

    return X
"cell_type": "code", - "execution_count": 12, - "id": "athletic-story", - "metadata": {}, - "outputs": [], - "source": [ - "with open('ENZYMES/data1.pkl', 'rb') as file:\n", - " data = pickle.load(file)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "varied-stevens", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0.0625 , 0.04166667, 0.04166667, ..., 0. , 0. ,\n", - " 0. ],\n", - " [0.04166667, 0.02777778, 0.02777778, ..., 0. , 0. ,\n", - " 0. ],\n", - " [0.04166667, 0.02777778, 0.02777778, ..., 0. , 0. ,\n", - " 0. ],\n", - " ...,\n", - " [0. , 0. , 0. , ..., 0.02040816, 0.02380952,\n", - " 0.02857143],\n", - " [0. , 0. , 0. , ..., 0.02380952, 0.02777778,\n", - " 0. ],\n", - " [0. , 0. , 0. , ..., 0.02857143, 0. ,\n", - " 0.04 ]])" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "adj=data_utils.get_adjacency_matrix(data[0])\n", - "for i in range(len(adj)):\n", - " adj[i][i]=1\n", - "temp=np.zeros(shape=(len(adj),len(adj)))\n", - "for i in range(len(adj)):\n", - " for j in range(len(adj)):\n", - " temp[i][j]=adj[i][j]/(list(adj[i]).count(1)*list(adj[j]).count(1))\n", - "temp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "running-delta", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "heavy-buddy", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "handmade-second", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "legitimate-separate", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", 
- "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/datasets/Citeseer_Eval/data.pkl b/Python_files/ex2/datasets/Citeseer_Eval/data.pkl deleted file mode 100644 index 4e37b2d32f4957ba9f58995d4624a1b3df22edea..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/Citeseer_Eval/data.pkl and /dev/null differ diff --git a/Python_files/ex2/datasets/Citeseer_Train/data.pkl b/Python_files/ex2/datasets/Citeseer_Train/data.pkl deleted file mode 100644 index 840c1aebc7c3a7b19c3eeea9cd2e7fe765fb7047..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/Citeseer_Train/data.pkl and /dev/null differ diff --git a/Python_files/ex2/datasets/Cora_Eval/data.pkl b/Python_files/ex2/datasets/Cora_Eval/data.pkl deleted file mode 100644 index 61a35c695a1ec943822120114ce62e18de4792b7..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/Cora_Eval/data.pkl and /dev/null differ diff --git a/Python_files/ex2/datasets/Cora_Train/data.pkl b/Python_files/ex2/datasets/Cora_Train/data.pkl deleted file mode 100644 index 378dc39f7794d4b8dbd493ee0a2a7c69a7ad6dda..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/Cora_Train/data.pkl and /dev/null differ diff --git a/Python_files/ex2/datasets/ENZYMES/.DS_Store b/Python_files/ex2/datasets/ENZYMES/.DS_Store deleted file mode 100644 index 14f68d85f90870714ff4c194f0d8aac3137a8fb0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/ENZYMES/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/datasets/ENZYMES/data.pkl b/Python_files/ex2/datasets/ENZYMES/data.pkl deleted file mode 100644 index e4e7190c25ffe6ac0636afceb46b30d55557319f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/ENZYMES/data.pkl and 
/dev/null differ diff --git a/Python_files/ex2/datasets/NCI1/.DS_Store b/Python_files/ex2/datasets/NCI1/.DS_Store deleted file mode 100644 index 14f68d85f90870714ff4c194f0d8aac3137a8fb0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/NCI1/.DS_Store and /dev/null differ diff --git a/Python_files/ex2/datasets/NCI1/data.pkl b/Python_files/ex2/datasets/NCI1/data.pkl deleted file mode 100644 index 6c72a48da0903e887c68acff31d5715f0072cd6c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/NCI1/data.pkl and /dev/null differ diff --git a/Python_files/ex2/datasets/README.md b/Python_files/ex2/datasets/README.md deleted file mode 100644 index f56c81e7143ce8d68baea01a42ac4fb94cde8aee..0000000000000000000000000000000000000000 --- a/Python_files/ex2/datasets/README.md +++ /dev/null @@ -1,8 +0,0 @@ -This file Contains the Cora and Citeseer datasets [1] for node classification. -We have already split each dataset into training and evaluation data. - -For Task 4 you can reuse the ENZYMES and NCI1 datasets provided for Sheet 1. - -References: - -[1] Sen, Prithviraj, et al. "Collective classification in network data." AI magazine 29.3 (2008): 93-93. 
diff --git a/Python_files/ex2/datasets/Untitled.ipynb b/Python_files/ex2/datasets/Untitled.ipynb deleted file mode 100644 index aa3d35db6dca1cfac03d3000af1fb15547bcab80..0000000000000000000000000000000000000000 --- a/Python_files/ex2/datasets/Untitled.ipynb +++ /dev/null @@ -1,132 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 85, - "id": "roman-auditor", - "metadata": {}, - "outputs": [], - "source": [ - "import data_utils\n", - "import pickle\n", - "import numpy as np\n", - "import torch" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "id": "sharing-implement", - "metadata": {}, - "outputs": [], - "source": [ - "with open('ENZYMES/data1.pkl', 'rb') as file:\n", - " data = pickle.load(file)" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "id": "developing-noise", - "metadata": {}, - "outputs": [], - "source": [ - "def get_nor_adj(G):\n", - " adj=data_utils.get_adjacency_matrix(G)\n", - " for i in range(len(adj)):\n", - " adj[i][i]=1\n", - " temp=np.zeros(shape=(len(adj),len(adj)))\n", - " for i in range(len(adj)):\n", - " for j in range(len(adj)):\n", - " temp[i][j]=adj[i][j]/np.sqrt((list(adj[i]).count(1)*list(adj[j]).count(1)))\n", - " return temp" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "id": "departmental-austria", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'numpy.ndarray' object has no attribute 'setdiag'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-90-f10cf0e23a6d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mget_nor_adj\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-89-7f4246564d29>\u001b[0m in \u001b[0;36mget_nor_adj\u001b[0;34m(G)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_nor_adj\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0madj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_adjacency_matrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0madj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdiag\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'setdiag'" - ] - } - ], - 
"source": [ - "get_nor_adj(data[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "individual-garage", - "metadata": {}, - "outputs": [], - "source": [ - "class GCN_Layer(torch.nn.Module):\n", - " \n", - " def __init__(self,dim_in,dim_out,is_linear=False):\n", - " super(Layer,self).__init__()\n", - " self.is_linear=is_linear\n", - " \n", - " self.W=torch.nn.Parameter(torch.zeros(dim_in,dim_out))\n", - " torch.nn.init.kaiming_normal_(self.W)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "deadly-colleague", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "meaning-swift", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "defensive-burst", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex2/datasets/__pycache__/data_utils.cpython-38.pyc b/Python_files/ex2/datasets/__pycache__/data_utils.cpython-38.pyc deleted file mode 100644 index 799d66e21a337ddba6c271ce9de855b464797a36..0000000000000000000000000000000000000000 Binary files a/Python_files/ex2/datasets/__pycache__/data_utils.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex2/datasets/data_utils.py b/Python_files/ex2/datasets/data_utils.py deleted file mode 100644 index 26f6c670de65bc25e96bcb126792a403b8250c1d..0000000000000000000000000000000000000000 --- a/Python_files/ex2/datasets/data_utils.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 -import networkx 
def get_padded_adjacency(graphs):
    """
    Computes a 3D Tensor A of shape (k,n,n) that stacks all adjacency matrices.
    Here, k = |graphs|, n = max(|V|) and A[i,:,:] is the padded adjacency matrix of the i-th graph.
    Smaller matrices are zero-padded at the bottom and on the right.
    An empty graph list yields an array of shape (0, 0, 0).

    :param graphs: A list of networkx graphs
    :return: Numpy float32 array A
    """
    if len(graphs) == 0:
        # np.max of an empty sequence would raise.
        return np.zeros((0, 0, 0), dtype=np.float32)

    A_list = [get_adjacency_matrix(g) for g in graphs]
    max_size = max(A.shape[0] for A in A_list)

    # Fill one preallocated float32 array instead of building a padded
    # float64 copy per graph and converting afterwards.
    A_padded = np.zeros((len(A_list), max_size, max_size), dtype=np.float32)
    for i, A in enumerate(A_list):
        n = A.shape[0]
        A_padded[i, :n, :n] = A

    return A_padded
def get_padded_node_attributes(graphs):
    """
    Computes a 3D Tensor X with shape (k, n, a) that stacks the node attributes of all graphs.
    Here, k = |graphs|, n = max(|V|) and a is the length of the attribute vectors.
    Rows of padded nodes are zero. An empty graph list yields shape (0, 0, 0).

    :param graphs: A list of networkx graphs
    :return: Numpy float32 array X
    """
    if len(graphs) == 0:
        # np.max / np.stack cannot work on an empty sequence.
        return np.zeros((0, 0, 0), dtype=np.float32)

    node_attributes = [get_node_attributes(g) for g in graphs]
    max_size = max(x.shape[0] for x in node_attributes)

    # A graph without nodes yields a 1-D empty array with no second axis,
    # so take the attribute length from the first graph that has nodes.
    attr_dim = next((x.shape[1] for x in node_attributes if x.ndim == 2), 0)

    stacked = np.zeros((len(node_attributes), max_size, attr_dim), dtype=np.float32)
    for i, x in enumerate(node_attributes):
        if x.shape[0]:
            stacked[i, :x.shape[0]] = x
    return stacked
- - :input_dim: input dimension (3rd dimension of batched H0) - :output_dim: output dimension (number of classification classes) - :num_vertices: number of vertices of the graphs (2nd dimension of batched adjecency matrix) - :hidden_dim: size of the hidden layers - :num_layers: number of GCN-layers - """ - super(graph_level_GCN, self).__init__() - self.num_layers = num_layers - - #add sub-modules as attribute - self.input_layer = GCN_Layer(input_dim, hidden_dim, num_vertices) - # Store multiple submodules in 'ModuleList' - self.hidden_layers = torch.nn.ModuleList( - [GCN_Layer(hidden_dim, hidden_dim, num_vertices) for _ in range(num_layers-1)] - ) - - # add linear modules for subsequent classification - self.MLP_layer = nn.Linear(num_vertices, hidden_dim) - self.output_layer = nn.Linear(hidden_dim, output_dim) - # add dropout against overfit - self.dropout1 = nn.Dropout(0.5) - self.dropout2 = nn.Dropout(0.5) - - - - - def forward(self, A, H): - """ - Forward pass for the Graph Level GCN. 
def load_data(path):
    """
    Loads the data from a path and extracts and pads A, H and y_one_hot (see below).

    :path: path of a pickle file holding a list of networkx graphs
    :return:
        :A: padded and stacked normalized adjacency matrices of all graphs in data
            (torch float32 tensor of size #graphs x #vertices x #vertices)
        :H: padded and stacked first vertex embedding, consisting of one-hot node labels
            (and, if present, l2-normalized node attributes)
            (torch tensor of size #graphs x #vertices x dim_node_labels+attributes)
        :y_one_hot: class labels one-hot-encoded
            (torch tensor of size #graphs x #labels)
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # dataset files from a trusted source.
    with open(path, "rb") as file:
        data = pickle.load(file)

    # padded & normalized adjacency matrices as a torch tensor
    A = torch.tensor(get_padded_normalized_adjacency(data), dtype=torch.float32)

    # graph labels -> class indices starting at 0 -> one-hot
    y_label = np.array([g.graph["label"] for g in data])
    # Shifting by the minimum gives the same result as the former
    # "subtract 1 if 0 is absent" rule for labels starting at 0 or 1, and
    # also handles other offsets (e.g. -1/1) without negative indices.
    y_label = y_label - y_label.min()
    y_one_hot = np.zeros((y_label.size, y_label.max() + 1))
    y_one_hot[np.arange(y_label.size), y_label] = 1
    y_one_hot = torch.tensor(y_one_hot)

    # one-hot-encoded, padded node labels
    H = torch.tensor(get_padded_node_labels(data))

    # If the nodes carry attributes, concatenate them to the node labels.
    # Inspect the first node of the first graph rather than assuming that a
    # node with id 1 exists.
    first_node_attrs = next((attrs for _, attrs in data[0].nodes(data=True)), {})
    if "node_attributes" in first_node_attrs:
        node_as = get_padded_node_attributes(data)
        # Stack into a single array first; building a tensor from a list of
        # ndarrays is very slow.
        node_as = np.stack([normalize(x, norm='l2') for x in node_as])
        H = torch.cat((H, torch.tensor(node_as)), dim=2)

    return A, H, y_one_hot
def _first_graph_node_labels(graphs):
    """Return the node labels of the first graph in ``graphs`` as an int32 array."""
    return np.int32([attrs["node_label"] for _, attrs in graphs[0].nodes(data=True)])


def load_data_node(train_path, test_path, node_attrs=False):
    """
    Loads the training and test data from paths and builds the tensors
    used for node-level classification.

    :train_path: path of the pickled list of training graphs
    :test_path: path of the pickled list of evaluation graphs
    :node_attrs: not used by this function; kept so existing callers keep working
    :return: tuple (A, A_test, H, H_test, y_label, y_label_test)
        :A, A_test: stacked normalized adjacency matrices from norm_adj_matrix
            (torch float32 tensor of size #graphs x #vertices x #vertices)
        :H, H_test: padded node attributes from get_padded_node_attributes
            (torch tensor of size #graphs x #vertices x dim_attributes)
        :y_label, y_label_test: one-hot node labels of the FIRST graph only
            (torch float tensor of size 1 x #vertices x #classes); both use
            the same number of classes
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # dataset files from a trusted source.
    with open(train_path, "rb") as file:
        data_train = pickle.load(file)
    with open(test_path, "rb") as file:
        data_test = pickle.load(file)

    # normalized adjacency matrices; convert the list to one array first,
    # since creating a tensor from a list of ndarrays is very slow
    A = torch.tensor(np.asarray(norm_adj_matrix(data_train)), dtype=torch.float32)
    A_test = torch.tensor(np.asarray(norm_adj_matrix(data_test)), dtype=torch.float32)

    # node labels of the first graph of each split
    labels = _first_graph_node_labels(data_train)
    labels_test = _first_graph_node_labels(data_test)

    # Use one class count for both splits; letting one_hot infer it per
    # split gives mismatching widths when a split lacks the highest label.
    num_classes = int(max(labels.max(), labels_test.max())) + 1
    one_hot = torch.nn.functional.one_hot
    y_label = one_hot(torch.from_numpy(labels).long().unsqueeze(0), num_classes).float()
    y_label_test = one_hot(torch.from_numpy(labels_test).long().unsqueeze(0), num_classes).float()

    # padded node attributes
    H = torch.tensor(get_padded_node_attributes(data_train))
    H_test = torch.tensor(get_padded_node_attributes(data_test))

    return A, A_test, H, H_test, y_label, y_label_test


if __name__ == "__main__":
    # Manual smoke test. Guarded so that importing this module (as main.py
    # does) no longer loads Citeseer and prints shapes as a side effect.
    A, A_test, H, H_test, y_label, y_label_test = load_data_node(
        'datasets/Citeseer_Train/data.pkl', 'datasets/Citeseer_Eval/data.pkl')
    train_dataset = TensorDataset(A, H, y_label)

    train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
    print(A.shape, A_test.shape, H.shape, H_test.shape, y_label.shape, y_label_test.shape)
    for a, b, c in train_loader:
        print('a:', a.shape, 'b:', b.shape, 'c:', c.shape)
# Command-line interface: dataset path(s) and the level of classification.
parser = argparse.ArgumentParser()
parser.add_argument('-p1', '--path', required=True, help='Choose the path of the dataset')
parser.add_argument('-p2', '--path2', help='Choose the path of the evaluation dataset')
parser.add_argument('-l', '--level', required=True, help='Choose the level of Classification')


def main(argv=None):
    """
    Parse the command line and run graph-level or node-level training.

    :param argv: argument list to parse; ``None`` lets argparse read sys.argv
    :raises Exception: if ``--level`` is neither 'graph' nor 'node', or if
        node-level classification is requested without ``--path2``
    """
    args = parser.parse_args(argv)
    print('start')

    # select the level of classification
    if args.level == 'graph':
        A, H, y = load_data(args.path)
        train_graph_GCN(graph_level_GCN, A, H, y)
    # elif, not a second if: otherwise the else branch below also fires
    # after a successful graph-level run.
    elif args.level == 'node':
        # node classification needs two paths
        if not args.path2:
            raise Exception('Please choose the evaluation dataset')
        A, B, H, G, y, yt = load_data_node(args.path, args.path2,
                                           node_attrs=True)
        train_node_GCN(GCN_node, A, H, y, B, G, yt)
    else:
        raise Exception('Chosen level does not exist')


if __name__ == '__main__':
    main()
def normalized_adj(graph):
    """
    Normalizes the adjacency of a given graph as was required in exercise 1.

    With adjacency matrix ``A`` and ``d = A.sum(axis=1) + 1`` the result holds
    ``1 / sqrt(d[x] * d[y])`` wherever ``x == y`` or ``A[x, y] != 0`` and
    ``0`` everywhere else.

    :param graph: one networkx graph
    :return: Numpy array result of shape (|V|, |V|)
    """
    # to_numpy_array returns a plain 2-D ndarray. The former
    # adjacency_matrix(...).todense() route relied on getting an np.matrix
    # (column-vector row sums indexed as degrees[x, 0]).
    adj = nx.to_numpy_array(graph)
    degrees = adj.sum(axis=1) + 1

    # positions that receive a value: the diagonal and every existing edge
    keep = adj != 0
    np.fill_diagonal(keep, True)

    result = np.zeros(adj.shape)
    result[keep] = (1 / np.sqrt(np.outer(degrees, degrees)))[keep]
    return result


def get_padded_normalized_adjacency(graphs):
    """
    Changed version of get_padded_adjacency from data_utils.py
    Computes a 3D Tensor A of shape (k,n,n) that stacks all normalized adjacency matrices.
    Here, k = |graphs|, n = max(|V|) and A[i,:,:] is the padded normalized adjacency matrix of the i-th graph.
    An empty graph list yields an array of shape (0, 0, 0).

    :param graphs: A list of networkx graphs
    :return: Numpy float32 array A
    """
    if len(graphs) == 0:
        # np.max of an empty sequence would raise.
        return np.zeros((0, 0, 0), dtype=np.float32)

    A_list = [normalized_adj(g) for g in graphs]
    max_size = max(A.shape[0] for A in A_list)

    # zero-pad at the bottom/right by writing into a preallocated array
    A_padded = np.zeros((len(A_list), max_size, max_size), dtype=np.float32)
    for i, A in enumerate(A_list):
        n = A.shape[0]
        A_padded[i, :n, :n] = A
    return A_padded
def train_graph_GCN(clf, data_A, data_H, y_labels, epochs=400, batch_size=100, lr=0.004):
    """
    Trains the graph level GCN with stratified 10-fold cross-validation.

    For every fold a new model is built and trained for ``epochs`` epochs;
    the validation accuracy is printed every 5th epoch. At the end the mean
    and standard deviation of the per-fold training and validation
    accuracies are printed. Nothing is returned.

    :param
        clf: The classifier/model class, in our case graph level GCN; called as
             clf(input_dim=..., output_dim=..., num_vertices=..., hidden_dim=64, num_layers=5)
        data_A: stacked and padded adjacency matrices of the given graphs
        data_H: stacked and padded vertex embeddings of the given graphs
        y_labels: stacked and one-hot-encoded class labels for the given graphs
        epochs: number of epochs to train
        batch_size: batch size during each training epoch
        lr: learning rate
    """
    dataset = TensorDataset(data_A, data_H, y_labels)
    # number of classification classes
    num_labels = y_labels.size(1)

    # set to 'cuda' if gpu is available
    device = 'cpu'

    # *Stratified* k-fold cross-validation keeps the class ratio in every fold
    strat_kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    training_acc = []
    validation_acc = []

    for fold, (train_idx, val_idx) in enumerate(strat_kfold.split(data_A, torch.argmax(y_labels, dim=1))):
        print(f"############## FOLD {fold+1}/10 #############")

        # SubsetRandomSampler draws a fresh permutation each time a loader is
        # iterated, so one loader per fold still reshuffles every epoch.
        val_loader = DataLoader(dataset, batch_size=100, sampler=SubsetRandomSampler(val_idx))
        train_loader = DataLoader(dataset, batch_size=batch_size, sampler=SubsetRandomSampler(train_idx))

        # construct neural network and move it to device
        model = clf(input_dim=data_H.size(2), output_dim=num_labels, num_vertices=data_A.size(1),
                    hidden_dim=64, num_layers=5)
        model.train()
        model.to(device)
        # construct optimizer
        opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.1e-4)

        acc_score = []
        for i in range(epochs):
            acc_score = []

            for A, H, y_true in train_loader:
                # set gradients to zero
                opt.zero_grad()

                # move data to device
                A = A.to(device)
                H = H.to(device)
                y_true = y_true.to(device)

                # forward pass and loss
                y_pred = model(A, H)
                loss = cross_entropy(y_pred, y_true)

                # backward pass and optimizer step
                loss.backward()
                opt.step()

                # accuracy of the current batch (argmax of the logits equals
                # argmax of their softmax)
                y_pred_class = torch.argmax(y_pred, dim=1)
                y_true_class = torch.argmax(y_true, dim=1)
                acc_score.append(accuracy_score(
                    y_true_class.tolist(), y_pred_class.tolist()))

            # compute validation every 5th epoch
            if (i % 5) == 4:
                val_score = validation(model, val_loader, device)

                print(f"epoch {i+1}: AVG training accuracy", np.mean(acc_score),
                      "\tvalidation accuracy:", val_score)

        # When the last epoch was not a validation epoch, val_score would be
        # stale (or undefined for epochs < 5): evaluate the final model.
        if epochs % 5 != 0:
            val_score = validation(model, val_loader, device)

        # for each fold, save the training and validation accuracy
        training_acc.append(np.mean(acc_score))
        validation_acc.append(val_score)

    print("average training accuracy & standard deviation:", np.mean(training_acc), np.std(training_acc),
          f"\t(train. acc. per fold: {training_acc})"
          "\naverage validation accuracy & standard deviation:", np.mean(validation_acc), np.std(validation_acc),
          f"\t(val. acc. per fold: {validation_acc})")


# code in parts from exercise of Text Mining lecture
def validation(model, val_loader, device):
    """
    Gets test/validation data and returns the accuracy score of the given model.

    The model is switched to eval mode for the evaluation and put back into
    the mode it was in before the call.

    :param
        model: the trained model
        val_loader: DataLoader object with the given test data (A, H, labels)
        device: cpu or gpu (where to test)
    :return: accuracy score of the test data given the current model
    """
    true_labels = []
    pred_labels = []
    was_training = model.training
    model.eval()
    with torch.no_grad():  # no gradient tracking needed for evaluation

        # loop over all data in val_loader and get the predicted labels
        for data_A, data_H, y_true in val_loader:
            data_A = data_A.to(device)
            data_H = data_H.to(device)
            y_true = y_true.to(device)

            # forward pass to classify validation data
            y_pred = model(data_A, data_H)

            # .cpu() so the conversion also works for tensors on a GPU
            pred_labels.extend(torch.argmax(y_pred, dim=1).cpu().tolist())
            true_labels.extend(torch.argmax(y_true, dim=1).cpu().tolist())
    model.train(was_training)
    return accuracy_score(true_labels, pred_labels)


if __name__ == "__main__":
    # Guarded so that importing this module (as main.py does) no longer
    # starts a full training run.
    # ENZYMES
    A, H, y = load_data("datasets/ENZYMES/data.pkl")
    train_graph_GCN(graph_level_GCN, A, H, y, epochs=400, batch_size=100, lr=0.004)

    # NCI1 was run with "datasets/NCI1/data.pkl", epochs=100, batch_size=200, lr=0.004
epochs=100, batch_size=1, lr=0.001): - - """ - Train the GCN and take the output of load_data_node_level as input. Further, we add epochs, batch_size and - learning rate for Adam algorithm. - """ - - # Dataset wrapping tensor - train_dataset = TensorDataset(data_A, data_H, y_labels) - test_dataset = TensorDataset(A_test, H_test, y_label_test) - - #get number of classification classes - num_labels = y_labels.size(2) - - # set to 'cuda' if gpu is available - device = 'cpu' - - training_acc = list() - validation_acc = list() - - # load test data - test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle =True) - - for k in range(10): - print(f"########## round {k+1}/10 ###########") - - # construct neural network and move it to device - model = clf(input_dim=data_H.size(2), output_dim=num_labels, num_vertices=data_A.size(1), - hidden_dim=64, num_layers=3) - model.train() - model.to(device) - # construct optimizer - opt = torch.optim.Adam(model.parameters(), lr) - - # Training Loop - for i in range(epochs): - train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle =True) - acc_score = list() - for A, H, y_true in train_loader: - - # set gradients to zero - opt.zero_grad() - - # move data to device - A = A.to(device) - H = H.to(device) - y_true = y_true.to(device) - - # forward pass and loss - y_pred = model(A, H) - loss = cross_entropy(y_pred, y_true) - - # backward pass and sgd step - loss.backward() - opt.step() - - # computation of current accuracy - y_pred_class = torch.argmax(y_pred, dim=1)[0] - y_true_class = torch.argmax(y_true, dim=1)[0] - acc_score.append(accuracy_score( - y_true_class.tolist(), y_pred_class.tolist())) - - - #compute validation every 5th epoch - if (i % 5) == 4: - val_score = validation(model, test_loader, device) - - print(f"epoch {i+1}: AVG training accuracy", np.mean(acc_score), - "\tvalidation accuracy:", val_score) - - #for each fold, save the training and validation accuracy - training_acc.append(acc_score) - 
validation_acc.append(val_score) - - print("average training accuracy & standard deviation:", np.mean(training_acc), np.std(training_acc), - f"\t(train. acc. per fold: {training_acc})" - "\naverage validation accuracy & standard deviation:", np.mean(validation_acc), np.std(training_acc), - f"\t(val. acc. per fold: {validation_acc})") - - - - -def validation(model, val_loader, device): - true_labels = [] - pred_labels = [] - model.eval() - with torch.no_grad(): #so that no gradients will be changed - - #loop over all data in val_loader and get the predicted labels - for data_A, data_H, y_true in val_loader: - #data to device - data_A = data_A.to(device) - data_H = data_H.to(device) - y_true = y_true.to(device) - - #forward pass to classify validation data - y_pred = model(data_A, data_H) - #format label data and save them in pred_labels and true_labels respectively - y_pred_class = torch.argmax( - torch.nn.functional.softmax(y_pred, dim=2), dim=2)[0] - y_true_class = torch.argmax(y_true, dim=2)[0] - pred_labels.extend(y_pred_class.numpy().tolist()) - true_labels.extend(y_true_class.numpy().tolist()) - model.train() - return accuracy_score(true_labels, pred_labels) - - - - -# A,B,H,G, y, yt = load_data_node("datasets/Cora_Train/data.pkl", "datasets/Cora_Eval/data.pkl", -# node_attrs=True) -# -# -# train_node_GCN(GCN_node, A, H, y, B, G, yt) - diff --git a/Python_files/ex3/.DS_Store b/Python_files/ex3/.DS_Store deleted file mode 100644 index 5a1087768fb3d44891a68c7c3a7e39bde5d348f8..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/.DS_Store and /dev/null differ diff --git a/Python_files/ex3/.idea/.gitignore b/Python_files/ex3/.idea/.gitignore deleted file mode 100644 index 26d33521af10bcc7fd8cea344038eaaeb78d0ef5..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml diff --git a/Python_files/ex3/.idea/ex3.iml 
b/Python_files/ex3/.idea/ex3.iml deleted file mode 100644 index 131d6b3a3aa141b5943842434e28969362699761..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.idea/ex3.iml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> - <component name="NewModuleRootManager"> - <content url="file://$MODULE_DIR$" /> - <orderEntry type="jdk" jdkName="Python 3.9 (torch)" jdkType="Python SDK" /> - <orderEntry type="sourceFolder" forTests="false" /> - </component> -</module> \ No newline at end of file diff --git a/Python_files/ex3/.idea/inspectionProfiles/profiles_settings.xml b/Python_files/ex3/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da2d6447d11dfe32bfb846c3d5b199fc99..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ -<component name="InspectionProjectProfileManager"> - <settings> - <option name="USE_PROJECT_PROFILE" value="false" /> - <version value="1.0" /> - </settings> -</component> \ No newline at end of file diff --git a/Python_files/ex3/.idea/misc.xml b/Python_files/ex3/.idea/misc.xml deleted file mode 100644 index 47afc9b110075450a7c1bff984683adfac6804c6..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (torch)" project-jdk-type="Python SDK" /> -</project> \ No newline at end of file diff --git a/Python_files/ex3/.idea/modules.xml b/Python_files/ex3/.idea/modules.xml deleted file mode 100644 index 3cd772cf77c1635913c4b40a705355a11160c6c7..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - 
<module fileurl="file://$PROJECT_DIR$/.idea/ex3.iml" filepath="$PROJECT_DIR$/.idea/ex3.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/Python_files/ex3/.idea/vcs.xml b/Python_files/ex3/.idea/vcs.xml deleted file mode 100644 index 6c0b8635858dc7ad44b93df54b762707ce49eefc..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="VcsDirectoryMappings"> - <mapping directory="$PROJECT_DIR$/.." vcs="Git" /> - </component> -</project> \ No newline at end of file diff --git a/Python_files/ex3/.ipynb_checkpoints/GNN_Layer-checkpoint.py b/Python_files/ex3/.ipynb_checkpoints/GNN_Layer-checkpoint.py deleted file mode 100644 index 27738703ca1ab847c63e02c272a3b127de6d4f00..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/GNN_Layer-checkpoint.py +++ /dev/null @@ -1,70 +0,0 @@ -import torch -from torch_scatter import scatter_max, scatter_sum, scatter_mean -from customDataset import CustomDataset -import pickle - -class GNN_Layer(torch.nn.Module): - - def __init__(self, dim_in, hidden_dim, hidden_dim2, aggr_type): - """ - Initializes a GNN Layer. 
- - :dim_in: sum of d_h (column dimension of H_l-1, from 2nd layer on equals hidden_dim) - + d' (column dim of X_e (edge_attr)) - :hidden_dim: can be chosen arbitrarily - :hidden_dim2: sum of d_h (column dim of H_l-1 (2nd layer: hidden_dim)) + hidden_dim - :aggr_type: type of scatter operation: choose between max, sum and mean - """ - super(GNN_Layer, self).__init__() - self.aggr_type = aggr_type - - #use Kaiming Init when using ReLU - #dim_in: d_h (dim of node_attr) + d' (dim of edge_attr), hidden_dim: can be chosen arbitrarily - self.W1 = torch.nn.Parameter(torch.zeros(dim_in, hidden_dim)) - #hidden_dim2: d_h (dim of H_l-1) + hidden_dim (dim of Z_l) - self.W2 = torch.nn.Parameter(torch.zeros(hidden_dim2, hidden_dim)) - torch.nn.init.kaiming_normal_(self.W1) - torch.nn.init.kaiming_normal_(self.W2) - - - def forward(self, H, idx, X_e): - """ - Forward pass for a GCN Layer. - - :H: vertex embedding of last layer - :return: vertex embedding of this layer - """ - # concatenate input - x = torch.cat((H[idx[0]], X_e), dim=1) - y = torch.matmul(x, self.W1) - # apply activation - y = torch.relu(y) - - if self.aggr_type == "max": - y = scatter_max(y,idx[1],dim=0)[0] - elif self.aggr_type == "sum": - y = scatter_sum(x,idx[1],dim=0) - elif self.aggr_type == "mean": - y = scatter_mean(x,idx[1],dim=0) - else: - raise Exception("Scatter operation not supported. 
Choose between max, sum and mean.") - - y = torch.cat((H,y), dim=1) - y = torch.matmul(y, self.W2) - # apply activation - y = torch.relu(y) - - return y - - -#test functionality -data = pickle.load(open("datasets/ZINC_Test/data.pkl", "rb")) -dataset = CustomDataset(data) -H = dataset[0][1] -idx = dataset[0][0] -X_e = dataset[0][2] -#print(X_e.size()) -HX = torch.cat((H[idx[0]],X_e), dim=1) -#print(idx.size(), H[idx[0]].size(), H.size()) -y = scatter_max(HX,idx[1],dim=0)[0] -#print(y.size()) \ No newline at end of file diff --git a/Python_files/ex3/.ipynb_checkpoints/README-checkpoint.md b/Python_files/ex3/.ipynb_checkpoints/README-checkpoint.md deleted file mode 100644 index a183eef42a51d9c173d3e4d917b1bd7b5ed01698..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/README-checkpoint.md +++ /dev/null @@ -1,5 +0,0 @@ -This directory contains the ZINC dataset [1] for graph regression. As input features the graphs have 21 discrete node labels and 3 discrete edge labels. The target to predict is a single real-valued scalar. - -References: - -[1] Sen, Prithviraj, et al. "Collective classification in network data." AI magazine 29.3 (2008): 93-93. 
diff --git a/Python_files/ex3/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Python_files/ex3/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index 09a0f13ce125f229e266b467be8b85a5249b2b96..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,157 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 152, - "id": "stone-memorabilia", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "from torch.utils.data import Dataset,DataLoader\n", - "import pickle\n", - "import networkx as nx\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": 151, - "id": "illegal-civilization", - "metadata": {}, - "outputs": [], - "source": [ - "with open('ZINC_Test/data.pkl','rb') as file:\n", - " data=pickle.load(file)" - ] - }, - { - "cell_type": "code", - "execution_count": 207, - "id": "incorporate-thesis", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.9973257780075073" - ] - }, - "execution_count": 207, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "float(data[0].graph['label'])" - ] - }, - { - "cell_type": "code", - "execution_count": 217, - "id": "available-employment", - "metadata": {}, - "outputs": [], - "source": [ - "class CustomDataset(Dataset):\n", - " \n", - " def __init__(self,data):\n", - " self.graphs=list()\n", - " for graph in data:\n", - " out_nodes=[i[0] for i in data[0].edges]\n", - " in_nodes=[i[1] for i in data[0].edges]\n", - " edge_list=[out_nodes+in_nodes,in_nodes+out_nodes]\n", - " node_attributes=list(nx.get_node_attributes(data[0],'node_label').values())\n", - " edge_attributes=list(nx.get_edge_attributes(data[0],'edge_label').values())\n", - " graph_labels=float(graph.graph['label'])\n", - " self.graphs.append((edge_list,node_attributes,edge_attributes,graph_labels))\n", - " \n", - " \n", - " def __len__(self):\n", - " return 
len(self.graphs)\n", - " \n", - " \n", - " def __getitem__(self,i):\n", - " return self.graphs[i]\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 242, - "id": "frozen-auditor", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "100" - ] - }, - "execution_count": 242, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset=CustomDataset(data[:100])\n", - "len(dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 240, - "id": "previous-november", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2410" - ] - }, - "execution_count": 240, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "un=nx.disjoint_union_all([graph for graph in data[:100]])\n", - "len(un.edges)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "packed-rogers", - "metadata": {}, - "outputs": [], - "source": [ - "def collation(dataset):\n", - " " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex3/.ipynb_checkpoints/Untitled1-checkpoint.ipynb b/Python_files/ex3/.ipynb_checkpoints/Untitled1-checkpoint.ipynb deleted file mode 100644 index 363fcab7ed6e9634e198cf5555ceb88932c9a245..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/Untitled1-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex3/.ipynb_checkpoints/collate-checkpoint.ipynb b/Python_files/ex3/.ipynb_checkpoints/collate-checkpoint.ipynb deleted file mode 
100644 index 363fcab7ed6e9634e198cf5555ceb88932c9a245..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/collate-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex3/.ipynb_checkpoints/collate_graphs-checkpoint.py b/Python_files/ex3/.ipynb_checkpoints/collate_graphs-checkpoint.py deleted file mode 100644 index c5012caf66e044e3f7bff7e681e24b1a9ebbf1e9..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/collate_graphs-checkpoint.py +++ /dev/null @@ -1,38 +0,0 @@ -from numpy import int64 -from customDataset import CustomDataset -import pickle -import torch - - -def collate_graphs(data): - #initialization of the tensors which should be returned in the end - edge_list = torch.tensor([]) #possible that we have to typecast this to int, too - node_features = torch.tensor([]) - edge_features = torch.tensor([]) - graph_label = torch.tensor([]) - batch_idx = torch.tensor([], dtype=torch.int64) - n_nodes = 0 - - #go through each graph in data and concatenate the graph info to the final tensors - for i, graph in enumerate(data): - i_edge_list, i_node_features, i_edge_features, i_graph_label = graph - #print(i_edge_list.size(), i_node_features.size(), i_edge_features.size(), i_graph_label.size()) - - #add the number of nodes which have been seen before to the edge list and concatenate afterwards - edge_list = torch.cat((edge_list, torch.add(i_edge_list, n_nodes)), dim=1) - node_features = torch.cat((node_features, i_node_features), dim=0) - edge_features = torch.cat((edge_features, i_edge_features), dim=0) - graph_label = torch.cat((graph_label, i_graph_label)) - n_nodes += i_node_features.size(0) - #add the number of the graph n_nodes times to the batch_idx, once for each node - batch_idx = torch.cat((batch_idx, torch.tensor([i]*i_node_features.size(0), dtype=torch.int64))) - - return (edge_list, node_features, 
edge_features, graph_label, batch_idx) - - - - -#test functionality -#data = pickle.load(open("datasets/ZINC_Test/data.pkl", "rb")) -#dataset = CustomDataset(data) -#print(collate_graphs(dataset[:4])) \ No newline at end of file diff --git a/Python_files/ex3/.ipynb_checkpoints/custom-checkpoint.ipynb b/Python_files/ex3/.ipynb_checkpoints/custom-checkpoint.ipynb deleted file mode 100644 index 363fcab7ed6e9634e198cf5555ceb88932c9a245..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/custom-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex3/.ipynb_checkpoints/customDataset-checkpoint.py b/Python_files/ex3/.ipynb_checkpoints/customDataset-checkpoint.py deleted file mode 100644 index 9019b4ea44b274c63edfff3dc8f3736ae5967a25..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/customDataset-checkpoint.py +++ /dev/null @@ -1,45 +0,0 @@ -from torch.utils.data import Dataset -import networkx as nx -import pickle -import numpy as np -import torch - -class CustomDataset(Dataset): - def __init__(self, data): - self.graphs = list() - - for graph in data: - edge_begin = [i for i,j in graph.edges()] - edge_end = [j for i,j in graph.edges()] - edge_list = torch.tensor([edge_begin+edge_end, edge_end+edge_begin]) - node_features = list(nx.get_node_attributes(graph, "node_label").values()) - edge_attrs = list(nx.get_edge_attributes(graph, "edge_label").values()) - - node_features = np.array(node_features) - node_one_hot = np.zeros((node_features.size, 20)) - node_one_hot[np.arange(node_features.size),node_features] = 1 - node_features = torch.tensor(np.array(node_one_hot), dtype=torch.float32) - - edge_attrs = np.array(edge_attrs)-1 # Because minimal edge attr =1, but we want 0 - #checked that there are 3 different edge attributes -> also for test data??? 
- edge_one_hot = np.zeros((edge_attrs.size, 3)) - edge_one_hot[np.arange(edge_attrs.size),edge_attrs] = 1 - edge_features = torch.tensor(np.array(edge_one_hot), dtype=torch.float32) - #double the size - edge_features = edge_features.repeat(2,1) - - graph_label = torch.tensor(graph.graph["label"]) - self.graphs.append((edge_list, node_features, edge_features, graph_label)) - - def __len__(self): - return len(self.graphs) - - def __getitem__(self,i): - return self.graphs[i] - -#test functionality -data = pickle.load(open("ZINC_Test/data.pkl", "rb")) -dataset = CustomDataset(data) -print(dataset.__getitem__(0)) - - diff --git a/Python_files/ex3/.ipynb_checkpoints/test-checkpoint.ipynb b/Python_files/ex3/.ipynb_checkpoints/test-checkpoint.ipynb deleted file mode 100644 index 09a0f13ce125f229e266b467be8b85a5249b2b96..0000000000000000000000000000000000000000 --- a/Python_files/ex3/.ipynb_checkpoints/test-checkpoint.ipynb +++ /dev/null @@ -1,157 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 152, - "id": "stone-memorabilia", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "from torch.utils.data import Dataset,DataLoader\n", - "import pickle\n", - "import networkx as nx\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": 151, - "id": "illegal-civilization", - "metadata": {}, - "outputs": [], - "source": [ - "with open('ZINC_Test/data.pkl','rb') as file:\n", - " data=pickle.load(file)" - ] - }, - { - "cell_type": "code", - "execution_count": 207, - "id": "incorporate-thesis", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.9973257780075073" - ] - }, - "execution_count": 207, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "float(data[0].graph['label'])" - ] - }, - { - "cell_type": "code", - "execution_count": 217, - "id": "available-employment", - "metadata": {}, - "outputs": [], - "source": [ - "class CustomDataset(Dataset):\n", - " \n", 
- " def __init__(self,data):\n", - " self.graphs=list()\n", - " for graph in data:\n", - " out_nodes=[i[0] for i in data[0].edges]\n", - " in_nodes=[i[1] for i in data[0].edges]\n", - " edge_list=[out_nodes+in_nodes,in_nodes+out_nodes]\n", - " node_attributes=list(nx.get_node_attributes(data[0],'node_label').values())\n", - " edge_attributes=list(nx.get_edge_attributes(data[0],'edge_label').values())\n", - " graph_labels=float(graph.graph['label'])\n", - " self.graphs.append((edge_list,node_attributes,edge_attributes,graph_labels))\n", - " \n", - " \n", - " def __len__(self):\n", - " return len(self.graphs)\n", - " \n", - " \n", - " def __getitem__(self,i):\n", - " return self.graphs[i]\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 242, - "id": "frozen-auditor", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "100" - ] - }, - "execution_count": 242, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset=CustomDataset(data[:100])\n", - "len(dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 240, - "id": "previous-november", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2410" - ] - }, - "execution_count": 240, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "un=nx.disjoint_union_all([graph for graph in data[:100]])\n", - "len(un.edges)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "packed-rogers", - "metadata": {}, - "outputs": [], - "source": [ - "def collation(dataset):\n", - " " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -} diff --git a/Python_files/ex3/GNN.py b/Python_files/ex3/GNN.py deleted file mode 100644 index ba139bfa613f65b0cffc2675eb10aa14913362cf..0000000000000000000000000000000000000000 --- a/Python_files/ex3/GNN.py +++ /dev/null @@ -1,85 +0,0 @@ -import torch -import torch.nn as nn - -from GNN_Layer import GNN_Layer -from Virtual_Node import Virtual_Node -from Sparse_Sum_Pooling import Sparse_Sum_Pooling - -class GNN(torch.nn.Module): - - def __init__(self, hidden_dim, aggr_type, num_layers, node_attrs, edge_attrs,drop_out, virtual_node = False): - """ - Initializes the graph level GCN with several GCN layers. - - :input_dim: input dimension (3rd dimension of batched H0) - :output_dim: output dimension (number of classification classes) - :num_vertices: number of vertices of the graphs (2nd dimension of batched adjecency matrix) - :hidden_dim: size of the hidden layers - :num_layers: number of GCN-layers - """ - super(GNN, self).__init__() - self.num_layers = num_layers - self.virtual_node = virtual_node - self.dropout1=nn.Dropout(drop_out) - self.dropout2=nn.Dropout(drop_out) - #add sub-modules as attribute - self.input_layer = GNN_Layer(node_attrs+edge_attrs, hidden_dim, node_attrs+hidden_dim, aggr_type,res_conn=False) - # Store multiple submodules in 'ModuleList' - self.hidden_layers = torch.nn.ModuleList( - [GNN_Layer(hidden_dim+edge_attrs, hidden_dim, hidden_dim+hidden_dim, aggr_type,res_conn=True) for _ in range(num_layers-1)] - ) - - if virtual_node: - self.virtual_nodes = torch.nn.ModuleList( - [Virtual_Node(hidden_dim) for _ in range(num_layers-1)] - ) - - self.sum_pooling = Sparse_Sum_Pooling() - - - # add linear modules for subsequent classification - self.MLP_layer = nn.Linear(hidden_dim, hidden_dim) - self.output_layer = nn.Linear(hidden_dim, 1) - - # add dropout against overfit - #self.dropout1 = nn.Dropout(0.5) - #self.dropout2 = nn.Dropout(0.5) - - - - - def forward(self, H, idx, X_e, batch_idx): - """ - Forward pass for the Graph 
Level GCN. - - : - :H: vertex embedding of last layer - :idx: Index for edge nodes (2 x 2|E|) - :X_e: Edge attributes (2|E| x d') - :batch_idx: Index noting which graph a node belongs to - :return: torch.float giving regression estimation - """ - # apply GCN-layers - y = self.input_layer(H, idx, X_e) - - for i in range(self.num_layers-1): - if self.virtual_node: - y = self.virtual_nodes[i](y, batch_idx) - - y = self.hidden_layers[i](y, idx, X_e) - y = self.dropout1(y) - # Sum Pooling - y = self.sum_pooling(y, batch_idx) - - - # MLP with one hidden layer + relu, then a linear output layer, with dropout - # y = self.dropout1(y) - y = self.MLP_layer(y) - # y = self.dropout2(y) - y = torch.relu(y) - - y = self.output_layer(y) - - return y - - diff --git a/Python_files/ex3/GNN_Layer.py b/Python_files/ex3/GNN_Layer.py deleted file mode 100644 index caf365063f71881631b8e84fc19508740c40832e..0000000000000000000000000000000000000000 --- a/Python_files/ex3/GNN_Layer.py +++ /dev/null @@ -1,82 +0,0 @@ -import torch -from torch_scatter import scatter_max, scatter_sum, scatter_mean -from customDataset import CustomDataset -import pickle - -class GNN_Layer(torch.nn.Module): - - def __init__(self, dim_in, hidden_dim, hidden_dim2, aggr_type,res_conn): - """ - Initializes a GNN Layer. 
- - :dim_in: sum of d_h (column dimension of H_l-1, from 2nd layer on equals hidden_dim) - + d' (column dim of X_e (edge_attr)) - :hidden_dim: can be chosen arbitrarily - :hidden_dim2: sum of d_h (column dim of H_l-1 (2nd layer: hidden_dim)) + hidden_dim - :aggr_type: type of scatter operation: choose between max, sum and mean - """ - super(GNN_Layer, self).__init__() - self.aggr_type = aggr_type - self.res_conn=res_conn - #use Kaiming Init when using ReLU - #dim_in: d_h (dim of node_attr) + d' (dim of edge_attr), hidden_dim: can be chosen arbitrarily - self.W1 = torch.nn.Parameter(torch.zeros(dim_in, hidden_dim)) - #hidden_dim2: d_h (dim of H_l-1) + hidden_dim (dim of Z_l) - self.W2 = torch.nn.Parameter(torch.zeros(hidden_dim2, hidden_dim)) - torch.nn.init.kaiming_normal_(self.W1) - torch.nn.init.kaiming_normal_(self.W2) - - - def forward(self, H, idx, X_e): - """ - Forward pass for a GCN Layer. - - :H: vertex embedding of last layer - :return: vertex embedding of this layer - """ - # concatenate input - x = torch.cat((H[idx[0]], X_e), dim=1) - y = torch.matmul(x, self.W1) - # apply activation - y = torch.relu(y) - - if self.aggr_type == "max": - y = scatter_max(y,idx[1],dim=0)[0] - elif self.aggr_type == "sum": - y = scatter_sum(y,idx[1],dim=0) - elif self.aggr_type == "mean": - y = scatter_mean(y,idx[1],dim=0) - else: - raise Exception("Scatter operation not supported. 
Choose between max, sum and mean.") - - y = torch.cat((H,y), dim=1) - y = torch.matmul(y, self.W2) - # apply activation - y = torch.relu(y) - if self.res_conn: - y=y+H - return y - - - -#test functionality -data = pickle.load(open("ZINC_Test/data.pkl", "rb")) -dataset = CustomDataset(data) -H = dataset[0][1] - -idx = dataset[0][0] -X_e = dataset[0][2] -# print("idx",idx[0].size()) -# print("X_E", X_e.size(1)) -# print("H", H.size(1)) -# print("H idx", H[idx[0]].size()) -#print(X_e.size()) -HX = torch.cat((H[idx[0]],X_e), dim=1) -# print("HX", HX.size()) -#x = torch.cat((H[idx[0]], X_e), dim=1) -#print(idx.size(), H[idx[0]].size(), H.size()) -y = scatter_max(HX,idx[1],dim=0)[0] -#print(y.size()) - -# gn = GNN_Layer(H.size(1)+ X_e.size(1), 32,H.size(1)+32, "sum" ) -# print(gn(H, idx, X_e)) \ No newline at end of file diff --git a/Python_files/ex3/README.md b/Python_files/ex3/README.md deleted file mode 100644 index a183eef42a51d9c173d3e4d917b1bd7b5ed01698..0000000000000000000000000000000000000000 --- a/Python_files/ex3/README.md +++ /dev/null @@ -1,5 +0,0 @@ -This directory contains the ZINC dataset [1] for graph regression. As input features the graphs have 21 discrete node labels and 3 discrete edge labels. The target to predict is a single real-valued scalar. - -References: - -[1] Sen, Prithviraj, et al. "Collective classification in network data." AI magazine 29.3 (2008): 93-93. diff --git a/Python_files/ex3/Sparse_Sum_Pooling.py b/Python_files/ex3/Sparse_Sum_Pooling.py deleted file mode 100644 index c810637831b95fae15b941874a1db5433c1c9f6a..0000000000000000000000000000000000000000 --- a/Python_files/ex3/Sparse_Sum_Pooling.py +++ /dev/null @@ -1,25 +0,0 @@ -import torch -from torch_scatter import scatter_sum - - -class Sparse_Sum_Pooling(torch.nn.Module): - - def __init__(self): - """ - Initializes a Sparse Sum Pooling Layer. - """ - super(Sparse_Sum_Pooling, self).__init__() - - - def forward(self, H, batch_idx): - """ - Forward pass for Sum Pooling. 
- - :H: vertex embedding of last layer - :return: graph embeddings - """ - y = scatter_sum(H, batch_idx, dim=0) - - return y - - diff --git a/Python_files/ex3/Train_GNN.py b/Python_files/ex3/Train_GNN.py deleted file mode 100644 index 54c45cd3248101096e366058f35acc86d307b69f..0000000000000000000000000000000000000000 --- a/Python_files/ex3/Train_GNN.py +++ /dev/null @@ -1,157 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -from torch.utils.data import DataLoader -from sklearn.metrics import mean_absolute_error -import pickle - -from customDataset import CustomDataset -from collate_graphs import collate_graphs -from GNN import GNN - - -def train_GNN(train_data_path, validation_data_path, test_data_path,hidden_dim, aggr_type, num_layers,drop_out, - virtual_node=False,epochs=200, batch_size=100, lr=0.004): - """ - Trains the graph level GCN. - :param - train_data: Data for training the model - validation_data: Data for model selection - - epochs: number of epochs to train - batch_size: batch size during each epoch - lr: learning rate - """ - - # load the given data and cast to torch tensors - train_data = pickle.load(open(train_data_path, "rb")) - validation_data = pickle.load(open(validation_data_path, "rb")) - test_data = pickle.load(open(test_data_path, "rb")) - - # load all data - train_dataset = CustomDataset(train_data) - validation_dataset = CustomDataset(validation_data) - test_dataset = CustomDataset(test_data) - - - # set to 'cuda' if gpu is available - device = 'cpu' - - training_error = list() - validation_error = list() - - val_loader = DataLoader(validation_dataset, collate_fn = collate_graphs, batch_size=100) - test_loader = DataLoader(test_dataset, collate_fn = collate_graphs, batch_size=100) - - # construct neural network and move it to device - model = GNN(hidden_dim, aggr_type, num_layers,virtual_node=virtual_node, node_attrs = 21, edge_attrs = 3,drop_out=drop_out) - model.train() - model.to(device) - # construct optimizer and loss 
function - opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.1e-4) - lossL1 = nn.L1Loss() - - val_score = 1 - # Training Loop - for i in range(epochs): - acc_score = list() - #reshuffle at each epoch - train_loader = DataLoader(train_dataset, collate_fn = collate_graphs, batch_size=batch_size, shuffle = True) - - - #training - for edge_list, node_features, edge_features, graph_label, batch_idx in train_loader: - # set gradients to zero - opt.zero_grad() - - # move data to device - edge_list = edge_list.to(device) - node_features = node_features.to(device) - edge_features = edge_features.to(device) - graph_label = graph_label.to(device) - batch_idx = batch_idx.to(device) - - # forward pass and loss - y_pred = model(node_features, edge_list, edge_features, batch_idx) - y_pred = torch.squeeze(y_pred) - loss = lossL1(y_pred, graph_label) - - # backward pass and sgd step - loss.backward() - opt.step() - - # computation of current error - - acc_score.append(mean_absolute_error(graph_label.detach().numpy(), y_pred.detach().numpy())) - - # compute val_score for every 10th epoch - if (i % 10) == 9: - val_score = validation(model, val_loader, device) - - #print validation every 20th epoch - if (i % 20) == 19: - - print(f"epoch {i+1}: Training MAE ", np.mean(acc_score), - "\tValidation MAE:", val_score) - - #for each epoch, save the training and validation error - training_error.append(np.mean(acc_score)) - validation_error.append(val_score) - - #get test error - test_error = validation(model, test_loader, device) - # torch.save(model.state_dict(), model_name) - - print("average training error:", training_error[-1], - "\naverage validation error:", validation_error[-1], - "\naverage test error:", test_error) - # more_lines = ['',model_name,"average training error:",str(training_error[-1]), - # "average validation error:", str(validation_error[-1]), - # "average test error:", str(test_error)] - # with open('models.txt', 'a+') as f: - # 
f.writelines('\n'.join(more_lines)) - -# code in parts from exercise of Text Mining lecture -def validation(model, val_loader, device): - """ - Gets test/validation data and returns the accuracy score of the given model. - :param - model: the trained model - val_loader: DataLoader object with the given test data (A, H, labels) - device: cpu or gpu (where to test) - :return: accuracy score of the test data given the current model - """ - true_labels = [] - pred_labels = [] - - #put model to evaluation mode - model.eval() - with torch.no_grad(): #so that no gradients will be changed - - #loop over all data in val_loader and get the predicted labels - for edge_list, node_features, edge_features, graph_label, batch_idx in val_loader: - - # move data to device - edge_list = edge_list.to(device) - node_features = node_features.to(device) - edge_features = edge_features.to(device) - graph_label = graph_label.to(device) - batch_idx = batch_idx.to(device) - - - #forward pass to classify validation data - y_pred = model(node_features, edge_list, edge_features, batch_idx) - #format label data and save them in pred_labels and true_labels respectively - - pred_labels.extend(y_pred.numpy().tolist()) - true_labels.extend(graph_label.numpy().tolist()) - - # put model to train mode - model.train() - return mean_absolute_error(true_labels, pred_labels) - -# -# train_data = "ZINC_Train/data.pkl" -# validation_data = "ZINC_Val/data.pkl" -# -# train_GNN( train_data, validation_data, model_name='example_model.pt',hidden_dim = 15, aggr_type="sum", num_layers= 5,epochs=100) diff --git a/Python_files/ex3/Untitled1.ipynb b/Python_files/ex3/Untitled1.ipynb deleted file mode 100644 index 6091c1e1983313a9eabdcabccdb8ba9467e75aba..0000000000000000000000000000000000000000 --- a/Python_files/ex3/Untitled1.ipynb +++ /dev/null @@ -1,149 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "id": "dd8fb848-548a-4f0c-b918-9aa98dd965a5", - "metadata": {}, - "outputs": [], - 
"source": [ - "import torch\n", - "import pickle\n", - "from customDataset import CustomDataset\n", - "from torch_scatter import scatter_max, scatter_sum, scatter_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "a6fbfb86-ea7b-4075-b9d3-76f6a7b62db8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0., 0., 0.],\n", - " [0., 0., 0.]])\n" - ] - }, - { - "data": { - "text/plain": [ - "tensor([[0.1413, 0.0516, 0.5475],\n", - " [0.0246, 0.2342, 0.2287],\n", - " [0.2273, 0.8790, 0.3869],\n", - " [0.1413, 0.0516, 0.5475],\n", - " [0.0246, 0.2342, 0.2287],\n", - " [0.2273, 0.8790, 0.3869]])" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# H=torch.zeros([2,3])\n", - "print(H)\n", - "y=torch.rand(3,3)\n", - "y.repeat(2,1)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d3c17c4e-c66d-488a-9a9b-cf811ec39ae1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([1, 2, 3, 4, 5, 6])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "1fcd9e52-5c6a-4eb6-9b1f-3889406a034f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 3, 4, 6, 7, 9])" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "4784adcc-a197-4aca-a0c2-36b6b9955fab", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0., 0., 0.],\n", - " [0., 0., 0.]])\n" - ] - }, - { - "ename": "RuntimeError", - "evalue": "The size of tensor a (3) must match the size of tensor b (6) at non-singleton dimension 1", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [14]\u001b[0m, in \u001b[0;36m<cell line: 5>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m y\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mtensor([\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m2\u001b[39m,\u001b[38;5;241m3\u001b[39m,\u001b[38;5;241m4\u001b[39m,\u001b[38;5;241m5\u001b[39m,\u001b[38;5;241m6\u001b[39m])\n\u001b[1;32m 4\u001b[0m batch_idx\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m0\u001b[39m,\u001b[38;5;241m0\u001b[39m,\u001b[38;5;241m0\u001b[39m,\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m2\u001b[39m]\n\u001b[0;32m----> 5\u001b[0m \u001b[43mH\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd\u001b[49m\u001b[43m(\u001b[49m\u001b[43my\u001b[49m\u001b[43m[\u001b[49m\u001b[43mbatch_idx\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mRuntimeError\u001b[0m: The size of tensor a (3) must match the size of tensor b (6) at non-singleton dimension 1" - ] - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72dca9b4-5f14-42bb-a3ca-64c333deb0dd", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex3/Virtual_Node.py b/Python_files/ex3/Virtual_Node.py deleted file mode 100644 index 1008934b69dfce04e30bcd37475960db06d906ea..0000000000000000000000000000000000000000 --- a/Python_files/ex3/Virtual_Node.py +++ 
/dev/null @@ -1,76 +0,0 @@ -import torch -from torch_scatter import scatter_sum -import pickle - -from customDataset import CustomDataset -from collate_graphs import collate_graphs - - -class Virtual_Node(torch.nn.Module): - - def __init__(self, hidden_dim): - """ - Initializes a Virtual Node. - - :hidden_dim: hidden dimension of previous layer - """ - super(Virtual_Node, self).__init__() - - self.W = torch.nn.Parameter(torch.zeros(hidden_dim, hidden_dim)) - torch.nn.init.kaiming_normal_(self.W) - - self.linear = torch.nn.Linear(hidden_dim, hidden_dim) - - # self.norm = torch.nn.BatchNorm1d(hidden_dim) - - - - def forward(self, H, batch_idx): - """ - Forward pass for Sum Pooling. - - :H: vertex embedding of last layer - :return: graph embeddings - """ - - #y = list() - - - # for i in range(H.size(0)): - # W[i] = torch.nn.init.kaiming_normal_(self.W) - # y = scatter_sum(H, batch_idx, dim=0) #|G| x hidden dim - # y[i] = torch.matmul(y[i], self.W) - # y[i] = self.norm(y[i]) - # y[i] = torch.relu(y[i]) - - #self.W.add(W[i]) - #y.add(y[i]) - #y = H.add(y[batch_idx]) - - y = scatter_sum(H, batch_idx, dim=0) #|G| x hidden dim - #y = torch.matmul(y, self.W) |G| x hidden_dim - y = self.linear(y) - # y = self.norm(y) - # apply activation - y = torch.relu(y) - #print("H:", H.size(), "y[batch_idx]:", y[batch_idx].size()) - # add information to each row of H (different information for each graph) - y = H + y[batch_idx] - - return y - - - - -#test functionality -#data = pickle.load(open("../datasets/ZINC_Test/data.pkl", "rb")) -#dataset = CustomDataset(data) -#first_graphs = collate_graphs(dataset[:4]) -#H = first_graphs[1] -#batch_idx = first_graphs[4] -#print(batch_idx.type()) -#print("H", H.size(1)) -#vn = Virtual_Node(H.size(1)) -#print(vn(H, batch_idx)) - - diff --git a/Python_files/ex3/ZINC_Test/data.pkl b/Python_files/ex3/ZINC_Test/data.pkl deleted file mode 100644 index 5a6e8abd7b52ea1df4c91916f4b6e27d93287633..0000000000000000000000000000000000000000 Binary files 
a/Python_files/ex3/ZINC_Test/data.pkl and /dev/null differ diff --git a/Python_files/ex3/ZINC_Train/data.pkl b/Python_files/ex3/ZINC_Train/data.pkl deleted file mode 100644 index 972e5b2e857c848b5fccb228a48ed750e20cc28d..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/ZINC_Train/data.pkl and /dev/null differ diff --git a/Python_files/ex3/ZINC_Val/data.pkl b/Python_files/ex3/ZINC_Val/data.pkl deleted file mode 100644 index 7844a0f65ac565682c16b32d5f07823fa66cd97f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/ZINC_Val/data.pkl and /dev/null differ diff --git a/Python_files/ex3/__pycache__/GNN.cpython-39.pyc b/Python_files/ex3/__pycache__/GNN.cpython-39.pyc deleted file mode 100644 index c76a2db785ee89fcf59f2cd1d4592c659c859233..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/GNN.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/GNN_Layer.cpython-310.pyc b/Python_files/ex3/__pycache__/GNN_Layer.cpython-310.pyc deleted file mode 100644 index 47237fada316591ae4caeeda47e71f1164d0365e..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/GNN_Layer.cpython-310.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/GNN_Layer.cpython-39.pyc b/Python_files/ex3/__pycache__/GNN_Layer.cpython-39.pyc deleted file mode 100644 index 8d5e10522f43fa89c2934671cf4971f3da18d78f..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/GNN_Layer.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/Sparse_Sum_Pooling.cpython-39.pyc b/Python_files/ex3/__pycache__/Sparse_Sum_Pooling.cpython-39.pyc deleted file mode 100644 index ce7e06d3b9ba486d65fc7ae569be075a10b0d7e0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/Sparse_Sum_Pooling.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/Train_GNN.cpython-39.pyc 
b/Python_files/ex3/__pycache__/Train_GNN.cpython-39.pyc deleted file mode 100644 index 9030cd15fd44d5bdd4ce0e1b4410a6bdc9193c67..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/Train_GNN.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/Virtual_Node.cpython-39.pyc b/Python_files/ex3/__pycache__/Virtual_Node.cpython-39.pyc deleted file mode 100644 index addabdb420569eaebc93ee6f20afdf5e792096d7..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/Virtual_Node.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/collate_graphs.cpython-38.pyc b/Python_files/ex3/__pycache__/collate_graphs.cpython-38.pyc deleted file mode 100644 index 08836c17e1f7040118d7b0ef48dbd9458a94c4b1..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/collate_graphs.cpython-38.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/collate_graphs.cpython-39.pyc b/Python_files/ex3/__pycache__/collate_graphs.cpython-39.pyc deleted file mode 100644 index bbfda42515232c1b594de601dc7eb4b6279b95c4..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/collate_graphs.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/customDataset.cpython-310.pyc b/Python_files/ex3/__pycache__/customDataset.cpython-310.pyc deleted file mode 100644 index 6b0aaedf4a839e89fcb94aef7c15e5ef0bb446cc..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/customDataset.cpython-310.pyc and /dev/null differ diff --git a/Python_files/ex3/__pycache__/customDataset.cpython-38.pyc b/Python_files/ex3/__pycache__/customDataset.cpython-38.pyc deleted file mode 100644 index 06f176fb89660efdc3b71ef387dbba46e70a3754..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/customDataset.cpython-38.pyc and /dev/null differ diff --git 
a/Python_files/ex3/__pycache__/customDataset.cpython-39.pyc b/Python_files/ex3/__pycache__/customDataset.cpython-39.pyc deleted file mode 100644 index 4ed4cbd5116f86411f398012a2d3180feeb15572..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/__pycache__/customDataset.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex3/collate_graphs.py b/Python_files/ex3/collate_graphs.py deleted file mode 100644 index ed4118a33ab9153301ff0afebbe59e12693a22d0..0000000000000000000000000000000000000000 --- a/Python_files/ex3/collate_graphs.py +++ /dev/null @@ -1,38 +0,0 @@ -from numpy import int64 -from customDataset import CustomDataset -import pickle -import torch - - -def collate_graphs(data): - #initialization of the tensors which should be returned in the end - edge_list = torch.tensor([]).long() - node_features = torch.tensor([]) - edge_features = torch.tensor([]) - graph_label = torch.tensor([]) - batch_idx = torch.tensor([], dtype=torch.int64) - n_nodes = 0 - - #go through each graph in data and concatenate the graph info to the final tensors - for i, graph in enumerate(data): - i_edge_list, i_node_features, i_edge_features, i_graph_label = graph - #print(i_edge_list.size(), i_node_features.size(), i_edge_features.size(), i_graph_label.size()) - - #add the number of nodes which have been seen before to the edge list and concatenate afterwards - edge_list = torch.cat((edge_list, torch.add(i_edge_list, n_nodes)), dim=1) - node_features = torch.cat((node_features, i_node_features), dim=0) - edge_features = torch.cat((edge_features, i_edge_features), dim=0) - graph_label = torch.cat((graph_label, i_graph_label)) - n_nodes += i_node_features.size(0) - #add the number of the graph n_nodes times to the batch_idx, once for each node - batch_idx = torch.cat((batch_idx, torch.tensor([i]*i_node_features.size(0), dtype=torch.int64))) - - return (edge_list, node_features, edge_features, graph_label, batch_idx) - - - - -#test functionality -# data = 
pickle.load(open("ZINC_Test/data.pkl", "rb")) -# dataset = CustomDataset(data) -# print(collate_graphs(dataset[:4])) \ No newline at end of file diff --git a/Python_files/ex3/customDataset.py b/Python_files/ex3/customDataset.py deleted file mode 100644 index 54c89ff876cca191de32c516d792168b156e606a..0000000000000000000000000000000000000000 --- a/Python_files/ex3/customDataset.py +++ /dev/null @@ -1,46 +0,0 @@ -from torch.utils.data import Dataset -import networkx as nx -import pickle -import numpy as np -import torch - -class CustomDataset(Dataset): - def __init__(self, data): - self.graphs = list() - - for graph in data: - edge_begin = [i for i,j in graph.edges()] - edge_end = [j for i,j in graph.edges()] - edge_list = torch.tensor([edge_begin+edge_end, edge_end+edge_begin]) - node_features = list(nx.get_node_attributes(graph, "node_label").values()) - edge_attrs = list(nx.get_edge_attributes(graph, "edge_label").values()) - - node_features = np.array(node_features) - node_one_hot = np.zeros((node_features.size, 21)) - node_one_hot[np.arange(node_features.size),node_features] = 1 - node_features = torch.tensor(np.array(node_one_hot), dtype=torch.float32) - - edge_attrs = np.array(edge_attrs)-1 # Because minimal edge attr =1, but we want 0 - #checked that there are 3 different edge attributes -> also for test data??? 
- edge_one_hot = np.zeros((edge_attrs.size, 3)) - edge_one_hot[np.arange(edge_attrs.size),edge_attrs] = 1 - edge_features = torch.tensor(np.array(edge_one_hot), dtype=torch.float32) - #double the size - edge_features = edge_features.repeat(2,1) - - graph_label = torch.tensor(graph.graph["label"]) - self.graphs.append((edge_list, node_features, edge_features, graph_label)) - - def __len__(self): - return len(self.graphs) - - def __getitem__(self,i): - return self.graphs[i] - -#test functionality -# data = pickle.load(open("ZINC_Test/data.pkl", "rb")) -# dataset = CustomDataset(data) -# print(dataset[0]) - - - diff --git a/Python_files/ex3/example_model b/Python_files/ex3/example_model deleted file mode 100644 index 5a2d10e0c5df4345526a093d8cccc85fd39aab7b..0000000000000000000000000000000000000000 Binary files a/Python_files/ex3/example_model and /dev/null differ diff --git a/Python_files/ex3/grid_search.py b/Python_files/ex3/grid_search.py deleted file mode 100644 index d24d36795a90b6dad14153f27c2989163d5d8b84..0000000000000000000000000000000000000000 --- a/Python_files/ex3/grid_search.py +++ /dev/null @@ -1,24 +0,0 @@ -from Train_GNN import train_GNN -import itertools - -hidden_dim=[50] #between 50 and 500 -aggr_type=['sum'] #max, sum -num_layers=[5] #between 3 and 10 -drop_out=[0.3] #between 0 and 0.5 -virtual_node=[False] -epochs=[200] -batch_size=[100] -lr=[0.004] - -train_data = "ZINC_Train/data.pkl" -validation_data = "ZINC_Val/data.pkl" -test_data = "ZINC_Test/data.pkl" - -for x in itertools.product(hidden_dim, aggr_type, num_layers,drop_out,virtual_node,epochs, batch_size, lr): - hidden_dim_x, aggr_type_x, num_layers_x, drop_out_x, virtual_node_x, epochs_x, batch_size_x, lr_x = x - model_name=f'\nhidden_dim:{hidden_dim_x},\naggr_type: {aggr_type_x}, \nnum_layers:{num_layers_x}, \ndrop_out:{drop_out_x}' \ - f',\nvirtual_node: {virtual_node_x}, \nepochs:{epochs_x}, \nbatch_size:{batch_size_x}, \nlr:{lr_x}' - train_GNN(train_data,validation_data, 
test_data,hidden_dim_x, aggr_type_x, num_layers_x, drop_out_x, virtual_node_x, epochs_x, batch_size_x, lr_x) - - - diff --git a/Python_files/ex3/main.py b/Python_files/ex3/main.py deleted file mode 100644 index 499a99a1e1618886f43867fe55153a8159eaccf0..0000000000000000000000000000000000000000 --- a/Python_files/ex3/main.py +++ /dev/null @@ -1,36 +0,0 @@ -import argparse -from Train_GNN import train_GNN - - -""" -The following code is the main code where the defined kernels and functions are imported and called. -""" - -# Specified parameters -parser = argparse.ArgumentParser() -parser.add_argument('-ptr', '--p_train', required=True, help='Choose the path of the train dataset') -parser.add_argument('-pv', '--p_val', required=True,help='Choose the path of the validation dataset') -parser.add_argument('-pte', '--p_test', required=True, help='Choose the path of test dataset') -parser.add_argument('-dim', '--dim', type=int,default=250,help='Choose the dimension of hidden layers(from 50 to 500)') -parser.add_argument('-type', '--type',default="sum", help='Choose the type of aggregation(max,sum,mean)') -parser.add_argument('-layers', '--layers',type=int,default=5, help='Choose the number of layers(from 3 to 10)') -parser.add_argument('-drop_out', '--drop_out',type=float,default=0.0, help='Choose the drop_out(from 0.0 to 1.0)') -parser.add_argument('-virtual', '--virtual',default=False, help='Set to True if virtual nodes are required') -parser.add_argument('-epochs', '--epochs',type=int,default=200, help='Choose the epochs for traning') -parser.add_argument('-size', '--size', type=int,default=100,help='Choose the batch_size') -parser.add_argument('-lr', '--lr', type=float,default=0.004 ,help='Choose the learning rate') -args = parser.parse_args() - - - -if __name__ == '__main__': - print('start') - if args.dim<50 or args.dim>500: - raise Exception('please choose the right dimension of hidden layers') - if args.layers<3 or args.layers>10: - raise Exception('please choose 
the right number of layers') - if args.type not in ['sum','mean','max']: - raise Exception('please choose the right aggregation type') - train_GNN(args.p_train,args.p_val,args.p_test,args.dim,args.type,args.layers,args.drop_out,args.virtual,args.epochs,args.size,args.lr) - - diff --git a/Python_files/ex3/models.txt b/Python_files/ex3/models.txt deleted file mode 100644 index 3e26c8a4a2539c52acbd75fb43c56c536fb240ed..0000000000000000000000000000000000000000 --- a/Python_files/ex3/models.txt +++ /dev/null @@ -1,1646 +0,0 @@ - -epochs: 200, batch size: 64, lr: 0.004, p: 1, q: 0.1, C: 2 -Mean accuracy and standard deviation of training data (10-fold cross validation): 0.7622269158559483 0.025883374790469704 -Accuracy of test data: 0.7693726937269373 - -epochs: 200, batch size: 100, lr: 0.004, p: 1, q: 0.1, C: 2 -Mean accuracy and standard deviation of training data (10-fold cross validation): 0.7534242191500257 0.02316723778168579 -Accuracy of test data: 0.7583025830258303 - -epochs: 300, batch size: 64, lr: 0.002, p: 1, q: 0.1, C: 2 -Mean accuracy and standard deviation of training data (10-fold cross validation): 0.7686913295784262 0.02869087631125307 -Accuracy of test data: 0.7675276752767528 - -epochs: 300, batch size: 64, lr: 0.002, p: 0.1, q: 1, C: 2 -Mean accuracy and standard deviation of training data (10-fold cross validation): 0.7543714797747055 0.039911208715787905 -Accuracy of test data: 0.7619926199261993 - -epochs: 300, batch size: 64, lr: 0.004, p: 1, q: 0.1, C: 2 -Mean accuracy and standard deviation of training data (10-fold cross validation): 0.7686593275302952 0.029903388695105046 -Accuracy of test data: 0.7619926199261993 - -epochs: 300, batch size: 100, lr: 0.004, p: 1, q: 1, C: 2 -Mean accuracy and standard deviation of training data (10-fold cross validation): 0.7631251066734938 0.035776125488107165 -Accuracy of test data: 0.7509225092250923 - - -epochs: 300, batch size: 100, lr: 0.004, p: 1, q: 0.1, C: 2 -Mean accuracy and standard deviation of 
training data (10-fold cross validation): 0.7626493428912784 0.03155381382700748 -Accuracy of test data: 0.7583025830258303 - - -epochs: 300, batch size: 100, lr: 0.004, p: 0.1, q: 1, C: 2 -Mean accuracy and standard deviation of training data (10-fold cross validation): 0.7460381464413721 0.03211819120447599 -Accuracy of test data: 0.7712177121771218 - - - - - - - - - - - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.4667202 -average validation error: -0.47257747489394386 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.36752895 -average validation error: -0.40259992775594583 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -0.4166115 -average validation error: -0.49542467540869256 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.41384265 -average validation error: -0.44158400920120766 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.5139987 -average validation error: -0.5298317452966584 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -1.4725455 -average validation error: -1.43828943418985 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.37223825 -average validation error: -0.4084628358929767 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: 
-1.4723507 -average validation error: -1.4380915352058947 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.31871057 -average validation error: -0.34218004397669577 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.3170278 -average validation error: -0.3469915342293098 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.26374215 -average validation error: -0.29314064272114776 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -0.28293592 -average validation error: -0.30658027918165315 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.2836015 -average validation error: -0.35116124452470104 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.21703482 -average validation error: -0.28230360542790733 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.30558816 -average validation error: -0.31960640933556717 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.2557099 -average validation error: -0.3175297842566506 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.19533418 -average validation error: -0.26481716763047736 - -hidden_dim:50, -aggr_type: sum, 
-num_layers:3, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.23739761 -average validation error: -0.2690675333737745 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.5825771 -average validation error: -0.5558921595988213 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.5461032 -average validation error: -0.6405098385875463 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -1.4723723 -average validation error: -1.438060786838585 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.47394076 -average validation error: -0.4584782619354664 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -1.4726509 -average validation error: -1.4381121237230836 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -1.4724145 -average validation error: -1.438162121350819 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.49152443 -average validation error: -0.45008254691859473 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.43390408 -average validation error: -0.43900028391456 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.01 -average training error: 
-0.48545468 -average validation error: -0.510347008038254 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.36847228 -average validation error: -0.4078738188743009 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.36423385 -average validation error: -0.3921542417182936 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -0.41410887 -average validation error: -0.3784205554602086 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.34643945 -average validation error: -0.39716735309915385 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.35523868 -average validation error: -0.3979393225246458 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.4158867 -average validation error: -0.4005699462321936 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.32417744 -average validation error: -0.3494103289031773 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.33013725 -average validation error: -0.41058153724583096 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.1, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.39773017 -average validation error: -0.41833072428306334 - -hidden_dim:50, -aggr_type: sum, 
-num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.6750057 -average validation error: -0.5546574952182709 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.6044949 -average validation error: -0.599882608364278 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -1.472764 -average validation error: -1.4379836279702722 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.5978484 -average validation error: -0.6418436104049324 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.5563509 -average validation error: -0.5375889059295296 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.5357963 -average validation error: -0.5737408195621684 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.62700635 -average validation error: -0.5947555764121353 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.5302713 -average validation error: -0.5658134641618817 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.5481453 -average validation error: -0.5623061870885431 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.001 -average training error: 
-0.4563547 -average validation error: -0.5534530538133695 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.47608712 -average validation error: -0.5616730184507905 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -0.5264704 -average validation error: -0.582425818650925 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.4277502 -average validation error: -0.5832121630217298 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.4515262 -average validation error: -0.6218356003341614 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.53255 -average validation error: -0.6752192331384286 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.42861596 -average validation error: -0.5140639691165998 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.44145882 -average validation error: -0.5777561645460664 - -hidden_dim:50, -aggr_type: sum, -num_layers:3, -drop_out:0.3, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.5151478 -average validation error: -0.629542156028503 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -28.981709 -average validation error: -23.34778699392354 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, 
-drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -1.154716 -average validation error: -1.3228659517941415 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -1.0659926 -average validation error: -1.010159056626202 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -3.4795034 -average validation error: -2.278059638096893 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.9987419 -average validation error: -0.8973980754948571 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.51537853 -average validation error: -0.48018315739085665 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -1.8972931 -average validation error: -1.7252236607334925 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -1.1204959 -average validation error: -0.9627484304132522 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -1.4725046 -average validation error: -1.4383783559156 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.28874433 -average validation error: -0.32616647188080244 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.26062688 -average 
validation error: -0.28475786621688165 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -0.30573055 -average validation error: -0.3561520150732831 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.24805956 -average validation error: -0.3090193579811021 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.18479747 -average validation error: -0.24148606534296413 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.26119098 -average validation error: -0.26591072240652286 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.20824105 -average validation error: -0.2679817645623698 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.16063076 -average validation error: -0.21217554276058218 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.25773942 -average validation error: -0.2555922818752588 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -1.7992882 -average validation error: -1.4381189645481645 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -2.7917342 -average validation error: -1.4380488396835862 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, 
-virtual_node: True, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -7212.687 -average validation error: -1.438431390996033 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -1.4722136 -average validation error: -1.4380896183205187 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -1.4722539 -average validation error: -1.4381075681400834 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -1.4731184 -average validation error: -1.438045688207203 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -1.4722081 -average validation error: -1.4380810214233934 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -1.4724164 -average validation error: -1.4382946549010813 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -1.4727422 -average validation error: -1.4384175317240298 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.3627125 -average validation error: -0.3893569602565258 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.33237323 -average validation error: -0.36335183904884616 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -0.40903333 -average validation 
error: -0.38905875283671776 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.37570217 -average validation error: -0.3793938218792318 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.3042797 -average validation error: -0.40212313660135257 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.4057034 -average validation error: -0.43508057936182015 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.31186524 -average validation error: -0.3899283651168807 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.28087828 -average validation error: -0.3557826428814442 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.1, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.38125485 -average validation error: -0.36950646124681225 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -156.8637 -average validation error: -1.4380932290506898 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -1.8554306 -average validation error: -1.4381146461916505 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -1.5391828 -average validation error: -1.4392096342397271 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: 
True, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -1.4722279 -average validation error: -1.4381150032234726 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -1.4915471 -average validation error: -1.4381589139652788 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -1.5831621 -average validation error: -1.4380819941711962 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -1.4722253 -average validation error: -1.4380941153717577 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -1.4722638 -average validation error: -1.4383493934226572 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -1.4726918 -average validation error: -1.4380831023407519 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -0.49496245 -average validation error: -0.6963116425973712 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -0.48143867 -average validation error: -0.563336774535419 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -0.5450837 -average validation error: -0.6580644121318473 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -0.4434314 -average validation error: 
-0.6235304115590988 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.004 -average training error: -0.44592744 -average validation error: -0.5912402409789501 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -0.518026 -average validation error: -0.6784036650834023 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.001 -average training error: -0.41312832 -average validation error: -0.5817095342017128 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.4016532 -average validation error: -0.5484532128553256 - -hidden_dim:50, -aggr_type: sum, -num_layers:5, -drop_out:0.3, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.01 -average training error: -0.504018 -average validation error: -0.6768312950981199 - -hidden_dim:50, -aggr_type: sum, -num_layers:7, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.001 -average training error: -113780.1 -average validation error: -138480.2238642506 - -hidden_dim:50, -aggr_type: sum, -num_layers:7, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.004 -average training error: -2146.6353 -average validation error: -1168.6775315732536 - -hidden_dim:50, -aggr_type: sum, -num_layers:7, -drop_out:0.0, -virtual_node: True, -epochs:100, -batch_size:100, -lr:0.01 -average training error: -168.3609 -average validation error: -432.90652972184023 - -hidden_dim:50, -aggr_type: sum, -num_layers:7, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.001 -average training error: -5093.3027 -average validation error: -3367.318445852886 - -hidden_dim:50, -aggr_type: sum, -num_layers:7, -drop_out:0.0, -virtual_node: True, -epochs:150, 
-batch_size:100, -lr:0.004 -average training error: -968.1194 -average validation error: -1406.9624986845897 - -hidden_dim:50, -aggr_type: sum, -num_layers:7, -drop_out:0.0, -virtual_node: True, -epochs:150, -batch_size:100, -lr:0.01 -average training error: -44.27439 -average validation error: -69.76587414084275 - -hidden_dim:100, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.124508135 -average validation error: -0.20728237756242743 -average test error: -0.18832414356525987 - -hidden_dim:100, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.587682 -average validation error: -0.5633848028173088 -average test error: -0.6039638999905437 - -hidden_dim:200, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.106642894 -average validation error: -0.20595934339234373 -average test error: -0.17404352608136833 - -hidden_dim:200, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: True, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.5115685 -average validation error: -0.4800128879909753 -average test error: -0.5279192079547793 - -hidden_dim:300, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.13396585 -average validation error: -0.22005548179341713 -average test error: -0.20623027425911278 - -hidden_dim:250, -aggr_type: sum, -num_layers:5, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: -0.11970349 -average validation error: -0.19254521756613394 -average test error: -0.18963071950059385 - -hidden_dim:250, -aggr_type: max, -num_layers:7, -drop_out:0.0, -virtual_node: False, -epochs:200, -batch_size:100, -lr:0.004 -average training error: 
-0.14951281 -average validation error: -0.3010806266031577 -average test error: -0.28804748811759057 - -hidden_dim:250, -aggr_type: max, -num_layers:9, -drop_out:0.0, -virtual_node: False, -epochs:260, -batch_size:100, -lr:0.004 -average training error: -0.13266797 -average validation error: -0.29430219581822165 -average test error: -0.25784107088577 \ No newline at end of file diff --git a/Python_files/ex4/.DS_Store b/Python_files/ex4/.DS_Store deleted file mode 100644 index 0dee250b0f6825d7156479d55566237b8446a261..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/.DS_Store and /dev/null differ diff --git a/Python_files/ex4/.idea/.gitignore b/Python_files/ex4/.idea/.gitignore deleted file mode 100644 index 26d33521af10bcc7fd8cea344038eaaeb78d0ef5..0000000000000000000000000000000000000000 --- a/Python_files/ex4/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml diff --git a/Python_files/ex4/.idea/ex4.iml b/Python_files/ex4/.idea/ex4.iml deleted file mode 100644 index 131d6b3a3aa141b5943842434e28969362699761..0000000000000000000000000000000000000000 --- a/Python_files/ex4/.idea/ex4.iml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> - <component name="NewModuleRootManager"> - <content url="file://$MODULE_DIR$" /> - <orderEntry type="jdk" jdkName="Python 3.9 (torch)" jdkType="Python SDK" /> - <orderEntry type="sourceFolder" forTests="false" /> - </component> -</module> \ No newline at end of file diff --git a/Python_files/ex4/.idea/inspectionProfiles/profiles_settings.xml b/Python_files/ex4/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da2d6447d11dfe32bfb846c3d5b199fc99..0000000000000000000000000000000000000000 --- a/Python_files/ex4/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ -<component name="InspectionProjectProfileManager"> - <settings> - <option 
name="USE_PROJECT_PROFILE" value="false" /> - <version value="1.0" /> - </settings> -</component> \ No newline at end of file diff --git a/Python_files/ex4/.idea/misc.xml b/Python_files/ex4/.idea/misc.xml deleted file mode 100644 index 5421bd6db78a68f607df8c0c8cd022e9ec59c4aa..0000000000000000000000000000000000000000 --- a/Python_files/ex4/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (torch)" project-jdk-type="Python SDK" /> - <component name="PyPackaging"> - <option name="earlyReleasesAsUpgrades" value="true" /> - </component> -</project> \ No newline at end of file diff --git a/Python_files/ex4/.idea/modules.xml b/Python_files/ex4/.idea/modules.xml deleted file mode 100644 index ca8bb245edb5e58b490cdc47a9ba159fb2a556d0..0000000000000000000000000000000000000000 --- a/Python_files/ex4/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/ex4.iml" filepath="$PROJECT_DIR$/.idea/ex4.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/Python_files/ex4/.idea/vcs.xml b/Python_files/ex4/.idea/vcs.xml deleted file mode 100644 index 6c0b8635858dc7ad44b93df54b762707ce49eefc..0000000000000000000000000000000000000000 --- a/Python_files/ex4/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="VcsDirectoryMappings"> - <mapping directory="$PROJECT_DIR$/.." 
vcs="Git" /> - </component> -</project> \ No newline at end of file diff --git a/Python_files/ex4/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Python_files/ex4/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index 363fcab7ed6e9634e198cf5555ceb88932c9a245..0000000000000000000000000000000000000000 --- a/Python_files/ex4/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex4/Citeseer/data.pkl b/Python_files/ex4/Citeseer/data.pkl deleted file mode 100644 index 6283f47dc7d15e285b7407ece488f46117145cca..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/Citeseer/data.pkl and /dev/null differ diff --git a/Python_files/ex4/Cora/data.pkl b/Python_files/ex4/Cora/data.pkl deleted file mode 100644 index c95b258e909ba48f384fbdb0098b4256c7574f4b..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/Cora/data.pkl and /dev/null differ diff --git a/Python_files/ex4/Exercise-4.pdf b/Python_files/ex4/Exercise-4.pdf deleted file mode 100644 index 04fe8df390bb17b4a227dfa32ed7cbc35df45cbc..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/Exercise-4.pdf and /dev/null differ diff --git a/Python_files/ex4/Facebook/data.pkl b/Python_files/ex4/Facebook/data.pkl deleted file mode 100644 index 8a90108a75c4623d52ab391a97c934fdd9fec288..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/Facebook/data.pkl and /dev/null differ diff --git a/Python_files/ex4/PPI/data.pkl b/Python_files/ex4/PPI/data.pkl deleted file mode 100644 index 8020ee0c7d582a385b71fb9eb9c1a91925bf0c0c..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/PPI/data.pkl and /dev/null differ diff --git a/Python_files/ex4/Slides-Node2Vec.pdf b/Python_files/ex4/Slides-Node2Vec.pdf deleted file mode 100644 index 
fdba648719dfaab3443d79fb9ff88de8b814888d..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/Slides-Node2Vec.pdf and /dev/null differ diff --git a/Python_files/ex4/Untitled.ipynb b/Python_files/ex4/Untitled.ipynb deleted file mode 100644 index 6098f38e6ff6e89cde2d8ff92f66fa2285f6ade1..0000000000000000000000000000000000000000 --- a/Python_files/ex4/Untitled.ipynb +++ /dev/null @@ -1,685 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 175, - "id": "2f209db3-8903-435b-94ea-a7eb3bd445e8", - "metadata": {}, - "outputs": [], - "source": [ - "from torch.utils.data import IterableDataset \n", - "import networkx as nx\n", - "import random as rd\n", - "import pickle\n", - "from torch import nn\n", - "import torch\n", - "import numpy as np\n", - "import types" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "22a6e977-6ca1-456a-b980-0f705650a61e", - "metadata": {}, - "outputs": [], - "source": [ - "class pq_walks(IterableDataset):\n", - " \n", - " def __init__(self,G,p,q,l,ln):\n", - " \n", - " super(pq_walks,self).__init__()\n", - " self.p = p\n", - " self.q = q\n", - " self.l = l\n", - " self.ln = ln\n", - " self.s = rd.sample(G.nodes, 1)\n", - " temp=[]\n", - " for s in self.s:\n", - " temp=temp+nx.neighbors(G,s)\n", - " self.v = rd.sample(temp,1)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "a7eddca8-f2ba-47af-8062-b6397cdea353", - "metadata": {}, - "outputs": [], - "source": [ - "with open('Citeseer/data.pkl','rb') as f:\n", - " data=pickle.load(f)" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "id": "eac7a6ea-d3d6-468c-88ba-9c0a7a1310bb", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/z2/0cmnp5ls53354_gqrg36ch8m0000gn/T/ipykernel_4785/2727851552.py:1: DeprecationWarning: Sampling from a set deprecated\n", - "since Python 3.9 and will be removed in a subsequent version.\n", - " 
test=rd.sample(data[0].nodes, 1)\n" - ] - }, - { - "data": { - "text/plain": [ - "[2727]" - ] - }, - "execution_count": 103, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test=rd.sample(data[0].nodes, 1)\n", - "test" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "id": "b204ae10-3634-41e0-82a5-0f82782b5788", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[2625]" - ] - }, - "execution_count": 100, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "temp=[]\n", - "for s in test:\n", - " temp=temp+list(nx.neighbors(data[0],s))\n", - "rd.sample(temp,1)" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "id": "c2df1408-c6c7-4070-80a0-bbda6c4f6d6e", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "unsupported operand type(s) for -: 'dict_keyiterator' and 'int'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [108]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m nei1\u001b[38;5;241m=\u001b[39mnx\u001b[38;5;241m.\u001b[39mneighbors(data[\u001b[38;5;241m0\u001b[39m],\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[43mnei1\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m100\u001b[39;49m\n", - "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'dict_keyiterator' and 'int'" - ] - } - ], - "source": [ - "nei1=nx.neighbors(data[0],1)\n", - "nei1-100" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "f56c7ee7-a430-4994-91ec-d80f5a5046f2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - 
], - "source": [ - "[1/2]*10" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "id": "6d1c1aef-8795-4ce6-a61d-53930d7fe8ff", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "unsupported operand type(s) for -: 'list' and 'list'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [109]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'list' and 'list'" - ] - } - ], - "source": [ - "[1,2,3,4,5]-[5]" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "a7e2f284-e5f5-46ab-bc50-108d397c0ae6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[1]*10" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "id": "bef1bc4f-995a-4d84-b4fa-3ef96fcd1719", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data[0].is_directed()" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "id": "34238af6-c5c2-45d8-9284-0e8a38cf8ca9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[2, 3, 4, 5]" - 
] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a=[1,2,3,4,5]\n", - "b=[2,3,4,5,6]\n", - "tes=[i for i in a if i in b]\n", - "tes" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "id": "f9fe2684-1f2c-40c9-8f0a-950a37d42d6d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[2, 3, 4, 5, 6]" - ] - }, - "execution_count": 115, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a and b" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "379ed5ee-6229-4e63-bec1-03b27503064f", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/z2/0cmnp5ls53354_gqrg36ch8m0000gn/T/ipykernel_4785/285035836.py:1: DeprecationWarning: Sampling from a set deprecated\n", - "since Python 3.9 and will be removed in a subsequent version.\n", - " s=rd.sample(data[0].nodes,1)\n" - ] - }, - { - "data": { - "text/plain": [ - "2527" - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s=rd.sample(data[0].nodes,1)\n", - "s[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "fe6d86e3-7286-4edf-9e6d-847f7382ec9c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([ 0, 1, -1, 0, 0, 0])\n", - "tensor([1, 1, 0, 0, 0, 2])\n", - "tensor([ 0, 0, 0, -1, 0, 0])\n", - "tensor([0, 0, 0, 1, 0, 0])\n", - "tensor([1, 1, 0, 0, 0, 1])\n", - "tensor([ 0, 1, -1, 0, 0, 0])\n" - ] - } - ], - "source": [ - "ten1=torch.tensor([1,2,3,4,5])\n", - "test=nn.functional.one_hot(ten1)\n", - "\n", - "x=torch.randn(6,6).long()\n", - "print(x[5])\n", - "for i in test:\n", - " print(torch.matmul(i,x))" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "id": "762045cb-99b9-42b6-87d1-0871f13d9c0c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "15" - ] - }, - 
"execution_count": 143, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.dot([1,2,3,4,5],[1,1,1,1,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 159, - "id": "e6eaabc9-801b-4245-b5d6-1ca83d60e2a2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0.0117, 0.0317, 0.0861, 0.2341, 0.6364])" - ] - }, - "execution_count": 159, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "li1=[1,2,3,4,5]\n", - "li2=[4,5,6,7,8]\n", - "soft=nn.Softmax(dim=0)\n", - "soft(torch.tensor(li2).float())" - ] - }, - { - "cell_type": "code", - "execution_count": 164, - "id": "50484e06-cc26-41ba-8989-1aba7b7cad2a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2]" - ] - }, - "execution_count": 164, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pro=[100,1,1]\n", - "choice=[1,2,3]\n", - "test=rd.choices(choice,weights=pro,k=20)\n", - "test" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "c559309a-a4bf-48d5-b8a1-2d7996920e95", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{4}" - ] - }, - "execution_count": 174, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "set1=set([1,2,3,4])\n", - "set2=set([3,1,2,5])\n", - "test=set1.difference(set2)\n", - "test" - ] - }, - { - "cell_type": "code", - "execution_count": 182, - "id": "c962e33e-e308-4379-9a8c-f5ec4f5a125c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<generator object fib at 0x7fe6f3053120>" - ] - }, - "execution_count": 182, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def fib(end = 1000):\n", - " prev,curr=0,1\n", - " while curr < end:\n", - " yield (curr)\n", - " prev,curr=curr,curr+prev\n", - "\n", - "fib()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 183, - "id": 
"fea5eb8e-45c8-42ba-9288-061e7ee2260f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3312" - ] - }, - "execution_count": 183, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(data[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 191, - "id": "b8df28da-52f2-466d-a81c-132071557994", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'Graph' object has no attribute 'items'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [191]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m()\n", - "\u001b[0;31mAttributeError\u001b[0m: 'Graph' object has no attribute 'items'" - ] - } - ], - "source": [ - "data[0].items()" - ] - }, - { - "cell_type": "code", - "execution_count": 193, - "id": "46dc533b-0ac6-4428-ad24-fb5b646aadfe", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "module networkx has no attribute ger_node_attributes", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [193]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mger_node_attributes\u001b[49m(data[\u001b[38;5;241m0\u001b[39m],\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnode_label\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mvalues()\n", - "File 
\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/networkx/__init__.py:51\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwrite_yaml\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 44\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mwrite_yaml has been removed from NetworkX, please use `yaml`\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdirectly:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis message will be removed in NetworkX 3.0.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 50\u001b[0m )\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodule \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mAttributeError\u001b[0m: module networkx has no attribute ger_node_attributes" - ] - } - ], - "source": [ - "nx.ger_node_attributes(data[0],'node_label').values()" - ] - }, - { - "cell_type": "code", - "execution_count": 207, - "id": "ac53806c-560d-48b5-8a20-9699eb0996f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6" - ] - }, - "execution_count": 207, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ 
- "test=[(data[0].nodes[i]['node_label']) for i in data[0]] \n", - "max(test)" - ] - }, - { - "cell_type": "code", - "execution_count": 201, - "id": "752fa9be-38f4-46a1-84f7-51e583efe9a8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 201, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data[0].nodes[10]['node_label']\n" - ] - }, - { - "cell_type": "code", - "execution_count": 209, - "id": "6687ccc8-b84e-4e1f-89b2-2e2ae6229751", - "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "can't use starred expression here (434312620.py, line 2)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m Input \u001b[0;32mIn [209]\u001b[0;36m\u001b[0m\n\u001b[0;31m *set1\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m can't use starred expression here\n" - ] - } - ], - "source": [ - "set1=(1,2)\n", - "*set1" - ] - }, - { - "cell_type": "code", - "execution_count": 217, - "id": "0e7fdbfe-8f7f-4455-ac9b-d304413e8438", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'Graph' object has no attribute 'bridges'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [217]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbridges\u001b[49m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'Graph' object has no attribute 'bridges'" - ] - } - ], - "source": [ - "data[0].bridges" - ] - }, - { - "cell_type": "code", - "execution_count": 223, - "id": "9ea9357a-aa94-4c99-b6d2-33a3f07c36c7", - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "1386" - ] - }, - "execution_count": 223, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(list(nx.bridges(data[0])))" - ] - }, - { - "cell_type": "code", - "execution_count": 227, - "id": "fba4427d-c1f8-470e-ba77-bb58feba00be", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0, 1, 2, 3, 4])" - ] - }, - "execution_count": 227, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list1=np.array([1,2,3,4,5])\n", - "list1-1" - ] - }, - { - "cell_type": "code", - "execution_count": 225, - "id": "5c6b142a-b453-41c3-adb3-30e6dedc787c", - "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "can't use starred expression here (108652081.py, line 1)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m Input \u001b[0;32mIn [225]\u001b[0;36m\u001b[0m\n\u001b[0;31m *(1,2)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m can't use starred expression here\n" - ] - } - ], - "source": [ - "*(1,2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6db58072-810a-4f63-a099-424051950c28", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Python_files/ex4/__pycache__/Node2Vec.cpython-39.pyc b/Python_files/ex4/__pycache__/Node2Vec.cpython-39.pyc deleted file mode 100644 index 19123aa87efbb73c66c4af01d4aed112e635ffe4..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/__pycache__/Node2Vec.cpython-39.pyc and /dev/null differ diff --git 
a/Python_files/ex4/__pycache__/link_prediction.cpython-39.pyc b/Python_files/ex4/__pycache__/link_prediction.cpython-39.pyc deleted file mode 100644 index 44da67bf874865b09c3d74999067d1b6594d2e21..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/__pycache__/link_prediction.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex4/__pycache__/logistic_regression.cpython-39.pyc b/Python_files/ex4/__pycache__/logistic_regression.cpython-39.pyc deleted file mode 100644 index 31dca1a3c2398932ba6335810b70cea938e57512..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/__pycache__/logistic_regression.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex4/__pycache__/logistic_regression_link.cpython-39.pyc b/Python_files/ex4/__pycache__/logistic_regression_link.cpython-39.pyc deleted file mode 100644 index 91e69e7567162787f8f7e07325c10f86f5d500ab..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/__pycache__/logistic_regression_link.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex4/__pycache__/random_walks.cpython-39.pyc b/Python_files/ex4/__pycache__/random_walks.cpython-39.pyc deleted file mode 100644 index 54787273f4f372c39e1960e49afa0c5b8cd12bc0..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/__pycache__/random_walks.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex4/__pycache__/train_node2vec.cpython-39.pyc b/Python_files/ex4/__pycache__/train_node2vec.cpython-39.pyc deleted file mode 100644 index e7b81478f2ebe4ff54d7ad853da04b4f3280addd..0000000000000000000000000000000000000000 Binary files a/Python_files/ex4/__pycache__/train_node2vec.cpython-39.pyc and /dev/null differ diff --git a/Python_files/ex4/link_prediction.py b/Python_files/ex4/link_prediction.py deleted file mode 100644 index 9751888936d60922627c2c04bd40329457c50bcb..0000000000000000000000000000000000000000 --- a/Python_files/ex4/link_prediction.py +++ /dev/null 
@@ -1,95 +0,0 @@ -from sklearn.linear_model import LogisticRegression -import pickle -import torch -import networkx as nx -import random -import numpy as np - -from train_node2vec import train_node2vec - - - -def link_prediction(network_path): - """ - - :param network_path:graph - :return:train_edges,eval_edges - """ - - with open(network_path, 'rb') as f: - Graph = pickle.load(f) - Graph = Graph[0] - - #make sure the operation on the copied graph does not change the original graph - Graph_copy = Graph.copy() - num_edges = Graph.number_of_edges() - - #number of 20% of all the edges - perc20_edges = int(num_edges*0.2) - - #number of connected components - num_con_components = nx.number_connected_components(Graph) - - E_eval = [] - i=0 - - #Make sure that the original connected components are still connected after removing the eval_edges. - while i < perc20_edges: - # to make it more effective,remove 100 edges everytime and check the number of connected components. - if (perc20_edges-i) >= 100: - sampled_edges = random.sample(Graph_copy.edges,100) - else: - sampled_edges = random.sample(Graph_copy.edges, perc20_edges-i) - - Graph_copy.remove_edges_from(sampled_edges) - - #if the number of connected components changed,then do not remove those edges. - if nx.number_connected_components(Graph_copy) != num_con_components: - # print("Bridge edge was used. 
Number connected components changed.") - Graph_copy.add_edges_from(sampled_edges) - - else: - E_eval += sampled_edges - i += 100 - - #all the not selected edges are train_edges - E_train = list(set(Graph.edges).difference(set(E_eval))) - return E_train, E_eval - -def sample_non_edges(network_path, len_train, len_eval): - """ - - :param network_path: graph - :param len_train: size of train_edges - :param len_eval: size of eval_edges - :return:list of train_neg_samples and eval_train_samples - """ - with open(network_path, 'rb') as f: - Graph = pickle.load(f) - Graph = Graph[0] - - #number of edges and negative samples should be same - number_to_sample = len_train + len_eval - - non_edges = list(nx.non_edges(Graph)) - - sampled_non_edges = random.sample(non_edges, number_to_sample) - - #seperate nagative samples into train and eval - return sampled_non_edges[:len_train], sampled_non_edges[len_train:] - -def element_wise_product(X, edges): - """ - - :param X: node embeddings - :param edges: edges that need to be computed edge embeddings - :return:edge embeddings - """ - node_1, node_2 = list(zip(*edges)) - node_1 = np.array(node_1) - node_2 = np.array(node_2) - hadamard_prod = np.multiply(X[node_1-1], X[node_2-1]) - - return hadamard_prod - - diff --git a/Python_files/ex4/logistic_regression.py b/Python_files/ex4/logistic_regression.py deleted file mode 100644 index bcc3437b3776436ae5b962928336bbf52ef17e12..0000000000000000000000000000000000000000 --- a/Python_files/ex4/logistic_regression.py +++ /dev/null @@ -1,46 +0,0 @@ -from sklearn.linear_model import LogisticRegression -import pickle -import torch -from sklearn.model_selection import train_test_split, cross_val_score -from train_node2vec import train_node2vec -from node2vec import node2vec -from sklearn.metrics import accuracy_score - - - -def logistic_regression(path,p,q,lr,batch_size,epoch,C): - """ - - :param path: path of dataset - :param p: p of pq_walks - :param q: q of pq_walks - :param lr: learning rate 
- :param batch_size: batch_size of training - :param epoch: number of epochs of training - :param C: inverse of regularization strength - :return: accuracy of logistic_regression - """ - #open file - with open(path, 'rb') as f: - Graph = pickle.load(f) - - #get class of node for every node(targets) - y = [Graph[0].nodes[i]['node_label'] for i in Graph[0]] - - #get node embeddings for every node(features) - X = train_node2vec(path, p=p, q=q, l=5, ln=5, lr=lr,batch_size=batch_size,epoch=epoch ) - - X = X.numpy() - - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - clf = LogisticRegression(random_state=0,C=C).fit(X_train, y_train) - - #10-fold cross validation - accuracy= cross_val_score(clf, X_train, y_train, cv=10) - - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print ("Accuracy of test data:", accuracy_score(y_test,clf.predict(X_test))) - - diff --git a/Python_files/ex4/logistic_regression_grid.py b/Python_files/ex4/logistic_regression_grid.py deleted file mode 100644 index cbd7f1ab0be73b396adad60c25523515b1fb0104..0000000000000000000000000000000000000000 --- a/Python_files/ex4/logistic_regression_grid.py +++ /dev/null @@ -1,42 +0,0 @@ -from sklearn.linear_model import LogisticRegression -import pickle -import torch -from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.metrics import accuracy_score -from sklearn.model_selection import GridSearchCV -from sklearn.linear_model import LogisticRegression -import numpy as np -from node2vec import node2vec -from train_node2vec import train_node2vec - - -def logistic_regression(Graph_path, X): - - with open(Graph_path, 'rb') as f: - Graph = pickle.load(f) - - y = [Graph[0].nodes[i]['node_label'] for i in Graph[0]] # Do we need to transform it eg torch or np.array? 
- - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - #clf = LogisticRegression(random_state=0).fit(X_train, y_train) - - - grid={"C":np.logspace(-3,3,7)}# l1 lasso l2 ridge - logreg=LogisticRegression() - logreg_cv=GridSearchCV(logreg,grid,cv=10) - logreg_cv.fit(X_train,y_train) - - accuracy= cross_val_score(logreg_cv, X_train, y_train, cv=10 ) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print ("Accuracy of test data:", accuracy_score(y_test,logreg_cv.predict(X_test))) - - -path='/Users/haron/Desktop/GraphLearning/Ex04/datasets/Citeseer/data.pkl' - -x=train_node2vec(path,0.1,1,5,5,epoch=300,batch_size=128,lr=0.003) - -x=x.numpy() - -logistic_regression(path,x) diff --git a/Python_files/ex4/logistic_regression_grid_nn.py b/Python_files/ex4/logistic_regression_grid_nn.py deleted file mode 100644 index 2ea8183bd6c693018c93d76263eb9e770f50a72d..0000000000000000000000000000000000000000 --- a/Python_files/ex4/logistic_regression_grid_nn.py +++ /dev/null @@ -1,45 +0,0 @@ -from sklearn.linear_model import LogisticRegression -import pickle -import torch -from sklearn.model_selection import train_test_split, cross_val_score -from train_node2vec import train_node2vec -from node2vec import node2vec -from sklearn.metrics import accuracy_score - - - -def logistic_regression(Graph_path, X, C=1): - - with open(Graph_path, 'rb') as f: - Graph = pickle.load(f) - - y = [Graph[0].nodes[i]['node_label'] for i in Graph[0]] # Do we need to transform it eg torch or np.array? 
- - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2) - - clf = LogisticRegression(random_state=0, C=C).fit(X_train, y_train) - - accuracy= cross_val_score(clf, X_train, y_train, cv=10 ) - print ("Mean accuracy and standard deviation of training data (10-fold cross validation):", - accuracy.mean(), accuracy.std()) - print ("Accuracy of test data:", accuracy_score(y_test,clf.predict(X_test))) - -path='Cora/data.pkl' - -import itertools - -epochs=[200, 300] -batch_size=[64, 100] -lr=[0.002, 0.004] -pq = [(1,1), (1,0.1), (0.1,1)] -c = [1,2] - -for x in itertools.product(epochs, batch_size, lr, pq, c): - i_epochs, i_batch_size, i_lr, i_pq, i_c = x - i_p, i_q = i_pq - print(f"epochs: {i_epochs}, batch size: {i_batch_size}, lr: {i_lr}, p: {i_p}, q: {i_q}, C: {i_c}") - x=train_node2vec(path,i_p,i_q,5,5,epoch=i_epochs,batch_size=i_batch_size,lr=i_lr) - x=x.numpy() - logistic_regression(path,x, i_c) - - diff --git a/Python_files/ex4/logistic_regression_link.py b/Python_files/ex4/logistic_regression_link.py deleted file mode 100644 index c5c79ebc38122062f258c34820cbdc865672861a..0000000000000000000000000000000000000000 --- a/Python_files/ex4/logistic_regression_link.py +++ /dev/null @@ -1,50 +0,0 @@ -from sklearn.linear_model import LogisticRegression -import numpy as np -from sklearn.metrics import accuracy_score -from train_node2vec import train_node2vec -from link_prediction import sample_non_edges,element_wise_product,link_prediction -from sklearn.metrics import roc_auc_score - - -def logistic_regression_link(path,lr,batch_size,epoch,C): - """ - - :param path: path of dataset - :param lr: learning rate - :param batch_size: batch_size of training - :param epoch: number of epochs of training - :param C: inverse of regularization strength - :return: accuracy of logistic_regression for link - """ - - #set of train_edges and eval_edges - E_train, E_eval = link_prediction(path) - - #set of train_non_edges and eval_non_edges - N_train, 
N_eval = sample_non_edges(path, len(E_train), len(E_eval) ) - - # X is node embeddings for graph - X = train_node2vec(path, p=1,q=1, l=5,ln=5,epoch=epoch, batch_size=batch_size,lr=lr, edges_to_delete =E_eval) - - # get edge embeddings using element wise product of the connected node embeddings - edge_train = element_wise_product(X, E_train+N_train) - - # label the edges as 1 and non_edges as 0 - edge_train_true = [1]*len(E_train)+[0]*len(N_train) - - # get non-edge embeddings using element wise product of the connected node embeddings - edge_eval = element_wise_product(X,E_eval+N_eval) - edge_eval_true = [1]*len(E_eval)+[0]*len(N_eval) - - clf = LogisticRegression(random_state=0,C=C).fit(edge_train, edge_train_true) - - roc_score = roc_auc_score(edge_eval_true, clf.predict_proba(edge_eval)[:, 1]) - - accuracy = accuracy_score(edge_eval_true, clf.predict(edge_eval)) - print("Accuracy of test data:",accuracy,'\nRoc_score:',roc_score) - return accuracy,roc_score - - - - - diff --git a/Python_files/ex4/main.py b/Python_files/ex4/main.py deleted file mode 100644 index 18af4b269b8e7385748edba388918eaa51ffd70d..0000000000000000000000000000000000000000 --- a/Python_files/ex4/main.py +++ /dev/null @@ -1,39 +0,0 @@ -import argparse -import warnings -from logistic_regression_link import logistic_regression_link -from logistic_regression import logistic_regression - -""" -The following code is the main code where the defined type of classification and paramaters of training -""" - -# Specified parameters -parser = argparse.ArgumentParser() -parser.add_argument('-path', '--path', type=str,required=True ,help='Choose the path of the dataset') -parser.add_argument('-type', '--type', type=str,required=True,help='Choose the type of classification') -parser.add_argument('-p', '--p', type=float,default=1 ,help='Choose the p of pq_walks') -parser.add_argument('-q', '--q', type=float,default=1 ,help='Choose the q of pq_walks') -parser.add_argument('-lr', '--lr', 
type=float,default=0.004 ,help='Choose the learning rate') -parser.add_argument('-batch_size', '--batch_size', type=int,default=200 ,help='Choose the batch_size') -parser.add_argument('-epoch', '--epoch', type=int,default=200,help='Choose the number of epochs') -parser.add_argument('-C', '--C', type=float,default=2 ,help='Choose the C of logistic_regreesion') - -args = parser.parse_args() - - - -if __name__ == '__main__': - print('start') - if args.type not in ['link','node']: - raise Exception('please choose the right classification type') - if args.type == 'link': - if ('Citeseer' or 'Cora') in args.path: - warnings.warn('This dataset is used for node classification') - logistic_regression_link(path=args.path,lr=args.lr,batch_size=args.batch_size,C=args.C,epoch=args.epoch) - if args.type == 'node': - if ('Facebook' or 'PPI') in args.path: - warnings.warn('This dataset is used for link classification') - logistic_regression(path=args.path,lr=args.lr,batch_size=args.batch_size,C=args.C,epoch=args.epoch,p=args.p,q=args.q) - - - diff --git a/Python_files/ex4/node2vec.py b/Python_files/ex4/node2vec.py deleted file mode 100644 index 0a432e3953ef34fa52a6c02ee6f485dbbbfd6ea1..0000000000000000000000000000000000000000 --- a/Python_files/ex4/node2vec.py +++ /dev/null @@ -1,32 +0,0 @@ -import torch -import numpy as np -from torch.utils.data import IterableDataset, get_worker_info, DataLoader -import pickle -from random_walks import pq_walks - -# path='Citeseer/data.pkl' -# with open(path,'rb') as f: -# data=pickle.load(f) - -class node2vec(torch.nn.Module): - def __init__(self,input_dim): - super(node2vec, self).__init__() - self.X=torch.nn.Parameter(torch.zeros(input_dim,128)) - torch.nn.init.kaiming_normal_(self.X) - self.softmax=torch.nn.Softmax(dim=0) - - def forward(self,s,w,neg): - temp = self.X[torch.cat((w,neg),1)-1] - temp = torch.matmul(temp, torch.unsqueeze(self.X[s-1],2)) #node starts from 1 but index starts from 0 - # 
temp=([torch.matmul(self.X[s],torch.transpose(self.X[i],0,1)) for i in torch.cat((w,neg),1)]) - temp=torch.squeeze(temp) - temp=self.softmax(temp) - temp=torch.log(temp[:,:5]) - return -torch.sum(temp,dim=0) - - -#test functionality -# model=node2vec(5) -# print(model) - -#model.train() \ No newline at end of file diff --git a/Python_files/ex4/random_walks.py b/Python_files/ex4/random_walks.py deleted file mode 100644 index 0d945c81179e82e4b42d47d008968e3e138e7d4e..0000000000000000000000000000000000000000 --- a/Python_files/ex4/random_walks.py +++ /dev/null @@ -1,120 +0,0 @@ -import torch -from torch.utils.data import IterableDataset, get_worker_info, DataLoader -import networkx as nx -import random as rd -#from node2vec import Node2Vec -import numpy as np -import pickle - -class pq_walks(IterableDataset): - """ - create a subclass of an iterable dataset - """ - - def __init__(self, G, p, q, l, ln): - """ - initialize the iterable dataset - - G: one graph - p: 1/p is probability for the second last node to be sampled again - q: 1/q is probability for "unknown" nodes to be sampled - l: length of a sampled walk - ln: length of a negative sample - """ - super(pq_walks, self).__init__() - # set all the necessary variables - self.l = l - self.ln = ln - self.G=G - self.p = p - self.q = q - - - def one_random_walk(self): - """ - samples one random walk of length self.l from the graph self.G, as well as random nodes - of length self.ln - - :return: - s = walk[0]: the starting node of the walk - walk[1:]: the rest of the walk (length self.l-1) - neg: the negative samples of length self.ln - """ - no_neighbor = True - # sample a starting node s - while no_neighbor: - s = rd.sample(self.G.nodes, 1)[0] - # sometimes a node only has no neighbors or only itself as neighbor, - # then algorithm doesn't work. 
Filter those cases out here - if len(self.G[s])==0: - no_neighbor = True - elif len(self.G[s])==1 and list(self.G[s].keys())[0]==s: - no_neighbor = True - else: - no_neighbor = False - - # sample the second node with equal probabilities - v = rd.sample(list(self.G[s]),1)[0] - - walk=[s,v] - neg=[] - for _ in range(self.l-1): - nodes=[s] - - # get neighbors of the last two nodes - neighbors_s=set(self.G[s]) - neighbors_v=set(self.G[v]) - - # get common neighbors of s and v - node_1 = list(neighbors_s.intersection(neighbors_v)) - nodes+=node_1 - - # get the other neighbors of v (excluding s) - neighbors_s.add(s) - node_q = list(neighbors_v.difference(neighbors_s)) - nodes+=node_q - - v = s - # set the probabilities to be sampled for s, then the common neighbors, then all others - pro = [1/self.p] + [1]*len(node_1) + [1/self.q]*len(node_q) - # choose a neighbor and append - s=rd.choices(nodes,pro,k=1)[0] - walk.append(s) - - # sample the negative samples from all the nodes excluding the ones in the walk - nodes_to_sample_from = list(set(self.G).difference(set(walk))) - neg = rd.sample(nodes_to_sample_from, self.ln) - - return walk[0], walk[1:], neg - - def __iter__(self): - """ - implements the iter-function of the iterable dataset - - :return: through yield returns iterable of torch tensors - s: starting node of random walk - walk: rest of random walk - neg_samples: negative samples - """ - # check if there is more than one worker working on the data - worker_info = get_worker_info() - # if worker info was specified, divide the work by the number of workers - if worker_info: - loopsize = int(np.ceil(1000/worker_info.num_workers)) - else: - loopsize = 1000 - - # sample a walk - for _ in range(loopsize): - s, walk, neg_samples = self.one_random_walk() - yield torch.tensor(s), torch.tensor(walk), torch.tensor(neg_samples) - - -#test functionality - -#path='Citeseer/data.pkl' -#with open(path,'rb') as f: -# data=pickle.load(f) -# train_loader = 
DataLoader(pq_walks(data[0], 1,2,5,5), batch_size=64) -# for s, walk, neg_samples in train_loader: -# print("s:", s, "\nwalk:", walk, "\nneg_samples:", neg_samples) diff --git a/Python_files/ex4/test.py b/Python_files/ex4/test.py deleted file mode 100644 index dce328c7c9dfabe10a182e1988e653271931a793..0000000000000000000000000000000000000000 --- a/Python_files/ex4/test.py +++ /dev/null @@ -1,17 +0,0 @@ -import numpy as np - -list1 = [1.123213,2,3,4] -list2 = [1,2,3,4] -result = np.multiply(list1,list2) -result = [round(element,2) for element in result] - -print('press enter to continue') -while True: - x = input() - if len(x)==0: - break - - - -print(result) - diff --git a/Python_files/ex4/train_node2vec.py b/Python_files/ex4/train_node2vec.py deleted file mode 100644 index 5dc8c4dd37327a109e3c1a657cfd2f489bd3294f..0000000000000000000000000000000000000000 --- a/Python_files/ex4/train_node2vec.py +++ /dev/null @@ -1,62 +0,0 @@ -from node2vec import node2vec -import pickle -import torch -from torch.utils.data import DataLoader -from random_walks import pq_walks - - -def train_node2vec(datapath,p,q,l,ln,epoch=300,batch_size=64,lr=0.002, edges_to_delete = None): - with open(datapath, 'rb') as f: - data = pickle.load(f) - - #only one graph but stored in the list - data = data[0] - - if edges_to_delete: - data.remove_edges_from(edges_to_delete) - - device='cpu' - - train_dataset=pq_walks(data, p, q, l, ln) - - model=node2vec(len(data)) - model.train() - - model.to(device) - - opt=torch.optim.Adam(model.parameters(),lr=lr) - - lossl1=torch.nn.L1Loss() - # MSE_loss=torch.nn.MSELoss() - #Training Loop - - for i in range(epoch): - - train_loader = DataLoader(train_dataset, batch_size=batch_size) - - for s, walk, neg_samples in train_loader: - - opt.zero_grad() - - s=s.to(device) - walk=walk.to(device) - neg_samples=neg_samples.to(device) - - y_pred=model(s,walk,neg_samples) - - loss = sum(y_pred) - - # loss=lossl1(y_pred,torch.zeros(len(y_pred))) - # 
loss=MSE_loss(y_pred,torch.zeros(len(y_pred))) - - loss.backward() - - opt.step() - - - # print(f'loss for the {i+1}th epoch:',loss) - - return [x.data for x in model.parameters()][0] - - -#train_node2vec('/Users/haron/Desktop/GraphLearning/Ex04/datasets/Citeseer/data.pkl',1,2,5,5) \ No newline at end of file diff --git a/Python_files/ex5 b/Python_files/ex5 deleted file mode 160000 index 1282ed24d3644b9fc1592ee6281e89a4d19ce52f..0000000000000000000000000000000000000000 --- a/Python_files/ex5 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1282ed24d3644b9fc1592ee6281e89a4d19ce52f