Welcome to CodeSnippets! Here you can search for code snippets filtered by category and by CRISP-DM phase. As an anonymous user you can only search the snippets; after registering, you can also add your own categories and upload your own snippets to help others.

Importing Required Libraries

Importing Pandas library

Business understanding
import pandas as pd

Importing train_test_split library

Business understanding
from sklearn.model_selection import train_test_split

Importing DecisionTreeClassifier library

Business understanding
from sklearn.tree import DecisionTreeClassifier

Importing metrics library

Business understanding
from sklearn import metrics

Importing matplotlib library

Business understanding
import matplotlib.pyplot as plt

Importing tree library

Business understanding
from sklearn import tree

Importing Counter library

Business understanding
from collections import Counter

Importing confusion_matrix library

Business understanding
from sklearn.metrics import confusion_matrix

Importing export_graphviz library

Business understanding
from sklearn.tree import export_graphviz

Importing StringIO library

Business understanding
from six import StringIO

Importing Image library

Business understanding
from IPython.display import Image

Importing pydotplus library

Business understanding
import pydotplus

Importing NumPy library

Business understanding
import numpy as np

Importing Seaborn library

Business understanding
import seaborn as sns

Importing DecisionTreeRegressor for regression prediction

Business understanding
from sklearn.tree import DecisionTreeRegressor

Importing accuracy_score library

Business understanding
from sklearn.metrics import accuracy_score

Importing precision_recall_fscore_support

Business understanding
from sklearn.metrics import precision_recall_fscore_support

Importing RandomForestClassifier

Business understanding
from sklearn.ensemble import RandomForestClassifier

Importing StandardScaler

Business understanding
from sklearn.preprocessing import StandardScaler

Importing LogisticRegression

Business understanding
from sklearn.linear_model import LogisticRegression

Importing statsmodels.api

Business understanding
import statsmodels.api as sm

Importing ROC AUC metrics

Business understanding
from sklearn.metrics import roc_auc_score

Importing ROC curve

Business understanding
from sklearn.metrics import roc_curve

Importing numpy library for array operations

Business understanding
from numpy import array

Importing colors for visualization

Business understanding
from matplotlib.colors import ListedColormap

Setting inline mode for Jupyter plot display

Business understanding
%matplotlib inline

Importing library for random value generation

Business understanding
import random

Importing library for synthetic data generation

Business understanding
from sklearn import datasets

Importing tools for cluster data creation

Business understanding
from sklearn.datasets import make_blobs

Getting input data matrix shape

Data preparation
m, n = X.shape

Creating bias column for data matrix

Data preparation
bias = np.ones((X.shape[0], 1))

Expanding data matrix with bias column

Data preparation
biased_X = np.hstack((bias, X))

Initializing random number generator with fixed seed

Data preparation
random_gen = np.random.RandomState(1)

Importing Keras Dense layer for model building

Business understanding
from tensorflow.keras.layers import Dense

Importing Keras Sequential API

Business understanding
from tensorflow.keras import Sequential

Importing one-hot encoding tools

Business understanding
from tensorflow.keras.utils import to_categorical

Loading Fashion MNIST dataset

Business understanding
from keras.datasets import fashion_mnist

Importing one-hot encoding tools

Business understanding
from keras.utils import to_categorical

Importing required Keras API components (Sequential, Model)

Business understanding
from keras.models import Sequential, Model

Importing required Keras API components (Dense, Dropout, Flatten)

Business understanding
from keras.layers import Dense, Dropout, Flatten

Importing required Keras API components (Conv2D, MaxPooling2D)

Business understanding
from keras.layers import Conv2D, MaxPooling2D

Importing required Keras API components (LeakyReLU)

Business understanding
from keras.layers import LeakyReLU

Importing regex for text processing

Business understanding
import re

Importing NLP toolkit (NLTK)

Business understanding
import nltk

Downloading WordNet lexical database

Business understanding
nltk.download("wordnet")

Importing WordNet corpus

Business understanding
from nltk.corpus import wordnet

Importing HTTP request library

Business understanding
import requests

Importing HTML parser from lxml

Business understanding
from lxml import html

Downloading NLTK tokenizer module

Business understanding
nltk.download('punkt')

Importing word tokenizer

Business understanding
from nltk.tokenize import word_tokenize

Importing FreqDist for frequency analysis

Business understanding
from nltk.probability import FreqDist

Importing BeautifulSoup for advanced HTML parsing

Business understanding
from bs4 import BeautifulSoup

Downloading NLTK English stopwords

Business understanding
nltk.download('stopwords')

Importing stopwords corpus

Business understanding
from nltk.corpus import stopwords

Loading Data

Loading dataset

Data understanding
golf = pd.read_csv('golf_nominal.csv', sep=';')

Loading dataset

Data understanding
diabetes = pd.read_csv('diabetes_inbalanced.csv', index_col=0)

Loading dataset

Data understanding
titanic = pd.read_csv('titanic.csv')

Loading Possum dataset

Data understanding
df = pd.read_csv('possum.csv')

Loading dataset

Data understanding
df = pd.read_csv('ice_cream_data.csv', sep=";")

Loading dataset

Data understanding
df = pd.read_csv('Heart.csv')

Loading data from GitHub repository

Data understanding
url = "https://raw.githubusercontent.com/Statology/Python-Guides/main/default.csv"
data = pd.read_csv(url)

Defining training data (input examples and expected outputs)

Data preparation
training_data = [
    (array([121,16.8]), 1),
    (array([114,15.2]), 1),
    (array([210,9.4]), -1),
    (array([195,8.1]), -1),
] 

Alternative training set for XOR problem

Data preparation
training_data = [
    (array([3,-2]), -1),
    (array([3,1]), 1),
    (array([2,0]), -1),
] 

Generating linearly separable data with two classes

Data preparation
X, y = datasets.make_blobs(n_samples=100,n_features=2,
                           centers=2,cluster_std=1,
                           random_state=3) 

Generating multidimensional data with four clusters

Data preparation
X, y = make_blobs(n_samples=400, n_features=3, centers=4, cluster_std=1, random_state=3)

Test data for model error calculation

Data preparation
mal_byt = np.array([1,2,3,4])
bol = np.array([1,0,2,5])

Defining input data for XOR problem (2D matrix)

Data preparation
x=np.array([[0,0,1,1],[0,1,0,1]])
print(x) 

Defining target values for XOR problem

Data preparation
y=np.array([[0,1,1,0]])
print(y) 

Loading darts dataset

Data understanding
data = pd.read_csv('darts.csv')

Selecting data for two specific competitors

Data preparation
vyber = data[(data.competitor == 'Michael') | (data.competitor == 'Steve')]

Loading pre-split Fashion MNIST training and test sets

Data understanding
(train_X,train_Y), (test_X,test_Y) = fashion_mnist.load_data()

Opening text file for reading

Data understanding
text_file = open('human_rights.txt', 'r')

Loading text file content

Data understanding
h_rights = text_file.read()

Loading tweets from CSV file

Data understanding
tweets = pd.read_csv("tweets.csv")

Sample tweet for regex demonstration

Data preparation
tweet = "@nltk T awesome! #regex #pandas #python"

Sample text for NLP operations

Data preparation
text = "The cat is in the box. The cat likes the box. The box is over the cat."

Data Visualization

Displaying training data

Data understanding
X_train

Displaying test data

Data understanding
X_test

Printing contents of 'person' variable

Data preparation
osoba

Displaying scaled training data

Data understanding
X_train_scaled

Displaying first 4 rows of expanded matrix

Data preparation
biased_X[:4]

Displaying first 4 predicted values

Data preparation
output_pred[:4]

Displaying first 4 model errors

Data preparation
errors[:4]

Data Preprocessing

Encoding categories

Data preparation
golf = pd.get_dummies(golf, columns = ["Outlook"], drop_first = False)

Encoding additional categories

Data preparation
golf = pd.get_dummies(golf, columns = ["Temperature", "Humidity", "Windy"], drop_first = False)

Encoding gender

Data preparation
titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})

Reshaping input data to 2D array for model compatibility

Data preparation
osoba = osoba.reshape(1,-1)

Converting 'other' and 'Vic' text values to 0 and 1

Data preparation
df['Pop'] = df['Pop'].replace({'other': 0, 'Vic': 1})

Converting 'm' and 'f' text values to 0 and 1

Data preparation
df['sex'] = df['sex'].replace({'m': 0, 'f': 1})

Removing 'Unnamed: 0' column

Data preparation
df = df.drop(columns='Unnamed: 0')

Converting ChestPain category to numerical values

Data preparation
df['ChestPain'] = df['ChestPain'].astype('category')
df['ChestPain'] = df['ChestPain'].cat.codes

Converting Thal category to numerical values

Data preparation
df['Thal'] = df['Thal'].astype('category')
df['Thal'] = df['Thal'].cat.codes

Converting AHD category to numerical values

Data preparation
df['AHD'] = df['AHD'].astype('category')
df['AHD'] = df['AHD'].cat.codes

Scaling training data

Data preparation
X_train_scaled = scaler.fit_transform(X_train)

Scaling test data

Data preparation
X_test_scaled = scaler.transform(X_test)

Encoding text classes to numerical values

Data preparation
vyber['competitor'] = vyber['competitor'].replace({'Steve': 0, 'Michael': 1})

Extending class encoding for multi-class classification

Data preparation
multi = data.copy()  # copy so the original DataFrame is left untouched
multi["competitor"] = multi["competitor"].replace({'Steve':0,'Susan':1,'Michael':2,'Kate':3})

Reshaping images for convolutional networks

Data preparation
train_X = train_X.reshape(-1, 28,28, 1)
test_X = test_X.reshape(-1, 28,28, 1)
train_X.shape, test_X.shape

Normalizing pixel values to [0,1] range

Data preparation
train_X = train_X.astype('float32')
test_X = test_X.astype('float32')
train_X = train_X / 255.
test_X = test_X / 255.

One-hot encoding target variables

Data preparation
train_Y_one_hot = to_categorical(train_Y)
test_Y_one_hot = to_categorical(test_Y)
print('Original label:', train_Y[0])
print('After conversion to one-hot:', train_Y_one_hot[0])

Feature Selection

Separating input features (X) from target variable (y)

Data preparation
X = golf[golf.columns.difference(['Play'])]
y = golf.Play

Getting list of features

Data preparation
stlpce = golf[golf.columns.difference(['Play'])].columns.to_list()

Displaying extracted features

Data preparation
stlpce

Selecting all dataset features except target variable, defining target variable

Data preparation
X = diabetes[diabetes.columns.difference(['Outcome'])]
y = diabetes['Outcome']
y=y.astype('int')

Separating input features (X) from target variable (y)

Data preparation
X = titanic[titanic.columns.difference(['Survived'])]
y = titanic['Survived']
y=y.astype('int')

Defining test vector representing individual with various attributes

Data preparation
osoba = np.array([10, #age
                  0, #fare
                  0, #parent/children
                  1, #pclass
                  0, #sex
                  3]) #siblings/spouses

Selecting all features except target (age), defining target variable

Data preparation
X = df[df.columns.difference(['age'])]
y = df['age']

Separating input features (X) from target variable (y)

Data preparation
X = df[df.columns.difference(['Revenue'])]
y = df['Revenue']

Separating input features (X) from target variable (y)

Data preparation
X = df.drop(['Revenue'], axis = 1)
y = df['Revenue']

Selecting Temperature and Revenue attributes

Data preparation
X = df['Temperature'].values
y = df['Revenue'].values

Creating input vector for visualization

Data preparation
vstup = df.drop(["Revenue"], axis=1)

Separating input features (X) from target variable (y)

Data preparation
X = df.drop(columns = "AHD")
y = df['AHD']

Selecting variables for binary classification

Data preparation
X = data[['student', 'balance', 'income']]
y = data['default']

Separating input features from target variable

Data preparation
X = vyber[vyber.columns.difference(['competitor'])]
y = vyber['competitor']
y=y.astype('int')

Preparing data for 4-class classification

Data preparation
X = multi[multi.columns.difference(['competitor'])]
y = to_categorical(multi['competitor'])

Splitting Data

Splitting data into training and test sets

Data preparation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

Splitting data into 70:30 train/test ratio

Data preparation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

Splitting data into train/test sets

Data preparation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=21)

Splitting data into train/test sets

Data preparation
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0)

Splitting data into train/test sets

Data preparation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

Splitting training data into train/validation

Data preparation
train_X,valid_X,train_label,valid_label = train_test_split(train_X, train_Y_one_hot, test_size=0.2, random_state=13)

Building Decision Tree Model

Training decision tree

Modeling
clf = DecisionTreeClassifier()

Creating decision tree model with gini criterion

Modeling
clf = DecisionTreeClassifier(criterion='gini')

Creating decision tree model with entropy criterion

Modeling
clf = DecisionTreeClassifier(criterion='entropy')

Creating decision tree model with fixed random_state parameter

Modeling
clf = DecisionTreeClassifier(random_state=0)

Creating DecisionTreeRegressor instance with default parameters

Modeling
model = DecisionTreeRegressor()  # optionally limit depth, e.g. DecisionTreeRegressor(max_depth=3)

Creating decision tree model

Modeling
regressor = DecisionTreeRegressor()

Creating random forest model

Modeling
rf_model = RandomForestClassifier()

Creating random forest model with 1000 trees

Modeling
rf_model = RandomForestClassifier(n_estimators=1000)

Creating StandardScaler model

Modeling
scaler = StandardScaler()

Creating and training logistic model

Modeling
log_reg = LogisticRegression(random_state=0).fit(X_train_scaled, y_train)

Creating logistic regression model

Modeling
logit_model=sm.Logit(y, X)
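
Note that sm.Logit does not add an intercept term automatically; if one is wanted, prepend a constant column first. A minimal sketch using statsmodels' add_constant helper (the X_const name is illustrative):

Modeling
X_const = sm.add_constant(X)  # adds an intercept column
logit_model = sm.Logit(y, X_const)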

Initializing logistic regression model

Modeling
log_regression = LogisticRegression()

Initializing Random Forest classifier

Modeling
rand_forest = RandomForestClassifier(random_state=0)

Model Training

Training decision tree on training data

Modeling
clf = clf.fit(X_train,y_train)

Training model using training data (X_train, y_train)

Modeling
model.fit(X_train, y_train)

Training decision tree

Modeling
regressor.fit(X_train, y_train)

Training random forest model

Modeling
rf_model.fit(X_train, y_train)

Training logistic regression model

Modeling
result=logit_model.fit()

Training logistic regression model

Modeling
log_regression.fit(X_train,y_train)

Training Random Forest model

Modeling
rand_forest.fit(X_train, y_train)

Training model for 50 epochs

Modeling
model.fit(X_train, y_train, epochs=50)

Training model for 200 epochs

Modeling
model.fit(X_train, y_train, epochs=200)

Training model on image data with validation

Modeling
fashion_train = fashion_model.fit(train_X, train_label, batch_size=batch_size,epochs=epochs,verbose=1,validation_data=(valid_X, valid_label))

Model Prediction

Predicting values on test data

Modeling
y_pred = clf.predict(X_test)

Using decision tree model to predict for 'person'

Modeling
clf.predict(osoba)

Using model to predict test data (X_test)

Modeling
predictions = model.predict(X_test)

Predicting for specific input vector

Modeling
model.predict([[0,33,63,28,55,14,65,95,0,4,55,36,93]])

Predicting values on test data

Modeling
y_pred = regressor.predict(X_test)

Predicting values on test data

Modeling
y_pred = rf_model.predict(X_test)

Predicting probabilities for test data

Modeling
rf_model.predict_proba(X_test)

Predicting log probabilities

Modeling
rf_model.predict_log_proba(X_test)

Predicting values on training data

Modeling
log_reg.predict(X_train_scaled)

Predicting probabilities for test data

Modeling
log_reg.predict_proba(X_test_scaled)

Generating logistic regression predictions

Modeling
y_pred = log_reg.predict(X_test_scaled) 

Predicting with logistic regression

Modeling
print(log_regression.predict(X_test))

Predicting with Random Forest model

Modeling
print(rand_forest.predict(X_test))

Predicting output for custom input vector

Modeling
vstup_q = np.array([[-4,8]])
classifier.predict(vstup_q)

Generating predictions for test data

Evaluation
y_pred = model.predict(X_test).round()

Evaluating the Model

Evaluating model accuracy

Evaluation
print("Presnosť:",metrics.accuracy_score(y_test, y_pred))

Creating confusion matrix to analyze correct/incorrect predictions

Evaluation
confusion_matrix(y_test, y_pred, labels=[1,0])

Displaying maximum depth of trained decision tree

Evaluation
clf.get_depth()

Displaying total number of leaves in trained decision tree

Evaluation
clf.get_n_leaves()

Displaying list of accuracies for gini criterion decision tree

Evaluation
acc_gini

Displaying list of accuracies for entropy criterion decision tree

Evaluation
acc_entropy

Creating DataFrame containing decision trees' accuracies

Evaluation
d = pd.DataFrame({'acc_gini':pd.Series(acc_gini), 'acc_entropy':pd.Series(acc_entropy), 'max_depth':pd.Series(max_depth)})

Creating pandas DataFrame with training/test data accuracy

Evaluation
data = pd.DataFrame({'acc_train':pd.Series(acc_train), 'acc_test':pd.Series(acc_test), 'max_depth':pd.Series(max_depth)})

Calculating decision tree pruning path based on training data

Modeling
path = clf.cost_complexity_pruning_path(X_train, y_train)

Extracting ccp_alphas and impurities from pruning path

Modeling
ccp_alphas, impurities = path.ccp_alphas, path.impurities
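
The extracted alphas can then be used to fit one pruned tree per ccp_alpha value; a minimal sketch following the scikit-learn cost-complexity pruning example (the clfs list and loop are illustrative, not part of the original catalog):

Modeling
clfs = []
for ccp_alpha in ccp_alphas:
    # each tree is pruned more aggressively as ccp_alpha grows
    clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
    clf.fit(X_train, y_train)
    clfs.append(clf)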

Listing all parameters of trained decision tree

Evaluation
clf.get_params()

Displaying model output for test data

Evaluation
predictions

Evaluating model accuracy

Evaluation
accuracy_score(y_test, y_pred)

Calculating precision, recall, f1-score and support

Evaluation
precision_recall_fscore_support(y_test, y_pred, labels=[1, 0])

Printing precision, recall, f1-score and support

Evaluation
p, r, f, s = precision_recall_fscore_support(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print('acc: ',metrics.accuracy_score(y_test, y_pred))
print('prec: ',((p[0]+p[1])/2),'(',p[0],' / ',p[1],')')
print('rec: ',((r[0]+r[1])/2),'(',r[0],' / ',r[1],')')
print('f1-sc:',((f[0]+f[1])/2))

print(confusion_matrix(y_test, y_pred)) 

Creating DataFrame for result comparison

Evaluation
d = pd.DataFrame({'Real Values':y_test, 'Predicted Values':y_pred})

Calculating residual squares

Evaluation
d['sqr_res'] = pow((d['Real Values'] - d['Predicted Values']), 2)

Summing residual squares

Evaluation
d['sqr_res'].sum()

Getting model parameters

Evaluation
rf_model.get_params()

Getting feature names

Evaluation
rf_model.feature_names_in_

Evaluating model accuracy on training data

Evaluation
log_reg.score(X_train_scaled, y_train)

Evaluating model accuracy on test data

Evaluation
log_reg.score(X_test_scaled, y_test)

Displaying statistical model results

Evaluation
print(result.summary2())

Evaluating logistic regression accuracy

Evaluation
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(log_reg.score(X_test_scaled, y_test))) 

Creating confusion matrix

Evaluation
conf_matrix = confusion_matrix(y_test, y_pred)  # a new name so the imported confusion_matrix function is not shadowed
print(conf_matrix)

Importing classification_report

Evaluation
from sklearn.metrics import classification_report

Displaying classification report

Evaluation
print(classification_report(y_test, y_pred))

Testing network on all XOR input combinations

Evaluation
test = np.array([[0],[0]])
predict(w1,w2,test)
test = np.array([[1],[0]])
predict(w1,w2,test)
test = np.array([[0],[1]])
predict(w1,w2,test)
test = np.array([[1],[1]])
predict(w1,w2,test)

Displaying first 6 predictions

Evaluation
preds[:6]

Displaying first 6 actual values

Evaluation
y_test[:6]

Converting predictions to class labels

Evaluation
labels_predict=np.argmax(y_pred,axis=1)
labels_predict[:6]

Creating confusion matrix for evaluation

Evaluation
confusion_matrix(labels_predict, np.argmax(y_test, axis=1))

Printing classifier accuracy

Evaluation
print("Presnost: ",metrics.accuracy_score(labels_predict, np.argmax(y_test, axis=1)))

Evaluating model on test set

Evaluation
test_eval = fashion_model.evaluate(test_X, test_Y_one_hot, verbose=0)
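
Since the model was compiled with metrics=['accuracy'], evaluate returns the loss and the accuracy in that order; a small follow-up to print them:

Evaluation
print('Test loss:', test_eval[0])
print('Test accuracy:', test_eval[1])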

Visualizing Decision Trees

Plotting decision tree

Deployment
fig = plt.figure(figsize=(20,6))
_ = tree.plot_tree(clf, feature_names = stlpce, class_names=['no','yes'], filled=True)

Re-plotting decision tree

Deployment
fig = plt.figure(figsize=(20,6))
_ = tree.plot_tree(clf, feature_names = stlpce, class_names=['yes','no'], filled=True)
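
The export_graphviz, StringIO, Image and pydotplus imports listed earlier can render the same tree via Graphviz; a sketch assuming the clf and stlpce objects above and a local Graphviz installation:

Deployment
dot_data = StringIO()
export_graphviz(clf, out_file=dot_data, filled=True, rounded=True,
                special_characters=True, feature_names=stlpce, class_names=['no','yes'])
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())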

Visualizing decision tree

Deployment
plt.figure(figsize=(40,20))
tree.plot_tree(regressor, feature_names=vstup.columns.tolist())

Data Analysis

Displaying basic statistical values of dataset

Data understanding
diabetes.describe()

Counting value frequency of attribute

Data understanding
Counter(diabetes.Outcome)

Printing basic dataset info

Data understanding
df.info()

Counting occurrences in 'Pop' attribute

Data preparation
Counter(df.Pop)

Counting occurrences in 'sex' attribute

Data preparation
Counter(df.sex)

Calculating correlation matrix

Data understanding
corrMatrix = df.corr()

Counting frequency of 'AHD' attribute values

Data understanding
Counter(df.AHD)

Checking for missing values

Data understanding
df.isnull().sum()

Analyzing class distribution in data

Data preparation
Counter(y)

Extracting examples belonging to class 1

Data preparation
X[:, :][y == 1] 

Printing data matrix shape

Data preparation
X.shape

Analyzing class distribution in data

Data preparation
Counter(vyber.competitor)

Getting image data dimensions

Data preparation
data.shape

Printing pixel values of image section

Data preparation
selected_part

Analyzing training/test data shape

Data preparation
print('Training data shape : ', train_X.shape, train_Y.shape)
print('Testing data shape : ', test_X.shape, test_Y.shape)

Identifying unique classes in data

Data preparation
classes = np.unique(train_Y)
nClasses = len(classes)
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes) 

Verifying shapes of all data parts

Data preparation
train_X.shape,valid_X.shape,train_label.shape,valid_label.shape

Displaying loaded text content

Data understanding
h_rights

Calculating total character count

Data preparation
len(h_rights)

Calculating number of unique words

Evaluation
len(set(h_rights.split()))

Finding longest word in text

Evaluation
max_len = 0
longest_word = ''
for w in slova:
    if len(w) > max_len:
        max_len = len(w)
        longest_word = w

Displaying tweet dataset structure

Data understanding
tweets.head()

Applying word count function to entire dataset

Data preparation
tweets['word_count'] = tweets.apply(lambda x: tweet_count(x), axis = 1)

Calculating tweet character counts

Data preparation
tweets['char_count'] = tweets['tweet'].str.len()

Calculating average word length in tweets

Evaluation
tweets['avg_len'] = (tweets['char_count'] - (tweets['word_count'] - 1)) / tweets['word_count']
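
For instance, a 39-character tweet with 6 words gives avg_len = (39 - 5) / 6 ≈ 5.7: subtracting word_count - 1 removes the separating spaces before dividing by the number of words.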

Model Interpretation

Displaying decision path taken for 'person' prediction

Deployment
clf.decision_path(osoba).toarray()

Defining decision boundary line for visualization

Evaluation
def priamka(x):
    y = (W[0]*x + b)/(W[1]*(-1))
    return y 

Printing model structure

Modeling
model.summary()

Printing model structure with layer/parameter details

Modeling
fashion_model.summary()

Saving trained model to HDF5 file

Deployment
fashion_model.save("fashion_model_dropout.h5")

Advanced Visualization

Plotting decision trees' accuracy changes based on max depth

Deployment
plt.plot('max_depth','acc_gini', data=d, label='gini')
plt.plot('max_depth','acc_entropy', data=d, label='entropy')
plt.xlabel('max_depth')
plt.ylabel('accuracy')
plt.legend() 

Plotting training/test data accuracy

Deployment
plt.plot('max_depth','acc_train', data=data, marker='o', label='train')
plt.plot('max_depth','acc_test', data=data, marker='o', label='test')
plt.xlabel('max_depth')
plt.ylabel('accuracy')
plt.legend() 

Plotting graph showing total leaf impurity

Deployment
fig, ax = plt.subplots()
ax.plot(ccp_alphas[:-1], impurities[:-1], marker="o", drawstyle="steps-post")
ax.set_xlabel("effective alpha")
ax.set_ylabel("total impurity of leaves")
ax.set_title("Total Impurity vs effective alpha for training set") 

Visualizing actual vs predicted values

Deployment
plt.scatter(X_test, y_test, color='red')
plt.scatter(X_test, y_pred, color='green')
plt.title('Decision Tree Regression')
plt.xlabel('Temperature')
plt.ylabel('Revenue')
plt.show()

Creating grid for smoother visualization

Deployment
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))

Visualizing decision tree

Deployment
plt.plot(X_grid, regressor.predict(X_grid), color='black')
plt.title('Decision Tree Regression')
plt.xlabel('Temperature')
plt.ylabel('Revenue')
plt.show()

Visualizing correlation matrix

Deployment
sns.heatmap(corrMatrix, annot=True)
plt.show()

Visualizing correlation matrix (formatted values)

Deployment
sns.heatmap(corrMatrix, annot=True, fmt=".1f")
plt.show() 

Visualizing correlation matrix (large format)

Deployment
plt.figure(figsize=(12, 9))
sns.heatmap(corrMatrix, annot=True)
plt.show()

Visualizing logistic regression ROC curve

Deployment
logit_roc_auc = roc_auc_score(y_test, log_reg.predict(X_test_scaled))
fpr, tpr, thresholds = roc_curve(y_test, log_reg.predict_proba(X_test_scaled)[:,1])
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show() 

Comparing ROC curves of logistic regression and Random Forest

Deployment
y_pred = log_regression.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
auc = round(metrics.roc_auc_score(y_test, y_pred), 4)
plt.plot(fpr,tpr,label="Logistic Regression, AUC="+str(auc))

y_pred = rand_forest.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
auc = round(metrics.roc_auc_score(y_test, y_pred), 4)
plt.plot(fpr,tpr,label="Random Forest, AUC="+str(auc))
plt.legend() 

Visualizing training data and decision boundary

Evaluation
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
ax = plt.subplot()
ax.set_title("Result")

for x, expected in training_data:
    if expected == 1:
        vzor = 'r'
    else:
        vzor = 'b'
    ax.scatter(x[0], x[1], color=vzor)

plt.plot([100,300],[priamka(100),priamka(300)])
plt.show() 

Visualizing decision boundary for XOR problem

Evaluation
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
ax = plt.subplot()
ax.set_title("Result")

for x, expected in training_data:
    if expected == 1:
        vzor = 'r'
    else:
        vzor = 'b'
    ax.scatter(x[0], x[1], color=vzor)

plt.plot([0,8],[priamka(0),priamka(8)])
plt.show() 

Visualizing synthetic data with scatter plot

Evaluation
fig = plt.figure(figsize=(5,5))
plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], 'r^')
plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], 'bs')
plt.xlabel("feature 1")
plt.ylabel("feature 2")
plt.title('Random Classification Data with 2 classes') 

Setting figure size for complex visualizations

Evaluation
fig = plt.figure(figsize=(8, 6))

Helper function for decision boundary visualization

Evaluation
def vykresli_rozdelenie(X, Y, vahy, bias):
    print('drawing - weights:', vahy, ' bias:', bias)
    x1 = [min(X[:,0]), max(X[:,0])]
    # note: this helper expects the three-argument priamka(x, vahy, bias) variant
    x2 = [(priamka(x1[0], vahy, bias)), (priamka(x1[1], vahy, bias))]
    print('x1:', x1, 'x2:', x2)
    fig = plt.figure(figsize=(8,5))
    plt.plot(X[:, 0][Y==0], X[:, 1][Y==0], "r^")
    plt.plot(X[:, 0][Y==1], X[:, 1][Y==1], "bs")
    plt.xlabel("feature 1")
    plt.ylabel("feature 2")
    plt.title('Perceptron')
    plt.plot(x1, x2, 'y-')

Visualizing loss vs epochs relationship

Evaluation
plt.plot(range(1, len(classifier.cost) + 1), classifier.cost)
plt.title("Adaline: learn-rate 0.001")
plt.xlabel('Epochs')
plt.ylabel('Cost (Sum-of-Squares)')
plt.show()

Visualizing training loss

Evaluation
plt.plot(losses)
plt.xlabel("EPOCHS")
plt.ylabel("Loss value")

Visualizing pairwise variable relationships

Evaluation
sns.pairplot(data, hue='competitor')

Visualizing example images from dataset

Evaluation
plt.figure(figsize=[5,5])
plt.subplot(121)
plt.imshow(train_X[0,:,:], cmap='gray')
plt.title("Ground Truth : {}".format(train_Y[0]))
plt.subplot(122)
plt.imshow(test_X[0,:,:], cmap='gray')
plt.title("Ground Truth : {}".format(test_Y[0]))

Visualizing training and validation accuracy and loss

Evaluation
accuracy = fashion_train.history['accuracy']
val_accuracy = fashion_train.history['val_accuracy']
loss = fashion_train.history['loss']
val_loss = fashion_train.history['val_loss']
epochs = range(len(accuracy))
plt.plot(epochs, accuracy, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

Handling Missing Data

Removing rows with missing values

Data preparation
df = df.dropna()

Data Exploration

Displaying dataset

Data understanding
golf

Displaying first rows of dataset

Data understanding
golf.head()

Displaying dataset

Data understanding
diabetes

Displaying dataset

Data understanding
titanic

Displaying first 5 dataset rows for quick overview

Data understanding
titanic.head()

Displaying first 5 rows of dataset

Data understanding
df.head()

Model Optimization

Training models and analyzing accuracy based on decision tree depth

Evaluation
max_depth = []
acc_gini = []
acc_entropy = []
for i in range(1, 30):
    dtree = DecisionTreeClassifier(criterion='gini', max_depth=i)
    dtree.fit(X_train, y_train)
    pred = dtree.predict(X_test)
    acc_gini.append(metrics.accuracy_score(y_test, pred))

    dtree = DecisionTreeClassifier(criterion='entropy', max_depth=i)
    dtree.fit(X_train, y_train)
    pred = dtree.predict(X_test)
    acc_entropy.append(metrics.accuracy_score(y_test, pred))

    max_depth.append(i)

Training models and analyzing accuracy based on decision tree depth (gini)

Evaluation
max_depth = []
acc_test = []
acc_train = []
for i in range(1, 101):
    dtree = DecisionTreeClassifier(criterion='gini', max_depth=i)
    dtree.fit(X_train, y_train)

    pred_train = dtree.predict(X_train)
    pred_test = dtree.predict(X_test)

    acc_train.append(metrics.accuracy_score(y_train, pred_train))
    acc_test.append(metrics.accuracy_score(y_test, pred_test))

    max_depth.append(i)

Neural Network Setup

Defining activation function for perceptron (step 0: biological neuron inspiration)

Modeling
def aktivacna_fn(x):
    if x>=0:
        return 1
    else:
        return -1 

Calculating neuron output (weighted sum of inputs + bias)

Modeling
def neuron(X,W,b):
 return  aktivacna_fn(np.dot(X,W) + b) 
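
As a worked check with the starting values defined below (W = [-30, 300], b = -1230): the first training example x = [121, 16.8] gives np.dot(x, W) + b = -3630 + 5040 - 1230 = 180, which is non-negative, so the neuron outputs 1, matching that example's label.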

Initializing weights, bias and learning rate with starting values

Modeling
W = array([-30,300])
b = -1230
eta = 0.01
print('current weights: ', W)
print('bias: ', b)

Training perceptron using delta rule (weight updates based on error)

Modeling
for i in range(0, 4):
    print('---')
    x, y = training_data[i]
    print('training example: ', x, ', expected output: ', y)

    predikcia = neuron(x, W, b)
    print('prediction: ', predikcia)
    chyba = y - predikcia
    if (chyba != 0):
        print('weights need to be adjusted')
        W = W + (eta * chyba * x)
        b = b + (eta * chyba * 1)
    print('current weights: ', W)
    print('bias: ', b)

Predicting output for custom input vector

Modeling
vektor = array([100, 10])
neuron(vektor, W, b)

Initializing weights and bias with random values (XOR problem)

Modeling
r1 = random.randint(-100, 100)
r2 = random.randint(-100, 100)
W = array([r1, r2])
b = random.randint(-100, 100)
eta = 0.5
print('current weights: ', W)
print('bias: ', b)

Training perceptron in epochs (iterating through training data)

Modeling
uprava_vahy = True
epocha_id = 1

while uprava_vahy:
    print('epoch: ', epocha_id)
    epocha_id += 1
    uprava_vahy = False
    for i in range(0, 3):
        print('---')
        x, y = training_data[i]
        predikcia = neuron(x, W, b)
        chyba = y - predikcia
        if (chyba != 0):
            uprava_vahy = True
            W = W + (eta * chyba * x)
            b = b + (eta * chyba * 1)
        print('current weights: ', W, ', bias: ', b)

Initializing perceptron weights (zero values)

Modeling
vahy = np.zeros((n+1, 2))

Implementing perceptron algorithm with visualization

Modeling
def perceptron(X, Y, eta, epochs):
    m, n = X.shape
    W = np.zeros(n)
    b = 0
    for epoch in range(epochs):
        print('epoch: ', epoch)
        vykresli_rozdelenie(X, Y, W, b)
        for i in range(0, m):
            x = X[i]
            y = Y[i]
            predikcia = neuron(x, W, b)
            chyba = y - predikcia
            if (chyba != 0):
                W = W + (eta * chyba * x)
                b = b + (eta * chyba * 1)

Running perceptron algorithm on synthetic data

Modeling
perceptron(X, y, 0.5, 10)

Defining sum of squared errors function

Modeling
def sum_squared_errors(y, output_pred):
    errors = y - output_pred
    return (errors**2).sum()/2.0

Calculating errors between expected and predicted values

Modeling
sum_squared_errors(mal_byt, bol)
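
For mal_byt = [1, 2, 3, 4] and bol = [1, 0, 2, 5] the errors are [0, 2, 1, -1], so the call returns (0 + 4 + 1 + 1) / 2 = 3.0.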

Calculating weighted sum of inputs (neuron's internal potential)

Modeling
def vnutorny_potencial(X, weights):
    return np.dot(X, weights)

Defining linear activation function for Adaline (identity function)

Modeling
def aktivacna_fn(x):
    return x 

Generating initial weights from normal distribution

Modeling
weights = random_gen.normal(loc = 0.0, scale = 0.01, size = biased_X.shape[1]) 

Initializing list for storing errors and calculating predictions

Modeling
cost = []
learn_rate = 0.5
output_pred = aktivacna_fn(vnutorny_potencial(biased_X, weights)) 

Calculating errors between actual and predicted values

Modeling
errors = y - output_pred

Updating weights using gradient descent

Modeling
weights += (learn_rate * biased_X.T.dot(errors))

Displaying updated model weights

Modeling
weights

Calculating loss using sum of squared errors

Evaluation
cost_i = (errors**2).sum() / 2.0
# equivalently, using the helper defined above:
cost_i = sum_squared_errors(y, output_pred)

Training Adaline model for 20 epochs

Modeling
for i in range(20):
  output_pred = aktivacna_fn(vnutorny_potencial(biased_X, weights))
  errors = y - output_pred
  weights += (learn_rate * biased_X.T.dot(errors))
  cost_i = (errors**2).sum() / 2.0
  cost.append(cost_i)

Implementing Adaline algorithm with automatic data scaling

Modeling
class Adaline(object):

    def __init__(self, learn_rate = 0.001, iterations = 10000):
        self.learn_rate = learn_rate
        self.iterations = iterations

    def fit(self, X, y, biased_X = False, standardised_X = False):
        if not standardised_X:
            X = self._standardise_features(X)
        if not biased_X:
            X = self._add_bias(X)
        self._initialise_weights(X)
        self.cost = []

        for cycle in range(self.iterations):
            output_pred = self._activation(self._net_input(X))
            errors = y - output_pred
            self.weights += (self.learn_rate * X.T.dot(errors))
            cost = (errors**2).sum() / 2.0
            self.cost.append(cost)
        return self

    def _net_input(self, X):
        return np.dot(X, self.weights)

    def predict(self, X, biased_X=False):
        if not biased_X:
            X = self._add_bias(X)
        return np.where(self._activation(self._net_input(X)) >= 0.0, 1, 0)

    def _add_bias(self, X):
        bias = np.ones((X.shape[0], 1))
        biased_X = np.hstack((bias, X))
        return biased_X

    def _initialise_weights(self, X):
        random_gen = np.random.RandomState(1)
        self.weights = random_gen.normal(loc = 0.0, scale = 0.01, size = X.shape[1])
        return self

    def _standardise_features(self, X):
        X_norm = (X - np.mean(X, axis=0)) / np.std(X, axis = 0)
        return X_norm

    def _activation(self, X):
        return X 

Creating and training Adaline classifier

Modeling
classifier = Adaline(learn_rate = 0.001, iterations = 100)
a = classifier.fit(X, y)

Displaying final trained model weights

Modeling
a.weights

Initializing neural network parameters (2-2-1 architecture)

Modeling
n_x = 2
n_h = 2
n_y = 1
m = x.shape[1]
lr = 0.2
np.random.seed(2)
w1 = np.random.rand(n_h,n_x)   # weight matrix for the hidden layer
w2 = np.random.rand(n_y,n_h)   # weight matrix for the output layer
losses = [] 

Defining sigmoid activation function

Modeling
def sigmoid(z):
    z= 1/(1+np.exp(-z))
    return z

Implementing forward propagation for two-layer network

Modeling
def forward_prop(w1,w2,x):
    z1 = np.dot(w1,x)
    a1 = sigmoid(z1)
    z2 = np.dot(w2,a1)
    a2 = sigmoid(z2)
    return z1,a1,z2,a2

Implementing backpropagation for gradient calculation

Modeling
def back_prop(m,w1,w2,z1,a1,z2,a2,y):
    dz2 = a2-y
    dw2 = np.dot(dz2,a1.T)/m
    dz1 = np.dot(w2.T,dz2) * a1*(1-a1)
    dw1 = np.dot(dz1,x.T)/m
    dw1 = np.reshape(dw1,w1.shape)

    dw2 = np.reshape(dw2,w2.shape)
    return dz2,dw2,dz1,dw1

Training neural network for 100,000 iterations

Modeling
iterations = 100000
for i in range(iterations):
    z1,a1,z2,a2 = forward_prop(w1,w2,x)
    loss = -(1/m)*np.sum(y*np.log(a2)+(1-y)*np.log(1-a2))
    losses.append(loss)

    dz2,dw2,dz1,dw1 = back_prop(m,w1,w2,z1,a1,z2,a2,y)
    w2 = w2-lr*dw2
    w1 = w1-lr*dw1

Function for predicting outputs using trained weights

Modeling
def predict(w1,w2,input):
    # forward-propagate the given input through both layers
    z1,a1,z2,a2 = forward_prop(w1,w2,input)
    a2 = np.squeeze(a2)
    if a2>=0.5:
        print("For input", [i[0] for i in input], "output is 1")
    else:
        print("For input", [i[0] for i in input], "output is 0")

Defining sequential model with three layers

Modeling
model = Sequential()
model.add(Dense(48,input_shape=(6,),activation="sigmoid"))
model.add(Dense(6,activation="sigmoid"))
model.add(Dense(1))

Compiling model with Adam optimizer and MSE loss

Modeling
model.compile(optimizer="adam", loss="mse") 

Defining model with single hidden layer

Modeling
model = Sequential()
model.add(Dense(4,input_shape=(2,),activation="relu"))
model.add(Dense(1))

Defining model for multi-class classification

Modeling
model = Sequential()
model.add(Dense(4,input_shape=(2,),activation="relu"))
model.add(Dense(4,activation="softmax"))

Compiling model with binary cross-entropy

Modeling
model.compile(optimizer="adam", loss="binary_crossentropy")

Defining training parameters (batch size, epochs, class count)

Modeling
batch_size = 64
epochs = 5
num_classes = 10

CNN architecture with 3 convolutional layers and max pooling

Modeling
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D((2, 2),padding='same'))
fashion_model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dense(num_classes, activation='softmax'))

Compiling model with cross-entropy loss and Adam optimizer

Modeling
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
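
Training the CNN (a hedged sketch: train_X and train_Y_one_hot are hypothetical arrays of 28x28x1 images and one-hot labels, reusing the batch_size and epochs defined above)

Modeling
fashion_model.fit(train_X, train_Y_one_hot, batch_size=batch_size, epochs=epochs, verbose=1)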

Defining lambda function for simple addition

Modeling
x = lambda a: a + 100
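
Calling the lambda function (a one-line usage example: x(5) evaluates to 105)

Modeling
print(x(5))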

Text Processing

Cleaning text (removing special characters/punctuation)

Data preparation
h_rights = h_rights.replace('\n', ' ')
h_rights = h_rights.replace("\ufeff", ' ')
h_rights = h_rights.replace(',', ' ')
h_rights = h_rights.replace('.', ' ')
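
Cleaning text with a single regex substitution (a hedged alternative to the chained replace calls above: one re.sub removes every non-word, non-whitespace character)

Data preparation
h_rights = re.sub(r'[^\w\s]', ' ', h_rights)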

Tokenizing text into words

Data preparation
slova = h_rights.split()

Function for counting words in tweets

Modeling
def tweet_count(row):
    my_var = row['tweet']
    return len(my_var.split())
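
Applying the word-count function row by row (a hedged sketch: df is a hypothetical DataFrame with a 'tweet' column)

Modeling
df['word_count'] = df.apply(tweet_count, axis=1)   # df is assumed to exist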

Basic regex match test

Modeling
re.match('abc','abcdefgh')

Detecting hashtags with regex

Modeling
re.search('#[A-Za-z0-9]+', tweet)

Extracting all hashtags from tweet

Modeling
[w for w in tweet.split() if re.search('#[A-Za-z0-9]+', w)]

Demonstrating regex findall for 'b.+ing' pattern

Modeling
sentence1 = "In the beginning was the Word"
re.findall("b.+ing", sentence1)

Validating emails with regex

Modeling
sent = 'My email is jkapusta@ukf.sk and my colleague has mdrlik@ukf.sk . This is the bad email: jkkkapusta@u.k'
[w for w in sent.split(" ") if re.search(r"[a-z]+@[a-z.]+\.[a-z]{2,3}$",w)]

Getting synsets for 'joy'

Modeling
syns = wordnet.synsets('joy')

Extracting lemma names for first synset

Modeling
for syn in syns[0].lemmas():
    print(syn.name())

Extracting all lemma names

Modeling
for syn in syns:
    for lema in syn.lemmas():
        print(lema.name())

Getting definition for first synset

Evaluation
syns[0].definition()

Finding synonyms/antonyms for given word

Evaluation
slovo = "joy"
synonyma = []
antonyma = []

for syn in wordnet.synsets(slovo):
    for lema in syn.lemmas():
        synonyma.append(lema.name())
        if lema.antonyms():
            antonyma.append(lema.antonyms()[0].name())
print('Synonyms:')
print(set(synonyma))
print('Antonyms:')
print(set(antonyma))

Calculating Wu-Palmer similarity between concepts

Evaluation
w1 = wordnet.synset('joy.n.01')
w2 = wordnet.synset('joyousness.n.01')
print(w1.wup_similarity(w2))

Demonstrating low semantic similarity

Evaluation
w1 = wordnet.synset('joy.n.01')
w2 = wordnet.synset('mouse.n.01')
print(w1.wup_similarity(w2))

Validating emails with regex

Evaluation
[w for w in sent.split() if re.search(r"^[a-zA-Z0-9+_.-]{1,64}@[a-zA-Z0-9-]{1,255}\.[a-zA-Z0-9.-]{2,}$", w)]

Tokenizing text into words

Data preparation
array = word_tokenize(text)

Normalizing text to lowercase

Data preparation
smalym = text.lower()

Tokenizing normalized text

Data preparation
word_tokenize(smalym)

Creating word frequency distribution

Evaluation
v = Counter(word_tokenize(smalym))

Getting top 5 frequent words

Evaluation
v.most_common(5)

Tokenizing aggregated text into words

Data preparation
words = word_tokenize(all_titles)

Normalizing tokens to lowercase

Data preparation
text_lower = [w.lower() for w in words]

Creating word frequency distribution

Evaluation
freq = FreqDist(text_lower)

Getting top 10 frequent words

Evaluation
freq.most_common(10)

Initializing regex tokenizer for words

Modeling
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')

Tokenizing text with custom regex pattern

Data preparation
tokens = tokenizer.tokenize(text)

Normalizing tokens to lowercase

Data preparation
words = []
for word in tokens:
    words.append(word.lower())

Calculating normalized word frequency distribution

Evaluation
freq = FreqDist(words)

Loading English stopwords into variable

Data preparation
sw = stopwords.words('english')

Filtering stopwords from tokenized text

Modeling
words_ns = []
for word in words:
    if word not in sw:
        words_ns.append(word)

Creating clean word frequency distribution

Evaluation
freqdist = nltk.FreqDist(words_ns)

Visualizing top 20 frequent words using FreqDist.plot()

Evaluation
freqdist.plot(20, cumulative=False)

Web Scraping

Loading webpage using GET request

Data preparation
link = "https://ukf.sk"
stranka = requests.get(link)
stranka.text

Creating HTML tree from page content

Data preparation
tree = html.fromstring(stranka.content)

Extracting headings using XPath

Modeling
nazvy = tree.xpath("//h2/a/text()")

Loading eBay product data

Data preparation
link2 = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2334524.m570.l1313&_nkw=iphone+15&_sacat=0&_odkw=iphone&_osacat=0"
stranka2 = requests.get(link2)

Complex product price extraction/analysis

Evaluation
tree2 = html.fromstring(stranka2.content)
prices = []
for item in tree2.xpath('//span[@class="s-item__price"]'):
    price_text = item.text_content()
    price_match = re.search(r'\$\d+(?:,\d{3})*(?:\.\d{2})?', price_text)
    if price_match:
        price = float(price_match.group().replace('$', '').replace(',', ''))
        prices.append(price)

if prices:
    average_price = sum(prices) / len(prices)
    print(f"Average price of iPhone on Ebay: ${average_price:.2f}")
else:
    print("No prices found.")

Alternative eBay price extraction method

Modeling
link3 = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2334524.m570.l1313&_nkw=iphone+15&_sacat=0&_odkw=iphone&_osacat=0"
stranka3 = requests.get(link3)
tree3 = html.fromstring(stranka3.content)
ceny3 = tree3.xpath('//div/span[@class="s-item__price"]/text()')
ceny3[:5]

Normalizing price formats

Data preparation
ceny_nove = []
for c in ceny3:
    new_c = c.strip()
    new_c = new_c.replace(",","")
    if len(new_c)>1:
        ceny_nove.append(float(new_c[1:]))
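
Computing the average of the normalized prices (a minimal sketch: it reuses the ceny_nove list built above)

Evaluation
if ceny_nove:
    print(f"Average price: ${sum(ceny_nove) / len(ceny_nove):.2f}")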

Extracting emails from university website

Modeling
link = "http://www.tu.ff.ukf.sk/kontakty"
stranka = requests.get(link)
sent = stranka.text
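
Extracting the email addresses from the page source (a minimal sketch: a simple illustrative regex applied to the sent string loaded above, not a full RFC validator)

Modeling
emails = re.findall(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+", sent)
print(set(emails))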

Creating HTML tree from page content

Data preparation
tree = html.fromstring(stranka.content)

Loading the sacred-texts.com Aesop index using requests.get()

Data preparation
link = "https://sacred-texts.com/cla/aesop/index.htm"
stranka = requests.get(link)
stranka.text

Extracting headings using XPath (//body/a/text())

Modeling
tree = html.fromstring(stranka.content)   # rebuild the tree for the page loaded above
nazvy = tree.xpath("//body/a/text()")

Aggregating all headings into single string

Data preparation
all_titles = ''
for title in nazvy:
  all_titles += ' ' + title 

Extracting all hyperlinks using XPath

Modeling
result_all = tree.xpath("//a/@href")

Complex scraping with recursive subpage loading

Modeling
vsetko = ""
for odkaz in result_all:
    link = 'https://sacred-texts.com/cla/aesop/' + odkaz
    my_content = requests.get(link)
    subtree = html.fromstring(my_content.content)   # parse each subpage separately
    for paragraph in subtree.xpath("//hr/p/text()"):
        vsetko = vsetko + " " + paragraph

Loading book text from Project Gutenberg

Data preparation
r = requests.get('http://www.gutenberg.org/files/2701/2701-h/2701-h.htm')

Setting UTF-8 encoding for text interpretation

Data preparation
r.encoding = 'utf-8'

Storing raw HTML from the HTTP response

Data preparation
html_doc = r.text   # named html_doc so it does not shadow lxml's html module used above

Showing first 200 characters of raw HTML

Data preparation
print(html_doc[:200])

Creating BeautifulSoup object for DOM traversal

Modeling
soup = BeautifulSoup(html_doc, 'html.parser')

Extracting clean text without HTML tags

Data preparation
text = soup.get_text()

Image Processing

Loading and displaying traffic sign image

Evaluation
data = plt.imread('stop.jfif')
plt.imshow(data)

Cropping and visualizing image section

Evaluation
selected_part = data[100:150 , 10:100 , : ]
plt.imshow(selected_part)

Extracting a single color channel from the image

Evaluation
variation_img = data[: , : , 0]   # keep only the first (red) channel
plt.imshow(variation_img)

Zooming and visualizing small image section

Evaluation
selected_part = data[100:103 , 10:16 , : ]
plt.imshow(selected_part)

Visualizing grayscale image

Evaluation
data_gray = np.mean(data, axis=2)
plt.imshow(data_gray, cmap='gray')
plt.show()