Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing tree library
Business understanding: from sklearn import tree
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing Counter library
Business understanding: from collections import Counter
Importing confusion_matrix library
Business understanding: from sklearn.metrics import confusion_matrix
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing export_graphviz library
Business understanding: from sklearn.tree import export_graphviz
Importing StringIO library
Business understanding: from six import StringIO
Importing Image library
Business understanding: from IPython.display import Image
Importing pydotplus library
Business understanding: import pydotplus
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing NumPy library
Business understanding: import numpy as np
Importing Seaborn library
Business understanding: import seaborn as sns
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing Counter library
Business understanding: from collections import Counter
Importing NumPy library
Business understanding: import numpy as np
Importing DecisionTreeRegressor for regression prediction
Business understanding: from sklearn.tree import DecisionTreeRegressor
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing Counter library
Business understanding: from collections import Counter
Importing confusion_matrix library
Business understanding: from sklearn.metrics import confusion_matrix
Importing NumPy library
Business understanding: import numpy as np
Importing accuracy_score library
Business understanding: from sklearn.metrics import accuracy_score
Importing precision_recall_fscore_support
Business understanding: from sklearn.metrics import precision_recall_fscore_support
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing DecisionTreeRegressor for regression prediction
Business understanding: from sklearn.tree import DecisionTreeRegressor
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing NumPy library
Business understanding: import numpy as np
Importing DecisionTreeRegressor for regression prediction
Business understanding: from sklearn.tree import DecisionTreeRegressor
Importing Pandas library
Business understanding: import pandas as pd
Importing metrics library
Business understanding: from sklearn import metrics
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing Seaborn library
Business understanding: import seaborn as sns
Importing RandomForestClassifier
Business understanding: from sklearn.ensemble import RandomForestClassifier
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing Counter library
Business understanding: from collections import Counter
Importing StandardScaler
Business understanding: from sklearn.preprocessing import StandardScaler
Importing LogisticRegression
Business understanding: from sklearn.linear_model import LogisticRegression
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing metrics library
Business understanding: from sklearn import metrics
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing Counter library
Business understanding: from collections import Counter
Importing confusion_matrix library
Business understanding: from sklearn.metrics import confusion_matrix
Importing Seaborn library
Business understanding: import seaborn as sns
Importing RandomForestClassifier
Business understanding: from sklearn.ensemble import RandomForestClassifier
Importing StandardScaler
Business understanding: from sklearn.preprocessing import StandardScaler
Importing LogisticRegression
Business understanding: from sklearn.linear_model import LogisticRegression
Importing statsmodels.api
Business understanding: import statsmodels.api as sm
Importing ROC AUC metrics
Business understanding: from sklearn.metrics import roc_auc_score
Importing ROC curve
Business understanding: from sklearn.metrics import roc_curve
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing NumPy library
Business understanding: import numpy as np
Importing numpy library for array operations
Business understanding: from numpy import array
Importing colors for visualization
Business understanding: from matplotlib.colors import ListedColormap
Setting inline mode for Jupyter plot display
Business understanding: %matplotlib inline
Importing library for random value generation
Business understanding: import random
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing Counter library
Business understanding: from collections import Counter
Importing NumPy library
Business understanding: import numpy as np
Importing library for synthetic data generation
Business understanding: from sklearn import datasets
Importing tools for cluster data creation
Business understanding: from sklearn.datasets import make_blobs
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing NumPy library
Business understanding: import numpy as np
Getting input data matrix shape
Data preparation: m, n = X.shape
Creating bias column for data matrix
Data preparation: bias = np.ones((X.shape[0], 1))
Expanding data matrix with bias column
Data preparation: biased_X = np.hstack((bias, X))
Initializing random number generator with fixed seed
Data preparation: random_gen = np.random.RandomState(1)
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing NumPy library
Business understanding: import numpy as np
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing Keras Dense layer for model building
Business understanding: from tensorflow.keras.layers import Dense
Importing Keras Sequential API
Business understanding: from tensorflow.keras import Sequential
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing DecisionTreeClassifier library
Business understanding: from sklearn.tree import DecisionTreeClassifier
Importing metrics library
Business understanding: from sklearn import metrics
Importing Counter library
Business understanding: from collections import Counter
Importing Seaborn library
Business understanding: import seaborn as sns
Importing accuracy_score library
Business understanding: from sklearn.metrics import accuracy_score
Importing Keras Dense layer for model building
Business understanding: from tensorflow.keras.layers import Dense
Importing Keras Sequential API
Business understanding: from tensorflow.keras import Sequential
Importing Pandas library
Business understanding: import pandas as pd
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing Counter library
Business understanding: from collections import Counter
Importing NumPy library
Business understanding: import numpy as np
Importing Seaborn library
Business understanding: import seaborn as sns
Importing Keras Dense layer for model building
Business understanding: from tensorflow.keras.layers import Dense
Importing Keras Sequential API
Business understanding: from tensorflow.keras import Sequential
Importing one-hot encoding tools
Business understanding: from tensorflow.keras.utils import to_categorical
Importing train_test_split library
Business understanding: from sklearn.model_selection import train_test_split
Importing matplotlib library
Business understanding: import matplotlib.pyplot as plt
Importing NumPy library
Business understanding: import numpy as np
Setting inline mode for Jupyter plot display
Business understanding: %matplotlib inline
Loading Fashion MNIST dataset
Business understanding: from keras.datasets import fashion_mnist
Importing one-hot encoding tools
Business understanding: from keras.utils import to_categorical
Importing required Keras API components (Sequential, Model)
Business understanding: from keras.models import Sequential, Model
Importing required Keras API components (Dense, Dropout, Flatten)
Business understanding: from keras.layers import Dense, Dropout, Flatten
Importing required Keras API components (Conv2D, MaxPooling2D)
Business understanding: from keras.layers import Conv2D, MaxPooling2D
Importing required Keras API components (LeakyReLU)
Business understanding: from keras.layers import LeakyReLU
Importing Pandas library
Business understanding: import pandas as pd
Importing regex for text processing
Business understanding: import re
Importing regex for text processing
Business understanding: import re
Importing NLP toolkit (NLTK)
Business understanding: import nltk
Downloading WordNet lexical database
Business understanding: nltk.download("wordnet")
Importing WordNet corpus
Business understanding: from nltk.corpus import wordnet
Importing HTTP request library
Business understanding: import requests
Importing HTML parser from lxml
Business understanding: from lxml import html
Importing Pandas library
Business understanding: import pandas as pd
Importing Counter library
Business understanding: from collections import Counter
Importing regex for text processing
Business understanding: import re
Importing NLP toolkit (NLTK)
Business understanding: import nltk
Importing HTTP request library
Business understanding: import requests
Importing HTML parser from lxml
Business understanding: from lxml import html
Downloading NLTK tokenizer module
Business understanding: nltk.download('punkt')
Importing word tokenizer
Business understanding: from nltk.tokenize import word_tokenize
Importing Pandas library
Business understanding: import pandas as pd
Setting inline mode for Jupyter plot display
Business understanding: %matplotlib inline
Importing regex for text processing
Business understanding: import re
Importing NLP toolkit (NLTK)
Business understanding: import nltk
Importing HTTP request library
Business understanding: import requests
Importing HTML parser from lxml
Business understanding: from lxml import html
Downloading NLTK tokenizer module
Business understanding: nltk.download('punkt')
Importing word tokenizer
Business understanding: from nltk.tokenize import word_tokenize
Importing FreqDist for frequency analysis
Business understanding: from nltk.probability import FreqDist
Importing BeautifulSoup for advanced HTML parsing
Business understanding: from bs4 import BeautifulSoup
Downloading NLTK English stopwords
Business understanding: nltk.download('stopwords')
Importing stopwords corpus
Business understanding: from nltk.corpus import stopwords
Loading dataset
Data understanding: golf = pd.read_csv('golf_nominal.csv', sep=';')
Loading dataset
Data understanding: diabetes = pd.read_csv('diabetes_inbalanced.csv', index_col=0)
Loading dataset
Data understanding: titanic = pd.read_csv('titanic.csv')
Loading dataset
Data understanding: titanic = pd.read_csv('titanic.csv')
Loading Possum dataset
Data understanding: df = pd.read_csv('possum.csv')
Loading dataset
Data understanding: diabetes = pd.read_csv('diabetes_inbalanced.csv', index_col=0)
Loading dataset
Data understanding: df = pd.read_csv('ice_cream_data.csv', sep=";")
Loading dataset
Data understanding: df = pd.read_csv('ice_cream_data.csv', sep=";")
Loading dataset
Data understanding: titanic = pd.read_csv('titanic.csv')
Loading dataset
Data understanding: df = pd.read_csv('Heart.csv')
Loading dataset
Data understanding: df = pd.read_csv('Heart.csv')
Loading data from GitHub repository
Data understanding: url = "https://raw.githubusercontent.com/Statology/Python-Guides/main/default.csv"
data = pd.read_csv(url)
Defining training data (input examples and expected outputs)
Data preparation: training_data = [
    (array([121,16.8]), 1),
    (array([114,15.2]), 1),
    (array([210,9.4]), -1),
    (array([195,8.1]), -1),
]
Alternative training set for XOR problem
Data preparation: training_data = [
    (array([3,-2]), -1),
    (array([3,1]), 1),
    (array([2,0]), -1),
]
Generating linearly separable data with two classes
Data preparation: X, y = datasets.make_blobs(n_samples=100, n_features=2,
                           centers=2, cluster_std=1,
                           random_state=3)
Generating multidimensional data with four clusters
Data preparation: X, y = make_blobs(n_samples=400, n_features=3, centers=4, cluster_std=1, random_state=3)
Test data for model error calculation
Data preparation: mal_byt = np.array([1,2,3,4])
bol = np.array([1,0,2,5])
Generating linearly separable data with two classes
Data preparation: X, y = datasets.make_blobs(n_samples=100, n_features=2,
                           centers=2, cluster_std=1,
                           random_state=3)
Test data for model error calculation
Data preparation: mal_byt = np.array([1,2,3,4])
bol = np.array([1,0,2,5])
Defining input data for XOR problem (2D matrix)
Data preparation: x = np.array([[0,0,1,1],[0,1,0,1]])
print(x)
Defining target values for XOR problem
Data preparation: y = np.array([[0,1,1,0]])
print(y)
Loading dataset
Data understanding: titanic = pd.read_csv('titanic.csv')
Loading darts dataset
Data understanding: data = pd.read_csv('darts.csv')
Selecting data for two specific competitors
Data preparation: vyber = data[(data.competitor == 'Michael') | (data.competitor == 'Steve')]
Loading darts dataset
Data understanding: data = pd.read_csv('darts.csv')
Splitting data into train/test sets
Data understanding: (train_X, train_Y), (test_X, test_Y) = fashion_mnist.load_data()
Opening text file for reading
Data understanding: text_file = open('human_rights.txt', 'r')
Loading text file content
Data understanding: h_rights = text_file.read()
Loading tweets from CSV file
Data understanding: tweets = pd.read_csv("tweets.csv")
Sample tweet for regex demonstration
Data preparation: tweet = "@nltk T awesome! #regex #pandas #python"
Sample text for NLP operations
Data preparation: text = "The cat is in the box. The cat likes the box. The box is over the cat."
Displaying training data
Data understanding: X_train
Displaying test data
Data understanding: X_test
Printing contents of 'person' variable
Data preparation: osoba
Displaying scaled training data
Data understanding: X_train_scaled
Displaying scaled training data
Data understanding: X_train_scaled
Displaying first 4 rows of expanded matrix
Data preparation: biased_X[:4]
Displaying first 4 predicted values
Data preparation: output_pred[:4]
Displaying first 4 model errors
Data preparation: errors[:4]
Encoding categories
Data preparation: golf = pd.get_dummies(golf, columns=["Outlook"], drop_first=False)
Encoding additional categories
Data preparation: golf = pd.get_dummies(golf, columns=["Temperature", "Humidity", "Windy"], drop_first=False)
Encoding gender
Data preparation: titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})
Encoding gender
Data preparation: titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})
Reshaping input data to 2D array for model compatibility
Data preparation: osoba = osoba.reshape(1, -1)
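scikit-learn estimators expect 2-D input of shape (n_samples, n_features), so a single example has to become a one-row matrix before prediction; a minimal sketch of the effect, with illustrative values:
import numpy as np
v = np.array([10, 0, 0, 1, 0, 3])   # 1-D vector, shape (6,)
print(v.reshape(1, -1).shape)       # (1, 6): one sample, six features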
Converting 'other' and 'Vic' text values to 0 and 1
Data preparation: df['Pop'] = df['Pop'].replace({'other': 0, 'Vic': 1})
Converting 'm' and 'f' text values to 0 and 1
Data preparation: df['sex'] = df['sex'].replace({'m': 0, 'f': 1})
Converting 'm' and 'f' text values to 0 and 1
Data preparation: df['sex'] = df['sex'].replace({'m': 0, 'f': 1})
Removing 'Unnamed: 0' column
Data preparation: df = df.drop(columns='Unnamed: 0')
Converting ChestPain category to numerical values
Data preparation: df['ChestPain'] = df['ChestPain'].astype('category')
df['ChestPain'] = df['ChestPain'].cat.codes
Converting Thal category to numerical values
Data preparation: df['Thal'] = df['Thal'].astype('category')
df['Thal'] = df['Thal'].cat.codes
Converting AHD category to numerical values
Data preparation: df['AHD'] = df['AHD'].astype('category')
df['AHD'] = df['AHD'].cat.codes
Scaling training data
Data preparation: X_train_scaled = scaler.fit_transform(X_train)
Scaling test data
Data preparation: X_test_scaled = scaler.transform(X_test)
Removing 'Unnamed: 0' column
Data preparation: df = df.drop(columns='Unnamed: 0')
Converting ChestPain category to numerical values
Data preparation: df['ChestPain'] = df['ChestPain'].astype('category')
df['ChestPain'] = df['ChestPain'].cat.codes
Converting Thal category to numerical values
Data preparation: df['Thal'] = df['Thal'].astype('category')
df['Thal'] = df['Thal'].cat.codes
Converting AHD category to numerical values
Data preparation: df['AHD'] = df['AHD'].astype('category')
df['AHD'] = df['AHD'].cat.codes
Scaling training data
Data preparation: X_train_scaled = scaler.fit_transform(X_train)
Scaling test data
Data preparation: X_test_scaled = scaler.transform(X_test)
Encoding gender
Data preparation: titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})
Encoding text classes to numerical values
Data preparation: vyber['competitor'] = vyber['competitor'].replace({'Steve': 0, 'Michael': 1})
Extending class encoding for multi-class classification
Data preparation: multi = data
multi["competitor"] = multi["competitor"].replace({'Steve': 0, 'Susan': 1, 'Michael': 2, 'Kate': 3})
Reshaping images for convolutional networks
Data preparation: train_X = train_X.reshape(-1, 28, 28, 1)
test_X = test_X.reshape(-1, 28, 28, 1)
train_X.shape, test_X.shape
Normalizing pixel values to [0,1] range
Data preparation: train_X = train_X.astype('float32')
test_X = test_X.astype('float32')
train_X = train_X / 255.
test_X = test_X / 255.
One-hot encoding target variables
Data preparation: train_Y_one_hot = to_categorical(train_Y)
test_Y_one_hot = to_categorical(test_Y)
print('Original label:', train_Y[0])
print('After conversion to one-hot:', train_Y_one_hot[0])
Separating input features (X) from target variable (y)
Data preparation: X = golf[golf.columns.difference(['Play'])]
y = golf.Play
Getting list of features
Data preparation: stlpce = golf[golf.columns.difference(['Play'])].columns.to_list()
Displaying extracted features
Data preparation: stlpce
Selecting all dataset features except target variable, defining target variable
Data preparation: X = diabetes[diabetes.columns.difference(['Outcome'])]
y = diabetes['Outcome']
y = y.astype('int')
Separating input features (X) from target variable (y)
Data preparation: X = titanic[titanic.columns.difference(['Survived'])]
y = titanic['Survived']
y = y.astype('int')
Separating input features (X) from target variable (y)
Data preparation: X = titanic[titanic.columns.difference(['Survived'])]
y = titanic['Survived']
y = y.astype('int')
Defining test vector representing individual with various attributes
Data preparation: osoba = np.array([10,  # age
                  0,   # fare
                  0,   # parent/children
                  1,   # pclass
                  0,   # sex
                  3])  # siblings/spouses
Selecting all features except target (age), defining target variable
Data preparation: X = df[df.columns.difference(['age'])]
y = df['age']
Selecting all dataset features except target variable, defining target variable
Data preparation: X = diabetes[diabetes.columns.difference(['Outcome'])]
y = diabetes['Outcome']
y = y.astype('int')
Separating input features (X) from target variable (y)
Data preparation: X = df[df.columns.difference(['Revenue'])]
y = df['Revenue']
Separating input features (X) from target variable (y)
Data preparation: X = df.drop(['Revenue'], axis=1)
y = df['Revenue']
Selecting Temperature and Revenue attributes
Data preparation: X = df['Temperature'].values
y = df['Revenue'].values
Creating input vector for visualization
Data preparation: vstup = df.drop(["Revenue"], axis=1)
Separating input features (X) from target variable (y)
Data preparation: X = titanic[titanic.columns.difference(['Survived'])]
y = titanic['Survived']
y = y.astype('int')
Separating input features (X) from target variable (y)
Data preparation: X = df.drop(columns="AHD")
y = df['AHD']
Separating input features (X) from target variable (y)
Data preparation: X = df.drop(columns="AHD")
y = df['AHD']
Selecting variables for binary classification
Data preparation: X = data[['student', 'balance', 'income']]
y = data['default']
Separating input features (X) from target variable (y)
Data preparation: X = titanic[titanic.columns.difference(['Survived'])]
y = titanic['Survived']
y = y.astype('int')
Separating input features from target variable
Data preparation: X = vyber[vyber.columns.difference(['competitor'])]
y = vyber['competitor']
y = y.astype('int')
Separating input features from target variable
Data preparation: X = vyber[vyber.columns.difference(['competitor'])]
y = vyber['competitor']
y = y.astype('int')
Preparing data for 4-class classification
Data preparation: X = multi[multi.columns.difference(['competitor'])]
y = to_categorical(multi['competitor'])
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
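For reproducible experiments, the split can be pinned with random_state and, for classification, stratify keeps the class ratio identical in both parts; a minimal sketch, assuming X and y as above:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)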
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into 70:30 train/test ratio
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into train/test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=21)
Splitting data into train/test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=21)
Splitting data into train/test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
Splitting data into training and test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Splitting data into train/test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
Splitting data into train/test sets
Data preparation: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
Splitting training data into train/validation
Data preparation: train_X, valid_X, train_label, valid_label = train_test_split(train_X, train_Y_one_hot, test_size=0.2, random_state=13)
Creating decision tree model
Modeling: clf = DecisionTreeClassifier()
Creating decision tree model with gini criterion
Modeling: clf = DecisionTreeClassifier(criterion='gini')
Creating decision tree model with gini criterion
Modeling: clf = DecisionTreeClassifier(criterion='gini')
Creating decision tree model with entropy criterion
Modeling: clf = DecisionTreeClassifier(criterion='entropy')
Creating decision tree model with fixed random_state parameter
Modeling: clf = DecisionTreeClassifier(random_state=0)
Creating decision tree model
Modeling: clf = DecisionTreeClassifier()
Creating DecisionTreeRegressor instance with default parameters
Modeling: model = DecisionTreeRegressor()  # e.g. DecisionTreeRegressor(max_depth=3)
Creating decision tree model
Modeling: clf = DecisionTreeClassifier()
Creating decision tree model
Modeling: regressor = DecisionTreeRegressor()
Creating decision tree model
Modeling: regressor = DecisionTreeRegressor()
Creating random forest model
Modeling: rf_model = RandomForestClassifier()
Creating random forest model with 1000 trees
Modeling: rf_model = RandomForestClassifier(n_estimators=1000)
Creating StandardScaler model
Modeling: scaler = StandardScaler()
Creating and training logistic model
Modeling: log_reg = LogisticRegression(random_state=0).fit(X_train_scaled, y_train)
Creating StandardScaler model
Modeling: scaler = StandardScaler()
Creating and training logistic model
Modeling: log_reg = LogisticRegression(random_state=0).fit(X_train_scaled, y_train)
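The scaler and classifier can also be chained so that scaling is always fit on the training data only; a minimal sketch using sklearn's make_pipeline, assuming X_train and y_train as above:
from sklearn.pipeline import make_pipeline
pipe = make_pipeline(StandardScaler(), LogisticRegression(random_state=0))
pipe.fit(X_train, y_train)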
Creating logistic regression model
Modeling: logit_model = sm.Logit(y, X)
Initializing logistic regression model
Modeling: log_regression = LogisticRegression()
Initializing Random Forest classifier
Modeling: rand_forest = RandomForestClassifier(random_state=0)
Creating decision tree model
Modeling: clf = DecisionTreeClassifier()
Training decision tree on training data
Modeling: clf = clf.fit(X_train, y_train)
Training decision tree on training data
Modeling: clf = clf.fit(X_train, y_train)
Training decision tree on training data
Modeling: clf = clf.fit(X_train, y_train)
Training decision tree on training data
Modeling: clf = clf.fit(X_train, y_train)
Training model using training data (X_train, y_train)
Modeling: model.fit(X_train, y_train)
Training decision tree on training data
Modeling: clf = clf.fit(X_train, y_train)
Training decision tree
Modeling: regressor.fit(X_train, y_train)
Training decision tree
Modeling: regressor.fit(X_train, y_train)
Training random forest model
Modeling: rf_model.fit(X_train, y_train)
Training random forest model
Modeling: rf_model.fit(X_train, y_train)
Training logistic regression model
Modeling: result = logit_model.fit()
Training logistic regression model
Modeling: log_regression.fit(X_train, y_train)
Training Random Forest model
Modeling: rand_forest.fit(X_train, y_train)
Training model for 50 epochs
Modeling: model.fit(X_train, y_train, epochs=50)
Training decision tree on training data
Modeling: clf = clf.fit(X_train, y_train)
Training model for 200 epochs
Modeling: model.fit(X_train, y_train, epochs=200)
Training model for 200 epochs
Modeling: model.fit(X_train, y_train, epochs=200)
Training model on image data with validation
Modeling: fashion_train = fashion_model.fit(train_X, train_label, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(valid_X, valid_label))
Predicting values on test data
Modeling: y_pred = clf.predict(X_test)
Predicting values on test data
Modeling: y_pred = clf.predict(X_test)
Predicting values on test data
Modeling: y_pred = clf.predict(X_test)
Predicting values on test data
Modeling: y_pred = clf.predict(X_test)
Using decision tree model to predict for 'person'
Modeling: clf.predict(osoba)
Using model to predict test data (X_test)
Modeling: predictions = model.predict(X_test)
Predicting for specific input vector
Modeling: model.predict([[0,33,63,28,55,14,65,95,0,4,55,36,93]])
Predicting values on test data
Modeling: y_pred = clf.predict(X_test)
Predicting values on test data
Modeling: y_pred = regressor.predict(X_test)
Predicting values on test data
Modeling: y_pred = regressor.predict(X_test)
Predicting values on test data
Modeling: y_pred = rf_model.predict(X_test)
Predicting probabilities for test data
Modeling: rf_model.predict_proba(X_test)
Predicting log probabilities
Modeling: rf_model.predict_log_proba(X_test)
Predicting values on training data
Modeling: log_reg.predict(X_train_scaled)
Predicting probabilities for test data
Modeling: log_reg.predict_proba(X_test_scaled)
Predicting probabilities for test data
Modeling: log_reg.predict_proba(X_test_scaled)
Generating logistic regression predictions
Modeling: y_pred = log_reg.predict(X_test_scaled)
Predicting with logistic regression
Modeling: print(log_regression.predict(X_test))
Predicting with Random Forest model
Modeling: print(rand_forest.predict(X_test))
Predicting output for custom input vector
Modeling: vstup_q = np.array([[-4,8]])
classifier.predict(vstup_q)
Using model to predict test data (X_test)
Modeling: predictions = model.predict(X_test)
Generating predictions for test data
Evaluation: y_pred = model.predict(X_test).round()
Predicting values on test data
Modeling: y_pred = clf.predict(X_test)
Using model to predict test data (X_test)
Modeling: predictions = model.predict(X_test)
Generating predictions for test data
Evaluation: y_pred = model.predict(X_test).round()
Generating predictions for test data
Evaluation: y_pred = model.predict(X_test).round()
Evaluating model accuracy
Evaluation: print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Evaluating model accuracy
Evaluation: print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Creating confusion matrix to analyze correct/incorrect predictions
Evaluation: confusion_matrix(y_test, y_pred, labels=[1,0])
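The raw confusion-matrix counts can also be rendered as an annotated heatmap, in the same style as the correlation-matrix plots later in this section; a minimal sketch, assuming y_test and y_pred as above:
import seaborn as sns
cm = confusion_matrix(y_test, y_pred, labels=[1,0])
sns.heatmap(cm, annot=True, fmt='d')
plt.show()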
Evaluating model accuracy
Evaluation: print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Displaying maximum depth of trained decision tree
Evaluation: clf.get_depth()
Displaying total number of leaves in trained decision tree
Evaluation: clf.get_n_leaves()
Displaying list of accuracies for gini criterion decision tree
Evaluation: acc_gini
Displaying list of accuracies for entropy criterion decision tree
Evaluation: acc_entropy
Creating DataFrame containing decision trees' accuracies
Evaluation: d = pd.DataFrame({'acc_gini': pd.Series(acc_gini), 'acc_entropy': pd.Series(acc_entropy), 'max_depth': pd.Series(max_depth)})
Creating pandas DataFrame with training/test data accuracy
Evaluation: data = pd.DataFrame({'acc_train': pd.Series(acc_train), 'acc_test': pd.Series(acc_test), 'max_depth': pd.Series(max_depth)})
Calculating decision tree pruning path based on training data
Modeling: path = clf.cost_complexity_pruning_path(X_train, y_train)
Extracting ccp_alphas and impurities from pruning path
Modeling: ccp_alphas, impurities = path.ccp_alphas, path.impurities
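Each alpha on the pruning path corresponds to one pruned tree, so a common next step is to fit a classifier per alpha and compare them; a minimal sketch, assuming X_train and y_train as above:
clfs = []
for ccp_alpha in ccp_alphas:
    clf_alpha = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
    clf_alpha.fit(X_train, y_train)
    clfs.append(clf_alpha)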
Evaluating model accuracy
Evaluation: print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Displaying maximum depth of trained decision tree
Evaluation: clf.get_depth()
Listing all parameters of trained decision tree
Evaluation: clf.get_params()
Displaying model output for test data
Evaluation: predictions
Creating confusion matrix to analyze correct/incorrect predictions
Evaluation: confusion_matrix(y_test, y_pred, labels=[1,0])
Evaluating model accuracy
Evaluation: accuracy_score(y_test, y_pred)
Calculating precision, recall, f1-score and support
Evaluation: precision_recall_fscore_support(y_test, y_pred, labels=[1, 0])
Printing precision, recall, f1-score and support
Evaluation: p, r, f, s = precision_recall_fscore_support(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print('acc: ', metrics.accuracy_score(y_test, y_pred))
print('prec: ', ((p[0]+p[1])/2), '(', p[0], ' / ', p[1], ')')
print('rec: ', ((r[0]+r[1])/2), '(', r[0], ' / ', r[1], ')')
print('f1-sc:', ((f[0]+f[1])/2))
print(confusion_matrix(y_test, y_pred))
Creating DataFrame for result comparison
Evaluation: d = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_pred})
Creating DataFrame for result comparison
Evaluation: d = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_pred})
Calculating residual squares
Evaluation: d['sqr_res'] = pow((d['Real Values'] - d['Predicted Values']), 2)
Summing residual squares
Evaluation: d['sqr_res'].sum()
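Divided by the number of samples, this sum of squared residuals becomes the mean squared error, which sklearn computes directly; a minimal sketch, assuming y_test and y_pred as above:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_pred)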
Evaluating model accuracy
Evaluation: print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Getting model parameters
Evaluation: rf_model.get_params()
Getting feature names
Evaluation: rf_model.feature_names_in_
Evaluating model accuracy on training data
Evaluation: log_reg.score(X_train_scaled, y_train)
Evaluating model accuracy on test data
Evaluation: log_reg.score(X_test_scaled, y_test)
Displaying statistical model results
Evaluation: print(result.summary2())
Evaluating logistic regression accuracy
Evaluation: print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(log_reg.score(X_test_scaled, y_test)))
Creating confusion matrix
Evaluation: cm = confusion_matrix(y_test, y_pred)
print(cm)
Importing classification_report
Evaluation: from sklearn.metrics import classification_report
Displaying classification report
Evaluation: print(classification_report(y_test, y_pred))
Testing network on all XOR input combinations
Evaluation: test = np.array([[0],[0]])
predict(w1, w2, test)
test = np.array([[1],[0]])
predict(w1, w2, test)
test = np.array([[0],[1]])
predict(w1, w2, test)
test = np.array([[1],[1]])
predict(w1, w2, test)
Evaluating model accuracy
Evaluation: print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Displaying first 6 predictions
Evaluation: preds[:6]
Displaying first 6 actual values
Evaluation: y_test[:6]
Evaluating model accuracy
Evaluation: print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Displaying model output for test data
Evaluation: predictions
Converting predictions to class labels
Evaluation: labels_predict = np.argmax(y_pred, axis=1)
labels_predict[:6]
Creating confusion matrix for evaluation
Evaluation: confusion_matrix(labels_predict, np.argmax(y_test, axis=1))
Printing classifier accuracy
Evaluation: print("Accuracy: ", metrics.accuracy_score(labels_predict, np.argmax(y_test, axis=1)))
Evaluating model on test set
Evaluation: test_eval = fashion_model.evaluate(test_X, test_Y_one_hot, verbose=0)
Plotting decision tree
Deployment: fig = plt.figure(figsize=(20,6))
_ = tree.plot_tree(clf, feature_names=stlpce, class_names=['no','yes'], filled=True)
Re-plotting decision tree
Deployment: fig = plt.figure(figsize=(20,6))
_ = tree.plot_tree(clf, feature_names=stlpce, class_names=['yes','no'], filled=True)
Visualizing decision tree
Deployment: from sklearn.tree import plot_tree
plt.figure(figsize=(40,20))
plot_tree(regressor, feature_names=vstup.columns.tolist())
Displaying basic statistical values of dataset
Data understanding: diabetes.describe()
Counting value frequency of attribute
Data understanding: Counter(diabetes.Outcome)
Printing basic dataset info
Data understanding: df.info()
Counting occurrences in 'Pop' attribute
Data preparation: Counter(df.Pop)
Counting occurrences in 'sex' attribute
Data preparation: Counter(df.sex)
Displaying basic statistical values of dataset
Data understanding: diabetes.describe()
Counting value frequency of attribute
Data understanding: Counter(diabetes.Outcome)
Calculating correlation matrix
Data understanding: corrMatrix = df.corr()
Counting frequency of 'AHD' attribute values
Data understanding: Counter(df.AHD)
Checking for missing values
Data understanding: df.isnull().sum()
Calculating correlation matrix
Data understanding: corrMatrix = df.corr()
Analyzing class distribution in data
Data preparation: Counter(y)
Extracting examples belonging to class 1
Data preparation: X[:, :][y == 1]
Printing data matrix shape
Data preparation: X.shape
Analyzing class distribution in data
Data preparation: Counter(vyber.competitor)
Analyzing class distribution in data
Data preparation: Counter(vyber.competitor)
Getting image data dimensions
Data preparation: data.shape
Printing pixel values of image section
Data preparation: selected_part
Analyzing training/test data shape
Data preparation: print('Training data shape : ', train_X.shape, train_Y.shape)
print('Testing data shape : ', test_X.shape, test_Y.shape)
Identifying unique classes in data
Data preparation: classes = np.unique(train_Y)
nClasses = len(classes)
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)
Verifying shapes of all data parts
Data preparation: train_X.shape, valid_X.shape, train_label.shape, valid_label.shape
Displaying loaded text content
Data understanding: h_rights
Calculating total character count
Data preparation: len(h_rights)
Calculating number of unique words
Evaluation: len(set(h_rights.split()))
Finding longest word in text
Evaluation: longest = ''
for w in slova:
    if len(w) > len(longest):
        longest = w
Displaying tweet dataset structure
Data understanding: tweets.head()
Applying word count function to entire dataset
Data preparation: tweets['word_count'] = tweets.apply(lambda x: tweet_count(x), axis=1)
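The same counts can be obtained without a helper function through pandas string methods; a minimal sketch, assuming the tweets DataFrame as above:
tweets['word_count'] = tweets['tweet'].str.split().str.len()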
Calculating tweet character counts
Data preparation: tweets['char_count'] = tweets['tweet'].str.len()
Calculating average word length in tweets
Evaluation: tweets['avg_len'] = (tweets['char_count'] - (tweets['word_count'] - 1)) / tweets['word_count']
Displaying decision path taken for 'person' prediction
Deployment: clf.decision_path(osoba).toarray()
Defining decision boundary line for visualization
Evaluation: def priamka(x):
    y = (W[0]*x + b)/(W[1]*(-1))
    return y
Defining decision boundary line for visualization
Evaluation: def priamka(x):
    y = (W[0]*x + b)/(W[1]*(-1))
    return y
Printing model structure
Modeling: model.summary()
Printing model structure
Modeling: model.summary()
Printing model structure
Modeling: model.summary()
Printing model structure with layer/parameter details
Modeling: fashion_model.summary()
Saving trained model to HDF5 file
Deployment: fashion_model.save("fashion_model_dropout.h5")
Plotting decision trees' accuracy changes based on max depth
Deployment: plt.plot('max_depth', 'acc_gini', data=d, label='gini')
plt.plot('max_depth', 'acc_entropy', data=d, label='entropy')
plt.xlabel('max_depth')
plt.ylabel('accuracy')
plt.legend()
Plotting training/test data accuracy
Deployment: plt.plot('max_depth', 'acc_train', data=data, marker='o', label='train')
plt.plot('max_depth', 'acc_test', data=data, marker='o', label='test')
plt.xlabel('max_depth')
plt.ylabel('accuracy')
plt.legend()
Plotting graph showing total leaf impurity
Deployment: fig, ax = plt.subplots()
ax.plot(ccp_alphas[:-1], impurities[:-1], marker="o", drawstyle="steps-post")
ax.set_xlabel("effective alpha")
ax.set_ylabel("total impurity of leaves")
ax.set_title("Total Impurity vs effective alpha for training set")
Visualizing actual vs predicted values
Deployment: plt.scatter(X_test, y_test, color='red')
plt.scatter(X_test, y_pred, color='green')
plt.title('Decision Tree Regression')
plt.xlabel('Temperature')
plt.ylabel('Revenue')
plt.show()
Creating grid for smoother visualization
Deployment: X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))
Visualizing decision tree regression predictions over the grid
Deployment: plt.plot(X_grid, regressor.predict(X_grid), color='black')
plt.title('Decision Tree Regression')
plt.xlabel('Temperature')
plt.ylabel('Revenue')
plt.show()
Visualizing correlation matrix
Deployment: sns.heatmap(corrMatrix, annot=True)
plt.show()
Visualizing correlation matrix
Deployment: sns.heatmap(corrMatrix, annot=True)
plt.show()
Visualizing correlation matrix (formatted values)
Deployment: sns.heatmap(corrMatrix, annot=True, fmt=".1f")
plt.show()
Visualizing correlation matrix (large format)
Deployment: plt.figure(figsize=(12, 9))
sns.heatmap(corrMatrix, annot=True)
plt.show()
Visualizing logistic regression ROC curve
Deployment: logit_roc_auc = roc_auc_score(y_test, log_reg.predict(X_test_scaled))
fpr, tpr, thresholds = roc_curve(y_test, log_reg.predict_proba(X_test_scaled)[:,1])
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show()
Comparing ROC curves of logistic regression and Random Forest
Deployment: y_pred = log_regression.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
auc = round(metrics.roc_auc_score(y_test, y_pred), 4)
plt.plot(fpr, tpr, label="Logistic Regression, AUC="+str(auc))
y_pred = rand_forest.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
auc = round(metrics.roc_auc_score(y_test, y_pred), 4)
plt.plot(fpr, tpr, label="Random Forest, AUC="+str(auc))
plt.legend()
Visualizing training data and decision boundary
Evaluation: cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
ax = plt.subplot()
ax.set_title("Result")
for x, expected in training_data:
    if expected == 1:
        vzor = 'r'
    else:
        vzor = 'b'
    # print(x[0])
    ax.scatter(x[0], x[1], color=vzor)
plt.plot([100,300], [priamka(100), priamka(300)])
plt.show()
Visualizing decision boundary for XOR problem
Evaluation: cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
ax = plt.subplot()
ax.set_title("Result")
for x, expected in training_data:
    if expected == 1:
        vzor = 'r'
    else:
        vzor = 'b'
    ax.scatter(x[0], x[1], color=vzor)
plt.plot([0,8], [priamka(0), priamka(8)])
plt.show()
Visualizing synthetic data with scatter plot
Evaluation: fig = plt.figure(figsize=(5,5))
plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], 'r^')
plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], 'bs')
plt.xlabel("feature 1")
plt.ylabel("feature 2")
plt.title('Random Classification Data with 2 classes')
Setting figure size for complex visualizations
Evaluation: fig = plt.figure(figsize=(8, 6))
Helper function for decision boundary visualization
Evaluation: def vykresli_rozdelenie(X, Y, vahy, bias):
    print('drawing - weights:', vahy, ' bias:', bias)
    x1 = [min(X[:,0]), max(X[:,0])]
    x2 = [(priamka(x1[0], vahy, bias)), (priamka(x1[1], vahy, bias))]
    print('x1:', x1, 'x2:', x2)
    fig = plt.figure(figsize=(8,5))
    plt.plot(X[:, 0][Y==0], X[:, 1][Y==0], "r^")
    plt.plot(X[:, 0][Y==1], X[:, 1][Y==1], "bs")
    plt.xlabel("feature 1")
    plt.ylabel("feature 2")
    plt.title('Perceptron')
    plt.plot(x1, x2, 'y-')
Visualizing loss vs epochs relationship
Evaluation: plt.plot(range(1, len(classifier.cost) + 1), classifier.cost)
plt.title("Adaline: learn-rate 0.001")
plt.xlabel('Epochs')
plt.ylabel('Cost (Sum-of-Squares)')
plt.show()
Visualizing training loss
Evaluation: plt.plot(losses)
plt.xlabel("EPOCHS")
plt.ylabel("Loss value")
Visualizing pairwise variable relationships
Evaluation: sns.pairplot(data, hue='competitor')
Visualizing pairwise variable relationships
Evaluation: sns.pairplot(data, hue='competitor')
Visualizing example images from dataset
Evaluation: plt.figure(figsize=[5,5])
plt.subplot(121)
plt.imshow(train_X[0,:,:], cmap='gray')
plt.title("Ground Truth : {}".format(train_Y[0]))
plt.subplot(122)
plt.imshow(test_X[0,:,:], cmap='gray')
plt.title("Ground Truth : {}".format(test_Y[0]))
Visualizing training accuracy and loss
Evaluation: accuracy = fashion_train.history['accuracy']
loss = fashion_train.history['loss']
val_loss = fashion_train.history['val_loss']
epochs = range(len(accuracy))
plt.plot(epochs, accuracy, 'bo', label='Training accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
Removing rows with missing values
Data preparation: df = df.dropna()
Removing rows with missing values
Data preparation: df = df.dropna()
Displaying dataset
Data understanding: golf
Displaying first rows of dataset
Data understanding: golf.head()
Displaying dataset
Data understanding: diabetes
Displaying dataset
Data understanding: titanic
Displaying first 5 dataset rows for quick overview
Data understanding: titanic.head()
Displaying first 5 rows of dataset
Data understanding: df.head()
Training models and analyzing accuracy based on decision tree depth
Evaluation: max_depth = []
acc_gini = []
acc_entropy = []
for i in range(1, 30):
    dtree = DecisionTreeClassifier(criterion='gini', max_depth=i)
    dtree.fit(X_train, y_train)
    pred = dtree.predict(X_test)
    acc_gini.append(metrics.accuracy_score(y_test, pred))
    ####
    dtree = DecisionTreeClassifier(criterion='entropy', max_depth=i)
    dtree.fit(X_train, y_train)
    pred = dtree.predict(X_test)
    acc_entropy.append(metrics.accuracy_score(y_test, pred))
    ####
    max_depth.append(i)
Training models and analyzing accuracy based on decision tree depth (gini)
Evaluation: max_depth = []
acc_test = []
acc_train = []
for i in range(1, 101):
    dtree = DecisionTreeClassifier(criterion='gini', max_depth=i)
    dtree.fit(X_train, y_train)
    pred_train = dtree.predict(X_train)
    pred_test = dtree.predict(X_test)
    acc_train.append(metrics.accuracy_score(y_train, pred_train))
    acc_test.append(metrics.accuracy_score(y_test, pred_test))
    ####
    max_depth.append(i)
Defining activation function for perceptron (step 0: biological neuron inspiration)
Modeling: def aktivacna_fn(x):
    if x >= 0:
        return 1
    else:
        return -1
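A quick sanity check of the step activation on both sides of the threshold; a minimal usage sketch:
aktivacna_fn(0.7)   # returns 1
aktivacna_fn(-0.3)  # returns -1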
Calculating neuron output (weighted sum of inputs + bias)
Modeling: def neuron(X, W, b):
    return aktivacna_fn(np.dot(X, W) + b)
Initializing weights and bias with starting values
Modeling: W = array([-30,300])
b = -1230
eta = 0.01
print('current weights: ', W)
print('bias: ', b)
Training perceptron using delta rule (weight updates based on error)
Modeling: for i in range(0, 4):
    print('---')
    x, y = training_data[i]
    print('training data: ', x, ', expected output: ', y)
    predikcia = neuron(x, W, b)
    print('prediction: ', predikcia)
    chyba = y - predikcia
    if (chyba != 0):
        print('weights need to be updated')
        W = W + (eta * chyba * x)
        b = b + (eta * chyba * 1)
print('current weights: ', W)
print('bias: ', b)
Predicting output for custom input vector
Modeling: vektor = array([100, 10])
neuron(vektor, W, b)
Initializing weights and bias with random values (XOR problem)
Modeling: r1 = random.randint(-100, 100)
r2 = random.randint(-100, 100)
W = array([r1, r2])
b = random.randint(-100, 100)
eta = 0.5
print('current weights: ', W)
print('bias: ', b)
Training perceptron in epochs (iterating through training data)
Modeling: uprava_vahy = True
epocha_id = 1
while uprava_vahy:
    print('epoch: ', epocha_id)
    epocha_id += 1
    uprava_vahy = False
    for i in range(0, 3):
        print('---')
        x, y = training_data[i]
        predikcia = neuron(x, W, b)
        chyba = y - predikcia
        if (chyba != 0):
            uprava_vahy = True
            W = W + (eta * chyba * x)
            b = b + (eta * chyba * 1)
    print('current weights: ', W, ', bias: ', b)
Defining activation function for perceptron (step 0: biological neuron inspiration)
Modeling: def aktivacna_fn(x):
    if x >= 0:
        return 1
    else:
        return -1
Calculating neuron output (weighted sum of inputs + bias)
Modeling: def neuron(X, W, b):
    return aktivacna_fn(np.dot(X, W) + b)
Initializing perceptron weights (zero values)
Modeling: vahy = np.zeros((n+1, 2))
Implementing perceptron algorithm with visualization
Modeling: def perceptron(X, Y, eta, epochs):
    m, n = X.shape
    W = np.zeros(n)
    b = 0
    for epoch in range(epochs):
        print('epoch: ', epoch)
        vykresli_rozdelenie(X, Y, W, b)
        for i in range(0, m):
            x = X[i]
            y = Y[i]
            predikcia = neuron(x, W, b)
            chyba = y - predikcia
            if (chyba != 0):
                W = W + (eta * chyba * x)
                b = b + (eta * chyba * 1)
Running perceptron algorithm on synthetic data
Modeling: perceptron(X, y, 0.5, 10)
Defining sum of squared errors function
Modeling: def sum_squared_errors(y, output_pred):
    errors = y - output_pred
    return (errors**2).sum()/2.0
Calculating errors between expected and predicted values
Modeling: sum_squared_errors(mal_byt, bol)
Calculating weighted sum of inputs (neuron's internal potential)
Modeling: def vnutorny_potencial(X, weights):
    return np.dot(X, weights)
Defining sum of squared errors function
Modeling: def sum_squared_errors(y, output_pred):
    errors = y - output_pred
    return (errors**2).sum()/2.0
Calculating errors between expected and predicted values
Modeling: sum_squared_errors(mal_byt, bol)
Calculating weighted sum of inputs (neuron's internal potential)
Modeling: def vnutorny_potencial(X, weights):
    return np.dot(X, weights)
Defining linear activation function for Adaline (identity function)
Modeling: def aktivacna_fn(x):
    return x
Generating initial weights from normal distribution
Modeling: weights = random_gen.normal(loc=0.0, scale=0.01, size=biased_X.shape[1])
Initializing list for storing errors and calculating predictions
Modeling: cost = []
learn_rate = 0.5
output_pred = aktivacna_fn(vnutorny_potencial(biased_X, weights))
Calculating errors between actual and predicted values
Modeling: errors = y - output_pred
Updating weights using gradient descent
Modeling: weights += (learn_rate * biased_X.T.dot(errors))
Displaying updated model weights
Modeling: weights
Calculating loss using sum of squared errors
Evaluation: cost_i = (errors**2).sum() / 2.0
cost_i = sum_squared_errors(y, output_pred)
Training Adaline model for 20 epochs
Modeling: for i in range(20):
    output_pred = aktivacna_fn(vnutorny_potencial(biased_X, weights))
    errors = y - output_pred
    weights += (learn_rate * biased_X.T.dot(errors))
    cost_i = (errors**2).sum() / 2.0
    cost.append(cost_i)
Implementing Adaline algorithm with automatic data scaling
Modeling: class Adaline(object):
    def __init__(self, learn_rate=0.001, iterations=10000):
        self.learn_rate = learn_rate
        self.iterations = iterations

    def fit(self, X, y, biased_X=False, standardised_X=False):
        if not standardised_X:
            X = self._standardise_features(X)
        if not biased_X:
            X = self._add_bias(X)
        self._initialise_weights(X)
        self.cost = []
        for cycle in range(self.iterations):
            output_pred = self._activation(self._net_input(X))
            errors = y - output_pred
            self.weights += (self.learn_rate * X.T.dot(errors))
            cost = (errors**2).sum() / 2.0
            self.cost.append(cost)
        return self

    def _net_input(self, X):
        return np.dot(X, self.weights)

    def predict(self, X, biased_X=False):
        if not biased_X:
            X = self._add_bias(X)
        return np.where(self._activation(self._net_input(X)) >= 0.0, 1, 0)

    def _add_bias(self, X):
        bias = np.ones((X.shape[0], 1))
        biased_X = np.hstack((bias, X))
        return biased_X

    def _initialise_weights(self, X):
        random_gen = np.random.RandomState(1)
        self.weights = random_gen.normal(loc=0.0, scale=0.01, size=X.shape[1])
        return self

    def _standardise_features(self, X):
        X_norm = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
        return X_norm

    def _activation(self, X):
        return X
Creating and training Adaline classifier
Modeling: classifier = Adaline(learn_rate=0.001, iterations=100)
a = classifier.fit(X, y)
Displaying final trained model weights
Modeling: a.weights
Initializing neural network parameters (2-2-1 architecture)
Modeling: n_x = 2
n_h = 2
n_y = 1
m = x.shape[1]
lr = 0.2
np.random.seed(2)
w1 = np.random.rand(n_h, n_x)  # weight matrix for the hidden layer
w2 = np.random.rand(n_y, n_h)  # weight matrix for the output layer
losses = []
Defining sigmoid activation function
Modeling: def sigmoid(z):
    z = 1/(1+np.exp(-z))
    return z
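Backpropagation below relies on the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)), which appears as the a1*(1-a1) factor in back_prop; a minimal sketch of that derivative as a standalone helper (not used by the code here):
def sigmoid_derivative(z):
    s = sigmoid(z)
    return s * (1 - s)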
Implementing forward propagation for two-layer network
Modeling: def forward_prop(w1, w2, x):
    z1 = np.dot(w1, x)
    a1 = sigmoid(z1)
    z2 = np.dot(w2, a1)
    a2 = sigmoid(z2)
    return z1, a1, z2, a2
Implementing backpropagation for gradient calculation
Modeling: def back_prop(m, w1, w2, z1, a1, z2, a2, y):
    dz2 = a2 - y
    dw2 = np.dot(dz2, a1.T)/m
    dz1 = np.dot(w2.T, dz2) * a1*(1-a1)
    dw1 = np.dot(dz1, x.T)/m
    dw1 = np.reshape(dw1, w1.shape)
    dw2 = np.reshape(dw2, w2.shape)
    return dz2, dw2, dz1, dw1
Training neural network for 100,000 iterations
Modeling: iterations = 100000
for i in range(iterations):
    z1, a1, z2, a2 = forward_prop(w1, w2, x)
    loss = -(1/m)*np.sum(y*np.log(a2)+(1-y)*np.log(1-a2))
    losses.append(loss)
    da2, dw2, dz1, dw1 = back_prop(m, w1, w2, z1, a1, z2, a2, y)
    w2 = w2 - lr*dw2
    w1 = w1 - lr*dw1
Function for predicting outputs using trained weights
Modeling: def predict(w1, w2, input):
    z1, a1, z2, a2 = forward_prop(w1, w2, input)
    a2 = np.squeeze(a2)
    if a2 >= 0.5:
        print("For input", [i[0] for i in input], "output is 1")
    else:
        print("For input", [i[0] for i in input], "output is 0")
Defining sequential model with three layers
Modeling: model = Sequential()
model.add(Dense(48, input_shape=(6,), activation="sigmoid"))
model.add(Dense(6, activation="sigmoid"))
model.add(Dense(1))
Compiling model with Adam optimizer and MSE loss
Modeling: model.compile(optimizer="adam", loss="mse")
Compiling model with Adam optimizer and MSE loss
Modeling: model.compile(optimizer="adam", loss="mse")
Defining model with single hidden layer
Modeling: model = Sequential()
model.add(Dense(4, input_shape=(2,), activation="relu"))
model.add(Dense(1))
Defining model for multi-class classification
Modeling: model = Sequential()
model.add(Dense(4, input_shape=(2,), activation="relu"))
model.add(Dense(4, activation="softmax"))
Compiling model with binary cross-entropy
Modeling: model.compile(optimizer="adam", loss="binary_crossentropy")
Defining training parameters (batch size, epochs, class count)
Modeling: batch_size = 64
epochs = 5
num_classes = 10
CNN architecture with 3 convolutional layers and max pooling
Modeling: fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(28,28,1), padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D((2, 2), padding='same'))
fashion_model.add(Conv2D(64, (3, 3), activation='linear', padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
fashion_model.add(Conv2D(128, (3, 3), activation='linear', padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dense(num_classes, activation='softmax'))
Compiling model with cross-entropy loss and Adam optimizer
Modeling: import keras
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
Defining lambda function for simple addition
Modeling: x = lambda a: a + 100
Cleaning text (removing special characters/punctuation)
Data preparation: h_rights = h_rights.replace('\n', ' ')
h_rights = h_rights.replace("\ufeff", ' ')
h_rights = h_rights.replace(',', ' ')
h_rights = h_rights.replace('.', ' ')
Tokenizing text into words
Data preparation: slova = h_rights.split()
Function for counting words in tweets
Modeling: def tweet_count(row):
    my_var = row['tweet']
    return len(my_var.split())
Basic regex match test
Modeling: re.match('abc', 'abcdefgh')
Detecting hashtags with regex
Modeling: re.search('#[A-Za-z0-9]+', tweet)
Extracting all hashtags from tweet
Modeling: [w for w in tweet.split() if re.search('#[A-Za-z0-9]+', w)]
Demonstrating regex findall for 'b.+ing' pattern
Modeling: sentence1 = "In the beginning was the Word"
re.findall("b.+ing", sentence1)
Validating emails with regex
Modeling: sent = 'My email is jkapusta@ukf.sk and my colleague has mdrlik@ukf.sk . This is the bad email: jkkkapusta@u.k'
[w for w in sent.split(" ") if re.search(r"[a-z]+@[a-z.]+\.[a-z]{2,3}$", w)]
Getting synsets for 'joy'
Modeling: syns = wordnet.synsets('joy')
Extracting lemma names for first synset
Modeling: for syn in syns[0].lemmas():
    print(syn.name())
Extracting all lemma names
Modeling: for syn in syns:
    for lema in syn.lemmas():
        print(lema.name())
Getting definition for first synset
Evaluation: syns[0].definition()
Finding synonyms/antonyms for given word
Evaluation: slovo = "joy"
synonyma = []
antonyma = []
for syn in wordnet.synsets(slovo):
    for lema in syn.lemmas():
        synonyma.append(lema.name())
        if lema.antonyms():
            antonyma.append(lema.antonyms()[0].name())
print('Synonyms:')
print(set(synonyma))
print('Antonyms:')
print(set(antonyma))
Calculating Wu-Palmer similarity between concepts
Evaluation: w1 = wordnet.synset('joy.n.01')
w2 = wordnet.synset('joyousness.n.01')
print(w1.wup_similarity(w2))
Demonstrating low semantic similarity
Evaluation: w1 = wordnet.synset('joy.n.01')
w2 = wordnet.synset('mouse.n.01')
print(w1.wup_similarity(w2))
Validating emails with regex
Evaluation: [w for w in sent.split(" ") if re.search(r"^[a-zA-Z0-9+._-]{1,64}@[a-zA-Z0-9-]{1,255}\.[a-zA-Z0-9.-]{2,}$", w)]
Tokenizing text into words
Data preparation: array = word_tokenize(text)
Normalizing text to lowercase
Data preparation: smalym = text.lower()
Tokenizing normalized text
Data preparation: word_tokenize(smalym)
Creating word frequency distribution
Evaluation: v = Counter(word_tokenize(smalym))
Getting top 5 frequent words
Evaluation: v.most_common(5)
Tokenizing aggregated text into words
Data preparation: words = word_tokenize(all_titles)
Normalizing tokens to lowercase
Data preparation: text_lower = [w.lower() for w in words]
Creating word frequency distribution
Evaluation: freq = FreqDist(text_lower)
Getting top 10 frequent words
Evaluation: freq.most_common(10)
Initializing regex tokenizer for words
Modeling: tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
Tokenizing text with custom regex pattern
Data preparation: tokens = tokenizer.tokenize(text)
Normalizing tokens to lowercase
Data preparation: words = []
for word in tokens:
    words.append(word.lower())
Calculating normalized word frequency distribution
Evaluation: freq = FreqDist(words)
Loading English stopwords into variable
Data preparation: sw = stopwords.words('english')
Filtering stopwords from tokenized text
Modeling: words_ns = []
for word in words:
    if word not in sw:
        words_ns.append(word)
Creating clean word frequency distribution
Evaluation: freqdist = nltk.FreqDist(words_ns)
Visualizing top 20 frequent words using FreqDist.plot()
Evaluation: freqdist.plot(20, cumulative=False)
Loading webpage using GET request
Data preparation: link = "https://ukf.sk"
stranka = requests.get(link)
stranka.text
Creating HTML tree from page content
Data preparation: tree = html.fromstring(stranka.content)
Extracting headings using XPath
Modeling: nazvy = tree.xpath("//h2/a/text()")
Loading eBay product data
Data preparation: link2 = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2334524.m570.l1313&_nkw=iphone+15&_sacat=0&_odkw=iphone&_osacat=0"
stranka2 = requests.get(link2)
Complex product price extraction/analysis
Evaluation: tree2 = html.fromstring(stranka2.content)
prices = []
for item in tree2.xpath('//span[@class="s-item__price"]'):
    price_text = item.text_content()
    price_match = re.search(r'\$\d+(?:,\d{3})*(?:\.\d{2})?', price_text)
    if price_match:
        price = float(price_match.group().replace('$', '').replace(',', ''))
        prices.append(price)
print(prices)
if prices:
    average_price = sum(prices) / len(prices)
    print(f"Average price of iPhone on Ebay: ${average_price:.2f}")
else:
    print("No prices found.")
Alternative eBay price extraction method
Modeling: link3 = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2334524.m570.l1313&_nkw=iphone+15&_sacat=0&_odkw=iphone&_osacat=0"
stranka3 = requests.get(link3)
tree3 = html.fromstring(stranka3.content)
ceny3 = tree3.xpath('//div/span[@class="s-item__price"]/text()')
ceny3[:5]
Normalizing price formats
Data preparation: ceny_nove = []
for c in ceny3:
    new_c = c.strip()
    new_c = new_c.replace(",", "")
    if len(new_c) > 1:
        ceny_nove.append(float(new_c[1:]))
Extracting emails from university website
Modeling: link = "http://www.tu.ff.ukf.sk/kontakty"
stranka = requests.get(link)
sent = stranka.text
Creating HTML tree from page content
Data preparation: tree = html.fromstring(stranka.content)
Loading esoteric texts website using requests.get()
Data preparation: link = "https://sacred-texts.com/cla/aesop/index.htm"
stranka = requests.get(link)
stranka.text
Extracting headings using XPath (//body/a/text())
Modeling: nazvy = tree.xpath("//body/a/text()")
Aggregating all headings into single string
Data preparation: all_titles = ''
for title in nazvy:
    all_titles += ' ' + title
Extracting all hyperlinks using XPath
Modeling: result_all = tree.xpath("//a/@href")
Complex scraping with recursive subpage loading
Modeling: vsetko = ""
for odkaz in result_all:
    link = 'https://sacred-texts.com/cla/aesop/' + odkaz
    my_content = requests.get(link)
    subtree = html.fromstring(my_content.content)
    for odsek in subtree.xpath("//hr/p/text()"):
        vsetko = vsetko + " " + odsek
Loading book text from Project Gutenberg
Data preparation: r = requests.get('http://www.gutenberg.org/files/2701/2701-h/2701-h.htm')
Setting UTF-8 encoding for text interpretation
Data preparation: r.encoding = 'utf-8'
Extracting clean text from HTTP response
Data preparation: html = r.text
Showing first 200 characters of raw HTML
Data preparation: print(html[:200])
Creating BeautifulSoup object for DOM traversal
Modeling: soup = BeautifulSoup(html, 'html.parser')
Extracting clean text without HTML tags
Data preparation: text = soup.get_text()
Loading and displaying traffic sign image
Evaluation: data = plt.imread('stop.jfif')
plt.imshow(data)
Cropping and visualizing image section
Evaluation: selected_part = data[100:150, 10:100, :]
plt.imshow(selected_part)
Selecting and visualizing a single color channel
Evaluation: variation_img = data[:, :, 0]
plt.imshow(variation_img)
Zooming and visualizing small image section
Evaluation: selected_part = data[100:103, 10:16, :]
plt.imshow(selected_part)
Visualizing grayscale image
Evaluation: data_gray = np.mean(data, axis=2)
plt.imshow(data_gray, cmap='gray')
plt.show()