===== ASSIGNMENT 1 =====

### Code:
import pandas as pd

df = pd.read_csv("Titanic-Dataset.csv")
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.head())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.describe())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df.info()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.tail())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.head(10))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.tail(10))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.isnull())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.isnull().sum())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.columns)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
# preview the object (categorical) columns
print(df.select_dtypes(include=['object']).head())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.duplicated().sum())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.nunique())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.dtypes)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import numpy as np

cat_cols = df.select_dtypes(include=['object']).columns
num_cols = df.select_dtypes(include=np.number).columns.tolist()
print("Categorical Variables:")
print(cat_cols)
print("Numerical Variables:")
print(num_cols)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram and box plot for every numeric column, with its skewness
for col in num_cols:
    print(col)
    print('Skew :', round(df[col].skew(), 2))
    plt.figure(figsize=(15, 4))
    plt.subplot(1, 2, 1)
    df[col].hist(grid=False)
    plt.ylabel('count')
    plt.subplot(1, 2, 2)
    sns.boxplot(x=df[col])
    plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
sns.countplot(x='Sex', hue='Survived', data=df)
plt.title('Survival Count by Sex')
plt.xlabel('Sex')
plt.ylabel('Count')
plt.legend(title='Survived', labels=['No', 'Yes'])
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
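The null counts above typically reveal missing values in Age, Embarked and (heavily) Cabin. A minimal sketch of one common way to handle them, assuming median/mode imputation and dropping the sparse Cabin column; the cleaning strategy is an assumption, not part of the original assignment, and works on a copy so the cells below are unaffected.

### Code:
# Possible cleaning step (assumed strategy): impute Age/Embarked, drop the sparse Cabin column
df_clean = df.copy()
df_clean['Age'] = df_clean['Age'].fillna(df_clean['Age'].median())                   # numeric: median
df_clean['Embarked'] = df_clean['Embarked'].fillna(df_clean['Embarked'].mode()[0])   # categorical: mode
df_clean = df_clean.drop(columns=['Cabin'])                                          # mostly missing
print(df_clean.isnull().sum())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------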
### Code:
sns.histplot(data=df, x='Age', bins=30, kde=True)
plt.title('Age Distribution')
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df['Survived'].value_counts().plot.pie(autopct='%1.1f%%', labels=['Died', 'Survived'], colors=['pink', 'skyblue'])
plt.title('Survival Pie Chart')
plt.ylabel('')
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.head())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df.info()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.columns)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import pandas as pd

# Feature engineering: family size = siblings/spouses + parents/children + the passenger
df['FamilySize'] = df['SibSp'] + df['Parch'] + 1
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.columns)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(df.head(10))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import seaborn as sns

sns.violinplot(x='Survived', y='Age', data=df)
plt.title('Age Distribution by Survival (Violin Plot)')
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import seaborn as sns

sns.boxplot(x='Survived', y='Age', data=df)
plt.title('Age Distribution by Survival (Box Plot)')
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import seaborn as sns

sns.stripplot(x='Survived', y='Fare', data=df, jitter=True)
plt.title('Fare Spread by Survival')
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df.describe(include='all').T
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df.describe().T
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
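An optional extra EDA step (not in the original notebook): a correlation heatmap of the numeric columns summarises their pairwise relationships. This sketch assumes num_cols and the matplotlib/seaborn imports from the cells above.

### Code:
# Correlation heatmap of the numeric features (optional extra step)
corr = df[num_cols].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix of Numeric Features')
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------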
===== ASSIGNMENT 2 =====

### Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df_sal = pd.read_csv('Salary_Data.csv')
df_sal.head()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df_sal.describe()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
plt.title('Salary Distribution Plot')
sns.histplot(df_sal['Salary'], kde=True)
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
plt.scatter(df_sal['YearsExperience'], df_sal['Salary'], color='lightcoral')
plt.title('Salary vs Experience')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.box(False)
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
X = df_sal.iloc[:, :1]   # YearsExperience
y = df_sal.iloc[:, 1:]   # Salary
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
regressor = LinearRegression()
regressor.fit(X_train, y_train)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
y_pred_test = regressor.predict(X_test)
y_pred_train = regressor.predict(X_train)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
plt.scatter(X_train, y_train, color='lightcoral')
plt.plot(X_train, y_pred_train, color='firebrick')
plt.title('Salary vs Experience (Training Set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.legend(['Training data', 'Regression line'], title='Sal/Exp', loc='best', facecolor='white')
plt.box(False)
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(f'Coefficient: {regressor.coef_}')
print(f'Intercept: {regressor.intercept_}')
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.metrics import mean_absolute_error

# Small toy example of MAE on hand-made values
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
mae = mean_absolute_error(y_true, y_pred)
print("Mean Absolute Error (MAE):", mae)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
def train_linear_regression(csv_file):
    # Fit a simple linear regression of Salary on YearsExperience
    data = pd.read_csv(csv_file)
    X = data[['YearsExperience']]
    y = data['Salary']
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    return y, y_pred, model
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
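The helper above is defined but never called. A short usage sketch, assuming Salary_Data.csv is in the working directory, that evaluates the in-sample fit with the metrics imported in the previous cell:

### Code:
# Example call of train_linear_regression (in-sample evaluation)
y_actual, y_fitted, sal_model = train_linear_regression('Salary_Data.csv')
print("MAE :", mean_absolute_error(y_actual, y_fitted))
print("RMSE:", np.sqrt(mean_squared_error(y_actual, y_fitted)))
print("R2  :", r2_score(y_actual, y_fitted))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------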
### Code:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

data = pd.read_csv("Salary_Data.csv")
X = data[['YearsExperience']]
y = data[['Salary']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the target to [0, 1]; fit the scaler on the training split only
scaler = MinMaxScaler()
y_train_scaled = scaler.fit_transform(y_train)
y_test_scaled = scaler.transform(y_test)

model = LinearRegression()
model.fit(X_train, y_train_scaled)
y_pred_scaled = model.predict(X_test)

mse = mean_squared_error(y_test_scaled, y_pred_scaled)
mae = mean_absolute_error(y_test_scaled, y_pred_scaled)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_scaled, y_pred_scaled)

print(f"Slope (m): {model.coef_[0][0]}")
print(f"Intercept (b): {model.intercept_[0]}")
print(f"\nMean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R² Score: {r2:.4f}")
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
===== ASSIGNMENT 3 =====

### Code:
import pandas as pd

pima = pd.read_csv("diabetes.csv")
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import pandas as pd

col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']
pima = pd.read_csv("diabetes.csv", header=0, names=col_names)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(pima.columns)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
# Reload with the original column names
pima = pd.read_csv("diabetes.csv")
print(pima.head())
print(pima.columns)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
feature_cols = ['Glucose']
X = pima[feature_cols]
y = pima['Outcome'].astype(int)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
# split X and y into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(X_train.shape)
print(y_train.shape)
print(set(y_train))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
# import the class
from sklearn.linear_model import LogisticRegression

# instantiate the model (using the default parameters)
logreg = LogisticRegression(random_state=16)

# fit the model with data
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
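Besides hard class predictions, logistic regression also provides class probabilities. A minimal sketch, assuming the logreg model fitted above, that shows predict_proba and an alternative decision threshold (0.3 is an arbitrary illustrative value):

### Code:
# Predicted probabilities for class 1 and a custom decision threshold
proba = logreg.predict_proba(X_test)[:, 1]
y_pred_03 = (proba >= 0.3).astype(int)   # lower threshold => more positive predictions
print(proba[:5])
print("Positives at threshold 0.5:", (proba >= 0.5).sum())
print("Positives at threshold 0.3:", y_pred_03.sum())
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------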
### Code:
# import the metrics class
from sklearn import metrics

cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(X_train.shape, y_train.shape)  # same number of rows
print(set(y_train))                  # should be {0, 1}
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(set(y_train))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

logreg = LogisticRegression(random_state=16)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Evaluate the fitted model over a grid of glucose values to draw the sigmoid curve
glucose_range = np.linspace(X_train['Glucose'].min(), X_train['Glucose'].max(), 200).reshape(-1, 1)
probabilities = logreg.predict_proba(pd.DataFrame(glucose_range, columns=['Glucose']))[:, 1]

plot_data = X_train.copy()
plot_data.columns = ['Glucose']
plot_data['Outcome'] = y_train
plot_df = pd.DataFrame({
    'Glucose': glucose_range.flatten(),
    'probability': probabilities
})

# Plot
plt.figure(figsize=(8, 6))
sns.scatterplot(x='Glucose', y='Outcome', data=plot_data, alpha=0.5)
sns.lineplot(x='Glucose', y='probability', data=plot_df, color='red', linewidth=2)
plt.xlabel('Glucose')
plt.ylabel('Probability of Outcome = 1')
plt.title('Logistic Regression Sigmoid Curve')
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

class_names = [0, 1]
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)

sns.heatmap(pd.DataFrame(cnf_matrix), annot=True, cmap="YlGnBu", fmt='g')
ax.xaxis.set_label_position("top")
plt.tight_layout()
plt.title('Confusion Matrix', y=1.1)
plt.ylabel('Actual Value')
plt.xlabel('Predicted Value')
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
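As an additional evaluation (not part of the original assignment), the ROC curve and AUC can be computed from the model's predicted probabilities. A sketch using sklearn.metrics and the logreg model fitted above:

### Code:
from sklearn.metrics import roc_curve, roc_auc_score

y_proba = logreg.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_proba)
auc = roc_auc_score(y_test, y_proba)

plt.plot(fpr, tpr, label=f'AUC = {auc:.3f}')
plt.plot([0, 1], [0, 1], linestyle='--', color='grey')   # chance line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve (Glucose-only Logistic Regression)')
plt.legend()
plt.show()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------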
===== ASSIGNMENT 4 =====

### Code:
import pandas as pd

df = pd.read_csv('loan_data.csv')
df.head()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
df.info()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
import seaborn as sns
import matplotlib.pyplot as plt

sns.countplot(data=df, x='purpose', hue='not.fully.paid')
plt.xticks(rotation=45, ha='right')
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
# One-hot encode the categorical 'purpose' column
pre_df = pd.get_dummies(df, columns=['purpose'], drop_first=True)
pre_df.head()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.model_selection import train_test_split

X = pre_df.drop("not.fully.paid", axis=1)
y = pre_df['not.fully.paid']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=125)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X_train, y_train);
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    f1_score,
    classification_report,
)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="weighted")

print("Accuracy:", accuracy)
print("F1 Score:", f1)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
labels = ["Fully Paid", "Not fully Paid"]
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot();
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
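classification_report is imported above but never used. A short sketch that prints per-class precision, recall and F1 for the Naive Bayes predictions (the class names mirror the labels used in the confusion matrix display):

### Code:
# Per-class metrics for the Gaussian Naive Bayes model fitted above
print(classification_report(y_test, y_pred, target_names=["Fully Paid", "Not fully Paid"]))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------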
===== ASSIGNMENT 5 =====

### Code:
from sklearn import datasets

cancer = datasets.load_breast_cancer()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print("Features: ", cancer.feature_names)
print("Labels: ", cancer.target_names)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
cancer.data.shape
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(cancer.data[0:5])
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print(cancer.target)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.3, random_state=109)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn import svm

clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

labels = ["Malignant", "Benign"]
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn.metrics import confusion_matrix

labels = ["Malignant", "Benign"]
cm = confusion_matrix(y_test, y_pred)

# Print the confusion matrix
print("Confusion Matrix:\n", cm)

# Assign values
TN, FP, FN, TP = cm.ravel()
print(f"True Negative (TN): {TN}")
print(f"False Positive (FP): {FP}")
print(f"False Negative (FN): {FN}")
print(f"True Positive (TP): {TP}")
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
from sklearn import metrics

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print("Precision:", metrics.precision_score(y_test, y_pred))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print("Recall:", metrics.recall_score(y_test, y_pred))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
print("F1_Score:", metrics.f1_score(y_test, y_pred))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
===== ASSIGNMENT 6 =====

### Code:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
col_names = ['Pregnancies', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']
data = pd.read_csv("diabetes.csv", header=0, names=col_names)
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
### Code:
data.head()
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------
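Assignment 6 stops after loading the data. A minimal sketch of the usual next steps, using only the modules already imported above: split the data, fit a DecisionTreeClassifier, and report test accuracy. The feature list, tree depth and random_state are assumptions, not part of the original assignment.

### Code:
# Possible continuation: train/test split, fit a shallow decision tree, report accuracy
feature_cols = ['Pregnancies', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age']
X = data[feature_cols]
y = data['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=1)  # shallow tree to limit overfitting
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
### Output: (Output will appear here when run in Jupyter Notebook)
--------------------------------------------------------------------------------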