# Remaining programs

# 1. T-test Independent
import pandas as pd
from scipy.stats import t, ttest_ind


def independent_ttest(df, alpha=0.05):
    """Compare Class A and Class B scores with a two-sided independent t-test.

    Args:
        df: DataFrame with a "Class" column (groups "A" and "B") and a
            numeric "Score" column.
        alpha: Significance level. The default 0.05 yields the 0.975
            quantile as the two-sided critical value.

    Returns:
        dict with the keys "summary" (per-group count / mean / std table),
        "t_value", "p_value", "dfree", "critical" and "reject_h0".
    """
    # Drop missing scores up front so that the degrees of freedom (based on
    # len) agree with the counts in the summary and the statistic is not NaN.
    scores = df.dropna(subset=["Score"])
    a = scores.loc[scores["Class"] == "A", "Score"]
    b = scores.loc[scores["Class"] == "B", "Score"]

    summary = pd.DataFrame({
        "Group": ["Class A", "Class B"],
        "Count": [a.count(), b.count()],
        "Mean": [round(a.mean(), 2), round(b.mean(), 2)],
        "Std": [round(a.std(), 2), round(b.std(), 2)],
    })

    t_value, p_value = ttest_ind(a, b)
    dfree = len(a) + len(b) - 2
    critical = t.ppf(1 - alpha / 2, dfree)

    return {
        "summary": summary,
        "t_value": t_value,
        "p_value": p_value,
        "dfree": dfree,
        "critical": critical,
        "reject_h0": bool(abs(t_value) > critical),
    }


def run_independent_ttest(csv_path="independent_ttest.csv"):
    """Load the dataset from *csv_path* and print the independent t-test report."""
    df = pd.read_csv(csv_path)

    print("Sample Dataset")
    # sample() raises when asked for more rows than exist, so cap it.
    print(df.sample(min(5, len(df))))

    print("\nIndependent T-Test")
    print("H0: Mean scores of Class A and Class B are equal")
    print("H1: Mean scores of Class A and Class B are different")

    result = independent_ttest(df)

    print("\nSummary")
    print(result["summary"])

    print("\nCalculated t-value =", round(result["t_value"], 4))
    print("Critical t-value =", round(result["critical"], 4))
    print("Degrees of Freedom =", result["dfree"])

    if result["reject_h0"]:
        print("Decision: Reject H0")
        print("Conclusion: Class A and Class B scores are significantly different")
    else:
        print("Decision: Fail to Reject H0")
        print("Conclusion: Class A and Class B scores are not significantly different")


if __name__ == "__main__":
    run_independent_ttest()

# Output:: ______________________________
# 02.
# T-test paired:
import pandas as pd
from scipy.stats import f, f_oneway, t, ttest_rel


def paired_ttest(df, alpha=0.05):
    """Run a two-sided paired t-test on the "Before" and "After" columns.

    Args:
        df: DataFrame with numeric "Before" and "After" columns, one row
            per subject.
        alpha: Significance level (default 0.05, i.e. the 0.975 quantile).

    Returns:
        dict with the keys "summary", "t_value", "p_value", "dfree",
        "critical" and "reject_h0".
    """
    # A pair is only usable when both measurements are present.
    pairs = df[["Before", "After"]].dropna()
    before = pairs["Before"]
    after = pairs["After"]

    summary = pd.DataFrame({
        "Variable": ["Before", "After"],
        "Count": [before.count(), after.count()],
        "Mean": [round(before.mean(), 2), round(after.mean(), 2)],
        "Std": [round(before.std(), 2), round(after.std(), 2)],
    })

    t_value, p_value = ttest_rel(before, after)
    dfree = len(before) - 1
    critical = t.ppf(1 - alpha / 2, dfree)

    return {
        "summary": summary,
        "t_value": t_value,
        "p_value": p_value,
        "dfree": dfree,
        "critical": critical,
        "reject_h0": bool(abs(t_value) > critical),
    }


def run_paired_ttest(csv_path="paired_ttest.csv"):
    """Load the dataset from *csv_path* and print the paired t-test report."""
    df = pd.read_csv(csv_path)

    print("Sample Dataset")
    # sample() raises when asked for more rows than exist, so cap it.
    print(df.sample(min(5, len(df))))

    print("\nPaired T-Test")
    print("H0: No significant difference between Before and After scores")
    print("H1: Significant difference between Before and After scores")

    result = paired_ttest(df)

    print("\nSummary")
    print(result["summary"])

    print("\nCalculated t-value =", round(result["t_value"], 4))
    print("Critical t-value =", round(result["critical"], 4))
    print("Degrees of Freedom =", result["dfree"])

    if result["reject_h0"]:
        print("Decision: Reject H0")
        print("Conclusion: There is a significant difference between Before and After scores")
    else:
        print("Decision: Fail to Reject H0")
        print("Conclusion: There is no significant difference between Before and After scores")


# OUTPUT:: ____________________________________________________________


# 3. *ANOVA*
def one_way_anova(df, alpha=0.05):
    """Run a one-way ANOVA over the "GroupA", "GroupB" and "GroupC" columns.

    Args:
        df: DataFrame holding one numeric column per group.
        alpha: Significance level (default 0.05, i.e. the 0.95 quantile of F).

    Returns:
        dict with the keys "means" (tuple of the three group means),
        "grand_mean", "df_between", "df_within", "f_value", "critical",
        "p_value" and "reject_h0".
    """
    # Missing cells are dropped per column so that n counts real observations
    # only and the F statistic is not NaN.
    group_a, group_b, group_c = (
        df[name].dropna() for name in ("GroupA", "GroupB", "GroupC")
    )

    f_value, p_value = f_oneway(group_a, group_b, group_c)

    n = len(group_a) + len(group_b) + len(group_c)
    df_between = 2          # number of groups - 1
    df_within = n - 3       # observations - number of groups
    critical = f.ppf(1 - alpha, df_between, df_within)
    grand_mean = (group_a.sum() + group_b.sum() + group_c.sum()) / n

    return {
        "means": (group_a.mean(), group_b.mean(), group_c.mean()),
        "grand_mean": grand_mean,
        "df_between": df_between,
        "df_within": df_within,
        "f_value": f_value,
        "critical": critical,
        "p_value": p_value,
        "reject_h0": bool(f_value > critical),
    }


def run_anova(csv_path="ANOVA.csv"):
    """Load the dataset from *csv_path* and print the one-way ANOVA report."""
    df = pd.read_csv(csv_path)

    print("Dataset")
    print(df.head())

    result = one_way_anova(df)
    mean_a, mean_b, mean_c = result["means"]

    print("\n********** Result **********")
    print("Mean of Group A:", round(mean_a, 2))
    print("Mean of Group B:", round(mean_b, 2))
    print("Mean of Group C:", round(mean_c, 2))
    print("Grand Mean:", round(result["grand_mean"], 2))

    print("\nDegrees of Freedom Between:", result["df_between"])
    print("Degrees of Freedom Within:", result["df_within"])

    print("\nF Calculated:", round(result["f_value"], 4))
    print("F Critical:", round(result["critical"], 4))
    print("P-Value:", round(result["p_value"], 4))

    if result["reject_h0"]:
        print("\nDecision: Reject H0")
        print("There is a significant difference among the groups.")
    else:
        # Worded like the t-test reports: a test never "accepts" H0.
        print("\nDecision: Fail to Reject H0")
        print("There is no significant difference among the groups.")


# OUTPUT: --------------------------------------------


# 04. simple_linear_regression
def simple_linear_regression(df):
    """Fit Marks = b0 + b1 * Hours by least squares using the sum formulas.

    Args:
        df: DataFrame with numeric "Hours" and "Marks" columns.

    Returns:
        dict with the keys "x", "y", "slope", "intercept" and "predicted"
        (the fitted value for every retained row).
    """
    # pandas sums skip NaN while len() counts it; keep complete rows only so
    # that n and the sums describe the same observations.
    data = df[["Hours", "Marks"]].dropna()
    x = data["Hours"]
    y = data["Marks"]

    n = len(x)
    sx = x.sum()
    sy = y.sum()
    sxy = (x * y).sum()
    sx2 = (x ** 2).sum()

    b1 = (n * sxy - sx * sy) / (n * sx2 - sx ** 2)
    b0 = (sy - b1 * sx) / n

    return {
        "x": x,
        "y": y,
        "slope": b1,
        "intercept": b0,
        "predicted": b0 + b1 * x,
    }


def run_simple_linear_regression(csv_path="simple_linear_regression.csv"):
    """Load the dataset from *csv_path*, print the fitted line and plot it."""
    # Imported here so the statistics helpers above can be imported on
    # machines without matplotlib.
    import matplotlib.pyplot as plt

    df = pd.read_csv(csv_path)

    print("Dataset")
    print(df.sample(min(5, len(df))))

    fit = simple_linear_regression(df)
    b0 = fit["intercept"]
    b1 = fit["slope"]

    print("\nSlope =", round(b1, 4))
    print("Intercept =", round(b0, 4))

    print("\nRegression Equation")
    print("Y =", round(b0, 4), "+", round(b1, 4), "X")

    print("\nPredicted Values")
    print(fit["predicted"])

    plt.scatter(fit["x"], fit["y"])
    plt.plot(fit["x"], fit["predicted"])
    plt.xlabel("Study Hours")
    plt.ylabel("Marks")
    plt.title("Simple Linear Regression")
    plt.grid()
    plt.show()


# OUTPUT: ---------------------------------


# 05. *Pearson correlation*
def pearson_correlation(df):
    """Compute Pearson's r between "Hours" and "Marks" from raw sums.

    Args:
        df: DataFrame with numeric "Hours" and "Marks" columns.

    Returns:
        dict with the keys "n", "sx", "sy", "sxy", "sx2", "sy2", "r" and
        "result" (the printed interpretation line).
    """
    # Keep complete rows only so that n matches what the sums cover.
    data = df[["Hours", "Marks"]].dropna()
    x = data["Hours"]
    y = data["Marks"]

    n = len(x)
    sx = x.sum()
    sy = y.sum()
    sxy = (x * y).sum()
    sx2 = (x ** 2).sum()
    sy2 = (y ** 2).sum()

    r = ((n * sxy) - (sx * sy)) / (
        ((n * sx2 - sx ** 2) * (n * sy2 - sy ** 2)) ** 0.5
    )

    if r > 0:
        result = "Result: Positive Correlation"
    elif r < 0:
        result = "Result: Negative Correlation"
    else:
        result = "Result: No Correlation"

    return {
        "n": n,
        "sx": sx,
        "sy": sy,
        "sxy": sxy,
        "sx2": sx2,
        "sy2": sy2,
        "r": r,
        "result": result,
    }


def run_pearson(csv_path="pearson.csv"):
    """Load the dataset from *csv_path* and print the correlation report."""
    df = pd.read_csv(csv_path)

    print("Sample Data")
    print(df.sample(min(5, len(df))))

    stats = pearson_correlation(df)

    print("\nPearson Correlation Analysis")
    print("n =", stats["n"])
    print("Sum of X =", stats["sx"])
    print("Sum of Y =", stats["sy"])
    print("Sum of XY =", stats["sxy"])
    print("Sum of X² =", stats["sx2"])
    print("Sum of Y² =", stats["sy2"])

    print("\nCorrelation Coefficient =", round(stats["r"], 4))
    print(stats["result"])


# OUTPUT:: ---------------------------------


# 06. *CART*
def run_cart(random_state=None):
    """Train a decision tree on the iris dataset, print its metrics and plot it.

    Args:
        random_state: Optional seed forwarded to the train/test split and to
            the classifier. The default None keeps the split random on every
            run; pass an integer for repeatable output.
    """
    # Imported here so the statistics helpers above can be imported on
    # machines without matplotlib / scikit-learn.
    import matplotlib.pyplot as plt
    from sklearn.datasets import load_iris
    from sklearn.metrics import (
        accuracy_score,
        classification_report,
        confusion_matrix,
    )
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier, plot_tree

    iris = load_iris()
    x = iris.data
    y = iris.target

    print("Samples:", len(x))
    print("Features:", iris.feature_names)
    print("Classes:", iris.target_names)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=random_state
    )

    model = DecisionTreeClassifier(random_state=random_state)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    print("\nAccuracy:", round(accuracy_score(y_test, pred) * 100, 2), "%")

    print("\nConfusion Matrix")
    print(confusion_matrix(y_test, pred))

    print("\nClassification Report")
    print(classification_report(y_test, pred))

    # NOTE(review): class names are passed as a plain list rather than the
    # NumPy array load_iris returns; confirm whether the installed
    # scikit-learn release also accepts the array form.
    plot_tree(
        model,
        feature_names=iris.feature_names,
        class_names=list(iris.target_names),
        filled=True,
    )
    plt.show()


# OUTPUT: ------------------------------------


if __name__ == "__main__":
    run_paired_ttest()
    run_anova()
    run_simple_linear_regression()
    run_pearson()
    run_cart()