Record(DS)-1 | PDF Host

# 1. Write a program to calculate the measures of central tendency (mean,
#    median, mode) with and without using built-in functions in Python.
import statistics


def manual_mean(values):
    """Return the arithmetic mean of ``values`` using an explicit loop.

    Raises:
        ValueError: if ``values`` is empty.
    """
    if not values:
        raise ValueError("values must not be empty")
    total = 0
    for value in values:
        total += value
    return total / len(values)


def manual_median(values):
    """Return the median of ``values`` without touching the caller's list.

    For an even number of items the two middle items are averaged.

    Raises:
        ValueError: if ``values`` is empty.
    """
    if not values:
        raise ValueError("values must not be empty")
    ordered = sorted(values)  # work on a copy; the input order is preserved
    middle = len(ordered) // 2
    if len(ordered) % 2 == 0:
        return (ordered[middle - 1] + ordered[middle]) / 2
    return ordered[middle]


def manual_mode(values):
    """Return the most frequent item; ties resolve to the smallest item.

    Equal items are adjacent after sorting, so one pass that tracks the
    length of the current run replaces the nested counting loops.

    Raises:
        ValueError: if ``values`` is empty.
    """
    if not values:
        raise ValueError("values must not be empty")
    ordered = sorted(values)
    best_value = ordered[0]
    best_count = 0
    run_value = ordered[0]
    run_count = 0
    for value in ordered:
        if value == run_value:
            run_count += 1
        else:
            run_value = value
            run_count = 1
        # Strict ">" keeps the first (smallest) value among equal counts.
        if run_count > best_count:
            best_count = run_count
            best_value = run_value
    return best_value


def run_central_tendency():
    """Read integers from stdin and print mean, median and mode both ways."""
    n = int(input("Enter number of elements:"))
    if n <= 0:
        # Nothing to average; stop before dividing by zero.
        raise SystemExit("Number of elements must be at least 1")
    data = [int(input("Enter element:")) for _ in range(n)]

    mean1 = manual_mean(data)
    median1 = manual_median(data)
    mode1 = manual_mode(data)

    mean2 = sum(data) / len(data)
    median2 = statistics.median(data)
    # NOTE(review): on ties statistics.mode may pick a different value than
    # manual_mode (which picks the smallest) -- confirm against the docs.
    mode2 = statistics.mode(data)

    print("Without Built - in Functions")
    print("Mean=", mean1)
    print("Median=", median1)
    print("Mode=", mode1)
    print("\nWith Built - in Functions")
    print("Mean=", mean2)
    print("Median=", median2)
    print("Mode=", mode2)


if __name__ == "__main__":
    run_central_tendency()

# Output:
#   Enter number of elements: 5
#   Enter element: 10
#   Enter element: 20
#   Enter element: 30
#   Enter element: 40
#   Enter element: 10
#   Without Built - in Functions
#   Mean = 22.0
#   Median = 20
#   Mode = 10
#   With Built - in Functions
#   Mean = 22.0
#   Median = 20
#   Mode = 10
#
# 2. Write a program to calculate the measures of dispersion with and without
#    using built-in functions in Python.
import math

import numpy as np


def manual_variance(values):
    """Return the population variance (divide by n) of ``values``.

    Raises:
        ValueError: if ``values`` is empty.
    """
    if not values:
        raise ValueError("values must not be empty")
    count = len(values)
    mean = sum(values) / count
    squared_deviations = 0
    for value in values:
        squared_deviations += (value - mean) ** 2
    return squared_deviations / count


def manual_percentile(values, q):
    """Return the ``q``-th percentile (0-100) by linear interpolation.

    The fractional rank ``(n - 1) * q / 100`` is interpolated between its two
    neighbouring sorted items, so the manual IQR agrees with the
    ``np.percentile`` figure printed next to it.

    Raises:
        ValueError: if ``values`` is empty or ``q`` is outside 0..100.
    """
    if not values:
        raise ValueError("values must not be empty")
    if not 0 <= q <= 100:
        raise ValueError("q must be between 0 and 100")
    ordered = sorted(values)
    rank = (len(ordered) - 1) * q / 100
    lower = int(rank)
    upper = min(lower + 1, len(ordered) - 1)
    fraction = rank - lower
    return ordered[lower] + (ordered[upper] - ordered[lower]) * fraction


def manual_iqr(values):
    """Return the interquartile range (Q3 - Q1) of ``values``."""
    return manual_percentile(values, 75) - manual_percentile(values, 25)


def run_dispersion():
    """Print mean, range, variance, std and IQR computed by hand and by NumPy."""
    data = [10, 20, 304, 50, 10, 23, 1, 0, 10, 20, 30, 10]
    n = len(data)

    # Mean
    mean = sum(data) / n

    # Range
    data_range = max(data) - min(data)

    # Variance and Standard Deviation (without built-in functions)
    variance = manual_variance(data)
    std_dev = math.sqrt(variance)

    # Using NumPy
    arr = np.array(data)
    np_mean = np.mean(arr)
    np_range = np.max(arr) - np.min(arr)
    np_variance = np.var(arr)
    np_std = np.std(arr)

    # IQR without built-in functions
    iqr = manual_iqr(data)

    # Output
    print("WITHOUT BUILT - IN FUNCTIONS")
    print("Mean:", mean)
    print("Range:", data_range)
    print("Variance:", variance)
    print("STD:", std_dev)
    print("IQR:", iqr)
    print("\nWITH BUILT - IN FUNCTIONS")
    print("Mean:", np_mean)
    print("Range:", np_range)
    print("Variance:", np_variance)
    print("STD:", np_std)
    print("IQR:", np.percentile(arr, 75) - np.percentile(arr, 25))


if __name__ == "__main__":
    run_dispersion()

# Output:
# NOTE(review): the recorded run below shows input prompts and labels that this
# listing does not print; it appears to come from a different version of the
# program.
#   Enter number of elements: 4
#   Enter element: 10
#   Enter element: 20
#   Enter element: 30
#   Enter element: 40
#   Without Built - in Functions
#   Range = 30
#   Variance = 125.0
#   Standard Deviation = 11.180339887498949
#   IQR = 20.0
#   With Built - in Functions
#   Range = 30
#   Variance = 125
#   Standard Deviation = 11.180339887498949
#   IQR = 20.0
#
# 3. Write a program that identifies and categorizes data by levels of
#    measurement.
import pandas as pd


def build_measurement_frame():
    """Return the sample DataFrame with one column per level of measurement.

    Columns: ``Gender`` (nominal), ``Rank`` (ordinal), ``Temperature_C``
    (interval) and ``Weight_kg`` (ratio), plus ``Name`` as a row label.
    """
    data = {
        'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
        'Gender': ['Female', 'Male', 'Male', 'Male', 'Female'],
        'Rank': ['1st', '2nd', '3rd', '4th', '5th'],
        'Temperature_C': [22, 25, 20, 30, 28],
        'Weight_kg': [55, 70, 65, 80, 60],
    }
    return pd.DataFrame(data)


def run_levels_of_measurement():
    """Print one summary per level of measurement for the sample data."""
    df = build_measurement_frame()
    print("DataFrame demonstrating levels of measurements:")
    print(df)

    # Nominal: categories without order -> frequency counts only.
    print("\nNominal Level (Gender)")
    gender_counts = df['Gender'].value_counts()
    print("counts of each gender:")
    print(gender_counts)

    # Ordinal: ordered categories -> shown in rank order.
    print("\nOrdinal Level (Rank)")
    print("Rank of Individuals:")
    print(df['Rank'])

    # Interval: numeric column summarised with describe().
    print("\nInterval Level (Temp in C)")
    temperature_stats = df['Temperature_C'].describe()
    print("Descriptive Statistics of Temperature:")
    print(temperature_stats)

    # Ratio: numeric column summarised with describe().
    print("\nRatio Level (Weight in kg)")
    weight_stats = df['Weight_kg'].describe()
    print("Descriptive Statistics for Weight:")
    print(weight_stats)

    print("Additional Analysis")
    average_weight = df['Weight_kg'].mean()
    print(f"Average Weight: {average_weight:.2f} kg")
    highest_temperature = df['Temperature_C'].max()
    print(f"Highest Temperature: {highest_temperature} C")


if __name__ == "__main__":
    run_levels_of_measurement()

# Output:
# NOTE(review): the values in the recorded run below differ from the data in
# this listing (e.g. temperatures 55-80 vs 20-30); it appears to come from a
# different version of the program.
#   DataFrame demonstrating levels of measurement
#        Name  Gender Rank  Temperature_c  Weight_kg
#   0   Alice  Female  1st             55         50
#   1     Bob    Male  2nd             70         75
#   2 Charlie    Male  3rd             65         68
#   3   David  Female  4th             80         80
#   4     Eve  Female  5th             60         55
#   Nominal Level (Gender)
#   Count of each gender:
#   Gender
#   Female    3
#   Male      2
#   Name: count, dtype: int64
#   Ordinal Level (Rank)
#   Rank of Individuals:
#   0    1st
#   1    2nd
#   2    3rd
#   3    4th
#   4    5th
#   Name: Rank, dtype: object
#   Interval Level (Temperature in °C)
#   Descriptive Statistics of Temperature:
#   count     5.000000
#   mean     66.000000
#   std       9.617692
#   min      55.000000
#   25%      60.000000
#   50%      65.000000
#   75%      70.000000
#   max      80.000000
#   Name: Temperature_c, dtype: float64
#   Ratio Level (Weight in kg)
#   Descriptive Statistics of Weight:
#   count     5.000000
#   mean     65.600000
#   std      12.817956
#   min      50.000000
#   25%      55.000000
#   50%      68.000000
#   75%      75.000000
#   max      80.000000
#   Name: Weight_kg, dtype: float64
#
# 4.
# Write a program to implement data management and indexing operations using
# Python (use dataset).
import pandas as pd

# Menu text, printed once before the choice loop starts.
MENU_LINES = (
    "\n1. Show Data",
    "2. Add Row",
    "3. Update Value",
    "4. Delete Row",
    "5. Add Column",
    "6. Delete Column",
    "7. Filter Data",
    "8. Sort Data",
    "9. Using loc",
    "10. Using iloc",
    "11. Fetch Multiple Rows",
    "12. Fetch Multiple Columns",
    "13. Fetch Multiple Rows and Columns",
    "14. Exit",
)


def split_csv_field(text):
    """Split a comma-separated answer, dropping blanks around each item."""
    return [item.strip() for item in text.split(",")]


def cast_like(df, column, text):
    """Convert typed ``text`` to a number when ``column`` holds numbers.

    ``input()`` always yields strings; comparing or storing them against a
    numeric column never matches and silently turns the column into objects.
    Text that is not numeric, or a column that does not exist yet, is
    returned unchanged.
    """
    if column in df.columns and pd.api.types.is_numeric_dtype(df[column]):
        try:
            return pd.to_numeric(text)
        except (ValueError, TypeError):
            return text
    return text


def next_row_label(df):
    """Return an index label no existing row uses.

    ``len(df)`` is not safe for this: after a row is deleted it can equal a
    surviving label, and assigning to it would overwrite that row.
    """
    if df.empty:
        return 0
    return df.index.max() + 1


def apply_menu_choice(df, ch):
    """Run menu option ``ch`` (1-13) on ``df`` and return the resulting frame.

    Prompts for any further details on stdin and prints the result. Unknown
    options print "Invalid choice". Errors raised by pandas or by ``int()``
    propagate to the caller.
    """
    if ch == 1:
        print(df)
    elif ch == 2:
        row = [cast_like(df, c, input(f"Enter {c}: ")) for c in df.columns]
        df.loc[next_row_label(df)] = row
        print(df)
    elif ch == 3:
        i = int(input("Enter index: "))
        c = input("Enter column: ")
        v = input("Enter new value: ")
        df.loc[i, c] = cast_like(df, c, v)
        print(df)
    elif ch == 4:
        i = int(input("Enter index to delete: "))
        df = df.drop(i)
        print(df)
    elif ch == 5:
        c = input("Enter new column: ")
        df[c] = [input("Enter value: ") for _ in range(len(df))]
        print(df)
    elif ch == 6:
        c = input("Enter column to delete: ")
        df = df.drop(c, axis=1)
        print(df)
    elif ch == 7:
        c = input("Enter column for filter: ")
        v = input("Enter value: ")
        print(df[df[c] == cast_like(df, c, v)])
    elif ch == 8:
        c = input("Enter column to sort: ")
        df = df.sort_values(by=c)
        print(df)
    elif ch == 9:
        r = int(input("Enter row index: "))
        c = input("Enter column: ")
        print(df.loc[r, c])
    elif ch == 10:
        r = int(input("Enter row position: "))
        c = int(input("Enter column position: "))
        print(df.iloc[r, c])
    elif ch == 11:
        s = int(input("Enter start row: "))
        e = int(input("Enter end row: "))
        print(df.iloc[s:e + 1])  # +1 so the end row is included
    elif ch == 12:
        cols = split_csv_field(input("Enter column names separated by comma: "))
        print(df[cols])
    elif ch == 13:
        rows = [int(item) for item in
                input("Enter row indexes separated by comma: ").split(",")]
        cols = split_csv_field(input("Enter column names separated by comma: "))
        print(df.loc[rows, cols])
    else:
        print("Invalid choice")
    return df


def run_data_management(csv_path="students.csv"):
    """Load ``csv_path`` and serve the menu until option 14 is chosen."""
    # Read CSV file
    df = pd.read_csv(csv_path)

    # Display sample data
    print(df.sample(min(5, len(df))))

    # Menu
    for line in MENU_LINES:
        print(line)

    while True:
        try:
            ch = int(input("Enter choice: "))
        except ValueError:
            print("Invalid choice")
            continue
        if ch == 14:
            break
        try:
            df = apply_menu_choice(df, ch)
        except (KeyError, IndexError, ValueError) as err:
            # A bad label, position or number should not end the session.
            print("Error:", err)


# Output:
#   id,name,gender,course,marks
#   1,ravi,m,bca,85
#   2,priya,f,bsc,90
#   3,amit,m,bcom,78
#   4,neha,f,bca,88
#   5,karan,m,bsc,92
#   1. Show Data
#   2. Add Row
#   3. Update Value
#   4. Delete Row
#   5. Add Column
#   6. Delete Column
#   7. Filter Data
#   8. Sort Data
#   9. Using loc
#   10. Using iloc
#   11. Fetch Multiple Rows
#   12. Fetch Multiple Columns
#   13. Fetch Multiple Rows and Columns
#   14. Exit
#   Enter choice: 1
#      id   name gender course  marks
#   0   1   ravi      m    bca     85
#   1   2  priya      f    bsc     90
#   2   3   amit      m   bcom     78
#   3   4   neha      f    bca     88
#   4   5  karan      m    bsc     92
#   Enter choice: 9
#   Enter row index: 3
#   Enter column: course
#   bca
#   Enter choice: 10
#   Enter row position: 4
#   Enter column position: 1
#   karan
#   Enter choice: 13
#   Enter row indexes separated by comma: 0,2,4
#   Enter column names separated by comma: name,course,marks
#       name course  marks
#   0   ravi    bca     85
#   2   amit   bcom     78
#   4  karan    bsc     92
#   Enter choice: 14
#
# 5. Write a program to apply data transformation using z-score and minmax
#    normalization (use dataset).


def add_normalized_columns(df, column="Marks"):
    """Add ``Z_score`` and ``MinMax_Normalized`` columns derived from ``column``.

    The z-score uses pandas' default (sample) standard deviation. When the
    standard deviation or the range is zero or undefined, the corresponding
    column is filled with 0.0 instead of NaN. ``df`` is modified in place and
    also returned.
    """
    values = df[column]
    std = values.std()
    min_val = values.min()
    spread = values.max() - min_val

    if pd.isna(std) or std == 0:
        df["Z_score"] = 0.0
    else:
        df["Z_score"] = (values - values.mean()) / std

    if pd.isna(spread) or spread == 0:
        df["MinMax_Normalized"] = 0.0
    else:
        df["MinMax_Normalized"] = (values - min_val) / spread
    return df


def run_normalization(csv_path="D:/Srujan/data.csv"):
    """Load ``csv_path``, normalise its ``Marks`` column and print the frame."""
    df = pd.read_csv(csv_path)
    add_normalized_columns(df)
    print(df)


if __name__ == "__main__":
    run_data_management()
    run_normalization()

# Output:
#       ID     Name  Age  Marks    Z_score  MinMax_Normalized
#   0  101    Reena   25     80   0.661228           0.765432
#   1  102    Ramya   28     32  -1.274074           0.172840
#   2  103    Sonal   21     18  -1.838537           0.000000
#   3  104     Amit   24     65   0.056446           0.580247
#   4  105    Kiran   27     59  -0.185466           0.506173
#   5  106      Raj   22     48  -0.628973           0.370370
#   6  107  Sarthak   20     99   1.427285           1.000000
#   7  108   Sanjay   19     81   0.701547           0.777778
#   8  109  Farviza   26     75   0.459634           0.703704
#   9  110     Sara   24     79   0.620909           0.753086
#
# 6. Write a Python program to classify emails as spam or not spam.
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


def run_spam_classifier(csv_path="D:/Srujan/emails.csv"):
    """Train a bag-of-words spam model and classify emails typed on stdin.

    Reads the ``text`` and ``label`` columns of ``csv_path``, fits a
    ``CountVectorizer`` + ``LogisticRegression`` on all rows, then prints a
    prediction for each line entered until the user types ``exit``.
    """
    data = pd.read_csv(csv_path)
    cv = CountVectorizer()
    X = cv.fit_transform(data["text"])
    y = data["label"]

    model = LogisticRegression()
    model.fit(X, y)

    while True:
        email = input("\nEnter email text (or type 'exit'): ").lower()
        # Ignore stray spaces around the exit keyword.
        if email.strip() == "exit":
            break
        email_vec = cv.transform([email])
        result = model.predict(email_vec)
        print("Prediction:", result[0])


# Output:
#   Enter email text (or type 'exit'): offer
#   Prediction: not spam
#   Enter email text (or type 'exit'): lottery
#   Prediction: not spam
#   Enter email text (or type 'exit'): lottery ticket
#   Prediction: not spam
#   Enter email text (or type 'exit'): win money now
#   Prediction: spam
#   Enter email text (or type 'exit'): Here's your lottery ticket claim it before 24 hours.
#   Prediction: spam
#   Enter email text (or type 'exit'): exit
#
# 7. Write a Python program to predict whether an Amazon customer will churn
#    or not using a classifier (use dataset).


def run_churn_predictor(csv_path="D:/Srujan/amazon_churn.csv"):
    """Train a churn classifier and predict for one customer read from stdin.

    Uses the ``Age``, ``Tenure`` and ``PurchaseAmount`` columns as features
    and ``Churn`` as the target. 20% of the rows are held out and used to
    report accuracy before the interactive prediction.
    """
    features = ["Age", "Tenure", "PurchaseAmount"]
    df = pd.read_csv(csv_path)
    X = df[features]
    y = df["Churn"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    model = LogisticRegression()
    model.fit(X_train, y_train)
    # The held-out split was previously created but never used.
    print("Test accuracy:", model.score(X_test, y_test))

    age = int(input("Enter Age: "))
    tenure = int(input("Enter Tenure: "))
    # float() accepts whole numbers as well as amounts such as 499.50.
    purchase = float(input("Enter Purchase Amount: "))

    new_data = pd.DataFrame({
        "Age": [age],
        "Tenure": [tenure],
        "PurchaseAmount": [purchase],
    })

    prediction = model.predict(new_data)

    # NOTE(review): assumes the Churn column encodes "will churn" as 1 --
    # confirm against the dataset.
    if prediction[0] == 1:
        print("Customer will churn")
    else:
        print("Customer will not churn")


if __name__ == "__main__":
    run_spam_classifier()
    run_churn_predictor()

# Output:
#   Enter Age: 24
#   Enter Tenure: 5
#   Enter Purchase Amount: 500
#   Customer will churn
#
# 8. Write a Python program to plot five different types of graphs — pie
#    chart, bar chart, histogram, scatter plot, and box plot — using a dataset.
import pandas as pd
from scipy.stats import chi2_contingency


def run_plots(csv_path="D:/Srujan/matplot.csv"):
    """Draw pie, bar, histogram, scatter and box plots from ``csv_path``.

    The CSV is read for its ``Name``, ``Marks`` and ``Age`` columns; all five
    charts share one 2x3 figure.
    """
    # Imported here so the chi-square helpers below stay importable on
    # machines without matplotlib.
    import matplotlib.pyplot as plt

    df = pd.read_csv(csv_path)

    plt.figure(figsize=(15, 10))

    plt.subplot(2, 3, 1)
    plt.pie(df["Marks"], labels=df["Name"], autopct='%1.1f%%')
    plt.title("Pie Chart")

    plt.subplot(2, 3, 2)
    plt.bar(df["Name"], df["Marks"])
    plt.title("Bar Chart")
    plt.xlabel("Name")
    plt.ylabel("Marks")

    plt.subplot(2, 3, 3)
    plt.hist(df["Marks"])
    plt.title("Histogram")

    plt.subplot(2, 3, 4)
    plt.scatter(df["Age"], df["Marks"])
    plt.title("Scatter Plot")
    plt.xlabel("Age")
    plt.ylabel("Marks")

    plt.subplot(2, 3, 5)
    plt.boxplot(df["Marks"])
    plt.title("Box Plot")

    plt.tight_layout()
    plt.show()


# Output:
#
# 09. Chi-square


def chi_square_association(df, row="Gender", column="Purchase", alpha=0.05):
    """Run a chi-square test of independence between two columns of ``df``.

    Returns:
        ``(table, chi2, p, associated)`` where ``table`` is the contingency
        table, ``chi2`` and ``p`` come from ``chi2_contingency`` and
        ``associated`` is ``True`` when ``p < alpha``.
    """
    table = pd.crosstab(df[row], df[column])
    result = chi2_contingency(table)
    chi2, p = result[0], result[1]
    return table, chi2, p, bool(p < alpha)


def run_chi_square(csv_path="data.csv", alpha=0.05):
    """Load ``csv_path`` and print the Gender/Purchase chi-square result."""
    df = pd.read_csv(csv_path)
    table, chi2, p, associated = chi_square_association(df, alpha=alpha)

    print("Contingency Table:")
    print(table)
    print("\nChi - Square Value:", chi2)
    print("P - Value:", p)

    if associated:
        print("Variables are associated")
    else:
        print("Variables are independent")


if __name__ == "__main__":
    run_plots()
    run_chi_square()

# Output:
#   Contingency Table:
#   Purchase  No  Yes
#   Gender
#   Female     3    2
#   Male       2    3
#   Chi - Square Value: 0.0
#   P - Value: 1.0
#   Variables are independent
#
# Next 5 programs will be sent soon .....! ThAnK YoU 🙏