import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Input file and the column whose distribution is plotted. Edit these two
# names to point the script at a different dataset.
DATA_PATH = "dataset.csv"
TARGET_COL = "Target_Col"

# 1. Load and Structural Audit
df = pd.read_csv(DATA_PATH)
print("--- Data Info ---")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\n--- Missing Percentage ---")
# isnull() yields a boolean frame; its column-wise mean is the fraction of
# missing values, scaled here to a percentage per column.
print(df.isnull().mean() * 100)

# 2. Advanced Visualizations
# One figure, two side-by-side panels: target histogram and correlation map.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Plot A: Target Distribution (Identify skewness)
sns.histplot(df[TARGET_COL], kde=True, color='teal', ax=axes[0])
axes[0].set_title("Target Distribution Analysis")

# Plot B: Heatmap (Identify redundant features)
# numeric_only=True restricts the correlation matrix to numeric columns.
sns.heatmap(
    df.corr(numeric_only=True),
    annot=True,
    fmt=".2f",
    cmap="viridis",
    ax=axes[1],
)
axes[1].set_title("Global Correlation Heatmap")

plt.tight_layout()
plt.show()