2.3 Data Visualization#

This section introduces visual tools to explore and compare relationships within the df_y_clean dataset. For each plot type, we show both matplotlib and seaborn implementations in side-by-side subplots, using consistent data for direct comparison.

Documentation

Although LLMs often replace documentation these days, the best way to make sure you remember what you use and how to use it is still to consult the documentation:

2.3.1 Histogram#

Purpose: Visualize the frequency distribution of IQ scores.

fig, ax = plt.subplots(1, 2, figsize=(14, 5))
ax[0].hist(df_y_clean[["IQ", "IQ_Matrix", "IQ_Vocab"]], bins=15, label=["IQ", "Matrix", "Vocab"], alpha=0.7)
ax[0].set_title("Matplotlib Histograms")
ax[0].set_xlabel("Score")
ax[0].set_ylabel("Frequency")
ax[0].legend()

sns.histplot(data=df_y_clean[["IQ", "IQ_Matrix", "IQ_Vocab"]], kde=True, element="step", ax=ax[1])
ax[1].set_title("Seaborn Histograms with KDE")

plt.tight_layout()
plt.show()
../../../_images/300e9a390b55e7cd603ca8c36fa9722526e5a3f3b59e44f2b80f4826badf2eb1.png

2.3.2 Box Plot#

Purpose: Show distribution, spread, and outliers in IQ scores, grouped by gender.

fig, ax = plt.subplots(1, 2, figsize=(14, 5))

df_y_melt = df_y_clean.melt(id_vars="Gender", value_vars=["IQ", "IQ_Matrix", "IQ_Vocab"],
                            var_name="IQ_Type", value_name="Score")

# Matplotlib approximation via grouped boxplot
groups = df_y_melt.groupby(["Gender", "IQ_Type"])["Score"].apply(list).unstack()
positions = np.arange(len(groups.columns))
width = 0.35

for i, gender in enumerate(groups.index):
    bp = ax[0].boxplot(groups.loc[gender].values,
                       positions=positions + i * width,
                       widths=0.3,
                       patch_artist=True,
                       boxprops=dict(facecolor=["#66c2a5", "#fc8d62", "#8da0cb"][i % 3]))
ax[0].set_xticks(positions + width / 2)
ax[0].set_xticklabels(groups.columns)
ax[0].set_title("Matplotlib Box Plot by Gender")

# Seaborn
sns.boxplot(x="IQ_Type", y="Score", hue="Gender", data=df_y_melt, ax=ax[1])
ax[1].set_title("Seaborn Box Plot by Gender")

plt.tight_layout()
plt.show()
../../../_images/64f9ccab7a7ec84e061dd27b51efcab092fe4a3e8b740b8979582336588a70c7.png

2.3.3 Scatter Plot#

Purpose: Show the relationship between Age and each IQ metric, colored by Gender.

fig, ax = plt.subplots(1, 2, figsize=(14, 5))

colors = {'Male': 'skyblue', 'Female': 'salmon'}
for gender, color in colors.items():
    subset = df_y_clean[df_y_clean["Gender"] == gender]
    ax[0].scatter(subset["Age"], subset["IQ"], label=f"IQ - {gender}", alpha=0.6, color=color)
    ax[0].scatter(subset["Age"], subset["IQ_Matrix"], label=f"Matrix - {gender}", marker='^', alpha=0.6, color=color)
    ax[0].scatter(subset["Age"], subset["IQ_Vocab"], label=f"Vocab - {gender}", marker='s', alpha=0.6, color=color)
ax[0].set_title("Matplotlib Scatter Plot by Gender")
ax[0].set_xlabel("Age")
ax[0].set_ylabel("Score")
ax[0].legend()

melted = df_y_clean.melt(id_vars=["Age", "Gender"],
                         value_vars=["IQ", "IQ_Matrix", "IQ_Vocab"],
                         var_name="IQ_Type", value_name="Score")
sns.scatterplot(data=melted, x="Age", y="Score", hue="Gender", style="IQ_Type", ax=ax[1])
ax[1].set_title("Seaborn Scatter Plot by Gender and IQ Type")

plt.tight_layout()
plt.show()
../../../_images/ff6301200f22b44f46788784007d187c2d6a7a2e7e0d7fde268cd0dd9b17d060.png

2.3.4 Line Plot#

Purpose: Visualize trends in IQ scores across age.

df_y_sorted = df_y_clean.sort_values("Age")
fig, ax = plt.subplots(1, 2, figsize=(14, 5))

ax[0].plot(df_y_sorted["Age"], df_y_sorted["IQ"], label="IQ")
ax[0].plot(df_y_sorted["Age"], df_y_sorted["IQ_Matrix"], label="Matrix")
ax[0].plot(df_y_sorted["Age"], df_y_sorted["IQ_Vocab"], label="Vocab")
ax[0].set_title("Matplotlib Line Plot")
ax[0].set_xlabel("Age")
ax[0].set_ylabel("Score")
ax[0].legend()

sns.lineplot(data=df_y_sorted[["Age", "IQ", "IQ_Matrix", "IQ_Vocab"]], ax=ax[1])
ax[1].set_title("Seaborn Line Plot")

plt.tight_layout()
plt.show()
../../../_images/1d65e7f44a56c021fde24e1e8aa2e6b316b088cba8d529a59e41820c217e99f1.png

2.3.5 Bar Plot#

Purpose: Show frequency of different handedness categories.

counts = df_y_clean["Handedness"].value_counts()
fig, ax = plt.subplots(1, 2, figsize=(14, 5))

ax[0].bar(counts.index.astype(str), counts.values, color="skyblue")
ax[0].set_title("Matplotlib Bar Plot")
ax[0].set_xlabel("Handedness")
ax[0].set_ylabel("Count")

sns.countplot(x="Handedness", data=df_y_clean, ax=ax[1], palette="pastel")
ax[1].set_title("Seaborn Count Plot")

plt.tight_layout()
plt.show()
/tmp/ipykernel_2201/676046580.py:9: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x="Handedness", data=df_y_clean, ax=ax[1], palette="pastel")
../../../_images/b686a23b471b79e0584b1f9ca8c2ce7465365eedf0ece5922502f4f42e77563b.png

2.3.6 Density Plot#

Purpose: Compare smoothed distributions of IQ types.

fig, ax = plt.subplots(1, 2, figsize=(14, 5))

df_y_clean["IQ"].plot.kde(ax=ax[0], label="IQ")
df_y_clean["IQ_Matrix"].plot.kde(ax=ax[0], label="Matrix")
df_y_clean["IQ_Vocab"].plot.kde(ax=ax[0], label="Vocab")
ax[0].set_title("Matplotlib KDE Plot")
ax[0].legend()

sns.kdeplot(data=df_y_clean[["IQ", "IQ_Matrix", "IQ_Vocab"]], fill=True, ax=ax[1])
ax[1].set_title("Seaborn KDE Plot")

plt.tight_layout()
plt.show()
../../../_images/73422c91973b0e4baf1326eece02ce5e0206b7e10b1e3196ed3693d0e41a20a4.png

2.3.7 Correlation Heatmap#

Purpose: Examine pairwise relationships between numerical variables.

corr = df_y_clean[["IQ", "IQ_Matrix", "IQ_Vocab", "Age"]].corr()
fig, ax = plt.subplots(1, 2, figsize=(14, 5))

im = ax[0].imshow(corr, cmap='coolwarm')
fig.colorbar(im, ax=ax[0])
ax[0].set_title("Matplotlib Correlation Heatmap")
ax[0].set_xticks(np.arange(len(corr.columns)))
ax[0].set_yticks(np.arange(len(corr.columns)))
ax[0].set_xticklabels(corr.columns, rotation=45)
ax[0].set_yticklabels(corr.columns)

sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax[1])
ax[1].set_title("Seaborn Correlation Heatmap")

plt.tight_layout()
plt.show()
../../../_images/9f687ef4a5a63a4862998f61e78f80d9173e39b3f512975b2afdab13276e20af.png