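I shifted the markers one unit to the right along the x-axis: plt.scatter(i + 1, q3_values[col], ...). The boxes are drawn at x positions 1, 2, 3, ..., while enumerate() starts counting at 0, so without the shift every marker lands one box to the left. I am assuming you want to mark Q3 on top of each boxplot: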
I reproduced this on a similar dataset, using a few selected features:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import LabelEncoder
# Load the Titanic dataset once and keep only the columns of interest.
# .copy() makes `df` an independent frame rather than a slice of
# `titanic.frame`, so the assignments below cannot raise
# SettingWithCopyWarning.
titanic = fetch_openml(name="titanic", version=1, as_frame=True)
df = titanic.frame[
    ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked', 'survived']
].copy()

# Handle missing values: median for the numeric columns, most frequent value
# for 'embarked'. Plain column assignment (df[col] = ...) replaces the whole
# column, so the explicit cast decides the resulting dtype; writing through
# .loc[:, col] tries to reuse the existing column and its dtype instead.
df['age'] = df['age'].fillna(df['age'].median()).astype(float)
df['embarked'] = df['embarked'].fillna(df['embarked'].mode()[0]).astype(str)
df['fare'] = df['fare'].fillna(df['fare'].median()).astype(float)

# Encode categorical features as integer labels.
# The encoder is refit for every column, so after the loop `le` only holds
# the classes of the last column ('embarked').
le = LabelEncoder()
for col in ['sex', 'embarked']:
    df[col] = le.fit_transform(df[col].astype(str))
# Example: focusing on 'age', 'fare', 'pclass', 'sibsp', and 'parch'
columns = ['age', 'fare', 'pclass', 'sibsp', 'parch']
df_encoded = df[columns].copy()

# Create the boxplot and keep the Axes it returns, so the markers and the
# title are drawn on exactly that plot.
ax = df_encoded.boxplot(column=columns, rot=90, fontsize=15, figsize=(10, 6))

# Calculate Q3 (75th percentile) of each column; the result is a Series
# indexed by column name.
q3_values = df_encoded.quantile(0.75)

# Plot Q3 as red crosses. The boxes sit at x = 1, 2, ..., len(columns), so
# the counter starts at 1 to place each cross on its own box.
for position, col in enumerate(columns, start=1):
    ax.scatter(position, q3_values.loc[col], marker='x', color='red', s=100)

ax.set_title('Boxplot and Q3 Markers')
plt.show()
Output plot:
