Data Visualizations
Dataviz library
Pandas-Profiling
Sweetviz
Autoviz
D-Tale
Subplot syntax
To plot a 2 x 3 grid of plots (2 rows, 3 columns)
# Draw one box plot per variable on a 2-row x 3-column grid, with Product on
# the x-axis and the boxes split by Gender.
variables = ["Age", "Education", "Usage", "Fitness", "Income", "Miles"]
fig, axes = plt.subplots(2, 3, figsize=(20, 12))

# axes.flat walks the grid row by row, which matches the order of `variables`.
for panel, variable in zip(axes.flat, variables):
    sns.boxplot(ax=panel, data=df, x="Product", y=variable, hue="Gender")
    panel.set_title(f"Gender wise {variable} vs Product")

plt.show()
Subplots using a single line
# One density plot per column, laid out on a 3 x 3 grid of subplots that do
# not share an x-axis.
density_options = dict(kind='density', subplots=True, layout=(3, 3), sharex=False)
data.plot(**density_options)
To generate a box plot with a solid fill color
# Colour to use for each part of the box plot.
box_palette = {
    "boxes": "lightblue",
    "whiskers": "dimgrey",
    "medians": "dimgrey",
    "caps": "dimgrey",
}

# patch_artist=True asks matplotlib to draw the boxes as patches, so they are
# filled with colour rather than drawn as outlines only.
df.plot(
    kind="box",
    subplots=True,
    layout=(3, 4),
    sharex=False,
    figsize=(12, 15),
    patch_artist=True,
    color=box_palette,
);
plt.suptitle("Outliers", y=0.92, fontsize=14);
Add a Pie chart inside the subplot
# Relative frequency of each gender (first 10 entries at most), drawn as a pie
# chart on the [1, 0] panel of an already created `axes` grid.
gender_share = unique_users["Gender"].value_counts(normalize=True)[:10]
pie_ax = axes[1, 0]
gender_share.plot(kind="pie", autopct='%1.1f%%', startangle=90, ax=pie_ax)
pie_ax.set_title("Gender Distribution");
Load top n values in seaborn countplot
# Keep only the first 10 categories returned by value_counts() and pass them
# as the bar order, so the count plot is limited to those categories.
top_categories = df["Product_Category"].value_counts().iloc[:10].index
sns.countplot(x="Product_Category", data=df, order=top_categories)
Plot a Gantt Chart using Plotly
import plotly.express as px
# Gantt chart: one bar per row spanning buy_date -> sell_date, placed on the
# y-axis and coloured by symbol, with a range slider enabled on the x-axis.
fig = px.timeline(
    dataframe,
    x_start="buy_date",
    x_end="sell_date",
    y="symbol",
    color="symbol",
).update_layout(xaxis_rangeslider_visible=True)
fig.show()
Plot Line equation using matplotlib
import numpy as np
import matplotlib.pyplot as plt
# Sample the line y = 3x + 4 at 100 evenly spaced points on [-10, 10].
xs = np.linspace(-10, 10, 100)
ys = 3 * xs + 4

# Draw the line, then decorate the axes.
plt.plot(xs, ys, label='y = 3x + 4')
plt.title('Plot of y = 3x + 4')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()
Crosstab Plotly subplot
# Row-normalised crosstabs: normalize="index" turns every row into the share
# of each loan status within that term / grade.
term_vs_loan_status = pd.crosstab(pdf['term'], pdf['loan_status'], normalize="index")
grade_vs_loan_status = pd.crosstab(pdf['grade'], pdf['loan_status'], normalize="index")

colors = [
    '#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
    '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52',
]

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Term vs Loan Status", "Grade vs Loan Status"),
)

# (crosstab, subplot column, show legend entries?). Only the first panel
# contributes legend entries; the second panel's traces are hidden from the
# legend and are coloured by the same position-based palette.
panels = [
    (term_vs_loan_status, 1, True),
    (grade_vs_loan_status, 2, False),
]
for crosstab, panel_col, with_legend in panels:
    for idx, status in enumerate(crosstab.columns):
        bar = go.Bar(
            name=status,
            x=crosstab.index,
            y=crosstab[status],
            # Wrap around the palette if there are more columns than colours.
            marker_color=colors[idx % len(colors)],
            showlegend=with_legend,
        )
        fig.add_trace(bar, row=1, col=panel_col)

# Horizontal legend centred underneath the plotting area.
legend_below_plot = dict(
    orientation="h",
    x=0.5,
    xanchor="center",
    y=-0.2,
    yanchor="top",
)
fig.update_layout(
    height=500,
    width=1200,
    title_text="Crosstab Plots of Different Features",
    barmode='stack',
    legend=legend_below_plot,
)
fig.show()
Polar Plot
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def create_polar_plot(data: np.ndarray, features: list, cluster_label: int):
    """Draw and show a filled polar (radar) chart for a single cluster.

    Args:
        data: One value per entry of ``features``, in the same order.
        features: Labels placed at the angular ticks around the circle.
        cluster_label: Identifier shown in the plot title.
    """
    # One evenly spaced angle (in radians) per feature.
    spoke_angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False).tolist()

    # Repeat the first point at the end so the outline closes on itself.
    closed_values = np.concatenate((data, [data[0]]))
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"polar": True})
    ax.fill(closed_angles, closed_values, color='blue', alpha=0.25)
    ax.plot(closed_angles, closed_values, color='blue', linewidth=2)

    # Tick once per feature; the duplicated closing angle gets no label.
    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(features)

    ax.set_title(f'Cluster {cluster_label}', size=20, color='blue', y=1.1)
    plt.show()
def create_combined_polar_plot(normalized_means, features, cluster_labels):
    """Overlay the polar (radar) outline of every cluster on one chart and show it.

    Args:
        normalized_means: Sequence of per-cluster value sequences, each with
            one value per entry of ``features``.
        features: Labels placed at the angular ticks around the circle.
        cluster_labels: Labels looked up by position for the legend entries,
            in step with ``normalized_means``.
    """
    # One evenly spaced angle (in radians) per feature, plus the first angle
    # again so each outline closes on itself.
    spoke_angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False).tolist()
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(12, 12), subplot_kw={"polar": True})

    for idx, cluster_values in enumerate(normalized_means):
        closed_values = np.concatenate((cluster_values, [cluster_values[0]]))
        # Only the filled area carries the legend label.
        ax.fill(closed_angles, closed_values, alpha=0.25,
                label=f'Cluster {cluster_labels[idx]}')
        ax.plot(closed_angles, closed_values, linewidth=2)

    # Tick once per feature; the duplicated closing angle gets no label.
    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(features)

    ax.set_title('Cluster Comparison', size=20, y=1.1)
    ax.legend(loc='upper right', bbox_to_anchor=(1.1, 1.1))
    plt.show()
Usage
# Mean of every column per cluster (`df` is a Polars DataFrame with a
# "y_kmeans" cluster-id column). Polars' group_by() does not guarantee the
# order of the returned groups, so sort by the cluster id and read the labels
# from the same frame: label k then always describes row k of the means.
cluster_summary = df.group_by("y_kmeans").mean().sort("y_kmeans")
cluster_labels = cluster_summary["y_kmeans"].to_list()
cluster_means = cluster_summary.to_pandas().drop(["y_kmeans"], axis=1)

# Rescale the per-cluster means before plotting (assuming scikit-learn's
# MinMaxScaler, this maps each feature column to [0, 1] across clusters).
scaler = MinMaxScaler()
normalized_means = scaler.fit_transform(cluster_means)
features = cluster_means.columns.to_list()

## Single polar plot
# Pair each row with its real cluster id instead of assuming that the row
# position equals the id.
for cluster_label, cluster_values in zip(cluster_labels, normalized_means):
    create_polar_plot(cluster_values, features, cluster_label)

## Combined polar plot
create_combined_polar_plot(normalized_means, features, cluster_labels)
Last updated