# Next, import the necessary modules
import pandas as pd
from sklearn.decomposition import PCA
# Load the dataset into a Pandas DataFrame
# (pandas reads legacy .xls workbooks through the optional xlrd engine)
df = pd.read_excel("Sample - EU Superstore.xls")

# Select the Sales, Profit, and Discount columns from the DataFrame
X = df[["Sales", "Profit", "Discount"]]

# Standardize the data (mean = 0, standard deviation = 1) so that no column
# dominates the principal components purely because of its scale
X = (X - X.mean()) / X.std()

# Create a PCA object with 2 components
pca = PCA(n_components=2)

# Fit and transform the data: one row per row of X, one column per component
X_pca = pca.fit_transform(X)
X_pca
# Output recorded from the original run:
# array([[ 0.00327793, -0.73653872],
#        [ 0.21337696, -0.36726849],
#        [-0.14137481, -0.7905358 ],
#        ...,
#        [-0.01794756, -0.71616103],
#        [ 1.22005237, -0.2189667 ],
#        [ 0.13156243, -0.64836178]])
import matplotlib.pyplot as plt
# Scatterplot of the first and second principal components
plt.scatter(X_pca[:, 0], X_pca[:, 1])
plt.xlabel("First Principal Component")
plt.ylabel("Second Principal Component")
plt.show()
# Store both principal components alongside the original rows
df["PC1"] = X_pca[:, 0]
df["PC2"] = X_pca[:, 1]

# Get the unique product names. This has to be the same column that is looked
# up per row in the scatter call below; building the mapping from a different
# column makes that lookup raise KeyError.
product_names = df["Product Name"].unique()

# Generate a set of colors using a colormap. The positions are floats spread
# evenly over [0, 1]: integer inputs are interpreted as raw lookup-table
# indices, so every name past the end of the table would get the same color.
n_products = len(product_names)
color_positions = [i / max(n_products - 1, 1) for i in range(n_products)]
colors = plt.cm.viridis(color_positions)

# Map each product name to a color
product_name_to_color = dict(zip(product_names, colors))

# Scatterplot of the first and second principal components, with colors based on product name
plt.scatter(
    X_pca[:, 0],
    X_pca[:, 1],
    c=[product_name_to_color[name] for name in df["Product Name"]],
)
plt.xlabel("First Principal Component")
plt.ylabel("Second Principal Component")
plt.show()