# Import the necessary modules
import pandas as pd
from sklearn.decomposition import PCA

# Read the EU Superstore workbook into a DataFrame
df = pd.read_excel("Sample - EU Superstore.xls")

# Keep only the three numeric features that feed the PCA
X = df.loc[:, ["Sales", "Profit", "Discount"]]

# Z-score every column: subtract its mean, then divide by its standard
# deviation (pandas' std() defaults to the sample estimate, ddof=1)
X = X.sub(X.mean()).div(X.std())

# Project the standardized features onto their first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# Show the projected coordinates. A bare `X_pca` expression is only echoed by
# a notebook/REPL, so print the repr explicitly when this runs as a script.
print(repr(X_pca))
# Output recorded from an earlier run. It was previously pasted in as live
# code, where the call to the undefined name `array` raised NameError:
# array([[ 0.00327793, -0.73653872],
#        [ 0.21337696, -0.36726849],
#        [-0.14137481, -0.7905358 ],
#        ...,
#        [-0.01794756, -0.71616103],
#        [ 1.22005237, -0.2189667 ],
#        [ 0.13156243, -0.64836178]])
import matplotlib.pyplot as plt

# Plain scatter of the projection: one point per row, PC1 on x and PC2 on y
first_component = X_pca[:, 0]
second_component = X_pca[:, 1]
plt.scatter(first_component, second_component)
plt.xlabel("First Principal Component")
plt.ylabel("Second Principal Component")
plt.show()

# Store the two principal-component scores alongside the original rows
df["PC1"] = X_pca[:, 0]
df["PC2"] = X_pca[:, 1]

# Build the colour lookup from the SAME column that is used to look colours up
# below. Keying it by "Customer Name" made every "Product Name" lookup raise
# KeyError.
product_names = df["Product Name"].unique()  # Get the unique product names

# Sample the colormap at floats evenly spaced over [0, 1]. Integer inputs are
# treated as direct lookup-table indices, so range(n) collapses every product
# past the end of the table onto the same final colour.
n_products = len(product_names)
color_positions = [i / max(n_products - 1, 1) for i in range(n_products)]
colors = plt.cm.viridis(color_positions)  # One RGBA row per product name
product_name_to_color = dict(zip(product_names, colors))  # Map each product name to a color

# Scatterplot of the first and second principal components, with colors based on product name
point_colors = [product_name_to_color[name] for name in df["Product Name"]]
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=point_colors)
plt.xlabel("First Principal Component")
plt.ylabel("Second Principal Component")
plt.show()