Vor a poner el código
Código Python:
Ver original
# sklearn from sklearn.preprocessing import StandardScaler, normalize from sklearn.cluster import KMeans from sklearn.decomposition import PCA # generico import matplotlib.pyplot as plt import seaborn as sns import pandas as pd import numpy as np import os # --------------- Ejercicio 1 # ---------- parte 1 os.system("clear") data = pd.read_csv("Mall_Customers.csv") # ---------- parte 2 print("Parte 2\nAntes\n") print(data.columns) print("\n") print(data.head()) print("\n") print(data.shape) print("\n") print(data.dtypes) # ---------- parte 3 print("\n--------------------\n") print("Parte 3\nAhora\n") """ # sin variables dummy data["Gender"] = data["Gender"].replace("Male", 1).replace("Female", 0) print(data.head()) """ # con variables dummy df_dummys = pd.get_dummies(data, columns=["Gender"], drop_first=True) print(df_dummys.head()) # ---------- parte 4 print("\n--------------------\n") print("Parte 4\n") # data = data.drop("CustomerID", axis=1) # data = data.astype(np.float64) # data = data.fillna(method="ffill") df_dummys.set_index("CustomerID", drop=True, append=False, inplace=False, verify_integrity=False) print(df_dummys.info()) # ---------- parte 5 print("\n--------------------\n") print("Parte 5\n") df_dummys = df_dummys.astype(np.float64) print(df_dummys.dtypes) print("\n") print(df_dummys.head()) # ---------- parte 6 print("\n--------------------\n") print("Parte 6\n") df_grupo = df_dummys[["Annual Income (k$)", "Spending Score (1-100)"]] print(df_grupo.head()) scaler = StandardScaler() scaled_df = scaler.fit_transform(df_grupo) normalized_df = normalize(scaled_df) normalized_df = pd.DataFrame(normalized_df, columns=df_grupo.columns) print("\n") print(normalized_df.head()) # ---------- K = 3 km = KMeans(n_clusters=K) km.fit(normalized_df) pca = PCA(n_components=2) X_principal = pca.fit_transform(normalized_df) X_principal = pd.DataFrame(X_principal) X_principal.columns = ["P1", "P2"] print("\n") print(X_principal.head()) plt.figure(figsize=(6, 6)) plt.scatter(X_principal["P1"], X_principal["P2"], c=km.predict(normalized_df)) plt.savefig("figura1.png") plt.show() # ---------- parte 7 print("\n--------------------\n") print("Parte 7\n") Sum_of_squared_distances = [] for k in range(1, 15): km = KMeans(n_clusters=k) km = km.fit(normalized_df) Sum_of_squared_distances.append(km.inertia_) print(Sum_of_squared_distances) plt.plot(Sum_of_squared_distances) plt.savefig("figura2.png") plt.show() # ---------- K = 2 km = KMeans(n_clusters=K) km.fit(normalized_df) plt.figure(figsize=(6, 6)) plt.scatter(X_principal["P1"], X_principal["P2"], c=km.predict(normalized_df)) plt.savefig("figura3.png") plt.show() # ---------- parte 8 print("\n--------------------\n") print("Parte 8\n") df_dummys["c"] = km.predict(normalized_df) print(df_dummys.head()) i = 1 [B]# aquí está la parte del problema for col in df_dummys: grid = sns.FacetGrid(df_dummys, col="c") grid.map(plt.hist, col) plt.savefig("grilla" + str(i) + ".png") plt.show() i += 1 [/B]
Y no sé me ocurre cómo generar la imagen única, espero sus respuestas y saludos.