Comments 2
Только я настроился почитать, что же есть новенького в AutoML, так статья и закончилась...
import psycopg2
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
# Database connection
def connect_to_db(host="localhost", database="primer", user="postgres",
                  password="12345"):
    """Open a PostgreSQL connection and create a cursor on it.

    Args:
        host: Server host name passed to ``psycopg2.connect``.
        database: Name of the database to connect to.
        user: Database user name.
        password: Password for ``user``.

    Returns:
        A ``(connection, cursor)`` tuple. The caller owns both objects and
        is responsible for closing them.

    Raises:
        Whatever ``psycopg2.connect`` or ``connection.cursor`` raises; no
        exception is swallowed here.
    """
    # NOTE(review): the default credentials are kept only so existing
    # zero-argument callers keep working; real deployments should pass them
    # in explicitly instead of relying on values committed to source.
    conn = psycopg2.connect(host=host, database=database, user=user,
                            password=password)
    try:
        cursor = conn.cursor()
    except Exception:
        # Do not leak the freshly opened connection if no cursor can be made.
        conn.close()
        raise
    return conn, cursor
# Data loading
def fetch_data(cursor, table_name="station_data_4"):
    """Read every row of ``table_name`` into a pandas DataFrame.

    Args:
        cursor: DB-API cursor used to run the query; it must provide
            ``execute``, ``fetchall`` and ``description``.
        table_name: Name of the table to read.

    Returns:
        A DataFrame holding all fetched rows, with column names taken from
        the first element of each ``cursor.description`` entry.
    """
    # NOTE: table_name is interpolated into the SQL text without quoting or
    # escaping, so it must only ever come from trusted code, never from
    # user input.
    cursor.execute(f"SELECT * FROM {table_name}")
    rows = cursor.fetchall()
    column_names = [desc[0] for desc in cursor.description]
    return pd.DataFrame(rows, columns=column_names)
# Helper: reconcile cluster numbering between two k-means fits
def _align_cluster_labels(reference, candidate, n_clusters):
    """Return a mapping from ``candidate`` cluster ids to ``reference`` ids.

    K-means numbers its clusters arbitrarily on every fit, so the same group
    of points may be called ``0`` by one model and ``2`` by another. The
    mapping is the one-to-one assignment that maximises the number of samples
    on which both labelings agree.

    Args:
        reference: Cluster ids whose numbering should be kept.
        candidate: Cluster ids for the same samples from another fit.
        n_clusters: Number of clusters; ids are expected in
            ``range(n_clusters)``.

    Returns:
        A dict ``{candidate_id: reference_id}`` covering every cluster id.
    """
    from scipy.optimize import linear_sum_assignment

    cluster_ids = list(range(n_clusters))
    # overlap[i, j] = samples labelled i by `reference` and j by `candidate`.
    overlap = confusion_matrix(reference, candidate, labels=cluster_ids)
    # The solver minimises cost, so negate the counts to maximise agreement.
    reference_ids, candidate_ids = linear_sum_assignment(-overlap)
    return {int(c): int(r) for r, c in zip(reference_ids, candidate_ids)}


# Main function
def main():
    """Cluster station data with k-means and report how well it generalises.

    Steps:
        1. Load the table through ``connect_to_db`` / ``fetch_data``.
        2. Standardise four feature columns and cluster all rows (k=3); the
           result is stored in ``df['Cluster']`` and used as reference.
        3. Split 70/30, fit a second k-means on the training part only and
           predict the held-out part.
        4. Align the second model's cluster numbering with the reference
           one, then print the silhouette score, classification report and
           confusion matrix.
        5. Show a pair plot of the training rows coloured by cluster.
    """
    feature_columns = ['travel_time_interval_average', 'station_load_total',
                       'capacity_percent_avg', 'peak_travel_duration']
    n_clusters = 3

    conn, cursor = connect_to_db()
    if not (conn and cursor):
        return
    try:
        df = fetch_data(cursor)
    finally:
        # Release the DB resources even if the query fails, and before the
        # blocking plot window opens further down.
        cursor.close()
        conn.close()

    if df is None or df.empty:
        # Scaling/clustering zero rows would raise; stop with a clear note.
        print("No data returned from the database; nothing to cluster.")
        return

    X = df[feature_columns]
    X_scaled = StandardScaler().fit_transform(X)

    # Reference clustering over the full data set.
    reference_model = KMeans(n_clusters=n_clusters, random_state=42)
    df['Cluster'] = reference_model.fit_predict(X_scaled)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, df['Cluster'], test_size=0.3, random_state=42)

    # Second model sees the training rows only.
    holdout_model = KMeans(n_clusters=n_clusters, random_state=42)
    labels_train = holdout_model.fit_predict(X_train)
    labels_test = holdout_model.predict(X_test)

    # The two fits may number the same clusters differently. Without this
    # renaming the report and confusion matrix below would compare unrelated
    # ids. The mapping is derived from training rows only, so the held-out
    # rows stay untouched until evaluation.
    id_map = _align_cluster_labels(y_train, labels_train, n_clusters)
    labels_train = [id_map[int(label)] for label in labels_train]
    labels_test = [id_map[int(label)] for label in labels_test]

    print("Silhouette Score:", silhouette_score(X_train, labels_train))
    print("Classification Report:\n", classification_report(y_test, labels_test))
    print("Confusion Matrix:\n", confusion_matrix(y_test, labels_test))

    # Build a DataFrame for visualisation
    df_visualize = pd.DataFrame(X_train, columns=X.columns)
    df_visualize['Cluster'] = labels_train  # add the cluster column

    # Visualisation
    sns.pairplot(df_visualize, hue='Cluster', palette='Set2')
    plt.show()
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
AutoML и NAS