# -*- coding: utf-8 -*-
"""posture_pulse.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1g2LR35aY6toHp-klC2BLkKjhALPbOs4h
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
#from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score
from google.colab import files
from sklearn.ensemble import GradientBoostingClassifier


# Local import: only this loading step needs an in-memory byte stream.
import io

# Raise pandas' display limits so wide/long frames are not truncated.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

# Open the Colab upload dialog; the result maps each uploaded file name to
# the raw bytes that were uploaded.
file_upload = files.upload()

# Parse the uploaded bytes directly instead of relying on a hard-coded path.
# The previous fixed name ('posture_detection (1).csv') only exists when Colab
# happened to rename a duplicate upload, so any other file name either failed
# or silently loaded a stale copy from an earlier session.
_uploaded_csv_names = [name for name in file_upload if name.lower().endswith('.csv')]
if _uploaded_csv_names:
    # If several CSV files were uploaded, the first one is used.
    dataset = pd.read_csv(io.BytesIO(file_upload[_uploaded_csv_names[0]]))
else:
    # No CSV was uploaded (e.g. the dialog was dismissed): fall back to the
    # file name this notebook originally expected in the working directory.
    dataset = pd.read_csv('posture_detection (1).csv')

# Notebook-style previews: these bare expressions only render output when run
# as the last statement of a notebook cell.
dataset.head()

dataset.shape

# Bar chart of how many rows each value of the 'label' column has.
sns.countplot(x='label', data=dataset)

# y = dataset['label'].values
# X = dataset.drop("label", axis =1).values
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0,stratify=y)

# posture_features = [
#     "head_angle",
#     "neck_angle",
#     "torso_angle",
#     "shoulder_angle",
#     "forward_lean",
#     "spine_curve",
#     "lateral_shift_norm",
#     "shoulder_height_diff",
#     "neck_flexion",
#     "upper_body_curve",
#     "head_forward_distance"
# ]

# Target is the 'label' column; every other column is used as a feature.
y = dataset['label']
X = dataset.drop(columns='label')

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

print(f"Train shape: {X_train.shape}")
print(f"Test shape: {X_test.shape}")

# Base forest; the fixed seed keeps the estimator's randomness reproducible.
RFCmodel = RandomForestClassifier(random_state=0)

# Candidate hyper-parameter values the randomized search samples from.
parameters = dict(
    n_estimators=[200, 300, 500],
    max_depth=[10, 15, 20, None],
    min_samples_leaf=[2, 3, 5],
    max_features=["sqrt"],
    criterion=["gini", "entropy"],
    class_weight=["balanced"],
)

# Evaluate 20 sampled combinations with 5-fold cross-validated accuracy,
# running on all available cores (n_jobs=-1) with progress output.
random_search = RandomizedSearchCV(
    RFCmodel,
    parameters,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    random_state=0,
    n_jobs=-1,
    verbose=5,
)
random_search.fit(X_train, y_train)

# Cross-validated score of the winning combination, and the combination itself.
print("Best accuracy:", random_search.best_score_)
print("Best parameters:", random_search.best_params_)

# Predict the held-out rows with the fitted search object.
y_pred = random_search.predict(X_test)

# Per-class metrics, then the raw confusion matrix, then accuracy in percent.
print(classification_report(y_test, y_pred), confusion_matrix(y_test, y_pred), sep="\n")
print("Accuracy:", accuracy_score(y_test, y_pred) * 100)

best_clf = random_search.best_estimator_

# Score the same model on seen and unseen rows; the printed gap between the
# two is the over-fitting indicator this notebook watches.
train_acc = best_clf.score(X_train, y_train)
test_acc = best_clf.score(X_test, y_test)
print(
    f"\nTrain accuracy : {train_acc:.3f}",
    f"Test  accuracy : {test_acc:.3f}",
    f"Gap            : {train_acc - test_acc:.3f}  (should be < 0.05)",
    sep="\n",
)

from sklearn.model_selection import cross_val_score

# Stack the two splits back together (train rows first, then test rows) so
# the stability check below runs over every available sample.
X_all = np.vstack((X_train, X_test))
y_all = np.concatenate((y_train, y_test))

# 5-fold accuracy of the tuned configuration across the recombined data.
cv_scores = cross_val_score(
    estimator=best_clf, X=X_all, y=y_all, scoring='accuracy', cv=5
)
print(
    f"CV scores : {cv_scores}",
    f"Mean      : {np.mean(cv_scores):.3f}",
    f"Std       : {np.std(cv_scores):.3f}  (should be < 0.05)",
    sep="\n",
)

# Tick labels come from the fitted model's class list; the counts come from
# the hold-out predictions.
class_names = random_search.best_estimator_.classes_
cf = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10, 7))
# fmt='g' prints the cell annotations as plain numbers.
sns.heatmap(cf, annot=True, fmt='g', xticklabels=class_names, yticklabels=class_names)

plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

import joblib
import sklearn

# Serialize the tuned estimator to disk, then trigger a browser download of
# the resulting file from the Colab runtime.
joblib.dump(random_search.best_estimator_, 'posture_model.pkl')
files.download('posture_model.pkl')

# Print the scikit-learn version used to fit the model, followed by the
# class labels the saved model predicts.
print(sklearn.__version__, random_search.best_estimator_.classes_, sep="\n")

"""### Feature Importance Analysis

Let's visualize which features (landmark coordinates) are most important for the `RandomForestClassifier` in making its predictions. This can provide insights into which parts of the pose are most indicative of different postures.
"""

# Importance scores from the tuned forest, paired with the feature column
# names (every dataset column except 'label').
feature_importances = random_search.best_estimator_.feature_importances_
feature_names = dataset.drop(columns='label').columns

# One row per feature, most important first.
importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': feature_importances}
).sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the ranked importances.
plt.figure(figsize=(15, 8))
sns.barplot(data=importance_df, x='Importance', y='Feature')
plt.xlabel('Importance (Gini Importance)')
plt.ylabel('Feature Name')
plt.title('Feature Importances from Random Forest Classifier')
plt.tight_layout()
plt.show()


"""### Training and Comparing Different Classification Models

Now, let's train other classification models using `RandomizedSearchCV` to find their optimal parameters and then compare their performance to identify the best model for posture detection.
"""

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Define a function to perform RandomizedSearchCV and evaluate models
def train_and_evaluate_model(model, params, X_train, y_train, X_test, y_test, model_name):
    """Tune ``model`` with a randomized search and report its test performance.

    Runs a ``RandomizedSearchCV`` (10 sampled candidates, 5-fold CV, accuracy
    scoring, fixed seed) on the training data, then evaluates the best
    estimator found on the test data and prints a summary.

    Args:
        model: Unfitted estimator to tune.
        params: Parameter distributions/lists passed to the search.
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features used for the final evaluation.
        y_test: Held-out labels used for the final evaluation.
        model_name: Human-readable name used in the printed output.

    Returns:
        A ``(best_model, test_accuracy)`` tuple: the best estimator from the
        search and its accuracy on the test data as a fraction (not percent).
    """
    print(f"\n--- Training {model_name} ---")

    # Fewer candidates (10) and no progress output keep the comparison quick.
    search = RandomizedSearchCV(
        model,
        params,
        n_iter=10,
        cv=5,
        scoring='accuracy',
        random_state=0,
        n_jobs=-1,
        verbose=0,
    )
    search.fit(X_train, y_train)

    tuned_model = search.best_estimator_
    predictions = tuned_model.predict(X_test)
    # Computed once and reused for both the printout and the return value.
    test_accuracy = accuracy_score(y_test, predictions)

    print(f"Best accuracy for {model_name}: {search.best_score_:.3f}")
    print(f"Best parameters for {model_name}: {search.best_params_}")
    print(f"Test Accuracy for {model_name}: {test_accuracy * 100:.2f}%")
    print(classification_report(y_test, predictions))

    return tuned_model, test_accuracy

# Empty containers for the model comparison; presumably keyed by model name
# and filled with fitted estimators / test accuracies further down (verify
# against the code that populates them).
models = dict()
model_test_accuracies = dict()

"""#### Random Forest Classifier (Already Tuned)"""