0% found this document useful (0 votes)
24 views7 pages

Evaluate Machine Learning Models in Python

The document contains Python code for loading and processing a dataset related to RNA mutations and overall survival. It implements machine learning models, specifically Random Forest and Support Vector Classifier, to evaluate their performance using metrics such as F1 Score and Accuracy. The results are visualized using confusion matrices and bar plots for comparison of model performance.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views7 pages

Evaluate Machine Learning Models in Python

The document contains Python code for loading and processing a dataset related to RNA mutations and overall survival. It implements machine learning models, specifically Random Forest and Support Vector Classifier, to evaluate their performance using metrics such as F1 Score and Accuracy. The results are visualized using confusion matrices and bar plots for comparison of model performance.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Code:

# Third-party imports: pandas for data handling, matplotlib/seaborn for
# plotting, scikit-learn for model selection, metrics, and estimators.
# (The scraped original replaced each dotted module path with "[Link]" and
# dropped the closing paren of the metrics import; restored here.)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score, accuracy_score, f1_score
)
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Load and process the dataset.
# METABRIC breast-cancer data; 'overall_survival' is the binary target.
metabric_df = pd.read_csv("./METABRIC_RNA_Mutation.csv")

# Exploratory summary — printed explicitly so it is visible when run as a
# script (the original's bare expressions only display inside a notebook).
print(metabric_df.shape)
metabric_df.info(verbose=True)
print(metabric_df.sample(5))

# Index by patient id so the identifier is excluded from the feature matrix.
metabric_df = metabric_df.set_index('patient_id')

# Columns 30:519 appear to hold the gene-expression features (assumed from
# the original column layout — TODO confirm against the CSV header); join the
# survival label to build the modeling frame.
df_expression = metabric_df.iloc[:, 30:519].join(metabric_df['overall_survival'], how='inner')
print(df_expression)

# Dictionary to store F1 and accuracy scores of each model, filled by
# evaluate_model() and plotted at the end of main().
metrics_summary = {"Model": [], "F1 Score": [], "Accuracy": []}

# Function to evaluate and display results

def evaluate_model(model, X_train, X_test, y_train, y_test, model_name):
    """Fit *model* on the training split and report its test-set performance.

    Side effects: appends the model name, weighted F1, and accuracy to the
    module-level ``metrics_summary`` dict, prints a classification report,
    shows a confusion-matrix heatmap, and prints AUC-ROC when the estimator
    exposes ``predict_proba``.
    """
    model.fit(X_train, y_train)

    # Predictions and, where supported, positive-class probabilities.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1] if hasattr(
        model, 'predict_proba') else None

    # Calculate metrics (weighted F1 accounts for class imbalance).
    f1 = f1_score(y_test, y_pred, average='weighted')
    accuracy = accuracy_score(y_test, y_pred)

    # Store the metrics for the cross-model comparison plots in main().
    metrics_summary["Model"].append(model_name)
    metrics_summary["F1 Score"].append(f1)
    metrics_summary["Accuracy"].append(accuracy)

    # Display classification metrics
    print(f"\n=== {model_name} ===")
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")

    # Confusion Matrix Visualization (labels assume a binary 0/1 target).
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Pred 0', 'Pred 1'],
                yticklabels=['True 0', 'True 1'])
    plt.title(f'Confusion Matrix - {model_name}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

    # AUC-ROC Score (only meaningful when probabilities are available).
    if y_proba is not None:
        print(f"AUC-ROC Score: {roc_auc_score(y_test, y_proba):.4f}")

# Main function to process the dataset and evaluate models


def main(df, target_column):
    """Train and compare classifiers on *df* predicting *target_column*.

    Splits the data 70/30, evaluates each model via ``evaluate_model`` (which
    fills ``metrics_summary``), then plots bar charts comparing F1 and
    accuracy across models.
    """
    # Prepare the dataset: features vs. target.
    X = df.drop(target_column, axis=1)
    y = df[target_column]

    # One-hot encode categorical feature columns; label-encode a string target.
    X = pd.get_dummies(X, drop_first=True)
    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    # Split the dataset (fixed seed for reproducibility).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    # Define classifiers to be evaluated.
    # probability=True enables predict_proba on SVC so AUC-ROC can be reported.
    models = {
        'Random Forest': RandomForestClassifier(),
        'Support Vector Classifier': SVC(probability=True),
    }

    # Train and evaluate all models.
    for name, model in models.items():
        evaluate_model(model, X_train, X_test, y_train, y_test, name)

    # Plot F1 Scores of all models.
    plt.figure(figsize=(10, 5))
    sns.barplot(x=metrics_summary["Model"],
                y=metrics_summary["F1 Score"], palette='coolwarm')
    plt.xticks(rotation=45)
    plt.title('F1 Scores of All Models')
    plt.ylabel('F1 Score')
    plt.show()

    # Plot Accuracy Scores of all models.
    plt.figure(figsize=(10, 5))
    sns.barplot(x=metrics_summary["Model"],
                y=metrics_summary["Accuracy"], palette='coolwarm')
    plt.xticks(rotation=45)
    plt.title('Accuracy Scores of All Models')
    plt.ylabel('Accuracy')
    plt.show()

# Load your dataset and specify the target column

# Script entry point: run the pipeline on the expression dataframe built above.
if __name__ == "__main__":
    df = df_expression  # Example input dataframe
    target_column = 'overall_survival'  # Replace with the actual target column name
    main(df, target_column)

Output:

You might also like