Turbofan Engine RUL Prediction with SHAP

The document presents a Jupyter notebook that analyzes NASA turbofan engine data using machine learning, specifically Random Forest regression and the model-interpretability methods LIME and SHAP. It imports the necessary libraries, downloads and extracts the dataset, preprocesses the data, trains the model, and generates explanations for its predictions. An initial FileNotFoundError when reading the training file (the dataset ships as a zip nested inside the downloaded archive) is diagnosed and fixed before the analysis completes.


12/2/25, 1:10 AM SHAP_Assignment

In [1]: import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from lime import lime_tabular
import shap

import torch
from captum.attr import DeepLift

print("All imports OK")
print("numpy:", np.__version__)
print("shap:", shap.__version__)

C:\Users\Lenovo\anaconda3\envs\explain_env\lib\site-packages\tqdm\auto.py: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
All imports OK
numpy: 1.26.4
shap: 0.48.0

In [2]: import os, zipfile, urllib.request

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

from lime import lime_tabular
import shap
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from captum.attr import DeepLift

data_dir = "turbofan_data"
os.makedirs(data_dir, exist_ok=True)

zip_url = "[Link]"
zip_path = "[Link]"

if not os.path.exists(zip_path):
    print("Downloading dataset...")
    urllib.request.urlretrieve(zip_url, zip_path)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(data_dir)
    print("Download + extract done.")
else:
    print("Zip already present.")

Zip already present.
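The download-and-extract step above can be wrapped in a small idempotent helper. Below is a minimal sketch; the demo builds a throwaway archive on the fly (the file names inside it are illustrative) so the extract step can be exercised without a network connection:

```python
import os, tempfile, zipfile

def extract_zip(zip_path, dest_dir):
    """Extract zip_path into dest_dir, creating dest_dir if needed (idempotent)."""
    os.makedirs(dest_dir, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(dest_dir)

# demo on a throwaway archive so it runs offline
tmp = tempfile.mkdtemp()
demo_zip = os.path.join(tmp, "demo.zip")
with zipfile.ZipFile(demo_zip, "w") as zf:
    zf.writestr("CMAPSSData/train_FD001.txt", "1 1 0.0\n")

dest = os.path.join(tmp, "extracted")
extract_zip(demo_zip, dest)   # first run extracts
extract_zip(demo_zip, dest)   # second run is a harmless overwrite
print(os.path.exists(os.path.join(dest, "CMAPSSData", "train_FD001.txt")))  # True
```

Calling the helper twice, as above, shows why `exist_ok=True` plus `extractall` makes the cell safe to re-run.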

In [5]: import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# adjust this line ⬇️
train_file = os.path.join(data_dir, "CMAPSSData", "train_FD001.txt")

cols = ['engine_id', 'cycle'] + [f'setting_{i}' for i in range(1,4)] + [f's_{i}' for i in range(1,22)]

df = pd.read_csv(train_file, sep=r"\s+", header=None, names=cols)

# compute RUL
max_cycle = df.groupby('engine_id')['cycle'].max().reset_index().rename(columns={'cycle':'max_cycle'})
df = df.merge(max_cycle, on='engine_id')
df['RUL'] = df['max_cycle'] - df['cycle']
df.drop(columns=['max_cycle'], inplace=True)

feature_cols = [c for c in df.columns if c not in ['engine_id','cycle','RUL']]

X = df[feature_cols].values
y = df['RUL'].values

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_val_s = scaler.transform(X_val)

rf = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
rf.fit(X_train_s, y_train)
print("RF val R^2:", rf.score(X_val_s, y_val))


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[5], line 11
      8 train_file = os.path.join(data_dir, "CMAPSSData", "train_FD001.txt")
     10 cols = ['engine_id', 'cycle'] + [f'setting_{i}' for i in range(1,4)] + [f's_{i}' for i in range(1,22)]
---> 11 df = pd.read_csv(train_file, sep=r"\s+", header=None, names=cols)
     13 # compute RUL
     14 max_cycle = df.groupby('engine_id')['cycle'].max().reset_index().rename(columns={'cycle':'max_cycle'})

File ~\anaconda3\envs\explain_env\lib\site-packages\pandas\io\parsers\readers.py:1026, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
   1013 kwds_defaults = _refine_defaults_read(
   1014     dialect,
   1015     delimiter,
   (...)
   1022     dtype_backend=dtype_backend,
   1023 )
   1024 kwds.update(kwds_defaults)
-> 1026 return _read(filepath_or_buffer, kwds)

File ~\anaconda3\envs\explain_env\lib\site-packages\pandas\io\parsers\readers.py:620, in _read(filepath_or_buffer, kwds)
    617 _validate_names(kwds.get("names", None))
    619 # Create the parser.
--> 620 parser = TextFileReader(filepath_or_buffer, **kwds)
    622 if chunksize or iterator:
    623     return parser

File ~\anaconda3\envs\explain_env\lib\site-packages\pandas\io\parsers\readers.py:1620, in TextFileReader.__init__(self, f, engine, **kwds)
   1617     self.options["has_index_names"] = kwds["has_index_names"]
   1619 self.handles: IOHandles | None = None
-> 1620 self._engine = self._make_engine(f, self.engine)

File ~\anaconda3\envs\explain_env\lib\site-packages\pandas\io\parsers\readers.py:1880, in TextFileReader._make_engine(self, f, engine)
   1878     if "b" not in mode:
   1879         mode += "b"
-> 1880 self.handles = get_handle(
   1881     f,
   1882     mode,
   1883     encoding=self.options.get("encoding", None),
   1884     compression=self.options.get("compression", None),
   1885     memory_map=self.options.get("memory_map", False),
   1886     is_text=is_text,
   1887     errors=self.options.get("encoding_errors", "strict"),
   1888     storage_options=self.options.get("storage_options", None),
   1889 )
   1890 assert self.handles is not None
   1891 f = self.handles.handle

File ~\anaconda3\envs\explain_env\lib\site-packages\pandas\io\common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    868 elif isinstance(handle, str):
    869     # Check whether the filename is to be opened in binary mode.
    870     # Binary mode does not support 'encoding' and 'newline'.
    871     if ioargs.encoding and "b" not in ioargs.mode:
    872         # Encoding
--> 873     handle = open(
    874         handle,
    875         ioargs.mode,
    876         encoding=ioargs.encoding,
    877         errors=errors,
    878         newline="",
    879     )
    880 else:
    881     # Binary mode
    882     handle = open(handle, ioargs.mode)

FileNotFoundError: [Errno 2] No such file or directory: 'turbofan_data\\CMAPSSData\\train_FD001.txt'
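The traceback bottoms out in a plain `open()` call, so the root cause is simply a missing file. A guard that checks the path first and lists what actually exists in the parent directory makes this kind of failure quicker to diagnose. A sketch (`require_file` is an illustrative helper, not part of the assignment):

```python
import os

def require_file(path):
    """Return path if it exists, else raise an error listing the parent's contents."""
    if os.path.exists(path):
        return path
    parent = os.path.dirname(path) or "."
    siblings = os.listdir(parent) if os.path.isdir(parent) else []
    raise FileNotFoundError(f"{path} not found; {parent!r} contains {siblings}")

# a path that certainly exists: the os module's own source file
print(require_file(os.__file__) == os.__file__)  # True
```

Used before `pd.read_csv(train_file, ...)`, the error message would have shown immediately that `turbofan_data` held a nested archive rather than the expected `CMAPSSData` folder.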

In [7]: import os

data_dir = "turbofan_data"  # same as before

for root, dirs, files in os.walk(data_dir):
    for f in files:
        if f.lower() == "train_fd001.txt":
            print("FOUND:", os.path.join(root, f))

In [8]: import os, zipfile, urllib.request

data_dir = "turbofan_data"
os.makedirs(data_dir, exist_ok=True)

zip_url = "[Link]"
zip_path = "[Link]"

print("Downloading NASA Turbofan dataset...")
urllib.request.urlretrieve(zip_url, zip_path)
print("Download complete.")

print("Extracting...")
with zipfile.ZipFile(zip_path, "r") as zf:
    zf.extractall(data_dir)

print("Extraction finished.")

Downloading NASA Turbofan dataset...


Download complete.
Extracting...
Extraction finished.

In [9]: import os

for root, dirs, files in os.walk("turbofan_data"):
    print(root)
    for f in files:
        print("  ", f)


turbofan_data
turbofan_data\6. Turbofan Engine Degradation Simulation Data Set
   CMAPSSData.zip

In [10]: import os, zipfile

outer_dir = os.path.join("turbofan_data", "6. Turbofan Engine Degradation Simulation Data Set")
inner_zip = os.path.join(outer_dir, "CMAPSSData.zip")

extract_dir = os.path.join("turbofan_data", "CMAPSSData")
os.makedirs(extract_dir, exist_ok=True)

print("Inner zip path:", inner_zip)

with zipfile.ZipFile(inner_zip, "r") as zf:
    zf.extractall(extract_dir)

print("Extracted to:", extract_dir)

# quick check
for root, dirs, files in os.walk(extract_dir):
    print(root)
    for f in files:
        print("  ", f)

Inner zip path: turbofan_data\6. Turbofan Engine Degradation Simulation Data Set\CMAPSSData.zip
Extracted to: turbofan_data\CMAPSSData
turbofan_data\CMAPSSData
   Damage Propagation Modeling.pdf
   readme.txt
RUL_FD001.txt
RUL_FD002.txt
RUL_FD003.txt
RUL_FD004.txt
test_FD001.txt
test_FD002.txt
test_FD003.txt
test_FD004.txt
train_FD001.txt
train_FD002.txt
train_FD003.txt
train_FD004.txt
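The cells above dealt with a zip nested inside the downloaded archive by hand. A helper that extracts an archive and then extracts any `.zip` files found inside it handles this layout generically. A sketch, assuming one level of nesting is enough; the demo builds its own nested archive so it runs offline:

```python
import os, tempfile, zipfile

def extract_nested(zip_path, dest_dir):
    """Extract zip_path, then extract any .zip files found inside the result."""
    os.makedirs(dest_dir, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(dest_dir)
    for root, _dirs, files in os.walk(dest_dir):
        for f in files:
            if f.lower().endswith(".zip"):
                inner = os.path.join(root, f)
                with zipfile.ZipFile(inner, "r") as zf:
                    # extract next to the inner zip, under a folder named after it
                    zf.extractall(os.path.join(root, os.path.splitext(f)[0]))

# demo: build an outer zip that contains an inner zip, then extract both
tmp = tempfile.mkdtemp()
inner_path = os.path.join(tmp, "CMAPSSData.zip")
with zipfile.ZipFile(inner_path, "w") as zf:
    zf.writestr("train_FD001.txt", "1 1 0.0\n")
outer = os.path.join(tmp, "outer.zip")
with zipfile.ZipFile(outer, "w") as zf:
    zf.write(inner_path, arcname="subdir/CMAPSSData.zip")

dest = os.path.join(tmp, "data")
extract_nested(outer, dest)
print(os.path.exists(os.path.join(dest, "subdir", "CMAPSSData", "train_FD001.txt")))  # True
```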

In [11]: import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# 1. path to FD001 training file
train_file = os.path.join("turbofan_data", "CMAPSSData", "train_FD001.txt")
print("Using train file:", train_file)

# 2. load with column names (1 id, 1 cycle, 3 settings, 21 sensors)
cols = ['engine_id', 'cycle'] + [f'setting_{i}' for i in range(1,4)] + [f's_{i}' for i in range(1,22)]
df = pd.read_csv(train_file, sep=r"\s+", header=None, names=cols)

# 3. compute Remaining Useful Life (RUL) label
max_cycle = df.groupby('engine_id')['cycle'].max().reset_index().rename(columns={'cycle':'max_cycle'})
df = df.merge(max_cycle, on='engine_id')
df['RUL'] = df['max_cycle'] - df['cycle']
df.drop(columns=['max_cycle'], inplace=True)

# 4. split features / target
feature_cols = [c for c in df.columns if c not in ['engine_id', 'cycle', 'RUL']]
X = df[feature_cols].values
y = df['RUL'].values

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. scale and train RF
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_val_s = scaler.transform(X_val)

rf = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
rf.fit(X_train_s, y_train)

print("RF val R^2:", rf.score(X_val_s, y_val))

Using train file: turbofan_data\CMAPSSData\train_FD001.txt


RF val R^2: 0.6248712754209377
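The RUL label used above is simply each engine's maximum observed cycle minus the current cycle, computed via a groupby/merge. A toy frame makes the labeling easy to verify by hand:

```python
import pandas as pd

# two engines: engine 1 runs 3 cycles, engine 2 runs 2 cycles
toy = pd.DataFrame({
    "engine_id": [1, 1, 1, 2, 2],
    "cycle":     [1, 2, 3, 1, 2],
})
# same recipe as the notebook: per-engine max cycle, merge back, subtract
max_cycle = (toy.groupby("engine_id")["cycle"].max()
                .reset_index().rename(columns={"cycle": "max_cycle"}))
toy = toy.merge(max_cycle, on="engine_id")
toy["RUL"] = toy["max_cycle"] - toy["cycle"]
print(toy["RUL"].tolist())  # [2, 1, 0, 1, 0]
```

Each engine counts down to 0 at its final observed cycle, which is exactly the target the Random Forest regresses on.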

In [2]: from lime import lime_tabular
import shap

idx = 10  # choose any validation index you like

# LIME
explainer_lime = lime_tabular.LimeTabularExplainer(
    training_data=X_train_s,
    feature_names=feature_cols,
    mode='regression'
)
lime_exp = explainer_lime.explain_instance(X_val_s[idx], rf.predict, num_features=8)
print("LIME explanation (top 8 features):")
for feat, contrib in lime_exp.as_list():
    print(f"{feat}: {contrib:.3f}")

# SHAP
import numpy as np

# use a smaller random subset for SHAP (e.g. 500 rows)
n_shap = min(500, X_val_s.shape[0])
idxs = np.random.choice(X_val_s.shape[0], size=n_shap, replace=False)
X_shap = X_val_s[idxs]

explainer_shap = shap.TreeExplainer(rf)

# this is now much faster
shap_values = explainer_shap.shap_values(X_shap)

print("Computed SHAP values for", n_shap, "rows")

# global feature importance summary
shap.summary_plot(shap_values, X_shap, feature_names=feature_cols)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[2], line 8
      4 idx = 10  # choose any validation index you like
      6 # LIME
      7 explainer_lime = lime_tabular.LimeTabularExplainer(
----> 8     training_data=X_train_s,
      9     feature_names=feature_cols,
     10     mode='regression'
     11 )
     12 lime_exp = explainer_lime.explain_instance(X_val_s[idx], rf.predict, num_features=8)
     13 print("LIME explanation (top 8 features):")

NameError: name 'X_train_s' is not defined

In [3]: # ONE-CELL: data download/extract -> train RF -> LIME -> FAST SHAP
# Paste and run this in a fresh kernel cell.

import os, zipfile, urllib.request, random

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# reproducible
random.seed(0)
np.random.seed(0)

# ----------------- 1) ensure dataset is available and extracted -----------------
data_dir = "turbofan_data"
os.makedirs(data_dir, exist_ok=True)
outer_zip = os.path.join(data_dir, "[Link]")
# download outer zip if not present
zip_url = "[Link]"
if not os.path.exists(outer_zip):
    print("Downloading turbofan dataset (will take a short while)...")
    urllib.request.urlretrieve(zip_url, outer_zip)
    print("Download complete:", outer_zip)
else:
    print("Outer zip already present:", outer_zip)

# extract outer zip (it contains an inner CMAPSSData.zip)
with zipfile.ZipFile(outer_zip, "r") as zf:
    zf.extractall(data_dir)

# find inner zip path (it may have a subfolder)
inner_zip = None
for root, dirs, files in os.walk(data_dir):
    for f in files:
        if f.lower().endswith("cmapssdata.zip"):
            inner_zip = os.path.join(root, f)
            break
    if inner_zip:
        break

if inner_zip is None:
    raise FileNotFoundError("Could not find inner CMAPSSData.zip inside downloaded archive")
print("Found inner zip:", inner_zip)


# extract inner zip into turbofan_data/CMAPSSData (idempotent)
extract_dir = os.path.join(data_dir, "CMAPSSData")
os.makedirs(extract_dir, exist_ok=True)
with zipfile.ZipFile(inner_zip, "r") as zf:
    zf.extractall(extract_dir)
print("Extracted inner zip to:", extract_dir)

# Find FD001 train file
train_file = None
for root, dirs, files in os.walk(extract_dir):
    for f in files:
        if f.lower() == "train_fd001.txt":
            train_file = os.path.join(root, f)
            break
    if train_file:
        break
if train_file is None:
    raise FileNotFoundError("train_FD001.txt not found inside extracted CMAPSSData folder")
print("Using training file:", train_file)

# ----------------- 2) Load data and build RUL labels -----------------
cols = ['engine_id', 'cycle'] + [f'setting_{i}' for i in range(1,4)] + [f's_{i}' for i in range(1,22)]
df = pd.read_csv(train_file, sep=r"\s+", header=None, names=cols)

# compute RUL
max_cycle = df.groupby('engine_id')['cycle'].max().reset_index().rename(columns={'cycle':'max_cycle'})
df = df.merge(max_cycle, on='engine_id')
df['RUL'] = df['max_cycle'] - df['cycle']
df.drop(columns=['max_cycle'], inplace=True)

# features & target
feature_cols = [c for c in df.columns if c not in ['engine_id','cycle','RUL']]
X = df[feature_cols].values
y = df['RUL'].values

# train/val split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.20, random_state=42)

# scale
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_val_s = scaler.transform(X_val)

# ----------------- 3) Train a RandomForest (fast) -----------------
rf = RandomForestRegressor(n_estimators=150, max_depth=12, random_state=42, n_jobs=-1)
print("Training RandomForest ...")
rf.fit(X_train_s, y_train)
print("RF val R^2:", rf.score(X_val_s, y_val))

# ----------------- 4) LIME explanation for one sample -----------------
print("\n--- LIME (local) ---")
from lime import lime_tabular
idx = 10 if X_val_s.shape[0] > 10 else 0
explainer_lime = lime_tabular.LimeTabularExplainer(
    training_data=X_train_s,
    feature_names=feature_cols,
    mode='regression',
    discretize_continuous=True
)


lime_exp = explainer_lime.explain_instance(X_val_s[idx], rf.predict, num_features=8)

print("LIME explanation (top 8 features):")
for feat, contrib in lime_exp.as_list():
    print(f"{feat}: {contrib:.3f}")

# ----------------- 5) FAST SHAP (subsampled) -----------------
print("\n--- SHAP (fast subsample) ---")
import shap
n_shap = min(200, X_val_s.shape[0])  # small and fast
shap_idxs = np.random.choice(X_val_s.shape[0], size=n_shap, replace=False)
X_shap = X_val_s[shap_idxs]

explainer_shap = shap.TreeExplainer(rf)
shap_values = explainer_shap.shap_values(X_shap)  # fast because n_shap small
print(f"Computed SHAP values for {n_shap} rows")

# summary plot (will show inline if notebook supports plots)
shap.summary_plot(shap_values, X_shap, feature_names=feature_cols, show=True)

# optional: local SHAP for same sample idx
shap_single = explainer_shap.shap_values(X_val_s[idx:idx+1])
print("\nLocal SHAP for sample idx", idx)
print("SHAP values (feature -> value):")
for f_name, val in zip(feature_cols, shap_single[0].flatten()):
    # only print non-negligible contributions
    if abs(val) > 1e-6:
        print(f"{f_name}: {val:.4f}")

print("\nDone. If plots don't display, ensure your notebook supports inline plotting (matplotlib).")

Downloading turbofan dataset (will take a short while)...


Download complete: turbofan_data\[Link]
Found inner zip: turbofan_data\6. Turbofan Engine Degradation Simulation Data Set\CMAPSSData.zip
Extracted inner zip to: turbofan_data\CMAPSSData
Using training file: turbofan_data\CMAPSSData\train_FD001.txt
Training RandomForest ...
RF val R^2: 0.6306293029798791

--- LIME (local) ---


LIME explanation (top 8 features):
s_12 > 0.74: 11.778
-0.21 < s_9 <= 0.19: 11.520
-0.74 < s_4 <= -0.10: 7.690
-0.72 < s_11 <= -0.09: 4.218
s_20 > 0.74: 3.040
-0.73 < s_15 <= -0.08: 2.392
s_2 <= -0.71: 2.231
0.01 < setting_1 <= 0.69: 2.200
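LIME arrives at contributions like these by sampling points around the instance, weighting them by proximity, and fitting a weighted linear surrogate whose coefficients approximate the model's local gradient. A NumPy-only sketch of that idea on a hypothetical two-feature black box (the function, neighborhood scale, and kernel width are illustrative, not LIME's defaults):

```python
import numpy as np

rng = np.random.default_rng(0)

def black_box(X):                      # hypothetical model: nonlinear in x0, linear in x1
    return X[:, 0] ** 2 + 3.0 * X[:, 1]

x0 = np.array([1.0, 2.0])              # instance to explain

Z = x0 + rng.normal(scale=0.1, size=(500, 2))      # perturbed neighborhood
w = np.exp(-np.sum((Z - x0) ** 2, axis=1) / 0.02)  # proximity kernel weights

# weighted least squares: intercept plus one slope per feature
A = np.column_stack([np.ones(len(Z)), Z]) * np.sqrt(w)[:, None]
b = black_box(Z) * np.sqrt(w)
coef = np.linalg.lstsq(A, b, rcond=None)[0][1:]    # drop the intercept
print(np.round(coef, 1))  # ≈ [2. 3.], the local gradient at x0
```

The surrogate's slopes recover the gradient of `black_box` at `x0` (which is (2, 3)), mirroring how LIME's per-feature weights describe local, not global, behavior.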

--- SHAP (fast subsample) ---


Computed SHAP values for 200 rows
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_1032\[Link]: FutureWarnin
g: The NumPy global RNG was seeded by calling `[Link]`. In a future versi
on this function will no longer use the global RNG. Pass `rng` explicitly to opt-
in to the new behaviour and silence this warning.
shap.summary_plot(shap_values, X_shap, feature_names=feature_cols, show=True)

[Figure: SHAP summary plot (beeswarm) of per-feature contributions across the 200 sampled validation rows]

Local SHAP for sample idx 10


SHAP values (feature -> value):
setting_1: -0.2128
setting_2: -0.3015
s_2: 0.5282
s_3: 0.2771
s_4: 8.7374
s_6: -0.0157
s_7: 0.1358
s_8: -0.0640
s_9: 6.4077
s_11: 9.8663
s_12: 12.1784
s_13: 3.5527
s_14: -3.4699
s_15: 1.1906
s_17: -0.4829
s_20: 2.5888
s_21: -0.6387

Done. If plots don't display, ensure your notebook supports inline plotting (matplotlib).
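The local SHAP values above satisfy the additivity property: the base (expected) value plus the per-feature contributions reconstructs the model's prediction for that sample. Exact Shapley values can be brute-forced for a tiny model, which makes the property easy to check directly (the toy `f` and zero background are assumptions for illustration, not the notebook's Random Forest):

```python
import numpy as np
from itertools import combinations
from math import factorial

def f(x):                       # toy model with an interaction term
    return x[0] * x[1] + x[2]

background = np.zeros(3)        # "absent" features take background values
x = np.array([2.0, 3.0, 5.0])   # instance to explain

def value(S):
    """Model output with features in S taken from x, the rest from background."""
    z = background.copy()
    z[list(S)] = x[list(S)]
    return f(z)

n = 3
phi = np.zeros(n)
for i in range(n):
    others = [j for j in range(n) if j != i]
    for k in range(n):
        for S in combinations(others, k):
            # Shapley weight for a coalition of size k
            wgt = factorial(k) * factorial(n - k - 1) / factorial(n)
            phi[i] += wgt * (value(S + (i,)) - value(S))

base = value(())                            # expected value under the background
print(phi, base)                            # [3. 3. 5.] 0.0
print(np.isclose(base + phi.sum(), f(x)))   # True: additivity holds
```

The interaction `x[0] * x[1] = 6` is split evenly between the two participating features, and the additive `x[2]` gets its full contribution of 5, summing with the base value to the prediction of 11.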

In [ ]:
