#pip install pyradiomics -i https://pypi.tuna.tsinghua.edu.cn/simple
#-------multi subjects--------------------------------------
import os
import pandas as pd
import SimpleITK as sitk
from radiomics import featureextractor
root_folder = r"D:\python\Pyradiomics\test-ct-2023.2.23"

# Collect every sub-directory (at any depth) below root_folder; each one is
# treated as a candidate subject folder.
folder_paths = []
for root, dirs, files in os.walk(root_folder):
    for dir_name in dirs:
        folder_paths.append(os.path.join(root, dir_name))

# A single extractor with default settings is reused for all subjects
# instead of being rebuilt on every iteration.
extractor = featureextractor.RadiomicsFeatureExtractor()

for folder_path in folder_paths:
    # Absolute paths are used throughout, so the working directory is left
    # untouched.
    img_path = os.path.join(folder_path, 'orig.nii.gz')      # original image
    label_path = os.path.join(folder_path, 'masked.nii.gz')  # label / mask

    # os.walk also yields nested or unrelated directories; skip any folder
    # that does not hold both input files rather than crashing on read.
    if not (os.path.isfile(img_path) and os.path.isfile(label_path)):
        print(f"Skipping {folder_path}: orig.nii.gz and/or masked.nii.gz not found")
        continue

    # Read the image and its mask.
    itk_img = sitk.ReadImage(img_path)
    itk_label = sitk.ReadImage(label_path)

    # Extract the features and store them as a CSV with one row per entry
    # (entry name in the index column, value in the single data column).
    result = extractor.execute(itk_img, itk_label)
    df = pd.DataFrame.from_dict(result, orient='index')
    save_path = os.path.join(folder_path, 'features.csv')
    df.to_csv(save_path)
# 1. Merge the per-subject CSV files
import pandas as pd
import glob

# Find the features.csv of every direct sub-folder of root_folder.
# Sorting makes the row order of the merged file reproducible
# (rows follow the sorted file paths).
all_files = sorted(glob.glob(os.path.join(root_folder, "*/features.csv")))
if not all_files:
    raise ValueError(
        "No features.csv files found under " + root_folder
        + "; run the feature extraction step first."
    )

# Each features.csv holds the entry names in its first column and the values
# in its second. Using the first column as the index keeps the names, so that
# after transposing every file becomes a single row whose column labels are
# the feature names.
df_from_each_file = [pd.read_csv(f, index_col=0).transpose() for f in all_files]

# axis=0 stacks the one-row frames on top of each other (one row per subject).
concatenated_df = pd.concat(df_from_each_file, axis=0)

# Save the merged table; the row index carries no information and is dropped.
concatenated_df.to_csv(os.path.join(root_folder, "merged_features.csv"), index=False)
# Statistics -- requires a 'Groups' column (group label per subject) in merged_features.csv
# 1. Run an independent two-sample t-test on columns 22-128 of
#    merged_features.csv, recording the statistic and p-value together with
#    the mean and standard deviation of each group.
import pandas as pd
from scipy.stats import ttest_ind

df = pd.read_csv(os.path.join(root_folder, "merged_features.csv"))  # read the merged table

# The two groups are told apart by a 'Groups' column. Fail with an actionable
# message when it is missing instead of a bare KeyError.
if 'Groups' not in df.columns:
    raise KeyError(
        "merged_features.csv has no 'Groups' column; add one labelling each "
        "row as 'PA' or 'HC' before running the statistics."
    )
group1 = df[df['Groups'] == 'PA']
group2 = df[df['Groups'] == 'HC']

# Columns to test, selected by position.
# NOTE(review): presumably these are the feature columns that follow the
# diagnostics columns -- confirm against the layout of your merged file.
columns_to_test = df.columns[22:129]

# Run the t-test column by column.
results = []
for column in columns_to_test:
    data_group1 = group1[column]
    data_group2 = group2[column]
    t_stat, p_val = ttest_ind(data_group1, data_group2)
    results.append((
        column,
        t_stat,
        p_val,
        data_group1.mean(),
        data_group1.std(),
        data_group2.mean(),
        data_group2.std(),
    ))

# Save the results as a new CSV file.
results_df = pd.DataFrame(results, columns=['Column', 'T-statistic', 'P-value', 'Mean_Group1', 'Std_Group1', 'Mean_Group2', 'Std_Group2'])
results_df.to_csv(os.path.join(root_folder, "t_test_results.csv"), index=False)
# 2. Run a Mann-Whitney U test (the Wilcoxon rank-sum test) on columns 22-128
#    of merged_features.csv, recording the statistic and p-value together
#    with the median and the lower/upper quartiles of each group.
import pandas as pd
# mannwhitneyu compares two independent samples of possibly different sizes;
# scipy's `wilcoxon` is the paired signed-rank test and needs equal-length
# samples, so it is not used here.
from scipy.stats import mannwhitneyu

df = pd.read_csv(os.path.join(root_folder, "merged_features.csv"))  # read the merged table

# The two groups are told apart by a 'Groups' column. Fail with an actionable
# message when it is missing instead of a bare KeyError.
if 'Groups' not in df.columns:
    raise KeyError(
        "merged_features.csv has no 'Groups' column; add one labelling each "
        "row as 'PA' or 'HC' before running the statistics."
    )
group1 = df[df['Groups'] == 'PA']
group2 = df[df['Groups'] == 'HC']

# Columns to test, selected by position: columns 22-128, the same range the
# t-test uses.
# NOTE(review): presumably these are the feature columns that follow the
# diagnostics columns -- confirm against the layout of your merged file.
columns_to_test = df.columns[22:129]

# Run the test column by column.
results = []
for column in columns_to_test:
    data_group1 = group1[column]
    data_group2 = group2[column]
    # Request the two-sided test explicitly so the p-value does not depend on
    # the default of the installed SciPy version.
    stat, p_val = mannwhitneyu(data_group1, data_group2, alternative='two-sided')
    results.append((
        column,
        stat,
        p_val,
        data_group1.median(),
        data_group1.quantile(0.25),
        data_group1.quantile(0.75),
        data_group2.median(),
        data_group2.quantile(0.25),
        data_group2.quantile(0.75),
    ))

# Save the results as a new CSV file.
results_df = pd.DataFrame(results, columns=['Column', 'Statistic', 'P-value', 'Median_Group1', 'Q1_Group1', 'Q3_Group1', 'Median_Group2', 'Q1_Group2', 'Q3_Group2'])
results_df.to_csv(os.path.join(root_folder, "mannwhitneyu_test_results.csv"), index=False)
# Radiomics - Pyradiomics - feature extraction - merging files - automatic t / Wilcoxon tests with saved results
# First published 2023-12-25 22:26:21