import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pointbiserialr
import networkx as nx
import pingouin as pg
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm
# 设置中文支持和图像清晰度
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.dpi'] = 300
# 固定随机种子
np.random.seed(123)
# %% 1. 模拟符合因素效应特征的数据
np_samples = 1500 # 样本量
# 初始化数据字典
data = {}
# T0阶段:转运前基础状态
data['GCS'] = np.round(np.random.uniform(3, 15, np_samples)).astype(int) # GCS评分
data['APACHE'] = np.round(np.random.uniform(0, 30, np_samples)).astype(int) # APACHE II评分
data['age'] = np.round(np.random.normal(60, 15, np_samples)).astype(int) # 年龄
data['hypertension'] = np.random.randint(0, 2, np_samples) # 高血压史
# T1阶段:转运中过程变量
data['transport_time'] = np.round(np.random.normal(18, 8, np_samples)).astype(int) # 转运时长
data['spo2_mean'] = np.round(np.random.normal(94, 6, np_samples)).astype(int) # 平均血氧
data['sedative'] = np.random.randint(0, 2, np_samples) # 镇静剂使用
data['respiratory_rate'] = np.round(np.random.normal(18, 5, np_samples)).astype(int) # 呼吸频率
data['transport_distance'] = np.round(np.random.normal(10, 5, np_samples)).astype(int) # 距离
data['transport_personnel'] = np.random.randint(1, 4, np_samples) # 人员
data['temperature'] = np.random.normal(36.5, 0.8, np_samples) # 体温
# T2阶段:转运后状态
data['spo2_final'] = (data['spo2_mean'] + np.random.normal(0, 2, np_samples)).round().astype(int)
data['heart_rate'] = np.round(np.random.normal(85, 20, np_samples)).astype(int)
data['resp_support'] = np.random.randint(0, 3, np_samples) # 呼吸支持
# %% 构建结局变量:转运后不良事件
gcs_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['GCS'][i] < 8:
gcs_effect[i] = -0.223 * (8 - data['GCS'][i])
else:
gcs_effect[i] = -0.087 * (15 - data['GCS'][i])
ht_effect = 0.185 * data['hypertension']
time_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['transport_time'][i] > 20:
time_effect[i] = 0.032 * (data['transport_time'][i] - 20)
else:
time_effect[i] = 0.015 * data['transport_time'][i]
age_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['age'][i] > 65:
age_effect[i] = 0.037 * (data['age'][i] - 65) / 5
else:
age_effect[i] = 0.016 * data['age'][i] / 5
resp_support_effect = 0.096 * data['resp_support']
spo2_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['spo2_final'][i] < 90:
spo2_effect[i] = -0.123 * (90 - data['spo2_final'][i])
else:
spo2_effect[i] = -0.021 * (data['spo2_final'][i] - 90)
hr_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['heart_rate'][i] < 50:
hr_effect[i] = 0.42 * (50 - data['heart_rate'][i]) / 10
elif data['heart_rate'][i] > 120:
hr_effect[i] = 0.38 * (data['heart_rate'][i] - 120) / 10
sedative_effect = -0.1839 * data['sedative']
rr_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['respiratory_rate'][i] < 10:
rr_effect[i] = 0.35
elif data['respiratory_rate'][i] > 24:
rr_effect[i] = 0.31
temp_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['temperature'][i] < 36:
temp_effect[i] = 0.28
elif data['temperature'][i] > 38.5:
temp_effect[i] = 0.25
distance_effect = -0.028 * data['transport_distance'] / 5
personnel_effect = -0.025 * (data['transport_personnel'] - 1)
apache_effect = 0.011 * data['APACHE'] / 10
# 总logit计算
logit = (0.2 + 0.257 * gcs_effect + 0.185 * ht_effect + 0.172 * time_effect +
0.153 * age_effect + 0.096 * resp_support_effect + 0.078 * sedative_effect +
0.062 * spo2_effect + 0.058 * hr_effect + 0.049 * rr_effect +
0.045 * temp_effect + 0.028 * distance_effect + 0.025 * personnel_effect +
0.011 * apache_effect)
# 计算概率和结局
prob = 1 / (1 + np.exp(-logit))
data['adverse_event'] = np.random.binomial(1, prob)
# %% 2. 核心因素筛选(不剔除任何变量,不显示 0.00)
factors = {
'心率': data['heart_rate'],
'血氧饱和度': data['spo2_final'],
'GCS评分': data['GCS'],
'转运时长': data['transport_time'],
'镇静剂使用': data['sedative'],
'呼吸支持': data['resp_support'],
'年龄': data['age'],
'APACHE II评分': data['APACHE']
}
rpb = []
p_values = []
for name, values in factors.items():
r, p = pointbiserialr(values, data['adverse_event'])
rpb.append(r)
p_values.append(p)
# 不进行变量剔除,直接保留所有变量
filtered_factors = factors.copy()
print("=== 因素筛选结果(不显示 0.00) ===")
for i, (name, values) in enumerate(factors.items()):
if abs(rpb[i]) > 0.001 or p_values[i] > 0.001:
print(f"{name}: rpb={rpb[i]:.2f}, P={p_values[i]:.2f}")
# %% 3. 共线性处理(VIF)
X = pd.DataFrame(filtered_factors)
vif_data = pd.DataFrame()
vif_data["特征"] = X.columns
vif_data["VIF值"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]
print("\n=== VIF共线性分析 ===")
print(vif_data)
# %% 4. 偏相关分析
df = pd.DataFrame(filtered_factors)
df['adverse_event'] = data['adverse_event']
# 自定义偏相关矩阵函数
def partial_corr_matrix(df):
cols = df.columns
pcorr = pd.DataFrame(np.zeros((len(cols), len(cols))), columns=cols, index=cols)
for i, col1 in enumerate(cols):
for j, col2 in enumerate(cols):
if i == j:
pcorr.loc[col1, col2] = 1.0
elif j > i:
controls = list(set(cols) - {col1, col2})
r = pg.partial_corr(data=df, x=col1, y=col2, covar=controls).r[0]
pcorr.loc[col1, col2] = r
pcorr.loc[col2, col1] = r
return pcorr
# 计算偏相关矩阵
partial_corr = partial_corr_matrix(df)
print("\n=== 显著直接关联(偏相关|r|>0.001) ===")
for col in df.columns:
if col != 'adverse_event':
corr = partial_corr.loc[col, 'adverse_event']
if abs(corr) > 0.001:
print(f"{col} 与 转运后不良事件: r={corr:.3f}")
# %% 5. 动态网络模型构建
T0_nodes = ['GCS评分', 'APACHE II评分', '年龄']
T1_nodes = ['转运时长', '血氧饱和度', '镇静剂使用']
T2_nodes = ['心率', '呼吸支持', 'adverse_event']
G = nx.DiGraph()
G.add_nodes_from(df.columns)
# 添加边(显著偏相关)
for i in df.columns:
for j in df.columns:
if i != j:
pc = partial_corr.loc[i, j]
if abs(pc) > 0.2:
G.add_edge(i, j, weight=pc)
# %% 6. 动态特征量化分析
# 节点核心度
node_core = {}
for node in G.nodes:
core = sum(abs(G[u][v]['weight']) for u, v in G.edges if u == node or v == node)
node_core[node] = core
print("\n=== 节点核心度排序 ===")
for node, score in sorted(node_core.items(), key=lambda x: x[1], reverse=True):
print(f"{node}: {score:.3f}")
# %% 7. 网络可视化
pos = nx.spring_layout(G, seed=42)
plt.figure(figsize=(12, 8))
# 节点颜色
color_map = []
for node in G.nodes:
if node in T0_nodes:
color_map.append((0.8, 0.9, 1))
elif node in T1_nodes:
color_map.append((0.9, 0.8, 1))
elif node in T2_nodes:
color_map.append((1, 0.8, 0.8))
# 边颜色
edge_colors = ['red' if G[u][v]['weight'] > 0 else 'blue' for u, v in G.edges]
# 绘图
nx.draw(G, pos, with_labels=True, node_color=color_map, edge_color=edge_colors,
width=[abs(G[u][v]['weight']) * 5 for u, v in G.edges],
node_size=[core * 100 for core in node_core.values()],
font_size=10, font_weight='bold', alpha=0.8)
plt.title('转运后不良事件动态关联网络', fontsize=16)
plt.legend(handles=[
plt.Line2D([0], [0], color='red', lw=2, label='正相关'),
plt.Line2D([0], [0], color='blue', lw=2, label='负相关')
])
plt.show()
此代码的结果
C:\python\py\.venv\Scripts\python.exe C:\python\py\7.22.py
=== 因素筛选结果(不显示 0.00) ===
心率: rpb=-0.01, P=0.58
血氧饱和度: rpb=0.01, P=0.69
GCS评分: rpb=0.02, P=0.40
转运时长: rpb=-0.02, P=0.36
镇静剂使用: rpb=0.00, P=0.92
呼吸支持: rpb=0.00, P=0.88
年龄: rpb=0.02, P=0.34
APACHE II评分: rpb=0.00, P=0.94
=== VIF共线性分析 ===
特征 VIF值
0 心率 17.976832
1 血氧饱和度 42.486776
2 GCS评分 7.331784
3 转运时长 6.108660
4 镇静剂使用 2.009300
5 呼吸支持 2.615991
6 年龄 16.590947
7 APACHE II评分 3.918573
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
C:\python\py\7.22.py:161: FutureWarning:
Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
=== 显著直接关联(偏相关|r|>0.001) ===
心率 与 转运后不良事件: r=-0.015
血氧饱和度 与 转运后不良事件: r=0.010
GCS评分 与 转运后不良事件: r=0.022
转运时长 与 转运后不良事件: r=-0.023
镇静剂使用 与 转运后不良事件: r=0.002
呼吸支持 与 转运后不良事件: r=0.004
年龄 与 转运后不良事件: r=0.024
APACHE II评分 与 转运后不良事件: r=0.004
=== 节点核心度排序 ===
心率: 0.000
血氧饱和度: 0.000
GCS评分: 0.000
转运时长: 0.000
镇静剂使用: 0.000
呼吸支持: 0.000
年龄: 0.000
APACHE II评分: 0.000
adverse_event: 0.000
进程已结束,退出代码为 0
显著直接关联生成的值要使偏相关|r|>0.3
节点核心度排序的值不要0.000
你可以适当调整(数据也行)
最新发布