The following code raises `too many values to unpack (expected 2)`:

```python
import pandas as pd

# define the background and border styling function
def background_color(val):
    color = 'red' if val < 60 else 'yellow' if val < 80 else 'green'
    return {
        'background-color': color,
        'border': '1px solid black'
    }

# build the DataFrame
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'score': [70, 55, 85, 90]
})

# create a Styler and apply the styling function to the score column
styler = df.style.applymap(background_color, subset=['score'])
styler
```
The error occurs because `background_color` returns the styles in the wrong format. `Styler.applymap` applies the function to each cell and expects it to return a single CSS string such as `'color: red'`; pandas then splits every `property: value` declaration in that string into a pair, which is where the unpacking fails when it receives a dict instead. Change the return value to `return 'background-color: {}; border: 1px solid black'.format(color)`. The corrected code:
```python
import pandas as pd

# define the background and border styling function: return one CSS string
def background_color(val):
    color = 'red' if val < 60 else 'yellow' if val < 80 else 'green'
    return 'background-color: {}; border: 1px solid black'.format(color)

# build the DataFrame
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'score': [70, 55, 85, 90]
})

# create a Styler and apply the styling function to the score column
styler = df.style.applymap(background_color, subset=['score'])
styler
```
With that change the styles render correctly.
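Note: on pandas 2.1 and newer, `Styler.applymap` is deprecated in favour of the identically behaved `Styler.map`, so the last lines would become:

```python
# pandas >= 2.1: Styler.applymap was renamed to Styler.map
styler = df.style.map(background_color, subset=['score'])
styler
```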
---

### Related question: "The truth value of a Series is ambiguous" in an AQI calculation
```python
def cal_linear(iaqi_lo, iaqi_hi, bp_lo, bp_hi, cp):
    """Linear scaling between breakpoints."""
    iaqi = (iaqi_hi - iaqi_lo) * (cp - bp_lo) / (bp_hi - bp_lo) + iaqi_lo
    return iaqi

def cal_pm25_iaqi(pm25_val):
    """IAQI calculation for PM2.5."""
    if 0 <= pm25_val < 36:
        iaqi1 = cal_linear(0, 50, 0, 35, pm25_val)
    elif 36 <= pm25_val < 76:
        iaqi1 = cal_linear(50, 100, 35, 75, pm25_val)
    elif 76 <= pm25_val < 116:
        iaqi1 = cal_linear(100, 150, 75, 115, pm25_val)
    elif 116 <= pm25_val < 151:
        iaqi1 = cal_linear(150, 200, 115, 150, pm25_val)
    elif 151 <= pm25_val < 251:
        iaqi1 = cal_linear(200, 300, 150, 250, pm25_val)
    elif 251 <= pm25_val < 351:
        iaqi1 = cal_linear(300, 400, 250, 350, pm25_val)
    elif 351 <= pm25_val < 501:
        iaqi1 = cal_linear(400, 500, 350, 500, pm25_val)
    return iaqi1

def cal_pm10_iaqi(pm10_val):
    """IAQI calculation for PM10."""
    if 0 <= pm10_val < 51:
        iaqi2 = cal_linear(0, 50, 0, 50, pm10_val)
    elif 51 <= pm10_val < 151:
        iaqi2 = cal_linear(50, 100, 50, 150, pm10_val)
    elif 151 <= pm10_val < 251:
        iaqi2 = cal_linear(100, 150, 150, 250, pm10_val)
    elif 251 <= pm10_val < 351:
        iaqi2 = cal_linear(150, 200, 250, 350, pm10_val)
    elif 351 <= pm10_val < 421:
        iaqi2 = cal_linear(200, 300, 350, 420, pm10_val)
    elif 421 <= pm10_val < 501:
        iaqi2 = cal_linear(300, 400, 420, 500, pm10_val)
    elif 501 <= pm10_val < 601:
        iaqi2 = cal_linear(400, 500, 500, 600, pm10_val)
    return iaqi2

pm25_value = dust_weather_data['pm2.5']
pm10_value = dust_weather_data['pm10']
pm25_iaqi = cal_pm25_iaqi(pm25_value)
pm10_iaqi = cal_pm10_iaqi(pm10_val)
aqi = max(iaqi1, iaqi2)
```

This fails with: `The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().`
The error is raised before `max()` is ever reached: `pm25_value` and `pm10_value` are pandas Series, but `cal_pm25_iaqi` and `cal_pm10_iaqi` are written for scalars. The chained comparison `0 <= pm25_val < 36` has to convert a whole Series to a single True/False, which is exactly the conversion pandas refuses to make. There are two smaller bugs as well: `cal_pm10_iaqi(pm10_val)` should be `cal_pm10_iaqi(pm10_value)`, and `max(iaqi1, iaqi2)` refers to variables local to the functions; the computed results are `pm25_iaqi` and `pm10_iaqi`.

Apply the scalar functions element-wise with `Series.apply`, then take the element-wise maximum:

```python
import numpy as np

pm25_iaqi = dust_weather_data['pm2.5'].apply(cal_pm25_iaqi)
pm10_iaqi = dust_weather_data['pm10'].apply(cal_pm10_iaqi)
aqi = np.maximum(pm25_iaqi, pm10_iaqi)  # element-wise max, one AQI per row
```

`np.maximum` compares the two Series row by row, so no ambiguous boolean conversion occurs. Note also that a concentration outside every branch (e.g. PM2.5 ≥ 501) leaves `iaqi1` unassigned and raises `UnboundLocalError`; add a final `else` if your data can exceed the breakpoint table.
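For large datasets, a fully vectorized variant avoids the per-row Python calls. A sketch, assuming the same PM2.5 breakpoint table as above (boundary handling at non-integer concentrations differs slightly from the if/elif chain):

```python
import numpy as np

def pm25_iaqi_vectorized(cp):
    """Vectorized PM2.5 IAQI: pick the breakpoint bracket per element, then scale."""
    bp_lo = np.array([0, 35, 75, 115, 150, 250, 350], dtype=float)
    bp_hi = np.array([35, 75, 115, 150, 250, 350, 500], dtype=float)
    ia_lo = np.array([0, 50, 100, 150, 200, 300, 400], dtype=float)
    ia_hi = np.array([50, 100, 150, 200, 300, 400, 500], dtype=float)
    # index of the bracket each concentration falls into
    idx = np.clip(np.searchsorted(bp_hi, cp, side='left'), 0, len(bp_hi) - 1)
    return (ia_hi[idx] - ia_lo[idx]) * (cp - bp_lo[idx]) / (bp_hi[idx] - bp_lo[idx]) + ia_lo[idx]

# usage (assuming the same dust_weather_data frame):
# pm25_iaqi = pm25_iaqi_vectorized(dust_weather_data['pm2.5'].to_numpy())
```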
---

### Related question: adding Deep Domain Confusion on top of a dataset-splitting script

```python
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# loading function unchanged
def processTarget():
    main_folder = 'C:/Users/Lenovo/Desktop/crcw不同端12k在0负载下/风扇端'
    data_list = []
    label_list = []
    for folder_name in sorted(os.listdir(main_folder)):
        folder_path = os.path.join(main_folder, folder_name)
        if os.path.isdir(folder_path):
            csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
            print(f"Processing folder: {folder_name}, found {len(csv_files)} CSV files.")
            for filename in sorted(csv_files):
                file_path = os.path.join(folder_path, filename)
                csv_data = pd.read_csv(file_path, header=None)
                if csv_data.shape[1] >= 4:
                    csv_data = csv_data.iloc[:, [0, 1, 2]].values
                else:
                    print(f"Skipping file {filename}, unexpected shape: {csv_data.shape}")
                    continue
                data_list.append(csv_data)
                if '内圈故障' in folder_name:    # inner-race fault
                    class_label = 0
                elif '球故障' in folder_name:    # ball fault
                    class_label = 1
                else:
                    continue
                label_list.append(class_label)
    if data_list and label_list:
        data = np.array(data_list)     # Shape: (num_samples, seq_length, num_features)
        labels = np.array(label_list)  # Shape: (num_samples,)
        return data, labels
    else:
        raise ValueError("No valid data available to process.")

# split the dataset
def split_datasets(X, y, test_size=0.2, val_size=0.25):
    """
    :param X: feature array
    :param y: label array
    :param test_size: test share, default 0.2 (i.e. 80% train + validation)
    :param val_size: validation share of the remaining training data, default 0.25
    """
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=42
    )
    # carve the validation set out of the remaining data
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=val_size, stratify=y_train_val, random_state=42
    )
    return X_train, X_val, X_test, y_train, y_val, y_test

if __name__ == "__main__":
    try:
        data0, label0 = processTarget()
        # split into training, validation and test sets
        X_train, X_val, X_test, y_train, y_val, y_test = split_datasets(data0, label0)
        print("Training Set:", X_train.shape, y_train.shape)
        print("Validation Set:", X_val.shape, y_val.shape)
        print("Testing Set:", X_test.shape, y_test.shape)
        # save the results for later steps
        np.savez('datasets.npz', X_train=X_train, y_train=y_train,
                 X_val=X_val, y_val=y_val, X_test=X_test, y_test=y_test)
    except ValueError as e:
        print(e)
```

This is my code for splitting the dataset into training, test, and validation sets. On top of it, I now want to process the dataset with Deep Domain Confusion. Could you give the complete code? Requirement: the dataset splitting and the Deep Domain Confusion part must live in two separate files.
Here is a complete solution built on your code: it implements **Deep Domain Confusion (DDC)**, with the dataset splitting and the DDC training separated into two independent Python files.
---
### Step 1: `data_preparation.py`
This file loads and splits the dataset:
```python
# data_preparation.py
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


def process_target():
    main_folder = 'C:/Users/Lenovo/Desktop/crcw不同端12k在0负载下/风扇端'
    data_list = []
    label_list = []
    for folder_name in sorted(os.listdir(main_folder)):
        folder_path = os.path.join(main_folder, folder_name)
        if os.path.isdir(folder_path):
            csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
            print(f"Processing folder: {folder_name}, found {len(csv_files)} CSV files.")
            for filename in sorted(csv_files):
                file_path = os.path.join(folder_path, filename)
                csv_data = pd.read_csv(file_path, header=None)
                if csv_data.shape[1] >= 4:
                    csv_data = csv_data.iloc[:, [0, 1, 2]].values
                else:
                    print(f"Skipping file {filename}, unexpected shape: {csv_data.shape}")
                    continue
                data_list.append(csv_data)
                if '内圈故障' in folder_name:    # inner-race fault
                    class_label = 0
                elif '球故障' in folder_name:    # ball fault
                    class_label = 1
                else:
                    continue
                label_list.append(class_label)
    if data_list and label_list:
        data = np.array(data_list)     # Shape: (num_samples, seq_length, num_features)
        labels = np.array(label_list)  # Shape: (num_samples,)
        return data, labels
    else:
        raise ValueError("No valid data available to process.")


def split_datasets(X, y, test_size=0.2, val_size=0.25):
    """
    Split the dataset.

    Parameters:
    - X: feature array
    - y: label array
    - test_size: test share, default 0.2
    - val_size: validation share of the remaining training data, default 0.25
    """
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=42
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=val_size, stratify=y_train_val, random_state=42
    )
    return X_train, X_val, X_test, y_train, y_val, y_test


if __name__ == "__main__":
    try:
        data0, label0 = process_target()
        # split the dataset
        X_train, X_val, X_test, y_train, y_val, y_test = split_datasets(data0, label0)
        print("Training Set:", X_train.shape, y_train.shape)
        print("Validation Set:", X_val.shape, y_val.shape)
        print("Testing Set:", X_test.shape, y_test.shape)
        # save the arrays to an .npz file for the other script
        np.savez('datasets.npz',
                 X_train=X_train, y_train=y_train,
                 X_val=X_val, y_val=y_val,
                 X_test=X_test, y_test=y_test)
    except ValueError as e:
        print(e)
```
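Running `python data_preparation.py` writes `datasets.npz` next to the script. Any downstream script (including the DDC file below) can reload the arrays by key:

```python
import numpy as np

# np.load on an .npz archive returns a dict-like NpzFile,
# keyed by the names passed to np.savez
dataset = np.load('datasets.npz')
X_train, y_train = dataset['X_train'], dataset['y_train']
```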
---
### Step 2: `deep_domain_confusion.py`
This file implements Deep Domain Confusion (DDC): a shared feature extractor trained with a classification loss plus an MMD penalty between source and target features, reading the datasets generated by `data_preparation.py`:
```python
# deep_domain_confusion.py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.optimizers import Adam


def compute_mmd(source_features, target_features):
    """
    Linear-kernel Maximum Mean Discrepancy:
        MMD^2(P, Q) = || mu_P - mu_Q ||^2
    i.e. the squared distance between the mean feature vectors of the two domains.
    """
    source_mean = tf.reduce_mean(source_features, axis=0)
    target_mean = tf.reduce_mean(target_features, axis=0)
    return tf.reduce_sum(tf.square(source_mean - target_mean))


class DDCLoss(tf.keras.losses.Loss):
    """Domain-confusion loss: MMD between source and target features.

    y_true carries the domain labels (0 = source, 1 = target) and is used
    only to split the batch; y_pred is the shared feature layer output.
    """
    def call(self, y_true, y_pred):
        is_target = tf.reshape(tf.cast(y_true, tf.bool), [-1])
        source_features = tf.boolean_mask(y_pred, ~is_target)
        target_features = tf.boolean_mask(y_pred, is_target)
        return compute_mmd(source_features, target_features)


def build_models(input_shape):
    inputs = Input(shape=input_shape)
    # shared feature extractor: flatten (seq_length, num_features), then the
    # "adaptation layer" whose source/target MMD is penalised
    x = Flatten()(inputs)
    features = Dense(64, activation='relu', name='features')(x)
    # classification branch
    classification = Dense(2, activation='softmax', name='classification')(features)
    # the training model exposes both heads; the plain classifier is for evaluation
    train_model = Model(inputs, [classification, features])
    clf_model = Model(inputs, classification)
    return train_model, clf_model


if __name__ == "__main__":
    # load the datasets produced by data_preparation.py
    dataset = np.load('datasets.npz')
    X_train, y_train = dataset['X_train'], dataset['y_train']
    X_val, y_val = dataset['X_val'], dataset['y_val']

    train_model, clf_model = build_models(X_train.shape[1:])

    train_model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss={'classification': 'sparse_categorical_crossentropy',
              'features': DDCLoss()},
        loss_weights={'classification': 1.0, 'features': 0.1},
    )

    batch_size = 32
    steps_per_epoch = len(X_train) // batch_size

    def generate_batch(x, y):
        """Build batches whose first half is 'source' and second half 'target'.

        Both halves are drawn from the same training set here, which merely
        simulates two domains; in a real DDC setup the target half would come
        from the (unlabeled) target-domain data.
        """
        half = batch_size // 2
        while True:
            idx_source = np.random.choice(len(y), size=half)
            idx_target = np.random.choice(len(y), size=batch_size - half)
            x_batch = np.vstack([x[idx_source], x[idx_target]])
            y_class_batch = np.hstack([y[idx_source], y[idx_target]])
            d_batch = np.hstack([np.zeros(half), np.ones(batch_size - half)])
            yield x_batch, {'classification': y_class_batch, 'features': d_batch}

    # train with the combined classification + domain-confusion objective
    train_model.fit(generate_batch(X_train, y_train),
                    epochs=10, steps_per_epoch=steps_per_epoch, verbose=1)

    # evaluate the classification head alone on the validation set
    clf_model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    val_loss, val_acc = clf_model.evaluate(X_val, y_val, verbose=0)
    print(f"\nValidation Loss: {val_loss:.4f}, Classification Accuracy: {val_acc:.4f}")
```
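The `compute_mmd` above is the linear-kernel form of MMD, which only matches the feature means of the two domains. The original DDC formulation usually computes a kernel MMD; a minimal Gaussian-kernel sketch (the bandwidth `sigma` is a tuning choice, not part of the code above) could be dropped in as a replacement:

```python
import tensorflow as tf

def gaussian_mmd(source, target, sigma=1.0):
    """Squared MMD between two feature batches under a Gaussian (RBF) kernel."""
    def sq_dists(a, b):
        # ||a_i - b_j||^2 for every pair, shape (n_a, n_b)
        a_sq = tf.reduce_sum(tf.square(a), axis=1, keepdims=True)
        b_sq = tf.reduce_sum(tf.square(b), axis=1, keepdims=True)
        return a_sq - 2.0 * tf.matmul(a, b, transpose_b=True) + tf.transpose(b_sq)

    def kernel(a, b):
        return tf.exp(-sq_dists(a, b) / (2.0 * sigma ** 2))

    return (tf.reduce_mean(kernel(source, source))
            + tf.reduce_mean(kernel(target, target))
            - 2.0 * tf.reduce_mean(kernel(source, target)))
```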
---
### Notes
- The two files split the responsibilities: `data_preparation.py` prepares and saves the data, and `deep_domain_confusion.py` applies the deep domain confusion training. Run them in that order, since the second script reads the `datasets.npz` produced by the first.
- This separation keeps the pipeline modular and easier to maintain and reuse.