95 lines
5.7 KiB
Python
95 lines
5.7 KiB
Python
from src.sample_collector import SampleCollector
|
||
|
||
import os

# Initialize the sample collector.
collector = SampleCollector()

# --- Add cat call samples ---------------------------------------------------
# Alternate datasets, kept for quick switching:
# sounds_dir, species = "./data/cat_sounds_2", "cat"
sounds_dir, species = "./data/extras/dataset", "cat"
# sounds_dir, species = "./data/dog_sounds", "dog"

for file in os.listdir(sounds_dir):
    # endswith accepts a tuple — one check covers both extension casings.
    if file.endswith((".wav", ".WAV")):
        collector.add_sounds(os.path.join(sounds_dir, file), species)

# --- Add non-species (negative) call samples --------------------------------
non_sounds_dir = "./data/non_sounds"
for file in os.listdir(non_sounds_dir):
    if file.endswith((".wav", ".WAV")):
        collector.add_non_sounds(os.path.join(non_sounds_dir, file))

# Report how many samples were collected per category.
print(collector.get_sample_counts())
|
||
|
||
# from src.audio_input import AudioInput
|
||
# from src.audio_processor import AudioProcessor
|
||
# from src.feature_extractor import FeatureExtractor
|
||
# from src.cat_intent_classifier_v2 import CatIntentClassifier
|
||
# import os
|
||
# import numpy as np
|
||
#
|
||
# # 初始化组件
|
||
# audio_input = AudioInput()
|
||
# audio_processor = AudioProcessor()
|
||
# feature_extractor = FeatureExtractor()
|
||
#
|
||
# # 提取情感类别特征
|
||
# emotions_dir = "./cat_intents/emotions"
|
||
# emotion_embeddings = []
|
||
# emotion_labels = []
|
||
#
|
||
# for emotion in os.listdir(emotions_dir):
|
||
# emotion_path = os.path.join(emotions_dir, emotion)
|
||
# if os.path.isdir(emotion_path):
|
||
# for file in os.listdir(emotion_path):
|
||
# if file.endswith(".wav") or file.endswith(".WAV"):
|
||
# file_path = os.path.join(emotion_path, file)
|
||
# print(f"处理情感样本: {file_path}")
|
||
#
|
||
# # 加载音频
|
||
# audio_data, sample_rate = audio_input.load_from_file(file_path)
|
||
#
|
||
# # 预处理音频
|
||
# processed_audio = audio_processor.preprocess(audio_data)
|
||
#
|
||
# # 准备YAMNet输入
|
||
# yamnet_input = audio_processor.prepare_yamnet_input(processed_audio)
|
||
#
|
||
# # 提取特征
|
||
# features = feature_extractor.process_audio(yamnet_input)
|
||
#
|
||
# # 使用平均嵌入向量
|
||
# embedding_mean = np.mean(features["embeddings"], axis=0)
|
||
#
|
||
# # 添加到训练数据
|
||
# emotion_embeddings.append(embedding_mean)
|
||
# emotion_labels.append(emotion)
|
||
#
|
||
# # 训练情感分类器
|
||
# print(f"训练情感分类器,样本数: {len(emotion_embeddings)}")
|
||
# emotion_classifier = CatIntentClassifier()
|
||
# emotion_history = emotion_classifier.train(
|
||
# np.array(emotion_embeddings),
|
||
# emotion_labels,
|
||
# epochs=100,
|
||
# batch_size=16
|
||
# )
|
||
#
|
||
# # 保存情感分类器
|
||
# os.makedirs("./models", exist_ok=True)
|
||
# emotion_paths = emotion_classifier.save_model("./models", "emotions")
|
||
# # phrases_paths = emotion_classifier.save_model("./models", "phrases")
|
||
# print(f"情感分类器已保存: {emotion_paths}")
|
||
|
||
# 类似地,训练短语分类器
|
||
# ...(重复上述过程,但使用phrases目录)
|
||
|
||
# aa = "F_BAC01_MC_MN_SIM01_101.wav, F_BAC01_MC_MN_SIM01_102.wav, F_BAC01_MC_MN_SIM01_103.wav, F_BAC01_MC_MN_SIM01_104.wav, F_BAC01_MC_MN_SIM01_105.wav, F_BAC01_MC_MN_SIM01_201.wav, F_BAC01_MC_MN_SIM01_202.wav, F_BAC01_MC_MN_SIM01_203.wav, F_BAC01_MC_MN_SIM01_301.wav, F_BAC01_MC_MN_SIM01_302.wav, F_BAC01_MC_MN_SIM01_303.wav, F_BAC01_MC_MN_SIM01_304.wav, F_BLE01_EU_FN_DEL01_101.wav, F_BLE01_EU_FN_DEL01_102.wav, F_BLE01_EU_FN_DEL01_103.wav, F_BRA01_MC_MN_SIM01_301.wav, F_BRA01_MC_MN_SIM01_302.wav, F_BRI01_MC_FI_SIM01_101.wav, F_BRI01_MC_FI_SIM01_102.wav, F_BRI01_MC_FI_SIM01_103.wav, F_BRI01_MC_FI_SIM01_104.wav, F_BRI01_MC_FI_SIM01_105.wav, F_BRI01_MC_FI_SIM01_106.wav, F_BRI01_MC_FI_SIM01_201.wav, F_BRI01_MC_FI_SIM01_202.wav, F_CAN01_EU_FN_GIA01_201.wav, F_CAN01_EU_FN_GIA01_202.wav, F_DAK01_MC_FN_SIM01_301.wav, F_DAK01_MC_FN_SIM01_302.wav, F_DAK01_MC_FN_SIM01_303.wav, F_DAK01_MC_FN_SIM01_304.wav, F_IND01_EU_FN_ELI01_101.wav, F_IND01_EU_FN_ELI01_102.wav, F_IND01_EU_FN_ELI01_103.wav, F_IND01_EU_FN_ELI01_104.wav, F_IND01_EU_FN_ELI01_201.wav, F_IND01_EU_FN_ELI01_202.wav, F_IND01_EU_FN_ELI01_203.wav, F_IND01_EU_FN_ELI01_301.wav, F_IND01_EU_FN_ELI01_302.wav, F_IND01_EU_FN_ELI01_304.wav, F_LEO01_EU_MI_RIT01_101.wav, F_LEO01_EU_MI_RIT01_102.wav, F_LEO01_EU_MI_RIT01_103.wav, F_LEO01_EU_MI_RIT01_104.wav, F_LEO01_EU_MI_RIT01_105.wav, F_MAG01_EU_FN_FED01_101.wav, F_MAG01_EU_FN_FED01_102.wav, F_MAG01_EU_FN_FED01_103.wav, F_MAG01_EU_FN_FED01_104.wav, F_MAG01_EU_FN_FED01_105.wav, F_MAG01_EU_FN_FED01_106.wav, F_MAG01_EU_FN_FED01_201.wav, F_MAG01_EU_FN_FED01_202.wav, F_MAG01_EU_FN_FED01_203.wav, F_MAG01_EU_FN_FED01_301.wav, F_MAG01_EU_FN_FED01_302.wav, F_MAG01_EU_FN_FED01_303.wav, F_MAG01_EU_FN_FED01_304.wav, F_MAG01_EU_FN_FED01_305.wav, F_MAT01_EU_FN_RIT01_101.wav, F_MAT01_EU_FN_RIT01_102.wav, F_MAT01_EU_FN_RIT01_103.wav, F_MAT01_EU_FN_RIT01_301.wav, F_MAT01_EU_FN_RIT01_302.wav, F_MAT01_EU_FN_RIT01_303.wav, F_MEG01_MC_FI_SIM01_301.wav, F_MEG01_MC_FI_SIM01_302.wav, 
F_MEG01_MC_FI_SIM01_303.wav, F_MEG01_MC_FI_SIM01_304.wav, F_MIN01_EU_FN_BEN01_101.wav, F_MIN01_EU_FN_BEN01_102.wav, F_MIN01_EU_FN_BEN01_103.wav, F_MIN01_EU_FN_BEN01_104.wav, F_REG01_EU_FN_GIO01_201.wav, F_SPI01_EU_MN_NAI01_101.wav, F_SPI01_EU_MN_NAI01_102.wav, F_SPI01_EU_MN_NAI01_103.wav, F_SPI01_EU_MN_NAI01_104.wav, F_SPI01_EU_MN_NAI01_201.wav, F_SPI01_EU_MN_NAI01_202.wav, F_SPI01_EU_MN_NAI01_203.wav, F_SPI01_EU_MN_NAI01_301.wav, F_WHO01_MC_FI_SIM01_101.wav, F_WHO01_MC_FI_SIM01_102.wav, F_WHO01_MC_FI_SIM01_103.wav, F_WHO01_MC_FI_SIM01_301.wav, F_WHO01_MC_FI_SIM01_302.wav, F_WHO01_MC_FI_SIM01_303.wav, F_WHO01_MC_FI_SIM01_304.wav, F_WHO01_MC_FI_SIM01_306.wav, F_WHO01_MC_FI_SIM01_307.wav"
|
||
#
|
||
#
|
||
#
|
||
# print(
|
||
# [{
|
||
# "path": f"./data/is_cat_sound_true/{dd}", "intent": "等待喂食"
|
||
# } for dd in aa.split(", ")]
|
||
# ) |