Files
petshy/detector_tester.py
2025-10-08 20:39:09 +08:00

95 lines
5.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Build a sample set for the species-sound detector.

Walks two directories of .wav files and feeds them to a SampleCollector:
one directory of positive (species) calls and one of negative (non-species)
sounds, then prints the resulting sample counts.
"""
import os

from src.sample_collector import SampleCollector

# Initialize the sample collector.
collector = SampleCollector()

# Add species call samples (positives).
# sounds_dir, species = "./data/cat_sounds_2", "cat"
sounds_dir, species = "./data/extras/dataset", "cat"
# sounds_dir, species = "./data/dog_sounds", "dog"
for file in os.listdir(sounds_dir):
    # endswith accepts a tuple — covers both lower- and upper-case extensions.
    if file.endswith((".wav", ".WAV")):
        collector.add_sounds(os.path.join(sounds_dir, file), species)

# Add non-species sound samples (negatives).
non_sounds_dir = "./data/non_sounds"
for file in os.listdir(non_sounds_dir):
    if file.endswith((".wav", ".WAV")):
        collector.add_non_sounds(os.path.join(non_sounds_dir, file))

# Report how many positive/negative samples were collected.
print(collector.get_sample_counts())
# from src.audio_input import AudioInput
# from src.audio_processor import AudioProcessor
# from src.feature_extractor import FeatureExtractor
# from src.cat_intent_classifier_v2 import CatIntentClassifier
# import os
# import numpy as np
#
# # 初始化组件
# audio_input = AudioInput()
# audio_processor = AudioProcessor()
# feature_extractor = FeatureExtractor()
#
# # 提取情感类别特征
# emotions_dir = "./cat_intents/emotions"
# emotion_embeddings = []
# emotion_labels = []
#
# for emotion in os.listdir(emotions_dir):
# emotion_path = os.path.join(emotions_dir, emotion)
# if os.path.isdir(emotion_path):
# for file in os.listdir(emotion_path):
# if file.endswith(".wav") or file.endswith(".WAV"):
# file_path = os.path.join(emotion_path, file)
# print(f"处理情感样本: {file_path}")
#
# # 加载音频
# audio_data, sample_rate = audio_input.load_from_file(file_path)
#
# # 预处理音频
# processed_audio = audio_processor.preprocess(audio_data)
#
# # 准备YAMNet输入
# yamnet_input = audio_processor.prepare_yamnet_input(processed_audio)
#
# # 提取特征
# features = feature_extractor.process_audio(yamnet_input)
#
# # 使用平均嵌入向量
# embedding_mean = np.mean(features["embeddings"], axis=0)
#
# # 添加到训练数据
# emotion_embeddings.append(embedding_mean)
# emotion_labels.append(emotion)
#
# # 训练情感分类器
# print(f"训练情感分类器,样本数: {len(emotion_embeddings)}")
# emotion_classifier = CatIntentClassifier()
# emotion_history = emotion_classifier.train(
# np.array(emotion_embeddings),
# emotion_labels,
# epochs=100,
# batch_size=16
# )
#
# # 保存情感分类器
# os.makedirs("./models", exist_ok=True)
# emotion_paths = emotion_classifier.save_model("./models", "emotions")
# # phrases_paths = emotion_classifier.save_model("./models", "phrases")
# print(f"情感分类器已保存: {emotion_paths}")
# Similarly, train the phrase classifier:
# ...repeat the process above but using the phrases directory
# aa = "F_BAC01_MC_MN_SIM01_101.wav, F_BAC01_MC_MN_SIM01_102.wav, F_BAC01_MC_MN_SIM01_103.wav, F_BAC01_MC_MN_SIM01_104.wav, F_BAC01_MC_MN_SIM01_105.wav, F_BAC01_MC_MN_SIM01_201.wav, F_BAC01_MC_MN_SIM01_202.wav, F_BAC01_MC_MN_SIM01_203.wav, F_BAC01_MC_MN_SIM01_301.wav, F_BAC01_MC_MN_SIM01_302.wav, F_BAC01_MC_MN_SIM01_303.wav, F_BAC01_MC_MN_SIM01_304.wav, F_BLE01_EU_FN_DEL01_101.wav, F_BLE01_EU_FN_DEL01_102.wav, F_BLE01_EU_FN_DEL01_103.wav, F_BRA01_MC_MN_SIM01_301.wav, F_BRA01_MC_MN_SIM01_302.wav, F_BRI01_MC_FI_SIM01_101.wav, F_BRI01_MC_FI_SIM01_102.wav, F_BRI01_MC_FI_SIM01_103.wav, F_BRI01_MC_FI_SIM01_104.wav, F_BRI01_MC_FI_SIM01_105.wav, F_BRI01_MC_FI_SIM01_106.wav, F_BRI01_MC_FI_SIM01_201.wav, F_BRI01_MC_FI_SIM01_202.wav, F_CAN01_EU_FN_GIA01_201.wav, F_CAN01_EU_FN_GIA01_202.wav, F_DAK01_MC_FN_SIM01_301.wav, F_DAK01_MC_FN_SIM01_302.wav, F_DAK01_MC_FN_SIM01_303.wav, F_DAK01_MC_FN_SIM01_304.wav, F_IND01_EU_FN_ELI01_101.wav, F_IND01_EU_FN_ELI01_102.wav, F_IND01_EU_FN_ELI01_103.wav, F_IND01_EU_FN_ELI01_104.wav, F_IND01_EU_FN_ELI01_201.wav, F_IND01_EU_FN_ELI01_202.wav, F_IND01_EU_FN_ELI01_203.wav, F_IND01_EU_FN_ELI01_301.wav, F_IND01_EU_FN_ELI01_302.wav, F_IND01_EU_FN_ELI01_304.wav, F_LEO01_EU_MI_RIT01_101.wav, F_LEO01_EU_MI_RIT01_102.wav, F_LEO01_EU_MI_RIT01_103.wav, F_LEO01_EU_MI_RIT01_104.wav, F_LEO01_EU_MI_RIT01_105.wav, F_MAG01_EU_FN_FED01_101.wav, F_MAG01_EU_FN_FED01_102.wav, F_MAG01_EU_FN_FED01_103.wav, F_MAG01_EU_FN_FED01_104.wav, F_MAG01_EU_FN_FED01_105.wav, F_MAG01_EU_FN_FED01_106.wav, F_MAG01_EU_FN_FED01_201.wav, F_MAG01_EU_FN_FED01_202.wav, F_MAG01_EU_FN_FED01_203.wav, F_MAG01_EU_FN_FED01_301.wav, F_MAG01_EU_FN_FED01_302.wav, F_MAG01_EU_FN_FED01_303.wav, F_MAG01_EU_FN_FED01_304.wav, F_MAG01_EU_FN_FED01_305.wav, F_MAT01_EU_FN_RIT01_101.wav, F_MAT01_EU_FN_RIT01_102.wav, F_MAT01_EU_FN_RIT01_103.wav, F_MAT01_EU_FN_RIT01_301.wav, F_MAT01_EU_FN_RIT01_302.wav, F_MAT01_EU_FN_RIT01_303.wav, F_MEG01_MC_FI_SIM01_301.wav, F_MEG01_MC_FI_SIM01_302.wav, 
F_MEG01_MC_FI_SIM01_303.wav, F_MEG01_MC_FI_SIM01_304.wav, F_MIN01_EU_FN_BEN01_101.wav, F_MIN01_EU_FN_BEN01_102.wav, F_MIN01_EU_FN_BEN01_103.wav, F_MIN01_EU_FN_BEN01_104.wav, F_REG01_EU_FN_GIO01_201.wav, F_SPI01_EU_MN_NAI01_101.wav, F_SPI01_EU_MN_NAI01_102.wav, F_SPI01_EU_MN_NAI01_103.wav, F_SPI01_EU_MN_NAI01_104.wav, F_SPI01_EU_MN_NAI01_201.wav, F_SPI01_EU_MN_NAI01_202.wav, F_SPI01_EU_MN_NAI01_203.wav, F_SPI01_EU_MN_NAI01_301.wav, F_WHO01_MC_FI_SIM01_101.wav, F_WHO01_MC_FI_SIM01_102.wav, F_WHO01_MC_FI_SIM01_103.wav, F_WHO01_MC_FI_SIM01_301.wav, F_WHO01_MC_FI_SIM01_302.wav, F_WHO01_MC_FI_SIM01_303.wav, F_WHO01_MC_FI_SIM01_304.wav, F_WHO01_MC_FI_SIM01_306.wav, F_WHO01_MC_FI_SIM01_307.wav"
#
#
#
# print(
# [{
# "path": f"./data/is_cat_sound_true/{dd}", "intent": "等待喂食"
# } for dd in aa.split(", ")]
# )