调整脚本
This commit is contained in:
@@ -10,19 +10,43 @@ data_dirs = ['data_test_dir']
|
||||
all_records = []
|
||||
|
||||
for d in data_dirs:
|
||||
if not os.path.exists(d): continue
|
||||
for f in os.listdir(d):
|
||||
print(f"Processing directory: {d}")
|
||||
if not os.path.exists(d):
|
||||
print(f"Directory {d} does not exist")
|
||||
continue
|
||||
print(f"Found directory: {d}")
|
||||
files = os.listdir(d)
|
||||
print(f"Files in directory: {files}")
|
||||
for f in files:
|
||||
if f.endswith('.json') and f != 'stat_result.json':
|
||||
with open(os.path.join(d, f), 'r') as file:
|
||||
try:
|
||||
all_records.extend(json.load(file))
|
||||
except:
|
||||
continue
|
||||
file_path = os.path.join(d, f)
|
||||
print(f"Processing file: {file_path}")
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
data = json.load(file)
|
||||
print(f"Loaded {len(data)} records from {f}")
|
||||
if len(data) > 0:
|
||||
print(f"First record keys: {list(data[0].keys())}")
|
||||
all_records.extend(data)
|
||||
except Exception as e:
|
||||
print(f"Error processing {file_path}: {str(e)}")
|
||||
continue
|
||||
|
||||
print(f"Total records loaded: {len(all_records)}")
|
||||
df = pd.DataFrame(all_records)
|
||||
df = df.drop_duplicates(subset=['id'], keep='last')
|
||||
df['time'] = pd.to_datetime(df['time'])
|
||||
print(f"Total unique records: {len(df)}")
|
||||
print(f"DataFrame columns: {list(df.columns)}")
|
||||
if 'id' in df.columns:
|
||||
df = df.drop_duplicates(subset=['id'], keep='last')
|
||||
print(f"Total unique records after deduplication: {len(df)}")
|
||||
else:
|
||||
print("No 'id' column found in DataFrame")
|
||||
if 'time' in df.columns:
|
||||
df['time'] = pd.to_datetime(df['time'])
|
||||
print(f"Total unique records: {len(df)}")
|
||||
else:
|
||||
print("No 'time' column found in DataFrame")
|
||||
print("Sample of first 5 records:")
|
||||
print(df.head())
|
||||
|
||||
|
||||
# 2. 极速统计函数
|
||||
@@ -88,7 +112,18 @@ output_data = {
|
||||
'last_updated': last_date.strftime('%Y-%m-%d %H:%M:%S')
|
||||
}
|
||||
|
||||
with open('data_test_predict/aggregated_stats_v7.json', 'w') as f:
|
||||
# 创建data_test_predict目录(如果不存在)
|
||||
predict_dir = 'data_test_predict'
|
||||
if not os.path.exists(predict_dir):
|
||||
print(f"Creating directory: {predict_dir}")
|
||||
os.makedirs(predict_dir)
|
||||
else:
|
||||
print(f"Directory {predict_dir} already exists")
|
||||
|
||||
# 保存结果
|
||||
output_file = os.path.join(predict_dir, 'aggregated_stats_v7.json')
|
||||
print(f"Saving results to: {output_file}")
|
||||
with open(output_file, 'w') as f:
|
||||
json.dump(output_data, f)
|
||||
|
||||
print(f"Stats V7 generated with 100-day window. Last data point: {last_date}")
|
||||
Reference in New Issue
Block a user