调整脚本

This commit is contained in:
2026-01-23 18:04:11 +08:00
parent 5eac38c0c1
commit c3597e57f6
4 changed files with 56990 additions and 77 deletions

View File

@@ -10,19 +10,43 @@ data_dirs = ['data_test_dir']
# --- Load raw JSON records from the configured data directories ---
# Walks each directory, reads every *.json file except the derived
# stat_result.json, and accumulates all records into one flat list.
all_records = []
for d in data_dirs:
    print(f"Processing directory: {d}")
    if not os.path.exists(d):
        print(f"Directory {d} does not exist")
        continue
    print(f"Found directory: {d}")
    files = os.listdir(d)
    print(f"Files in directory: {files}")
    for f in files:
        if f.endswith('.json') and f != 'stat_result.json':
            file_path = os.path.join(d, f)
            print(f"Processing file: {file_path}")
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    # assumes each file holds a JSON array of record dicts
                    # — TODO confirm against the producer of these files
                    data = json.load(file)
                print(f"Loaded {len(data)} records from {f}")
                if len(data) > 0:
                    print(f"First record keys: {list(data[0].keys())}")
                all_records.extend(data)
            except Exception as e:
                # Best-effort load: log and skip unreadable/malformed files.
                print(f"Error processing {file_path}: {str(e)}")
                continue
print(f"Total records loaded: {len(all_records)}")

# --- Build the DataFrame and normalize it ---
df = pd.DataFrame(all_records)
print(f"Total unique records: {len(df)}")
print(f"DataFrame columns: {list(df.columns)}")
if 'id' in df.columns:
    # keep='last' so the most recently loaded copy of a record wins
    df = df.drop_duplicates(subset=['id'], keep='last')
    print(f"Total unique records after deduplication: {len(df)}")
else:
    print("No 'id' column found in DataFrame")
if 'time' in df.columns:
    df['time'] = pd.to_datetime(df['time'])
    print(f"Total unique records: {len(df)}")
else:
    print("No 'time' column found in DataFrame")
print("Sample of first 5 records:")
print(df.head())
# 2. 极速统计函数
@@ -88,7 +112,18 @@ output_data = {
'last_updated': last_date.strftime('%Y-%m-%d %H:%M:%S')
}
# --- Persist the aggregated stats ---
# Create the output directory if it does not exist yet.  exist_ok=True
# avoids the race between the exists() check and makedirs() if another
# process creates the directory in between; the check is kept only for
# the log message.
predict_dir = 'data_test_predict'
if not os.path.exists(predict_dir):
    print(f"Creating directory: {predict_dir}")
    os.makedirs(predict_dir, exist_ok=True)
else:
    print(f"Directory {predict_dir} already exists")

# Save the aggregated result for the downstream prediction step.
output_file = os.path.join(predict_dir, 'aggregated_stats_v7.json')
print(f"Saving results to: {output_file}")
with open(output_file, 'w') as f:
    json.dump(output_data, f)
print(f"Stats V7 generated with 100-day window. Last data point: {last_date}")

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff