修改爬取数据为增量

This commit is contained in:
2026-02-10 09:42:13 +08:00
parent 22e3d844bf
commit 7a61514e1c

View File

@@ -255,8 +255,7 @@ public class LotteryWebMagicCrawler implements PageProcessor {
String directoryPath = path+"/current_data"; // 项目根目录下的 output/json 文件夹
// 使用年月日作为文件名格式result_yyyyMMdd.json
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
String dateStr = dateFormat.format(new Date());
String dateStr = DateUtils.getTodayDate();
String fileName = "result_" + dateStr + ".json";
String filePath = directoryPath + "/" + fileName;
@@ -269,18 +268,47 @@ public class LotteryWebMagicCrawler implements PageProcessor {
// 创建文件对象
File outputFile = new File(filePath);
// 如果文件已存在,删除旧文件(实现替换功能)
// 如果文件已存在,读取现有数据并对比
List<Map<String, Object>> existingData = new ArrayList<>();
Set<String> existingIds = new HashSet<>();
if (outputFile.exists()) {
boolean deleted = outputFile.delete();
if (!deleted) {
throw new IOException("无法删除已存在的文件: " + filePath);
try {
existingData = objectMapper.readValue(outputFile,
objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class));
for (Map<String, Object> item : existingData) {
if (item.containsKey("id")) {
existingIds.add(item.get("id").toString());
}
}
log.info("已读取现有数据,共 " + existingData.size() + " 条记录");
} catch (IOException e) {
log.warn("读取现有文件失败,将覆盖写入: " + e.getMessage());
existingIds.clear();
}
System.out.println("已删除旧文件,准备创建新文件: " + fileName);
}
// 将 List 写入 JSON 文件
objectMapper.writeValue(outputFile, resultList);
log.info("数据已成功写入文件: " + outputFile.getAbsolutePath());
// 筛选出新增的数据id不在existingIds中的记录
List<Map<String, Object>> newData = new ArrayList<>();
for (Map<String, Object> item : resultList) {
if (item.containsKey("id")) {
String id = item.get("id").toString();
if (!existingIds.contains(id)) {
newData.add(item);
}
}
}
// 合并现有数据和新数据
List<Map<String, Object>> finalData = new ArrayList<>();
if (!existingData.isEmpty()) {
finalData.addAll(existingData);
}
finalData.addAll(newData);
// 将合并后的数据写入 JSON 文件
objectMapper.writeValue(outputFile, finalData);
log.info("数据已成功写入文件: " + outputFile.getAbsolutePath() +
" (现有: " + existingData.size() + " 条, 新增: " + newData.size() + " 条, 总计: " + finalData.size() + " 条)");
} catch (IOException e) {
e.printStackTrace();
log.error("写入 JSON 文件失败: " + e.getMessage(), e);