修改爬取数据为增量
This commit is contained in:
@@ -255,8 +255,7 @@ public class LotteryWebMagicCrawler implements PageProcessor {
|
||||
String directoryPath = path+"/current_data"; // path 下的 current_data 文件夹
|
||||
|
||||
// 使用当天日期作为文件名(格式:result_<日期>.json,其中 <日期> 即下方 dateStr 的值)
|
||||
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
|
||||
String dateStr = dateFormat.format(new Date());
|
||||
String dateStr = DateUtils.getTodayDate();
|
||||
String fileName = "result_" + dateStr + ".json";
|
||||
String filePath = directoryPath + "/" + fileName;
|
||||
|
||||
@@ -269,18 +268,47 @@ public class LotteryWebMagicCrawler implements PageProcessor {
|
||||
// 创建文件对象
|
||||
File outputFile = new File(filePath);
|
||||
|
||||
// 如果文件已存在,删除旧文件(实现替换功能)
|
||||
// 如果文件已存在,读取现有数据并对比
|
||||
List<Map<String, Object>> existingData = new ArrayList<>();
|
||||
Set<String> existingIds = new HashSet<>();
|
||||
if (outputFile.exists()) {
|
||||
boolean deleted = outputFile.delete();
|
||||
if (!deleted) {
|
||||
throw new IOException("无法删除已存在的文件: " + filePath);
|
||||
try {
|
||||
existingData = objectMapper.readValue(outputFile,
|
||||
objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class));
|
||||
for (Map<String, Object> item : existingData) {
|
||||
if (item.containsKey("id")) {
|
||||
existingIds.add(item.get("id").toString());
|
||||
}
|
||||
}
|
||||
log.info("已读取现有数据,共 " + existingData.size() + " 条记录");
|
||||
} catch (IOException e) {
|
||||
log.warn("读取现有文件失败,将覆盖写入: " + e.getMessage());
|
||||
existingIds.clear();
|
||||
}
|
||||
System.out.println("已删除旧文件,准备创建新文件: " + fileName);
|
||||
}
|
||||
|
||||
// 将 List 写入 JSON 文件
|
||||
objectMapper.writeValue(outputFile, resultList);
|
||||
log.info("数据已成功写入文件: " + outputFile.getAbsolutePath());
|
||||
// 筛选出新增的数据(id不在existingIds中的记录)
|
||||
List<Map<String, Object>> newData = new ArrayList<>();
|
||||
for (Map<String, Object> item : resultList) {
|
||||
if (item.containsKey("id")) {
|
||||
String id = item.get("id").toString();
|
||||
if (!existingIds.contains(id)) {
|
||||
newData.add(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 合并现有数据和新数据
|
||||
List<Map<String, Object>> finalData = new ArrayList<>();
|
||||
if (!existingData.isEmpty()) {
|
||||
finalData.addAll(existingData);
|
||||
}
|
||||
finalData.addAll(newData);
|
||||
|
||||
// 将合并后的数据写入 JSON 文件
|
||||
objectMapper.writeValue(outputFile, finalData);
|
||||
log.info("数据已成功写入文件: " + outputFile.getAbsolutePath() +
|
||||
" (现有: " + existingData.size() + " 条, 新增: " + newData.size() + " 条, 总计: " + finalData.size() + " 条)");
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
log.error("写入 JSON 文件失败: " + e.getMessage(), e);
|
||||
|
||||
Reference in New Issue
Block a user