From 67335e08aa70d2ca7b898ad34fa10ad16262a86b Mon Sep 17 00:00:00 2001
From: xuelijun <977662702@qq.com>
Date: Wed, 25 Feb 2026 09:53:43 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E7=88=AC=E5=8F=96?=
 =?UTF-8?q?=E6=9C=80=E8=BF=917=E5=A4=A9=E7=9A=84=E5=BC=80=E5=A5=96?=
 =?UTF-8?q?=E7=BB=93=E6=9E=9C=E4=BB=BB=E5=8A=A1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../java/com/tem/bocai/BocaiApplication.java  |   6 +-
 .../tem/bocai/schedules/CrawlerSchedule.java  | 131 +++++++
 .../java/com/tem/bocai/util/DateUtils.java    |  16 +
 .../tem/bocai/util/LotteryHistoryCrawler.java | 332 ++++++++++++++++++
 4 files changed, 482 insertions(+), 3 deletions(-)
 create mode 100644 src/main/java/com/tem/bocai/util/LotteryHistoryCrawler.java
diff --git a/src/main/java/com/tem/bocai/BocaiApplication.java b/src/main/java/com/tem/bocai/BocaiApplication.java
index d65e6bc..c844efc 100644
--- a/src/main/java/com/tem/bocai/BocaiApplication.java
+++ b/src/main/java/com/tem/bocai/BocaiApplication.java
@@ -18,9 +18,9 @@ public class BocaiApplication {
 //        // 依次执行三个任务
 //
         // 1. 执行CrawlerSchedule方法
-//        System.out.println("\n=== 开始执行CrawlerSchedule任务 ===");
-//        CrawlerSchedule crawlerSchedule = context.getBean(CrawlerSchedule.class);
-//        crawlerSchedule.executePksHistory();
+        System.out.println("\n=== 开始执行初始化爬取最近7天的开奖结果任务 ===");
+        CrawlerSchedule crawlerSchedule = context.getBean(CrawlerSchedule.class);
+        crawlerSchedule.executeLotteryDrawHistory();
 //
         // 3. 执行ExBetScriptSchedule方法
 //        System.out.println("\n=== 开始执行ExBetScriptSchedule任务 ===");
diff --git a/src/main/java/com/tem/bocai/schedules/CrawlerSchedule.java b/src/main/java/com/tem/bocai/schedules/CrawlerSchedule.java
index 317a418..e96dd38 100644
--- a/src/main/java/com/tem/bocai/schedules/CrawlerSchedule.java
+++ b/src/main/java/com/tem/bocai/schedules/CrawlerSchedule.java
@@ -337,4 +337,135 @@ public class CrawlerSchedule {
         }
     }
 
+
+    // Crawl the draw results of the last 7 days (today included); dates that already have data are skipped.
+    public void executeLotteryDrawHistory() {
+        log.info("开始爬取最近7天的开奖结果");
+
+        LoginInfoResult loginInfo = loginInfoRepository.findFirstByOrderByCreateTimeDesc()
+                .orElse(null);
+        if (loginInfo == null) {
+            log.error("未找到登录信息");
+            return;
+        }
+        if (loginInfo.getOnOff() == ONOFF) {
+            log.info("开关已关闭，停止爬取");
+            return;
+        }
+
+        String token = tokenCacheService.getToken();
+        if (token == null || token.isEmpty()) {
+            log.error("token为空");
+            return;
+        }
+
+        // Date strings come from DateUtils.getLast7Days(), today first.
+        List<String> dateList = DateUtils.getLast7Days();
+
+        for (String date : dateList) {
+            log.info("\n=== 开始爬取日期: {} 的数据 ===", date);
+
+            // Skip dates for which isDateDataExists() already finds stored results.
+            if (isDateDataExists(date)) {
+                log.info("日期 {} 的数据已存在，跳过爬取", date);
+                continue;
+            }
+
+            // crawlDataForDate() performs its own retries (up to MAX_CRA attempts).
+            boolean success = crawlDataForDate(date, token);
+            if (success) {
+                log.info("日期 {} 数据爬取成功", date);
+            } else {
+                log.error("日期 {} 数据爬取失败，已达到最大重试次数", date);
+            }
+
+            // Pause between dates to avoid hammering the site; an interrupt ends the whole run.
+            try {
+                Thread.sleep(1000);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                log.warn("爬取被中断，停止后续日期的爬取");
+                return;
+            }
+        }
+        log.info("最近7天数据爬取完成");
+    }
+
+    /**
+     * Crawls the draw results of one date, retrying up to MAX_CRA times with a fresh token.
+     *
+     * @param date  date to crawl; appended verbatim to the request URL
+     * @param token initial token; re-read via getTokenSqlite() when empty or after a failure
+     * @return true when LotteryHistoryCrawler reports a successful parse, false otherwise
+     */
+    private boolean crawlDataForDate(String date, String token) {
+        LoginInfoResult loginInfo = loginInfoRepository.findFirstByOrderByCreateTimeDesc()
+                .orElse(null);
+        if (loginInfo == null) {
+            return false;
+        }
+        // The URL does not change between attempts, so build it once.
+        String url = loginInfo.getLoginUrl() + "/member/dresult?lottery=SGFT&date=" + date;
+
+        String currentToken = token;
+        int retryCount = 0;
+        while (retryCount < MAX_CRA) {
+            log.info("\n=== 第 {} 次尝试获取 {} 的开奖结果 ===", retryCount + 1, date);
+
+            if (currentToken == null || currentToken.isEmpty()) {
+                log.info("token为空，从数据库重新获取");
+                currentToken = tokenCacheService.getTokenSqlite();
+                // An empty token is as unusable as a missing one (no cookie would be sent).
+                if (currentToken == null || currentToken.isEmpty()) {
+                    log.error("无法获取有效token");
+                    retryCount++;
+                    continue;
+                }
+            }
+            log.info("使用token: " + (currentToken.length() > 20 ? currentToken.substring(0, 20) + "..." : currentToken));
+
+            // A new crawler per attempt so the request carries the current token.
+            LotteryHistoryCrawler crawler = new LotteryHistoryCrawler(currentToken, pypath, date);
+            Spider.create(crawler)
+                    .addUrl(url)
+                    .thread(1)
+                    .run();
+
+            // NOTE(review): this is a static flag shared by all crawler instances, not per-run state.
+            if (LotteryHistoryCrawler.isLastParseSuccess()) {
+                log.info("成功解析到数据");
+                return true;
+            }
+
+            log.info("本次尝试未解析到数据");
+            // Refresh the token for the next attempt.
+            currentToken = tokenCacheService.getTokenSqlite();
+            retryCount++;
+
+            // Linearly growing wait between attempts; none after the last one.
+            if (retryCount < MAX_CRA) {
+                try {
+                    Thread.sleep(2000L * retryCount);
+                } catch (InterruptedException e) {
+                    // Restore the flag and give up instead of retrying on an interrupted thread.
+                    Thread.currentThread().interrupt();
+                    return false;
+                }
+            }
+        }
+        return false;
+    }
+    /**
+     * True when lotteryResultRepository.findByTimeContaining(date) returns rows; lookup errors count as "no data".
+     */
+    private boolean isDateDataExists(String date) {
+        try {
+            List<LotteryResult> stored = lotteryResultRepository.findByTimeContaining(date);
+            return stored != null && !stored.isEmpty();
+        } catch (Exception e) {
+            log.warn("检查文件失败: {}", e.getMessage(), e);
+            return false;
+        }
+    }
+
 }
diff --git a/src/main/java/com/tem/bocai/util/DateUtils.java b/src/main/java/com/tem/bocai/util/DateUtils.java
index efcacf3..01605f5 100644
--- a/src/main/java/com/tem/bocai/util/DateUtils.java
+++ b/src/main/java/com/tem/bocai/util/DateUtils.java
@@ -72,6 +72,22 @@ public class DateUtils extends org.apache.commons.lang3.time.DateUtils
         return targetDate.format(DATE_FORMATTER);
     }*/
 
+    /**
+     * Returns the last 7 calendar days as ISO-8601 {@code yyyy-MM-dd} strings, newest first.
+     * Index 0 is today in the system default time zone.
+     * @return a mutable list of exactly 7 date strings
+     */
+    public static List<String> getLast7Days() {
+        // LocalDate is locale-independent; Calendar/SimpleDateFormat follow the default locale's calendar.
+        java.time.LocalDate today = java.time.LocalDate.now();
+        List<String> dateList = new ArrayList<>(7);
+        for (int daysBack = 0; daysBack < 7; daysBack++) {
+            dateList.add(today.minusDays(daysBack).toString());
+        }
+        return dateList;
+    }
+
+
     public static void main(String[] args) {
         System.out.println("====="+getTodayDate());
        /* Date now = new Date(); // 当前时间
diff --git a/src/main/java/com/tem/bocai/util/LotteryHistoryCrawler.java b/src/main/java/com/tem/bocai/util/LotteryHistoryCrawler.java
new file mode 100644
index 0000000..f132db0
--- /dev/null
+++ b/src/main/java/com/tem/bocai/util/LotteryHistoryCrawler.java
@@ -0,0 +1,332 @@
+package com.tem.bocai.util;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.processor.PageProcessor;
+import us.codecraft.webmagic.selector.Html;
+import us.codecraft.webmagic.selector.Selectable;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+
+//开奖的历史结果
+@Slf4j
+public class LotteryHistoryCrawler implements PageProcessor {
+
+    private final String token;
+    // 站点配置
+    private Site site;
+    // final LoginService loginService;
+    // 添加一个字段标记是否成功解析数据
+    private static volatile boolean lastParseSuccess = true;
+
+    private String path;
+    private String date;
+
+    public LotteryHistoryCrawler(String token, String path, String date) {
+        this.token = token;
+        this.path = path;
+        this.date = date;
+        lastParseSuccess = false; // cleared per crawler: process() never runs when the download fails
+        initSite();
+    }
+    /**
+     * Builds the WebMagic Site (retry count, sleep time, timeout, user agent) and adds the token cookie when set.
+     */
+    private void initSite() {
+        Site configured = Site.me();
+        configured.setRetryTimes(3);
+        configured.setSleepTime(1000);
+        configured.setTimeOut(10000);
+        configured.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36");
+        // Send the cookie header only when a token is actually available.
+        if (token != null && !token.isEmpty()) {
+            configured.addHeader("cookie", "token=" + token);
+        }
+        site = configured;
+    }
+
+
+    /**
+     * WebMagic callback for a downloaded page. Records in lastParseSuccess whether the page
+     * contains any table, logs diagnostics, then parses the draw table via parseLotteryHtml().
+     *
+     * @param page the downloaded page handed in by the Spider
+     */
+    @Override
+    public void process(Page page) {
+        Html html = page.getHtml();
+
+        // Basic page information for troubleshooting.
+        log.info("页面URL: {}", page.getUrl());
+        log.info("页面标题: {}", html.xpath("//title/text()").get());
+
+        // A page without any <table> is treated as a failed parse.
+        int tableCount = html.xpath("//table").nodes().size();
+        log.info("找到 {} 个表格", tableCount);
+        lastParseSuccess = tableCount > 0;
+
+        // Debug dump of the table rows.
+        extractTableData(html);
+
+        // The link count is diagnostic only; links are not followed.
+        // Logged through the class logger instead of System.out so it lands with the other messages.
+        Selectable links = html.links();
+        log.info("页面包含 {} 个链接", links.all().size());
+
+        // parseLotteryHtml() also persists the rows (SQLite + JSON file) as a side effect;
+        // its return value is not needed here.
+        parseLotteryHtml(html.toString());
+    }
+
+    /**
+     * Logs the td texts of every table row for debugging; rows without any td text are skipped.
+     */
+    private void extractTableData(Html html) {
+        for (Selectable row : html.xpath("//tr").nodes()) {
+            List<String> cellTexts = row.xpath("//td/text()").all();
+            // Test the list itself: List.toString() yields "[]" for an empty list, which is never empty.
+            if (!cellTexts.isEmpty()) {
+                log.info("行数据: {}", cellTexts);
+            }
+        }
+    }
+
+    @Override
+    public Site getSite() {
+        // Hands WebMagic the Site that initSite() configured in the constructor.
+        return site;
+    }
+
+    /**
+     * Returns the shared static lastParseSuccess flag, which process() sets to true when the page had a table.
+     */
+    public static boolean isLastParseSuccess() {
+        return lastParseSuccess;
+    }
+    /**
+     * Parses the lottery result HTML into one map per table row and persists the rows (SQLite and JSON file).
+     *
+     * @param htmlContent HTML text of the crawled page
+     * @return the parsed rows; empty when no element with id drawTable is found (nothing is persisted then)
+     */
+    public  List<Map<String, Object>> parseLotteryHtml(String htmlContent) {
+        List<Map<String, Object>> resultList = new ArrayList<>();
+
+        // Parse the HTML with Jsoup.
+        Document doc = Jsoup.parse(htmlContent);
+
+        // Locate the data rows: "table > tbody > tr" below the element with id drawTable.
+        Element targetTable = doc.selectFirst("#drawTable");
+        if (targetTable == null) {
+            return resultList;
+        }
+
+        Elements trList = targetTable.select("table > tbody > tr");
+
+        // Walk every row.
+        for (Element tr : trList) {
+            Map<String, Object> rowData = new HashMap<>();
+
+            // 1. Draw period number (id), from td.period; "" when the cell is missing.
+            Element periodTd = tr.selectFirst("td.period");
+            rowData.put("id", periodTd != null ? periodTd.text().trim() : "");
+
+            // 2. Draw time (time), from td.drawTime; "" when the cell is missing.
+            Element timeTd = tr.selectFirst("td.drawTime");
+            rowData.put("time", timeTd != null ? timeTd.text().trim() : "");
+
+            // 3. Drawn numbers (result): the first 10 purely numeric td.ballname cells at most.
+            Elements ballTds = tr.select("td.ballname");
+            List<Integer> resultNumbers = new ArrayList<>();
+            int count = 0;
+            for (Element td : ballTds) {
+                if (count >= 10) break;
+                String text = td.text().trim();
+                if (text.matches("\\d+")) {
+                    resultNumbers.add(Integer.parseInt(text));
+                    count++;
+                }
+            }
+            rowData.put("result", resultNumbers);
+
+            // 4. winner, from td.other1: an Integer when numeric, otherwise "".
+            Element winnerTd = tr.selectFirst("td.other1");
+            if (winnerTd != null) {
+                String winnerText = winnerTd.text().trim();
+                if (winnerText.matches("\\d+")) {
+                    rowData.put("winner", Integer.parseInt(winnerText));
+                } else {
+                    rowData.put("winner", "");
+                }
+            } else {
+                rowData.put("winner", "");
+            }
+
+            // 5. GD1 (champion+runner-up small/big) and GD2 (champion+runner-up odd/even): td.other cells whose class contains GDX / GDS.
+            Elements otherTds = tr.select("td.other");
+            String gd1 = "";
+            String gd2 = "";
+            for (Element td : otherTds) {
+                String className = td.className();
+                if (className.contains("GDX")) {
+                    gd1 = td.text().trim();
+                } else if (className.contains("GDS")) {
+                    gd2 = td.text().trim();
+                }
+            }
+            rowData.put("GD1", gd1);
+            rowData.put("GD2", gd2);
+
+            // 6. sum1 (td.dldhl_sum) and sum2 (td.dldhh_sum): an Integer when numeric, otherwise "".
+            Element sum1Td = tr.selectFirst("td.dldhl_sum");
+            if (sum1Td != null) {
+                String sum1Text = sum1Td.text().trim();
+                if (sum1Text.matches("\\d+")) {
+                    rowData.put("sum1", Integer.parseInt(sum1Text));
+                } else {
+                    rowData.put("sum1", "");
+                }
+            } else {
+                rowData.put("sum1", "");
+            }
+
+            Element sum2Td = tr.selectFirst("td.dldhh_sum");
+            if (sum2Td != null) {
+                String sum2Text = sum2Td.text().trim();
+                if (sum2Text.matches("\\d+")) {
+                    rowData.put("sum2", Integer.parseInt(sum2Text));
+                } else {
+                    rowData.put("sum2", "");
+                }
+            } else {
+                rowData.put("sum2", "");
+            }
+
+            // 7. GLH_result (dragon/tiger results): at most 5 td.other cells whose class contains "GLH_".
+            List<String> glhResults = new ArrayList<>();
+            int glhCount = 0;
+            for (Element td : otherTds) {
+                if (glhCount >= 5) break;
+                String className = td.className();
+                if (className.contains("GLH_")) {
+                    glhResults.add(td.text().trim());
+                    glhCount++;
+                }
+            }
+            rowData.put("GLH_result", glhResults);
+
+            // Keep the row only when it has a non-empty period id.
+            if (!rowData.get("id").toString().isEmpty()) {
+                resultList.add(rowData);
+            }
+        }
+        // Write the rows to the SQLite database.
+        SQLiteUtil.writeToSQLite(resultList);
+        // Also write the rows to the JSON file (keeps the earlier behaviour).
+        writeToJsonFile(resultList);
+        log.info("历史爬虫打印结果===" + resultList);
+        return resultList;
+    }
+
+    /**
+     * Merges the rows into {@code <path>/current_data/result_<date>.json}.
+     * Rows whose "id" is already in the file are skipped, the rest are appended.
+     * An existing file that cannot be read is logged and then overwritten.
+     *
+     * @param resultList rows to persist; rows without an "id" value are ignored
+     * @throws RuntimeException when the directory cannot be created or the file cannot be written
+     */
+    public  void writeToJsonFile(List<Map<String, Object>> resultList) {
+        // Pretty-print so the file stays human-readable.
+        ObjectMapper objectMapper = new ObjectMapper();
+        objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
+
+        // path and date are used exactly as passed to the constructor.
+        String directoryPath = path + "/current_data";
+        File directory = new File(directoryPath);
+        File outputFile = new File(directoryPath + "/" + "result_" + date + ".json");
+
+        try {
+            // Fail with a clear message instead of a later "file not found" when the directory is missing.
+            if (!directory.exists() && !directory.mkdirs() && !directory.isDirectory()) {
+                throw new IOException("无法创建目录: " + directory.getAbsolutePath());
+            }
+
+            // Load what is already on disk so that only unseen ids are appended.
+            List<Map<String, Object>> existingData = new ArrayList<>();
+            Set<String> existingIds = new HashSet<>();
+            if (outputFile.exists()) {
+                try {
+                    List<Map<String, Object>> loaded = objectMapper.readValue(outputFile,
+                            objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class));
+                    // A file holding the JSON literal null deserializes to null; treat it as empty.
+                    if (loaded != null) {
+                        existingData = loaded;
+                    }
+                    for (Map<String, Object> item : existingData) {
+                        // A null id would throw a NullPointerException on toString(); skip it.
+                        Object id = item.get("id");
+                        if (id != null) {
+                            existingIds.add(id.toString());
+                        }
+                    }
+                    log.info("已读取现有数据，共 " + existingData.size() + " 条记录");
+                } catch (IOException e) {
+                    log.warn("读取现有文件失败，将覆盖写入: " + e.getMessage());
+                    existingIds.clear();
+                }
+            }
+
+            // Keep only the rows whose id is not in the file yet.
+            List<Map<String, Object>> newData = new ArrayList<>();
+            for (Map<String, Object> item : resultList) {
+                Object id = item.get("id");
+                if (id != null && !existingIds.contains(id.toString())) {
+                    newData.add(item);
+                }
+            }
+
+            // Existing rows first, then the new ones.
+            List<Map<String, Object>> finalData = new ArrayList<>(existingData);
+            finalData.addAll(newData);
+
+            objectMapper.writeValue(outputFile, finalData);
+            log.info("数据已成功写入文件: " + outputFile.getAbsolutePath() +
+                    " (现有: " + existingData.size() + " 条, 新增: " + newData.size() + " 条, 总计: " + finalData.size() + " 条)");
+        } catch (IOException e) {
+            // log.error already records the stack trace; printStackTrace() only duplicated it on stderr.
+            log.error("写入 JSON 文件失败: " + e.getMessage(), e);
+            throw new RuntimeException("写入 JSON 文件失败: " + e.getMessage(), e);
+        }
+    }
+
+
+    // Manual smoke test: crawls one hard-coded result page with an empty token, path and date.
+    public static void main(String[] args) {
+        String url = "https://4701268539-esh.qdk63ayw8g.com/member/dresult?lottery=SGFT&date=2026-02-06";
+        LotteryHistoryCrawler crawler = new LotteryHistoryCrawler("","","");
+        Spider spider = Spider.create(crawler);
+        spider.addUrl(url);
+        spider.thread(1);
+        spider.run();
+    }
+
+
+    // Custom headers (disabled sample; the token value is redacted because credentials must not be committed)
+    /*private Map<String, String> getHeaders() {
+        Map<String, String> headers = new HashMap<>();
+        headers.put("cookie", "token=<redacted>");
+
+        return headers;
+    }*/
+}
\ No newline at end of file