From 6f1c990d51c8ec8d11e60b142478b56064712bf5 Mon Sep 17 00:00:00 2001 From: xuelijun <977662702@qq.com> Date: Wed, 21 Jan 2026 16:28:53 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BB=8A=E6=97=A5=E5=B7=B2=E7=BB=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 12 + .../com/tem/bocai/config/CacheConfig.java | 42 ++ .../tem/bocai/controller/LoginCrawler.java | 6 + .../com/tem/bocai/service/LoginService.java | 4 + .../bocai/service/impl/LoginServiceImpl.java | 20 + .../tem/bocai/util/CompletedTodayCrawler.java | 396 ++++++++++++++++++ .../bocai/util/LotteryWebMagicCrawler.java | 1 + 7 files changed, 481 insertions(+) create mode 100644 src/main/java/com/tem/bocai/config/CacheConfig.java create mode 100644 src/main/java/com/tem/bocai/util/CompletedTodayCrawler.java diff --git a/pom.xml b/pom.xml index 44d7c7c..9b6e291 100644 --- a/pom.xml +++ b/pom.xml @@ -78,6 +78,18 @@ tess4j 5.8.0 --> + + + org.springframework.boot + spring-boot-starter-cache + + + + + com.github.ben-manes.caffeine + caffeine + 3.1.8 + diff --git a/src/main/java/com/tem/bocai/config/CacheConfig.java b/src/main/java/com/tem/bocai/config/CacheConfig.java new file mode 100644 index 0000000..0187c97 --- /dev/null +++ b/src/main/java/com/tem/bocai/config/CacheConfig.java @@ -0,0 +1,42 @@ +package com.tem.bocai.config; + + +import com.github.benmanes.caffeine.cache.Caffeine; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.cache.CacheManager; +import org.springframework.cache.annotation.EnableCaching; +import org.springframework.cache.caffeine.CaffeineCacheManager; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Primary; + +import java.util.concurrent.TimeUnit; + +@Configuration +@EnableCaching // 启用缓存支持 +public class CacheConfig { + + /** + * 主缓存管理器 - 用于token缓存(19分钟过期) + */ + @Bean + @Primary // 标记为主缓存管理器 + public CacheManager cacheManager() { + CaffeineCacheManager cacheManager = new CaffeineCacheManager(); + cacheManager.setCaffeine(Caffeine.newBuilder() + .expireAfterWrite(19, TimeUnit.MINUTES) // 19分钟过期 + .maximumSize(100) // 最大缓存数量 + .recordStats() // 记录统计信息 + ); + return cacheManager; + } + + /** + * 备用缓存管理器 - 如果没有配置Caffeine,使用ConcurrentMap + */ + @Bean + @ConditionalOnMissingBean(CacheManager.class) + public CacheManager fallbackCacheManager() { + return new org.springframework.cache.concurrent.ConcurrentMapCacheManager(); + } +} diff --git a/src/main/java/com/tem/bocai/controller/LoginCrawler.java b/src/main/java/com/tem/bocai/controller/LoginCrawler.java index 3e19e89..2455772 100644 --- a/src/main/java/com/tem/bocai/controller/LoginCrawler.java +++ b/src/main/java/com/tem/bocai/controller/LoginCrawler.java @@ -24,6 +24,12 @@ public class LoginCrawler { return ResponseEntity.ok(result); } + @GetMapping("/ocr/completedToday") + public ResponseEntity completedToday() throws IOException, TesseractException { + String result = loginService.completedToday(); + return ResponseEntity.ok(result); + } + } diff --git a/src/main/java/com/tem/bocai/service/LoginService.java b/src/main/java/com/tem/bocai/service/LoginService.java index 496828d..50758d3 100644 --- a/src/main/java/com/tem/bocai/service/LoginService.java +++ b/src/main/java/com/tem/bocai/service/LoginService.java @@ -7,4 +7,8 @@ public interface LoginService { //获取token String getToken(String username, String password, String loginUrl); + + //获取token + String completedToday(); + } diff --git a/src/main/java/com/tem/bocai/service/impl/LoginServiceImpl.java b/src/main/java/com/tem/bocai/service/impl/LoginServiceImpl.java index 6a5bde7..016b7de 100644 --- a/src/main/java/com/tem/bocai/service/impl/LoginServiceImpl.java +++ b/src/main/java/com/tem/bocai/service/impl/LoginServiceImpl.java @@ -1,6 +1,7 @@ package com.tem.bocai.service.impl; import com.tem.bocai.service.LoginService; +import com.tem.bocai.util.CompletedTodayCrawler; import com.tem.bocai.util.LotteryDataPipeline; import com.tem.bocai.util.LotteryWebMagicCrawler; import com.tem.bocai.util.TokenCacheService; @@ -107,6 +108,25 @@ public class LoginServiceImpl implements LoginService { return ""; } + @Override + public String completedToday() { + String token = tokenCacheService.getToken(); + System.out.println("得到token = " + token); + if (token != null && !token.isEmpty()) { + // 2. 创建爬虫实例,传入token + CompletedTodayCrawler crawler = new CompletedTodayCrawler(token); + + // 4. 执行爬虫 + String url = "https://4701268539-esh.qdk63ayw8g.com/member/bets?settled=true"; + + Spider.create(crawler) + .addUrl(url) + .thread(1) + .run(); + } + return ""; + } + /** * 单次登录尝试 */ diff --git a/src/main/java/com/tem/bocai/util/CompletedTodayCrawler.java b/src/main/java/com/tem/bocai/util/CompletedTodayCrawler.java new file mode 100644 index 0000000..8a1b752 --- /dev/null +++ b/src/main/java/com/tem/bocai/util/CompletedTodayCrawler.java @@ -0,0 +1,396 @@ +package com.tem.bocai.util; + + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import us.codecraft.webmagic.Page; +import us.codecraft.webmagic.Site; +import us.codecraft.webmagic.Spider; +import us.codecraft.webmagic.processor.PageProcessor; +import us.codecraft.webmagic.selector.Html; + +import java.io.File; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class CompletedTodayCrawler implements PageProcessor { + + private final String token; + private Site site; + private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + public CompletedTodayCrawler(String token) { + this.token = token; + initSite(); + } + + private void initSite() { + site = Site.me() + .setRetryTimes(3) + .setSleepTime(2000) // 增加等待时间 + .setTimeOut(15000) // 增加超时时间 + .setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36") + .addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") + .addHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + .addHeader("Accept-Encoding", "gzip, deflate, br") + .addHeader("Connection", "keep-alive") + .addHeader("Upgrade-Insecure-Requests", "1") + .addHeader("Sec-Fetch-Dest", "document") + .addHeader("Sec-Fetch-Mode", "navigate") + .addHeader("Sec-Fetch-Site", "same-origin") + .addHeader("Sec-Fetch-User", "?1"); + + // 设置cookie + if (token != null && !token.isEmpty()) { + site.addHeader("cookie", "token=" + token); + } + } + + @Override + public void process(Page page) { + String url = page.getUrl().toString(); + System.out.println("处理页面: " + url); + + Html html = page.getHtml(); + String content = html.toString(); + + // 打印一些基本信息 + System.out.println("页面标题: " + html.xpath("//title/text()").get()); + System.out.println("页面大小: " + content.length() + " 字符"); + + // 检查是否有"暂无数据"提示 + if (content.contains("暂无数据")) { + System.out.println("警告: 页面显示'暂无数据'"); + return; + } + // 解析注单数据 + List> betList = parseBetHtml(content); + if (betList.isEmpty()) { + System.out.println("未解析到注单数据"); + + // 尝试从其他可能的位置解析 + extractDebugInfo(html); + } else { + System.out.println("解析到 " + betList.size() + " 条注单数据"); + + // 打印部分数据示例 + printSampleData(betList); + } + + // 保存原始HTML用于调试 + saveHtmlForDebug(content, url); + } + + /** + * 解析注单HTML数据 + */ + private List> parseBetHtml(String htmlContent) { + List> betList = new ArrayList<>(); + + try { + Document doc = Jsoup.parse(htmlContent); + + // 查找注单表格 + Element table = doc.selectFirst("table.list"); + if (table == null) { + System.out.println("未找到注单表格"); + return betList; + } + + // 查找表头 + Elements headers = table.select("thead th"); + List headerList = new ArrayList<>(); + for (Element header : headers) { + headerList.add(header.text().trim()); + } + System.out.println("表头信息: " + headerList); + + // 查找数据行(跳过表头) + Elements rows = table.select("tbody tr"); + + for (Element row : rows) { + // 跳过"暂无数据"的行 + if (row.select("td.nodata").size() > 0) { + continue; + } + + Map betData = new HashMap<>(); + Elements cells = row.select("td"); + + // 按列解析数据 + for (int i = 0; i < cells.size() && i < headerList.size(); i++) { + String header = headerList.get(i); + String value = cells.get(i).text().trim(); + + // 根据表头映射到对应的字段名 + switch (header) { + case "注单号": + betData.put("bet_id", value); + break; + case "时间": + betData.put("time", value); + break; + case "类型": + betData.put("type", value); + break; + case "玩法": + betData.put("game_type", value); + break; + case "盘": + betData.put("plate", value); + break; + case "下注金额": + betData.put("bet_amount", parseAmount(value)); + break; + case "退水(%)": + betData.put("rebate_rate", parseRate(value)); + break; + case "结果": + betData.put("result", parseResult(value)); + betData.put("result_amount", parseResultAmount(value)); + break; + default: + betData.put(header, value); + } + } + + // 添加额外信息 + if (!betData.isEmpty()) { + betData.put("parse_time", dateFormat.format(new Date())); + betData.put("source", "completed_today"); + + // 提取期数信息(从玩法中提取) + extractPeriodInfo(betData); + + betList.add(betData); + } + } + + } catch (Exception e) { + System.err.println("解析HTML时出错: " + e.getMessage()); + e.printStackTrace(); + } + + return betList; + } + + /** + * 从玩法中提取期数信息 + */ + private void extractPeriodInfo(Map betData) { + try { + Object gameTypeObj = betData.get("game_type"); + if (gameTypeObj instanceof String) { + String gameType = (String) gameTypeObj; + + // 尝试匹配期数模式,如"2024001", "001", "期号2024001"等 + Pattern pattern = Pattern.compile("(\\d{7})|期[号码]?(\\d{3,7})|(\\d{3,4})期"); + Matcher matcher = pattern.matcher(gameType); + + if (matcher.find()) { + for (int i = 1; i <= matcher.groupCount(); i++) { + if (matcher.group(i) != null) { + betData.put("period", matcher.group(i)); + break; + } + } + } + } + } catch (Exception e) { + // 忽略提取错误 + } + } + + /** + * 解析金额(去除货币符号) + */ + private Double parseAmount(String amountStr) { + try { + if (amountStr == null || amountStr.isEmpty()) { + return 0.0; + } + // 移除非数字字符(保留小数点和负号) + String cleaned = amountStr.replaceAll("[^\\d.-]", ""); + return cleaned.isEmpty() ? 0.0 : Double.parseDouble(cleaned); + } catch (Exception e) { + return 0.0; + } + } + + /** + * 解析退水率 + */ + private Double parseRate(String rateStr) { + try { + if (rateStr == null || rateStr.isEmpty()) { + return 0.0; + } + // 移除百分号 + String cleaned = rateStr.replace("%", "").trim(); + return cleaned.isEmpty() ? 0.0 : Double.parseDouble(cleaned) / 100; + } catch (Exception e) { + return 0.0; + } + } + + /** + * 解析结果状态 + */ + private String parseResult(String resultStr) { + if (resultStr == null) { + return "未知"; + } + + if (resultStr.contains("赢") || resultStr.contains("+")) { + return "赢"; + } else if (resultStr.contains("输") || resultStr.contains("-")) { + return "输"; + } else if (resultStr.contains("和") || resultStr.contains("0")) { + return "和"; + } else if (resultStr.contains("取消")) { + return "取消"; + } else { + return "未知"; + } + } + + /** + * 解析结果金额 + */ + private Double parseResultAmount(String resultStr) { + try { + // 提取数字部分(包含负号) + Pattern pattern = Pattern.compile("[-+]?\\d+\\.?\\d*"); + Matcher matcher = pattern.matcher(resultStr); + + if (matcher.find()) { + return Double.parseDouble(matcher.group()); + } + return 0.0; + } catch (Exception e) { + return 0.0; + } + } + + /** + * 转换数据结构以适应数据库 + */ + private List> convertForDatabase(List> betList) { + List> dbData = new ArrayList<>(); + + for (Map bet : betList) { + Map dbRecord = new HashMap<>(); + + dbRecord.put("id", bet.get("bet_id")); + dbRecord.put("bet_id", bet.get("bet_id")); + dbRecord.put("period", bet.get("period")); + dbRecord.put("bet_time", bet.get("time")); + dbRecord.put("game_type", bet.get("game_type")); + dbRecord.put("plate", bet.get("plate")); + dbRecord.put("bet_amount", bet.get("bet_amount")); + dbRecord.put("rebate_rate", bet.get("rebate_rate")); + dbRecord.put("result", bet.get("result")); + dbRecord.put("result_amount", bet.get("result_amount")); + dbRecord.put("parse_time", bet.get("parse_time")); + dbRecord.put("source", bet.get("source")); + + dbData.add(dbRecord); + } + + return dbData; + } + + + /** + * 保存HTML用于调试 + */ + private void saveHtmlForDebug(String content, String url) { + try { + String safeUrl = url.replaceAll("[^a-zA-Z0-9]", "_"); + String fileName = "debug_" + safeUrl + "_" + System.currentTimeMillis() + ".html"; + String filePath = "output/debug/" + fileName; + + File directory = new File("output/debug"); + if (!directory.exists()) { + directory.mkdirs(); + } + + File outputFile = new File(filePath); + java.nio.file.Files.write(outputFile.toPath(), content.getBytes()); + + System.out.println("调试HTML已保存: " + outputFile.getAbsolutePath()); + + } catch (Exception e) { + System.err.println("保存调试HTML失败: " + e.getMessage()); + } + } + + /** + * 提取调试信息 + */ + private void extractDebugInfo(Html html) { + System.out.println("\n=== 调试信息 ==="); + + // 检查所有表格 + List tables = html.xpath("//table/@class").all(); + System.out.println("所有表格class: " + tables); + + // 检查所有tr + int trCount = html.xpath("//tr").all().size(); + System.out.println("TR数量: " + trCount); + + // 检查所有td + int tdCount = html.xpath("//td").all().size(); + System.out.println("TD数量: " + tdCount); + + // 检查cookie相关元素 + String cookieScript = html.xpath("//script[contains(text(), 'token')]/text()").get(); + if (cookieScript != null && cookieScript.contains("token")) { + System.out.println("发现token相关脚本"); + } + + // 检查是否有JavaScript重定向 + String redirectScript = html.xpath("//script[contains(text(), 'location.href') or contains(text(), 'window.location')]/text()").get(); + if (redirectScript != null) { + System.out.println("发现重定向脚本: " + redirectScript.substring(0, Math.min(100, redirectScript.length()))); + } + } + + /** + * 打印示例数据 + */ + private void printSampleData(List> betList) { + System.out.println("\n=== 前3条数据示例 ==="); + int count = Math.min(3, betList.size()); + for (int i = 0; i < count; i++) { + Map bet = betList.get(i); + System.out.printf("注单%d: ID=%s, 时间=%s, 金额=%.2f, 结果=%s, 金额=%.2f%n", + i + 1, + bet.get("bet_id"), + bet.get("time"), + bet.get("bet_amount"), + bet.get("result"), + bet.get("result_amount")); + } + } + + @Override + public Site getSite() { + return site; + } + + public static void main(String[] args) { + String url = "https://4701268539-esh.qdk63ayw8g.com/member/bets?settled=true"; + // 创建爬虫 + Spider.create(new CompletedTodayCrawler("")) + .addUrl(url) // 添加起始URL + .thread(1) // 线程数 + .run(); // 开始爬取 + } + + +} diff --git a/src/main/java/com/tem/bocai/util/LotteryWebMagicCrawler.java b/src/main/java/com/tem/bocai/util/LotteryWebMagicCrawler.java index ad38d95..188b5c4 100644 --- a/src/main/java/com/tem/bocai/util/LotteryWebMagicCrawler.java +++ b/src/main/java/com/tem/bocai/util/LotteryWebMagicCrawler.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.IOException; import java.util.*; +//开奖的历史结果 public class LotteryWebMagicCrawler implements PageProcessor { private final String token;