爬虫重试:为开奖结果与今日注单的爬取任务增加失败重试机制(最多重试 3 次,失败时从数据库重新获取 token 并递增等待)

This commit is contained in:
xuelijun
2026-01-22 16:14:43 +08:00
parent 09ff742cf2
commit e10d40f8b2
4 changed files with 216 additions and 15 deletions

View File

@@ -19,11 +19,72 @@ public class CrawlerSchedule {
private TokenCacheService tokenCacheService; private TokenCacheService tokenCacheService;
@Autowired @Autowired
private LoginInfoRepository loginInfoRepository; private LoginInfoRepository loginInfoRepository;
private static final int MAX_CRA = 3;
// 每天凌晨2点执行爬取开奖结果 // 每天凌晨2点执行爬取开奖结果
@Scheduled(cron = "0 0 2 * * ?") //@Scheduled(cron = "0 0 2 * * ?")
// 每7秒执行一次爬取开奖结果 // 每7秒执行一次爬取开奖结果
//@Scheduled(cron = "*/7 * * * * ?") //@Scheduled(cron = "*/9 * * * * ?")
public void executeLotteryDraw() { public void executeLotteryDraw() {
System.out.println("开始爬取开奖结果...");
int retryCount = 0;
boolean success = false;
String token = tokenCacheService.getToken();
while (!success && retryCount < MAX_CRA) {
System.out.println("\n=== 第 " + (retryCount + 1) + " 次尝试获取开奖结果 ===");
if (token == null || token.isEmpty()) {
System.out.println("token为空从数据库重新获取");
token = tokenCacheService.getTokenSqlite();
if (token == null) {
System.err.println("无法获取有效token");
}
}
System.out.println("使用token: " + (token.length() > 20 ? token.substring(0, 20) + "..." : token));
// 创建爬虫实例传入token
LotteryWebMagicCrawler crawler = new LotteryWebMagicCrawler(token);
LoginInfoResult firstByOrderByCreateTimeDesc = loginInfoRepository.findFirstByOrderByCreateTimeDesc()
.orElse(null);
String YesterdayDate = DateUtils.getYesterdayDate();// 4. 执行爬虫
String url = firstByOrderByCreateTimeDesc.getLoginUrl()+"/member/dresult?lottery=SGFT&date="+YesterdayDate;
Spider.create(crawler)
.addUrl(url)
.thread(1)
.run();
// 检查是否成功解析数据
success = LotteryWebMagicCrawler.isLastParseSuccess();
if (!success) {
System.out.println("本次尝试未解析到数据");
// 重新获取token下次重试用
token = tokenCacheService.getTokenSqlite();
retryCount++;
// 等待一下再重试
if (retryCount < MAX_CRA) {
try {
Thread.sleep(2000 * retryCount); // 等待时间递增
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
} else {
System.out.println("成功解析到数据");
}
}
if (!success) {
System.err.println("获取开奖结果失败,所有重试均未成功");
}
}
/*public void executeLotteryDraw() {
System.out.println("开始爬取开奖结果..."); System.out.println("开始爬取开奖结果...");
String token = tokenCacheService.getToken(); String token = tokenCacheService.getToken();
System.out.println("得到token = " + token); System.out.println("得到token = " + token);
@@ -39,12 +100,71 @@ public class CrawlerSchedule {
.run(); .run();
} }
} }*/
// 每7秒执行一次爬取今日已经结算 // 每7秒执行一次爬取今日已经结算
//@Scheduled(cron = "*/7 * * * * ?") //@Scheduled(cron = "*/7 * * * * ?")
public void executeSettlement() { public void executeSettlement() {
System.out.println("开始爬取今日已经结算...");
int retryCount = 0;
boolean success = false;
String token = tokenCacheService.getToken();
while (!success && retryCount < MAX_CRA) {
System.out.println("\n=== 第 " + (retryCount + 1) + " 次尝试获取今日注单 ===");
if (token == null || token.isEmpty()) {
System.out.println("token为空从数据库重新获取");
token = tokenCacheService.getTokenSqlite();
if (token == null) {
System.err.println("无法获取有效token");
}
}
System.out.println("使用token: " + (token.length() > 20 ? token.substring(0, 20) + "..." : token));
// 创建爬虫实例传入token
CompletedTodayCrawler crawler = new CompletedTodayCrawler(token);
LoginInfoResult firstByOrderByCreateTimeDesc = loginInfoRepository.findFirstByOrderByCreateTimeDesc()
.orElse(null);
// 执行爬虫
String url = firstByOrderByCreateTimeDesc.getLoginUrl()+"/member/bets?settled=true";
Spider.create(crawler)
.addUrl(url)
.thread(1)
.run();
// 检查是否成功解析数据
success = CompletedTodayCrawler.isLastParseSuccess();
if (!success) {
System.out.println("本次尝试未解析到数据");
// 重新获取token下次重试用
token = tokenCacheService.getTokenSqlite();
retryCount++;
// 等待一下再重试
if (retryCount < MAX_CRA) {
try {
Thread.sleep(2000 * retryCount); // 等待时间递增
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
} else {
System.out.println("成功解析到数据");
}
}
if (!success) {
System.err.println("获取今日注单失败,所有重试均未成功");
}
}
/*public void executeSettlement() {
String token = tokenCacheService.getToken(); String token = tokenCacheService.getToken();
System.out.println("得到token = " + token); System.out.println("得到token = " + token);
if (token != null && !token.isEmpty()) { if (token != null && !token.isEmpty()) {
@@ -59,7 +179,7 @@ public class CrawlerSchedule {
.thread(1) .thread(1)
.run(); .run();
} }
} }*/
} }

View File

@@ -26,7 +26,7 @@ public class LoginServiceImpl implements LoginService {
private TokenCacheService tokenCacheService; private TokenCacheService tokenCacheService;
@Autowired @Autowired
private LoginInfoRepository loginInfoRepository; private LoginInfoRepository loginInfoRepository;
private static final int MAX_CRA = 3;
private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
@Override @Override
public String loginAutomatic(LoginInfoParam loginInfoParam) { public String loginAutomatic(LoginInfoParam loginInfoParam) {
@@ -35,7 +35,6 @@ public class LoginServiceImpl implements LoginService {
System.out.println("\n=== 第 " + attempt + " 次尝试 ==="); System.out.println("\n=== 第 " + attempt + " 次尝试 ===");
try { try {
token = tokenCacheService.attemptLogin(loginInfoParam); token = tokenCacheService.attemptLogin(loginInfoParam);
System.out.println("1token = " + token);
tokenCacheService.saveToken(token); tokenCacheService.saveToken(token);
if (token != null && !token.isEmpty()) { if (token != null && !token.isEmpty()) {
//保存用户信息 //保存用户信息
@@ -46,7 +45,7 @@ public class LoginServiceImpl implements LoginService {
// 3. 创建数据处理器 // 3. 创建数据处理器
LotteryDataPipeline pipeline = new LotteryDataPipeline(); LotteryDataPipeline pipeline = new LotteryDataPipeline();
// 4. 执行爬虫 // 4. 执行爬虫
String url = "https://4701268539-esh.qdk63ayw8g.com/member/dresult?lottery=SGFT&date=2026-01-21"; String url = "https://4701268539-esh.qdk63ayw8g.com/member/dresult?lottery=SGFT&date=2026-01-18";
Spider.create(crawler) Spider.create(crawler)
.addUrl(url) .addUrl(url)
@@ -71,7 +70,7 @@ public class LoginServiceImpl implements LoginService {
return ""; return "";
} }
@Override /* @Override
public String completedToday() { public String completedToday() {
String token = tokenCacheService.getToken(); String token = tokenCacheService.getToken();
System.out.println("得到token = " + token); System.out.println("得到token = " + token);
@@ -88,6 +87,67 @@ public class LoginServiceImpl implements LoginService {
.run(); .run();
} }
return ""; return "";
}*/
@Override
public String completedToday() {
int retryCount = 0;
boolean success = false;
String token = tokenCacheService.getToken();
while (!success && retryCount < MAX_CRA) {
System.out.println("\n=== 第 " + (retryCount + 1) + " 次尝试获取今日注单 ===");
if (token == null || token.isEmpty()) {
System.out.println("token为空从数据库重新获取");
token = tokenCacheService.getTokenSqlite();
if (token == null) {
System.err.println("无法获取有效token");
return "";
}
}
System.out.println("使用token: " + (token.length() > 20 ? token.substring(0, 20) + "..." : token));
// 创建爬虫实例传入token
CompletedTodayCrawler crawler = new CompletedTodayCrawler(token);
// 执行爬虫
String url = "https://4701268539-esh.qdk63ayw8g.com/member/bets?settled=true";
Spider.create(crawler)
.addUrl(url)
.thread(1)
.run();
// 检查是否成功解析数据
success = CompletedTodayCrawler.isLastParseSuccess();
if (!success) {
System.out.println("本次尝试未解析到数据");
// 重新获取token下次重试用
token = tokenCacheService.getTokenSqlite();
retryCount++;
// 等待一下再重试
if (retryCount < MAX_CRA) {
try {
Thread.sleep(2000 * retryCount); // 等待时间递增
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
} else {
System.out.println("成功解析到数据");
}
}
if (!success) {
System.err.println("获取今日注单失败,所有重试均未成功");
}
return success ? "success" : "";
} }

View File

@@ -29,7 +29,8 @@ public class CompletedTodayCrawler implements PageProcessor {
private final String token; private final String token;
private Site site; private Site site;
private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
// 添加一个字段标记是否成功解析数据
private static volatile boolean lastParseSuccess = true;
// 或者提供带参数的构造函数 // 或者提供带参数的构造函数
public CompletedTodayCrawler(String token) { public CompletedTodayCrawler(String token) {
@@ -74,16 +75,18 @@ public class CompletedTodayCrawler implements PageProcessor {
// 检查是否有"暂无数据"提示 // 检查是否有"暂无数据"提示
if (content.contains("暂无数据")) { if (content.contains("暂无数据")) {
System.out.println("警告: 页面显示'暂无数据'"); System.out.println("警告: 页面显示'暂无数据'");
lastParseSuccess = false; // 标记失败
return; return;
} }
// 解析注单数据 // 解析注单数据
List<Map<String, Object>> betList = parseBetHtml(content); List<Map<String, Object>> betList = parseBetHtml(content);
if (betList.isEmpty()) { if (betList.isEmpty()) {
System.out.println("未解析到注单数据"); System.out.println("未解析到注单数据");
lastParseSuccess = false; // 标记失败
// 尝试从其他可能的位置解析 // 尝试从其他可能的位置解析
extractDebugInfo(html); //extractDebugInfo(html);
} else { } else {
lastParseSuccess = true; // 标记成功
System.out.println("解析到 " + betList.size() + " 条注单数据"); System.out.println("解析到 " + betList.size() + " 条注单数据");
List<CompletedToday> completedTodayList = convertForDatabase(betList); List<CompletedToday> completedTodayList = convertForDatabase(betList);
SQLiteUtil.saveCompletedToday(completedTodayList); SQLiteUtil.saveCompletedToday(completedTodayList);
@@ -394,6 +397,15 @@ public class CompletedTodayCrawler implements PageProcessor {
return site; return site;
} }
/**
 * Reports whether the most recent {@code process} run parsed bet data.
 *
 * <p>Backed by a {@code static volatile} flag that the crawler sets to
 * {@code false} on "暂无数据"/empty results and {@code true} when rows are
 * parsed; callers use it to decide whether a retry is needed.
 *
 * @return {@code true} if the last parse extracted data, {@code false} otherwise
 */
public static boolean isLastParseSuccess() {
return lastParseSuccess;
}
public static void main(String[] args) { public static void main(String[] args) {
String url = "https://4701268539-esh.qdk63ayw8g.com/member/bets?settled=true"; String url = "https://4701268539-esh.qdk63ayw8g.com/member/bets?settled=true";
// 创建爬虫 // 创建爬虫

View File

@@ -30,7 +30,8 @@ public class LotteryWebMagicCrawler implements PageProcessor {
// 站点配置 // 站点配置
private Site site; private Site site;
// final LoginService loginService; // final LoginService loginService;
// 添加一个字段标记是否成功解析数据
private static volatile boolean lastParseSuccess = true;
public LotteryWebMagicCrawler(String token) { public LotteryWebMagicCrawler(String token) {
this.token = token; this.token = token;
@@ -66,14 +67,17 @@ public class LotteryWebMagicCrawler implements PageProcessor {
// 示例:提取所有表格数据 // 示例:提取所有表格数据
Selectable tables = html.xpath("//table"); Selectable tables = html.xpath("//table");
System.out.println("找到 " + tables.nodes().size() + " 个表格"); System.out.println("找到 " + tables.nodes().size() + " 个表格");
if(tables.nodes().isEmpty()){
lastParseSuccess = false;
}else {
lastParseSuccess = true;
}
// 提取表格数据(根据实际页面结构调整选择器) // 提取表格数据(根据实际页面结构调整选择器)
extractTableData(html); extractTableData(html);
// 示例:提取所有链接 // 示例:提取所有链接
Selectable links = html.links(); Selectable links = html.links();
System.out.println("页面包含 " + links.all().size() + " 个链接"); System.out.println("页面包含 " + links.all().size() + " 个链接");
// 如果需要继续爬取其他页面 // 如果需要继续爬取其他页面
// page.addTargetRequests(links.all()); // page.addTargetRequests(links.all());
@@ -103,7 +107,12 @@ public class LotteryWebMagicCrawler implements PageProcessor {
return site; return site;
} }
/**
 * Reports whether the most recent {@code process} run found result tables.
 *
 * <p>Backed by a {@code static volatile} flag set from the table-count check
 * (false when no {@code //table} nodes were found); callers use it to decide
 * whether a retry is needed.
 *
 * @return {@code true} if the last parse found at least one table, {@code false} otherwise
 */
public static boolean isLastParseSuccess() {
return lastParseSuccess;
}
/** /**
* 解析彩票HTML数据转换成指定的List<Map<String, Object>>格式 * 解析彩票HTML数据转换成指定的List<Map<String, Object>>格式
* *