爬虫重试
This commit is contained in:
@@ -19,11 +19,72 @@ public class CrawlerSchedule {
|
||||
private TokenCacheService tokenCacheService;
|
||||
@Autowired
|
||||
private LoginInfoRepository loginInfoRepository;
|
||||
private static final int MAX_CRA = 3;
|
||||
// 每天凌晨2点执行爬取开奖结果
|
||||
@Scheduled(cron = "0 0 2 * * ?")
|
||||
//@Scheduled(cron = "0 0 2 * * ?")
|
||||
// 每7秒执行一次爬取开奖结果
|
||||
//@Scheduled(cron = "*/7 * * * * ?")
|
||||
//@Scheduled(cron = "*/9 * * * * ?")
|
||||
public void executeLotteryDraw() {
|
||||
System.out.println("开始爬取开奖结果...");
|
||||
int retryCount = 0;
|
||||
boolean success = false;
|
||||
String token = tokenCacheService.getToken();
|
||||
|
||||
while (!success && retryCount < MAX_CRA) {
|
||||
System.out.println("\n=== 第 " + (retryCount + 1) + " 次尝试获取开奖结果 ===");
|
||||
|
||||
if (token == null || token.isEmpty()) {
|
||||
System.out.println("token为空,从数据库重新获取");
|
||||
token = tokenCacheService.getTokenSqlite();
|
||||
if (token == null) {
|
||||
System.err.println("无法获取有效token");
|
||||
}
|
||||
}
|
||||
System.out.println("使用token: " + (token.length() > 20 ? token.substring(0, 20) + "..." : token));
|
||||
|
||||
// 创建爬虫实例,传入token
|
||||
LotteryWebMagicCrawler crawler = new LotteryWebMagicCrawler(token);
|
||||
LoginInfoResult firstByOrderByCreateTimeDesc = loginInfoRepository.findFirstByOrderByCreateTimeDesc()
|
||||
.orElse(null);
|
||||
String YesterdayDate = DateUtils.getYesterdayDate();// 4. 执行爬虫
|
||||
String url = firstByOrderByCreateTimeDesc.getLoginUrl()+"/member/dresult?lottery=SGFT&date="+YesterdayDate;
|
||||
|
||||
Spider.create(crawler)
|
||||
.addUrl(url)
|
||||
.thread(1)
|
||||
.run();
|
||||
|
||||
// 检查是否成功解析数据
|
||||
success = LotteryWebMagicCrawler.isLastParseSuccess();
|
||||
|
||||
if (!success) {
|
||||
System.out.println("本次尝试未解析到数据");
|
||||
|
||||
// 重新获取token(下次重试用)
|
||||
token = tokenCacheService.getTokenSqlite();
|
||||
retryCount++;
|
||||
|
||||
// 等待一下再重试
|
||||
if (retryCount < MAX_CRA) {
|
||||
try {
|
||||
Thread.sleep(2000 * retryCount); // 等待时间递增
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
System.out.println("成功解析到数据");
|
||||
}
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
System.err.println("获取开奖结果失败,所有重试均未成功");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*public void executeLotteryDraw() {
|
||||
System.out.println("开始爬取开奖结果...");
|
||||
String token = tokenCacheService.getToken();
|
||||
System.out.println("得到token = " + token);
|
||||
@@ -39,12 +100,71 @@ public class CrawlerSchedule {
|
||||
.run();
|
||||
}
|
||||
|
||||
}
|
||||
}*/
|
||||
|
||||
|
||||
// 每7秒执行一次爬取今日已经结算
|
||||
//@Scheduled(cron = "*/7 * * * * ?")
|
||||
public void executeSettlement() {
|
||||
System.out.println("开始爬取今日已经结算...");
|
||||
int retryCount = 0;
|
||||
boolean success = false;
|
||||
String token = tokenCacheService.getToken();
|
||||
|
||||
while (!success && retryCount < MAX_CRA) {
|
||||
System.out.println("\n=== 第 " + (retryCount + 1) + " 次尝试获取今日注单 ===");
|
||||
|
||||
if (token == null || token.isEmpty()) {
|
||||
System.out.println("token为空,从数据库重新获取");
|
||||
token = tokenCacheService.getTokenSqlite();
|
||||
if (token == null) {
|
||||
System.err.println("无法获取有效token");
|
||||
}
|
||||
}
|
||||
System.out.println("使用token: " + (token.length() > 20 ? token.substring(0, 20) + "..." : token));
|
||||
|
||||
// 创建爬虫实例,传入token
|
||||
CompletedTodayCrawler crawler = new CompletedTodayCrawler(token);
|
||||
LoginInfoResult firstByOrderByCreateTimeDesc = loginInfoRepository.findFirstByOrderByCreateTimeDesc()
|
||||
.orElse(null);
|
||||
// 执行爬虫
|
||||
String url = firstByOrderByCreateTimeDesc.getLoginUrl()+"/member/bets?settled=true";
|
||||
|
||||
Spider.create(crawler)
|
||||
.addUrl(url)
|
||||
.thread(1)
|
||||
.run();
|
||||
|
||||
// 检查是否成功解析数据
|
||||
success = CompletedTodayCrawler.isLastParseSuccess();
|
||||
|
||||
if (!success) {
|
||||
System.out.println("本次尝试未解析到数据");
|
||||
|
||||
// 重新获取token(下次重试用)
|
||||
token = tokenCacheService.getTokenSqlite();
|
||||
retryCount++;
|
||||
|
||||
// 等待一下再重试
|
||||
if (retryCount < MAX_CRA) {
|
||||
try {
|
||||
Thread.sleep(2000 * retryCount); // 等待时间递增
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
System.out.println("成功解析到数据");
|
||||
}
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
System.err.println("获取今日注单失败,所有重试均未成功");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*public void executeSettlement() {
|
||||
String token = tokenCacheService.getToken();
|
||||
System.out.println("得到token = " + token);
|
||||
if (token != null && !token.isEmpty()) {
|
||||
@@ -59,7 +179,7 @@ public class CrawlerSchedule {
|
||||
.thread(1)
|
||||
.run();
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ public class LoginServiceImpl implements LoginService {
|
||||
private TokenCacheService tokenCacheService;
|
||||
@Autowired
|
||||
private LoginInfoRepository loginInfoRepository;
|
||||
|
||||
private static final int MAX_CRA = 3;
|
||||
private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
@Override
|
||||
public String loginAutomatic(LoginInfoParam loginInfoParam) {
|
||||
@@ -35,7 +35,6 @@ public class LoginServiceImpl implements LoginService {
|
||||
System.out.println("\n=== 第 " + attempt + " 次尝试 ===");
|
||||
try {
|
||||
token = tokenCacheService.attemptLogin(loginInfoParam);
|
||||
System.out.println("1token = " + token);
|
||||
tokenCacheService.saveToken(token);
|
||||
if (token != null && !token.isEmpty()) {
|
||||
//保存用户信息
|
||||
@@ -46,7 +45,7 @@ public class LoginServiceImpl implements LoginService {
|
||||
// 3. 创建数据处理器
|
||||
LotteryDataPipeline pipeline = new LotteryDataPipeline();
|
||||
// 4. 执行爬虫
|
||||
String url = "https://4701268539-esh.qdk63ayw8g.com/member/dresult?lottery=SGFT&date=2026-01-21";
|
||||
String url = "https://4701268539-esh.qdk63ayw8g.com/member/dresult?lottery=SGFT&date=2026-01-18";
|
||||
|
||||
Spider.create(crawler)
|
||||
.addUrl(url)
|
||||
@@ -71,7 +70,7 @@ public class LoginServiceImpl implements LoginService {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
/* @Override
|
||||
public String completedToday() {
|
||||
String token = tokenCacheService.getToken();
|
||||
System.out.println("得到token = " + token);
|
||||
@@ -88,6 +87,67 @@ public class LoginServiceImpl implements LoginService {
|
||||
.run();
|
||||
}
|
||||
return "";
|
||||
}*/
|
||||
|
||||
@Override
|
||||
public String completedToday() {
|
||||
int retryCount = 0;
|
||||
boolean success = false;
|
||||
String token = tokenCacheService.getToken();
|
||||
|
||||
while (!success && retryCount < MAX_CRA) {
|
||||
System.out.println("\n=== 第 " + (retryCount + 1) + " 次尝试获取今日注单 ===");
|
||||
|
||||
if (token == null || token.isEmpty()) {
|
||||
System.out.println("token为空,从数据库重新获取");
|
||||
token = tokenCacheService.getTokenSqlite();
|
||||
if (token == null) {
|
||||
System.err.println("无法获取有效token");
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("使用token: " + (token.length() > 20 ? token.substring(0, 20) + "..." : token));
|
||||
|
||||
// 创建爬虫实例,传入token
|
||||
CompletedTodayCrawler crawler = new CompletedTodayCrawler(token);
|
||||
|
||||
// 执行爬虫
|
||||
String url = "https://4701268539-esh.qdk63ayw8g.com/member/bets?settled=true";
|
||||
|
||||
Spider.create(crawler)
|
||||
.addUrl(url)
|
||||
.thread(1)
|
||||
.run();
|
||||
|
||||
// 检查是否成功解析数据
|
||||
success = CompletedTodayCrawler.isLastParseSuccess();
|
||||
|
||||
if (!success) {
|
||||
System.out.println("本次尝试未解析到数据");
|
||||
|
||||
// 重新获取token(下次重试用)
|
||||
token = tokenCacheService.getTokenSqlite();
|
||||
retryCount++;
|
||||
|
||||
// 等待一下再重试
|
||||
if (retryCount < MAX_CRA) {
|
||||
try {
|
||||
Thread.sleep(2000 * retryCount); // 等待时间递增
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
System.out.println("成功解析到数据");
|
||||
}
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
System.err.println("获取今日注单失败,所有重试均未成功");
|
||||
}
|
||||
|
||||
return success ? "success" : "";
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -29,7 +29,8 @@ public class CompletedTodayCrawler implements PageProcessor {
|
||||
private final String token;
|
||||
private Site site;
|
||||
private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
// 添加一个字段标记是否成功解析数据
|
||||
private static volatile boolean lastParseSuccess = true;
|
||||
|
||||
// 或者提供带参数的构造函数
|
||||
public CompletedTodayCrawler(String token) {
|
||||
@@ -74,16 +75,18 @@ public class CompletedTodayCrawler implements PageProcessor {
|
||||
// 检查是否有"暂无数据"提示
|
||||
if (content.contains("暂无数据")) {
|
||||
System.out.println("警告: 页面显示'暂无数据'");
|
||||
lastParseSuccess = false; // 标记失败
|
||||
return;
|
||||
}
|
||||
// 解析注单数据
|
||||
List<Map<String, Object>> betList = parseBetHtml(content);
|
||||
if (betList.isEmpty()) {
|
||||
System.out.println("未解析到注单数据");
|
||||
|
||||
lastParseSuccess = false; // 标记失败
|
||||
// 尝试从其他可能的位置解析
|
||||
extractDebugInfo(html);
|
||||
//extractDebugInfo(html);
|
||||
} else {
|
||||
lastParseSuccess = true; // 标记成功
|
||||
System.out.println("解析到 " + betList.size() + " 条注单数据");
|
||||
List<CompletedToday> completedTodayList = convertForDatabase(betList);
|
||||
SQLiteUtil.saveCompletedToday(completedTodayList);
|
||||
@@ -394,6 +397,15 @@ public class CompletedTodayCrawler implements PageProcessor {
|
||||
return site;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Returns whether the most recent crawl run parsed bet data successfully.
 *
 * <p>NOTE(review): backed by a static volatile flag shared across all
 * crawler instances — concurrent spiders would race on it; confirm only
 * one spider runs at a time.
 */
public static boolean isLastParseSuccess() {
    return lastParseSuccess;
}
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
String url = "https://4701268539-esh.qdk63ayw8g.com/member/bets?settled=true";
|
||||
// 创建爬虫
|
||||
|
||||
@@ -30,7 +30,8 @@ public class LotteryWebMagicCrawler implements PageProcessor {
|
||||
// 站点配置
|
||||
private Site site;
|
||||
// final LoginService loginService;
|
||||
|
||||
// 添加一个字段标记是否成功解析数据
|
||||
private static volatile boolean lastParseSuccess = true;
|
||||
|
||||
public LotteryWebMagicCrawler(String token) {
|
||||
this.token = token;
|
||||
@@ -66,14 +67,17 @@ public class LotteryWebMagicCrawler implements PageProcessor {
|
||||
// 示例:提取所有表格数据
|
||||
Selectable tables = html.xpath("//table");
|
||||
System.out.println("找到 " + tables.nodes().size() + " 个表格");
|
||||
|
||||
if(tables.nodes().isEmpty()){
|
||||
lastParseSuccess = false;
|
||||
}else {
|
||||
lastParseSuccess = true;
|
||||
}
|
||||
// 提取表格数据(根据实际页面结构调整选择器)
|
||||
extractTableData(html);
|
||||
|
||||
// 示例:提取所有链接
|
||||
Selectable links = html.links();
|
||||
System.out.println("页面包含 " + links.all().size() + " 个链接");
|
||||
|
||||
// 如果需要继续爬取其他页面
|
||||
// page.addTargetRequests(links.all());
|
||||
|
||||
@@ -103,7 +107,12 @@ public class LotteryWebMagicCrawler implements PageProcessor {
|
||||
return site;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Returns whether the most recent crawl run parsed draw-result data
 * successfully (the flag is set in {@code process} based on whether any
 * table was found).
 *
 * <p>NOTE(review): backed by a static volatile flag shared across all
 * crawler instances — concurrent spiders would race on it; confirm only
 * one spider runs at a time.
 */
public static boolean isLastParseSuccess() {
    return lastParseSuccess;
}
|
||||
/**
|
||||
* 解析彩票HTML数据,转换成指定的List<Map<String, Object>>格式
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user