feat(selenium):重构ChromeDriver预加载与防检测配置

- 移除旧的WebDriverManager配置逻辑
- 新增SeleniumStealthUtil工具类,集成防检测脚本
- 实现全局单例ChromeDriver Bean管理
- 添加驱动生命周期自动清理机制
- 优化驱动创建参数,增强浏览器伪装能力
- 移除无用的线程池销毁方法
- 调整配置类注解与加载顺序
This commit is contained in:
2025-10-29 16:36:34 +08:00
parent 6443cdc8d0
commit d0a930d4f2
5 changed files with 136 additions and 54 deletions

View File

@@ -54,9 +54,7 @@
<artifactId>webmagic-extension</artifactId> <artifactId>webmagic-extension</artifactId>
<version>1.0.3</version> <version>1.0.3</version>
</dependency> </dependency>
<!-- JavaFX 相关依赖已移除 --> <!-- JavaFX 相关依赖已移除 -->
<dependency> <dependency>
<groupId>org.projectlombok</groupId> <groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId> <artifactId>lombok</artifactId>
@@ -118,7 +116,6 @@
<version>0.11.5</version> <version>0.11.5</version>
<scope>runtime</scope> <scope>runtime</scope>
</dependency> </dependency>
<!-- OSHI for hardware information --> <!-- OSHI for hardware information -->
<dependency> <dependency>
<groupId>com.github.oshi</groupId> <groupId>com.github.oshi</groupId>

View File

@@ -1,58 +1,42 @@
package com.tashow.erp.config; package com.tashow.erp.config;
import com.tashow.erp.utils.SeleniumStealthUtil;
import io.github.bonigarcia.wdm.WebDriverManager; import jakarta.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.boot.ApplicationArguments; import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner; import org.springframework.boot.ApplicationRunner;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.annotation.Order; import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import java.net.URL;
/** /**
* ChromeDriver预加载器 * ChromeDriver 配置类
* 在应用启动时后台下载ChromeDriver避免用户首次使用时等待 * 启动时后台预加载驱动,提供全局单例 Bean
* 使用国内镜像源加速下载
*/ */
@Slf4j @Slf4j
@Component @Configuration
@Order(2) // 在DatabaseConfig之后运行 @Order(100)
public class ChromeDriverPreloader implements ApplicationRunner { public class ChromeDriverPreloader implements ApplicationRunner {
private ChromeDriver globalDriver;
@Override @Override
public void run(ApplicationArguments args) { public void run(ApplicationArguments args) {
// 使用后台线程执行,不阻塞应用启动 new Thread(() -> {
Thread preloadThread = new Thread(() -> { globalDriver = SeleniumStealthUtil.createDriver(true);
try { log.info("ChromeDriver 预加载完成");
log.info("开始预加载ChromeDriver驱动使用国内镜像加速..."); }, "ChromeDriver-Preloader").start();
}
// 配置WebDriverManager使用国内镜像源
WebDriverManager.chromedriver() @Bean
.driverRepositoryUrl(new URL("https://registry.npmmirror.com/-/binary/chromedriver/")) public ChromeDriver chromeDriver() {
.setup(); if (globalDriver == null) globalDriver = SeleniumStealthUtil.createDriver(true);
return globalDriver;
log.info("ChromeDriver驱动下载完成开始验证..."); }
// 快速验证驱动可用性 @PreDestroy
ChromeOptions options = new ChromeOptions(); public void cleanup() {
options.addArguments("--headless"); // 无头模式,不显示浏览器 globalDriver.quit();
options.addArguments("--disable-gpu");
options.addArguments("--no-sandbox");
ChromeDriver driver = new ChromeDriver(options);
driver.quit(); // 立即关闭
log.info("ChromeDriver驱动预加载成功");
} catch (Exception e) {
// 预加载失败不影响应用启动,第一次使用时会自动下载
log.warn("ChromeDriver驱动预加载失败不影响使用: {}", e.getMessage());
}
}, "ChromeDriver-Preloader");
preloadThread.setDaemon(true); // 设置为守护线程
preloadThread.start();
} }
} }

View File

@@ -1,5 +1,4 @@
package com.tashow.erp.utils; package com.tashow.erp.utils;
import lombok.Data; import lombok.Data;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -7,7 +6,6 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.processor.PageProcessor;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
@@ -19,13 +17,6 @@ public class SagawaExpressSdk {
private static final Logger logger = LoggerFactory.getLogger(SagawaExpressSdk.class); private static final Logger logger = LoggerFactory.getLogger(SagawaExpressSdk.class);
private final ExecutorService executorService = Executors.newFixedThreadPool(10); private final ExecutorService executorService = Executors.newFixedThreadPool(10);
private int timeout = 5000; // 默认超时时间5秒 private int timeout = 5000; // 默认超时时间5秒
/**
* 关闭线程池
*/
// @PreDestroy
public void destroy() {
executorService.shutdownNow();
}
/** /**
* 查询佐川急便物流信息 - API入口 * 查询佐川急便物流信息 - API入口
*/ */

View File

@@ -0,0 +1,73 @@
package com.tashow.erp.utils;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.Map;
/**
 * Selenium anti-detection helper.
 *
 * <p>Builds a {@link ChromeDriver} whose launch options and injected start-up script
 * hide the usual browser-automation fingerprints.
 */
public class SeleniumStealthUtil {

    /** Mirror repository that WebDriverManager downloads the chromedriver binary from. */
    private static final String DRIVER_MIRROR_URL = "https://registry.npmmirror.com/-/binary/chromedriver/";

    /** Chrome argument that pins the user agent to a desktop Chrome 120 on Windows 10. */
    private static final String USER_AGENT_ARGUMENT =
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

    /**
     * JavaScript registered through CDP so that it runs in every new document.
     * It overrides {@code navigator.webdriver}, {@code navigator.plugins},
     * {@code navigator.languages} and {@code navigator.permissions}, defines
     * {@code window.chrome}, and makes WebGL {@code getParameter} return Intel strings
     * for parameters 37445 and 37446 (the unmasked vendor / renderer queries).
     */
    private static final String STEALTH_SCRIPT = """
            Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
            Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
            Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
            window.chrome = {runtime: {}};
            Object.defineProperty(navigator, 'permissions', {
            get: () => ({
            query: () => Promise.resolve({state: 'prompt'})
            })
            });
            const getParameter = WebGLRenderingContext.prototype.getParameter;
            WebGLRenderingContext.prototype.getParameter = function(parameter) {
            if (parameter === 37445) return 'Intel Inc.';
            if (parameter === 37446) return 'Intel Iris OpenGL Engine';
            return getParameter.call(this, parameter);
            };
            """;

    /** Static utility class; not meant to be instantiated. */
    private SeleniumStealthUtil() {
    }

    /**
     * Creates a ChromeDriver configured to hide automation traces.
     *
     * <p>The chromedriver binary is resolved through WebDriverManager using the mirror
     * repository, the browser is started with the anti-detection options, and the stealth
     * script is registered via {@code Page.addScriptToEvaluateOnNewDocument}.
     *
     * @param headless whether to start Chrome in headless mode
     * @return the configured driver; the caller is responsible for calling {@code quit()}
     * @throws RuntimeException if the driver cannot be set up, started, or configured;
     *         the original failure is attached as the cause
     */
    public static ChromeDriver createDriver(boolean headless) {
        ChromeDriver driver = null;
        try {
            WebDriverManager.chromedriver()
                    .driverRepositoryUrl(new URL(DRIVER_MIRROR_URL))
                    .setup();
            driver = new ChromeDriver(buildOptions(headless));
            driver.executeCdpCommand("Page.addScriptToEvaluateOnNewDocument", Map.of("source", STEALTH_SCRIPT));
            return driver;
        } catch (Exception e) {
            // The browser may already be running when the CDP call fails; shut it down
            // so a failed creation does not leave an orphaned Chrome/chromedriver process.
            if (driver != null) {
                try {
                    driver.quit();
                } catch (Exception quitFailure) {
                    e.addSuppressed(quitFailure);
                }
            }
            throw new RuntimeException("创建浏览器失败", e);
        }
    }

    /** Assembles the Chrome launch options, adding the headless switch when requested. */
    private static ChromeOptions buildOptions(boolean headless) {
        ChromeOptions options = new ChromeOptions();
        // Remove the three standard automation markers.
        options.addArguments("--disable-blink-features=AutomationControlled");
        options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation"));
        options.setExperimentalOption("useAutomationExtension", false);
        // General-purpose launch arguments.
        options.addArguments("--no-sandbox", "--disable-gpu", "--disable-dev-shm-usage");
        options.addArguments(USER_AGENT_ARGUMENT);
        if (headless) {
            options.addArguments("--headless=new");
        }
        return options;
    }
}

View File

@@ -0,0 +1,37 @@
package com.tashow.erp.utils;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.chrome.ChromeDriver;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
 * Checks brand names against the USPTO trademark search endpoint by running a
 * {@code fetch} request inside the injected browser session.
 *
 * <p>NOTE(review): the injected {@link ChromeDriver} is a single shared instance and this
 * class does no synchronization — confirm callers do not invoke it concurrently.
 */
@Component
public class TrademarkCheckUtil {

    /** Page opened first so that the search request is issued from the USPTO origin. */
    private static final String SEARCH_PAGE_URL = "https://tmsearch.uspto.gov/search/search-results";

    /**
     * Async script run by {@code executeAsyncScript}. {@code arguments[0]} is the brand name
     * and the last argument is the completion callback supplied by Selenium. The brand name
     * is passed as data, never concatenated into the script source, so quotes or other
     * special characters in it cannot alter the script.
     */
    private static final String SEARCH_SCRIPT = """
            const brand = arguments[0];
            const callback = arguments[arguments.length - 1];
            fetch('https://tmsearch.uspto.gov/prod-stage-v1-0-0/tmsearch', {
                method: 'POST',
                headers: {'Content-Type': 'application/json'},
                body: JSON.stringify({
                    query: {bool: {must: [{bool: {should: [
                        {match_phrase: {WM: {query: brand, boost: 5}}},
                        {match: {WM: {query: brand, boost: 2}}},
                        {match_phrase: {PM: {query: brand, boost: 2}}}
                    ]}}]}},
                    size: 1, _source: ['alive']
                })
            })
                .then(r => r.json())
                .then(d => callback(d?.hits?.hits?.[0]?.source?.alive || false))
                .catch(() => callback(false));
            """;

    @Autowired
    private ChromeDriver driver;

    /**
     * Reports whether the top search hit for the given brand name is flagged {@code alive}.
     *
     * @param brandName brand name to look up; {@code null} or blank yields {@code false}
     * @return {@code true} only when the first hit's {@code alive} value is boolean
     *         {@code true}; {@code false} when there is no hit, the request fails, the
     *         thread is interrupted, or any other error occurs
     */
    public boolean checkTrademark(String brandName) {
        if (brandName == null || brandName.isBlank()) {
            return false;
        }
        try {
            driver.get(SEARCH_PAGE_URL);
            // Fixed pause after navigation before issuing the request from the page context.
            Thread.sleep(2000);
            Object result = ((JavascriptExecutor) driver).executeAsyncScript(SEARCH_SCRIPT, brandName);
            return Boolean.TRUE.equals(result);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            System.err.println("检测失败: " + e.getMessage());
            return false;
        } catch (Exception e) {
            System.err.println("检测失败: " + e.getMessage());
            return false;
        }
    }
}