feat(selenium): 重构ChromeDriver预加载与防检测配置
- 移除旧的WebDriverManager配置逻辑 - 新增SeleniumStealthUtil工具类,集成防检测脚本 - 实现全局单例ChromeDriver Bean管理 - 添加驱动生命周期自动清理机制 - 优化驱动创建参数,增强浏览器伪装能力 - 移除无用的线程池销毁方法 - 调整配置类注解与加载顺序
This commit is contained in:
@@ -54,9 +54,7 @@
|
|||||||
<artifactId>webmagic-extension</artifactId>
|
<artifactId>webmagic-extension</artifactId>
|
||||||
<version>1.0.3</version>
|
<version>1.0.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- JavaFX 相关依赖已移除 -->
|
<!-- JavaFX 相关依赖已移除 -->
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.projectlombok</groupId>
|
<groupId>org.projectlombok</groupId>
|
||||||
<artifactId>lombok</artifactId>
|
<artifactId>lombok</artifactId>
|
||||||
@@ -118,7 +116,6 @@
|
|||||||
<version>0.11.5</version>
|
<version>0.11.5</version>
|
||||||
<scope>runtime</scope>
|
<scope>runtime</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- OSHI for hardware information -->
|
<!-- OSHI for hardware information -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.oshi</groupId>
|
<groupId>com.github.oshi</groupId>
|
||||||
|
|||||||
@@ -1,58 +1,42 @@
|
|||||||
package com.tashow.erp.config;
|
package com.tashow.erp.config;
|
||||||
|
import com.tashow.erp.utils.SeleniumStealthUtil;
|
||||||
import io.github.bonigarcia.wdm.WebDriverManager;
|
import jakarta.annotation.PreDestroy;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.openqa.selenium.chrome.ChromeDriver;
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
import org.openqa.selenium.chrome.ChromeOptions;
|
|
||||||
import org.springframework.boot.ApplicationArguments;
|
import org.springframework.boot.ApplicationArguments;
|
||||||
import org.springframework.boot.ApplicationRunner;
|
import org.springframework.boot.ApplicationRunner;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
import org.springframework.core.annotation.Order;
|
import org.springframework.core.annotation.Order;
|
||||||
import org.springframework.stereotype.Component;
|
|
||||||
import java.net.URL;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ChromeDriver预加载器
|
* ChromeDriver 配置类
|
||||||
* 在应用启动时后台下载ChromeDriver,避免用户首次使用时等待
|
* 启动时后台预加载驱动,提供全局单例 Bean
|
||||||
* 使用国内镜像源加速下载
|
|
||||||
*/
|
*/
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Component
|
@Configuration
|
||||||
@Order(2) // 在DatabaseConfig之后运行
|
@Order(100)
|
||||||
public class ChromeDriverPreloader implements ApplicationRunner {
|
public class ChromeDriverPreloader implements ApplicationRunner {
|
||||||
|
|
||||||
|
private ChromeDriver globalDriver;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run(ApplicationArguments args) {
|
public void run(ApplicationArguments args) {
|
||||||
// 使用后台线程执行,不阻塞应用启动
|
new Thread(() -> {
|
||||||
Thread preloadThread = new Thread(() -> {
|
globalDriver = SeleniumStealthUtil.createDriver(true);
|
||||||
try {
|
log.info("ChromeDriver 预加载完成");
|
||||||
log.info("开始预加载ChromeDriver驱动(使用国内镜像加速)...");
|
}, "ChromeDriver-Preloader").start();
|
||||||
|
|
||||||
// 配置WebDriverManager使用国内镜像源
|
|
||||||
WebDriverManager.chromedriver()
|
|
||||||
.driverRepositoryUrl(new URL("https://registry.npmmirror.com/-/binary/chromedriver/"))
|
|
||||||
.setup();
|
|
||||||
|
|
||||||
log.info("ChromeDriver驱动下载完成,开始验证...");
|
|
||||||
|
|
||||||
// 快速验证驱动可用性
|
|
||||||
ChromeOptions options = new ChromeOptions();
|
|
||||||
options.addArguments("--headless"); // 无头模式,不显示浏览器
|
|
||||||
options.addArguments("--disable-gpu");
|
|
||||||
options.addArguments("--no-sandbox");
|
|
||||||
|
|
||||||
ChromeDriver driver = new ChromeDriver(options);
|
|
||||||
driver.quit(); // 立即关闭
|
|
||||||
|
|
||||||
log.info("ChromeDriver驱动预加载成功");
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
// 预加载失败不影响应用启动,第一次使用时会自动下载
|
|
||||||
log.warn("ChromeDriver驱动预加载失败(不影响使用): {}", e.getMessage());
|
|
||||||
}
|
}
|
||||||
}, "ChromeDriver-Preloader");
|
|
||||||
|
|
||||||
preloadThread.setDaemon(true); // 设置为守护线程
|
@Bean
|
||||||
preloadThread.start();
|
public ChromeDriver chromeDriver() {
|
||||||
|
if (globalDriver == null) globalDriver = SeleniumStealthUtil.createDriver(true);
|
||||||
|
return globalDriver;
|
||||||
|
}
|
||||||
|
|
||||||
|
@PreDestroy
|
||||||
|
public void cleanup() {
|
||||||
|
globalDriver.quit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
package com.tashow.erp.utils;
|
package com.tashow.erp.utils;
|
||||||
|
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -7,7 +6,6 @@ import us.codecraft.webmagic.Page;
|
|||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Spider;
|
import us.codecraft.webmagic.Spider;
|
||||||
import us.codecraft.webmagic.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
@@ -19,13 +17,6 @@ public class SagawaExpressSdk {
|
|||||||
private static final Logger logger = LoggerFactory.getLogger(SagawaExpressSdk.class);
|
private static final Logger logger = LoggerFactory.getLogger(SagawaExpressSdk.class);
|
||||||
private final ExecutorService executorService = Executors.newFixedThreadPool(10);
|
private final ExecutorService executorService = Executors.newFixedThreadPool(10);
|
||||||
private int timeout = 5000; // 默认超时时间5秒
|
private int timeout = 5000; // 默认超时时间5秒
|
||||||
/**
|
|
||||||
* 关闭线程池
|
|
||||||
*/
|
|
||||||
// @PreDestroy
|
|
||||||
public void destroy() {
|
|
||||||
executorService.shutdownNow();
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* 查询佐川急便物流信息 - API入口
|
* 查询佐川急便物流信息 - API入口
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
package com.tashow.erp.utils;
|
||||||
|
|
||||||
|
import io.github.bonigarcia.wdm.WebDriverManager;
|
||||||
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
import org.openqa.selenium.chrome.ChromeOptions;
|
||||||
|
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Selenium 防检测工具类
|
||||||
|
* 提供配置好的 ChromeDriver,隐藏自动化痕迹
|
||||||
|
*/
|
||||||
|
public class SeleniumStealthUtil {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 创建防检测的 ChromeDriver
|
||||||
|
* @param headless 是否启用无头模式
|
||||||
|
* @return 配置好的 ChromeDriver
|
||||||
|
*/
|
||||||
|
public static ChromeDriver createDriver(boolean headless) {
|
||||||
|
try {
|
||||||
|
WebDriverManager.chromedriver()
|
||||||
|
.driverRepositoryUrl(new URL("https://registry.npmmirror.com/-/binary/chromedriver/"))
|
||||||
|
.setup();
|
||||||
|
|
||||||
|
ChromeOptions options = new ChromeOptions();
|
||||||
|
|
||||||
|
// 三板斧:移除自动化特征
|
||||||
|
options.addArguments("--disable-blink-features=AutomationControlled");
|
||||||
|
options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation"));
|
||||||
|
options.setExperimentalOption("useAutomationExtension", false);
|
||||||
|
|
||||||
|
// 常规参数
|
||||||
|
options.addArguments("--no-sandbox", "--disable-gpu", "--disable-dev-shm-usage");
|
||||||
|
options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
|
||||||
|
|
||||||
|
// 无头模式
|
||||||
|
if (headless) {
|
||||||
|
options.addArguments("--headless=new");
|
||||||
|
}
|
||||||
|
|
||||||
|
ChromeDriver driver = new ChromeDriver(options);
|
||||||
|
|
||||||
|
// 注入完整 stealth 脚本
|
||||||
|
String stealthScript = """
|
||||||
|
Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
|
||||||
|
Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
|
||||||
|
Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
|
||||||
|
window.chrome = {runtime: {}};
|
||||||
|
Object.defineProperty(navigator, 'permissions', {
|
||||||
|
get: () => ({
|
||||||
|
query: () => Promise.resolve({state: 'prompt'})
|
||||||
|
})
|
||||||
|
});
|
||||||
|
const getParameter = WebGLRenderingContext.prototype.getParameter;
|
||||||
|
WebGLRenderingContext.prototype.getParameter = function(parameter) {
|
||||||
|
if (parameter === 37445) return 'Intel Inc.';
|
||||||
|
if (parameter === 37446) return 'Intel Iris OpenGL Engine';
|
||||||
|
return getParameter.call(this, parameter);
|
||||||
|
};
|
||||||
|
""";
|
||||||
|
|
||||||
|
driver.executeCdpCommand("Page.addScriptToEvaluateOnNewDocument", Map.of("source", stealthScript));
|
||||||
|
return driver;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("创建浏览器失败", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
package com.tashow.erp.utils;
|
||||||
|
import org.openqa.selenium.JavascriptExecutor;
|
||||||
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class TrademarkCheckUtil {
|
||||||
|
@Autowired
|
||||||
|
private ChromeDriver driver;
|
||||||
|
public boolean checkTrademark(String brandName) {
|
||||||
|
try {
|
||||||
|
driver.get("https://tmsearch.uspto.gov/search/search-results");
|
||||||
|
Thread.sleep(2000);
|
||||||
|
String script = String.format("""
|
||||||
|
return fetch('https://tmsearch.uspto.gov/prod-stage-v1-0-0/tmsearch', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {'Content-Type': 'application/json'},
|
||||||
|
body: JSON.stringify({
|
||||||
|
query: {bool: {must: [{bool: {should: [
|
||||||
|
{match_phrase: {WM: {query: '%s', boost: 5}}},
|
||||||
|
{match: {WM: {query: '%s', boost: 2}}},
|
||||||
|
{match_phrase: {PM: {query: '%s', boost: 2}}}
|
||||||
|
]}}]}},
|
||||||
|
size: 1, _source: ['alive']
|
||||||
|
})
|
||||||
|
}).then(r => r.json()).then(d => d?.hits?.hits?.[0]?.source?.alive || false);
|
||||||
|
""", brandName, brandName, brandName);
|
||||||
|
Object result = ((JavascriptExecutor) driver).executeAsyncScript("var callback = arguments[arguments.length - 1];" + script.replace("return", "").replace(";", ".then(callback);"));
|
||||||
|
return Boolean.TRUE.equals(result);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("检测失败: " + e.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user