This commit is contained in:
2025-09-30 09:48:13 +08:00
parent c5ac27cdec
commit fa7edc0cc2
10 changed files with 226 additions and 2511 deletions

View File

@@ -1,75 +0,0 @@
package com.ruoyi.web.controller.tool;
import java.util.List;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import com.ruoyi.common.annotation.Anonymous;
import com.ruoyi.common.core.controller.BaseController;
import com.ruoyi.common.core.domain.R;
import com.ruoyi.system.domain.BanmaAccount;
import com.ruoyi.system.service.IBanmaAccountService;
/**
 * Banma account management (database-backed, minimal API).
 * <ul>
 *   <li>Only responsible for storing and retrieving accounts and their tokens.</li>
 *   <li>Per the original note, login/refresh and data collection are left to the client.</li>
 * </ul>
 * All endpoints are exposed under {@code /tool/banma} and the class is marked {@code @Anonymous}.
 */
@RestController
@RequestMapping("/tool/banma")
@Anonymous
public class BanmaOrderController extends BaseController {

    @Autowired
    private IBanmaAccountService accountService;

    /**
     * Returns the account list produced by {@code accountService.listSimple()}.
     */
    @GetMapping("/accounts")
    public R<?> listAccounts() {
        List<BanmaAccount> accounts = accountService.listSimple();
        return R.ok(accounts);
    }

    /**
     * Creates or edits an account (including marking it as default).
     * The credentials are validated first; only when a token can be obtained is the
     * account saved and its token refreshed into the database.
     *
     * @param body account data from the request body
     * @return the saved account id under key {@code id}, or a failure when no token could be obtained
     */
    @PostMapping("/accounts")
    public R<?> saveAccount(@RequestBody BanmaAccount body) {
        // NOTE(review): this downcast requires the injected bean to be the concrete
        // BanmaAccountServiceImpl; consider exposing validateAndGetToken on the interface.
        com.ruoyi.system.service.impl.BanmaAccountServiceImpl concreteService =
                (com.ruoyi.system.service.impl.BanmaAccountServiceImpl) accountService;

        // Validate the credentials before persisting anything.
        String validatedToken = concreteService.validateAndGetToken(body.getUsername(), body.getPassword());
        if (validatedToken == null) {
            return R.fail("账号或密码错误无法获取Token");
        }

        // Credentials are valid: persist the account, then store a fresh token for it.
        Long savedId = accountService.saveOrUpdate(body);
        accountService.refreshToken(savedId);
        return R.ok(Map.of("id", savedId));
    }

    /**
     * Deletes the account with the given id.
     */
    @DeleteMapping("/accounts/{id}")
    public R<?> remove(@PathVariable Long id) {
        accountService.remove(id);
        return R.ok();
    }

    /**
     * Manually refreshes the token of a single account.
     */
    @PostMapping("/accounts/{id}/refresh-token")
    public R<?> refreshOne(@PathVariable Long id) {
        accountService.refreshToken(id);
        return R.ok();
    }

    /**
     * Manually refreshes tokens through {@code accountService.refreshAllTokens()}
     * (described by the original author as "all enabled accounts").
     */
    @PostMapping("/refresh-all")
    public R<?> refreshAll() {
        accountService.refreshAllTokens();
        return R.ok();
    }
}

View File

@@ -1,211 +0,0 @@
package com.ruoyi.web.controller.tool;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ruoyi.common.annotation.Anonymous;
import com.ruoyi.common.core.controller.BaseController;
import com.ruoyi.common.core.domain.AjaxResult;
import com.ruoyi.common.utils.StringUtils;
import com.ruoyi.web.util.Alibaba1688CookieUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.*;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.PreDestroy;
import javax.xml.bind.DatatypeConverter;
import java.security.MessageDigest;
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.Collectors;
/**
* 1688图像搜索API
*/
@RestController
@RequestMapping("/figre")
@Anonymous
public class FigureTransmissionController extends BaseController {
private static final Logger log = LoggerFactory.getLogger(FigureTransmissionController.class);
private static final String APP_KEY = "12574478";
private static final int MAX_BATCH_SIZE = 30;
private static final int REQUEST_TIMEOUT = 30;
@Autowired
private RestTemplate restTemplate;
private final ExecutorService executorService = Executors.newFixedThreadPool(10);
/**
 * Lifecycle hook ({@code @PreDestroy}) that shuts down the worker pool used for batch image processing.
 */
@PreDestroy
public void shutdown() {
// ExecutorService.shutdown() stops accepting new tasks but does not wait for running ones to finish.
executorService.shutdown();
}
/**
 * Request body of {@code /batchUpload1688Images}: a plain holder for the list of image URLs to process.
 */
public static class BatchImageUrlRequest {
// Image URLs submitted by the client; may be null when the field is absent from the request body.
private List<String> imageUrls;
/** @return the submitted image URLs (as set, possibly null) */
public List<String> getImageUrls() {
return imageUrls;
}
/** @param imageUrls the image URLs to process */
public void setImageUrls(List<String> imageUrls) {
this.imageUrls = imageUrls;
}
}
/**
 * Downloads every submitted image, uploads it to 1688 as Base64 and reports one result per image.
 * <p>
 * At most {@code MAX_BATCH_SIZE} URLs are processed; the rest are ignored. Images are handled
 * concurrently on {@code executorService}. Each result map contains {@code imageUrl} and
 * {@code success}; on success also {@code imageId} and {@code searchUrl}, otherwise {@code error}.
 * A timeout or failure of one image is reported in that image's entry and no longer aborts the
 * whole batch.
 *
 * @param request request body carrying the image URLs
 * @return success result with the per-image list, or an error when no URL list was supplied
 */
@PostMapping("/batchUpload1688Images")
public AjaxResult batchUpload1688Images(@RequestBody BatchImageUrlRequest request) {
    List<String> requestedUrls = request == null ? null : request.getImageUrls();
    if (requestedUrls == null) {
        // Previously this case surfaced as a NullPointerException.
        return AjaxResult.error("imageUrls不能为空");
    }

    List<String> batchUrls = requestedUrls.stream().limit(MAX_BATCH_SIZE).collect(Collectors.toList());
    List<CompletableFuture<Map<String, Object>>> futures = batchUrls.stream()
            .map(imageUrl -> CompletableFuture.supplyAsync(() -> processBatchImage(imageUrl), executorService))
            .collect(Collectors.toList());

    List<Map<String, Object>> results = new ArrayList<>(futures.size());
    for (int i = 0; i < futures.size(); i++) {
        CompletableFuture<Map<String, Object>> future = futures.get(i);
        try {
            results.add(future.get(REQUEST_TIMEOUT, TimeUnit.SECONDS));
        } catch (InterruptedException e) {
            // Preserve the interrupt status for the caller; remaining futures will fail fast too.
            Thread.currentThread().interrupt();
            future.cancel(true);
            results.add(batchFailure(batchUrls.get(i), "请求被中断"));
        } catch (TimeoutException e) {
            future.cancel(true);
            results.add(batchFailure(batchUrls.get(i), "请求超时"));
        } catch (ExecutionException | CancellationException e) {
            results.add(batchFailure(batchUrls.get(i), String.valueOf(e.getMessage())));
        }
    }
    return AjaxResult.success("批量处理完成", results);
}

/** Processes one image: download, Base64-encode, upload, and extract the returned image id. */
@SuppressWarnings("unchecked")
private Map<String, Object> processBatchImage(String imageUrl) {
    Map<String, Object> result = new HashMap<>();
    result.put("imageUrl", imageUrl);
    try {
        ResponseEntity<byte[]> response = restTemplate.getForEntity(imageUrl, byte[].class);
        byte[] imageBytes = response.getBody();
        if (imageBytes == null || imageBytes.length == 0) {
            throw new IllegalStateException("图片内容为空");
        }

        AjaxResult uploadResult = uploadImageBase64(Base64.getEncoder().encodeToString(imageBytes));
        Object responseBody = uploadResult.get("data");
        if (!(responseBody instanceof String)) {
            // uploadImageBase64 returned an error result without a response body.
            throw new IllegalStateException("上传接口未返回数据");
        }

        Map<String, Object> apiResponse = new ObjectMapper().readValue((String) responseBody, Map.class);
        Object data = apiResponse.get("data");
        Object imageId = data instanceof Map ? ((Map<String, Object>) data).get("imageId") : null;
        if (!(imageId instanceof String) || ((String) imageId).isEmpty()) {
            // Without this check a missing id was reported as success with "imageId=null".
            throw new IllegalStateException("响应中缺少imageId");
        }

        result.put("success", true);
        result.put("imageId", imageId);
        result.put("searchUrl", "https://s.1688.com/youyuan/index.html?tab=imageSearch&imageType=spider&imageId=" + imageId);
    } catch (Exception e) {
        result.put("success", false);
        result.put("error", "处理图片失败: " + e.getMessage());
    }
    return result;
}

/** Builds the failure entry for an image whose task did not complete normally. */
private Map<String, Object> batchFailure(String imageUrl, String reason) {
    Map<String, Object> failure = new HashMap<>();
    failure.put("imageUrl", imageUrl);
    failure.put("success", false);
    failure.put("error", "处理图片失败: " + reason);
    return failure;
}
/**
 * Runs a 1688 mobile image search for the given image URL and returns the detail-page URLs of
 * the offers found, rewritten to their {@code m.1688.com} form.
 * <p>
 * The request is signed with {@code generateSign} and sent with the cookies held by
 * {@code Alibaba1688CookieUtil}; cookies returned by 1688 are stored back. If the offer list
 * cannot be extracted from the response, an empty {@code detailUrls} list is returned.
 *
 * @param imageUrl URL of the image to search for
 * @return success result containing {@code detailUrls}, or an error result on failure
 */
@PostMapping("/upload1688ImageMobile")
public AjaxResult upload1688ImageMobile(@RequestParam String imageUrl) {
    if (!StringUtils.isNotEmpty(imageUrl)) {
        return AjaxResult.error("图片搜索失败: imageUrl不能为空");
    }
    try {
        ObjectMapper objectMapper = new ObjectMapper();
        String token = Alibaba1688CookieUtil.getToken(restTemplate);
        long timestamp = System.currentTimeMillis();
        // imageUrl is caller-supplied and sits inside a JSON string that is itself embedded in a
        // JSON string, so it must be escaped twice. Plain URLs come out unchanged.
        String safeImageUrl = escapeForNestedJson(objectMapper, imageUrl);
        String jsonData = "{\"appId\":\"32517\",\"params\":\"{\\\"categoryId\\\":-1,\\\"imageAddress\\\":\\\"" + safeImageUrl + "\\\",\\\"interfaceName\\\":\\\"imageOfferSearchService\\\",\\\"needYolocrop\\\":false,\\\"pageIndex\\\":\\\"1\\\",\\\"pageSize\\\":\\\"40\\\",\\\"searchScene\\\":\\\"image\\\",\\\"snAppAb\\\":true,\\\"appName\\\":\\\"ios\\\",\\\"scene\\\":\\\"seoSearch\\\"}\"}";
        String sign = generateSign(token, String.valueOf(timestamp), jsonData);
        String url = "https://h5api.m.1688.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/" + "?jsv=2.6.1" + "&appKey=" + APP_KEY + "&t=" + timestamp + "&sign=" + sign + "&v=2.0" + "&type=originaljson" + "&isSec=0" + "&timeout=20000" + "&api=mtop.relationrecommend.WirelessRecommend.recommend" + "&ignoreLogin=true" + "&prefix=h5api" + "&dataType=jsonp";

        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.APPLICATION_FORM_URLENCODED);
        headers.set("Cookie", Alibaba1688CookieUtil.getCookieString(restTemplate));
        headers.set("authority", "h5api.m.1688.com");
        headers.set("accept", "application/json");
        headers.set("origin", "https://m.1688.com");
        headers.set("referer", "https://m.1688.com/");
        headers.set("user-agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1");

        MultiValueMap<String, String> formData = new LinkedMultiValueMap<>();
        formData.add("data", jsonData);
        HttpEntity<MultiValueMap<String, String>> requestEntity = new HttpEntity<>(formData, headers);
        ResponseEntity<String> response = restTemplate.exchange(url, HttpMethod.POST, requestEntity, String.class);
        updateCookiesFromResponse(response);

        Map<String, Object> originalResponse = objectMapper.readValue(response.getBody(), Map.class);
        List<String> detailUrls = new ArrayList<>();
        try {
            Map<String, Object> offerList = (Map<String, Object>) ((Map<String, Object>) originalResponse.get("data")).get("offerList");
            for (Map<String, Object> offer : (List<Map<String, Object>>) offerList.get("offers")) {
                String detailUrl = (String) offer.get("detailUrl");
                if (StringUtils.isNotEmpty(detailUrl)) {
                    detailUrls.add(detailUrl);
                }
            }
        } catch (Exception e) {
            // Best effort: an unexpected response shape yields whatever was collected so far.
            log.error("提取detailUrls失败", e);
        }

        // Drop the query string, switch to the mobile host and append the fixed mobile parameters.
        List<String> modifiedUrls = detailUrls.stream().map(detailUrl -> {
            String baseUrl = detailUrl.split("\\?")[0];
            return baseUrl.replace("detail.1688.com", "m.1688.com") + "?ptow=113d26e7c9a&ptow=113d26e&callByHgJs=1&__removesafearea__=1&src_cna=cPb9IGgLcDgBASQOA3xQ3mUM";
        }).collect(Collectors.toList());

        Map<String, Object> result = new HashMap<>();
        result.put("detailUrls", modifiedUrls);
        return AjaxResult.success(result);
    } catch (Exception e) {
        log.error("图片搜索失败", e);
        return AjaxResult.error("图片搜索失败: " + e.getMessage());
    }
}

/**
 * Escapes a value for insertion into a JSON string literal that is nested inside another JSON
 * string literal (two levels of escaping, without the surrounding quotes).
 */
private static String escapeForNestedJson(ObjectMapper mapper, String raw) throws java.io.IOException {
    String inner = mapper.writeValueAsString(raw);
    inner = inner.substring(1, inner.length() - 1);
    String outer = mapper.writeValueAsString(inner);
    return outer.substring(1, outer.length() - 1);
}
/**
 * Forces a refresh of the cached 1688 cookies and returns the token obtained afterwards.
 *
 * @return success result carrying the token, or an error result when no token is available
 *         or the refresh throws
 */
@PostMapping("/refreshCookie")
public AjaxResult refreshCookie() {
    try {
        Alibaba1688CookieUtil.refreshCookies(restTemplate);
        String refreshedToken = Alibaba1688CookieUtil.getToken(restTemplate);
        if (StringUtils.isNotEmpty(refreshedToken)) {
            return AjaxResult.success("刷新Cookie成功", refreshedToken);
        }
        return AjaxResult.error("获取1688 API token失败请稍后重试");
    } catch (Exception e) {
        log.error("刷新Cookie失败", e);
        return AjaxResult.error("刷新Cookie失败: " + e.getMessage());
    }
}
/**
 * Computes the request signature: the lowercase hex MD5 of
 * {@code token&timestamp&APP_KEY&data} encoded as UTF-8.
 *
 * @param token     token taken from the 1688 cookies
 * @param timestamp request timestamp as a string
 * @param data      the JSON payload being sent
 * @return the 32-character lowercase hex digest, or an empty string if hashing fails
 */
private String generateSign(String token, String timestamp, String data) {
    try {
        StringBuilder payload = new StringBuilder();
        payload.append(token).append('&').append(timestamp).append('&').append(APP_KEY).append('&').append(data);
        byte[] digest = MessageDigest.getInstance("MD5").digest(payload.toString().getBytes("UTF-8"));

        // Render each byte as two lowercase hex digits.
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            hex.append(Character.forDigit((b >> 4) & 0xF, 16));
            hex.append(Character.forDigit(b & 0xF, 16));
        }
        return hex.toString();
    } catch (Exception e) {
        log.error("生成签名失败", e);
        return "";
    }
}
/**
 * Uploads a Base64-encoded image to the 1688 {@code imageBase64ToImageId} interface.
 * The call is signed with {@code generateSign}, sent with the cookies held by
 * {@code Alibaba1688CookieUtil}, and any cookies set by the response are stored back.
 *
 * @param base64Image Base64 text of the image
 * @return success result whose data is the raw response body, or an error result on failure
 */
private AjaxResult uploadImageBase64(String base64Image) {
    try {
        String token = Alibaba1688CookieUtil.getToken(restTemplate);
        long timestamp = System.currentTimeMillis();

        String payload = "{\"appId\":32517,\"params\":\"{\\\"searchScene\\\":\\\"imageEx\\\",\\\"interfaceName\\\":\\\"imageBase64ToImageId\\\",\\\"serviceParam.extendParam[imageBase64]\\\":\\\"" + base64Image + "\\\",\\\"subChannel\\\":\\\"pc_image_search_image_id\\\"}\"}";
        String signature = generateSign(token, String.valueOf(timestamp), payload);
        String endpoint = "https://h5api.m.1688.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.7.4&appKey=" + APP_KEY + "&t=" + timestamp + "&sign=" + signature + "&api=mtop.relationrecommend.wirelessrecommend.recommend&v=2.0&type=originaljson&timeout=20000&dataType=jsonp";

        // Desktop-browser style headers for the form-encoded POST.
        HttpHeaders httpHeaders = new HttpHeaders();
        httpHeaders.setContentType(MediaType.APPLICATION_FORM_URLENCODED);
        httpHeaders.set("Cookie", Alibaba1688CookieUtil.getCookieString(restTemplate));
        httpHeaders.set("authority", "h5api.m.1688.com");
        httpHeaders.set("accept", "application/json");
        httpHeaders.set("origin", "https://www.1688.com");
        httpHeaders.set("referer", "https://www.1688.com/");
        httpHeaders.set("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36");

        MultiValueMap<String, String> form = new LinkedMultiValueMap<>();
        form.add("data", payload);

        ResponseEntity<String> apiResponse = restTemplate.exchange(
                endpoint, HttpMethod.POST, new HttpEntity<>(form, httpHeaders), String.class);
        updateCookiesFromResponse(apiResponse);
        return AjaxResult.success("上传成功", apiResponse.getBody());
    } catch (Exception e) {
        log.error("上传图片失败", e);
        return AjaxResult.error("上传失败: " + e.getMessage());
    }
}
/**
 * Stores every cookie announced in the response's {@code Set-Cookie} headers into
 * {@code Alibaba1688CookieUtil}. Only the leading {@code name=value} pair of each header is
 * used; attributes after the first {@code ;} are ignored, as are entries without {@code =}.
 *
 * @param response the HTTP response to read cookies from
 */
private void updateCookiesFromResponse(ResponseEntity<String> response) {
    List<String> setCookieHeaders = response.getHeaders().get("Set-Cookie");
    if (setCookieHeaders == null) {
        return;
    }
    for (String headerValue : setCookieHeaders) {
        String nameValuePair = headerValue.split(";")[0];
        String[] nameAndValue = nameValuePair.split("=", 2);
        if (nameAndValue.length != 2) {
            continue;
        }
        Alibaba1688CookieUtil.setCookie(nameAndValue[0], nameAndValue[1]);
    }
}
}

View File

@@ -1,57 +0,0 @@
package com.ruoyi.web.controller.tool;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.GetMapping;
import com.ruoyi.common.core.controller.BaseController;
import com.ruoyi.common.core.domain.AjaxResult;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import java.util.ArrayList;
import java.util.List;
/**
* 1688Controller
*
* @author ruoyi
* @date 2025-07-15
*/
@RestController
@RequestMapping("/prod/ozon")
public class ProductsController extends BaseController
{
@GetMapping("/scrapeImages")
public AjaxResult scrapeImages()
{
String url = "https://www.ozon.ru/highlight/ozon-global/?currency_price=14.000%3B500.000";
List<String> imageUrls = new ArrayList<>();
Site site = Site.me()
.setRetryTimes(3)
.setTimeOut(10000);
site.addHeader("cookie", "xcid=b30f6db523d4aee734a822aa0a230b3f; __Secure-ext_xcid=b30f6db523d4aee734a822aa0a230b3f; __Secure-ab-group=92; rfuid=NjkyNDcyNDUyLDEyNC4wNDM0NzUyNzUxNjA3NCwxMDI4MjM3MjIzLC0xLC05ODc0NjQ3MjQsVzNzaWJtRnRaU0k2SWtOb2NtOXRhWFZ0SUZCRVJpQlFiSFZuYVc0aUxDSmtaWE5qY21sd2RHbHZiaUk2SWxCdmNuUmhZbXhsSUVSdlkzVnRaVzUwSUVadmNtMWhkQ0lzSW0xcGJXVlVlWEJsY3lJNlczc2lkSGx3WlNJNkltRndjR3hwWTJGMGFXOXVMM2d0WjI5dloyeGxMV05vY205dFpTMXdaR1lpTENKemRXWm1hWGhsY3lJNkluQmtaaUo5WFgwc2V5SnVZVzFsSWpvaVEyaHliMjFwZFcwZ1VFUkdJRlpwWlhkbGNpSXNJbVJsYzJOeWFYQjBhVzl1SWpvaUlpd2liV2x0WlZSNWNHVnpJanBiZXlKMGVYQmxJam9pWVhCd2JHbGpZWFJwYjI0dmNHUm1JaXdpYzNWbVptbDRaWE1pT2lKd1pHWWlmVjE5WFE9PSxXeUo2YUMxRFRpSmQsMCwxLDAsMjQsMjM3NDE1OTMwLDgsMjI3MTI2NTIwLDAsMSwwLC00OTEyNzU1MjMsUjI5dloyeGxJRWx1WXk0Z1RtVjBjMk5oY0dVZ1IyVmphMjhnVjJsdU16SWdOUzR3SUNoWGFXNWtiM2R6SUU1VUlERXdMakE3SUZkcGJqWTBPeUI0TmpRcElFRndjR3hsVjJWaVMybDBMelV6Tnk0ek5pQW9TMGhVVFV3c0lHeHBhMlVnUjJWamEyOHBJRU5vY205dFpTOHhNamN1TUM0d0xqQWdVMkZtWVhKcEx6VXpOeTR6TmlBeU1EQXpNREV3TnlCTmIzcHBiR3hoLGV5SmphSEp2YldVaU9uc2lZWEJ3SWpwN0ltbHpTVzV6ZEdGc2JHVmtJanBtWVd4elpTd2lTVzV6ZEdGc2JGTjBZWFJsSWpwN0lrUkpVMEZDVEVWRUlqb2laR2x6WVdKc1pXUWlMQ0pKVGxOVVFVeE1SVVFpT2lKcGJuTjBZV3hzWldRaUxDSk9UMVJmU1U1VFZFRk1URVZFSWpvaWJtOTBYMmx1YzNSaGJHeGxaQ0o5TENKU2RXNXVhVzVuVTNSaGRHVWlPbnNpUTBGT1RrOVVYMUpWVGlJNkltTmhibTV2ZEY5eWRXNGlMQ0pTUlVGRVdWOVVUMTlTVlU0aU9pSnlaV0ZrZVY5MGIxOXlkVzRpTENKU1ZVNU9TVTVISWpvaWNuVnVibWx1WnlKOWZYMTksNjUsLTExODM0MTA3MiwxLDEsLTEsMTY5OTk1NDg4NywxNjk5OTU0ODg3LDMzNjAwNzkzMyw4; guest=true; x-hng=lang=zh-CN&domain=www.ozon.ru; __Secure-user-id=210183128; is_adult_confirmed=; is_alco_adult_confirmed=; ozonIdAuthResponseToken=eyJhbGciOiJIUzI1NiIsIm96b25pZCI6Im5vdHNlbnNpdGl2ZSIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoyMTAxODMxMjgsImlzX3JlZ2lzdHJhdGlvbiI6ZmFsc2UsInJldHVybl91cmwiOiIiLCJwYXlsb2FkIjpudWxsLCJleHAiOjE3NTI1NDczNzksImlhdCI6MTc1MjU0NzM2OSwiaXNzIjoib3pvbmlkIn0.DX0nfNf9rcuPdt9lzE5fn1en5yqiD7Aw1vqpRt13OiU; 
abt_data=7.iE3bYFZ2m7yMc8mn5Rm8V9pI_ELHBH8eHNcM1w0kxMd8-HXap37uTEk6E_nAsmdWsO5pYQKhwamysCHZexl_YPPpWOWk7wgfKSuP8pTEdlDlXwLOy-sokLKLOdHyTFxcxNx5yRfpmNqFoQP8D5KccoiDh5U5kU8x7rJDLpqixSah6TFKKYsTiZrokn5Tb5aJu5lMAZBOkhr7CkYTFd_4j9wtALKnFM-oZGxCX0qTgUP5kIf9MfDSGI0U0pZ6igW6aSGirFb5ZVNmCV2D4NImCGn00K_Sn8ZX0vR6krWW1cixLrCnKp0rO7JJEFi9c7-4FL54ZvaJ-tKg8ALwlPmRIr-aI156iSlHQU6lULo8oKmBL13eLI9d_d8fp50BJe7oA6PNylYhI5DdV81WGI2EFnZtR3sVYF8O4qrS7gXwsMCoku51prrYZG1pLJJiD8HfAPTnFLvxpURZ7x1-lsAinljoh8_N1oVP89PVPJjd-tKQyRiUnleAnwaeF9kQmGvIMqYSYi506QWf-KHqfdEnEA; __Secure-ETC=90ef0679199c55cdec8592a7e480c3c5; __Secure-access-token=8.210183128.eH-AgbboQkWnqX9a7XeDqQ.92.Aa5lCeHJOXOoA90AsaTtX6OsppswLMDuxLGNHW98_K79BKfyceiX8mpea_qxY5qoaYUOO7_5hsQ6ndRa9tCxUzOCvERd7f056ZaoOw8W9lnUgQn_q5TX-7X2WK8ejP7OZg.20250715024249.20250715073104.M4j1mEB0PD4T55SZ4iQqCG6lZytjYmwG50Q28F-3q_s.1a7857bd2a7e95575; __Secure-refresh-token=8.210183128.eH-AgbboQkWnqX9a7XeDqQ.92.Aa5lCeHJOXOoA90AsaTtX6OsppswLMDuxLGNHW98_K79BKfyceiX8mpea_qxY5qoaYUOO7_5hsQ6ndRa9tCxUzOCvERd7f056ZaoOw8W9lnUgQn_q5TX-7X2WK8ejP7OZg.20250715024249.20250715073104.Ip-CdBv1mEGilz15LZCTwFmfoxEL8RrayFNa90r_XLI.197047eb6a54a7041; is_cookies_accepted=1; token=eyJhbGciOiJIUzUxMiJ9.eyJsb2dpbl91c2VyX2tleSI6IjJiMDM5ODBiLWYzZDQtNDI2Ny04NTQ2LWYwZjQxOTczNTAyNiJ9.oIF-bHh7pubdNWzETutNcoc_Nu-A7zgqBIJwcHFgF0V2s-xfnZVbs_EbvyJSBYYkUjqrBlJP_1qBl3vB1mQ_ow");
Spider.create(new PageProcessor() {
@Override
public void process(Page page) {
List<String> imgs = page.getHtml()
.css("div.ip7_24.p7i_24 img.i7p_24.b95_3_1-a", "src").all();
page.putField("imageUrls", imgs);
}
@Override
public Site getSite() {
return site;
}
}).addUrl(url)
.addPipeline((resultItems, task) -> {
List<String> imgs = resultItems.get("imageUrls");
if (imgs != null) {
imageUrls.addAll(imgs);
}
})
.thread(1)
.run();
return AjaxResult.success(imageUrls);
}
}

View File

@@ -1,430 +0,0 @@
package com.ruoyi.web.controller.tool;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import com.ruoyi.common.core.controller.BaseController;
import com.ruoyi.common.core.domain.AjaxResult;
import com.ruoyi.common.annotation.Anonymous;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import com.ruoyi.web.util.WebMagicProxyUtil;
import com.ruoyi.web.util.Alibaba1688CookieUtil;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.*;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.client.RestTemplate;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import javax.xml.bind.DatatypeConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import java.net.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
@RestController
@RequestMapping("/prod/rakuten")
public class RakutenController extends BaseController {
private static final Logger logger = LoggerFactory.getLogger(RakutenController.class);
private final Random random = new Random();
private static final String APP_KEY = "12574478";
@Autowired
private RestTemplate restTemplate;
// @GetMapping("test")
// public String test(){
// final StringBuilder aa = new StringBuilder();
// String targetUrl = "https://ranking.rakuten.co.jp/?l-id=top_normal_grayheader02";
// logger.info("=== 开始爬取测试 ===");
// Proxy systemProxy = detectSystemProxy(targetUrl);
// if (systemProxy != null) {
// logger.info("成功检测到代理,准备配置下载器");
// } else {
// logger.info("未检测到代理,使用直连模式");
// }
// HttpClientDownloader downloader = createProxyDownloader(systemProxy);
// Spider.create(new PageProcessor() {
// @Override
// public void process(Page page) {
// aa.append(page.getHtml().toString());
// }
// @Override
// public Site getSite() {
// Site site= Site.me();
// site.addHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36");
// site.addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
// site.addHeader("accept-encoding", "gzip, deflate, br, zstd");
// site.addHeader("accept-language", "zh-CN,zh;q=0.9");
// site.addHeader("cache-control", "max-age=0");
// site.addHeader("priority", "u=0, i");
// site.addHeader("sec-ch-ua", "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\", \"Microsoft Edge\";v=\"138\"");
// site.addHeader("sec-ch-ua-mobile", "?0");
// site.addHeader("sec-ch-ua-platform", "\"Windows\"");
// site.addHeader("sec-fetch-dest", "document");
// site.addHeader("sec-fetch-mode", "navigate");
// site.addHeader("sec-fetch-site", "same-origin");
// site.addHeader("sec-fetch-user", "?1");
// site.addHeader("upgrade-insecure-requests", "1");
// site.addHeader("cookie", "_ra=1750472997398|0ff0eb32-5d9f-4c27-a7ca-c9ff1149e90b; Rp=779873a7b6c0e87edcf6f39cf2368561928309c8; rcx=136377ca-334f-4305-b45e-ad43e9538d2e; rcxGlobal=136377ca-334f-4305-b45e-ad43e9538d2e;");
// return site;
// }
// }).addUrl(targetUrl).setDownloader(downloader).thread(1).run();
// return aa.toString();
// }
//
/**
 * Looks up 1688 offers for an image and returns the middle price and weight found on their
 * detail pages.
 * <p>
 * Detail URLs are obtained from the image search, each page is scraped in turn, and the
 * collected values are summarised by {@code buildResult}. The system proxy is cleared through
 * {@code WebMagicProxyUtil} when the request finishes, whatever the outcome.
 *
 * @param imageUrl URL of the image to search for
 * @return the summarised price/weight, or an error result when nothing was found or scraping failed
 */
@GetMapping("/scrape1688Products")
public AjaxResult scrape1688Products(@RequestParam String imageUrl) {
    try {
        List<ProductInfo> collected = new ArrayList<>();
        Site crawlSite = createOptimizedSite();
        List<String> offerUrls = get1688DetailUrls(imageUrl);
        scrapeProductDetailsSequential(offerUrls, crawlSite, collected);

        if (!collected.isEmpty()) {
            return buildResult(collected);
        }
        logger.error("爬取1688商品数据失败");
        return AjaxResult.error("未找到商品信息");
    } catch (Exception e) {
        logger.error("爬取1688商品数据失败: {}", e.getMessage(), e);
        return AjaxResult.error("爬取1688商品数据失败: " + e.getMessage());
    } finally {
        WebMagicProxyUtil.clearSystemProxy();
    }
}
private Site createOptimizedSite() {
Site site = Site.me().setRetryTimes(2).setTimeOut(10000).setSleepTime(500 + random.nextInt(1000));
site.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36");
site.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
site.addHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
site.addHeader("Accept-Encoding", "gzip, deflate, br");
site.addHeader("Referer", "https://s.1688.com/");
site.addHeader("Connection", "keep-alive");
site.addHeader("Upgrade-Insecure-Requests", "1");
site.addHeader(" sec-ch-ua-platform", "\"Windows\"");
site.addHeader("cookie", "arms_uid=c609390d-9eec-4d96-8402-705e85eec143; taklid=60a276649e864914a72a034bf895d586; _bl_uid=C2m9kdwR3URv2h9XU1zLv7ep0wL0; tracknick=; trackId=8f5f1443ebaa40a5b878a0a8c847e248; x-hng=lang=zh-CN&domain=detail.1688.com; union={\"amug_biz\":\"oneself\",\"amug_fl_src\":\"awakeId_982\",\"creative_url\":\"https%3A%2F%2Fdetail.1688.com%2Foffer%2F650296798072.html%3Ffromkv%3DcbuPcPlugin%3AimageSearchDrawerCard%26spm%3Da2639h.29135425.offerlist.i0%26source%3Daction%2523offerItem%253Borigin%2523s.1688.com%26amug_biz%3Doneself%26amug_fl_src%3DawakeId_982\",\"creative_time\":1752544005175}; cookie2=13f737f8500b45917b248310f133fd85; t=3bce9026cffba4d4cb7f7707b055a37b; _tb_token_=e6b5e3e75b136; lid=tb242078004181; __last_loginid__=b2b-2215893137000702f0; __last_memberid__=b2b-2215893137000702f0; ali_apache_track=c_mid=b2b-2215893137000702f0|c_lid=tb242078004181|c_ms=1; token=; cookie1=U%2BbMNkTCG0pSsj5xAGEoX6SKc5SIUjqNO8PwGVfgBd0%3D; cookie17=UUpgQyFSrJGiDvwfPw%3D%3D; sgcookie=E100xeguhPGg55wTvssYju665JLiOpV0Uo%2Fg4P60bQVtc4M7xRIpk5LNActuqoYy5dtVMTm89tkdXDt7XgRS%2B%2BfE5j5bZ4iD4feBA8XC5P2OCjs%3D; sg=104; csg=3eac1f8c; unb=2215893137000; uc4=nk4=0%40FY4Mt4wfbfnv2ZVPeOSXt4Tr3gwwfHbyww%3D%3D&id4=0%40U2gqzJfuijLPEdau%2FJG20lxehlz4ZAg5; _nk_=tb242078004181; last_mid=b2b-2215893137000702f0; __cn_logon__=true; __cn_logon_id__=tb242078004181; cna=ip/8IIvOgXABASQOA3z+hOcs; plugin_home_downLoad_cookie=%E5%AE%89%E8%A3%85%E6%8F%92%E4%BB%B6; keywordsHistory=%E5%A5%B3%E6%AC%BE%E5%86%85%E8%A1%A3%E8%96%84%E6%AC%BE%3B%E8%96%84%E6%AC%BE%E5%86%85%E8%A1%A3%3B%E5%81%8F%E5%85%89%E5%A2%A8%E9%95%9C%3B%E5%BE%95%E8%8A%AC%E7%94%B5%E5%90%B9%E9%A3%8E%E6%9C%BA%3B%E5%A5%B3%E7%AB%A5%E8%A2%9C%E5%AD%90; isg=BC4udCqKymJkBz6IOd_5ftKDf4TwL_IpL98Yxlj3mjHsO86VwL9COdS686fX4-pB; mtop_partitioned_detect=1; _m_h5_tk=480ae7820698a739a9733b209cba6c5c_1753070964344; _m_h5_tk_enc=ea1ad31e4aceb42009dd55ecbc385446; xlly_s=1; _csrf_token=1753063846774; 
_user_vitals_session_data_={\"user_line_track\":true,\"ul_session_id\":\"stdvffkk9zs\",\"last_page_id\":\"detail.1688.com%2Fytcik8fiacs\"}; tfstk=grCjjLqRZmmbw5At5K4rOOFnlBO_zzPeMVTO-NhqWIdv5f_hfZRNWKH-yExPgn72MGM1AMTwDxd9wbL2SmKtMxp-y3sx_KSN1Tc1-Nf4oCzDiZAM6krUT1_coCfkyKFg4aU9SCTxf5rrBZAM6uur6JPCoib59jb9WUUW7FovXGKvy4Kk7jhOXnp-2eTH6CI9HUhJJe-tMCLvezTM2hd96Gd8PF-JXjqtRFwXmZaI0hq2r8KFk3Gt6_FMhH_xQfhONE9f6ZK5uZ5WlKtpBtQs6sIPWs5DE-kDsaWCfOIgLYAfWtdv8ZVjNBQAU1TP_Pk6Da5WIspxDxLBGn9dML3t9QJOF_OVMlDhzatvdQWzEoJwGi6HxKeuqG_W0GCXemZyb97PMLsL4bsMC9bw5iNjMMIzZX-Q9zHsPpc6PHz7PADMSdViVT7hrw9vrE9zPziAIKLkPHz7PmBMHUYX8zaSDOf..");
return site;
}
/**
 * Runs the 1688 image search for {@code imageUrl} and returns up to 10 mobile detail-page URLs.
 * <p>
 * The request is signed with {@code generateSign} and sent with the cookies held by
 * {@code Alibaba1688CookieUtil}; cookies set by the response are stored back. Each offer's
 * {@code detailUrl} is stripped of its query string, moved to {@code m.1688.com} and given a
 * random 20-character {@code src_cna} value. Offers without a usable {@code detailUrl} are
 * skipped instead of failing the whole lookup.
 *
 * @param imageUrl image URL (per the original note, the URL obtained after uploading to Qiniu)
 * @return the detail URLs, or an empty list when the lookup fails
 */
private List<String> get1688DetailUrls(String imageUrl) {
    final int maxDetailUrls = 10;
    try {
        ObjectMapper objectMapper = new ObjectMapper();
        String token = Alibaba1688CookieUtil.getToken(restTemplate);
        long timestamp = System.currentTimeMillis();
        // imageUrl comes from the request and sits inside a JSON string nested in another JSON
        // string, so it is escaped twice. Plain URLs come out unchanged.
        String safeImageUrl = escapeImageUrlForNestedJson(objectMapper, imageUrl);
        String jsonData = "{\"appId\":\"32517\",\"params\":\"{\\\"categoryId\\\":-1,\\\"imageAddress\\\":\\\"" + safeImageUrl + "\\\",\\\"interfaceName\\\":\\\"imageOfferSearchService\\\",\\\"needYolocrop\\\":false,\\\"pageIndex\\\":\\\"1\\\",\\\"pageSize\\\":\\\"40\\\",\\\"searchScene\\\":\\\"image\\\",\\\"snAppAb\\\":true,\\\"appName\\\":\\\"ios\\\",\\\"scene\\\":\\\"seoSearch\\\"}\"}";
        String sign = generateSign(token, String.valueOf(timestamp), jsonData);
        String url = "https://h5api.m.1688.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/" + "?jsv=2.6.1" + "&appKey=" + APP_KEY + "&t=" + timestamp + "&sign=" + sign + "&v=2.0" + "&type=originaljson" + "&isSec=0" + "&timeout=20000" + "&api=mtop.relationrecommend.WirelessRecommend.recommend" + "&ignoreLogin=true" + "&prefix=h5api" + "&dataType=jsonp";

        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.APPLICATION_FORM_URLENCODED);
        headers.set("Cookie", Alibaba1688CookieUtil.getCookieString(restTemplate));
        headers.set("authority", "h5api.m.1688.com");
        headers.set("accept", "application/json");
        headers.set("origin", "https://m.1688.com");
        headers.set("referer", "https://m.1688.com/");
        // Original author's note: "this Rakuten import still has a bug, why?" - cause not recorded.
        // The header below was left disabled; note its name and value start with a space.
        // headers.set(" sec-ch-ua-platform", " \"Windows\"");
        headers.set("user-agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1");

        MultiValueMap<String, String> formData = new LinkedMultiValueMap<>();
        formData.add("data", jsonData);
        HttpEntity<MultiValueMap<String, String>> requestEntity = new HttpEntity<>(formData, headers);
        ResponseEntity<String> response = restTemplate.exchange(url, HttpMethod.POST, requestEntity, String.class);

        // Keep the cookie store in sync with whatever 1688 hands back.
        List<String> newCookies = response.getHeaders().get("Set-Cookie");
        if (newCookies != null) {
            for (String cookie : newCookies) {
                String[] parts = cookie.split(";")[0].split("=", 2);
                if (parts.length == 2) {
                    Alibaba1688CookieUtil.setCookie(parts[0], parts[1]);
                }
            }
        }

        Map<String, Object> responseData = objectMapper.readValue(response.getBody(), Map.class);
        Map<String, Object> data = (Map<String, Object>) responseData.get("data");
        Map<String, Object> offerList = (Map<String, Object>) data.get("offerList");
        List<Map<String, Object>> offers = (List<Map<String, Object>>) offerList.get("offers");

        String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
        SecureRandom secureRandom = new SecureRandom();
        List<String> detailUrls = new ArrayList<>();
        for (Map<String, Object> offer : offers) {
            if (detailUrls.size() >= maxDetailUrls) {
                break;
            }
            Object detailUrl = offer.get("detailUrl");
            if (!(detailUrl instanceof String) || ((String) detailUrl).isEmpty()) {
                // One malformed offer must not discard the others.
                continue;
            }
            String baseUrl = ((String) detailUrl).split("\\?")[0];
            StringBuilder randomString = new StringBuilder(20);
            for (int i = 0; i < 20; i++) {
                randomString.append(chars.charAt(secureRandom.nextInt(chars.length())));
            }
            detailUrls.add(baseUrl.replace("detail.1688.com", "m.1688.com") + "?callByHgJs=1&__removesafearea__=1&src_cna=" + randomString);
        }
        return detailUrls;
    } catch (Exception e) {
        logger.error("通过1688 API获取商品详情链接失败: {}", e.getMessage(), e);
        return new ArrayList<>();
    }
}

/**
 * Escapes a value for insertion into a JSON string literal nested inside another JSON string
 * literal (two levels of escaping, without the surrounding quotes).
 */
private static String escapeImageUrlForNestedJson(ObjectMapper mapper, String raw) throws java.io.IOException {
    String inner = mapper.writeValueAsString(raw == null ? "" : raw);
    inner = inner.substring(1, inner.length() - 1);
    String outer = mapper.writeValueAsString(inner);
    return outer.substring(1, outer.length() - 1);
}
/**
 * Computes the request signature: the lowercase hex MD5 of
 * {@code token&timestamp&APP_KEY&data} encoded as UTF-8.
 *
 * @param token     token taken from the 1688 cookies
 * @param timestamp request timestamp as a string
 * @param data      the JSON payload being sent
 * @return the 32-character lowercase hex digest, or an empty string if hashing fails
 */
private String generateSign(String token, String timestamp, String data) {
    try {
        StringBuilder payload = new StringBuilder();
        payload.append(token).append('&').append(timestamp).append('&').append(APP_KEY).append('&').append(data);
        byte[] digest = MessageDigest.getInstance("MD5").digest(payload.toString().getBytes("UTF-8"));

        // Render each byte as two lowercase hex digits.
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            hex.append(Character.forDigit((b >> 4) & 0xF, 16));
            hex.append(Character.forDigit(b & 0xF, 16));
        }
        return hex.toString();
    } catch (Exception e) {
        logger.error("生成签名失败", e);
        return "";
    }
}
/**
 * Scrapes the given detail pages one after another, appending results to {@code productInfoList}.
 * <p>
 * When a page yields no entry with a non-blank price, the method waits 50-80 seconds and
 * retries that page once. If the thread is interrupted while waiting, the interrupt status is
 * restored and the remaining URLs are skipped.
 *
 * @param detailUrls      detail-page URLs to scrape
 * @param site            crawler configuration
 * @param productInfoList list receiving the scraped product information
 */
private void scrapeProductDetailsSequential(List<String> detailUrls, Site site, List<ProductInfo> productInfoList) {
    for (String detailUrl : detailUrls) {
        try {
            int firstNewIndex = productInfoList.size();
            scrapeProductDetail(detailUrl, site, productInfoList);

            // Only entries added by this attempt count.
            boolean hasValidPrice = productInfoList.subList(firstNewIndex, productInfoList.size()).stream()
                    .anyMatch(info -> info.getPrice() != null && !info.getPrice().trim().isEmpty());
            if (hasValidPrice) {
                continue;
            }

            try {
                int waitTime = 50000 + random.nextInt(30000);
                // Log before sleeping so the message matches what is about to happen.
                logger.info("等待{}秒后开始重试", waitTime / 1000);
                Thread.sleep(waitTime);
                scrapeProductDetail(detailUrl, site, productInfoList);
            } catch (InterruptedException interrupted) {
                Thread.currentThread().interrupt();
                logger.warn("重试爬取失败: {}", "等待期间被中断");
                return;
            } catch (Exception retryException) {
                logger.error("重试爬取失败: {}", retryException.getMessage(), retryException);
            }
        } catch (Exception e) {
            logger.error("爬取商品详情失败: {}", e.getMessage(), e);
        }
    }
}
/** Matches the first {@code "price":"..."} value in the raw page text. */
private static final Pattern DETAIL_PRICE_PATTERN = Pattern.compile("\"price\":\\s*\"([^\"]+)\"");

/** Matches the first integer {@code "weight":N} value in the raw page text. */
private static final Pattern DETAIL_WEIGHT_PATTERN = Pattern.compile("\"weight\":(\\d+)");

/** Matches the first {@code "unitWeight":N[.N]} value in the raw page text. */
private static final Pattern DETAIL_UNIT_WEIGHT_PATTERN = Pattern.compile("\"unitWeight\":(\\d+(?:\\.\\d+)?)");

/**
 * Crawls a single detail page and appends one {@code ProductInfo} (price, weight) to
 * {@code productInfoList}; either value may be {@code null} when it is not found.
 * <p>
 * For a price range such as {@code "1.50-2.00"} the part before the first {@code -} is used.
 * Weight is taken from {@code "weight"} as-is with a {@code g} suffix; otherwise a positive
 * {@code "unitWeight"} is multiplied by 1000 and rounded (presumably kilograms to grams -
 * TODO confirm against the page format). Diagnostics go to the logger at debug level instead
 * of standard output.
 *
 * @param detailUrl       detail-page URL to crawl
 * @param site            crawler configuration
 * @param productInfoList list receiving the scraped product information
 */
private void scrapeProductDetail(String detailUrl, Site site, List<ProductInfo> productInfoList) {
    try {
        Spider.create(new PageProcessor() {
            @Override
            public void process(Page page) {
                try {
                    logger.debug("正在爬取商品详情: {}", page.getHtml());
                    String htmlContent = page.getRawText();

                    String price = null;
                    Matcher priceMatcher = DETAIL_PRICE_PATTERN.matcher(htmlContent);
                    if (priceMatcher.find()) {
                        // split("-") returns an empty array for a value made only of dashes.
                        String[] priceParts = priceMatcher.group(1).split("-");
                        price = priceParts.length > 0 ? priceParts[0].trim() : null;
                    }

                    String weight = null;
                    Matcher weightMatcher = DETAIL_WEIGHT_PATTERN.matcher(htmlContent);
                    if (weightMatcher.find()) {
                        weight = weightMatcher.group(1) + "g";
                    } else {
                        Matcher unitWeightMatcher = DETAIL_UNIT_WEIGHT_PATTERN.matcher(htmlContent);
                        if (unitWeightMatcher.find()) {
                            double weightValue = Double.parseDouble(unitWeightMatcher.group(1));
                            if (weightValue > 0) {
                                // Round rather than truncate: 1.005 * 1000 is 1004.999... in floating point.
                                weight = Math.round(weightValue * 1000) + "g";
                            }
                        }
                    }

                    logger.debug("价格重量:{}-{}", price, weight);
                    Thread.sleep(WebMagicProxyUtil.getRandomSleepTime(1000, 3000));
                    productInfoList.add(new ProductInfo(price, weight));
                } catch (InterruptedException interrupted) {
                    // Restore the flag so callers can stop; the entry is not recorded.
                    Thread.currentThread().interrupt();
                    logger.error("解析商品详情页面失败: {}", "线程被中断");
                } catch (Exception e) {
                    logger.error("解析商品详情页面失败: {}", e.getMessage(), e);
                }
            }

            @Override
            public Site getSite() {
                return site;
            }
        }).addUrl(detailUrl).setDownloader(WebMagicProxyUtil.getDefaultProxyDownloader()).thread(1).run();
    } catch (Exception e) {
        logger.error("爬取商品详情失败: {}", e.getMessage(), e);
    }
}
/**
 * Summarises the scraped products into a single price and a single weight.
 * <p>
 * Prices and weights are parsed independently (unparseable or missing values are ignored),
 * each list is sorted, and the element at index {@code size / 2} is reported. The result map
 * holds {@code priceList} and {@code weightList}, each with one entry or empty.
 *
 * @param productInfoList scraped product information
 * @return success result wrapping the summary map
 */
private AjaxResult buildResult(List<ProductInfo> productInfoList) {
    List<Float> prices = new ArrayList<>();
    List<Float> weights = new ArrayList<>();
    for (ProductInfo item : productInfoList) {
        Float parsedPrice = item.getPrice() == null ? null : parseFloatOrNull(item.getPrice());
        if (parsedPrice != null) {
            prices.add(parsedPrice);
        }
        // Weights are stored with a trailing "g" unit; strip it before parsing.
        Float parsedWeight = item.getWeight() == null ? null : parseFloatOrNull(item.getWeight().replace("g", ""));
        if (parsedWeight != null) {
            weights.add(parsedWeight);
        }
    }

    List<String> priceList = prices.isEmpty()
            ? Collections.<String>emptyList()
            : Collections.singletonList(String.valueOf(middleOfSorted(prices)));
    List<String> weightList = weights.isEmpty()
            ? Collections.<String>emptyList()
            : Collections.singletonList(middleOfSorted(weights) + "g");

    Map<String, Object> summary = new HashMap<>();
    summary.put("priceList", priceList);
    summary.put("weightList", weightList);
    return AjaxResult.success(summary);
}

/** Parses a float, returning {@code null} when the text is not a valid number. */
private static Float parseFloatOrNull(String text) {
    try {
        return Float.parseFloat(text);
    } catch (NumberFormatException ignored) {
        // Invalid values are skipped on purpose.
        return null;
    }
}

/** Sorts the list in place and returns the element at index {@code size / 2}. */
private static float middleOfSorted(List<Float> values) {
    Collections.sort(values);
    return values.get(values.size() / 2);
}
/**
 * Immutable holder for the price and weight text scraped from one product detail page.
 * Either value may be null when it was not found on the page.
 */
private static class ProductInfo {
// Price text as scraped (first part of a price range).
private final String price;
// Weight text with a trailing "g" unit, e.g. "500g".
private final String weight;
/**
 * @param price  scraped price text, may be null
 * @param weight scraped weight text, may be null
 */
public ProductInfo(String price, String weight) {
this.price = price;
this.weight = weight;
}
/** @return the scraped price text, possibly null */
public String getPrice() {
return price;
}
/** @return the scraped weight text, possibly null */
public String getWeight() {
return weight;
}
}
/**
 * Scrapes the Rakuten ranking search page for the given keyword and returns the listed products.
 *
 * <p>Each product map carries {@code ranking}, {@code productUrl}, {@code imageUrl},
 * {@code productTitle}, optionally {@code shopName} (4th path segment of the product URL) and
 * {@code price} (digits only), plus {@code imageId} and {@code image1688Url} set to {@code null}.
 * Only as many rows are produced as the shortest of the extracted columns.
 *
 * <p>Failures are logged together with their stack trace. A parsing failure inside the page
 * processor yields the products collected so far; a failure outside it yields an error result.
 * The system proxy is cleared in every case.
 *
 * @param shopName search keyword, URL-encoded as UTF-8 before use
 * @return success result with the product list, or an error result when scraping fails
 */
@GetMapping("/scrapeProducts")
public AjaxResult scrapeProducts(@RequestParam String shopName) {
    try {
        String url = "https://ranking.rakuten.co.jp/search?stx=" + URLEncoder.encode(shopName, StandardCharsets.UTF_8.toString());
        List<Map<String, String>> products = new ArrayList<>();
        Site site = Site.me().setRetryTimes(3).setTimeOut(10000).setSleepTime(1000 + random.nextInt(2000));
        site.addHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36");
        site.addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
        site.addHeader("accept-encoding", "gzip, deflate, br, zstd");
        site.addHeader("accept-language", "zh-CN,zh;q=0.9");
        site.addHeader("cache-control", "max-age=0");
        site.addHeader("priority", "u=0, i");
        site.addHeader("sec-ch-ua", "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\", \"Microsoft Edge\";v=\"138\"");
        site.addHeader("sec-ch-ua-mobile", "?0");
        site.addHeader("sec-ch-ua-platform", "\"Windows\"");
        site.addHeader("sec-fetch-dest", "document");
        site.addHeader("sec-fetch-mode", "navigate");
        site.addHeader("sec-fetch-site", "same-origin");
        site.addHeader("sec-fetch-user", "?1");
        site.addHeader("upgrade-insecure-requests", "1");
        site.addHeader("cookie", "_ra=1750472997398|0ff0eb32-5d9f-4c27-a7ca-c9ff1149e90b; Rp=779873a7b6c0e87edcf6f39cf2368561928309c8; rcx=136377ca-334f-4305-b45e-ad43e9538d2e; rcxGlobal=136377ca-334f-4305-b45e-ad43e9538d2e;");
        Spider.create(new PageProcessor() {
            @Override
            public void process(Page page) {
                try {
                    List<String> rankings = page.getHtml().xpath("//div[@class='srhRnk']/span[@class='icon']/text()").all();
                    List<String> productUrls = page.getHtml().xpath("//div[@class='srhPic']//div[@class='rnkRanking_bigImageBox']/a/@href").all();
                    List<String> imageUrls = page.getHtml().xpath("//div[@class='srhPic']//div[@class='rnkRanking_bigImageBox']/a/img/@src").all();
                    List<String> titles = page.getHtml().xpath("//div[@class='srhItm']/a/text()").all();
                    List<String> prices = page.getHtml().xpath("//div[@class='srhPri']/text()").all();
                    // The columns are extracted independently; only index-aligned rows present in all of them are used.
                    int count = Math.min(productUrls.size(), Math.min(imageUrls.size(), Math.min(titles.size(), Math.min(prices.size(), rankings.size()))));
                    for (int i = 0; i < count; i++) {
                        Map<String, String> product = new HashMap<>();
                        product.put("ranking", rankings.get(i).trim());
                        String productUrl = productUrls.get(i);
                        product.put("productUrl", productUrl);
                        // For "https://host/shop/..." the 4th segment after splitting on '/' is the shop id.
                        String[] parts = productUrl.split("/");
                        if (parts.length > 3) {
                            product.put("shopName", parts[3]);
                        }
                        product.put("imageUrl", imageUrls.get(i));
                        product.put("productTitle", titles.get(i).trim());
                        String price = prices.get(i).replaceAll("[^0-9]", "").trim();
                        if (!price.isEmpty()) {
                            product.put("price", price);
                        }
                        product.put("imageId", null);
                        product.put("image1688Url", null);
                        products.add(product);
                    }
                } catch (Exception e) {
                    // Pass the exception itself so the stack trace is not lost.
                    logger.error("解析页面时发生错误: {}", e.getMessage(), e);
                }
            }

            @Override
            public Site getSite() {
                return site;
            }
        }).addUrl(url).setDownloader(WebMagicProxyUtil.getDefaultProxyDownloader()).thread(1).run();
        return AjaxResult.success(products);
    } catch (Exception e) {
        logger.error("抓取商品数据失败: {}", e.getMessage(), e);
        return AjaxResult.error("抓取商品数据失败");
    } finally {
        WebMagicProxyUtil.clearSystemProxy();
    }
}
}

View File

@@ -1,179 +0,0 @@
package com.ruoyi.web.controller.tool;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.PreDestroy;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ruoyi.common.annotation.Anonymous;
import com.ruoyi.common.core.controller.BaseController;
import com.ruoyi.common.core.domain.R;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.ResultItems;
/**
* 佐川急便物流查询控制器
*/
@Api("佐川急便物流查询接口")
@RestController
@RequestMapping("/tool/sagawa")
@Anonymous
public class SagawaExpressController extends BaseController implements PageProcessor {
private static final Logger logger = LoggerFactory.getLogger(SagawaExpressController.class);
private final ExecutorService executorService = Executors.newFixedThreadPool(10);
private final Site site = Site.me()
.setRetryTimes(2)
.setSleepTime(1000)
.setTimeOut(10000)
.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
/**
 * Shuts down this controller's thread pool when the bean is destroyed.
 *
 * <p>Calls {@code shutdownNow()} directly, i.e. it does not wait for running tasks to finish.
 */
@PreDestroy
public void destroy() {
    executorService.shutdownNow();
}
/**
 * Parses a Sagawa tracking page and stores the outcome in the page field {@code "result"}.
 *
 * <p>The outcome map always has a {@code status} key:
 * {@code notFound} (page reports no parcel data), {@code noTable} (tracking table not matched),
 * {@code noRecords} (table matched but no usable row), {@code success} (with {@code trackInfo}
 * holding the status, date/time and office of the last matched row) or {@code error}
 * (an exception was thrown while parsing).
 *
 * @param page downloaded page to parse
 */
@Override
public void process(Page page) {
    try {
        String html = page.getHtml().toString();
        Map<String, Object> outcome = new HashMap<>();
        if (html.contains("お荷物データが登録されておりません")) {
            outcome.put("status", "notFound");
            outcome.put("message", "没有找到对应的包裹信息");
        } else {
            // Locate the tracking-history table.
            Matcher tableMatcher = Pattern
                    .compile("<table[^>]*class=\"table_basic table_okurijo_detail2\"[^>]*>\\s*<tbody>\\s*(?:<tr>.*?<th[^>]*>荷物状況</th>.*?</tr>.*?<tr>.*?</tr>.*?)+\\s*</tbody>\\s*</table>", Pattern.DOTALL)
                    .matcher(html);
            if (!tableMatcher.find()) {
                outcome.put("status", "noTable");
                outcome.put("message", "未找到物流跟踪表格");
            } else {
                Matcher rows = Pattern
                        .compile("<tr>\\s*<td>\\s*([^<]*?)\\s*</td>\\s*<td>\\s*([^<]*?)\\s*</td>\\s*<td>\\s*([^<]*?)\\s*</td>\\s*</tr>", Pattern.DOTALL)
                        .matcher(tableMatcher.group(0));
                // Each iteration overwrites the previous one, so only the last three-cell row survives.
                String[] lastRow = null;
                while (rows.find()) {
                    lastRow = new String[] {rows.group(1).trim(), rows.group(2).trim(), rows.group(3).trim()};
                }
                if (lastRow == null || lastRow[0].isEmpty()) {
                    outcome.put("status", "noRecords");
                    outcome.put("message", "没有物流记录");
                } else {
                    Map<String, String> trackInfo = new HashMap<>();
                    trackInfo.put("status", lastRow[0]);
                    trackInfo.put("dateTime", lastRow[1]);
                    trackInfo.put("office", lastRow[2]);
                    outcome.put("status", "success");
                    outcome.put("trackInfo", trackInfo);
                }
            }
        }
        page.putField("result", outcome);
    } catch (Exception e) {
        logger.error("解析页面失败", e);
        Map<String, Object> failure = new HashMap<>();
        failure.put("status", "error");
        failure.put("message", "解析页面失败: " + e.getMessage());
        page.putField("result", failure);
    }
}
/**
 * Returns the crawl settings held in this controller's {@code site} field.
 *
 * @return the {@code Site} instance stored in the {@code site} field
 */
@Override
public Site getSite() {
    return site;
}
/**
 * Builds the Sagawa Express lookup URL for a waybill number.
 *
 * <p>The number is trimmed and URL-encoded before being appended as the {@code okurijoNo}
 * query parameter, so characters such as {@code &}, {@code #} or spaces coming from the
 * request path cannot alter the query string. Purely alphanumeric numbers are unchanged.
 *
 * @param trackingNumber waybill number; must not be {@code null}
 * @return the absolute lookup URL
 */
private String buildSagawaUrl(String trackingNumber) {
    String trimmed = trackingNumber.trim();
    try {
        return "https://k2k.sagawa-exp.co.jp/p/web/okurijosearch.do?okurijoNo="
                + java.net.URLEncoder.encode(trimmed, "UTF-8");
    } catch (java.io.UnsupportedEncodingException e) {
        // UTF-8 is a charset every JVM must support, so this is not expected to happen.
        throw new IllegalStateException("UTF-8 encoding is not available", e);
    }
}
/**
 * Pipeline that keeps the {@code "result"} field of the most recently processed page so the
 * caller can read it after the spider has finished.
 */
private static class ResultCollectorPipeline implements Pipeline {
    // Stays null until process(...) has been called at least once.
    private Map<String, Object> result = null;

    /** Stores the {@code "result"} item of {@code resultItems}, replacing any earlier value. */
    @Override
    public void process(ResultItems resultItems, us.codecraft.webmagic.Task task) {
        this.result = resultItems.get("result");
    }

    /** @return the last stored result, or {@code null} if nothing has been stored */
    public Map<String, Object> getResult() {
        return result;
    }
}
/**
 * API entry point: looks up the tracking state of a Sagawa Express parcel.
 *
 * <p>Runs a one-page spider synchronously and returns the map produced by {@code process}.
 * When the spider yields no result, a placeholder "success" payload with status
 * {@code "处理中"} and empty date/office is returned. Exceptions are logged and reported as an
 * {@code "error"} payload inside a successful response.
 *
 * @param trackingNumber waybill number taken from the request path
 * @return failure response for a blank number, otherwise a successful response with the payload
 */
@ApiOperation("查询佐川急便物流信息")
@GetMapping("/tracking/{trackingNumber}")
public R<Map<String, Object>> getTrackingInfo(@PathVariable("trackingNumber") String trackingNumber) {
    try {
        boolean blank = trackingNumber == null || trackingNumber.trim().isEmpty();
        if (blank) {
            return R.fail("运单号不能为空");
        }
        String lookupUrl = buildSagawaUrl(trackingNumber);
        ResultCollectorPipeline collector = new ResultCollectorPipeline();
        Spider spider = Spider.create(this);
        spider.addUrl(lookupUrl);
        spider.addPipeline(collector);
        spider.thread(executorService, 1);
        spider.run();
        Map<String, Object> payload = collector.getResult();
        if (payload != null) {
            return R.ok(payload);
        }
        // Nothing was collected: fall back to a placeholder record.
        Map<String, String> placeholder = new HashMap<>();
        placeholder.put("status", "处理中");
        placeholder.put("dateTime", "");
        placeholder.put("office", "");
        Map<String, Object> fallback = new HashMap<>();
        fallback.put("status", "success");
        fallback.put("trackInfo", placeholder);
        return R.ok(fallback);
    } catch (Exception e) {
        logger.error("查询物流信息失败", e);
        Map<String, Object> failure = new HashMap<>();
        failure.put("status", "error");
        failure.put("message", "查询物流信息失败: " + e.getMessage());
        return R.ok(failure);
    }
}
}

View File

@@ -1,594 +0,0 @@
package com.ruoyi.web.controller.tool;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.PreDestroy;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.context.request.async.DeferredResult;
import org.springframework.http.ResponseEntity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ruoyi.common.annotation.Anonymous;
import com.ruoyi.common.core.controller.BaseController;
import com.ruoyi.common.core.domain.R;
import com.ruoyi.web.util.WebMagicProxyUtil;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.SimpleProxyProvider;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import java.util.Objects;
import java.util.concurrent.TimeoutException;
/**
* 亚马逊爬虫控制器 - 爬取价格和卖家信息
* 性能优化版本
*
* @author ruoyi
*/
@Api("亚马逊爬虫功能")
@RestController
@RequestMapping("/tool/webmagic")
@Anonymous
public class WebMagicController extends BaseController implements PageProcessor {
private static final Logger logger = LoggerFactory.getLogger(WebMagicController.class);
private final Random random = new Random();
private static volatile Spider activeSpider = null;
private static final Object spiderLock = new Object();
private final ExecutorService executorService = Executors.newFixedThreadPool(10);
private final AtomicInteger activeTasks = new AtomicInteger(0);
private final int MAX_CONCURRENT_TASKS = 5;
private final Map<String, Map<String, Object>> resultCache = new ConcurrentHashMap<>();
private final Site site = Site.me()
.setRetryTimes(3)
.setSleepTime(1000 + random.nextInt(2000))
.setTimeOut(15000)
.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
.addCookie("session-id", "358-1261309-0483141")
.addCookie("session-id-time", "2082787201l")
.addCookie("i18n-prefs", "JPY")
.addCookie("lc-acbjp", "zh_CN")
.addCookie("ubid-acbjp", "357-8224002-9668932");
/**
 * Shuts down the crawler thread pool when the bean is destroyed.
 *
 * <p>Requests an orderly shutdown, waits up to 10 seconds for running tasks, then forces
 * termination. If the wait is interrupted the pool is forced down immediately and the
 * interrupt flag is restored.
 */
@PreDestroy
public void destroy() {
    logger.info("正在关闭爬虫线程池...");
    executorService.shutdown();
    boolean terminated;
    try {
        terminated = executorService.awaitTermination(10, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        executorService.shutdownNow();
        Thread.currentThread().interrupt();
        return;
    }
    if (!terminated) {
        executorService.shutdownNow();
    }
}
/**
 * Extracts price, seller and ASIN from an Amazon product page.
 *
 * <p>The extracted map is always published as page field {@code "resultMap"}. When price or
 * seller is missing and fewer than three retries have been recorded on the request, the same
 * URL is queued again with an incremented {@code "retries"} extra. Otherwise the map is stored
 * in {@code resultCache} under its ASIN (if one was found). Exceptions are logged and swallowed.
 *
 * @param page downloaded product page
 */
@Override
public void process(Page page) {
    try {
        Html html = page.getHtml();
        String symbol = html.xpath("//span[@class='a-price-symbol']/text()").toString();
        String whole = html.xpath("//span[@class='a-price-whole']/text()").toString();
        boolean hasPrice = symbol != null && !symbol.isEmpty() && whole != null && !whole.isEmpty();
        Map<String, Object> extracted = new HashMap<>();
        if (hasPrice) {
            extracted.put("price", symbol + whole);
        }
        // Seller name (may be null when the element is absent).
        extracted.put("seller", html.xpath("//a[@id='sellerProfileTriggerId']/text()").toString());
        String asin = html.xpath("//input[@id='ASIN']/@value").toString();
        extracted.put("asin", asin);

        String price = (String) extracted.get("price");
        String seller = (String) extracted.get("seller");
        boolean incomplete = price == null || price.isEmpty() || seller == null || seller.isEmpty();
        Object recorded = page.getRequest().getExtra("retries");
        int retries = (recorded != null) ? (int) recorded : 0;

        if (incomplete && retries < 3) {
            // NOTE(review): the new request targets the same URL; confirm the scheduler does
            // not discard it as a duplicate.
            us.codecraft.webmagic.Request again = new us.codecraft.webmagic.Request(page.getUrl().toString());
            again.putExtra("retries", retries + 1);
            page.addTargetRequest(again);
            // The back-off value is only reported in the log line; nothing here sleeps on it.
            int backoffTime = (int) Math.pow(2, retries) * 1000 + random.nextInt(1000);
            logger.info("数据不完整,准备进行第{}次重试ASIN: {}, 等待: {}ms", retries + 1, asin, backoffTime);
        } else if (asin != null && !asin.isEmpty()) {
            resultCache.put(asin, extracted);
        }
        page.putField("resultMap", extracted);
    } catch (Exception e) {
        logger.error("解析页面失败", e);
    }
}
/**
 * Returns the crawl settings held in this controller's {@code site} field.
 *
 * @return the {@code Site} instance stored in the {@code site} field
 */
@Override
public Site getSite() {
    return site;
}
/**
 * Builds the amazon.co.jp product URL for an ASIN.
 *
 * @param asin product identifier; every character outside {@code [a-zA-Z0-9]} is dropped
 * @return {@code https://www.amazon.co.jp/dp/} followed by the sanitised ASIN
 */
private String buildAmazonUrl(String asin) {
    String sanitised = asin.replaceAll("[^a-zA-Z0-9]", "");
    return "https://www.amazon.co.jp/dp/" + sanitised;
}
/**
 * Lists all proxy nodes reported by {@code WebMagicProxyUtil.getAllProxies()}.
 *
 * @return successful response wrapping that list
 */
@ApiOperation("获取所有可用代理节点")
@GetMapping("/proxies")
public R<List<Map<String, String>>> getProxies() {
    return R.ok(WebMagicProxyUtil.getAllProxies());
}
/**
 * Selects the proxy node to use for subsequent crawls.
 *
 * <p>The name comparison is null-safe, so a node without a {@code "name"} entry is skipped
 * instead of raising a {@code NullPointerException}.
 *
 * @param proxyName name of the node to activate
 * @return the activated node's attributes (an empty map if it cannot be found in the list
 *         afterwards), or a failure response when no node with that name exists
 */
@ApiOperation("设置当前使用的代理节点")
@PostMapping("/proxy/set")
public R<Map<String, String>> setCurrentProxy(@RequestParam String proxyName) {
    boolean success = WebMagicProxyUtil.setCurrentProxy(proxyName);
    if (!success) {
        return R.fail("未找到指定的代理节点: " + proxyName);
    }
    Map<String, String> currentProxy = WebMagicProxyUtil.getAllProxies().stream()
            .filter(p -> Objects.equals(proxyName, p.get("name")))
            .findFirst()
            .orElse(new HashMap<>());
    return R.ok(currentProxy);
}
/**
 * Reports whether the Clash proxy is running.
 *
 * @return successful response with {@code running} (boolean) and a display text in {@code status}
 */
@ApiOperation("获取Clash运行状态")
@GetMapping("/clash/status")
public R<Map<String, Object>> getClashStatus() {
    final boolean running = WebMagicProxyUtil.isProxyRunning();
    String label;
    if (running) {
        label = "运行中";
    } else {
        label = "已停止";
    }
    Map<String, Object> status = new HashMap<>();
    status.put("running", running);
    status.put("status", label);
    return R.ok(status);
}
/**
 * Starts the Clash proxy on request.
 *
 * <p>After the start attempt the method pauses for three seconds (regardless of the outcome)
 * to give Clash time to come up; if that pause is interrupted the interrupt flag is restored
 * and a warning is logged.
 *
 * @return successful response with {@code success} (boolean) and a matching {@code message}
 */
@ApiOperation("启动Clash")
@PostMapping("/clash/start")
public R<Map<String, Object>> startClashManually() {
    final boolean started = WebMagicProxyUtil.startProxy();
    try {
        // Give Clash time to finish starting.
        Thread.sleep(3000);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        logger.warn("Clash启动等待被中断", e);
    }
    Map<String, Object> outcome = new HashMap<>();
    outcome.put("success", started);
    if (started) {
        outcome.put("message", "Clash 启动成功");
    } else {
        outcome.put("message", "Clash 启动失败");
    }
    return R.ok(outcome);
}
/**
 * Stops the Clash proxy on request.
 *
 * @return successful response with {@code success} (boolean) and a matching {@code message}
 */
@ApiOperation("停止Clash")
@PostMapping("/clash/stop")
public R<Map<String, Object>> stopClashManually() {
    final boolean stopped = WebMagicProxyUtil.stopProxy();
    String message;
    if (stopped) {
        message = "Clash 停止成功";
    } else {
        message = "Clash 停止失败";
    }
    Map<String, Object> outcome = new HashMap<>();
    outcome.put("success", stopped);
    outcome.put("message", message);
    return R.ok(outcome);
}
/**
 * Crawls price and seller information for a list of ASINs asynchronously.
 *
 * <p>Flow: sanitise and de-duplicate the ASINs, start the proxy, submit the ASINs in batches
 * of three with random pauses, wait for the batches to finish, collect the results from
 * {@code resultCache} (placeholders {@code "未获取"} for anything missing), retry the failed
 * items once and finally complete the deferred result.
 *
 * <p>The wait for the batches is an explicit join on their futures rather than a fixed sleep,
 * so results are only collected after the workers are done. The join is capped (30 s per ASIN,
 * at most 400 s) so a saturated pool cannot block this task past the 500 s deferred timeout.
 *
 * @param asinList raw ASINs from the request body; {@code null}, blank or non-alphanumeric-only
 *                 entries are ignored
 * @return deferred response carrying one map per distinct ASIN, or a failure message
 */
@ApiOperation("批量爬取亚马逊产品信息")
@PostMapping("/batch")
public DeferredResult<ResponseEntity<R<List<Map<String, Object>>>>> batchGetAmazonProductInfo(@RequestBody List<String> asinList) {
    DeferredResult<ResponseEntity<R<List<Map<String, Object>>>>> deferredResult = new DeferredResult<>(500000L);
    // Sanitise the input: trim, keep alphanumerics only, drop entries that end up empty, de-duplicate.
    List<String> cleanedAsinList = asinList == null
            ? new ArrayList<String>()
            : asinList.stream()
                    .filter(asin -> asin != null && !asin.trim().isEmpty())
                    .map(asin -> asin.trim().replaceAll("[^a-zA-Z0-9]", ""))
                    .filter(asin -> !asin.isEmpty())
                    .distinct()
                    .collect(Collectors.toList());
    if (cleanedAsinList.isEmpty()) {
        deferredResult.setResult(ResponseEntity.ok(R.ok(new ArrayList<>())));
        return deferredResult;
    }
    CompletableFuture.runAsync(() -> {
        try {
            WebMagicProxyUtil.startProxy();
            logger.info("开始批量爬取 {} 个ASIN", cleanedAsinList.size());
            List<CompletableFuture<Void>> batchFutures = new ArrayList<>();
            Map<String, Map<String, Object>> results = new ConcurrentHashMap<>();
            int batchSize = 3;
            for (int i = 0; i < cleanedAsinList.size(); i += batchSize) {
                final int startIndex = i;
                final int endIndex = Math.min(i + batchSize, cleanedAsinList.size());
                List<String> batch = cleanedAsinList.subList(startIndex, endIndex);
                Thread.sleep(500 + random.nextInt(1500));
                batchFutures.add(CompletableFuture.runAsync(() -> {
                    try {
                        processBatch(batch, results);
                    } catch (Exception e) {
                        logger.error("处理批次 {}~{} 失败: {}", startIndex, endIndex - 1, e.getMessage());
                    }
                }, executorService));
                // Pause between submissions so the requests do not all go out at once.
                Thread.sleep(2000 + random.nextInt(3000));
            }
            // Wait until every batch has actually finished before reading the cache.
            long waitSeconds = Math.min(400L, 30L * cleanedAsinList.size());
            try {
                CompletableFuture.allOf(batchFutures.toArray(new CompletableFuture[0]))
                        .get(waitSeconds, TimeUnit.SECONDS);
            } catch (TimeoutException te) {
                // Continue with whatever has been collected; unfinished ASINs get placeholders and a retry.
                logger.warn("等待批次完成超时({}秒),将使用已获取的结果继续", waitSeconds);
            }
            List<Map<String, Object>> resultList = new ArrayList<>();
            for (String asin : cleanedAsinList) {
                Map<String, Object> result = resultCache.getOrDefault(asin, new HashMap<>());
                if (result.isEmpty() || !result.containsKey("price") || !result.containsKey("seller")) {
                    Map<String, Object> defaultResult = new HashMap<>();
                    defaultResult.put("asin", asin);
                    defaultResult.put("price", "未获取");
                    defaultResult.put("seller", "未获取");
                    resultList.add(defaultResult);
                } else {
                    resultList.add(result);
                }
            }
            resultCache.clear();
            retryFailedItems(resultList).thenAccept(finalResults -> {
                try {
                    WebMagicProxyUtil.stopProxy();
                    deferredResult.setResult(ResponseEntity.ok(R.ok(finalResults)));
                    logger.info("批量爬取完成,成功获取 {} 个产品信息", finalResults.size());
                } catch (Exception e) {
                    logger.error("处理最终结果失败", e);
                    deferredResult.setResult(ResponseEntity.ok(R.fail("处理最终结果失败: " + e.getMessage())));
                }
            });
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to the pool that owns this thread.
                Thread.currentThread().interrupt();
            }
            logger.error("批量爬取失败: {}", e.getMessage(), e);
            WebMagicProxyUtil.stopProxy();
            deferredResult.setResult(ResponseEntity.ok(R.fail("批量爬取失败: " + e.getMessage())));
        }
    }, executorService);
    return deferredResult;
}
/**
 * Crawls each ASIN of one batch sequentially, one single-threaded spider per ASIN.
 *
 * <p>The {@code activeTasks} counter is incremented before a crawl and is guaranteed to be
 * decremented again, including when creating the downloader or the spider fails. If the
 * thread is interrupted, the interrupt flag is restored and the rest of the batch is skipped.
 * Other failures are logged and the loop moves on to the next ASIN.
 *
 * @param asinBatch ASINs to crawl
 * @param results   not used by this method; crawl results are written to {@code resultCache}
 *                  by {@code process}
 */
private void processBatch(List<String> asinBatch, Map<String, Map<String, Object>> results) {
    for (String asin : asinBatch) {
        try {
            String url = buildAmazonUrl(asin);
            synchronized (spiderLock) {
                // NOTE(review): this is a single one-second pause, not a wait loop, so the
                // limit is a soft throttle rather than a hard cap.
                if (activeTasks.get() >= MAX_CONCURRENT_TASKS) {
                    Thread.sleep(1000);
                }
                activeTasks.incrementAndGet();
            }
            Spider spider = null;
            try {
                // Everything after the increment lives inside this try so the finally block
                // always balances the counter.
                HttpClientDownloader downloader = WebMagicProxyUtil.getProxyDownloader();
                spider = Spider.create(this)
                        .addUrl(url)
                        .setDownloader(downloader)
                        .thread(1);
                synchronized (spiderLock) {
                    activeSpider = spider;
                }
                spider.run();
                Thread.sleep(1000 + random.nextInt(2000));
            } finally {
                WebMagicProxyUtil.clearSystemProxy();
                synchronized (spiderLock) {
                    // Only clear the reference if it still points at this spider.
                    if (spider != null && activeSpider == spider) {
                        activeSpider = null;
                    }
                }
                activeTasks.decrementAndGet();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("处理ASIN: {} 失败: {}", asin, e.getMessage());
            return;
        } catch (Exception e) {
            logger.error("处理ASIN: {} 失败: {}", asin, e.getMessage(), e);
        }
    }
}
/**
 * Re-crawls every item whose price or seller is still the placeholder {@code "未获取"}.
 *
 * <p>Runs on {@code executorService}. For each failed ASIN a fresh spider is run; when
 * {@code resultCache} then holds both a seller and a price for it, the matching entry in
 * {@code results} is updated in place. The method never fails: on any error, or when the
 * thread is interrupted (the flag is restored), the list is returned as it stands.
 *
 * @param results first-pass results; modified in place
 * @return future completing with the same list instance
 */
private CompletableFuture<List<Map<String, Object>>> retryFailedItems(List<Map<String, Object>> results) {
    return CompletableFuture.supplyAsync(() -> {
        try {
            List<String> failedAsins = results.stream()
                    .filter(item -> "未获取".equals(item.get("seller")) || "未获取".equals(item.get("price")))
                    .map(item -> (String) item.get("asin"))
                    .collect(Collectors.toList());
            if (failedAsins.isEmpty()) {
                return results;
            }
            logger.info("开始重试 {} 个失败的ASIN", failedAsins.size());
            for (String asin : failedAsins) {
                Spider spider = null;
                try {
                    String url = buildAmazonUrl(asin);
                    HttpClientDownloader downloader = WebMagicProxyUtil.getProxyDownloader();
                    spider = Spider.create(this)
                            .addUrl(url)
                            .setDownloader(downloader)
                            .thread(1);
                    synchronized (spiderLock) {
                        activeSpider = spider;
                    }
                    spider.run();
                    Thread.sleep(2000 + random.nextInt(3000));
                    Map<String, Object> result = resultCache.get(asin);
                    if (result != null && result.get("seller") != null && result.get("price") != null) {
                        for (Map<String, Object> item : results) {
                            if (asin.equals(item.get("asin"))) {
                                item.put("seller", result.get("seller"));
                                item.put("price", result.get("price"));
                                break;
                            }
                        }
                        logger.info("重试成功: ASIN={}", asin);
                    } else {
                        logger.warn("重试失败: ASIN={}", asin);
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    logger.error("重试ASIN: {} 失败: {}", asin, e.getMessage());
                    return results;
                } catch (Exception e) {
                    logger.error("重试ASIN: {} 失败: {}", asin, e.getMessage(), e);
                } finally {
                    WebMagicProxyUtil.clearSystemProxy();
                    synchronized (spiderLock) {
                        // Same rule as processBatch: never clear a reference owned by another crawl.
                        if (spider != null && activeSpider == spider) {
                            activeSpider = null;
                        }
                    }
                }
                Thread.sleep(3000 + random.nextInt(2000));
            }
            return results;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("重试失败项目时出错: {}", e.getMessage());
            return results;
        } catch (Exception e) {
            logger.error("重试失败项目时出错: {}", e.getMessage(), e);
            return results;
        }
    }, executorService);
}
/**
 * Measures the latency of the named proxy nodes.
 *
 * <p>Clash is started first (a failed start is only logged). The nodes are then probed in
 * batches of five in parallel; each batch gets five seconds. Every entry of the response
 * carries the name of the node it belongs to, including nodes whose probe timed out or threw,
 * so callers can match results to requests. Clash is stopped when all batches are done.
 *
 * @param proxyNames names of the nodes to probe
 * @return deferred response with one result map ({@code name}, {@code status}, {@code delay},
 *         optionally {@code message}) per requested node, or a failure message
 */
@ApiOperation("测试代理节点延迟")
@PostMapping("/proxy/test")
public DeferredResult<ResponseEntity<R<List<Map<String, Object>>>>> testProxyDelay(@RequestBody List<String> proxyNames) {
    DeferredResult<ResponseEntity<R<List<Map<String, Object>>>>> deferredResult = new DeferredResult<>(600000L);
    // Clash must be running before any node can be probed.
    boolean clashStarted = WebMagicProxyUtil.startProxy();
    if (!clashStarted) {
        logger.warn("启动 Clash 失败,将尝试继续测试...");
    }
    CompletableFuture.runAsync(() -> {
        List<Map<String, Object>> resultList = new ArrayList<>();
        try {
            int batchSize = 5; // nodes probed in parallel
            for (int i = 0; i < proxyNames.size(); i += batchSize) {
                final int startIndex = i;
                final int endIndex = Math.min(i + batchSize, proxyNames.size());
                List<String> batch = proxyNames.subList(startIndex, endIndex);
                List<CompletableFuture<Map<String, Object>>> futures = batch.stream()
                        .map(proxyName -> CompletableFuture.supplyAsync(
                                () -> testSingleProxy(proxyName), executorService))
                        .collect(Collectors.toList());
                try {
                    CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
                            .get(5, TimeUnit.SECONDS);
                } catch (TimeoutException ignored) {
                    // Expected for slow nodes; unfinished probes are reported as timeouts below.
                }
                // futures and batch share the same order, so index j identifies the node.
                for (int j = 0; j < futures.size(); j++) {
                    CompletableFuture<Map<String, Object>> future = futures.get(j);
                    String proxyName = batch.get(j);
                    try {
                        if (future.isDone()) {
                            resultList.add(future.join());
                        } else {
                            future.cancel(true);
                            resultList.add(failedProxyResult(proxyName, "测试超时"));
                        }
                    } catch (Exception e) {
                        resultList.add(failedProxyResult(proxyName, "测试异常: " + e.getMessage()));
                    }
                }
                // Short pause between batches.
                Thread.sleep(1000);
            }
            deferredResult.setResult(ResponseEntity.ok(R.ok(resultList)));
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            logger.error("测试代理延迟失败", e);
            deferredResult.setResult(ResponseEntity.ok(R.fail("测试代理延迟失败")));
        } finally {
            boolean clashStopped = WebMagicProxyUtil.stopProxy();
            if (!clashStopped) {
                logger.warn("停止 Clash 失败,请手动检查 Clash 状态");
            }
        }
    }, executorService);
    return deferredResult;
}

/** Builds a "fail" result entry (delay -1) for the given node name and message. */
private static Map<String, Object> failedProxyResult(String name, String message) {
    Map<String, Object> failure = new HashMap<>();
    failure.put("name", name);
    failure.put("status", "fail");
    failure.put("message", message);
    failure.put("delay", -1);
    return failure;
}
/**
 * Probes one proxy node by fetching a small test URL through it and timing the call.
 *
 * <p>The result map always contains {@code name}. A node that is not in the proxy list, or any
 * exception during the probe, yields {@code status=fail} with {@code delay=-1}; otherwise
 * {@code status=success} with the elapsed milliseconds measured from method entry. The system
 * proxy is cleared before the probe and again on exit.
 *
 * @param proxyName name of the node to probe
 * @return the result map described above
 */
private Map<String, Object> testSingleProxy(String proxyName) {
    Map<String, Object> outcome = new HashMap<>();
    outcome.put("name", proxyName);
    final long startedAt = System.currentTimeMillis();
    try {
        // Look the node up by name.
        Map<String, String> node = null;
        for (Map<String, String> candidate : WebMagicProxyUtil.getAllProxies()) {
            if (proxyName.equals(candidate.get("name"))) {
                node = candidate;
                break;
            }
        }
        if (node == null) {
            outcome.put("status", "fail");
            outcome.put("message", "未找到指定的代理节点");
            outcome.put("delay", -1);
            return outcome;
        }
        String host = node.get("server");
        int port = Integer.parseInt(node.get("port"));
        HttpClientDownloader probeDownloader = new HttpClientDownloader();
        WebMagicProxyUtil.clearSystemProxy();
        probeDownloader.setProxyProvider(SimpleProxyProvider.from(new Proxy(host, port)));
        PageProcessor noOpProcessor = new PageProcessor() {
            @Override
            public void process(Page page) {
                // Connectivity check only; the page content is irrelevant.
            }

            @Override
            public Site getSite() {
                return Site.me()
                        .setRetryTimes(0)
                        .setSleepTime(100)
                        .setTimeOut(3000);
            }
        };
        // NOTE(review): presumably run() returns normally even when the download fails, in which
        // case an unreachable node would still be reported as success — confirm.
        Spider.create(noOpProcessor)
                .setDownloader(probeDownloader)
                .addUrl("http://www.gstatic.com/generate_204")
                .thread(1)
                .run();
        long finishedAt = System.currentTimeMillis();
        outcome.put("status", "success");
        outcome.put("delay", finishedAt - startedAt);
    } catch (Exception e) {
        outcome.put("status", "fail");
        outcome.put("message", "连接超时或失败");
        outcome.put("delay", -1);
    } finally {
        WebMagicProxyUtil.clearSystemProxy();
    }
    return outcome;
}
/**
 * Stops the currently registered spider (if any) and the Clash proxy.
 *
 * <p>{@code success} in the response is true when either the spider or Clash was stopped;
 * the {@code message} text is the same in every case.
 *
 * @return successful response with {@code success} and {@code message}
 */
@ApiOperation("停止所有爬虫活动")
@PostMapping("/stop-crawling")
public R<Map<String, Object>> stopAllCrawling() {
    boolean spiderStopped = false;
    synchronized (spiderLock) {
        Spider running = activeSpider;
        if (running != null) {
            try {
                running.stop();
                activeSpider = null;
                spiderStopped = true;
                logger.info("爬虫任务已终止");
            } catch (Exception e) {
                logger.error("停止爬虫时出错", e);
            }
        }
    }
    boolean clashStopped = WebMagicProxyUtil.stopProxy();
    Map<String, Object> outcome = new HashMap<>();
    outcome.put("success", spiderStopped || clashStopped);
    outcome.put("message", "爬虫和Clash已停止");
    return R.ok(outcome);
}
}

View File

@@ -1,168 +0,0 @@
package com.ruoyi.web.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.*;
import org.springframework.web.client.RestTemplate;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Cookie helper for the Alibaba 1688 H5 API.
 *
 * <p>Keeps a process-wide cache of cookies obtained from 1688 and refreshes it when it is older
 * than ten minutes. All access to the cache and its timestamp goes through methods synchronized
 * on this class, so readers never observe the map while a refresh is modifying it. Note that a
 * refresh performs two HTTP requests while holding that lock.
 */
public class Alibaba1688CookieUtil {
    private static final Logger log = LoggerFactory.getLogger(Alibaba1688CookieUtil.class);
    // Guarded by the class lock (every accessor below is static synchronized).
    private static final Map<String, String> cookieCache = new HashMap<>();
    // Cache lifetime in milliseconds (10 minutes).
    private static final long COOKIE_EXPIRE_TIME = 10 * 60 * 1000;
    // Time of the last successful refresh or manual update; 0 forces a refresh. Guarded by the class lock.
    private static long lastUpdateTime = 0;
    private static final String M_H5_TK = "_m_h5_tk";
    private static final String M_H5_TK_ENC = "_m_h5_tk_enc";
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36";

    /**
     * Returns the 1688 API token, i.e. the part of the {@code _m_h5_tk} cookie before the
     * first underscore.
     *
     * @param restTemplate client used if the cookies need refreshing
     * @return the token, or an empty string when the cookie is not available
     */
    public static String getToken(RestTemplate restTemplate) {
        String tokenCookie = getCookie(M_H5_TK, restTemplate);
        if (tokenCookie != null && !tokenCookie.isEmpty()) {
            return tokenCookie.split("_")[0];
        }
        return "";
    }

    /**
     * Returns the value of one cached cookie, refreshing the cache first if it has expired.
     *
     * @param cookieName   cookie name
     * @param restTemplate client used if the cookies need refreshing
     * @return the cookie value, or {@code null} if no such cookie is cached
     */
    public static synchronized String getCookie(String cookieName, RestTemplate restTemplate) {
        refreshIfExpired(restTemplate);
        return cookieCache.get(cookieName);
    }

    /**
     * Returns all cached cookies formatted for a {@code Cookie} request header
     * ({@code name=value; name=value}), refreshing the cache first if it has expired.
     *
     * @param restTemplate client used if the cookies need refreshing
     * @return the header value; empty when nothing is cached
     */
    public static synchronized String getCookieString(RestTemplate restTemplate) {
        refreshIfExpired(restTemplate);
        return buildCookieHeader();
    }

    /**
     * Fetches fresh cookies: first from the 1688 home page, then from the H5 API endpoint
     * (sending the cookies collected so far). The timestamp is only updated when both requests
     * complete; any failure is logged and leaves the cache marked as expired.
     *
     * @param restTemplate client used for the two requests
     */
    public static synchronized void refreshCookies(RestTemplate restTemplate) {
        try {
            HttpHeaders headers = new HttpHeaders();
            headers.set("User-Agent", USER_AGENT);
            headers.set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
            headers.set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
            HttpEntity<String> entity = new HttpEntity<>(headers);
            ResponseEntity<String> response = restTemplate.exchange("https://www.1688.com/", HttpMethod.GET, entity, String.class);
            storeResponseCookies(response);

            headers = new HttpHeaders();
            headers.set("User-Agent", USER_AGENT);
            headers.set("Accept", "application/json");
            headers.set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
            headers.set("Referer", "https://www.1688.com/");
            if (!cookieCache.isEmpty()) {
                headers.set("Cookie", buildCookieHeader());
            }
            entity = new HttpEntity<>(headers);
            response = restTemplate.exchange(
                    "https://h5api.m.1688.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.7.4&appKey=12574478&api=mtop.relationrecommend.wirelessrecommend.recommend&v=2.0&type=originaljson&dataType=json",
                    HttpMethod.GET, entity, String.class);
            storeResponseCookies(response);

            // Log only whether the key cookies were obtained; the values are credentials.
            log.info("获取到的1688 API token: {}", cookieCache.containsKey(M_H5_TK) ? "已获取" : "未获取到");
            log.info("获取到的1688 API token_enc: {}", cookieCache.containsKey(M_H5_TK_ENC) ? "已获取" : "未获取到");
            lastUpdateTime = System.currentTimeMillis();
        } catch (Exception e) {
            log.error("刷新1688 API Cookie失败", e);
        }
    }

    /**
     * Stores a cookie manually and marks the cache as freshly updated.
     *
     * @param name  cookie name
     * @param value cookie value
     */
    public static synchronized void setCookie(String name, String value) {
        cookieCache.put(name, value);
        lastUpdateTime = System.currentTimeMillis();
    }

    /** Empties the cache and marks it as expired so the next read triggers a refresh. */
    public static synchronized void clearCookies() {
        cookieCache.clear();
        lastUpdateTime = 0;
    }

    /** Refreshes the cache when the last update is older than the expiry time. Caller holds the class lock. */
    private static void refreshIfExpired(RestTemplate restTemplate) {
        if (System.currentTimeMillis() - lastUpdateTime > COOKIE_EXPIRE_TIME) {
            refreshCookies(restTemplate);
        }
    }

    /** Joins the cached cookies as {@code name=value} pairs separated by {@code "; "}. Caller holds the class lock. */
    private static String buildCookieHeader() {
        StringBuilder cookieBuilder = new StringBuilder();
        for (Map.Entry<String, String> entry : cookieCache.entrySet()) {
            if (cookieBuilder.length() > 0) {
                cookieBuilder.append("; ");
            }
            cookieBuilder.append(entry.getKey()).append("=").append(entry.getValue());
        }
        return cookieBuilder.toString();
    }

    /** Caches every {@code Set-Cookie} header of the response. Caller holds the class lock. */
    private static void storeResponseCookies(ResponseEntity<String> response) {
        List<String> cookies = response.getHeaders().get("Set-Cookie");
        if (cookies != null) {
            for (String cookie : cookies) {
                parseCookie(cookie);
            }
        }
    }

    /**
     * Extracts the leading {@code name=value} pair of a {@code Set-Cookie} header (attributes
     * after the first {@code ;} are ignored) and caches it. Caller holds the class lock.
     *
     * @param cookieStr raw header value; {@code null} or empty input is ignored
     */
    private static void parseCookie(String cookieStr) {
        if (cookieStr == null || cookieStr.isEmpty()) {
            return;
        }
        String[] parts = cookieStr.split(";")[0].split("=", 2);
        if (parts.length == 2) {
            String name = parts[0].trim();
            String value = parts[1].trim();
            cookieCache.put(name, value);
        }
    }
}

View File

@@ -1,69 +0,0 @@
package com.ruoyi.web.util;
import com.ruoyi.common.exception.ServiceException;
import com.ruoyi.framework.config.ImgUpload;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.Resource;
import java.io.File;
import java.io.IOException;
import java.util.Objects;
/**
 * Stores uploaded images on the local file system and removes them again.
 *
 * <p>All paths are built by appending the given file name to the configured image directory.
 *
 * @author TRACK
 */
@Component
public class ImgUploadUtil {
    @Resource(name = "imgUpload")
    private ImgUpload imgUpload;

    /**
     * @return the configured storage type
     * @throws ServiceException if no storage type is configured
     */
    public Integer getUploadType() {
        Integer uploadType = imgUpload.getUploadType();
        if (Objects.isNull(uploadType)) {
            throw new ServiceException("请配置图片存储方式!");
        }
        return uploadType;
    }

    /**
     * @return the configured image directory
     * @throws ServiceException if the directory is not configured or blank
     */
    public String getUploadPath() {
        String imagePath = imgUpload.getImagePath();
        if (Objects.isNull(imagePath) || StringUtils.isBlank(imagePath)) {
            throw new ServiceException("请配置图片存储路径");
        }
        return imagePath;
    }

    /**
     * @return the configured public resource URL
     * @throws ServiceException if the URL is not configured or blank
     */
    public String getResourceUrl() {
        String resourceUrl = imgUpload.getResourceUrl();
        if (Objects.isNull(resourceUrl) || StringUtils.isBlank(resourceUrl)) {
            throw new ServiceException("请配置图片路径");
        }
        return resourceUrl;
    }

    /**
     * Writes the uploaded image to {@code <image directory> + fileName}.
     *
     * <p>Missing parent directories are created first. Only the parent is created: creating
     * the target path itself would leave a directory where the file is supposed to go.
     *
     * @param img      uploaded content
     * @param fileName file name (optionally with sub-directories) below the image directory
     * @return {@code fileName}
     * @throws ServiceException if the image directory is not configured, the parent directory
     *                          cannot be created or the transfer fails
     */
    public String upload(MultipartFile img, String fileName) {
        String filePath = getUploadPath();
        File file = new File(filePath + fileName);
        File parent = file.getParentFile();
        // Re-check isDirectory() after mkdirs() so a directory created concurrently is not an error.
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new ServiceException("创建目录:" + filePath + "失败");
        }
        try {
            img.transferTo(file);
        } catch (IOException e) {
            throw new ServiceException("图片上传失败");
        }
        return fileName;
    }

    /**
     * Deletes {@code <image directory> + fileName} immediately.
     *
     * <p>Deleting a file that does not exist, or that cannot be removed, is not reported.
     *
     * @param fileName file name below the image directory
     * @throws ServiceException if the image directory is not configured
     */
    public void delete(String fileName) {
        File file = new File(getUploadPath() + fileName);
        // deleteOnExit() would keep the file until the JVM stops; remove it now instead.
        // The boolean result is intentionally ignored to keep this method non-throwing, as before.
        file.delete();
    }
}

View File

@@ -1,346 +0,0 @@
package com.ruoyi.web.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import com.ruoyi.common.core.redis.RedisCache;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.yaml.snakeyaml.Yaml;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.SimpleProxyProvider;
/**
 * Proxy helper for WebMagic crawlers.
 *
 * <p>Loads proxy nodes from a YAML file, tracks the currently selected proxy in
 * static state, builds {@link HttpClientDownloader} instances that route through
 * a proxy, fetches a public proxy pool (cached in Redis) and starts/stops the
 * local {@code clash} service through {@code systemctl}.
 *
 * <p>All state is held in plain static fields without synchronization, and
 * {@link #getProxyDownloader(String, int)} changes JVM-wide system properties.
 *
 * @author ruoyi
 */
@Component
public class WebMagicProxyUtil {
    private static final Logger logger = LoggerFactory.getLogger(WebMagicProxyUtil.class);
    // Shared random source for getRandomSleepTime.
    private static final Random random = new Random();
    // Default proxy endpoint; also the endpoint probed by isProxyRunning().
    private static final String DEFAULT_PROXY_HOST = "127.0.0.1";
    private static final String DEFAULT_PROXY_PORT = "7890";
    // Display name of the built-in default proxy entry.
    private static final String DEFAULT_PROXY_NAME = "默认代理";
    // Location of the YAML file that lists the proxy nodes.
    private static final String PROXY_CONFIG_PATH = "/www/java_mall/erp/config/test_proxy.yml";
    // Public endpoint that returns one "host:port" entry per line.
    private static final String PROXY_POOL_URL = "https://www.proxy-list.download/api/v2/get?l=en&t=http";
    // Proxy nodes loaded from the configuration file.
    private static List<Map<String, Object>> proxyNodes = new ArrayList<>();
    // Currently selected proxy.
    private static String currentProxyName = "";
    private static String currentProxyHost = DEFAULT_PROXY_HOST;
    private static int currentProxyPort = Integer.parseInt(DEFAULT_PROXY_PORT);
    // Last known clash state; written by startProxy/stopProxy, not read in this class.
    private static boolean clashRunning = false;
    // Redis cache: injected per instance, published to the static helpers below.
    private RedisCache redisCache;
    private static RedisCache staticRedisCache;
    private static final String PROXY_POOL_CACHE_KEY = "proxy:pool:cache";
    private static final Integer CACHE_DURATION = 30 * 60; // cache lifetime: 30 minutes, in seconds

    static {
        loadProxyConfig();
    }

    /**
     * Receives the Redis cache from Spring and publishes it to the static
     * helpers. Without this hand-over {@code staticRedisCache} stays
     * {@code null} and {@link #fetchProxyPool()} can never use the cache.
     *
     * @param redisCache the cache bean injected by the container
     */
    @Autowired
    public void setRedisCache(RedisCache redisCache) {
        this.redisCache = redisCache;
        staticRedisCache = redisCache;
    }

    /**
     * Returns the public proxy pool, preferring the Redis-cached copy.
     *
     * <p>On a cache miss the list is downloaded, and a non-empty result is
     * cached for {@code CACHE_DURATION} seconds. Failures are logged; whatever
     * was read before the failure (possibly nothing) is returned.
     *
     * @return a mutable list of {@code host:port} entries, never {@code null}
     */
    public static List<String> fetchProxyPool() {
        List<String> cachedProxyPool = readCachedProxyPool();
        if (cachedProxyPool != null && !cachedProxyPool.isEmpty()) {
            return new ArrayList<>(cachedProxyPool);
        }

        List<String> proxyList = new ArrayList<>();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(PROXY_POOL_URL).openConnection();
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(10000);
            connection.setReadTimeout(10000);
            connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36");
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    line = line.trim();
                    if (!line.isEmpty() && line.contains(":")) {
                        proxyList.add(line);
                    }
                }
            }
            cacheProxyPool(proxyList);
            logger.info("成功获取{}个代理IP", proxyList.size());
        } catch (Exception e) {
            logger.error("获取代理IP池失败: {}", e.getMessage(), e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return proxyList;
    }

    /** Reads the cached pool; returns {@code null} when no cache is wired or the lookup fails. */
    private static List<String> readCachedProxyPool() {
        RedisCache cache = staticRedisCache;
        if (cache == null) {
            return null;
        }
        try {
            return cache.getCacheObject(PROXY_POOL_CACHE_KEY);
        } catch (Exception e) {
            // A broken cache must not prevent downloading a fresh list.
            logger.warn("读取代理IP池缓存失败: {}", e.getMessage());
            return null;
        }
    }

    /** Stores a non-empty pool in the cache; an empty result is not worth pinning for 30 minutes. */
    private static void cacheProxyPool(List<String> proxyList) {
        RedisCache cache = staticRedisCache;
        if (cache == null || proxyList.isEmpty()) {
            return;
        }
        try {
            cache.setCacheObject(PROXY_POOL_CACHE_KEY, proxyList, CACHE_DURATION, TimeUnit.SECONDS);
        } catch (Exception e) {
            logger.warn("写入代理IP池缓存失败: {}", e.getMessage());
        }
    }

    /**
     * Loads the proxy nodes from the YAML configuration file.
     *
     * <p>Only map entries of the {@code proxies} list that carry {@code name},
     * {@code server} and {@code port} keys are kept. If the file is missing or
     * has no usable {@code proxies} list, the current nodes stay untouched.
     */
    private static void loadProxyConfig() {
        try {
            File configFile = new File(PROXY_CONFIG_PATH);
            if (!configFile.exists()) {
                logger.warn("未找到代理配置文件,将使用默认代理设置");
                return;
            }
            Object root;
            try (InputStream inputStream = new FileInputStream(configFile)) {
                root = new Yaml().load(inputStream);
            }
            if (!(root instanceof Map)) {
                return;
            }
            Object proxies = ((Map<?, ?>) root).get("proxies");
            if (!(proxies instanceof List)) {
                return;
            }
            List<Map<String, Object>> nodes = new ArrayList<>();
            for (Object entry : (List<?>) proxies) {
                if (!(entry instanceof Map)) {
                    continue;
                }
                // YAML mapping keys are assumed to be strings; only string lookups are done on the node.
                @SuppressWarnings("unchecked")
                Map<String, Object> node = (Map<String, Object>) entry;
                if (node.containsKey("name") && node.containsKey("server") && node.containsKey("port")) {
                    nodes.add(node);
                }
            }
            proxyNodes = nodes;
            logger.info("成功加载{}个代理节点配置", proxyNodes.size());
        } catch (Exception e) {
            logger.error("加载代理配置失败", e);
        }
    }

    /**
     * Lists all known proxy nodes.
     *
     * <p>Retries loading the configuration when no node is known; if there is
     * still none, a single entry describing the default proxy is returned.
     *
     * @return one map per proxy with the keys {@code name}, {@code server},
     *         {@code port} and {@code type} ({@code type} may map to {@code null})
     */
    public static List<Map<String, String>> getAllProxies() {
        if (proxyNodes.isEmpty()) {
            loadProxyConfig();
        }
        if (proxyNodes.isEmpty()) {
            List<Map<String, String>> defaultProxyList = new ArrayList<>();
            Map<String, String> defaultProxy = new HashMap<>();
            defaultProxy.put("name", DEFAULT_PROXY_NAME);
            defaultProxy.put("server", DEFAULT_PROXY_HOST);
            defaultProxy.put("port", DEFAULT_PROXY_PORT);
            defaultProxy.put("type", "http");
            defaultProxyList.add(defaultProxy);
            return defaultProxyList;
        }
        return proxyNodes.stream()
                .map(node -> {
                    Map<String, String> proxyInfo = new HashMap<>();
                    // YAML scalars may be parsed as numbers; convert instead of casting.
                    proxyInfo.put("name", asString(node.get("name")));
                    proxyInfo.put("server", asString(node.get("server")));
                    proxyInfo.put("port", String.valueOf(node.get("port")));
                    proxyInfo.put("type", asString(node.get("type")));
                    return proxyInfo;
                })
                .collect(Collectors.toList());
    }

    /**
     * Selects the proxy used by {@link #getProxyDownloader()}.
     *
     * <p>A configured node with the given name wins; otherwise the name of the
     * built-in default proxy selects the default endpoint. The current
     * selection is only changed when the method returns {@code true}.
     *
     * @param proxyName the name of the node to select; {@code null} matches nothing
     * @return {@code true} if the selection was changed, {@code false} if the
     *         name is unknown or the node has an unusable server/port
     */
    public static boolean setCurrentProxy(String proxyName) {
        Map<String, Object> targetProxy = findProxyNode(proxyName);
        if (targetProxy != null) {
            String host = asString(targetProxy.get("server"));
            int port = parsePort(targetProxy.get("port"));
            if (host == null || host.isEmpty() || port < 0) {
                // Validate first so a broken node never leaves a half-updated selection.
                logger.warn("代理节点配置无效: {}", proxyName);
                return false;
            }
            currentProxyName = asString(targetProxy.get("name"));
            currentProxyHost = host;
            currentProxyPort = port;
            logger.info("已设置代理节点: {}, 地址: {}:{}", currentProxyName, currentProxyHost, currentProxyPort);
            return true;
        } else if (DEFAULT_PROXY_NAME.equals(proxyName)) {
            currentProxyName = DEFAULT_PROXY_NAME;
            currentProxyHost = DEFAULT_PROXY_HOST;
            currentProxyPort = Integer.parseInt(DEFAULT_PROXY_PORT);
            logger.info("已设置默认代理: {}:{}", DEFAULT_PROXY_HOST, DEFAULT_PROXY_PORT);
            return true;
        } else {
            logger.warn("未找到指定的代理节点: {}, 将使用默认代理", proxyName);
            return false;
        }
    }

    /** Returns the first configured node whose name equals {@code proxyName}, or {@code null}. */
    private static Map<String, Object> findProxyNode(String proxyName) {
        if (proxyName == null) {
            return null;
        }
        for (Map<String, Object> node : proxyNodes) {
            if (proxyName.equals(asString(node.get("name")))) {
                return node;
            }
        }
        return null;
    }

    /** Converts a YAML scalar to text, keeping {@code null} as {@code null}. */
    private static String asString(Object value) {
        return value == null ? null : String.valueOf(value);
    }

    /** Reads a port given as a number or numeric text; returns -1 if it is not in 1..65535. */
    private static int parsePort(Object value) {
        int port = -1;
        if (value instanceof Number) {
            port = ((Number) value).intValue();
        } else if (value instanceof String) {
            try {
                port = Integer.parseInt(((String) value).trim());
            } catch (NumberFormatException e) {
                port = -1;
            }
        }
        return (port >= 1 && port <= 65535) ? port : -1;
    }

    /**
     * Builds a downloader that uses the currently selected proxy.
     *
     * @return a new downloader routed through the current proxy
     */
    public static HttpClientDownloader getProxyDownloader() {
        return getProxyDownloader(currentProxyHost, currentProxyPort);
    }

    /**
     * Builds a downloader that uses the given proxy.
     *
     * <p>Side effect: the JVM-wide {@code http(s).proxyHost/Port} system
     * properties are replaced with the given endpoint.
     *
     * @param host the proxy host
     * @param port the proxy port
     * @return a new downloader; if configuring the proxy fails, the failure is
     *         logged and the downloader is returned as far as it was configured
     */
    public static HttpClientDownloader getProxyDownloader(String host, int port) {
        HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
        clearSystemProxy();
        try {
            String portText = String.valueOf(port);
            // JVM-wide proxy settings.
            System.setProperty("http.proxyHost", host);
            System.setProperty("http.proxyPort", portText);
            System.setProperty("https.proxyHost", host);
            System.setProperty("https.proxyPort", portText);
            System.setProperty("http.nonProxyHosts", "localhost|127.0.0.1");
            // WebMagic-level proxy settings.
            httpClientDownloader.setProxyProvider(SimpleProxyProvider.from(
                    new Proxy(host, port)
            ));
        } catch (Exception e) {
            logger.error("设置代理失败", e);
        }
        return httpClientDownloader;
    }

    /**
     * Builds a downloader that uses the default proxy endpoint.
     *
     * @return a new downloader routed through the default proxy
     */
    public static HttpClientDownloader getDefaultProxyDownloader() {
        return getProxyDownloader(DEFAULT_PROXY_HOST, Integer.parseInt(DEFAULT_PROXY_PORT));
    }

    /**
     * Removes the HTTP, HTTPS and SOCKS proxy system properties.
     */
    public static void clearSystemProxy() {
        System.clearProperty("http.proxyHost");
        System.clearProperty("http.proxyPort");
        System.clearProperty("https.proxyHost");
        System.clearProperty("https.proxyPort");
        System.clearProperty("socksProxyHost");
        System.clearProperty("socksProxyPort");
    }

    /**
     * Picks a random sleep time in {@code [min, max)}.
     *
     * @param min the inclusive lower bound
     * @param max the exclusive upper bound
     * @return a value in {@code [min, max)}, or {@code min} when the range is
     *         empty ({@code max <= min})
     */
    public static int getRandomSleepTime(int min, int max) {
        if (max <= min) {
            // Random.nextInt rejects a non-positive bound; fall back to the lower bound.
            return min;
        }
        return min + random.nextInt(max - min);
    }

    /**
     * Starts the proxy service through {@code systemctl start clash}.
     *
     * @return {@code true} if the default proxy endpoint accepts connections
     *         afterwards (or already did), {@code false} otherwise
     */
    public static boolean startProxy() {
        logger.info("正在启动代理服务...");
        try {
            if (isProxyRunning()) {
                logger.info("代理服务已经在运行中,无需重复启动");
                clashRunning = true;
                return true;
            }
            runSystemctl("start");
            // Give the service a moment to open its port before probing.
            Thread.sleep(3000);
            if (isProxyRunning()) {
                logger.info("代理服务启动成功");
                clashRunning = true;
                return true;
            } else {
                logger.warn("代理服务启动失败或超时");
                return false;
            }
        } catch (InterruptedException e) {
            // Restore the flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("启动代理服务时发生异常", e);
            return false;
        } catch (Exception e) {
            logger.error("启动代理服务时发生异常", e);
            return false;
        }
    }

    /**
     * Stops the proxy service through {@code systemctl stop clash}.
     *
     * <p>The proxy system properties are cleared on every path.
     *
     * @return {@code true} if the default proxy endpoint no longer accepts
     *         connections (or already did not), {@code false} otherwise
     */
    public static boolean stopProxy() {
        logger.info("正在停止代理服务...");
        try {
            if (!isProxyRunning()) {
                logger.info("代理服务未在运行,无需停止");
                clashRunning = false;
                return true;
            }
            runSystemctl("stop");
            // Clear early so nothing uses the proxy while it is shutting down.
            clearSystemProxy();
            Thread.sleep(1000);
            if (!isProxyRunning()) {
                logger.info("代理服务停止成功");
                clashRunning = false;
                return true;
            } else {
                logger.warn("代理服务停止失败或超时");
                return false;
            }
        } catch (InterruptedException e) {
            // Restore the flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("停止代理服务时发生异常", e);
            return false;
        } catch (Exception e) {
            logger.error("停止代理服务时发生异常", e);
            return false;
        } finally {
            clearSystemProxy();
        }
    }

    /** Runs {@code systemctl <action> clash} and waits up to five seconds for it to finish. */
    private static void runSystemctl(String action) throws java.io.IOException, InterruptedException {
        ProcessBuilder pb = new ProcessBuilder("systemctl", action, "clash");
        pb.redirectErrorStream(true);
        Process process = pb.start();
        if (!process.waitFor(5, TimeUnit.SECONDS)) {
            logger.warn("systemctl {} clash 未在5秒内结束", action);
        }
    }

    /**
     * Checks whether the proxy service is reachable.
     *
     * <p>Probes the default endpoint (not the currently selected proxy) with a
     * one-second connect timeout.
     *
     * @return {@code true} if a TCP connection could be opened, {@code false} otherwise
     */
    public static boolean isProxyRunning() {
        // try-with-resources closes the socket even when connect() fails.
        try (java.net.Socket socket = new java.net.Socket()) {
            socket.connect(new java.net.InetSocketAddress(DEFAULT_PROXY_HOST, Integer.parseInt(DEFAULT_PROXY_PORT)), 1000);
            return true;
        } catch (Exception e) {
            return false;
        }
    }
}