This commit is contained in:
2025-09-25 16:04:00 +08:00
parent 05b923b1ac
commit bb997857fd
14 changed files with 185 additions and 182 deletions

View File

@@ -1,4 +1,5 @@
package com.tashow.erp.controller;
import com.tashow.erp.entity.AmazonProductEntity;
import com.tashow.erp.repository.AmazonProductRepository;
import com.tashow.erp.service.IAmazonScrapingService;
import com.tashow.erp.utils.ExcelParseUtil;
@@ -11,7 +12,6 @@ import org.springframework.web.multipart.MultipartFile;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@RestController
@RequestMapping("/api/amazon")
public class AmazonController {
@@ -29,7 +29,11 @@ public class AmazonController {
Map<String, Object> requestMap = (Map<String, Object>) request;
List<String> asinList = (List<String>) requestMap.get("asinList");
String batchId = (String) requestMap.get("batchId");
return JsonData.buildSuccess(amazonScrapingService.batchGetProductInfo(asinList, batchId));
List<AmazonProductEntity> products = amazonScrapingService.batchGetProductInfo(asinList, batchId);
Map<String, Object> result = new HashMap<>();
result.put("products", products);
result.put("total", products.size());
return JsonData.buildSuccess(result);
}
/**
@@ -37,25 +41,7 @@ public class AmazonController {
*/
@GetMapping("/products/latest")
public JsonData getLatestProducts() {
List<Map<String, Object>> products = amazonProductRepository.findLatestProducts()
.parallelStream()
.map(entity -> {
Map<String, Object> map = new HashMap<>();
map.put("asin", entity.getAsin());
map.put("title", entity.getTitle());
map.put("price", entity.getPrice());
map.put("imageUrl", entity.getImageUrl());
map.put("productUrl", entity.getProductUrl());
map.put("brand", entity.getBrand());
map.put("category", entity.getCategory());
map.put("rating", entity.getRating());
map.put("reviewCount", entity.getReviewCount());
map.put("availability", entity.getAvailability());
map.put("seller", entity.getSeller());
map.put("shipper", entity.getSeller());
return map;
})
.collect(Collectors.toList());
List<AmazonProductEntity> products = amazonProductRepository.findLatestProducts();
Map<String, Object> result = new HashMap<>();
result.put("products", products);
result.put("total", products.size());

View File

@@ -25,9 +25,6 @@ public class AuthController {
public ResponseEntity<?> login(@RequestBody Map<String, Object> loginData) {
String username = (String) loginData.get("username");
String password = (String) loginData.get("password");
if (username == null || password == null) {
return ResponseEntity.ok(Map.of("code", 400, "message", "用户名和密码不能为空"));
}
Map<String, Object> result = authService.login(username, password);
Object success = result.get("success");
Object tokenObj = result.get("token");

View File

@@ -23,10 +23,9 @@ public class BanmaOrderController {
BanmaOrderRepository banmaOrderRepository;
@Autowired
JavaBridge javaBridge;
@Autowired
RestTemplate restTemplate;
/**
 * Handles GET requests mapped to "/orders" and returns one page of orders wrapped in
 * ResponseEntity.ok.
 *
 * The optional shopIds query parameter is split on "," into a list and is passed as
 * null when the parameter is absent; the other bound parameters are forwarded
 * unchanged to banmaOrderService.getOrdersByPage.
 *
 * NOTE(review): this span is a diff excerpt. The "@@" line is a hunk header that hides
 * part of the parameter list (pageSize is used below but its declaration is not
 * visible here), and the two consecutive getOrdersByPage calls look like the
 * before/after versions of one statement (without and with accountId) - confirm
 * against the real file.
 */
@GetMapping("/orders")
public ResponseEntity<Map<String, Object>> getOrders(
@RequestParam(required = false, name = "accountId") Long accountId,
@RequestParam(required = false, name = "startDate") String startDate,
@RequestParam(required = false, name = "endDate") String endDate,
@RequestParam(defaultValue = "1", name = "page") int page,
@@ -34,16 +33,16 @@ public class BanmaOrderController {
@RequestParam(required = false, name = "batchId") String batchId,
@RequestParam(required = false, name = "shopIds") String shopIds) {
// Comma-separated ids become a fixed-size list; a missing parameter stays null.
List<String> shopIdList = shopIds != null ? java.util.Arrays.asList(shopIds.split(",")) : null;
Map<String, Object> result = banmaOrderService.getOrdersByPage(startDate, endDate, page, pageSize, batchId, shopIdList);
Map<String, Object> result = banmaOrderService.getOrdersByPage(accountId, startDate, endDate, page, pageSize, batchId, shopIdList);
return ResponseEntity.ok(result);
}
/**
 * Get the shop list.
 *
 * Delegates to banmaOrderService.getShops and wraps the returned map with
 * JsonData.buildSuccess. Any exception is logged together with its message.
 *
 * NOTE(review): this span is a diff excerpt. The two signatures and the two getShops
 * calls look like the before/after versions (without and with the optional accountId
 * query parameter), and the "@@" hunk header hides the rest of the catch block, so
 * what is returned on failure is not visible here - confirm against the real file.
 */
@GetMapping("/shops")
public JsonData getShops() {
public JsonData getShops(@RequestParam(required = false, name = "accountId") Long accountId) {
try {
Map<String, Object> response = banmaOrderService.getShops();
Map<String, Object> response = banmaOrderService.getShops(accountId);
return JsonData.buildSuccess(response);
} catch (Exception e) {
// The exception is passed as the last argument in addition to its message.
logger.error("获取店铺列表失败: {}", e.getMessage(), e);
@@ -51,19 +50,6 @@ public class BanmaOrderController {
}
}
/**
 * Refreshes the Banma authentication token.
 *
 * Calls banmaOrderService.refreshToken() and reports the outcome as a JsonData:
 * a success payload when the call completes, otherwise an error payload that
 * carries the exception message. Failures are logged, not rethrown.
 *
 * @return the success or error result built by JsonData
 */
@PostMapping("/refresh-token")
public JsonData refreshToken() {
    JsonData outcome;
    try {
        banmaOrderService.refreshToken();
        outcome = JsonData.buildSuccess("Token刷新成功");
    } catch (Exception e) {
        // Read the message once and reuse it for both the log line and the response.
        final String reason = e.getMessage();
        logger.error("刷新Token失败: {}", reason, e);
        outcome = JsonData.buildError("Token刷新失败: " + reason);
    }
    return outcome;
}
/**
* 获取最新订单数据
*/

View File

@@ -21,33 +21,9 @@ public class AmazonProductEntity {
// Column mappings of the entity as visible in this excerpt; every field here is a String.
// NOTE(review): this is a diff hunk (its header reads "-21,33 +21,9"), so some of these
// fields are presumably removed by the commit - confirm against the current entity class.
// ASIN: the column is declared unique and non-nullable.
@Column(unique = true, nullable = false)
private String asin;
// title, image_url and product_url are the columns declared with length 1000.
@Column(name = "title", length = 1000)
private String title;
@Column(name = "price")
private String price;
@Column(name = "image_url", length = 1000)
private String imageUrl;
@Column(name = "product_url", length = 1000)
private String productUrl;
@Column(name = "brand")
private String brand;
@Column(name = "category")
private String category;
@Column(name = "rating")
private String rating;
@Column(name = "review_count")
private String reviewCount;
@Column(name = "availability")
private String availability;
@Column(name = "seller")
private String seller;

View File

@@ -152,6 +152,12 @@ public class Alibaba1688ServiceImpl implements Alibaba1688Service {
}
System.out.println("url"+uploadedUrl);
System.out.println("skuPrices:"+skuPrices);
// 检查并上报空数据
if (skuPrices.isEmpty()) errorReporter.reportDataEmpty("alibaba1688", uploadedUrl, skuPrices);
if (median == null || median == 0.0) errorReporter.reportDataEmpty("alibaba1688", uploadedUrl, median);
if (freightFee.isEmpty()) errorReporter.reportDataEmpty("alibaba1688", uploadedUrl, freightFee);
result.setSkuPrice(skuPrices);
result.setMedian( median);
result.setMapRecognitionLink( uploadImageBase64(imageUrl));

View File

@@ -1,8 +1,10 @@
package com.tashow.erp.service.impl;
import com.tashow.erp.entity.AmazonProductEntity;
import com.tashow.erp.repository.AmazonProductRepository;
import com.tashow.erp.service.IAmazonScrapingService;
import com.tashow.erp.utils.DataReportUtil;
import com.tashow.erp.utils.ErrorReporter;
import com.tashow.erp.utils.RakutenProxyUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -13,6 +15,7 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import java.time.LocalDateTime;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
@@ -29,50 +32,59 @@ public class AmazonScrapingServiceImpl implements IAmazonScrapingService, PagePr
private AmazonProductRepository amazonProductRepository;
// Injected helper; batchGetProductInfo calls reportDataCollection on it after a save.
@Autowired
private DataReportUtil dataReportUtil;
// Injected reporter; process() calls reportDataEmpty on it for empty price/seller values.
@Autowired
private ErrorReporter errorReporter;
// Supplies the random part of the sleep time configured on `site` below.
private final Random random = new Random();
// The spider currently running, or null; static, so shared by every instance of this class.
private static volatile Spider activeSpider = null;
// Monitor that batchGetProductInfo synchronizes on around each crawl.
private static final Object spiderLock = new Object();
// resultCache holds what process() extracted, keyed by ASIN.
// NOTE(review): resultCache and site are each declared twice below; this span is a diff
// excerpt and the pairs look like the before/after versions - confirm against the real file.
// NOTE(review): random.nextInt(2000) is evaluated once, when the field is initialized, so the
// sleep time is one fixed value between 2000 and 3999 (presumably milliseconds) for the
// lifetime of this instance rather than a fresh random delay per request.
// NOTE(review): the session cookies are hard-coded, and the "i18n-prefs" cookie is added twice.
private final Map<String, Map<String, Object>> resultCache = new ConcurrentHashMap<>();
private final Site site = Site.me().setRetryTimes(3).setSleepTime(2000 + random.nextInt(2000))
.setTimeOut(15000).setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/128.0.0.0 Safari/537.36").addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9").addHeader("accept-language", "ja,en;q=0.9,zh-CN;q=0.8,zh;q=0.7").addHeader("cache-control", "max-age=0").addHeader("upgrade-insecure-requests", "1").addHeader("sec-ch-ua", "\"Chromium\";v=\"128\", \"Not=A?Brand\";v=\"24\"").addHeader("sec-ch-ua-mobile", "?0").addHeader("sec-ch-ua-platform", "\"Windows\"").addHeader("sec-fetch-site", "none").addHeader("sec-fetch-mode", "navigate").addHeader("sec-fetch-user", "?1").addHeader("sec-fetch-dest", "document").addCookie("i18n-prefs", "JPY").addCookie("session-id", "358-1261309-0483141").addCookie("session-id-time", "2082787201l").addCookie("i18n-prefs", "JPY").addCookie("lc-acbjp", "zh_CN").addCookie("ubid-acbjp", "357-8224002-9668932");
private final Map<String, AmazonProductEntity> resultCache = new ConcurrentHashMap<>();
private final Site site = Site.me().setRetryTimes(3).setSleepTime(2000 + random.nextInt(2000)).setTimeOut(15000).setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/128.0.0.0 Safari/537.36").addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9").addHeader("accept-language", "ja,en;q=0.9,zh-CN;q=0.8,zh;q=0.7").addHeader("cache-control", "max-age=0").addHeader("upgrade-insecure-requests", "1").addHeader("sec-ch-ua", "\"Chromium\";v=\"128\", \"Not=A?Brand\";v=\"24\"").addHeader("sec-ch-ua-mobile", "?0").addHeader("sec-ch-ua-platform", "\"Windows\"").addHeader("sec-fetch-site", "none").addHeader("sec-fetch-mode", "navigate").addHeader("sec-fetch-user", "?1").addHeader("sec-fetch-dest", "document").addCookie("i18n-prefs", "JPY").addCookie("session-id", "358-1261309-0483141").addCookie("session-id-time", "2082787201l").addCookie("i18n-prefs", "JPY").addCookie("lc-acbjp", "zh_CN").addCookie("ubid-acbjp", "357-8224002-9668932");
/**
 * Extracts product data from a downloaded Amazon page.
 *
 * Reads the ASIN, price and seller from the page HTML with XPath queries (falling back
 * to the "/dp/" segment of the URL for the ASIN), stores the outcome in resultCache
 * keyed by ASIN and exposes it on the page via putField. Throws a RuntimeException
 * with the message "Retry this page" when neither price nor seller could be extracted.
 *
 * NOTE(review): this span is a diff excerpt in which the previous (Map-based) and the
 * current (AmazonProductEntity-based) statements appear interleaved, e.g. the paired
 * emptiness checks and the two ASIN extractions - confirm against the real file.
 *
 * @param page the downloaded page whose HTML and URL are read
 */
@Override
public void process(Page page) {
Html html = page.getHtml();
Map<String, Object> resultMap = new HashMap<>();
String url = page.getUrl().toString();
// Extract the ASIN
String asin = html.xpath("//input[@id='ASIN']/@value").toString();
if (isEmpty(asin)) {
// Fallback: take the path segment after "/dp/", cut at the next "/" or "?".
String[] parts = url.split("/dp/");
if (parts.length > 1) asin = parts[1].split("/")[0].split("\\?")[0];
}
// Extract the price
String priceSymbol = html.xpath("//span[@class='a-price-symbol']/text()").toString();
String priceWhole = html.xpath("//span[@class='a-price-whole']/text()").toString();
// NOTE(review): string concatenation renders a null reference as "null". If xpath(...).toString()
// can return null for a missing node (the null checks on asin/seller suggest so - TODO confirm),
// price becomes e.g. "nullnull", is never empty, and the fallback and retry checks below are skipped.
String price = priceSymbol + priceWhole;
if (price.isEmpty()) {
if (isEmpty(price)) {
price = html.xpath("//span[@class='a-price-range']/text()").toString();
}
// Extract the seller
String seller = html.xpath("//a[@id='sellerProfileTriggerId']/text()").toString();
if (seller == null || seller.isEmpty()) {
if (isEmpty(seller)) {
seller = html.xpath("//span[@class='a-size-small offer-display-feature-text-message']/text()").toString();
}
resultMap.put("seller", seller);
if (price != null || seller != null) {
resultMap.put("price", price);
} else {
// Retry when the key data is empty
if (isEmpty(price) && isEmpty(seller)) {
throw new RuntimeException("Retry this page");
}
String asin = html.xpath("//input[@id='ASIN']/@value").toString();
if (asin == null || asin.isEmpty()) {
String[] parts = page.getUrl().toString().split("/dp/");
if (parts.length > 1) asin = parts[1].split("/")[0].split("\\?")[0];
}
String title = html.xpath("//span[@id='productTitle']/text()").toString();
if (title == null || title.isEmpty())
title = html.xpath("//h1[@class='a-size-large a-spacing-none']/text()").toString();
resultMap.put("asin", asin != null ? asin : "");
resultMap.put("title", (title == null || title.isEmpty()) ? "未获取" : title.trim());
// Check for and report empty data
if (isEmpty(price)) errorReporter.reportDataEmpty("amazon", asin, price);
if (isEmpty(seller)) errorReporter.reportDataEmpty("amazon", asin, seller);
// NOTE(review): resultCache is a ConcurrentHashMap, which rejects null keys; the
// "asin != null" guard below suggests asin can still be null here - verify.
resultCache.put(asin, resultMap);
page.putField("resultMap", resultMap);
AmazonProductEntity entity = new AmazonProductEntity();
entity.setAsin(asin != null ? asin : "");
entity.setPrice(price);
entity.setSeller(seller);
resultCache.put(asin, entity);
page.putField("entity", entity);
}
/**
@@ -87,77 +99,45 @@ public class AmazonScrapingServiceImpl implements IAmazonScrapingService, PagePr
* 批量获取产品信息
*/
/**
 * Returns product information for each ASIN in asinList.
 *
 * Null or blank entries are skipped and every other entry is stripped of
 * non-alphanumeric characters. A stored row is looked up through
 * amazonProductRepository.findByAsin and checked against a one-hour createdAt window;
 * otherwise the page "https://www.amazon.co.jp/dp/" + ASIN is crawled with a
 * single-threaded Spider while holding spiderLock, and the result is saved.
 * batchId is used as the session id; when it is null, "SINGLE_" plus a random UUID
 * is used instead.
 *
 * NOTE(review): this span is a diff excerpt in which the previous (Map-returning) and
 * the current (List-returning) versions appear interleaved, starting with the two
 * signatures - confirm against the real file.
 */
@Override
public Map<String, Object> batchGetProductInfo(List<String> asinList, String batchId) {
public List<AmazonProductEntity> batchGetProductInfo(List<String> asinList, String batchId) {
String sessionId = (batchId != null) ? batchId : "SINGLE_" + UUID.randomUUID();
List<Map<String, Object>> products = new ArrayList<>();
List<AmazonProductEntity> products = new ArrayList<>();
for (String asin : asinList) {
if (asin == null || asin.trim().isEmpty()) continue;
// Keep only ASCII letters and digits.
String cleanAsin = asin.replaceAll("[^a-zA-Z0-9]", "");
Map<String, Object> result = new HashMap<>();
amazonProductRepository.findByAsin(cleanAsin).ifPresentOrElse(entity -> {
if (entity.getCreatedAt().isAfter(LocalDateTime.now().minusHours(1))) {
result.put("asin", entity.getAsin());
result.put("title", entity.getTitle());
result.put("price", entity.getPrice());
result.put("seller", entity.getSeller());
result.put("imageUrl", entity.getImageUrl());
result.put("productUrl", entity.getProductUrl());
result.put("brand", entity.getBrand());
result.put("category", entity.getCategory());
result.put("rating", entity.getRating());
result.put("reviewCount", entity.getReviewCount());
result.put("availability", entity.getAvailability());
products.add(result);
}
}, () -> {
// Not in the database or expired -> scrape
// The filter keeps a stored row only if it is under one hour old and has both price and seller.
AmazonProductEntity product = amazonProductRepository.findByAsin(cleanAsin).filter(entity -> entity.getCreatedAt().isAfter(LocalDateTime.now().minusHours(1)) && !isEmpty(entity.getPrice()) && !isEmpty(entity.getSeller())).orElseGet(() -> {
// Collect fresh data
String url = "https://www.amazon.co.jp/dp/" + cleanAsin;
RakutenProxyUtil proxyUtil = new RakutenProxyUtil();
// spiderLock is static, so only one crawl runs at a time across all instances;
// run() presumably blocks until the crawl finishes - TODO confirm.
synchronized (spiderLock) {
activeSpider = Spider.create(this)
.addUrl(url)
.setDownloader(proxyUtil.createProxyDownloader(proxyUtil.detectSystemProxy(url)))
.thread(1);
activeSpider = Spider.create(this).addUrl(url).setDownloader(proxyUtil.createProxyDownloader(proxyUtil.detectSystemProxy(url))).thread(1);
activeSpider.run();
activeSpider = null;
}
result.putAll(resultCache.getOrDefault(cleanAsin, Map.of("asin", cleanAsin, "price", "", "seller", "", "title", "")));
// Persist to the database
AmazonProductEntity entity = new AmazonProductEntity();
// NOTE(review): resultCache is keyed by the ASIN that process() parsed from the page;
// presumably that equals cleanAsin - verify. A miss yields a blank entity.
AmazonProductEntity entity = resultCache.getOrDefault(cleanAsin, new AmazonProductEntity());
entity.setAsin(cleanAsin);
entity.setTitle((String) result.get("title"));
entity.setPrice((String) result.get("price"));
entity.setSeller((String) result.get("seller"));
entity.setImageUrl((String) result.get("imageUrl"));
entity.setProductUrl((String) result.get("productUrl"));
entity.setBrand((String) result.get("brand"));
entity.setCategory((String) result.get("category"));
entity.setRating((String) result.get("rating"));
entity.setReviewCount((String) result.get("reviewCount"));
entity.setAvailability((String) result.get("availability"));
entity.setSessionId(sessionId);
entity.setCreatedAt(LocalDateTime.now());
try {
amazonProductRepository.save(entity);
dataReportUtil.reportDataCollection("AMAZON", 1, "0");
} catch (Exception e) {
// NOTE(review): the caught exception is not passed to the logger, so its message
// and stack trace are dropped; the entity is still returned to the caller.
logger.warn("保存商品数据失败: {}", cleanAsin);
}
products.add(result);
return entity;
});
products.add(product);
}
long failedCount = products.stream().filter(p -> p.get("price").toString().isEmpty()).count();
return Map.of(
"products", products,
"total", products.size(),
"success", true,
"failedCount", failedCount
);
return products;
}
/**
 * Tells whether a string carries no usable text.
 *
 * @param str the value to inspect; may be null
 * @return true when str is null or has zero length after trim(), false otherwise
 */
private boolean isEmpty(String str) {
    if (str == null) {
        return true;
    }
    return str.trim().length() == 0;
}
}