diff --git a/CLAUDE.md b/CLAUDE.md index 3a9fa2d..bd0d427 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,7 +19,7 @@ This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrie 2. Fetches ServiceRequest attachments via OData endpoints: - `/sap/c4c/odata/v1/c4codata` - Standard C4C OData API - `/sap/c4c/odata/cust/v1/custticketapi` - Custom ticket API -3. Downloads two types of attachments: +3. Downloads two types of attachments using **multi-threaded concurrent downloads**: - **File attachments** (CategoryCode=2): Downloaded via OData `$value` endpoint - **Link attachments** (CategoryCode=3): External Salesforce links scraped using Scrapling + Playwright 4. Handles XIssueItem-level attachments via `BO_XSRIssueItemAttachmentFolder` @@ -30,6 +30,11 @@ This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrie - `scrapling[all]` - Web scraping framework with stealth capabilities - `playwright` - Browser automation for downloading Salesforce attachments +**Performance features:** +- Multi-threaded concurrent downloads (default: 5 threads, configurable via `--max-workers`) +- Thread-safe output logging with lock mechanism +- Parallel processing of both file and link attachments + **Output modes:** - Human-readable console output (default) - JSON mode (`--json`) for programmatic consumption @@ -71,6 +76,14 @@ python sap-c4c-AttachmentFolder.py \ --password xxx \ --ticket 24588 +# Download with custom thread count (default: 5) +python sap-c4c-AttachmentFolder.py \ + --tenant https://xxx.c4c.saphybriscloud.cn \ + --user admin \ + --password xxx \ + --ticket 24588 \ + --max-workers 10 + # Download with DSM upload python sap-c4c-AttachmentFolder.py \ --tenant https://xxx.c4c.saphybriscloud.cn \ diff --git a/sap-c4c-AttachmentFolder.py b/sap-c4c-AttachmentFolder.py index 4273b52..bb7af03 100644 --- a/sap-c4c-AttachmentFolder.py +++ b/sap-c4c-AttachmentFolder.py @@ -45,6 +45,8 @@ import requests import urllib3 import xml.etree.ElementTree as ET from scrapling.fetchers import StealthyFetcher +from concurrent.futures import ThreadPoolExecutor, as_completed +from threading import Lock urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -68,6 +70,10 @@ DSM_USER = "" DSM_PASSWORD = "" DSM_PATH = "" +# 多线程配置 +MAX_WORKERS = 5 # 默认并发数 +print_lock = Lock() # 用于线程安全的打印输出 + def get_session(): s = requests.Session() @@ -516,48 +522,88 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): return result +def _download_single_file(session, att, label, odata_url, json_mode): + """下载单个文件附件(用于多线程)""" + entry = {"source": label, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")} + try: + content = download_file_via_odata(session, att, odata_url) + file_path = os.path.join(OUTPUT_DIR, att["FileName"]) + with open(file_path, "wb") as f: + f.write(content) + entry["savedPath"] = os.path.abspath(file_path) + entry["savedName"] = att["FileName"] + if not json_mode: + with print_lock: + print(f" ✓ saved: {file_path}") + except Exception as e: + entry["error"] = str(e) + if not json_mode: + with print_lock: + print(f" ✗ OData 下载失败 ({att['FileName']}): {e}") + return entry + + +def _download_single_link(link_att, label, json_mode): + """下载单个链接附件(用于多线程)""" + link_url = link_att.get("LinkWebURI") + entry = {"source": label, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url} + + if not link_url: + entry["error"] = "无链接地址" + return entry + + if not json_mode: + with print_lock: + print(f" {link_att['FileName']}: {link_url}") + + r = download_link_via_scrapling(link_url, link_att["FileName"]) + if r["saved"]: + entry["savedPath"] = os.path.abspath(r["saved"]) + entry["savedName"] = os.path.basename(r["saved"]) + if not json_mode: + with print_lock: + print(f" ✓ saved: {r['saved']}") + else: + entry["error"] = r["error"] + if not json_mode: + with print_lock: + print(f" ✗ 下载失败: {r['error']}") + return entry + + def _do_download(session, attachments, label, odata_url, result, json_mode): - """执行下载并将结果追加到 result['downloadedFiles']""" + """执行下载并将结果追加到 result['downloadedFiles'](多线程版本)""" file_atts = [a for a in attachments if a["CategoryCode"] == "2"] link_atts = [a for a in attachments if a["CategoryCode"] == "3"] - # 链接附件 -> Scrapling - for a in link_atts: - link_url = a.get("LinkWebURI") - if not link_url: - continue - if not json_mode: - print(f" {a['FileName']}: {link_url}") - r = download_link_via_scrapling(link_url, a["FileName"]) - entry = {"source": label, "c4cName": a["FileName"], "type": "link", "linkUrl": link_url} - if r["saved"]: - entry["savedPath"] = os.path.abspath(r["saved"]) - entry["savedName"] = os.path.basename(r["saved"]) - if not json_mode: - print(f" saved: {r['saved']}") - else: - entry["error"] = r["error"] - if not json_mode: - print(f" 下载失败: {r['error']}") - result["downloadedFiles"].append(entry) + downloaded_entries = [] - # 文件附件 -> OData - for att in file_atts: - entry = {"source": label, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")} - try: - content = download_file_via_odata(session, att, odata_url) - file_path = os.path.join(OUTPUT_DIR, att["FileName"]) - with open(file_path, "wb") as f: - f.write(content) - entry["savedPath"] = os.path.abspath(file_path) - entry["savedName"] = att["FileName"] - if not json_mode: - print(f" saved: {file_path}") - except Exception as e: - entry["error"] = str(e) - if not json_mode: - print(f" OData 下载失败 ({att['FileName']}): {e}") - result["downloadedFiles"].append(entry) + # 使用线程池并发下载 + with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: + futures = [] + + # 提交文件附件下载任务 + for att in file_atts: + future = executor.submit(_download_single_file, session, att, label, odata_url, json_mode) + futures.append(future) + + # 提交链接附件下载任务 + for att in link_atts: + future = executor.submit(_download_single_link, att, label, json_mode) + futures.append(future) + + # 收集结果 + for future in as_completed(futures): + try: + entry = future.result() + downloaded_entries.append(entry) + except Exception as e: + if not json_mode: + with print_lock: + print(f" ✗ 下载任务异常: {e}") + + # 将结果追加到总结果中 + result["downloadedFiles"].extend(downloaded_entries) def main(): @@ -572,6 +618,7 @@ def main(): parser.add_argument("--output-dir", default="downloads", help="附件保存目录 (默认: downloads)") parser.add_argument("--json", action="store_true", dest="json_mode", help="JSON 输出模式(供程序调用)") parser.add_argument("--list-only", action="store_true", help="仅列出附件清单,不下载") + parser.add_argument("--max-workers", type=int, default=5, help="并发下载线程数 (默认: 5)") # 群晖 DSM 上传参数 parser.add_argument("--dsm-url", default=os.environ.get("DSM_URL", ""), @@ -589,13 +636,14 @@ def main(): parser.error("必须提供 --tenant, --user, --password 参数,或设置 C4C_TENANT, C4C_USERNAME, C4C_PASSWORD 环境变量") # 初始化全局配置 - global TENANT, USERNAME, PASSWORD, ODATA_C4C, ODATA_CUST, SOAP_URL + global TENANT, USERNAME, PASSWORD, ODATA_C4C, ODATA_CUST, SOAP_URL, MAX_WORKERS TENANT = args.tenant.rstrip("/") USERNAME = args.user PASSWORD = args.password ODATA_C4C = f"{TENANT}/sap/c4c/odata/v1/c4codata" ODATA_CUST = f"{TENANT}/sap/c4c/odata/cust/v1/custticketapi" SOAP_URL = f"{TENANT}/sap/bc/srt/scs/sap/manageattachmentfolderin" + MAX_WORKERS = args.max_workers # 初始化 DSM 配置 global DSM_URL, DSM_USER, DSM_PASSWORD, DSM_PATH @@ -604,6 +652,9 @@ def main(): DSM_PASSWORD = args.dsm_password DSM_PATH = args.dsm_path + if not args.json_mode and not args.list_only: + print(f"并发下载线程数: {MAX_WORKERS}") + result = run(args.ticket, args.output_dir, args.list_only, args.json_mode) # 下载完成后上传到群晖 DSM