feat: Add multi-threaded concurrent download support

- Add ThreadPoolExecutor for parallel attachment downloads
- Add --max-workers parameter to control concurrency (default: 5)
- Implement thread-safe logging with Lock mechanism
- Refactor _do_download to use concurrent.futures
- Add _download_single_file and _download_single_link helper functions
- Update CLAUDE.md with multi-threading documentation

Performance improvements:
- File attachments (OData) now download in parallel
- Link attachments (Scrapling) now download in parallel
- Configurable worker threads for different network conditions
This commit is contained in:
afei A
2026-03-12 13:01:13 +08:00
parent 929d3c2ec9
commit 84273a765e
2 changed files with 103 additions and 39 deletions

View File

@@ -19,7 +19,7 @@ This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrie
2. Fetches ServiceRequest attachments via OData endpoints:
- `/sap/c4c/odata/v1/c4codata` - Standard C4C OData API
- `/sap/c4c/odata/cust/v1/custticketapi` - Custom ticket API
3. Downloads two types of attachments:
3. Downloads two types of attachments using **multi-threaded concurrent downloads**:
- **File attachments** (CategoryCode=2): Downloaded via OData `$value` endpoint
- **Link attachments** (CategoryCode=3): External Salesforce links scraped using Scrapling + Playwright
4. Handles XIssueItem-level attachments via `BO_XSRIssueItemAttachmentFolder`
@@ -30,6 +30,11 @@ This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrie
- `scrapling[all]` - Web scraping framework with stealth capabilities
- `playwright` - Browser automation for downloading Salesforce attachments
**Performance features:**
- Multi-threaded concurrent downloads (default: 5 threads, configurable via `--max-workers`)
- Thread-safe console output: a shared lock serializes `print` calls from worker threads
- Parallel processing of both file and link attachments
**Output modes:**
- Human-readable console output (default)
- JSON mode (`--json`) for programmatic consumption
@@ -71,6 +76,14 @@ python sap-c4c-AttachmentFolder.py \
--password xxx \
--ticket 24588
# Download with custom thread count (default: 5)
python sap-c4c-AttachmentFolder.py \
--tenant https://xxx.c4c.saphybriscloud.cn \
--user admin \
--password xxx \
--ticket 24588 \
--max-workers 10
# Download with DSM upload
python sap-c4c-AttachmentFolder.py \
--tenant https://xxx.c4c.saphybriscloud.cn \

View File

@@ -45,6 +45,8 @@ import requests
import urllib3
import xml.etree.ElementTree as ET
from scrapling.fetchers import StealthyFetcher
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -68,6 +70,10 @@ DSM_USER = ""
DSM_PASSWORD = ""
DSM_PATH = ""
# Multi-threading configuration
MAX_WORKERS = 5 # Default number of concurrent download workers
print_lock = Lock() # Held around print() calls made from worker threads so output lines do not interleave
def get_session():
s = requests.Session()
@@ -516,48 +522,88 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
return result
def _download_single_file(session, att, label, odata_url, json_mode):
    """Download one file attachment via OData and save it under OUTPUT_DIR.

    Designed to run as a thread-pool task: failures of the download or of
    the file write are captured in the returned entry instead of raised.

    Args:
        session: HTTP session passed through to ``download_file_via_odata``.
        att: Attachment record; ``FileName`` is required, ``MimeType`` is
            optional.
        label: Value stored under the entry's ``source`` key.
        odata_url: OData service URL passed to ``download_file_via_odata``.
        json_mode: When true, nothing is printed to the console.

    Returns:
        dict: ``source``, ``c4cName``, ``type`` and ``mime``, plus either
        ``savedPath``/``savedName`` on success or ``error`` on failure.

    Raises:
        KeyError: If ``att`` has no ``FileName`` key.

    NOTE(review): ``session`` is shared by every worker thread; presumably
    it is a ``requests.Session`` -- confirm concurrent use is acceptable.
    """
    raw_name = att["FileName"]
    entry = {"source": label, "c4cName": raw_name, "type": "file", "mime": att.get("MimeType")}
    try:
        # The name is supplied by the remote system: keep only its final
        # path component so the write cannot escape OUTPUT_DIR.
        safe_name = os.path.basename(raw_name.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            raise ValueError(f"无效的附件文件名: {raw_name!r}")
        content = download_file_via_odata(session, att, odata_url)
        file_path = os.path.join(OUTPUT_DIR, safe_name)
        with open(file_path, "wb") as f:
            f.write(content)
        entry["savedPath"] = os.path.abspath(file_path)
        entry["savedName"] = safe_name
        if not json_mode:
            with print_lock:
                print(f" ✓ saved: {file_path}")
    except Exception as e:
        # Best-effort per attachment: record the failure and let the other
        # downloads continue.
        entry["error"] = str(e)
        if not json_mode:
            with print_lock:
                print(f" ✗ OData 下载失败 ({raw_name}): {e}")
    return entry
def _download_single_link(link_att, label, json_mode):
    """Download one link attachment through ``download_link_via_scrapling``.

    Designed to run as a thread-pool task: a missing link, a failure
    reported by the downloader and an exception raised by the downloader
    are all recorded in the returned entry rather than raised, so the
    caller never loses track of an attachment.

    Args:
        link_att: Attachment record; ``FileName`` is required and
            ``LinkWebURI`` holds the URL to fetch.
        label: Value stored under the entry's ``source`` key.
        json_mode: When true, nothing is printed to the console.

    Returns:
        dict: ``source``, ``c4cName``, ``type`` and ``linkUrl``, plus either
        ``savedPath``/``savedName`` on success or ``error`` on failure.

    Raises:
        KeyError: If ``link_att`` has no ``FileName`` key.
    """
    link_url = link_att.get("LinkWebURI")
    entry = {"source": label, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url}
    if not link_url:
        entry["error"] = "无链接地址"
        return entry
    if not json_mode:
        with print_lock:
            print(f" {link_att['FileName']}: {link_url}")
    try:
        r = download_link_via_scrapling(link_url, link_att["FileName"])
    except Exception as e:
        # An exception escaping a worker would otherwise drop this
        # attachment from the results; record it like any other failure.
        entry["error"] = str(e)
        if not json_mode:
            with print_lock:
                print(f" ✗ 下载失败: {e}")
        return entry
    if r["saved"]:
        entry["savedPath"] = os.path.abspath(r["saved"])
        entry["savedName"] = os.path.basename(r["saved"])
        if not json_mode:
            with print_lock:
                print(f" ✓ saved: {r['saved']}")
    else:
        entry["error"] = r["error"]
        if not json_mode:
            with print_lock:
                print(f" ✗ 下载失败: {r['error']}")
    return entry
def _do_download(session, attachments, label, odata_url, result, json_mode):
    """Download attachments concurrently and append the outcomes to result['downloadedFiles'].

    Attachments with ``CategoryCode`` "2" are handled by
    ``_download_single_file`` and those with "3" by
    ``_download_single_link``; other codes are ignored. All tasks run on
    one thread pool sized by the module-level ``MAX_WORKERS``.

    Entries are appended in submission order (file attachments first, then
    link attachments), so the output does not depend on which download
    happens to finish first. A task that raises still yields an entry
    carrying an ``error`` key.

    Args:
        session: HTTP session handed to the file-download tasks.
        attachments: Attachment records, each with a ``CategoryCode`` key.
        label: Value stored under every entry's ``source`` key.
        odata_url: OData service URL handed to the file-download tasks.
        result: Dict whose ``downloadedFiles`` list is extended in place.
        json_mode: When true, nothing is printed to the console.
    """
    file_atts = [a for a in attachments if a["CategoryCode"] == "2"]
    link_atts = [a for a in attachments if a["CategoryCode"] == "3"]
    if not file_atts and not link_atts:
        # Nothing to download: do not spin up a pool.
        return

    # ThreadPoolExecutor raises ValueError for max_workers <= 0, which a
    # user can request through --max-workers; fall back to one worker.
    worker_count = max(1, MAX_WORKERS)

    downloaded_entries = []
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        # Remember which attachment each future belongs to so a crashed
        # task can still be reported.
        submitted = []
        for att in file_atts:
            future = executor.submit(_download_single_file, session, att, label, odata_url, json_mode)
            submitted.append((future, att, "file"))
        for att in link_atts:
            future = executor.submit(_download_single_link, att, label, json_mode)
            submitted.append((future, att, "link"))

        # Waiting in submission order costs no extra wall time (all tasks
        # are already running) and keeps the result order deterministic.
        for future, att, kind in submitted:
            try:
                entry = future.result()
            except Exception as e:
                entry = {"source": label, "c4cName": att.get("FileName"), "type": kind, "error": str(e)}
                if not json_mode:
                    with print_lock:
                        print(f" ✗ 下载任务异常: {e}")
            downloaded_entries.append(entry)

    result["downloadedFiles"].extend(downloaded_entries)
def main():
@@ -572,6 +618,7 @@ def main():
parser.add_argument("--output-dir", default="downloads", help="附件保存目录 (默认: downloads)")
parser.add_argument("--json", action="store_true", dest="json_mode", help="JSON 输出模式(供程序调用)")
parser.add_argument("--list-only", action="store_true", help="仅列出附件清单,不下载")
parser.add_argument("--max-workers", type=int, default=5, help="并发下载线程数 (默认: 5)")
# 群晖 DSM 上传参数
parser.add_argument("--dsm-url", default=os.environ.get("DSM_URL", ""),
@@ -589,13 +636,14 @@ def main():
parser.error("必须提供 --tenant, --user, --password 参数,或设置 C4C_TENANT, C4C_USERNAME, C4C_PASSWORD 环境变量")
# 初始化全局配置
global TENANT, USERNAME, PASSWORD, ODATA_C4C, ODATA_CUST, SOAP_URL
global TENANT, USERNAME, PASSWORD, ODATA_C4C, ODATA_CUST, SOAP_URL, MAX_WORKERS
TENANT = args.tenant.rstrip("/")
USERNAME = args.user
PASSWORD = args.password
ODATA_C4C = f"{TENANT}/sap/c4c/odata/v1/c4codata"
ODATA_CUST = f"{TENANT}/sap/c4c/odata/cust/v1/custticketapi"
SOAP_URL = f"{TENANT}/sap/bc/srt/scs/sap/manageattachmentfolderin"
MAX_WORKERS = args.max_workers
# 初始化 DSM 配置
global DSM_URL, DSM_USER, DSM_PASSWORD, DSM_PATH
@@ -604,6 +652,9 @@ def main():
DSM_PASSWORD = args.dsm_password
DSM_PATH = args.dsm_path
if not args.json_mode and not args.list_only:
print(f"并发下载线程数: {MAX_WORKERS}")
result = run(args.ticket, args.output_dir, args.list_only, args.json_mode)
# 下载完成后上传到群晖 DSM