feat: Add multi-threaded concurrent download support
- Add ThreadPoolExecutor for parallel attachment downloads
- Add --max-workers parameter to control concurrency (default: 5)
- Implement thread-safe logging with Lock mechanism
- Refactor _do_download to use concurrent.futures
- Add _download_single_file and _download_single_link helper functions
- Update CLAUDE.md with multi-threading documentation

Performance improvements:
- File attachments (OData) now download in parallel
- Link attachments (Scrapling) now download in parallel
- Configurable worker threads for different network conditions
This commit is contained in:
15
CLAUDE.md
15
CLAUDE.md
@@ -19,7 +19,7 @@ This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrie
|
||||
2. Fetches ServiceRequest attachments via OData endpoints:
|
||||
- `/sap/c4c/odata/v1/c4codata` - Standard C4C OData API
|
||||
- `/sap/c4c/odata/cust/v1/custticketapi` - Custom ticket API
|
||||
3. Downloads two types of attachments:
|
||||
3. Downloads two types of attachments using **multi-threaded concurrent downloads**:
|
||||
- **File attachments** (CategoryCode=2): Downloaded via OData `$value` endpoint
|
||||
- **Link attachments** (CategoryCode=3): External Salesforce links scraped using Scrapling + Playwright
|
||||
4. Handles XIssueItem-level attachments via `BO_XSRIssueItemAttachmentFolder`
|
||||
@@ -30,6 +30,11 @@ This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrie
|
||||
- `scrapling[all]` - Web scraping framework with stealth capabilities
|
||||
- `playwright` - Browser automation for downloading Salesforce attachments
|
||||
|
||||
**Performance features:**
|
||||
- Multi-threaded concurrent downloads (default: 5 threads, configurable via `--max-workers`)
|
||||
- Thread-safe output logging with lock mechanism
|
||||
- Parallel processing of both file and link attachments
|
||||
|
||||
**Output modes:**
|
||||
- Human-readable console output (default)
|
||||
- JSON mode (`--json`) for programmatic consumption
|
||||
@@ -71,6 +76,14 @@ python sap-c4c-AttachmentFolder.py \
|
||||
--password xxx \
|
||||
--ticket 24588
|
||||
|
||||
# Download with custom thread count (default: 5)
|
||||
python sap-c4c-AttachmentFolder.py \
|
||||
--tenant https://xxx.c4c.saphybriscloud.cn \
|
||||
--user admin \
|
||||
--password xxx \
|
||||
--ticket 24588 \
|
||||
--max-workers 10
|
||||
|
||||
# Download with DSM upload
|
||||
python sap-c4c-AttachmentFolder.py \
|
||||
--tenant https://xxx.c4c.saphybriscloud.cn \
|
||||
|
||||
@@ -45,6 +45,8 @@ import requests
|
||||
import urllib3
|
||||
import xml.etree.ElementTree as ET
|
||||
from scrapling.fetchers import StealthyFetcher
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from threading import Lock
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
@@ -68,6 +70,10 @@ DSM_USER = ""
|
||||
DSM_PASSWORD = ""
|
||||
DSM_PATH = ""
|
||||
|
||||
# Multi-threading configuration
|
||||
MAX_WORKERS = 5 # Default number of concurrent download workers
|
||||
print_lock = Lock() # Held around print() calls so output from worker threads does not interleave
|
||||
|
||||
|
||||
def get_session():
|
||||
s = requests.Session()
|
||||
@@ -516,33 +522,8 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
|
||||
return result
|
||||
|
||||
|
||||
def _do_download(session, attachments, label, odata_url, result, json_mode):
|
||||
"""执行下载并将结果追加到 result['downloadedFiles']"""
|
||||
file_atts = [a for a in attachments if a["CategoryCode"] == "2"]
|
||||
link_atts = [a for a in attachments if a["CategoryCode"] == "3"]
|
||||
|
||||
# 链接附件 -> Scrapling
|
||||
for a in link_atts:
|
||||
link_url = a.get("LinkWebURI")
|
||||
if not link_url:
|
||||
continue
|
||||
if not json_mode:
|
||||
print(f" {a['FileName']}: {link_url}")
|
||||
r = download_link_via_scrapling(link_url, a["FileName"])
|
||||
entry = {"source": label, "c4cName": a["FileName"], "type": "link", "linkUrl": link_url}
|
||||
if r["saved"]:
|
||||
entry["savedPath"] = os.path.abspath(r["saved"])
|
||||
entry["savedName"] = os.path.basename(r["saved"])
|
||||
if not json_mode:
|
||||
print(f" saved: {r['saved']}")
|
||||
else:
|
||||
entry["error"] = r["error"]
|
||||
if not json_mode:
|
||||
print(f" 下载失败: {r['error']}")
|
||||
result["downloadedFiles"].append(entry)
|
||||
|
||||
# 文件附件 -> OData
|
||||
for att in file_atts:
|
||||
def _download_single_file(session, att, label, odata_url, json_mode):
|
||||
"""下载单个文件附件(用于多线程)"""
|
||||
entry = {"source": label, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")}
|
||||
try:
|
||||
content = download_file_via_odata(session, att, odata_url)
|
||||
@@ -552,12 +533,77 @@ def _do_download(session, attachments, label, odata_url, result, json_mode):
|
||||
entry["savedPath"] = os.path.abspath(file_path)
|
||||
entry["savedName"] = att["FileName"]
|
||||
if not json_mode:
|
||||
print(f" saved: {file_path}")
|
||||
with print_lock:
|
||||
print(f" ✓ saved: {file_path}")
|
||||
except Exception as e:
|
||||
entry["error"] = str(e)
|
||||
if not json_mode:
|
||||
print(f" OData 下载失败 ({att['FileName']}): {e}")
|
||||
result["downloadedFiles"].append(entry)
|
||||
with print_lock:
|
||||
print(f" ✗ OData 下载失败 ({att['FileName']}): {e}")
|
||||
return entry
|
||||
|
||||
|
||||
def _download_single_link(link_att, label, json_mode):
|
||||
"""下载单个链接附件(用于多线程)"""
|
||||
link_url = link_att.get("LinkWebURI")
|
||||
entry = {"source": label, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url}
|
||||
|
||||
if not link_url:
|
||||
entry["error"] = "无链接地址"
|
||||
return entry
|
||||
|
||||
if not json_mode:
|
||||
with print_lock:
|
||||
print(f" {link_att['FileName']}: {link_url}")
|
||||
|
||||
r = download_link_via_scrapling(link_url, link_att["FileName"])
|
||||
if r["saved"]:
|
||||
entry["savedPath"] = os.path.abspath(r["saved"])
|
||||
entry["savedName"] = os.path.basename(r["saved"])
|
||||
if not json_mode:
|
||||
with print_lock:
|
||||
print(f" ✓ saved: {r['saved']}")
|
||||
else:
|
||||
entry["error"] = r["error"]
|
||||
if not json_mode:
|
||||
with print_lock:
|
||||
print(f" ✗ 下载失败: {r['error']}")
|
||||
return entry
|
||||
|
||||
|
||||
def _do_download(session, attachments, label, odata_url, result, json_mode):
|
||||
"""执行下载并将结果追加到 result['downloadedFiles'](多线程版本)"""
|
||||
file_atts = [a for a in attachments if a["CategoryCode"] == "2"]
|
||||
link_atts = [a for a in attachments if a["CategoryCode"] == "3"]
|
||||
|
||||
downloaded_entries = []
|
||||
|
||||
# 使用线程池并发下载
|
||||
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
||||
futures = []
|
||||
|
||||
# 提交文件附件下载任务
|
||||
for att in file_atts:
|
||||
future = executor.submit(_download_single_file, session, att, label, odata_url, json_mode)
|
||||
futures.append(future)
|
||||
|
||||
# 提交链接附件下载任务
|
||||
for att in link_atts:
|
||||
future = executor.submit(_download_single_link, att, label, json_mode)
|
||||
futures.append(future)
|
||||
|
||||
# 收集结果
|
||||
for future in as_completed(futures):
|
||||
try:
|
||||
entry = future.result()
|
||||
downloaded_entries.append(entry)
|
||||
except Exception as e:
|
||||
if not json_mode:
|
||||
with print_lock:
|
||||
print(f" ✗ 下载任务异常: {e}")
|
||||
|
||||
# 将结果追加到总结果中
|
||||
result["downloadedFiles"].extend(downloaded_entries)
|
||||
|
||||
|
||||
def main():
|
||||
@@ -572,6 +618,7 @@ def main():
|
||||
parser.add_argument("--output-dir", default="downloads", help="附件保存目录 (默认: downloads)")
|
||||
parser.add_argument("--json", action="store_true", dest="json_mode", help="JSON 输出模式(供程序调用)")
|
||||
parser.add_argument("--list-only", action="store_true", help="仅列出附件清单,不下载")
|
||||
parser.add_argument("--max-workers", type=int, default=5, help="并发下载线程数 (默认: 5)")
|
||||
|
||||
# 群晖 DSM 上传参数
|
||||
parser.add_argument("--dsm-url", default=os.environ.get("DSM_URL", ""),
|
||||
@@ -589,13 +636,14 @@ def main():
|
||||
parser.error("必须提供 --tenant, --user, --password 参数,或设置 C4C_TENANT, C4C_USERNAME, C4C_PASSWORD 环境变量")
|
||||
|
||||
# 初始化全局配置
|
||||
global TENANT, USERNAME, PASSWORD, ODATA_C4C, ODATA_CUST, SOAP_URL
|
||||
global TENANT, USERNAME, PASSWORD, ODATA_C4C, ODATA_CUST, SOAP_URL, MAX_WORKERS
|
||||
TENANT = args.tenant.rstrip("/")
|
||||
USERNAME = args.user
|
||||
PASSWORD = args.password
|
||||
ODATA_C4C = f"{TENANT}/sap/c4c/odata/v1/c4codata"
|
||||
ODATA_CUST = f"{TENANT}/sap/c4c/odata/cust/v1/custticketapi"
|
||||
SOAP_URL = f"{TENANT}/sap/bc/srt/scs/sap/manageattachmentfolderin"
|
||||
MAX_WORKERS = args.max_workers
|
||||
|
||||
# 初始化 DSM 配置
|
||||
global DSM_URL, DSM_USER, DSM_PASSWORD, DSM_PATH
|
||||
@@ -604,6 +652,9 @@ def main():
|
||||
DSM_PASSWORD = args.dsm_password
|
||||
DSM_PATH = args.dsm_path
|
||||
|
||||
if not args.json_mode and not args.list_only:
|
||||
print(f"并发下载线程数: {MAX_WORKERS}")
|
||||
|
||||
result = run(args.ticket, args.output_dir, args.list_only, args.json_mode)
|
||||
|
||||
# 下载完成后上传到群晖 DSM
|
||||
|
||||
Reference in New Issue
Block a user