diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 890754d..50bae2d 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -2,7 +2,8 @@ "permissions": { "allow": [ "Bash(git init:*)", - "Bash(git add:*)" + "Bash(git add:*)", + "Bash(python -c \"import ast, sys; ast.parse\\(open\\('sap-c4c-AttachmentFolder.py'\\).read\\(\\)\\); print\\('语法检查通过'\\)\")" ] } } diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 9678e51..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,88 +0,0 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -SAP C4C (Cloud for Customer) attachment downloader toolkit that retrieves attachments from ServiceRequest tickets and optionally uploads them to Synology DSM NAS. - -- **`sap-c4c-AttachmentFolder.py`**: Core downloader (Python >= 3.8) using OData APIs and web scraping -- **`C4CAttachmentDownloader.java`**: Java wrapper that calls the Python script via ProcessBuilder -- **`dsm-upload.py`**: Standalone Synology NAS upload example - -## Common Commands - -```bash -# Install dependencies -pip install requests scrapling[all] playwright -python -m playwright install chromium - -# Download attachments -python sap-c4c-AttachmentFolder.py \ - --tenant https://xxx.c4c.saphybriscloud.cn \ - --user admin --password xxx --ticket 24588 - -# Download with custom concurrency (default: 5 threads) -python sap-c4c-AttachmentFolder.py --ticket 24588 --max-workers 10 - -# List attachments only (no download) -python sap-c4c-AttachmentFolder.py --ticket 24588 --list-only - -# JSON mode (for Java/programmatic use) -python sap-c4c-AttachmentFolder.py --ticket 24588 --json - -# Download + upload to Synology DSM -python sap-c4c-AttachmentFolder.py --ticket 24588 \ - --dsm-url http://10.0.10.235:5000 --dsm-user PLM \ - --dsm-password 123456 --dsm-path /Newgonow/AU-SPFJ - -# All credentials also accept environment variables: -# C4C_TENANT, C4C_USERNAME, C4C_PASSWORD, DSM_URL, DSM_USERNAME, DSM_PASSWORD, DSM_PATH -``` - -```java -// Java: compile requires Jackson (jackson-databind, jackson-core, jackson-annotations) -javac -cp jackson-databind.jar:jackson-core.jar:jackson-annotations.jar C4CAttachmentDownloader.java -``` - -## Architecture - -### Data Flow - -1. Authenticate to SAP C4C via Basic Auth -2. Look up ServiceRequest by ticket ID -> get ObjectID and SerialID -3. Fetch SR-level attachments via `/sap/c4c/odata/v1/c4codata/ServiceRequestCollection('{OID}')/ServiceRequestAttachmentFolder` -4. Fetch XIssueItem-level attachments via `/sap/c4c/odata/cust/v1/custticketapi/BO_XSRIssueItemAttachmentCollection` (two-step: filter by UUID, then navigate to AttachmentFolder) -5. Download concurrently using ThreadPoolExecutor: - - **CategoryCode "2"** (file): OData `$value` endpoint or `DocumentLink` URL - - **CategoryCode "3"** (link): Scrapling + Playwright opens Salesforce URL, clicks `button.downloadbutton[title='Download']`, captures download -6. Optionally upload to Synology DSM via FileStation API, then **auto-delete local files** - -### Two OData Endpoints - -- `/sap/c4c/odata/v1/c4codata` (`ODATA_C4C`) - Standard C4C OData for ServiceRequest and SR-level attachments -- `/sap/c4c/odata/cust/v1/custticketapi` (`ODATA_CUST`) - Custom ticket API for XIssueItem and its attachments - -### Java Wrapper - -Invokes Python script with `--json` flag, passes credentials via **environment variables** (not CLI args for security). Parses JSON into typed classes: `Result`, `Attachment`, `IssueItem`, `DownloadedFile`, `DsmUploadEntry`. Default timeout: 30 minutes. - -### DSM Upload Directory Structure - -- SR attachments: `{DSM_PATH}/{ticketID}_{serialID}/{filename}` -- IssueItem attachments: `{DSM_PATH}/{ticketID}_{serialID}/{issueID}/{filename}` - -### Concurrency Model - -Multi-threaded via `ThreadPoolExecutor` (default 5, `--max-workers`). Both file and link downloads are submitted as futures. Thread-safe console output uses a `print_lock`. The `requests.Session` is shared across file-download threads (thread-safe). Scrapling/Playwright link downloads each launch their own browser. - -### Global State - -The Python script uses module-level globals (`TENANT`, `USERNAME`, `PASSWORD`, `ODATA_C4C`, `ODATA_CUST`, `OUTPUT_DIR`, `DSM_*`, `MAX_WORKERS`) initialized in `main()`. The `run()` function is the core entry point returning a structured dict. - -## Troubleshooting - -- **Playwright not installed**: `python -m playwright install chromium` -- **Link download fails**: Salesforce page selector `button.downloadbutton[title='Download']` may have changed; update `download_link_via_scrapling()` -- **Timeout**: Increase Java wrapper timeout or Scrapling's `timeout` param (currently 60s page load, 120s download wait) -- **SSL warnings**: `verify=False` is used throughout; `urllib3` warnings are suppressed diff --git a/sap-c4c-AttachmentFolder.py b/sap-c4c-AttachmentFolder.py index 6f43933..2fdb308 100644 --- a/sap-c4c-AttachmentFolder.py +++ b/sap-c4c-AttachmentFolder.py @@ -19,7 +19,7 @@ SAP C4C 附件下载工具 python sap-c4c-AttachmentFolder.py --tenant https://xxx.c4c.saphybriscloud.cn --user admin --password xxx --ticket 24588 # 下载附件并上传到群晖 - python sap-c4c-AttachmentFolder.py --tenant https://xxx.c4c.saphybriscloud.cn --user admin --password xxx --ticket 24588 \ + python sap-c4c-AttachmentFolder.py --tenant https://xxx.c4c.saphybriscloud.cn --user admin --password xxx --ticket 24588 \\ --dsm-url http://10.0.10.235:5000 --dsm-user PLM --dsm-password 123456 --dsm-path /Newgonow/AU-SPFJ # JSON 模式(供 Java/其他程序调用) @@ -39,52 +39,51 @@ import os import sys import json import argparse -import base64 import mimetypes import requests import urllib3 -import xml.etree.ElementTree as ET -from scrapling.fetchers import StealthyFetcher +from dataclasses import dataclass from concurrent.futures import ThreadPoolExecutor, as_completed from threading import Lock urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -TENANT = "" -USERNAME = "" -PASSWORD = "" +print_lock = Lock() -# OData 路径(在 main 中根据 TENANT 初始化) -ODATA_C4C = "" -ODATA_CUST = "" -# SOAP endpoint(如需通过 SOAP 下载文件内容,请替换为实际地址) -SOAP_URL = f"{TENANT}/sap/bc/srt/scs/sap/manageattachmentfolderin" +@dataclass +class Config: + tenant: str = "" + username: str = "" + password: str = "" + odata_c4c: str = "" + odata_cust: str = "" + output_dir: str = "downloads" + max_workers: int = 5 + dsm_url: str = "" + dsm_user: str = "" + dsm_password: str = "" + dsm_path: str = "" -# 默认值,可通过命令行参数覆盖 -OUTPUT_DIR = "downloads" + def init_endpoints(self): + base = self.tenant.rstrip("/") + self.odata_c4c = f"{base}/sap/c4c/odata/v1/c4codata" + self.odata_cust = f"{base}/sap/c4c/odata/cust/v1/custticketapi" -# 群晖 DSM 配置(可选,在 main 中初始化) -DSM_URL = "" -DSM_USER = "" -DSM_PASSWORD = "" -DSM_PATH = "" -# 多线程配置 -MAX_WORKERS = 5 # 默认并发数 -print_lock = Lock() # 用于线程安全的打印输出 +cfg = Config() def get_session(): s = requests.Session() - s.auth = (USERNAME, PASSWORD) + s.auth = (cfg.username, cfg.password) s.headers.update({"Accept": "application/json"}) return s def find_service_request_object_id(session, ticket_id): """通过人类可读的 ticket ID 查找 OData ObjectID 和 SerialID""" - url = f"{ODATA_C4C}/ServiceRequestCollection" + url = f"{cfg.odata_c4c}/ServiceRequestCollection" params = {"$format": "json", "$filter": f"ID eq '{ticket_id}'"} resp = session.get(url, params=params, timeout=60) resp.raise_for_status() @@ -92,8 +91,7 @@ def find_service_request_object_id(session, ticket_id): if not results: raise ValueError(f"未找到 ID={ticket_id} 的 ServiceRequest") sr = results[0] - serial_id = sr.get("SerialID", "") - return sr["ObjectID"], serial_id + return sr["ObjectID"], sr.get("SerialID", "") def _parse_attachments(results): @@ -122,7 +120,7 @@ def _parse_attachments(results): def list_sr_attachments(session, sr_object_id): """获取 ServiceRequest 级别的附件(通过 c4codata 导航)""" - url = f"{ODATA_C4C}/ServiceRequestCollection('{sr_object_id}')/ServiceRequestAttachmentFolder" + url = f"{cfg.odata_c4c}/ServiceRequestCollection('{sr_object_id}')/ServiceRequestAttachmentFolder" params = {"$format": "json"} resp = session.get(url, params=params, timeout=60) resp.raise_for_status() @@ -132,7 +130,7 @@ def list_sr_attachments(session, sr_object_id): def list_issue_items(session, ticket_id): """获取 ServiceRequest 下的 XIssueItem 列表(通过 custticketapi)""" - url = f"{ODATA_CUST}/ServiceRequest_XIssueItem_SDKCollection" + url = f"{cfg.odata_cust}/ServiceRequest_XIssueItem_SDKCollection" params = {"$format": "json", "$filter": f"TicketID eq '{ticket_id}'"} resp = session.get(url, params=params, timeout=60) resp.raise_for_status() @@ -141,21 +139,32 @@ def list_issue_items(session, ticket_id): def get_issue_item_detail(session, object_id): """通过 ObjectID 获取 XIssueItem 详细信息,包括真实的 IssueID_SDK""" - url = f"{ODATA_CUST}/ServiceRequest_XIssueItem_SDKCollection('{object_id}')" + url = f"{cfg.odata_cust}/ServiceRequest_XIssueItem_SDKCollection('{object_id}')" params = {"$format": "json"} resp = session.get(url, params=params, timeout=60) resp.raise_for_status() return resp.json().get("d", {}).get("results", {}) +def _fetch_attachment_folder(session, att_oid): + """获取单个 AttachmentFolder 条目(供并发调用)""" + folder_url = ( + f"{cfg.odata_cust}/BO_XSRIssueItemAttachmentCollection('{att_oid}')" + f"/BO_XSRIssueItemAttachmentFolder" + ) + resp = session.get(folder_url, params={"$format": "json"}, timeout=60) + resp.raise_for_status() + return resp.json().get("d", {}).get("results", []) + + def list_issue_item_attachments(session, issue_item_uuid): """ 获取 XIssueItem 级别的附件。 路径: BO_XSRIssueItemAttachmentCollection (按 XIssueItemUUID 过滤) -> BO_XSRIssueItemAttachmentFolder (实际附件文件) + Step 2 并发请求各 AttachmentFolder。 """ - # Step 1: 通过 XIssueItemUUID 找到 BO_XSRIssueItemAttachment - url = f"{ODATA_CUST}/BO_XSRIssueItemAttachmentCollection" + url = f"{cfg.odata_cust}/BO_XSRIssueItemAttachmentCollection" params = {"$format": "json", "$filter": f"XIssueItemUUID eq guid'{issue_item_uuid}'"} resp = session.get(url, params=params, timeout=60) resp.raise_for_status() @@ -163,108 +172,43 @@ def list_issue_item_attachments(session, issue_item_uuid): if not att_results: return [] - # Step 2: 对每个 Attachment 导航到 AttachmentFolder all_attachments = [] - for att in att_results: - att_oid = att["ObjectID"] - folder_url = ( - f"{ODATA_CUST}/BO_XSRIssueItemAttachmentCollection('{att_oid}')" - f"/BO_XSRIssueItemAttachmentFolder" - ) - resp2 = session.get(folder_url, params={"$format": "json"}, timeout=60) - resp2.raise_for_status() - folders = resp2.json().get("d", {}).get("results", []) - all_attachments.extend(_parse_attachments(folders)) + with ThreadPoolExecutor(max_workers=cfg.max_workers) as executor: + futures = { + executor.submit(_fetch_attachment_folder, session, att["ObjectID"]): att + for att in att_results + } + for future in as_completed(futures): + try: + folders = future.result() + all_attachments.extend(_parse_attachments(folders)) + except Exception as e: + with print_lock: + print(f" ⚠ 获取附件文件夹失败: {e}", file=sys.stderr) return all_attachments -def download_file_via_odata(session, attachment, base_url=None): - """通过 OData $value 直接下载文件内容(适用于 CategoryCode=2 的文件附件)""" - # 优先使用 DocumentLink(custticketapi 返回的完整 $value URL) +def download_file_via_odata(session, attachment, file_path, base_url=None): + """通过 OData $value 流式下载文件,直接写入 file_path(避免大文件 OOM)""" doc_link = attachment.get("DocumentLink") if doc_link: url = doc_link else: obj_id = attachment["ObjectID"] if base_url is None: - base_url = f"{ODATA_C4C}/ServiceRequestAttachmentFolderCollection" + base_url = f"{cfg.odata_c4c}/ServiceRequestAttachmentFolderCollection" url = f"{base_url}('{obj_id}')/$value" resp = session.get(url, timeout=300, stream=True) resp.raise_for_status() - return resp.content - - -def build_read_documents_payload(document_uuids, size_limit_kb=10240): - uuid_xml = "\n".join( - f"{u}" for u in document_uuids - ) - return f""" - - - - - - {uuid_xml} - - - {size_limit_kb} - - - -""" - - -def read_documents_file_content(session, document_uuids): - payload = build_read_documents_payload(document_uuids) - headers = {"Content-Type": "text/xml; charset=utf-8"} - resp = session.post(SOAP_URL, data=payload.encode("utf-8"), headers=headers, timeout=120) - resp.raise_for_status() - return resp.text - - -def parse_soap_response(xml_text): - root = ET.fromstring(xml_text) - items = [] - for elem in root.iter(): - if elem.tag.endswith("AttachmentFolderDocumentFileContent"): - item = {"DocumentUUID": None, "BinaryObject": None} - for child in elem: - if child.tag.endswith("DocumentUUID"): - item["DocumentUUID"] = child.text - elif child.tag.endswith("BinaryObject"): - item["BinaryObject"] = child.text - if item["DocumentUUID"] and item["BinaryObject"]: - items.append(item) - - more_hits = False - for elem in root.iter(): - if elem.tag.endswith("MoreHitsAvailableIndicator") and (elem.text or "").lower() == "true": - more_hits = True - break - return items, more_hits - - -def save_files(content_items, attachment_index): - for item in content_items: - doc_uuid = item["DocumentUUID"] - binary_b64 = item["BinaryObject"] - meta = attachment_index.get(doc_uuid, {}) - file_name = meta.get("FileName", f"{doc_uuid}.bin") - file_path = os.path.join(OUTPUT_DIR, file_name) - with open(file_path, "wb") as f: - f.write(base64.b64decode(binary_b64)) - print(f" saved: {file_path}") - - -def chunked(seq, size): - for i in range(0, len(seq), size): - yield seq[i:i + size] + with open(file_path, "wb") as f: + for chunk in resp.iter_content(chunk_size=65536): + f.write(chunk) def download_link_via_scrapling(link_url, save_name): """通过 Scrapling 打开外部链接页面,点击 .downloadbutton 按钮下载文件""" + from scrapling.fetchers import StealthyFetcher + result = {"saved": None, "error": None} def click_download(page): @@ -273,7 +217,7 @@ def download_link_via_scrapling(link_url, save_name): page.click("button.downloadbutton[title='Download']") download = download_info.value filename = download.suggested_filename or save_name - save_path = os.path.join(OUTPUT_DIR, filename) + save_path = os.path.join(cfg.output_dir, filename) download.save_as(save_path) result["saved"] = save_path @@ -295,9 +239,9 @@ def download_link_via_scrapling(link_url, save_name): def dsm_login(): """登录群晖 DSM,返回 SID""" - resp = requests.get(f"{DSM_URL}/webapi/auth.cgi", params={ + resp = requests.get(f"{cfg.dsm_url}/webapi/auth.cgi", params={ "api": "SYNO.API.Auth", "version": "3", "method": "login", - "account": DSM_USER, "passwd": DSM_PASSWORD, + "account": cfg.dsm_user, "passwd": cfg.dsm_password, "session": "FileStation", "format": "sid", }, verify=False, timeout=30) resp.raise_for_status() @@ -311,7 +255,6 @@ def dsm_upload_file(sid, local_path, remote_path): """上传单个文件到群晖 DSM""" filename = os.path.basename(local_path) mime = mimetypes.guess_type(filename)[0] or "application/octet-stream" - form = { "api": "SYNO.FileStation.Upload", "version": "2", @@ -322,7 +265,7 @@ def dsm_upload_file(sid, local_path, remote_path): } with open(local_path, "rb") as f: resp = requests.post( - f"{DSM_URL}/webapi/entry.cgi", + f"{cfg.dsm_url}/webapi/entry.cgi", data=form, files={"file": (filename, f, mime)}, cookies={"id": sid}, @@ -337,24 +280,22 @@ def dsm_upload_file(sid, local_path, remote_path): def dsm_upload_downloaded_files(downloaded_files, ticket_id, serial_id, json_mode=False): - """将所有已下载文件上传到群晖 DSM,按 ticket 和 issue 组织目录结构""" - if not DSM_URL or not DSM_USER or not DSM_PASSWORD or not DSM_PATH: + """将所有已下载文件并发上传到群晖 DSM,按 ticket 和 issue 组织目录结构""" + if not cfg.dsm_url or not cfg.dsm_user or not cfg.dsm_password or not cfg.dsm_path: return [] files_to_upload = [f for f in downloaded_files if f.get("savedPath") and not f.get("error")] if not files_to_upload: return [] - # 目录名: ticketID_serialID folder_name = f"{ticket_id}_{serial_id}" if serial_id else ticket_id if not json_mode: print(f"\n{'='*60}") - print(f"上传到群晖 DSM: {DSM_URL}") - print(f"目标路径: {DSM_PATH}/{folder_name}") + print(f"上传到群晖 DSM: {cfg.dsm_url}") + print(f"目标路径: {cfg.dsm_path}/{folder_name}") print('='*60) - upload_results = [] try: sid = dsm_login() if not json_mode: @@ -364,20 +305,16 @@ def dsm_upload_downloaded_files(downloaded_files, ticket_id, serial_id, json_mod print(f" DSM 登录失败: {e}", file=sys.stderr) return [{"error": f"DSM 登录失败: {e}"}] - for f in files_to_upload: + def _upload_one(f): local_path = f["savedPath"] filename = os.path.basename(local_path) issue_id = f.get("issueId", "") - - # 根据 issueId 判断目录结构 - # SR 附件: {DSM_PATH}/{ticketID_serialID}/{filename} - # IssueItem 附件: {DSM_PATH}/{ticketID_serialID}/{issueID}/{filename} - if issue_id: - remote_path = f"{DSM_PATH}/{folder_name}/{issue_id}" - else: - remote_path = f"{DSM_PATH}/{folder_name}" + remote_path = ( + f"{cfg.dsm_path}/{folder_name}/{issue_id}" + if issue_id else + f"{cfg.dsm_path}/{folder_name}" + ) full_remote_path = f"{remote_path}/{filename}" - entry = { "file": filename, "ticketId": ticket_id, @@ -389,13 +326,18 @@ def dsm_upload_downloaded_files(downloaded_files, ticket_id, serial_id, json_mod dsm_upload_file(sid, local_path, remote_path) entry["success"] = True if not json_mode: - print(f" 上传成功: {filename} -> {full_remote_path}") + with print_lock: + print(f" 上传成功: {filename} -> {full_remote_path}") except Exception as e: entry["success"] = False entry["error"] = str(e) if not json_mode: - print(f" 上传失败: {filename}: {e}") - upload_results.append(entry) + with print_lock: + print(f" 上传失败: {filename}: {e}") + return entry + + with ThreadPoolExecutor(max_workers=cfg.max_workers) as executor: + upload_results = list(executor.map(_upload_one, files_to_upload)) if not json_mode: ok = sum(1 for r in upload_results if r.get("success")) @@ -414,15 +356,8 @@ def print_attachment_summary(all_attachments): print(" 无附件") return - total_files = 0 - total_links = 0 - for level, atts in all_attachments: - for a in atts: - if a["CategoryCode"] == "2": - total_files += 1 - elif a["CategoryCode"] == "3": - total_links += 1 - + total_files = sum(1 for _, atts in all_attachments for a in atts if a["CategoryCode"] == "2") + total_links = sum(1 for _, atts in all_attachments for a in atts if a["CategoryCode"] == "3") print(f" 合计: {total_files} 个文件附件, {total_links} 个链接附件\n") idx = 0 @@ -433,37 +368,164 @@ def print_attachment_summary(all_attachments): for a in atts: idx += 1 cat = "文件" if a["CategoryCode"] == "2" else "链接" - size = a.get("SizeInkB", "") size_str = "" - if size and cat == "文件": + if a.get("SizeInkB") and cat == "文件": try: - kb = float(size) - if kb > 1024: - size_str = f" ({kb/1024:.1f} MB)" - else: - size_str = f" ({kb:.0f} KB)" + kb = float(a["SizeInkB"]) + size_str = f" ({kb/1024:.1f} MB)" if kb > 1024 else f" ({kb:.0f} KB)" except ValueError: pass - mime = a.get("MimeType") or "" - link = a.get("LinkWebURI") or "" print(f" {idx}. [{cat}] {a['FileName']}{size_str}") - if mime: - print(f" MIME: {mime}") - if link: - print(f" 链接: {link}") + if a.get("MimeType"): + print(f" MIME: {a['MimeType']}") + if a.get("LinkWebURI"): + print(f" 链接: {a['LinkWebURI']}") print() +def _download_single_file(session, att, label, issue_id, odata_url, json_mode): + """下载单个文件附件,流式写入磁盘(用于多线程)""" + entry = { + "source": label, "issueId": issue_id, + "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType"), + } + file_path = os.path.join(cfg.output_dir, att["FileName"]) + try: + download_file_via_odata(session, att, file_path, odata_url) + entry["savedPath"] = os.path.abspath(file_path) + entry["savedName"] = att["FileName"] + if not json_mode: + with print_lock: + print(f" ✓ saved: {file_path}") + except Exception as e: + entry["error"] = str(e) + if not json_mode: + with print_lock: + print(f" ✗ OData 下载失败 ({att['FileName']}): {e}") + return entry + + +def _download_single_link(link_att, label, issue_id, json_mode): + """下载单个链接附件(用于多线程)""" + link_url = link_att.get("LinkWebURI") + entry = { + "source": label, "issueId": issue_id, + "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url, + } + + if not link_url: + entry["error"] = "无链接地址" + return entry + + if not json_mode: + with print_lock: + print(f" {link_att['FileName']}: {link_url}") + + r = download_link_via_scrapling(link_url, link_att["FileName"]) + if r["saved"]: + entry["savedPath"] = os.path.abspath(r["saved"]) + entry["savedName"] = os.path.basename(r["saved"]) + if not json_mode: + with print_lock: + print(f" ✓ saved: {r['saved']}") + else: + entry["error"] = r["error"] + if not json_mode: + with print_lock: + print(f" ✗ 下载失败: {r['error']}") + return entry + + +def _do_download(session, attachments, label, issue_id, odata_url, json_mode): + """并发下载附件列表,返回下载结果列表""" + file_atts = [a for a in attachments if a["CategoryCode"] == "2"] + link_atts = [a for a in attachments if a["CategoryCode"] == "3"] + + with ThreadPoolExecutor(max_workers=cfg.max_workers) as executor: + futures = [ + executor.submit(_download_single_file, session, att, label, issue_id, odata_url, json_mode) + for att in file_atts + ] + [ + executor.submit(_download_single_link, att, label, issue_id, json_mode) + for att in link_atts + ] + + downloaded_entries = [] + for future in as_completed(futures): + try: + downloaded_entries.append(future.result()) + except Exception as e: + if not json_mode: + with print_lock: + print(f" ✗ 下载任务异常: {e}") + + return downloaded_entries + + +def _process_issue_item(session, item, list_only, json_mode): + """处理单个 XIssueItem:fetch detail → list attachments → download(供并发调用)""" + item_oid = item["ObjectID"] + issue_uuid = item.get("XIssueItemUUIDcontent_SDK", "") + issue_desc = (item.get("IssuesDescriptionX_SDK") or "")[:80] + + issue_id = "" + try: + item_detail = get_issue_item_detail(session, item_oid) + issue_id = item_detail.get("IssueID_SDK", "") + except Exception as e: + with print_lock: + print(f" ⚠ 获取 IssueID 失败 ({item_oid}): {e}", file=sys.stderr) + + if not json_mode: + with print_lock: + print(f"\n XIssueItem: {item_oid}") + if issue_id: + print(f" IssueID: {issue_id}") + print(f" UUID: {issue_uuid}") + print(f" 描述: {issue_desc}") + + issue_entry = { + "objectId": item_oid, + "issueId": issue_id, + "uuid": issue_uuid, + "description": issue_desc, + "attachments": [], + } + downloaded_entries = [] + + if not issue_uuid: + if not json_mode: + with print_lock: + print(" ⚠ 无 XIssueItemUUID,跳过") + return issue_entry, downloaded_entries + + atts = list_issue_item_attachments(session, issue_uuid) + issue_entry["attachments"] = atts + + if not json_mode: + with print_lock: + print(f" 找到 {len(atts)} 个附件") + + if not list_only: + label = f"IssueItem-{issue_id}" if issue_id else f"IssueItem-{item_oid[:12]}" + downloaded_entries = _do_download( + session, atts, label, issue_id, + f"{cfg.odata_cust}/BO_XSRIssueItemAttachmentFolderCollection", + json_mode, + ) + + return issue_entry, downloaded_entries + + def run(ticket_id, output_dir, list_only=False, json_mode=False): """核心逻辑,返回结构化结果""" - global OUTPUT_DIR - OUTPUT_DIR = output_dir - os.makedirs(OUTPUT_DIR, exist_ok=True) + cfg.output_dir = output_dir + os.makedirs(cfg.output_dir, exist_ok=True) session = get_session() result = { "ticketId": ticket_id, - "outputDir": os.path.abspath(OUTPUT_DIR), + "outputDir": os.path.abspath(cfg.output_dir), "success": True, "error": None, "srAttachments": [], @@ -490,9 +552,10 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): print(f"找到 {len(sr_attachments)} 个附件") if not list_only: - _do_download(session, sr_attachments, "SR", "", None, result, json_mode) + entries = _do_download(session, sr_attachments, "SR", "", None, json_mode) + result["downloadedFiles"].extend(entries) - # 3) XIssueItem 级别附件 + # 3) XIssueItem 级别附件(并发处理每个 item) if not json_mode: print(f"\n{'='*60}") print("XIssueItem 级别附件 (BO_XSRIssueItemAttachmentFolder)") @@ -501,60 +564,28 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): if not json_mode: print(f"找到 {len(issue_items)} 个 XIssueItem") - for item in issue_items: - item_oid = item["ObjectID"] - issue_uuid = item.get("XIssueItemUUIDcontent_SDK", "") - issue_desc = (item.get("IssuesDescriptionX_SDK") or "")[:80] - - # 通过 ObjectID 查询详细信息,获取真实的 IssueID_SDK - issue_id = "" - try: - item_detail = get_issue_item_detail(session, item_oid) - issue_id = item_detail.get("IssueID_SDK", "") - except Exception as e: - print(f" ⚠ 获取 IssueID 失败: {e}", file=sys.stderr) - - issue_entry = { - "objectId": item_oid, - "issueId": issue_id, - "uuid": issue_uuid, - "description": issue_desc, - "attachments": [], - } - - if not json_mode: - print(f"\n XIssueItem: {item_oid}") - if issue_id: - print(f" IssueID: {issue_id}") - print(f" UUID: {issue_uuid}") - print(f" 描述: {issue_desc}") - - if not issue_uuid: - if not json_mode: - print(" ⚠ 无 XIssueItemUUID,跳过") - result["issueItems"].append(issue_entry) - continue - - atts = list_issue_item_attachments(session, issue_uuid) - issue_entry["attachments"] = atts - if not json_mode: - print(f" 找到 {len(atts)} 个附件") - - if not list_only: - label = f"IssueItem-{issue_id}" if issue_id else f"IssueItem-{item_oid[:12]}" - _do_download( - session, atts, label, issue_id, - f"{ODATA_CUST}/BO_XSRIssueItemAttachmentFolderCollection", - result, json_mode, - ) - - result["issueItems"].append(issue_entry) + with ThreadPoolExecutor(max_workers=cfg.max_workers) as executor: + futures = [ + executor.submit(_process_issue_item, session, item, list_only, json_mode) + for item in issue_items + ] + for future in as_completed(futures): + try: + issue_entry, downloaded_entries = future.result() + result["issueItems"].append(issue_entry) + result["downloadedFiles"].extend(downloaded_entries) + except Exception as e: + if not json_mode: + print(f" ✗ XIssueItem 处理异常: {e}", file=sys.stderr) # 4) 汇总清单 if not json_mode: all_attachments = [("SR", sr_attachments)] for ie in result["issueItems"]: - ie_label = f"IssueItem-{ie['issueId']}" if ie.get("issueId") else f"IssueItem-{ie['objectId'][:12]}" + ie_label = ( + f"IssueItem-{ie['issueId']}" if ie.get("issueId") + else f"IssueItem-{ie['objectId'][:12]}" + ) all_attachments.append((ie_label, ie["attachments"])) print_attachment_summary(all_attachments) @@ -567,90 +598,6 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): return result -def _download_single_file(session, att, label, issue_id, odata_url, json_mode): - """下载单个文件附件(用于多线程)""" - entry = {"source": label, "issueId": issue_id, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")} - try: - content = download_file_via_odata(session, att, odata_url) - file_path = os.path.join(OUTPUT_DIR, att["FileName"]) - with open(file_path, "wb") as f: - f.write(content) - entry["savedPath"] = os.path.abspath(file_path) - entry["savedName"] = att["FileName"] - if not json_mode: - with print_lock: - print(f" ✓ saved: {file_path}") - except Exception as e: - entry["error"] = str(e) - if not json_mode: - with print_lock: - print(f" ✗ OData 下载失败 ({att['FileName']}): {e}") - return entry - - -def _download_single_link(link_att, label, issue_id, json_mode): - """下载单个链接附件(用于多线程)""" - link_url = link_att.get("LinkWebURI") - entry = {"source": label, "issueId": issue_id, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url} - - if not link_url: - entry["error"] = "无链接地址" - return entry - - if not json_mode: - with print_lock: - print(f" {link_att['FileName']}: {link_url}") - - r = download_link_via_scrapling(link_url, link_att["FileName"]) - if r["saved"]: - entry["savedPath"] = os.path.abspath(r["saved"]) - entry["savedName"] = os.path.basename(r["saved"]) - if not json_mode: - with print_lock: - print(f" ✓ saved: {r['saved']}") - else: - entry["error"] = r["error"] - if not json_mode: - with print_lock: - print(f" ✗ 下载失败: {r['error']}") - return entry - - -def _do_download(session, attachments, label, issue_id, odata_url, result, json_mode): - """执行下载并将结果追加到 result['downloadedFiles'](多线程版本)""" - file_atts = [a for a in attachments if a["CategoryCode"] == "2"] - link_atts = [a for a in attachments if a["CategoryCode"] == "3"] - - downloaded_entries = [] - - # 使用线程池并发下载 - with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: - futures = [] - - # 提交文件附件下载任务 - for att in file_atts: - future = executor.submit(_download_single_file, session, att, label, issue_id, odata_url, json_mode) - futures.append(future) - - # 提交链接附件下载任务 - for att in link_atts: - future = executor.submit(_download_single_link, att, label, issue_id, json_mode) - futures.append(future) - - # 收集结果 - for future in as_completed(futures): - try: - entry = future.result() - downloaded_entries.append(entry) - except Exception as e: - if not json_mode: - with print_lock: - print(f" ✗ 下载任务异常: {e}") - - # 将结果追加到总结果中 - result["downloadedFiles"].extend(downloaded_entries) - - def main(): parser = argparse.ArgumentParser(description="SAP C4C 附件下载工具") parser.add_argument("--tenant", default=os.environ.get("C4C_TENANT", ""), @@ -663,7 +610,7 @@ def main(): parser.add_argument("--output-dir", default="downloads", help="附件保存目录 (默认: downloads)") parser.add_argument("--json", action="store_true", dest="json_mode", help="JSON 输出模式(供程序调用)") parser.add_argument("--list-only", action="store_true", help="仅列出附件清单,不下载") - parser.add_argument("--max-workers", type=int, default=5, help="并发下载线程数 (默认: 5)") + parser.add_argument("--max-workers", type=int, default=5, help="并发线程数 (默认: 5)") # 群晖 DSM 上传参数 parser.add_argument("--dsm-url", default=os.environ.get("DSM_URL", ""), @@ -681,31 +628,27 @@ def main(): parser.error("必须提供 --tenant, --user, --password 参数,或设置 C4C_TENANT, C4C_USERNAME, C4C_PASSWORD 环境变量") # 初始化全局配置 - global TENANT, USERNAME, PASSWORD, ODATA_C4C, ODATA_CUST, SOAP_URL, MAX_WORKERS - TENANT = args.tenant.rstrip("/") - USERNAME = args.user - PASSWORD = args.password - ODATA_C4C = f"{TENANT}/sap/c4c/odata/v1/c4codata" - ODATA_CUST = f"{TENANT}/sap/c4c/odata/cust/v1/custticketapi" - SOAP_URL = f"{TENANT}/sap/bc/srt/scs/sap/manageattachmentfolderin" - MAX_WORKERS = args.max_workers - - # 初始化 DSM 配置 - global DSM_URL, DSM_USER, DSM_PASSWORD, DSM_PATH - DSM_URL = args.dsm_url.rstrip("/") if args.dsm_url else "" - DSM_USER = args.dsm_user - DSM_PASSWORD = args.dsm_password - DSM_PATH = args.dsm_path + cfg.tenant = args.tenant.rstrip("/") + cfg.username = args.user + cfg.password = args.password + cfg.max_workers = args.max_workers + cfg.dsm_url = args.dsm_url.rstrip("/") if args.dsm_url else "" + cfg.dsm_user = args.dsm_user + cfg.dsm_password = args.dsm_password + cfg.dsm_path = args.dsm_path + cfg.init_endpoints() if not args.json_mode and not args.list_only: - print(f"并发下载线程数: {MAX_WORKERS}") + print(f"并发线程数: {cfg.max_workers}") result = run(args.ticket, args.output_dir, args.list_only, args.json_mode) # 下载完成后上传到群晖 DSM - if DSM_URL and not args.list_only and result["success"]: + if cfg.dsm_url and not args.list_only and result["success"]: serial_id = result.get("serialId", "") - upload_results = dsm_upload_downloaded_files(result["downloadedFiles"], args.ticket, serial_id, args.json_mode) + upload_results = dsm_upload_downloaded_files( + result["downloadedFiles"], args.ticket, serial_id, args.json_mode + ) result["dsmUpload"] = upload_results # 上传完成后清理本地下载文件