From 674fdbbf087757c25338a7612b2a71d93e26c85d Mon Sep 17 00:00:00 2001 From: afei A <57030625+NewHubBoy@users.noreply.github.com> Date: Sat, 14 Mar 2026 16:22:05 +0800 Subject: [PATCH] v0.0.1 --- CLAUDE.md | 216 +++++++++--------------------------- sap-c4c-AttachmentFolder.py | 104 +++++++++++++---- 2 files changed, 133 insertions(+), 187 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index bd0d427..9678e51 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,197 +4,85 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrieves attachments from ServiceRequest tickets and optionally uploads them to Synology DSM NAS. The project consists of: +SAP C4C (Cloud for Customer) attachment downloader toolkit that retrieves attachments from ServiceRequest tickets and optionally uploads them to Synology DSM NAS. -- **Python script** (`sap-c4c-AttachmentFolder.py`): Core downloader using OData APIs and web scraping -- **Java wrapper** (`C4CAttachmentDownloader.java`): Java interface that calls the Python script via ProcessBuilder -- **DSM upload script** (`dsm-upload.py`): Standalone Synology NAS upload utility - -## Architecture - -### Python Script (`sap-c4c-AttachmentFolder.py`) - -**Core functionality:** -1. Authenticates to SAP C4C using Basic Auth -2. Fetches ServiceRequest attachments via OData endpoints: - - `/sap/c4c/odata/v1/c4codata` - Standard C4C OData API - - `/sap/c4c/odata/cust/v1/custticketapi` - Custom ticket API -3. Downloads two types of attachments using **multi-threaded concurrent downloads**: - - **File attachments** (CategoryCode=2): Downloaded via OData `$value` endpoint - - **Link attachments** (CategoryCode=3): External Salesforce links scraped using Scrapling + Playwright -4. Handles XIssueItem-level attachments via `BO_XSRIssueItemAttachmentFolder` -5. 
Optionally uploads downloaded files to Synology DSM via FileStation API - -**Key dependencies:** -- `requests` - HTTP client for OData/REST APIs -- `scrapling[all]` - Web scraping framework with stealth capabilities -- `playwright` - Browser automation for downloading Salesforce attachments - -**Performance features:** -- Multi-threaded concurrent downloads (default: 5 threads, configurable via `--max-workers`) -- Thread-safe output logging with lock mechanism -- Parallel processing of both file and link attachments - -**Output modes:** -- Human-readable console output (default) -- JSON mode (`--json`) for programmatic consumption - -### Java Wrapper (`C4CAttachmentDownloader.java`) - -Provides a type-safe Java API that: -- Invokes the Python script via `ProcessBuilder` -- Passes credentials via environment variables (more secure than CLI args) -- Parses JSON output into strongly-typed Java objects -- Supports timeout configuration (default: 30 minutes) - -**Key classes:** -- `Result` - Top-level response containing all attachment metadata -- `Attachment` - Individual attachment metadata (UUID, filename, MIME type, category) -- `IssueItem` - XIssueItem with nested attachments -- `DownloadedFile` - Download result with local path and error info -- `DsmUploadEntry` - DSM upload result per file - -### DSM Upload (`dsm-upload.py`) - -Standalone script demonstrating Synology FileStation API usage: -1. Login via `SYNO.API.Auth` to obtain SID -2. 
Upload files via `SYNO.FileStation.Upload` with SID cookie +- **`sap-c4c-AttachmentFolder.py`**: Core downloader (Python >= 3.8) using OData APIs and web scraping +- **`C4CAttachmentDownloader.java`**: Java wrapper that calls the Python script via ProcessBuilder +- **`dsm-upload.py`**: Standalone Synology NAS upload example ## Common Commands -### Python Script - ```bash # Install dependencies pip install requests scrapling[all] playwright python -m playwright install chromium -# Download attachments (credentials via CLI) +# Download attachments python sap-c4c-AttachmentFolder.py \ --tenant https://xxx.c4c.saphybriscloud.cn \ - --user admin \ - --password xxx \ - --ticket 24588 + --user admin --password xxx --ticket 24588 -# Download with custom thread count (default: 5) -python sap-c4c-AttachmentFolder.py \ - --tenant https://xxx.c4c.saphybriscloud.cn \ - --user admin \ - --password xxx \ - --ticket 24588 \ - --max-workers 10 - -# Download with DSM upload -python sap-c4c-AttachmentFolder.py \ - --tenant https://xxx.c4c.saphybriscloud.cn \ - --user admin \ - --password xxx \ - --ticket 24588 \ - --dsm-url http://10.0.10.235:5000 \ - --dsm-user PLM \ - --dsm-password 123456 \ - --dsm-path /Newgonow/AU-SPFJ - -# JSON mode (for Java/programmatic use) -python sap-c4c-AttachmentFolder.py --ticket 24588 --json +# Download with custom concurrency (default: 5 threads) +python sap-c4c-AttachmentFolder.py --ticket 24588 --max-workers 10 # List attachments only (no download) python sap-c4c-AttachmentFolder.py --ticket 24588 --list-only -# Using environment variables for credentials -export C4C_TENANT=https://xxx.c4c.saphybriscloud.cn -export C4C_USERNAME=admin -export C4C_PASSWORD=xxx -export DSM_URL=http://10.0.10.235:5000 -export DSM_USERNAME=PLM -export DSM_PASSWORD=123456 -export DSM_PATH=/Newgonow/AU-SPFJ +# JSON mode (for Java/programmatic use) python sap-c4c-AttachmentFolder.py --ticket 24588 --json + +# Download + upload to Synology DSM +python 
sap-c4c-AttachmentFolder.py --ticket 24588 \ + --dsm-url http://10.0.10.235:5000 --dsm-user PLM \ + --dsm-password 123456 --dsm-path /Newgonow/AU-SPFJ + +# All credentials also accept environment variables: +# C4C_TENANT, C4C_USERNAME, C4C_PASSWORD, DSM_URL, DSM_USERNAME, DSM_PASSWORD, DSM_PATH ``` +```bash +# Java wrapper: compiling requires Jackson (jackson-databind, jackson-core, jackson-annotations) +javac -cp jackson-databind.jar:jackson-core.jar:jackson-annotations.jar C4CAttachmentDownloader.java +``` + +## Architecture + +### Data Flow + +1. Authenticate to SAP C4C via Basic Auth +2. Look up ServiceRequest by ticket ID -> get ObjectID and SerialID +3. Fetch SR-level attachments via `/sap/c4c/odata/v1/c4codata/ServiceRequestCollection('{OID}')/ServiceRequestAttachmentFolder` +4. Fetch XIssueItem-level attachments via `/sap/c4c/odata/cust/v1/custticketapi/BO_XSRIssueItemAttachmentCollection` (two-step: filter by UUID, then navigate to AttachmentFolder) +5. Download concurrently using ThreadPoolExecutor: + - **CategoryCode "2"** (file): OData `$value` endpoint or `DocumentLink` URL + - **CategoryCode "3"** (link): Scrapling + Playwright opens Salesforce URL, clicks `button.downloadbutton[title='Download']`, captures download +6. Optionally upload to Synology DSM via FileStation API, then **auto-delete local files** + +### Two OData Endpoints + +- `/sap/c4c/odata/v1/c4codata` (`ODATA_C4C`) - Standard C4C OData for ServiceRequest and SR-level attachments +- `/sap/c4c/odata/cust/v1/custticketapi` (`ODATA_CUST`) - Custom ticket API for XIssueItem and its attachments + ### Java Wrapper -```java -// Compile (requires Jackson for JSON parsing) -javac -cp jackson-databind.jar:jackson-core.jar:jackson-annotations.jar C4CAttachmentDownloader.java +Invokes Python script with `--json` flag, passes credentials via **environment variables** (not CLI args, for security). Parses JSON into typed classes: `Result`, `Attachment`, `IssueItem`, `DownloadedFile`, `DsmUploadEntry`.
Default timeout: 30 minutes. -// Basic usage -C4CAttachmentDownloader downloader = new C4CAttachmentDownloader( - "/path/to/sap-c4c-AttachmentFolder.py", - "https://xxx.c4c.saphybriscloud.cn", - "admin", - "password" -); +### DSM Upload Directory Structure -// List attachments only -C4CAttachmentDownloader.Result result = downloader.listAttachments("24588"); +- SR attachments: `{DSM_PATH}/{ticketID}_{serialID}/{filename}` +- IssueItem attachments: `{DSM_PATH}/{ticketID}_{serialID}/{issueID}/{filename}` -// Download to default directory -C4CAttachmentDownloader.Result result = downloader.download("24588"); +### Concurrency Model -// Download to specific directory -C4CAttachmentDownloader.Result result = downloader.download("24588", "/tmp/ticket_24588"); +Multi-threaded via `ThreadPoolExecutor` (default 5, `--max-workers`). Both file and link downloads are submitted as futures. Thread-safe console output uses a `print_lock`. The `requests.Session` is shared across file-download threads; note that `requests` does not formally guarantee `Session` thread safety. Scrapling/Playwright link downloads each launch their own browser. -downloader.setDsmConfig("http://10.0.10.235:5000", "PLM", "123456", "/Newgonow/AU-SPFJ"); -C4CAttachmentDownloader.Result result = downloader.download("24588", "/tmp/ticket_24588"); -``` +### Global State -## Key Implementation Details - -### Attachment Categories - -SAP C4C uses `CategoryCode` to distinguish attachment types: -- **"2"** = File attachment (binary content stored in C4C, downloaded via OData `$value`) -- **"3"** = Link attachment (external URL, typically Salesforce links requiring web scraping) - -### OData Navigation Paths - -**ServiceRequest attachments:** -``` -/ServiceRequestCollection('{ObjectID}')/ServiceRequestAttachmentFolder -``` - -**XIssueItem attachments (two-step navigation):** -``` -1. /BO_XSRIssueItemAttachmentCollection?$filter=XIssueItemUUID eq guid'{uuid}' -2.
/BO_XSRIssueItemAttachmentCollection('{ObjectID}')/BO_XSRIssueItemAttachmentFolder -``` - -### Scrapling Download Strategy - -For CategoryCode=3 (link attachments), the script: -1. Opens the Salesforce link in a headless Chromium browser -2. Waits for `button.downloadbutton[title='Download']` selector -3. Clicks the button and captures the download -4. Saves with original or suggested filename - -### Security Considerations - -- Java wrapper passes credentials via **environment variables** (not CLI args) to avoid exposure in process lists -- Python script supports both CLI args and environment variables -- DSM API uses session-based authentication (SID cookie) -- SSL verification disabled (`verify=False`) - consider enabling in production - -## File Structure - -``` -. -├── C4CAttachmentDownloader.java # Java wrapper with typed API -├── sap-c4c-AttachmentFolder.py # Core Python downloader -├── dsm-upload.py # Standalone DSM upload example -└── downloads/ # Default output directory -``` +The Python script uses module-level globals (`TENANT`, `USERNAME`, `PASSWORD`, `ODATA_C4C`, `ODATA_CUST`, `OUTPUT_DIR`, `DSM_*`, `MAX_WORKERS`) initialized in `main()`. The `run()` function is the core entry point returning a structured dict. ## Troubleshooting -**Playwright not installed:** -```bash -python -m playwright install chromium -``` - -**Timeout errors:** Increase timeout in Java wrapper constructor (default 30 minutes) or adjust Scrapling timeout parameters. - -**DSM upload fails:** Verify DSM URL, credentials, and that target path exists or `create_parents=true` is set. - -**Link download fails:** Check that Salesforce page structure matches expected selector (`button.downloadbutton[title='Download']`). Update `download_link_via_scrapling()` if page structure changes. 
+- **Playwright not installed**: `python -m playwright install chromium` +- **Link download fails**: Salesforce page selector `button.downloadbutton[title='Download']` may have changed; update `download_link_via_scrapling()` +- **Timeout**: Increase Java wrapper timeout or Scrapling's `timeout` param (currently 60s page load, 120s download wait) +- **SSL warnings**: `verify=False` is used throughout; `urllib3` warnings are suppressed diff --git a/sap-c4c-AttachmentFolder.py b/sap-c4c-AttachmentFolder.py index bb7af03..6f43933 100644 --- a/sap-c4c-AttachmentFolder.py +++ b/sap-c4c-AttachmentFolder.py @@ -83,15 +83,17 @@ def get_session(): def find_service_request_object_id(session, ticket_id): - """通过人类可读的 ticket ID 查找 OData ObjectID""" + """通过人类可读的 ticket ID 查找 OData ObjectID 和 SerialID""" url = f"{ODATA_C4C}/ServiceRequestCollection" - params = {"$format": "json", "$filter": f"ID eq '{ticket_id}'", "$select": "ObjectID,ID"} + params = {"$format": "json", "$filter": f"ID eq '{ticket_id}'"} resp = session.get(url, params=params, timeout=60) resp.raise_for_status() results = resp.json().get("d", {}).get("results", []) if not results: raise ValueError(f"未找到 ID={ticket_id} 的 ServiceRequest") - return results[0]["ObjectID"] + sr = results[0] + serial_id = sr.get("SerialID", "") + return sr["ObjectID"], serial_id def _parse_attachments(results): @@ -137,6 +139,15 @@ def list_issue_items(session, ticket_id): return resp.json().get("d", {}).get("results", []) +def get_issue_item_detail(session, object_id): + """通过 ObjectID 获取 XIssueItem 详细信息,包括真实的 IssueID_SDK""" + url = f"{ODATA_CUST}/ServiceRequest_XIssueItem_SDKCollection('{object_id}')" + params = {"$format": "json"} + resp = session.get(url, params=params, timeout=60) + resp.raise_for_status() + return resp.json().get("d", {}).get("results", {}) + + def list_issue_item_attachments(session, issue_item_uuid): """ 获取 XIssueItem 级别的附件。 @@ -325,8 +336,8 @@ def dsm_upload_file(sid, local_path, remote_path): return data -def 
dsm_upload_downloaded_files(downloaded_files, json_mode=False): - """将所有已下载文件上传到群晖 DSM""" +def dsm_upload_downloaded_files(downloaded_files, ticket_id, serial_id, json_mode=False): + """将所有已下载文件上传到群晖 DSM,按 ticket 和 issue 组织目录结构""" if not DSM_URL or not DSM_USER or not DSM_PASSWORD or not DSM_PATH: return [] @@ -334,10 +345,13 @@ def dsm_upload_downloaded_files(downloaded_files, json_mode=False): if not files_to_upload: return [] + # 目录名: ticketID_serialID + folder_name = f"{ticket_id}_{serial_id}" if serial_id else ticket_id + if not json_mode: print(f"\n{'='*60}") print(f"上传到群晖 DSM: {DSM_URL}") - print(f"目标路径: {DSM_PATH}") + print(f"目标路径: {DSM_PATH}/{folder_name}") print('='*60) upload_results = [] @@ -353,12 +367,29 @@ def dsm_upload_downloaded_files(downloaded_files, json_mode=False): for f in files_to_upload: local_path = f["savedPath"] filename = os.path.basename(local_path) - entry = {"file": filename, "remotePath": f"{DSM_PATH}/{filename}"} + issue_id = f.get("issueId", "") + + # 根据 issueId 判断目录结构 + # SR 附件: {DSM_PATH}/{ticketID_serialID}/{filename} + # IssueItem 附件: {DSM_PATH}/{ticketID_serialID}/{issueID}/{filename} + if issue_id: + remote_path = f"{DSM_PATH}/{folder_name}/{issue_id}" + else: + remote_path = f"{DSM_PATH}/{folder_name}" + full_remote_path = f"{remote_path}/{filename}" + + entry = { + "file": filename, + "ticketId": ticket_id, + "serialId": serial_id, + "issueId": issue_id, + "remotePath": full_remote_path, + } try: - dsm_upload_file(sid, local_path, DSM_PATH) + dsm_upload_file(sid, local_path, remote_path) entry["success"] = True if not json_mode: - print(f" 上传成功: {filename} -> {DSM_PATH}/{filename}") + print(f" 上传成功: {filename} -> {full_remote_path}") except Exception as e: entry["success"] = False entry["error"] = str(e) @@ -441,11 +472,12 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): } try: - # 1) 通过 ticket ID 找到 ObjectID - sr_object_id = find_service_request_object_id(session, ticket_id) + # 1) 通过 ticket ID 找到 
ObjectID 和 SerialID + sr_object_id, serial_id = find_service_request_object_id(session, ticket_id) result["srObjectId"] = sr_object_id + result["serialId"] = serial_id if not json_mode: - print(f"ServiceRequest ID={ticket_id}, ObjectID={sr_object_id}") + print(f"ServiceRequest ID={ticket_id}, ObjectID={sr_object_id}, SerialID={serial_id}") # 2) ServiceRequest 级别附件 if not json_mode: @@ -458,7 +490,7 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): print(f"找到 {len(sr_attachments)} 个附件") if not list_only: - _do_download(session, sr_attachments, "SR", None, result, json_mode) + _do_download(session, sr_attachments, "SR", "", None, result, json_mode) # 3) XIssueItem 级别附件 if not json_mode: @@ -474,8 +506,17 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): issue_uuid = item.get("XIssueItemUUIDcontent_SDK", "") issue_desc = (item.get("IssuesDescriptionX_SDK") or "")[:80] + # 通过 ObjectID 查询详细信息,获取真实的 IssueID_SDK + issue_id = "" + try: + item_detail = get_issue_item_detail(session, item_oid) + issue_id = item_detail.get("IssueID_SDK", "") + except Exception as e: + print(f" ⚠ 获取 IssueID 失败: {e}", file=sys.stderr) + issue_entry = { "objectId": item_oid, + "issueId": issue_id, "uuid": issue_uuid, "description": issue_desc, "attachments": [], @@ -483,6 +524,8 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): if not json_mode: print(f"\n XIssueItem: {item_oid}") + if issue_id: + print(f" IssueID: {issue_id}") print(f" UUID: {issue_uuid}") print(f" 描述: {issue_desc}") @@ -498,8 +541,9 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): print(f" 找到 {len(atts)} 个附件") if not list_only: + label = f"IssueItem-{issue_id}" if issue_id else f"IssueItem-{item_oid[:12]}" _do_download( - session, atts, f"IssueItem-{item_oid[:12]}", + session, atts, label, issue_id, f"{ODATA_CUST}/BO_XSRIssueItemAttachmentFolderCollection", result, json_mode, ) @@ -510,7 +554,8 @@ def run(ticket_id, output_dir, list_only=False, 
json_mode=False): if not json_mode: all_attachments = [("SR", sr_attachments)] for ie in result["issueItems"]: - all_attachments.append((f"IssueItem-{ie['objectId'][:12]}", ie["attachments"])) + ie_label = f"IssueItem-{ie['issueId']}" if ie.get("issueId") else f"IssueItem-{ie['objectId'][:12]}" + all_attachments.append((ie_label, ie["attachments"])) print_attachment_summary(all_attachments) except Exception as e: @@ -522,9 +567,9 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False): return result -def _download_single_file(session, att, label, odata_url, json_mode): +def _download_single_file(session, att, label, issue_id, odata_url, json_mode): """下载单个文件附件(用于多线程)""" - entry = {"source": label, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")} + entry = {"source": label, "issueId": issue_id, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")} try: content = download_file_via_odata(session, att, odata_url) file_path = os.path.join(OUTPUT_DIR, att["FileName"]) @@ -543,10 +588,10 @@ def _download_single_file(session, att, label, odata_url, json_mode): return entry -def _download_single_link(link_att, label, json_mode): +def _download_single_link(link_att, label, issue_id, json_mode): """下载单个链接附件(用于多线程)""" link_url = link_att.get("LinkWebURI") - entry = {"source": label, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url} + entry = {"source": label, "issueId": issue_id, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url} if not link_url: entry["error"] = "无链接地址" @@ -571,7 +616,7 @@ def _download_single_link(link_att, label, json_mode): return entry -def _do_download(session, attachments, label, odata_url, result, json_mode): +def _do_download(session, attachments, label, issue_id, odata_url, result, json_mode): """执行下载并将结果追加到 result['downloadedFiles'](多线程版本)""" file_atts = [a for a in attachments if a["CategoryCode"] == "2"] link_atts = [a for a in attachments if 
a["CategoryCode"] == "3"] @@ -584,12 +629,12 @@ def _do_download(session, attachments, label, odata_url, result, json_mode): # 提交文件附件下载任务 for att in file_atts: - future = executor.submit(_download_single_file, session, att, label, odata_url, json_mode) + future = executor.submit(_download_single_file, session, att, label, issue_id, odata_url, json_mode) futures.append(future) # 提交链接附件下载任务 for att in link_atts: - future = executor.submit(_download_single_link, att, label, json_mode) + future = executor.submit(_download_single_link, att, label, issue_id, json_mode) futures.append(future) # 收集结果 @@ -659,9 +704,22 @@ def main(): # 下载完成后上传到群晖 DSM if DSM_URL and not args.list_only and result["success"]: - upload_results = dsm_upload_downloaded_files(result["downloadedFiles"], args.json_mode) + serial_id = result.get("serialId", "") + upload_results = dsm_upload_downloaded_files(result["downloadedFiles"], args.ticket, serial_id, args.json_mode) result["dsmUpload"] = upload_results + # 上传完成后清理本地下载文件 + for f in result["downloadedFiles"]: + local_path = f.get("savedPath") + if local_path and os.path.exists(local_path): + try: + os.remove(local_path) + if not args.json_mode: + print(f" 已删除本地文件: {local_path}") + except OSError as e: + if not args.json_mode: + print(f" 删除失败: {local_path}: {e}") + if args.json_mode: print(json.dumps(result, ensure_ascii=False, indent=2)) sys.exit(0 if result["success"] else 1)