From 674fdbbf087757c25338a7612b2a71d93e26c85d Mon Sep 17 00:00:00 2001
From: afei A <57030625+NewHubBoy@users.noreply.github.com>
Date: Sat, 14 Mar 2026 16:22:05 +0800
Subject: [PATCH] v0.0.1

---
 CLAUDE.md                   | 216 +++++++++---------------------------
 sap-c4c-AttachmentFolder.py | 104 +++++++++++++----
 2 files changed, 133 insertions(+), 187 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index bd0d427..9678e51 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,197 +4,85 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## Project Overview
 
-This is a SAP C4C (Cloud for Customer) attachment downloader toolkit that retrieves attachments from ServiceRequest tickets and optionally uploads them to Synology DSM NAS. The project consists of:
+SAP C4C (Cloud for Customer) attachment downloader toolkit that retrieves attachments from ServiceRequest tickets and optionally uploads them to Synology DSM NAS.
 
-- **Python script** (`sap-c4c-AttachmentFolder.py`): Core downloader using OData APIs and web scraping
-- **Java wrapper** (`C4CAttachmentDownloader.java`): Java interface that calls the Python script via ProcessBuilder
-- **DSM upload script** (`dsm-upload.py`): Standalone Synology NAS upload utility
-
-## Architecture
-
-### Python Script (`sap-c4c-AttachmentFolder.py`)
-
-**Core functionality:**
-1. Authenticates to SAP C4C using Basic Auth
-2. Fetches ServiceRequest attachments via OData endpoints:
-   - `/sap/c4c/odata/v1/c4codata` - Standard C4C OData API
-   - `/sap/c4c/odata/cust/v1/custticketapi` - Custom ticket API
-3. Downloads two types of attachments using **multi-threaded concurrent downloads**:
-   - **File attachments** (CategoryCode=2): Downloaded via OData `$value` endpoint
-   - **Link attachments** (CategoryCode=3): External Salesforce links scraped using Scrapling + Playwright
-4. Handles XIssueItem-level attachments via `BO_XSRIssueItemAttachmentFolder`
-5. Optionally uploads downloaded files to Synology DSM via FileStation API
-
-**Key dependencies:**
-- `requests` - HTTP client for OData/REST APIs
-- `scrapling[all]` - Web scraping framework with stealth capabilities
-- `playwright` - Browser automation for downloading Salesforce attachments
-
-**Performance features:**
-- Multi-threaded concurrent downloads (default: 5 threads, configurable via `--max-workers`)
-- Thread-safe output logging with lock mechanism
-- Parallel processing of both file and link attachments
-
-**Output modes:**
-- Human-readable console output (default)
-- JSON mode (`--json`) for programmatic consumption
-
-### Java Wrapper (`C4CAttachmentDownloader.java`)
-
-Provides a type-safe Java API that:
-- Invokes the Python script via `ProcessBuilder`
-- Passes credentials via environment variables (more secure than CLI args)
-- Parses JSON output into strongly-typed Java objects
-- Supports timeout configuration (default: 30 minutes)
-
-**Key classes:**
-- `Result` - Top-level response containing all attachment metadata
-- `Attachment` - Individual attachment metadata (UUID, filename, MIME type, category)
-- `IssueItem` - XIssueItem with nested attachments
-- `DownloadedFile` - Download result with local path and error info
-- `DsmUploadEntry` - DSM upload result per file
-
-### DSM Upload (`dsm-upload.py`)
-
-Standalone script demonstrating Synology FileStation API usage:
-1. Login via `SYNO.API.Auth` to obtain SID
-2. Upload files via `SYNO.FileStation.Upload` with SID cookie
+- **`sap-c4c-AttachmentFolder.py`**: Core downloader (Python >= 3.8) using OData APIs and web scraping
+- **`C4CAttachmentDownloader.java`**: Java wrapper that calls the Python script via ProcessBuilder
+- **`dsm-upload.py`**: Standalone Synology NAS upload example
 
 ## Common Commands
 
-### Python Script
-
 ```bash
 # Install dependencies
 pip install requests scrapling[all] playwright
 python -m playwright install chromium
 
-# Download attachments (credentials via CLI)
+# Download attachments
 python sap-c4c-AttachmentFolder.py \
   --tenant https://xxx.c4c.saphybriscloud.cn \
-  --user admin \
-  --password xxx \
-  --ticket 24588
+  --user admin --password xxx --ticket 24588
 
-# Download with custom thread count (default: 5)
-python sap-c4c-AttachmentFolder.py \
-  --tenant https://xxx.c4c.saphybriscloud.cn \
-  --user admin \
-  --password xxx \
-  --ticket 24588 \
-  --max-workers 10
-
-# Download with DSM upload
-python sap-c4c-AttachmentFolder.py \
-  --tenant https://xxx.c4c.saphybriscloud.cn \
-  --user admin \
-  --password xxx \
-  --ticket 24588 \
-  --dsm-url http://10.0.10.235:5000 \
-  --dsm-user PLM \
-  --dsm-password 123456 \
-  --dsm-path /Newgonow/AU-SPFJ
-
-# JSON mode (for Java/programmatic use)
-python sap-c4c-AttachmentFolder.py --ticket 24588 --json
+# Download with custom concurrency (default: 5 threads)
+python sap-c4c-AttachmentFolder.py --ticket 24588 --max-workers 10
 
 # List attachments only (no download)
 python sap-c4c-AttachmentFolder.py --ticket 24588 --list-only
 
-# Using environment variables for credentials
-export C4C_TENANT=https://xxx.c4c.saphybriscloud.cn
-export C4C_USERNAME=admin
-export C4C_PASSWORD=xxx
-export DSM_URL=http://10.0.10.235:5000
-export DSM_USERNAME=PLM
-export DSM_PASSWORD=123456
-export DSM_PATH=/Newgonow/AU-SPFJ
+# JSON mode (for Java/programmatic use)
 python sap-c4c-AttachmentFolder.py --ticket 24588 --json
+
+# Download + upload to Synology DSM
+python sap-c4c-AttachmentFolder.py --ticket 24588 \
+  --dsm-url http://10.0.10.235:5000 --dsm-user PLM \
+  --dsm-password 123456 --dsm-path /Newgonow/AU-SPFJ
+
+# Tenant URL, credentials and DSM settings can also be supplied via environment variables:
+# C4C_TENANT, C4C_USERNAME, C4C_PASSWORD, DSM_URL, DSM_USERNAME, DSM_PASSWORD, DSM_PATH
 ```
 
+```bash
+# Compile the Java wrapper (requires Jackson: jackson-databind, jackson-core, jackson-annotations)
+javac -cp jackson-databind.jar:jackson-core.jar:jackson-annotations.jar C4CAttachmentDownloader.java
+```
+
+## Architecture
+
+### Data Flow
+
+1. Authenticate to SAP C4C via Basic Auth
+2. Look up ServiceRequest by ticket ID -> get ObjectID and SerialID
+3. Fetch SR-level attachments via `/sap/c4c/odata/v1/c4codata/ServiceRequestCollection('{OID}')/ServiceRequestAttachmentFolder`
+4. Fetch XIssueItem-level attachments via `/sap/c4c/odata/cust/v1/custticketapi/BO_XSRIssueItemAttachmentCollection` (two-step: filter by UUID, then navigate to AttachmentFolder)
+5. Download concurrently using ThreadPoolExecutor:
+   - **CategoryCode "2"** (file): OData `$value` endpoint or `DocumentLink` URL
+   - **CategoryCode "3"** (link): Scrapling + Playwright opens Salesforce URL, clicks `button.downloadbutton[title='Download']`, captures download
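+6. Optionally upload to Synology DSM via FileStation API, then **auto-delete local files** (once the upload step has run, every downloaded local copy is removed, whether or not its individual upload succeeded)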
+
+### Two OData Endpoints
+
+- `/sap/c4c/odata/v1/c4codata` (`ODATA_C4C`) - Standard C4C OData for ServiceRequest and SR-level attachments
+- `/sap/c4c/odata/cust/v1/custticketapi` (`ODATA_CUST`) - Custom ticket API for XIssueItem and its attachments
+
 ### Java Wrapper
 
-```java
-// Compile (requires Jackson for JSON parsing)
-javac -cp jackson-databind.jar:jackson-core.jar:jackson-annotations.jar C4CAttachmentDownloader.java
+Invokes Python script with `--json` flag, passes credentials via **environment variables** (not CLI args for security). Parses JSON into typed classes: `Result`, `Attachment`, `IssueItem`, `DownloadedFile`, `DsmUploadEntry`. Default timeout: 30 minutes.
 
-// Basic usage
-C4CAttachmentDownloader downloader = new C4CAttachmentDownloader(
-    "/path/to/sap-c4c-AttachmentFolder.py",
-    "https://xxx.c4c.saphybriscloud.cn",
-    "admin",
-    "password"
-);
+### DSM Upload Directory Structure
 
-// List attachments only
-C4CAttachmentDownloader.Result result = downloader.listAttachments("24588");
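+- SR attachments: `{DSM_PATH}/{ticketID}_{serialID}/{filename}` (the folder is just `{ticketID}` when SerialID is empty)
+- IssueItem attachments: `{DSM_PATH}/{ticketID}_{serialID}/{issueID}/{filename}` (falls back to the SR layout when no IssueID was resolved)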
 
-// Download to default directory
-C4CAttachmentDownloader.Result result = downloader.download("24588");
+### Concurrency Model
 
-// Download to specific directory
-C4CAttachmentDownloader.Result result = downloader.download("24588", "/tmp/ticket_24588");
+Multi-threaded via `ThreadPoolExecutor` (default 5, `--max-workers`). Both file and link downloads are submitted as futures. Thread-safe console output uses a `print_lock`. The `requests.Session` is shared across file-download threads (thread-safe). Scrapling/Playwright link downloads each launch their own browser.
 
-// Download with DSM upload
-downloader.setDsmConfig("http://10.0.10.235:5000", "PLM", "123456", "/Newgonow/AU-SPFJ");
-C4CAttachmentDownloader.Result result = downloader.download("24588", "/tmp/ticket_24588");
-```
+### Global State
 
-## Key Implementation Details
-
-### Attachment Categories
-
-SAP C4C uses `CategoryCode` to distinguish attachment types:
-- **"2"** = File attachment (binary content stored in C4C, downloaded via OData `$value`)
-- **"3"** = Link attachment (external URL, typically Salesforce links requiring web scraping)
-
-### OData Navigation Paths
-
-**ServiceRequest attachments:**
-```
-/ServiceRequestCollection('{ObjectID}')/ServiceRequestAttachmentFolder
-```
-
-**XIssueItem attachments (two-step navigation):**
-```
-1. /BO_XSRIssueItemAttachmentCollection?$filter=XIssueItemUUID eq guid'{uuid}'
-2. /BO_XSRIssueItemAttachmentCollection('{ObjectID}')/BO_XSRIssueItemAttachmentFolder
-```
-
-### Scrapling Download Strategy
-
-For CategoryCode=3 (link attachments), the script:
-1. Opens the Salesforce link in a headless Chromium browser
-2. Waits for `button.downloadbutton[title='Download']` selector
-3. Clicks the button and captures the download
-4. Saves with original or suggested filename
-
-### Security Considerations
-
-- Java wrapper passes credentials via **environment variables** (not CLI args) to avoid exposure in process lists
-- Python script supports both CLI args and environment variables
-- DSM API uses session-based authentication (SID cookie)
-- SSL verification disabled (`verify=False`) - consider enabling in production
-
-## File Structure
-
-```
-.
-├── C4CAttachmentDownloader.java    # Java wrapper with typed API
-├── sap-c4c-AttachmentFolder.py     # Core Python downloader
-├── dsm-upload.py                   # Standalone DSM upload example
-└── downloads/                      # Default output directory
-```
+The Python script uses module-level globals (`TENANT`, `USERNAME`, `PASSWORD`, `ODATA_C4C`, `ODATA_CUST`, `OUTPUT_DIR`, `DSM_*`, `MAX_WORKERS`) initialized in `main()`. The `run()` function is the core entry point returning a structured dict.
 
 ## Troubleshooting
 
-**Playwright not installed:**
-```bash
-python -m playwright install chromium
-```
-
-**Timeout errors:** Increase timeout in Java wrapper constructor (default 30 minutes) or adjust Scrapling timeout parameters.
-
-**DSM upload fails:** Verify DSM URL, credentials, and that target path exists or `create_parents=true` is set.
-
-**Link download fails:** Check that Salesforce page structure matches expected selector (`button.downloadbutton[title='Download']`). Update `download_link_via_scrapling()` if page structure changes.
+- **Playwright not installed**: `python -m playwright install chromium`
+- **Link download fails**: Salesforce page selector `button.downloadbutton[title='Download']` may have changed; update `download_link_via_scrapling()`
+- **Timeout**: Increase Java wrapper timeout or Scrapling's `timeout` param (currently 60s page load, 120s download wait)
+- **SSL warnings**: `verify=False` is used throughout; `urllib3` warnings are suppressed
diff --git a/sap-c4c-AttachmentFolder.py b/sap-c4c-AttachmentFolder.py
index bb7af03..6f43933 100644
--- a/sap-c4c-AttachmentFolder.py
+++ b/sap-c4c-AttachmentFolder.py
@@ -83,15 +83,17 @@ def get_session():
 
 
 def find_service_request_object_id(session, ticket_id):
-    """通过人类可读的 ticket ID 查找 OData ObjectID"""
+    """通过人类可读的 ticket ID 查找 OData ObjectID 和 SerialID"""
     url = f"{ODATA_C4C}/ServiceRequestCollection"
-    params = {"$format": "json", "$filter": f"ID eq '{ticket_id}'", "$select": "ObjectID,ID"}
+    params = {"$format": "json", "$filter": f"ID eq '{ticket_id}'"}
     resp = session.get(url, params=params, timeout=60)
     resp.raise_for_status()
     results = resp.json().get("d", {}).get("results", [])
     if not results:
         raise ValueError(f"未找到 ID={ticket_id} 的 ServiceRequest")
-    return results[0]["ObjectID"]
+    sr = results[0]
+    serial_id = sr.get("SerialID", "")
+    return sr["ObjectID"], serial_id
 
 
 def _parse_attachments(results):
@@ -137,6 +139,15 @@ def list_issue_items(session, ticket_id):
     return resp.json().get("d", {}).get("results", [])
 
 
+def get_issue_item_detail(session, object_id):
+    """Fetch one XIssueItem entity by ObjectID and return its d/results payload ({} if absent); expected to carry IssueID_SDK."""
+    url = f"{ODATA_CUST}/ServiceRequest_XIssueItem_SDKCollection('{object_id}')"
+    params = {"$format": "json"}
+    resp = session.get(url, params=params, timeout=60)
+    resp.raise_for_status()
+    return resp.json().get("d", {}).get("results", {})
+
+
 def list_issue_item_attachments(session, issue_item_uuid):
     """
     获取 XIssueItem 级别的附件。
@@ -325,8 +336,8 @@ def dsm_upload_file(sid, local_path, remote_path):
     return data
 
 
-def dsm_upload_downloaded_files(downloaded_files, json_mode=False):
-    """将所有已下载文件上传到群晖 DSM"""
+def dsm_upload_downloaded_files(downloaded_files, ticket_id, serial_id, json_mode=False):
+    """将所有已下载文件上传到群晖 DSM，按 ticket 和 issue 组织目录结构"""
     if not DSM_URL or not DSM_USER or not DSM_PASSWORD or not DSM_PATH:
         return []
 
@@ -334,10 +345,13 @@ def dsm_upload_downloaded_files(downloaded_files, json_mode=False):
     if not files_to_upload:
         return []
 
+    # 目录名: ticketID_serialID
+    folder_name = f"{ticket_id}_{serial_id}" if serial_id else ticket_id
+
     if not json_mode:
         print(f"\n{'='*60}")
         print(f"上传到群晖 DSM: {DSM_URL}")
-        print(f"目标路径: {DSM_PATH}")
+        print(f"目标路径: {DSM_PATH}/{folder_name}")
         print('='*60)
 
     upload_results = []
@@ -353,12 +367,29 @@ def dsm_upload_downloaded_files(downloaded_files, json_mode=False):
     for f in files_to_upload:
         local_path = f["savedPath"]
         filename = os.path.basename(local_path)
-        entry = {"file": filename, "remotePath": f"{DSM_PATH}/{filename}"}
+        issue_id = f.get("issueId", "")
+
+        # 根据 issueId 判断目录结构
+        # SR 附件: {DSM_PATH}/{ticketID_serialID}/{filename}
+        # IssueItem 附件: {DSM_PATH}/{ticketID_serialID}/{issueID}/{filename}
+        if issue_id:
+            remote_path = f"{DSM_PATH}/{folder_name}/{issue_id}"
+        else:
+            remote_path = f"{DSM_PATH}/{folder_name}"
+        full_remote_path = f"{remote_path}/{filename}"
+
+        entry = {
+            "file": filename,
+            "ticketId": ticket_id,
+            "serialId": serial_id,
+            "issueId": issue_id,
+            "remotePath": full_remote_path,
+        }
         try:
-            dsm_upload_file(sid, local_path, DSM_PATH)
+            dsm_upload_file(sid, local_path, remote_path)
             entry["success"] = True
             if not json_mode:
-                print(f"  上传成功: {filename} -> {DSM_PATH}/{filename}")
+                print(f"  上传成功: {filename} -> {full_remote_path}")
         except Exception as e:
             entry["success"] = False
             entry["error"] = str(e)
@@ -441,11 +472,12 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
     }
 
     try:
-        # 1) 通过 ticket ID 找到 ObjectID
-        sr_object_id = find_service_request_object_id(session, ticket_id)
+        # 1) 通过 ticket ID 找到 ObjectID 和 SerialID
+        sr_object_id, serial_id = find_service_request_object_id(session, ticket_id)
         result["srObjectId"] = sr_object_id
+        result["serialId"] = serial_id
         if not json_mode:
-            print(f"ServiceRequest ID={ticket_id}, ObjectID={sr_object_id}")
+            print(f"ServiceRequest ID={ticket_id}, ObjectID={sr_object_id}, SerialID={serial_id}")
 
         # 2) ServiceRequest 级别附件
         if not json_mode:
@@ -458,7 +490,7 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
             print(f"找到 {len(sr_attachments)} 个附件")
 
         if not list_only:
-            _do_download(session, sr_attachments, "SR", None, result, json_mode)
+            _do_download(session, sr_attachments, "SR", "", None, result, json_mode)
 
         # 3) XIssueItem 级别附件
         if not json_mode:
@@ -474,8 +506,17 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
             issue_uuid = item.get("XIssueItemUUIDcontent_SDK", "")
             issue_desc = (item.get("IssuesDescriptionX_SDK") or "")[:80]
 
+            # 通过 ObjectID 查询详细信息，获取真实的 IssueID_SDK
+            issue_id = ""
+            try:
+                item_detail = get_issue_item_detail(session, item_oid)
+                issue_id = item_detail.get("IssueID_SDK", "")
+            except Exception as e:
+                print(f"  ⚠ 获取 IssueID 失败: {e}", file=sys.stderr)
+
             issue_entry = {
                 "objectId": item_oid,
+                "issueId": issue_id,
                 "uuid": issue_uuid,
                 "description": issue_desc,
                 "attachments": [],
@@ -483,6 +524,8 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
 
             if not json_mode:
                 print(f"\n  XIssueItem: {item_oid}")
+                if issue_id:
+                    print(f"  IssueID: {issue_id}")
                 print(f"  UUID: {issue_uuid}")
                 print(f"  描述: {issue_desc}")
 
@@ -498,8 +541,9 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
                 print(f"  找到 {len(atts)} 个附件")
 
             if not list_only:
+                label = f"IssueItem-{issue_id}" if issue_id else f"IssueItem-{item_oid[:12]}"
                 _do_download(
-                    session, atts, f"IssueItem-{item_oid[:12]}",
+                    session, atts, label, issue_id,
                     f"{ODATA_CUST}/BO_XSRIssueItemAttachmentFolderCollection",
                     result, json_mode,
                 )
@@ -510,7 +554,8 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
         if not json_mode:
             all_attachments = [("SR", sr_attachments)]
             for ie in result["issueItems"]:
-                all_attachments.append((f"IssueItem-{ie['objectId'][:12]}", ie["attachments"]))
+                ie_label = f"IssueItem-{ie['issueId']}" if ie.get("issueId") else f"IssueItem-{ie['objectId'][:12]}"
+                all_attachments.append((ie_label, ie["attachments"]))
             print_attachment_summary(all_attachments)
 
     except Exception as e:
@@ -522,9 +567,9 @@ def run(ticket_id, output_dir, list_only=False, json_mode=False):
     return result
 
 
-def _download_single_file(session, att, label, odata_url, json_mode):
+def _download_single_file(session, att, label, issue_id, odata_url, json_mode):
     """下载单个文件附件（用于多线程）"""
-    entry = {"source": label, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")}
+    entry = {"source": label, "issueId": issue_id, "c4cName": att["FileName"], "type": "file", "mime": att.get("MimeType")}
     try:
         content = download_file_via_odata(session, att, odata_url)
         file_path = os.path.join(OUTPUT_DIR, att["FileName"])
@@ -543,10 +588,10 @@ def _download_single_file(session, att, label, odata_url, json_mode):
     return entry
 
 
-def _download_single_link(link_att, label, json_mode):
+def _download_single_link(link_att, label, issue_id, json_mode):
     """下载单个链接附件（用于多线程）"""
     link_url = link_att.get("LinkWebURI")
-    entry = {"source": label, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url}
+    entry = {"source": label, "issueId": issue_id, "c4cName": link_att["FileName"], "type": "link", "linkUrl": link_url}
 
     if not link_url:
         entry["error"] = "无链接地址"
@@ -571,7 +616,7 @@ def _download_single_link(link_att, label, json_mode):
     return entry
 
 
-def _do_download(session, attachments, label, odata_url, result, json_mode):
+def _do_download(session, attachments, label, issue_id, odata_url, result, json_mode):
     """执行下载并将结果追加到 result['downloadedFiles']（多线程版本）"""
     file_atts = [a for a in attachments if a["CategoryCode"] == "2"]
     link_atts = [a for a in attachments if a["CategoryCode"] == "3"]
@@ -584,12 +629,12 @@ def _do_download(session, attachments, label, odata_url, result, json_mode):
 
         # 提交文件附件下载任务
         for att in file_atts:
-            future = executor.submit(_download_single_file, session, att, label, odata_url, json_mode)
+            future = executor.submit(_download_single_file, session, att, label, issue_id, odata_url, json_mode)
             futures.append(future)
 
         # 提交链接附件下载任务
         for att in link_atts:
-            future = executor.submit(_download_single_link, att, label, json_mode)
+            future = executor.submit(_download_single_link, att, label, issue_id, json_mode)
             futures.append(future)
 
         # 收集结果
@@ -659,9 +704,22 @@ def main():
 
     # 下载完成后上传到群晖 DSM
     if DSM_URL and not args.list_only and result["success"]:
-        upload_results = dsm_upload_downloaded_files(result["downloadedFiles"], args.json_mode)
+        serial_id = result.get("serialId", "")
+        upload_results = dsm_upload_downloaded_files(result["downloadedFiles"], args.ticket, serial_id, args.json_mode)
         result["dsmUpload"] = upload_results
 
+        # 上传完成后清理本地下载文件
+        for f in result["downloadedFiles"]:
+            local_path = f.get("savedPath")
+            if local_path and os.path.exists(local_path):
+                try:
+                    os.remove(local_path)
+                    if not args.json_mode:
+                        print(f"  已删除本地文件: {local_path}")
+                except OSError as e:
+                    if not args.json_mode:
+                        print(f"  删除失败: {local_path}: {e}")
+
     if args.json_mode:
         print(json.dumps(result, ensure_ascii=False, indent=2))
         sys.exit(0 if result["success"] else 1)