feat: 批量下载脚本、依赖文件、说明书

This commit is contained in:
afei A
2026-03-19 13:47:53 +08:00
parent 1edabd6d3b
commit 25059a50a2
8 changed files with 368 additions and 3 deletions

View File

@@ -3,7 +3,10 @@
"allow": [ "allow": [
"Bash(git init:*)", "Bash(git init:*)",
"Bash(git add:*)", "Bash(git add:*)",
"Bash(python -c \"import ast, sys; ast.parse\\(open\\('sap-c4c-AttachmentFolder.py'\\).read\\(\\)\\); print\\('语法检查通过'\\)\")" "Bash(python -c \"import ast, sys; ast.parse\\(open\\('sap-c4c-AttachmentFolder.py'\\).read\\(\\)\\); print\\('语法检查通过'\\)\")",
"Bash(git commit:*)",
"Bash(head:*)",
"Bash(python3:*)"
] ]
} }
} }

130
README.md Normal file
View File

@@ -0,0 +1,130 @@
# SAP C4C 附件批量下载工具
从 SAP C4C 下载 Ticket 附件并上传到群晖 DSM。
---
## 环境要求
- Python >= 3.8
- Ubuntu 20.04+ 或 macOS
---
## 安装
```bash
pip install -r requirements.txt
python -m playwright install chromium
# Ubuntu 需额外执行(需要 sudo)
python -m playwright install-deps chromium
```
---
## 文件说明
| 文件 | 说明 |
|---|---|
| `sap-c4c-AttachmentFolder.py` | 核心下载脚本,支持单个 Ticket 下载 |
| `batch_download.py` | 批量下载脚本,从 CSV 读取 ID 并行执行 |
| `get_ticket_ids.py` | 从 CSV 提取 Ticket ID 工具 |
| `datasource/` | 存放 SAP Analytics 导出的 CSV 文件 |
| `downloads/` | 本地临时下载目录(上传 DSM 后自动清理) |
| `error_log.txt` | 错误日志,每次批量运行时重置 |
---
## 单个 Ticket 下载
```bash
python sap-c4c-AttachmentFolder.py \
--tenant https://my300375.c4c.saphybriscloud.cn \
--user admin \
--password 'your_password' \
--ticket 20950 \
--output-dir ./downloads
```
### 同时上传到群晖 DSM
```bash
python sap-c4c-AttachmentFolder.py \
--tenant https://my300375.c4c.saphybriscloud.cn \
--user admin \
--password 'your_password' \
--ticket 20950 \
--output-dir ./downloads \
--dsm-url http://10.0.10.235:5000 \
--dsm-user PLM \
--dsm-password 'dsm_password' \
--dsm-path /Newgonow/AU-SPFJ
```
### 仅列出附件不下载
```bash
python sap-c4c-AttachmentFolder.py ... --list-only
```
### JSON 输出模式
```bash
python sap-c4c-AttachmentFolder.py ... --json
```
---
## 批量下载
将 SAP Analytics 导出的 CSV 文件放入 `datasource/` 目录,然后运行:
```bash
python batch_download.py
```
- 默认读取 CSV 第二列的 Ticket ID,取前 10 个
- 最多 5 个任务并行执行
- 每个 Ticket 使用独立子目录 `downloads/{ticket_id}`,避免并行冲突
- 上传 DSM 完成后自动清理本地文件和子目录
- 错误记录到 `error_log.txt`
### 修改并行数或 Ticket 数量
编辑 `batch_download.py` 顶部配置:
```python
WORKERS = 5 # 并行数
# get_ticket_ids(10) 中的 10 为读取数量
```
---
## DSM 目录结构
上传后文件按以下结构组织:
```
{dsm_path}/
└── {ticket_id}_{serial_id}/
└── {issue_id}_{issue_description}/
└── 附件文件
```
> `Quote & Chassis` 类型的附件会同时复制到所有其他 issue 目录下。
---
## 环境变量(可选)
也可通过环境变量代替命令行参数:
| 环境变量 | 对应参数 |
|---|---|
| `C4C_TENANT` | `--tenant` |
| `C4C_USERNAME` | `--user` |
| `C4C_PASSWORD` | `--password` |
| `DSM_URL` | `--dsm-url` |
| `DSM_USERNAME` | `--dsm-user` |
| `DSM_PASSWORD` | `--dsm-password` |
| `DSM_PATH` | `--dsm-path` |

145
batch_download.py Normal file
View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""Batch-download SAP C4C ticket attachments.

Reads ticket IDs from the CSV files in ``datasource/``, invokes the
single-ticket downloader script once per ticket in parallel worker
threads, and records any failures to ``error_log.txt``.
"""
import subprocess
import sys
import os
import json
import datetime
import csv
import glob
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

# ── Configuration ─────────────────────────────────────────────────────────────
# Each credential/endpoint can be overridden via the environment variables
# documented in the README (C4C_* / DSM_*), so secrets need not live in
# source control. The literal values remain as backward-compatible defaults.
# SECURITY NOTE(review): the fallback passwords below are committed in plain
# text — rotate them and rely on the environment variables instead.
TENANT = os.environ.get("C4C_TENANT", "https://my300375.c4c.saphybriscloud.cn")
USER = os.environ.get("C4C_USERNAME", "admin")
PASSWORD = os.environ.get("C4C_PASSWORD", "Xjait.1?")
OUTPUT = "./downloads"  # local staging dir; per-ticket subdirs created inside
DSM_URL = os.environ.get("DSM_URL", "http://10.0.10.235:5000")
DSM_USER = os.environ.get("DSM_USERNAME", "PLM")
DSM_PASS = os.environ.get("DSM_PASSWORD", "123456")
DSM_PATH = os.environ.get("DSM_PATH", "/Newgonow/AU-SPFJ")
WORKERS = 5  # max concurrent downloader subprocesses
SCRIPT = os.path.join(os.path.dirname(__file__), "sap-c4c-AttachmentFolder.py")
ERROR_LOG = os.path.join(os.path.dirname(__file__), "error_log.txt")
DATASOURCE = os.path.join(os.path.dirname(__file__), "datasource")

# Serializes console output across worker threads.
print_lock = threading.Lock()
# ─────────────────────────────────────────────────────────────────────────────
def get_ticket_ids(limit=10, column=1):
    """Collect up to *limit* unique ticket IDs from the datasource CSVs.

    Scans every ``*.csv`` file under ``DATASOURCE``, skips each file's
    header row, and reads the value at *column* from every data row.
    Duplicates are dropped; order of first appearance is preserved.

    Args:
        limit: maximum number of IDs to return (default 10).
        column: 0-based CSV column holding the ticket ID (default 1,
            i.e. the second column, matching the SAP Analytics export).

    Returns:
        list[str]: up to *limit* unique, stripped ticket-ID strings.
    """
    ids = []
    seen = set()
    for csv_file in glob.glob(os.path.join(DATASOURCE, "*.csv")):
        with open(csv_file, encoding="utf-8-sig") as f:
            reader = csv.reader(f)
            next(reader, None)  # skip header row
            for row in reader:
                if len(row) <= column:
                    continue  # malformed/short row — nothing in the ID column
                val = row[column].strip()
                if val and val not in seen:
                    seen.add(val)
                    ids.append(val)
                    if len(ids) >= limit:
                        # Early return replaces the original double break.
                        return ids
    return ids
def log_error(ticket_id, message):
    """Append a timestamped error line for *ticket_id* to ERROR_LOG."""
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] Ticket {ticket_id}: {message}\n"
    with open(ERROR_LOG, "a", encoding="utf-8") as log:
        log.write(entry)
def _locked_print(message):
    """Print *message* while holding the shared console lock."""
    with print_lock:
        print(message)


def _build_command(ticket_id):
    """Return the argv list for one single-ticket downloader invocation."""
    return [
        sys.executable, SCRIPT,
        "--tenant", TENANT,
        "--user", USER,
        "--password", PASSWORD,
        "--ticket", str(ticket_id),
        # Per-ticket subdirectory avoids collisions between parallel workers.
        "--output-dir", os.path.join(OUTPUT, str(ticket_id)),
        "--json",
        "--dsm-url", DSM_URL,
        "--dsm-user", DSM_USER,
        "--dsm-password", DSM_PASS,
        "--dsm-path", DSM_PATH,
    ]


def _report_json_result(ticket_id, result):
    """Parse the downloader's JSON stdout; print and log the outcome.

    Raises:
        json.JSONDecodeError: if ``result.stdout`` is not valid JSON
            (caller falls back to exit-code handling).
    """
    data = json.loads(result.stdout)
    success = data.get("success", False)
    downloaded = len(data.get("downloadedFiles", []))
    dsm_uploads = data.get("dsmUpload", [])
    ok_uploads = sum(1 for u in dsm_uploads if u.get("success"))
    fail_uploads = len(dsm_uploads) - ok_uploads
    if success:
        _locked_print(f" ✓ [{ticket_id}] 下载 {downloaded} 个文件, DSM 上传 {ok_uploads} 成功 {fail_uploads} 失败")
        if fail_uploads:
            # Ticket downloaded fine but some DSM uploads failed — record each.
            for u in dsm_uploads:
                if not u.get("success"):
                    log_error(ticket_id, f"DSM 上传失败: {u.get('file')} - {u.get('error')}")
    else:
        err = data.get("error", result.stderr or "未知错误")
        _locked_print(f" ✗ [{ticket_id}] 失败: {err}")
        log_error(ticket_id, err)


def run_ticket(ticket_id, index, total):
    """Download one ticket's attachments via the downloader subprocess.

    Runs SCRIPT with a 300-second timeout, interprets its JSON output when
    possible (otherwise falls back to the exit code), prints a one-line
    status, and appends failures to ERROR_LOG. Never raises: every error
    path is caught, reported, and logged here so one bad ticket cannot
    kill the batch.

    Args:
        ticket_id: ticket identifier (str or int).
        index: 1-based position of this ticket in the batch (progress display).
        total: total number of tickets in the batch.
    """
    _locked_print(f"\n[{index}/{total}] 开始下载 Ticket {ticket_id} ...")
    cmd = _build_command(ticket_id)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        try:
            _report_json_result(ticket_id, result)
        except json.JSONDecodeError:
            # Non-JSON output: judge by the subprocess exit status instead.
            if result.returncode == 0:
                _locked_print(f" ✓ [{ticket_id}] 完成")
            else:
                err = result.stderr.strip() or result.stdout.strip() or "未知错误"
                _locked_print(f" ✗ [{ticket_id}] 失败: {err}")
                log_error(ticket_id, err)
    except subprocess.TimeoutExpired:
        msg = "超时 (300s)"
        _locked_print(f" ✗ [{ticket_id}] {msg}")
        log_error(ticket_id, msg)
    except Exception as e:  # defensive catch-all: log and keep the batch going
        _locked_print(f" ✗ [{ticket_id}] 异常: {e}")
        log_error(ticket_id, str(e))
def main():
    """Entry point: collect ticket IDs, then download them in parallel."""
    print("读取 Ticket ID ...")
    ticket_ids = get_ticket_ids(10)
    if not ticket_ids:
        print("未找到任何 Ticket ID请检查 datasource 目录")
        sys.exit(1)
    print(f"共 {len(ticket_ids)} 个 Ticket: {', '.join(ticket_ids)}")

    # Truncate (or create) the error log for this run.
    open(ERROR_LOG, "w").close()

    total = len(ticket_ids)
    with ThreadPoolExecutor(max_workers=WORKERS) as pool:
        pending = {
            pool.submit(run_ticket, tid, pos, total): tid
            for pos, tid in enumerate(ticket_ids, 1)
        }
        for done in as_completed(pending):
            done.result()  # errors are already handled inside run_ticket

    print("\n全部完成。")
    if os.path.getsize(ERROR_LOG) > 0:
        print(f"有错误,详见 {ERROR_LOG}")
    else:
        print("无错误。")


if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long

0
error_log.txt Normal file
View File

31
get_ticket_ids.py Normal file
View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
"""Extract up to 10 unique Ticket IDs from the second column of every CSV
file under the ``datasource`` directory and print them as a numbered list."""
import csv
import glob
import os

datasource_dir = os.path.join(os.path.dirname(__file__), "datasource")
csv_files = glob.glob(os.path.join(datasource_dir, "*.csv"))

ids = []
seen = set()
done = False
for path in csv_files:
    if done:
        break
    with open(path, encoding="utf-8-sig") as handle:
        rows = csv.reader(handle)
        next(rows, None)  # drop the header row
        for record in rows:
            # The ticket ID lives in the second column; skip short rows.
            if len(record) < 2:
                continue
            ticket = record[1].strip()
            if ticket and ticket not in seen:
                seen.add(ticket)
                ids.append(ticket)
                if len(ids) >= 10:
                    done = True
                    break

print("找到的 Ticket ID")
for position, ticket in enumerate(ids, 1):
    print(f" {position}. {ticket}")

5
requirements.txt Normal file
View File

@@ -0,0 +1,5 @@
requests
urllib3
# 可选:Salesforce 外链附件爬取
scrapling[all]
playwright

View File

@@ -373,7 +373,6 @@ def dsm_upload_downloaded_files(downloaded_files, ticket_id, serial_id, json_mod
return upload_results return upload_results
def print_attachment_summary(all_attachments): def print_attachment_summary(all_attachments):
"""打印附件清单汇总""" """打印附件清单汇总"""
print(f"\n{'='*60}") print(f"\n{'='*60}")
@@ -680,7 +679,7 @@ def main():
) )
result["dsmUpload"] = upload_results result["dsmUpload"] = upload_results
# 上传完成后清理本地下载文件 # 上传完成后清理本地下载文件及子目录
for f in result["downloadedFiles"]: for f in result["downloadedFiles"]:
local_path = f.get("savedPath") local_path = f.get("savedPath")
if local_path and os.path.exists(local_path): if local_path and os.path.exists(local_path):
@@ -691,6 +690,14 @@ def main():
except OSError as e: except OSError as e:
if not args.json_mode: if not args.json_mode:
print(f" 删除失败: {local_path}: {e}") print(f" 删除失败: {local_path}: {e}")
# 删除空的下载子目录
try:
if os.path.isdir(args.output_dir) and not os.listdir(args.output_dir):
os.rmdir(args.output_dir)
if not args.json_mode:
print(f" 已删除目录: {args.output_dir}")
except OSError:
pass
if args.json_mode: if args.json_mode:
print(json.dumps(result, ensure_ascii=False, indent=2)) print(json.dumps(result, ensure_ascii=False, indent=2))