修改 README

feat: 保存失败的 Ticket ID 到 CSV，支持重试
feat: 兼容 xlsx 文件读取 Ticket ID，取消数量限制
2026-03-19 16:30:40 +08:00 · 2026-03-19 16:25:40 +08:00 · 2026-03-19 15:39:32 +08:00 · 2026-03-19 15:34:04 +08:00
6 changed files with 75 additions and 63 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -6,7 +6,9 @@
      "Bash(python -c \"import ast, sys; ast.parse\\(open\\('sap-c4c-AttachmentFolder.py'\\).read\\(\\)\\); print\\('语法检查通过'\\)\")",
      "Bash(git commit:*)",
      "Bash(head:*)",
-      "Bash(python3:*)"
+      "Bash(python3:*)",
+      "Bash(curl -sI \"https://cdn.playwright.dev/chrome-for-testing-public/145.0.7632.6/linux64/chrome-headless-shell-linux64.zip\")",
+      "Bash(pip3 show:*)"
    ]
  }
 }
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ python -m playwright install-deps chromium
 | `datasource/` | 存放 SAP Analytics 导出的 CSV 文件 |
 | `downloads/` | 本地临时下载目录（上传 DSM 后自动清理） |
 | `error_log.txt` | 错误日志，每次批量运行时重置 |
+| `failed_tickets.csv` | 失败的 Ticket ID 列表，可用于下次重试 |
 ---
@@ -83,11 +84,37 @@ python sap-c4c-AttachmentFolder.py ... --json
 python batch_download.py
 ```
- 默认读取 CSV 第二列的 Ticket ID，取前 10 个
+- 读取 datasource 目录下 CSV/xlsx 的第二列，获取所有 Ticket ID
 - 最多 5 个任务并行执行
 - 每个 Ticket 使用独立子目录 `downloads/{ticket_id}`，避免并行冲突
 - 上传 DSM 完成后自动清理本地文件和子目录
- 错误记录到 `error_log.txt`
+- 错误记录到 `error_log.txt`，失败的 Ticket ID 保存到 `failed_tickets.csv`
 ### 守护进程
 ```shell
  #启动后台下载
  nohup python3 batch_download.py > run.log 2>&1 &
  echo $! > batch.pid
  #查看实时进度
  tail -f run.log
  #查看进程是否还在运行
  cat batch.pid | xargs ps -p
  #结束进程
  cat batch.pid | xargs kill
 ```
 ### 重试失败的 Ticket
 将上一次运行生成的 `failed_tickets.csv` 移动到 `datasource/` 目录，然后重新运行：
 ```bash
 mv failed_tickets.csv datasource/
 python batch_download.py
 ```
 ### 修改并行数或 Ticket 数量
--- a/batch_download.py
+++ b/batch_download.py
@@ -24,29 +24,45 @@ WORKERS     = 5
 SCRIPT      = os.path.join(os.path.dirname(__file__), "sap-c4c-AttachmentFolder.py")
 ERROR_LOG   = os.path.join(os.path.dirname(__file__), "error_log.txt")
 FAILED_CSV  = os.path.join(os.path.dirname(__file__), "failed_tickets.csv")
 DATASOURCE  = os.path.join(os.path.dirname(__file__), "datasource")
 print_lock = threading.Lock()
 failed_lock = threading.Lock()
 failed_ids = set()
 # ─────────────────────────────────────────────────────────────────────────────
-def get_ticket_ids(limit=10):
+def get_ticket_ids():
    ids, seen = [], set()
    def _add(val):
        val = str(val).strip()
        if val and val not in seen:
            seen.add(val)
            ids.append(val)
    for csv_file in glob.glob(os.path.join(DATASOURCE, "*.csv")):
        with open(csv_file, encoding="utf-8-sig") as f:
            reader = csv.reader(f)
            next(reader, None)
            for row in reader:
-                if len(row) < 2:
+                if len(row) >= 2:
                    _add(row[1])
    for xlsx_file in glob.glob(os.path.join(DATASOURCE, "*.xlsx")):
        import openpyxl
        wb = openpyxl.load_workbook(xlsx_file, read_only=True, data_only=True)
        ws = wb.active
        first = True
        for row in ws.iter_rows(min_col=2, max_col=2, values_only=True):
            if first:
                first = False
                continue
-                val = row[1].strip()
+            if row[0] is not None:
-                if val and val not in seen:
+                _add(row[0])
-                    seen.add(val)
+        wb.close()
-                    ids.append(val)
+
-                if len(ids) >= limit:
-                    break
-        if len(ids) >= limit:
-            break
    return ids
@@ -54,6 +70,11 @@ def log_error(ticket_id, message):
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(ERROR_LOG, "a", encoding="utf-8") as f:
        f.write(f"[{ts}] Ticket {ticket_id}: {message}\n")
    with failed_lock:
        if ticket_id not in failed_ids:
            failed_ids.add(ticket_id)
            with open(FAILED_CSV, "a", encoding="utf-8", newline="") as f:
                csv.writer(f).writerow([ticket_id])
 def run_ticket(ticket_id, index, total):
@@ -117,16 +138,19 @@ def run_ticket(ticket_id, index, total):
 def main():
    global failed_ids
    print("读取 Ticket ID ...")
-    ids = get_ticket_ids(10)
+    ids = get_ticket_ids()
    if not ids:
        print("未找到任何 Ticket ID，请检查 datasource 目录")
        sys.exit(1)
    print(f"共 {len(ids)} 个 Ticket: {', '.join(ids)}")
-    # 清空/创建 error_log
+    # 清空/创建 error_log 和 failed_tickets.csv
    open(ERROR_LOG, "w").close()
    open(FAILED_CSV, "w", encoding="utf-8", newline="").close()
    failed_ids.clear()
    with ThreadPoolExecutor(max_workers=WORKERS) as executor:
        futures = {executor.submit(run_ticket, tid, i, len(ids)): tid
@@ -135,10 +159,12 @@ def main():
            future.result()  # 触发异常传播（已在 run_ticket 内处理）
    print("\n全部完成。")
    if failed_ids:
        print(f"失败 {len(failed_ids)} 个 Ticket，已保存到 {FAILED_CSV}")
        if os.path.getsize(ERROR_LOG) > 0:
-        print(f"有错误，详见 {ERROR_LOG}")
+            print(f"错误详情见 {ERROR_LOG}")
    else:
-        print("无错误。")
+        print("全部成功，无失败。")
 if __name__ == "__main__":
--- a/datasource/25年10月索赔报表.xlsx
+++ b/datasource/25年10月索赔报表.xlsx
--- a/datasource/SAPAnalyticsReport(Z96B303CD8F03C0B6AC322A).csv
+++ b/datasource/SAPAnalyticsReport(Z96B303CD8F03C0B6AC322A).csv
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 requests
 urllib3
 openpyxl
 # 可选：Salesforce 外链附件爬取
 scrapling[all]
 playwright
Author	SHA1	Message	Date
afei A	8ee25380e1	修改 README	2026-03-19 16:30:40 +08:00
afei A	2fb2597ce1	feat: 保存失败的 Ticket ID 到 CSV，支持重试	2026-03-19 16:25:40 +08:00
afei A	bc6c758d8c	feat: 兼容 xlsx 文件读取 Ticket ID，取消数量限制	2026-03-19 15:39:32 +08:00
afei A	fb97d46d98	chore: 更新设置文件，移除 datasource csv	2026-03-19 15:34:04 +08:00