diff --git a/README.md b/README.md index 753b2c4..f555760 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ python -m playwright install-deps chromium | `datasource/` | 存放 SAP Analytics 导出的 CSV 文件 | | `downloads/` | 本地临时下载目录(上传 DSM 后自动清理) | | `error_log.txt` | 错误日志,每次批量运行时重置 | +| `failed_tickets.csv` | 失败的 Ticket ID 列表,可用于下次重试 | --- @@ -83,11 +84,20 @@ python sap-c4c-AttachmentFolder.py ... --json python batch_download.py ``` -- 默认读取 CSV 第二列的 Ticket ID,取前 10 个 +- 读取 datasource 目录下 CSV/xlsx 的第二列,获取所有 Ticket ID - 最多 5 个任务并行执行 - 每个 Ticket 使用独立子目录 `downloads/{ticket_id}`,避免并行冲突 - 上传 DSM 完成后自动清理本地文件和子目录 -- 错误记录到 `error_log.txt` +- 错误记录到 `error_log.txt`,失败的 Ticket ID 保存到 `failed_tickets.csv` + +### 重试失败的 Ticket + +将上一次运行生成的 `failed_tickets.csv` 移动到 `datasource/` 目录,然后重新运行: + +```bash +mv failed_tickets.csv datasource/ +python batch_download.py +``` ### 修改并行数或 Ticket 数量 diff --git a/batch_download.py b/batch_download.py index 5fd2e6d..b70cc51 100644 --- a/batch_download.py +++ b/batch_download.py @@ -24,9 +24,12 @@ WORKERS = 5 SCRIPT = os.path.join(os.path.dirname(__file__), "sap-c4c-AttachmentFolder.py") ERROR_LOG = os.path.join(os.path.dirname(__file__), "error_log.txt") +FAILED_CSV = os.path.join(os.path.dirname(__file__), "failed_tickets.csv") DATASOURCE = os.path.join(os.path.dirname(__file__), "datasource") print_lock = threading.Lock() +failed_lock = threading.Lock() +failed_ids = set() # ───────────────────────────────────────────────────────────────────────────── @@ -67,6 +70,11 @@ def log_error(ticket_id, message): ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") with open(ERROR_LOG, "a", encoding="utf-8") as f: f.write(f"[{ts}] Ticket {ticket_id}: {message}\n") + with failed_lock: + if ticket_id not in failed_ids: + failed_ids.add(ticket_id) + with open(FAILED_CSV, "a", encoding="utf-8", newline="") as f: + csv.writer(f).writerow([ticket_id]) def run_ticket(ticket_id, index, total): @@ -130,16 +138,19 @@ def run_ticket(ticket_id, index, total): def main(): + global failed_ids print("读取 Ticket ID ...") - ids = get_ticket_ids(10) + ids = get_ticket_ids() if not ids: print("未找到任何 Ticket ID,请检查 datasource 目录") sys.exit(1) print(f"共 {len(ids)} 个 Ticket: {', '.join(ids)}") - # 清空/创建 error_log + # 清空/创建 error_log 和 failed_tickets.csv open(ERROR_LOG, "w").close() + open(FAILED_CSV, "w", encoding="utf-8", newline="").close() + failed_ids.clear() with ThreadPoolExecutor(max_workers=WORKERS) as executor: futures = {executor.submit(run_ticket, tid, i, len(ids)): tid @@ -148,10 +159,12 @@ def main(): future.result() # 触发异常传播(已在 run_ticket 内处理) print("\n全部完成。") - if os.path.getsize(ERROR_LOG) > 0: - print(f"有错误,详见 {ERROR_LOG}") + if failed_ids: + print(f"失败 {len(failed_ids)} 个 Ticket,已保存到 {FAILED_CSV}") + if os.path.getsize(ERROR_LOG) > 0: + print(f"错误详情见 {ERROR_LOG}") else: - print("无错误。") + print("全部成功,无失败。") if __name__ == "__main__":