diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 056305b..fab3fef 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -7,7 +7,8 @@
       "Bash(git commit:*)",
       "Bash(head:*)",
       "Bash(python3:*)",
-      "Bash(curl -sI \"https://cdn.playwright.dev/chrome-for-testing-public/145.0.7632.6/linux64/chrome-headless-shell-linux64.zip\")"
+      "Bash(curl -sI \"https://cdn.playwright.dev/chrome-for-testing-public/145.0.7632.6/linux64/chrome-headless-shell-linux64.zip\")",
+      "Bash(pip3 show:*)"
     ]
   }
 }
diff --git a/batch_download.py b/batch_download.py
index 898e53b..5fd2e6d 100644
--- a/batch_download.py
+++ b/batch_download.py
@@ -30,23 +30,56 @@ print_lock = threading.Lock()
 
 # ─────────────────────────────────────────────────────────────────────────────
 
-def get_ticket_ids(limit=10):
+def get_ticket_ids(limit=None):
+    """Collect unique ticket IDs from every CSV/XLSX file in DATASOURCE.
+
+    The ID is read from the second column; the first row of each file is
+    treated as a header and skipped. First-seen order is preserved.
+
+    Args:
+        limit: Optional cap on the number of IDs returned. ``None`` (the
+            default) returns every ID found.
+
+    Returns:
+        list[str]: De-duplicated, whitespace-stripped ticket IDs.
+    """
     ids, seen = [], set()
+
+    def _add(val):
+        # A whole-number cell can arrive as a float; keep "123", not "123.0".
+        if isinstance(val, float) and val.is_integer():
+            val = int(val)
+        val = str(val).strip()
+        if val and val not in seen:
+            seen.add(val)
+            ids.append(val)
+
     for csv_file in glob.glob(os.path.join(DATASOURCE, "*.csv")):
         with open(csv_file, encoding="utf-8-sig") as f:
             reader = csv.reader(f)
             next(reader, None)
             for row in reader:
-                if len(row) < 2:
-                    continue
-                val = row[1].strip()
-                if val and val not in seen:
-                    seen.add(val)
-                    ids.append(val)
-                if len(ids) >= limit:
-                    break
-        if len(ids) >= limit:
-            break
-    return ids
+                if len(row) >= 2:
+                    _add(row[1])
+
+    for xlsx_file in glob.glob(os.path.join(DATASOURCE, "*.xlsx")):
+        # "~$name.xlsx" is the lock file Excel leaves next to an open workbook.
+        if os.path.basename(xlsx_file).startswith("~$"):
+            continue
+        # Imported lazily so CSV-only runs do not require openpyxl.
+        import openpyxl
+        wb = openpyxl.load_workbook(xlsx_file, read_only=True, data_only=True)
+        try:
+            # min_row=2 skips the header row.
+            for row in wb.active.iter_rows(
+                min_row=2, min_col=2, max_col=2, values_only=True
+            ):
+                if row[0] is not None:
+                    _add(row[0])
+        finally:
+            # Read-only workbooks hold the file open until closed explicitly.
+            wb.close()
+
+    return ids if limit is None else ids[:limit]
 
 
diff --git a/datasource/25年10月索赔报表.xlsx b/datasource/25年10月索赔报表.xlsx
new file mode 100644
index 0000000..660fd8a
Binary files /dev/null and b/datasource/25年10月索赔报表.xlsx differ
diff --git a/requirements.txt b/requirements.txt
index 857e424..60906ed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 requests
 urllib3
+openpyxl
 # 可选:Salesforce 外链附件爬取
 scrapling[all]
 playwright