feat: 兼容 xlsx 文件读取 Ticket ID,取消数量限制

This commit is contained in:
afei A
2026-03-19 15:39:32 +08:00
parent fb97d46d98
commit bc6c758d8c
4 changed files with 27 additions and 12 deletions

View File

@@ -7,7 +7,8 @@
"Bash(git commit:*)",
"Bash(head:*)",
"Bash(python3:*)",
"Bash(curl -sI \"https://cdn.playwright.dev/chrome-for-testing-public/145.0.7632.6/linux64/chrome-headless-shell-linux64.zip\")"
"Bash(curl -sI \"https://cdn.playwright.dev/chrome-for-testing-public/145.0.7632.6/linux64/chrome-headless-shell-linux64.zip\")",
"Bash(pip3 show:*)"
]
}
}

View File

@@ -30,23 +30,36 @@ print_lock = threading.Lock()
# ─────────────────────────────────────────────────────────────────────────────
def get_ticket_ids(limit=None):
    """Collect unique Ticket IDs from the CSV and XLSX files in DATASOURCE.

    The ID is read from the second column of every data row; the first row
    of each file is treated as a header and skipped. Values are converted to
    text, stripped of surrounding whitespace, and de-duplicated while
    preserving first-seen order. CSV files are scanned before XLSX files.

    Args:
        limit: Optional maximum number of IDs to return. ``None`` (the
            default) means no cap. Kept so that callers written against the
            earlier ``get_ticket_ids(limit=...)`` signature keep working.

    Returns:
        A list of unique, non-empty Ticket ID strings.
    """
    ids, seen = [], set()

    def _add(val):
        # XLSX cells may be numbers or None; normalise everything to text.
        if val is None:
            return
        val = str(val).strip()
        if val and val not in seen:
            seen.add(val)
            ids.append(val)

    def _full():
        return limit is not None and len(ids) >= limit

    if _full():  # limit <= 0: nothing to collect
        return ids

    for csv_file in glob.glob(os.path.join(DATASOURCE, "*.csv")):
        # newline="" lets the csv module handle embedded line breaks itself.
        with open(csv_file, encoding="utf-8-sig", newline="") as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header row
            for row in reader:
                if len(row) >= 2:
                    _add(row[1])
                    if _full():
                        return ids

    # Excel drops "~$name.xlsx" lock files next to open workbooks; they match
    # the glob but are not real workbooks, so leave them out.
    xlsx_files = [
        path
        for path in glob.glob(os.path.join(DATASOURCE, "*.xlsx"))
        if not os.path.basename(path).startswith("~$")
    ]
    if xlsx_files:
        # Imported lazily so CSV-only runs do not require openpyxl.
        import openpyxl

    for xlsx_file in xlsx_files:
        wb = openpyxl.load_workbook(xlsx_file, read_only=True, data_only=True)
        try:
            ws = wb.active
            if ws is None:  # workbook without any worksheet
                continue
            # min_row=2 skips the header; min_col/max_col=2 yields 1-tuples
            # holding only the second column's value.
            for row in ws.iter_rows(
                min_row=2, min_col=2, max_col=2, values_only=True
            ):
                if row:
                    _add(row[0])
                    if _full():
                        return ids
        finally:
            # Read-only workbooks keep the file handle open until closed.
            wb.close()
    return ids

Binary file not shown.

View File

@@ -1,5 +1,6 @@
requests
urllib3
openpyxl
# 可选:Salesforce 外链附件爬取
scrapling[all]
playwright