TA的每日心情 | 无聊 4 天前 |
签到天数: 530 天 连续签到: 2 天 [LV.9]测试副司令
- import pandas as pd
- from selenium import webdriver
- from selenium.webdriver.chrome.options import Options
- from time import sleep
- import csv
# NOTE: the last few columns of the scraped output need manual clean-up,
# because some rows may come out with a few extra columns.
def _search_rows(ID, wait):
    """Load the query page, search for ``ID`` and return the result rows.

    Args:
        ID: Value typed into the ``txtoutpatient_id`` input.
        wait: Seconds to sleep after loading the page, before typing.

    Returns:
        The list of ``<tr>`` elements whose ``onclick`` attribute contains
        ``"return btnClick"``.
    """
    # Function-scope import: ``find_element(By...)`` works on Selenium 3 and
    # 4, whereas the ``find_element_by_*`` helpers were removed in 4.3.
    from selenium.webdriver.common.by import By

    # NOTE(review): the URL literal is empty in this source (presumably
    # redacted); the real query-page address has to be filled in.
    driver.get(r"")
    sleep(wait)
    driver.find_element(By.NAME, "txtoutpatient_id").send_keys(ID)
    driver.find_element(By.NAME, "btnConfirm").click()
    return driver.find_elements(By.XPATH, '''//tr[contains(@onclick, "return btnClick")]''')


def get_infos(ID):
    """Scrape every report belonging to ``ID`` via the module-level ``driver``.

    Approach: click each result row in turn, then read the report table that
    is shown for it.  The page is reloaded and the search repeated before
    every click, so rows are always looked up on a fresh page.

    Args:
        ID: Identifier typed into the search box.

    Returns:
        A list of rows.  Each row is ``[ID]`` followed by the first 14
        space-separated fields of the clicked result row, followed by the
        space-separated fields of one report-table row.  Report rows are not
        padded or truncated, so row lengths can differ.

    Raises:
        IndexError: If a repeated search returns fewer rows than the first.
        selenium.common.exceptions.WebDriverException: On any browser or
            element-lookup failure.
    """
    from selenium.webdriver.common.by import By

    # The first search is only used to learn how many result rows exist.
    row_count = len(_search_rows(ID, 1))

    infos = []
    for index in range(row_count):
        row = _search_rows(ID, 2)[index]
        # Read the row text before clicking it.
        col_info = row.text.split(' ')[:14]
        row.click()
        # The first <tr> of the report table is skipped (presumably a header).
        items = driver.find_elements(By.XPATH, "//div[@id='report-content']//tbody//tr")[1:]
        for item in items:
            cells = item.text.split(' ')
            # Drop only the first empty field, as before; an explicit check
            # replaces the former bare ``except: pass``.
            if '' in cells:
                cells.remove('')
            # TODO: normalise ``cells`` to exactly 7 fields (truncate or pad
            # with '') so that every output row has the same width.
            infos.append([ID] + col_info + cells)
    return infos
# Slice of the ID list to process: start is at least 0, end is at most 641.
start = 200
end = 641

# Reading an .xlsx file needs the optional "openpyxl" package; without it
# pandas raises "ImportError: Missing optional dependency 'openpyxl'".
# Install it once with:  pip install openpyxl
data = pd.read_excel(r"C:\Users\cc\Desktop\资料\数据录入\ALL_raw.xlsx")
IDs = data['登记号'].tolist()[start:end]
# IDs = ["0005248871", '0010610644']

options = Options()
options.binary_location = r"C:\Users\newceshi\Desktop\蒋丽莎病历检查\pzwj\google\chrome.exe"
# ``options=`` replaces the deprecated ``chrome_options=`` keyword.
driver = webdriver.Chrome(r"C:\Users\newceshi\Desktop\蒋丽莎病历检查\pzwj\chromedriver.exe", options=options)

headers = ['ID', '序号', '检验单', '病员号', '类型', '送检', '目的', '姓名', '性别', '年龄', '科别', '病区', '工作组', '审核人员', '审核日期', '审核时间', 'NO', '英文名称', '检验项目', '结果', '单位', '状态', '参考值']
ALL = []
try:
    driver.maximize_window()
    for ID in IDs:
        try:
            ALL += get_infos(ID)
        except Exception as exc:
            # Best effort: one failing ID must not abort the whole run, but
            # the failure is reported instead of being silently discarded.
            print(f"get_infos failed for {ID!r}: {exc!r}")
finally:
    try:
        # Written in ``finally`` so rows collected so far are saved even when
        # the run is interrupted.
        with open(f"result_检验_{start}_{end}.csv", 'w', newline='') as f:
            f_csv = csv.writer(f)
            f_csv.writerow(headers)
            f_csv.writerows(ALL)
    finally:
        # Always close the browser, whatever happened above.
        sleep(3)
        driver.quit()
复制代码 运行结果及报错内容
- C:\Users\cc\AppData\Local\Programs\Python\Python39\python.exe D:/Pycharm/data/chaxue4.py
- Traceback (most recent call last):
- File "D:\Pycharm\data\chaxue4.py", line 58, in <module>
- data = pd.read_excel(r"C:\Users\cc\Desktop\资料\数据录入\ALL_raw.xlsx")
- File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
- return func(*args, **kwargs)
- File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\excel\_base.py", line 364, in read_excel
- io = ExcelFile(io, storage_options=storage_options, engine=engine)
- File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\excel\_base.py", line 1233, in __init__
- self._reader = self._engines[engine](self._io, storage_options=storage_options)
- File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\excel\_openpyxl.py", line 521, in __init__
- import_optional_dependency("openpyxl")
- File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\compat\_optional.py", line 118, in import_optional_dependency
- raise ImportError(msg) from None
- ImportError: Missing optional dependency 'openpyxl'. Use pip or conda to install openpyxl.
- Process finished with exit code 1
复制代码 我的解答思路和尝试过的方法:这段代码是我从别人那里拿到的,但在我电脑上运行的结果就是这样,看不懂是什么意思。