测试积点老人 发表于 2022-1-7 09:55:40

用python爬取数据出错

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
import csv
# 这个爬虫爬取结果的最后几列需要手工处理(可能会多出来几列)
def _load_patient_rows(ID, delay):
    """Open the report page, search for *ID* and return all ``<tr>`` elements.

    Uses the module-level ``driver``. *delay* is the number of seconds to wait
    after the page load before typing into the search box.
    """
    driver.get(r"http://192.168.3.252/xhlisweb-inspection_id/XHlisWebReport.aspx")
    sleep(delay)
    driver.find_element_by_name("txtoutpatient_id").send_keys(ID)
    driver.find_element_by_name("btnConfirm").click()
    return driver.find_elements_by_xpath('''//tr''')


def get_infos(ID):
    """Scrape every report row shown for the patient number *ID*.

    Approach: click each row of the search result in turn and read the detail
    table that is rendered under ``div#report-content``.

    Args:
        ID: value typed into the ``txtoutpatient_id`` search field.

    Returns:
        A list of rows. Each row is ``[ID]`` followed by the first 14
        space-separated fields of the clicked row, followed by the
        space-separated fields of one detail line.

    NOTE(review): ``//tr`` matches every table row on the page, so header rows
    (if the page has any) are clicked as well -- confirm against the real page.
    """
    # The first visit is only used to learn how many rows there are.
    times = len(_load_patient_rows(ID, 1))

    infos = []
    for i in range(times):
        # The page is reloaded and searched again before every click, as in
        # the original script -- presumably because clicking a row changes the
        # page and invalidates previously located elements.
        cols = _load_patient_rows(ID, 2)
        if i >= len(cols):
            # Fewer rows than on the first visit: keep what was collected
            # instead of raising and losing everything for this ID.
            break
        col = cols[i]
        col_info = col.text.split(' ')[:14]
        col.click()
        items = driver.find_elements_by_xpath("//div[@id='report-content']//tbody//tr")
        for item in items:
            a = item.text.split(' ')
            # Drop the first empty token only (same as the original
            # try/remove/except, without the bare except).
            if '' in a:
                a.remove('')
            # TODO: normalise the length of `a` to 7 fields (truncate when
            # longer, pad with '' when shorter); until then the trailing
            # columns of the output may need manual clean-up.
            # The leading [ID] cell matches the 'ID' column that the caller
            # writes as the first CSV header.
            infos.append([ID] + col_info + a)
    return infos

# Slice of the ID list handled by this run: start is at least 0, end at most 641.
start = 200
end = 641
# Reading .xlsx files with pandas requires the optional `openpyxl` package
# (install it with `pip install openpyxl`); without it read_excel raises
# "ImportError: Missing optional dependency 'openpyxl'".
data = pd.read_excel(r"C:\Users\cc\Desktop\资料\数据录入\ALL_raw.xlsx")
# Apply the start/end bounds so the processed IDs match the range that is
# written into the output file name below.
# NOTE(review): if the column is stored as numbers in Excel, leading zeros of
# the IDs are lost -- confirm the column is text.
IDs = data['登记号'].tolist()[start:end]
# Example of a hand-written ID list: ["0005248871", '0010610644']
options = Options()
options.binary_location = r"C:\Users\newceshi\Desktop\蒋丽莎病历检查\pzwj\google\chrome.exe"
driver = webdriver.Chrome(r"C:\Users\newceshi\Desktop\蒋丽莎病历检查\pzwj\chromedriver.exe", chrome_options=options)
ALL = []
try:
    driver.maximize_window()
    for ID in IDs:
        try:
            infos = get_infos(ID)
        except Exception as exc:
            # Best effort: one failing ID must not abort the whole run, but
            # report it instead of silently dropping it.
            print(f"skipped ID {ID!r}: {exc!r}")
        else:
            ALL.extend(infos)
    sleep(3)
finally:
    # Always close the browser, even when the run is interrupted.
    driver.quit()

headers = ['ID', '序号', '检验单', '病员号', '类型', '送检', '目的', '姓名', '性别', '年龄', '科别', '病区', '工作组', '审核人员', '审核日期', '审核时间', 'NO', '英文名称', '检验项目', '结果', '单位', '状态', '参考值']
# utf-8-sig: every scraped character can be encoded, and the BOM lets Excel
# detect the encoding; the locale default could fail after the whole scrape.
with open(f"result_检验_{start}_{end}.csv", 'w', newline='', encoding='utf-8-sig') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(headers)
    f_csv.writerows(ALL)
运行结果及报错内容
C:\Users\cc\AppData\Local\Programs\Python\Python39\python.exe D:/Pycharm/data/chaxue4.py
Traceback (most recent call last):
File "D:\Pycharm\data\chaxue4.py", line 58, in <module>
    data = pd.read_excel(r"C:\Users\cc\Desktop\资料\数据录入\ALL_raw.xlsx")
File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
    return func(*args, **kwargs)
File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\excel\_base.py", line 364, in read_excel
    io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\excel\_base.py", line 1233, in __init__
    self._reader = self._engines(self._io, storage_options=storage_options)
File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\excel\_openpyxl.py", line 521, in __init__
    import_optional_dependency("openpyxl")
File "C:\Users\cc\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\compat\_optional.py", line 118, in import_optional_dependency
    raise ImportError(msg) from None
ImportError: Missing optional dependency 'openpyxl'. Use pip or conda to install openpyxl.
Process finished with exit code 1
我的解答思路和尝试过的方法:这段代码是我从别人那里拿到的,但在我电脑上运行的结果是这样,看不懂是什么意思。

郭小贱 发表于 2022-1-10 09:44:07

ImportError: Missing optional dependency 'openpyxl'. Use pip or conda to install openpyxl.
问题出在这:pandas 读取 .xlsx 文件需要 openpyxl,而当前环境没有安装。执行 pip install openpyxl 后重新运行即可。

qqq911 发表于 2022-1-10 11:07:30

下断点吧,一步一步调试

jingzizx 发表于 2022-1-10 14:34:34

缺少依赖?
页: [1]
查看完整版本: 用python爬取数据出错