TA的每日心情 | 擦汗 昨天 09:05 |
签到天数: 1048 天 连续签到: 1 天 [LV.10]测试总司令
因为python-docx库只能操作docx文件,所以要先把doc文件转换成docx格式;直接修改文件后缀名是不行的。
- word = wc.Dispatch("Word.Application")
- # 不能用相对路径,老老实实用绝对路径
- # 需要处理的文件所在文件夹目录
- for root, dirs, files in os.walk(rawpath):
- for i in files:
- # 找出文件中以.doc结尾并且不以~$开头的文件(~$是为了排除临时文件的)
- if i.endswith('.doc') and not i.startswith('~
- [font="][size=16px][b]找到特定文件[/b][/size][/font]
- [font="][size=16px] 这个比较简单,只需要循环遍历文件夹,按照队列里的关键字将目标文件添加到队列里即可。[/size][/font]
- [font="][size=16px] 因为只有docx文件才能转成pdf,所以这里只查找docx文件,同时过滤掉以~$开头的临时文件。[/size][/font]
- [font="][size=16px][code]def findfiles():
- count = 1
- for root, dirs, files in os.walk(path):
- for filename in files:
- for i in range(len(filenames)):
- if (filenames[i] in filename and filename.endswith('docx') and not filename.startswith('~[b]所有字体颜色变为黑色[/b][/size][/font]
def _blacken_runs(paragraphs):
    """Set the font colour of every run in the given paragraphs to black."""
    for paragraph in paragraphs:
        for run in paragraph.runs:
            run.font.color.rgb = RGBColor(0, 0, 0)


def _blacken_container(container):
    """Blacken all text in an object that exposes ``paragraphs`` and ``tables``.

    Used for the document body, headers, footers and table cells. Cells are
    containers themselves, so nested tables are handled by recursion.
    """
    _blacken_runs(container.paragraphs)
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                _blacken_container(cell)


def change_color(path):
    """Turn every run of text in a .docx file black and save it in place.

    Covers the body paragraphs, all tables (including nested ones), and the
    header and footer of every section.

    Args:
        path: Path of the .docx file; the file is overwritten.
    """
    file = Document(path)

    # Body text and body tables.
    _blacken_container(file)

    # Headers and footers: walk every section and every paragraph/table,
    # not just the first paragraph of the first section.
    for section in file.sections:
        _blacken_container(section.header)
        _blacken_container(section.footer)

    file.save(path)
    print(path)
    print("^"*10 + "颜色切换完成" + "^"*10)
- [b]docx转pdf[/b]
- 因为分页(截取指定页面)的操作只能在pdf文件上实现,所以要先把docx转成pdf。
# Convert each collected document to PDF. Every item of ``result`` is indexed
# at position 1 to obtain the document's full path.
for i, entry in enumerate(result):
    file = entry[1]
    # Keep only the file name: the part after the last backslash.
    name = file.rsplit('\\', 1)[1]
    print(i)
    # Files whose name contains the keyword go to pdf_file_path; the original
    # comment says these are the files that need no page extraction.
    # All other files go to out_path.
    target_dir = pdf_file_path if "关键字" in name else out_path
    # name[:-5] drops the last five characters (the length of ".docx"); the
    # index is appended to the output name.
    outfile = target_dir + name[:-5] + str(i) + '.pdf'
    if file.split(".")[-1] != 'docx':
        continue
    print(file)
    convert(file, outfile)
    print("^"*10+"PDF转换完成"+"^"*10)
    time.sleep(1)
复制代码 截取特定页面
def split_single_pdf(read_file, start_page, end_page, pdf_file):
    """Copy a range of pages from one PDF into a new PDF file.

    Args:
        read_file: Path of the source PDF.
        start_page: Index of the first page to copy; passed straight to
            ``getPage``.
        end_page: Index one past the last page to copy (exclusive, as in
            ``range``).
        pdf_file: Path of the PDF file to write.
    """
    # Open the source with a context manager so the handle is always closed,
    # even when reading or writing fails.
    with open(read_file, 'rb') as fp_read_file:
        pdf_input = PdfFileReader(fp_read_file)
        pdf_output = PdfFileWriter()

        # Queue every page in [start_page, end_page) for output.
        for i in range(start_page, end_page):
            pdf_output.addPage(pdf_input.getPage(i))

        # Write while the source is still open: the copied pages may still
        # reference data in the source stream.
        with open(pdf_file, 'wb') as pdf_out:
            pdf_output.write(pdf_out)

    print(f'{read_file}分割{start_page}页-{end_page}页完成,保存为{pdf_file}!')
复制代码 调用打印机打印
def printer_loading(filename):
    """Send a file to the default printer through the shell "print" verb.

    Args:
        filename: Path of the file to print.
    """
    default_printer = win32print.GetDefaultPrinter()
    # The default printer's name is passed as the /d:"<name>" argument.
    printer_argument = '/d:"%s"' % default_printer
    win32api.ShellExecute(0, "print", filename, printer_argument, ".", 0)
# Print a specific area of an Excel worksheet.
def excel_print(execl_path, sheet_name='sheet关键字', print_range='A1:D11',
                printer='Canon MF260 Series UFRII LT', copies=1):
    """Print one cell range of a worksheet using a hidden Excel instance.

    The defaults reproduce the previously hard-coded sheet, range and printer,
    so existing calls of ``excel_print(path)`` behave as before.

    Args:
        execl_path: Path of the workbook to open.
        sheet_name: Name of the worksheet to print from.
        print_range: Address of the cell range to print.
        printer: Printer name passed to Excel as ``ActivePrinter``.
        copies: Number of copies to print.
    """
    app = xw.App(visible=False, add_book=False)
    try:
        workbook = app.books.open(execl_path)
        try:
            worksheet = workbook.sheets[sheet_name]
            area = worksheet.range(print_range)  # area to print
            area.api.PrintOut(Copies=copies, ActivePrinter=printer, Collate=True)
        finally:
            workbook.close()
    finally:
        # The app is invisible, so always quit it; otherwise a failure above
        # would leave the Excel instance running.
        app.quit()
- ):
- print(i)
- doc = word.Documents.Open(root +'\\'+ i)
- # # 将文件名与后缀分割
- rename = os.path.splitext(i)
- # 将文件另存为.docx
- doc.SaveAs(root + '\\' +rename[0] + '.docx', 12) # 12表示docx格式
- doc.Close()
- # time.sleep(1)
- word.Quit()