Excel文件的合并
method 1
更新个简单粗暴的方法,旧的太细腻了。
import pandas as pd
from pathlib import Path, PurePath
src_path = 'D:/File/2021-07'
dst_file = 'D:/File/2021-07/result.xlsx'
# 取得该目录下所有的xlsx格式文件
p = Path(src_path)
files = [x for x in p.iterdir() if PurePath(x).match('*.xlsx')]
result = pd.read_excel(dst_file)
for file in files:
file1 = pd.read_excel(file)
result = result.append(file1,ignore_index=True)
result.to_excel(dst_file)
method 2
import xlrd
import xlwt
from pathlib import Path, PurePath
# 指定要合并excel的路径
src_path = 'Q:/python/搬砖助手'
# 指定合并完成的路径
dst_file = 'Q:/python/搬砖助手/result.xls'
# 取得该目录下所有的xlsx格式文件
p = Path(src_path)
files = [x for x in p.iterdir() if PurePath(x).match('*.xls')]
# 准备一个列表存放读取结果
content = []
# 抽取每一个文件内容
for file in files:
# 用文件名作为每个用户的标识
username = file.stem
data = xlrd.open_workbook(file)
table = data.sheets()[0]
# 取得需要的内容
answer1 = table.cell_value(rowx=1, colx=3)
answer2 = table.cell_value(rowx=1, colx=7)
temp = f'{username},{answer1},{answer2}'
# 合并为一行先存储起来
content.append(temp.split(','))
print(temp)
# 输出
# 准备写入文件的表头
table_header = ['员工', '手机号', '机型']
workbook = xlwt.Workbook(encoding='utf-8')
xlsheet = workbook.add_sheet("统计结果")
# 写入表头
row = 0
col = 0
for cell_header in table_header:
xlsheet.write(row, col, cell_header)
col += 1
# 向下移动一行
row += 1
# 取出每一行内容
for line in content:
col = 0
# 取出每个单元格内容
for cell in line:
# 写入内容
xlsheet.write(row, col, cell)
# 向右移动一个单元格
col += 1
# 向下移动一行
row += 1
# 保存最终结果
workbook.save(dst_file)
Excel表格拆分
#打开要拆解的文件
import xlrd
import xlwt
from pathlib import Path, PurePath
file = 'Q:/python/搬砖助手/result.xls'
data = xlrd.open_workbook(file)
table = data.sheets()[0]
#拆分的行数
employee_number = table.nrows
#表头
salary_header = table.row_values(rowx=0, start_colx=0, end_colx=None)
#输出文件到指定路径
def write_to_file(filename, cnt):
dst_file = 'Q:/python/搬砖助手/{}.xls'.format(str(filename))
workbook = xlwt.Workbook(encoding='utf-8')
xlsheet = workbook.add_sheet("新建sheet")
# 写入内容,感觉这个库不太行,得一个个个格子写入—_—
# 取出每一行内容
row = 0
for line in cnt:
col = 0
# 取出每个单元格内容
for cell in line:
# 写入内容
xlsheet.write(row, col, cell)
# 向右移动一个单元格
col += 1
# 向下移动一行
row += 1
# 保存文件
workbook.save(dst_file)
#表格拆分
for line in range(1,employee_number):
content = table.row_values(rowx=line, start_colx=0, end_colx=None)
# 将表头和内容重新组成一个新的文件
new_content = []
# 增加表头到要写入的内容中
new_content.append(salary_header)
# 增加员工工资到要写入的内容中
new_content.append(content)
# 调用自定义函数write_to_file()写入新的文件
write_to_file(filename = content[0], cnt = new_content)
合并Word
import docx
from docx import Document
#不带格式的文本合并
def merge_without_format(docx_files: list):
# 遍历每个文件
doc=Document()
for docx_file in sorted(docx_files):
another_doc = Document(docx_file)
# 获取每个文件的所有“段落”
paras = another_doc.paragraphs
for para in paras:
# 为新的word文件创建一个新段落
newpar = doc.add_paragraph('')
# 将提取的内容写入新的文本段落中
newpar.add_run(para.text)
# 所有文件合并完成后在指定路径进行保存
doc.save(Path(p, 'new.docx'))
#并不能直接导入文档内表格T T ,要插入表格后爬内容
# 调用函数
src_path_w = 'Q:/python/搬砖助手'
p = Path(src_path_w)
files = [x for x in p.iterdir() if PurePath(x).match('*.docx')]
merge_without_format(files)
合并TXT和Word,增加内容,调整格式
from pathlib import Path, PurePath
import docx
from docx import Document
from docx.shared import RGBColor, Pt,Inches,Cm
def add_content_mode1(doc,content):
para = doc.add_paragraph().add_run(content)
# 设置字体格式
para.font.name = '仿宋'
# 设置下划线
para.font.underline = True
# 设置颜色
para.font.color.rgb = RGBColor(255,128,128)
# 所有文件合并完成后在指定路径进行保存
doc.save(Path(p, 'new2.docx'))
#调用合并函数
src_path_w = 'Q:/python/搬砖助手'
p = Path(src_path_w)
doc=Document('Q:/python/搬砖助手/new.docx')
content = open('Q:/python/搬砖助手/txt1.txt','r')
add_content_mode1(doc,content)
合并图片和Word
from docx import Document
from docx import shared
doc = Document()
# 按英寸设置宽度,Word插入图片
doc.add_picture('test.jpg', width=shared.Inches(1))
在Word中批量替换Excel关键词内容
from docx import Document
from pathlib import Path, PurePath
def generat_invitation(word,replace_content,invitation_path):
doc = Document(word)
# 取出每一段
for para in doc.paragraphs:
for key, value in replace_content.items():
if key in para.text:
# 逐个关键字进行替换
para.text = para.text.replace(key, str(value))
#另存为新的文件
file_name = PurePath(invitation_path).with_name(replace_content['员工']).with_suffix('.docx')
doc.save(file_name)
print('success')
import xlrd
word = 'Q:/python/搬砖助手/test.docx'
#Excel转为dict
file = 'Q:/python/搬砖助手/result.xls'
data = xlrd.open_workbook(file)
table = data.sheets()[0]
invitation_path = 'Q:/python/搬砖助手'
for i in range(1,table.nrows):
replace_content = {}
#表头
table_header = table.row_values(0)
#内容
for j in range(table.ncols):
replace_content[table_header[j]] = table.cell_value(rowx = i, colx = j)#读取指定位置数据
generat_invitation(word,replace_content,invitation_path)