Python抽取Word表格保存为Excel

只支持 docx 格式。按内容筛选指定表格时，需要先判断表格行数，例如 len(table.rows) > 1；否则遇到只有一行的表格时，table.cell(1, 0) 可能会触发 list index out of range 异常。

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import docx
from docx import Document
import xlwt;
import xlrd;
import glob

def readdoc(filename):
    """Collect the matching tables of one .docx file.

    A table is kept when it has more than one row and the stripped text of
    the first cell of its second row is exactly "输入项名称".  Each kept
    table is stored as ``[filename, row_0_texts, row_1_texts, ...]`` where
    every ``row_n_texts`` is the list of cell texts of that row.

    NOTE(review): results are appended to the module-level ``tables`` list,
    which must already exist when this function is called; that same list is
    what gets returned, so results accumulate across calls.

    :param filename: path of the .docx file to open.
    :return: the module-level ``tables`` list after appending.
    """
    document = Document(filename)
    seen = 0
    matched = 0
    for seen, tbl in enumerate(document.tables, 1):
        # Check the row count first: cell(1, 0) needs a second row to exist.
        if len(tbl.rows) <= 1:
            continue
        if tbl.cell(1, 0).text.strip() != "输入项名称":
            continue
        matched += 1
        # The first entry is the source file name; the remaining entries are
        # the rows of the table, one list of cell texts per row.
        extracted = [filename]
        extracted.extend([cell.text for cell in row.cells] for row in tbl.rows)
        tables.append(extracted)
    print("File name:" + filename + " tables:" + repr(seen) + " parsed:" + repr(matched))
    return tables

def writeExcel(tables,filename):
    """Write the extracted tables to a single-sheet ``.xls`` workbook.

    Every item of ``tables`` is a list whose first entry is a header value
    (the source file name) and whose remaining entries are rows, each an
    iterable of cell values.  The header is written in bold in column 0;
    each row is written on its own sheet line starting at column 0.  All
    tables go one after another into the same sheet ``sheet0``.  The line
    counter is incremented before each write, so sheet line 0 stays empty.

    :param tables: list of ``[header, row, row, ...]`` lists.
    :param filename: path used to derive the output name; its extension is
        replaced by ``.xls``.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # bold font
    style = xlwt.easyxf('font: bold 1')

    sheet_index = 0
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('sheet' + str(sheet_index),cell_overwrite_ok = True)

    line = 0
    for table in tables:
        # Use positions from enumerate rather than list.index(): index()
        # returns the FIRST equal element, so rows containing repeated cell
        # texts (blank or merged cells) were written to the wrong column.
        for position, entry in enumerate(table):
            line += 1
            if position == 0:
                # write filename with style
                worksheet.write(line, 0, entry, style)
            else:
                # write table
                for column, value in enumerate(entry):
                    worksheet.write(line, column, value)

    # splitext handles any extension length, unlike slicing off 5 characters.
    workbook.save(os.path.splitext(filename)[0] + ".xls")

# Walk the current directory recursively and parse every .docx file found.
filenames = glob.iglob("**/*.docx", recursive=True)
tables = []
# Stays None when the glob matches nothing, so the write step can be skipped
# instead of failing with NameError on an undefined ``filename``.
filename = None
for filename in filenames:
    # readdoc appends to the module-level ``tables`` list and returns that
    # same list, so results from all files accumulate here.
    tables = readdoc(filename)

if filename is None:
    print("No .docx files found, nothing to write.")
else:
    # The workbook is named after the last .docx file that was processed.
    writeExcel(tables,filename)

Python抽取Word表格保存为Excel

推荐阅读更多精彩内容