600字范文,内容丰富有趣,生活中的好帮手!
600字范文 > python 读取发票内容 在窗口中显示并保存到excel文件中

python 读取发票内容 在窗口中显示并保存到excel文件中

时间:2023-11-25 02:08:55

相关推荐

python 读取发票内容 在窗口中显示并保存到excel文件中

编写两个文件ReadPdf.py和QTShow.py

ReadPdf.py

1.使用正则表达式(re 模块)定义需要提取的字段(目前只读取下面这 8 个字段;"开户行及账号"的提取在测试中出现问题,暂未包含):

# Field name -> regular expression used to locate that field in the text
# extracted from the invoice PDF.  The label/value separator may be either an
# ASCII colon or a full-width colon, so both are listed in the character class.
self.template_fields = {
    "发票类型": r'[\u4e00-\u9fa5]+电子普通发票',
    "发票代码": r'发票代码(.*\d+)',
    "发票号码": r'发票号码(.*\d+)',
    "开票日期": r'开票日期(.*)',
    "名称": r'名\s*称\s*[::]\s*([\u4e00-\u9fa5]+)',
    "纳税人识别号": r'纳税人识别号\s*[::]\s*([a-zA-Z0-9]+)',
    "金额": r'小写.*(.*[0-9.]+)',
    "收款人": r'收\s*款\s*人\s*[::]\s*([\u4e00-\u9fa5]+)',
}

2.对提取到的字段进行处理:

def extract_fields(self):
    """Extract every configured invoice field from the PDF text.

    Each regular expression in ``self.template_fields`` is compiled and
    handed to ``self.re_text`` together with the text returned by
    ``self.extract_text()``.  The matched text is then reduced to its value
    part: the piece following a colon, or following a yen sign for amounts.
    Matches that contain neither separator are kept unchanged.

    Returns:
        dict: field name -> extracted value.  A field whose pattern did not
        match maps to an empty string instead of ``None``.
    """
    import re  # local import so the function does not rely on module state

    # Guard against a text extractor that yields None.
    pdf_text = self.extract_text() or ""

    # Find the raw match for every configured pattern.
    matches = {}
    for field, regex in self.template_fields.items():
        matches[field] = self.re_text(re.compile(regex), pdf_text)

    # Keep only the value part of each match.  Colons take precedence over
    # yen signs; ASCII and full-width variants are both recognised.
    separators = (":", ":", "¥", "¥")
    for field, raw in matches.items():
        if raw is None:
            # Pattern not found: report an empty value rather than crashing
            # on the membership tests below.
            matches[field] = ""
            continue
        for sep in separators:
            if sep in raw:
                matches[field] = raw.split(sep)[1].strip()
                break
    return matches

3.ReadPdf.py 的完整代码如下:

import os
import re

import pandas as pd
import pdfplumber
import xlwt
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from pdfminer.high_level import extract_text


class PDFExtractor:
    """Pull a fixed set of fields out of the first page of an invoice PDF.

    The fields and the regular expressions that locate them are stored in
    ``self.template_fields``; ``extract_fields`` is the main entry point.
    """

    # Single-pass cleanup applied to every regex match: drop ASCII spaces and
    # closing parentheses (ASCII and full-width) and normalise the full-width
    # colon to an ASCII colon so later splitting only has to look for ":".
    _CLEANUP_TABLE = str.maketrans({" ": None, ")": None, ")": None, ":": ":"})

    # Separators that introduce the value part of a cleaned match, in order
    # of precedence (colon first, then half-width / full-width yen sign).
    _VALUE_SEPARATORS = (":", "¥", "¥")

    def __init__(self, filename):
        """Remember the PDF path and define the fields to extract.

        Args:
            filename: path of the invoice PDF to read.
        """
        self.filename = filename
        # Field name -> regular expression locating that field in the page
        # text.  Label/value separators may be ASCII or full-width colons.
        self.template_fields = {
            "发票类型": r'[\u4e00-\u9fa5]+电子普通发票',
            "发票代码": r'发票代码(.*\d+)',
            "发票号码": r'发票号码(.*\d+)',
            "开票日期": r'开票日期(.*)',
            "名称": r'名\s*称\s*[::]\s*([\u4e00-\u9fa5]+)',
            "纳税人识别号": r'纳税人识别号\s*[::]\s*([a-zA-Z0-9]+)',
            "金额": r'小写.*(.*[0-9.]+)',
            "收款人": r'收\s*款\s*人\s*[::]\s*([\u4e00-\u9fa5]+)',
        }

    def extract_text(self):
        """Return the text of the first page of the PDF.

        Returns:
            str: the extracted text, or an empty string when the document
            has no pages or the page yields no text.
        """
        with pdfplumber.open(self.filename) as pdf:
            if not pdf.pages:
                return ""
            return pdf.pages[0].extract_text() or ""

    def re_text(self, bt, text):
        """Search *text* for pattern *bt* and return the cleaned-up match.

        Args:
            bt: a regular expression (compiled pattern or pattern string).
            text: the text to search.

        Returns:
            The whole match passed through ``re_block``, or ``None`` when the
            pattern does not occur in *text*.
        """
        found = re.search(bt, text)
        if found is None:
            return None
        return self.re_block(found[0])

    def re_block(self, text):
        """Strip spaces and closing parentheses and normalise colons."""
        return text.translate(self._CLEANUP_TABLE)

    def extract_fields(self):
        """Extract every configured field from the first page of the PDF.

        Each pattern in ``self.template_fields`` is searched for in the page
        text.  The cleaned match is then reduced to its value part: the piece
        following a colon, or following a yen sign for amounts.  Matches that
        contain no separator are kept unchanged.

        Returns:
            dict: field name -> extracted value.  A field whose pattern did
            not match maps to an empty string.
        """
        pdf_text = self.extract_text() or ""

        # Find the raw (cleaned) match for every configured pattern.
        matches = {}
        for field, regex in self.template_fields.items():
            matches[field] = self.re_text(re.compile(regex), pdf_text)

        # Keep only the value part of each match.
        for field, raw in matches.items():
            if raw is None:
                # Pattern not found: report an empty value instead of
                # failing on the membership tests below.
                matches[field] = ""
                continue
            for sep in self._VALUE_SEPARATORS:
                if sep in raw:
                    matches[field] = raw.split(sep)[1].strip()
                    break
        return matches

QTShow.py

import os
import sys
from collections import OrderedDict

import xlwt
from PyQt5.QtWidgets import QApplication, QMainWindow, QTableWidget, QTableWidgetItem

from ReadPdf import PDFExtractor


class MainWindow(QMainWindow):
    """Main window listing the fields extracted from every PDF in a folder.

    While the table is filled, the same data is written to an ``.xls``
    workbook (one header row followed by one row per PDF file).
    """

    def __init__(self, folder_path="/yourfilespath", output_path="invoice.xls"):
        """Read all PDFs in *folder_path*, show them and save them to Excel.

        Args:
            folder_path: directory that is scanned for ``*.pdf`` files.
            output_path: file name of the ``.xls`` workbook that is written.
        """
        super().__init__()

        # Collect the PDF files; sorted so the row order is reproducible.
        file_names = sorted(f for f in os.listdir(folder_path) if f.endswith(".pdf"))
        self.filename = output_path

        # Create the table widget: one row per file, file name + 8 fields.
        self.table_widget = QTableWidget()
        self.table_widget.setRowCount(len(file_names))
        self.table_widget.setColumnCount(9)
        self.setCentralWidget(self.table_widget)

        wb = xlwt.Workbook(encoding='utf-8')
        sh = wb.add_sheet('sheet 1')

        # Longest value (in characters) seen so far in each column, so the
        # sheet column ends up wide enough for its widest row.
        col_widths = {}

        for row_num, file_name in enumerate(file_names):
            file_path = os.path.join(folder_path, file_name)

            # Extract the fields and put the file name in front of them.
            self.matches = PDFExtractor(file_path).extract_fields()
            self.new_matches = OrderedDict([('文件名', file_name)])
            self.new_matches.update(self.matches)

            if row_num == 0:
                # Every file yields the same keys, so set the header once.
                titles = list(self.new_matches)
                self.table_widget.setColumnCount(len(titles))
                self.table_widget.setHorizontalHeaderLabels(titles)

            for col_num, (key, value) in enumerate(self.new_matches.items()):
                # A field that was not found must not break the widget/sheet.
                text = "" if value is None else str(value)
                if row_num == 0:
                    sh.write(0, col_num, key)
                # Sheet row 0 holds the header, data starts at row 1.
                sh.write(row_num + 1, col_num, text)
                col_widths[col_num] = max(col_widths.get(col_num, 0), len(text))
                self.table_widget.setItem(row_num, col_num, QTableWidgetItem(text))

        # xlwt measures widths in 1/256 of a character; cap at the largest
        # value a column width can hold.
        for col_num, chars in col_widths.items():
            sh.col(col_num).width = min((chars + 11) * 256, 65535)

        # Fit every table column to its contents.
        self.table_widget.resizeColumnsToContents()
        wb.save(self.filename)

    def setWindowSize(self):
        """Return a size 1.5 times the current size of the table widget."""
        return self.table_widget.size() * 1.5


if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = MainWindow()
    window.resize(window.setWindowSize())
    window.show()
    sys.exit(app.exec_())

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。