#!/usr/bin/env python3
"""Convert a CFDI XML document into a plain Python dictionary.

The document is expected to use the namespaces listed in ``NS_CFDI``
(``cfdi``, ``tfd`` and ``nomina12``).  Run as a script, the module parses
the file given as first argument and prints the resulting dictionary.
"""

import datetime
import sys
import lxml.etree as ET
from decimal import Decimal, getcontext


# Precision of the process-wide decimal context (affects Decimal arithmetic,
# not the exact values produced when parsing attribute strings).
getcontext().prec = 6

TEMPLATE_DATE = '%Y-%m-%dT%H:%M:%S'

NS_CFDI = {
    'cfdi': 'http://www.sat.gob.mx/cfd/3',
    'tfd': 'http://www.sat.gob.mx/TimbreFiscalDigital',
    'nomina12': 'http://www.sat.gob.mx/nomina12',
}

PRE = '/cfdi:Comprobante'

CADENA = "||{Version}|{UUID}|{FechaTimbrado}|{SelloCFD}|{NoCertificadoSAT}||"


class CfdiToDic:
    """Parse a CFDI XML document and expose its content as a dictionary.

    Usage::

        cfdi = CfdiToDic()
        cfdi.parse(xml_bytes)
        cfdi.data['comprobante']['Total']

    Passing the XML directly to the constructor is equivalent to calling
    :meth:`parse` right after construction.

    Attributes:
        complements: When true (the default) ``parse`` blanks the
            ``Certificado`` entry and stores ``cadenaoriginal`` in
            ``data['comprobante']``; when false it stores the serialized
            document under ``data['comprobante']['xml']`` instead.
    """

    # Attributes converted to Decimal on every cfdi:Concepto.
    _DETAIL_DECIMALS = ('Cantidad', 'ValorUnitario', 'Descuento', 'Importe')
    # Attributes converted to Decimal on per-concept Traslado/Retencion nodes.
    _TAX_DECIMALS = ('Base', 'TasaOCuota', 'Importe')

    def __init__(self, source=None):
        """Create the parser, optionally parsing ``source`` immediately.

        Args:
            source: XML content accepted by ``lxml.etree.fromstring``
                (the script entry point passes ``bytes``), or ``None`` to
                defer parsing to an explicit :meth:`parse` call.
        """
        self._data = {}
        self._tree = None
        self.complements = True
        if source is not None:
            self.parse(source)

    def parse(self, source):
        """Parse ``source`` and fill :attr:`data`.

        Populates the keys ``comprobante``, ``timbre``, ``emisor``,
        ``receptor``, ``conceptos`` and ``impuestos``; when a
        ``nomina12:Nomina`` complement exists, also ``nomina``,
        ``percepciones`` and ``deducciones``.  Sections whose node is
        missing from the document are stored as empty dictionaries.

        Args:
            source: XML content accepted by ``lxml.etree.fromstring``.

        Raises:
            KeyError: If the root element lacks ``Total`` or ``Fecha``.
        """
        # Start from a clean slate so that reusing the instance never leaks
        # keys (e.g. 'nomina') from a previously parsed document.
        self._data = {}
        self._tree = ET.fromstring(source)

        header = dict(self._tree.attrib)
        header['Total'] = self._str_to_decimal(header['Total'])
        header['Fecha'] = self._str_to_date(header['Fecha'])
        self._data['comprobante'] = header

        self._data['timbre'] = self._get_attr(
            f'{PRE}/cfdi:Complemento/tfd:TimbreFiscalDigital')
        self._data['emisor'] = self._get_attr(f'{PRE}/cfdi:Emisor')
        self._data['receptor'] = self._get_attr(f'{PRE}/cfdi:Receptor')

        self._parse_details()
        self._parse_taxes()
        self._parse_nomina()
        self._complements()
        return

    def _parse_details(self):
        """Store every ``cfdi:Concepto`` as a dict in ``data['conceptos']``.

        Numeric attributes are converted to ``Decimal`` and the taxes found
        inside each concept are attached under the ``taxes`` key.
        """
        name = f'{PRE}/cfdi:Conceptos/cfdi:Concepto'
        rows = []
        for detail in self._tree.xpath(name, namespaces=NS_CFDI):
            row = dict(detail.attrib)
            self._to_decimals(row, self._DETAIL_DECIMALS)
            row['taxes'] = self._get_taxes(detail)
            rows.append(row)
        self._data['conceptos'] = rows
        return

    def _get_taxes(self, node):
        """Collect the Traslado/Retencion elements found under ``node``.

        Args:
            node: Element whose subtree (itself included) is scanned.

        Returns:
            ``{'traslados': [...], 'retenciones': [...]}`` where each item
            holds the element attributes, with ``Base``, ``TasaOCuota`` and
            ``Importe`` converted to ``Decimal`` when present.  Elements
            without attributes are ignored.
        """
        data = {'traslados': [], 'retenciones': []}
        for child in node.iter():
            tag = child.tag
            # Comments and processing instructions have a non-string tag.
            if not isinstance(tag, str):
                continue
            if not tag.endswith(('Traslado', 'Retencion')):
                continue
            row = dict(child.attrib)
            if not row:
                continue
            # Some attributes may be absent (e.g. TipoFactor="Exento" has
            # neither TasaOCuota nor Importe), so convert only what exists.
            self._to_decimals(row, self._TAX_DECIMALS)
            key = 'traslados' if tag.endswith('Traslado') else 'retenciones'
            data[key].append(row)
        return data

    def _parse_taxes(self):
        """Store the document-level ``cfdi:Impuestos`` in ``data['impuestos']``.

        Totals and per-tax amounts are converted to ``Decimal``.  The keys
        ``traslados`` and ``retenciones`` are added only when the document
        contains the corresponding elements.
        """
        taxes = self._get_attr(f'{PRE}/cfdi:Impuestos')
        self._to_decimals(
            taxes, ('TotalImpuestosTrasladados', 'TotalImpuestosRetenidos'))
        self._data['impuestos'] = taxes

        sections = (
            ('traslados', 'cfdi:Traslados/cfdi:Traslado',
                ('TasaOCuota', 'Importe')),
            ('retenciones', 'cfdi:Retenciones/cfdi:Retencion',
                ('Importe',)),
        )
        for key, path, fields in sections:
            name = f'{PRE}/cfdi:Impuestos/{path}'
            nodes = self._tree.xpath(name, namespaces=NS_CFDI)
            if not nodes:
                continue
            rows = []
            for tax in nodes:
                values = dict(tax.attrib)
                self._to_decimals(values, fields)
                rows.append(values)
            taxes[key] = rows
        return

    def _parse_nomina(self):
        """Extract the ``nomina12:Nomina`` complement, if the document has one.

        Stores the complement attributes in ``data['nomina']``, merges the
        payroll Emisor/Receptor attributes into ``data['emisor']`` and
        ``data['receptor']``, and fills ``data['percepciones']`` and
        ``data['deducciones']`` (totals plus the list of individual items).
        Does nothing when the complement is absent.
        """
        found = self._tree.xpath(
            '//cfdi:Complemento/nomina12:Nomina', namespaces=NS_CFDI)
        if not found:
            return
        node = found[0]
        self._data['nomina'] = dict(node.attrib)

        # Paths start with './/' so the search is limited to this Nomina
        # node; a leading '//' would search the whole document instead.
        self._data['emisor'].update(
            self._get_attr('.//nomina12:Emisor', node))
        self._data['receptor'].update(
            self._get_attr('.//nomina12:Receptor', node))

        percepciones = self._get_attr('.//nomina12:Percepciones', node)
        percepciones['percepciones'] = [
            dict(n.attrib) for n in node.xpath(
                './/nomina12:Percepcion', namespaces=NS_CFDI)]
        self._data['percepciones'] = percepciones

        deducciones = self._get_attr('.//nomina12:Deducciones', node)
        deducciones['deducciones'] = [
            dict(n.attrib) for n in node.xpath(
                './/nomina12:Deduccion', namespaces=NS_CFDI)]
        self._data['deducciones'] = deducciones
        return

    def _complements(self):
        """Add the derived entries of ``data['comprobante']``.

        With ``complements`` disabled only the serialized XML is stored
        under ``xml``.  Otherwise ``Certificado`` is blanked and
        ``cadenaoriginal`` is built from the timbre attributes; it is left
        empty when the document has no ``tfd:TimbreFiscalDigital`` node.

        Raises:
            KeyError: If the timbre exists but lacks a field used by
                ``CADENA``.
        """
        header = self._data['comprobante']
        if not self.complements:
            header['xml'] = self.xml
            return

        header['Certificado'] = ''
        timbre = self._data['timbre']
        header['cadenaoriginal'] = CADENA.format(**timbre) if timbre else ''
        return

    def _get_attr(self, node_name, node=None):
        """Return the attributes of the first element matching an XPath.

        Args:
            node_name: XPath expression using the ``NS_CFDI`` prefixes.
            node: Element the expression is evaluated on; defaults to the
                document root.

        Returns:
            A new dict with the element attributes, or an empty dict when
            nothing matches.
        """
        root = self._tree if node is None else node
        found = root.xpath(node_name, namespaces=NS_CFDI)
        if not found:
            return {}
        return dict(found[0].attrib)

    def _to_decimals(self, row, fields):
        """Convert in place the ``fields`` present in ``row`` to ``Decimal``."""
        for field in fields:
            if field in row:
                row[field] = self._str_to_decimal(row[field])

    def _str_to_date(self, str_date, template=TEMPLATE_DATE):
        """Parse ``str_date`` with ``template`` into a ``datetime``."""
        return datetime.datetime.strptime(str_date, template)

    def _str_to_decimal(self, value):
        """Return ``value`` converted to ``Decimal``."""
        return Decimal(value)

    def __str__(self):
        return str(self._data)

    @property
    def xml(self):
        """The parsed document serialized as pretty-printed UTF-8 text."""
        data = ET.tostring(
            self._tree, pretty_print=True,
            xml_declaration=True, encoding='utf-8').decode()
        return data

    @property
    def data(self):
        """Dictionary filled by the last :meth:`parse` call."""
        return self._data


def main(path):
    """Parse the CFDI file at ``path`` and print the resulting dictionary."""
    with open(path, 'rb') as f:
        data = f.read()
    cfdi = CfdiToDic(data)
    print(cfdi)
    return


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} CFDI_XML_FILE')
    main(sys.argv[1])