#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.

import base64
import datetime
import logging

from html.parser import HTMLParser
from uuid import UUID

from OpenSSL import crypto
import requests
from requests import Session, exceptions, adapters
# Append extra cipher rules to urllib3's default cipher string.
# urllib3 2.x removed the private ``DEFAULT_CIPHERS`` constant, so it is only
# extended where it still exists instead of failing at import time.  The
# leading ':' keeps the appended rules separate from the last rule already in
# the list (without it the two rules are fused into a single token).
_urllib3_ssl = requests.packages.urllib3.util.ssl_
if hasattr(_urllib3_ssl, 'DEFAULT_CIPHERS'):
    _urllib3_ssl.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL'


# Layout of every log record and the timestamp format (day/month/year).
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
LOG_DATE = '%d/%m/%Y %H:%M:%S'
# Replace the standard level names with labels wrapped in ANSI escape
# sequences so ERROR, DEBUG and INFO are colored on terminals that support it.
logging.addLevelName(logging.ERROR, '\033[1;41mERROR\033[1;0m')
logging.addLevelName(logging.DEBUG, '\x1b[33mDEBUG\033[1;0m')
logging.addLevelName(logging.INFO, '\x1b[32mINFO\033[1;0m')
# NOTE: this configures the root logger at DEBUG level as an import side effect.
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=LOG_DATE)
log = logging.getLogger(__name__)


# Value passed as ``timeout=`` to the HTTP requests made by this module.
TIMEOUT = 10
# Value passed as ``verify=`` (TLS certificate verification) to those requests.
VERIFY_CERT = True


class FormLoginValues(HTMLParser):
    """Collect ``id -> value`` pairs from the ``<input>`` tags of a document.

    After ``feed()`` the mapping is available in ``self.values``.  Inputs that
    lack either the ``id`` or the ``value`` attribute are skipped.
    """

    def __init__(self):
        super().__init__()
        self.values = {}

    def handle_starttag(self, tag, attrs):
        """Record the ``id``/``value`` pair of every ``<input>`` start tag."""
        if tag != 'input':
            return
        attrib = dict(attrs)
        # Explicit membership test instead of a bare ``except: pass``: the
        # only expected failure is a missing attribute, and a bare except
        # would also hide unrelated errors (even KeyboardInterrupt).
        if 'id' in attrib and 'value' in attrib:
            self.values[attrib['id']] = attrib['value']


class FormValues(HTMLParser):
    """Collect ``name -> value`` pairs of hidden form fields.

    Only ``<input>`` and ``<select>`` tags whose ``type`` attribute is exactly
    ``hidden`` and that carry both ``name`` and ``value`` are stored in
    ``self.values``.
    """

    def __init__(self):
        super().__init__()
        self.values = {}

    def handle_starttag(self, tag, attrs):
        """Store the field when it is a hidden input/select with name and value."""
        if tag not in ('input', 'select'):
            return
        fields = dict(attrs)
        if fields.get('type') != 'hidden':
            return
        try:
            name, value = fields['name'], fields['value']
        except KeyError:
            # One of the two attributes is absent: nothing to record.
            return
        self.values[name] = value

class Filters(object):
    """Search criteria for a CFDI lookup by UUID and the form fields they map to.

    ``args`` is a dict with the required keys ``uuid``, ``date_from`` (a
    datetime or ``None``) and ``emitidas`` (bool), plus the optional keys
    ``day`` (bool), ``type_cfdi`` (str, default ``'-1'``) and ``date_to``
    (a datetime, default ``None``).
    """

    def __init__(self, args):
        self.uuid = args['uuid']

        self.date_from = args['date_from']
        self.day = args.get('day', False)
        self.emitidas = args['emitidas']
        # Previously hard-coded to None, which made get_post() fail with
        # AttributeError as soon as ``date_from`` was provided.
        self.date_to = args.get('date_to')
        self.stop = False
        self.hour = False
        self.minute = False
        self.second = False
        self._init_values(args)

    def __str__(self):
        tipo = 'Emitidas' if self.emitidas else 'Recibidas'
        return '{} - {} - {}'.format('Descargar por UUID', self.uuid, tipo)

    def _init_values(self, args):
        """Create ``self._post`` with the form fields that do not depend on dates."""
        status = '-1'
        type_cfdi = args.get('type_cfdi', '-1')
        center_filter = 'RdoFolioFiscal'
        rfc_receptor = ''

        script_manager = 'ctl00$MainContent$UpnlBusqueda|ctl00$MainContent$BtnBusqueda'
        self._post = {
            '__ASYNCPOST': 'true',
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            '__LASTFOCUS': '',
            '__VIEWSTATEENCRYPTED': '',
            'ctl00$ScriptManager1': script_manager,
            'ctl00$MainContent$hfInicialBool': 'false',
            'ctl00$MainContent$BtnBusqueda': 'Buscar CFDI',
            'ctl00$MainContent$TxtUUID': self.uuid,
            'ctl00$MainContent$FiltroCentral': center_filter,
            'ctl00$MainContent$TxtRfcReceptor': rfc_receptor,
            'ctl00$MainContent$DdlEstadoComprobante': status,
            'ctl00$MainContent$ddlComplementos': type_cfdi,
        }

    def get_post(self):
        """Return the complete form payload, including the date fields.

        The date fields are merged into ``self._post`` (which is mutated) and
        that same dict is returned.  Without ``date_from`` every date field
        keeps its neutral value (``'0'``, ``'00'`` or ``''``).  The end-of-range
        fields are filled only when both ``date_from`` and ``date_to`` are set;
        otherwise they also keep their neutral value.
        """
        date_from = self.date_from
        # As before, end-of-range values are only considered together with a
        # start date; a missing ``date_to`` no longer raises.
        date_to = self.date_to if date_from else None

        start_hour = start_minute = start_second = '0'
        end_hour = end_minute = end_second = '0'
        if date_from:
            start_hour = str(date_from.hour)
            start_minute = str(date_from.minute)
            start_second = str(date_from.second)
        if date_to:
            end_hour = str(date_to.hour)
            end_minute = str(date_to.minute)
            end_second = str(date_to.second)

        if self.emitidas:
            year1 = year2 = '0'
            start = end = ''
            if date_from:
                year1 = str(date_from.year)
                start = date_from.strftime('%d/%m/%Y')
            if date_to:
                year2 = str(date_to.year)
                end = date_to.strftime('%d/%m/%Y')
            data = {
                'ctl00$MainContent$hfInicial': year1,
                'ctl00$MainContent$CldFechaInicial2$Calendario_text': start,
                'ctl00$MainContent$CldFechaInicial2$DdlHora': start_hour,
                'ctl00$MainContent$CldFechaInicial2$DdlMinuto': start_minute,
                'ctl00$MainContent$CldFechaInicial2$DdlSegundo': start_second,
                'ctl00$MainContent$hfFinal': year2,
                'ctl00$MainContent$CldFechaFinal2$Calendario_text': end,
                'ctl00$MainContent$CldFechaFinal2$DdlHora': end_hour,
                'ctl00$MainContent$CldFechaFinal2$DdlMinuto': end_minute,
                'ctl00$MainContent$CldFechaFinal2$DdlSegundo': end_second,
            }
        else:
            year = month = '0'
            day = '00'
            if date_from:
                year = str(date_from.year)
                month = str(date_from.month)
                # The day is only sent when requested AND a date exists;
                # ``day=True`` without a date used to raise AttributeError.
                if self.day:
                    day = '{:02d}'.format(date_from.day)
            data = {
                'ctl00$MainContent$CldFecha$DdlAnio': year,
                'ctl00$MainContent$CldFecha$DdlMes': month,
                'ctl00$MainContent$CldFecha$DdlDia': day,
                'ctl00$MainContent$CldFecha$DdlHora': start_hour,
                'ctl00$MainContent$CldFecha$DdlMinuto': start_minute,
                'ctl00$MainContent$CldFecha$DdlSegundo': start_second,
                'ctl00$MainContent$CldFecha$DdlHoraFin': end_hour,
                'ctl00$MainContent$CldFecha$DdlMinutoFin': end_minute,
                'ctl00$MainContent$CldFecha$DdlSegundoFin': end_second,
            }
        self._post.update(data)
        return self._post


class Invoice(HTMLParser):
    """Parse a search-result page into a list of invoices.

    After ``feed()``:

    * ``invoices`` is a list of ``(uuid, info)`` tuples, one per table row
      that contained a valid UUID in its first column;
    * ``not_found`` is True when the "no results" panel is present with
      ``inline`` in its ``style`` attribute;
    * ``limit`` is True when the "record limit" panel is present.

    Rows are only read between the ``START_PAGE`` div and the ``END_PAGE``
    div.  Within a row, only text directly following a ``<span>`` start tag
    is considered, and the column is the 1-based count of ``<td>`` tags seen
    so far in the row.
    """
    START_PAGE = 'ContenedorDinamico'
    URL = 'https://portalcfdi.facturaelectronica.sat.gob.mx/'
    END_PAGE = 'ctl00_MainContent_pageNavPosition'
    LIMIT_RECORDS = 'ctl00_MainContent_PnlLimiteRegistros'
    NOT_RECORDS = 'ctl00_MainContent_PnlNoResultados'
    TEMPLATE_DATE = '%Y-%m-%dT%H:%M:%S'

    # Column number -> attribute that stores the cell text unchanged.
    # Columns 1, 9 and 10 need extra processing (see handle_data); columns
    # 11 and 13 are not read.
    _PLAIN_COLUMNS = {
        2: '_last_emisor_rfc',
        3: '_last_emisor',
        4: '_last_receptor_rfc',
        5: '_last_receptor',
        6: '_last_date_cfdi',
        7: '_last_date_timbre',
        8: '_last_pac',
        12: '_last_status',
        14: '_last_date_cancel',
    }

    def __init__(self):
        super().__init__()
        self._is_div_page = False
        self._current_tag = ''
        self._last_link = ''
        self.invoices = []
        self.not_found = False
        self.limit = False
        self._reset_row()

    def _reset_row(self):
        """Clear the per-row accumulators (the pending XML link is kept)."""
        self._col = 0
        self._last_link_pdf = ''
        self._last_uuid = ''
        self._last_status = ''
        self._last_date_cfdi = ''
        self._last_date_timbre = ''
        self._last_pac = ''
        self._last_total = ''
        self._last_type = ''
        self._last_date_cancel = ''
        self._last_emisor_rfc = ''
        self._last_emisor = ''
        self._last_receptor_rfc = ''
        self._last_receptor = ''

    def handle_starttag(self, tag, attrs):
        """Track page boundaries, table columns and the XML download link."""
        self._current_tag = tag
        if tag == 'div':
            attrib = dict(attrs)
            div_id = attrib.get('id')
            # ``style`` may be missing or valueless; treat both as "not inline"
            # instead of raising KeyError/TypeError in the middle of parsing.
            style = attrib.get('style') or ''
            if div_id == self.NOT_RECORDS and 'inline' in style:
                self.not_found = True
            elif div_id == self.LIMIT_RECORDS:
                self.limit = True
            elif div_id == self.START_PAGE:
                self._is_div_page = True
            elif div_id == self.END_PAGE:
                self._is_div_page = False
        elif self._is_div_page and tag == 'td':
            self._col += 1
        elif tag == 'span':
            attrib = dict(attrs)
            if attrib.get('id', '') == 'BtnDescarga':
                # The link is the first single-quoted chunk of ``onclick``.
                # A span without a usable handler is ignored rather than
                # aborting the whole parse.
                parts = (attrib.get('onclick') or '').split("'")
                if len(parts) > 1:
                    self._last_link = parts[1]

    def _build_invoice(self):
        """Return the info dict for the row accumulated so far.

        Raises:
            ValueError: if the CFDI date, the stamping date or the total of
                the row is empty or malformed (``strptime``/``float`` fail).
        """
        url_xml = ''
        if self._last_link:
            url_xml = '{}{}'.format(self.URL, self._last_link)
            # Consume the link so it is not reused by a later row.
            self._last_link = ''
        url_pdf = ''
        if self._last_link_pdf:
            url_pdf = '{}{}'.format(self.URL, self._last_link_pdf)

        parse = datetime.datetime.strptime
        date_cancel = None
        if self._last_date_cancel:
            date_cancel = parse(self._last_date_cancel, self.TEMPLATE_DATE)
        return {
            'url': url_xml,
            'acuse': url_pdf,
            'estatus': self._last_status,
            'date_cfdi': parse(self._last_date_cfdi, self.TEMPLATE_DATE),
            'date_timbre': parse(self._last_date_timbre, self.TEMPLATE_DATE),
            'date_cancel': date_cancel,
            'rfc_pac': self._last_pac,
            'total': float(self._last_total),
            'tipo': self._last_type,
            'emisor': self._last_emisor,
            'rfc_emisor': self._last_emisor_rfc,
            'receptor': self._last_receptor,
            'rfc_receptor': self._last_receptor_rfc,
        }

    def handle_endtag(self, tag):
        """On ``</tr>`` inside the result area, emit the row and reset state."""
        if not (self._is_div_page and tag == 'tr'):
            return
        if self._last_uuid:
            self.invoices.append((self._last_uuid, self._build_invoice()))
        self._reset_row()

    def handle_data(self, data):
        """Store span text of the current column in the matching accumulator."""
        cv = data.strip()
        if not (self._is_div_page and self._current_tag == 'span' and cv):
            return
        col = self._col
        if col == 1:
            # The first column may hold other text; only keep a valid UUID.
            try:
                UUID(cv)
            except ValueError:
                return
            self._last_uuid = cv
        elif col == 9:
            # Strip currency symbol and thousands separators for float().
            self._last_total = cv.replace('$', '').replace(',', '')
        elif col == 10:
            self._last_type = cv.lower()
        elif col in self._PLAIN_COLUMNS:
            setattr(self, self._PLAIN_COLUMNS[col], cv)


class PortalSAT(object):
    """Session-based client that logs in to the SAT CFDI portal and downloads
    the XML of a CFDI given its UUID.

    Attributes:
        error: last network error message recorded by ``_response`` ('' if none).
        is_connect: True after a successful ``login()``, False after ``logout()``.
        not_network: True once a request timed out or could not connect.
    """
    URL_MAIN = 'https://portalcfdi.facturaelectronica.sat.gob.mx/'
    HOST = 'cfdiau.sat.gob.mx'
    BROWSER = 'Mozilla/5.0 (X11; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0'
    REFERER = 'https://cfdiau.sat.gob.mx/nidp/app/login?id=SATUPCFDiCon&sid=0&option=credential&sid=0'

    PORTAL = 'portalcfdi.facturaelectronica.sat.gob.mx'
    URL_LOGIN = 'https://{}/nidp/app/login'.format(HOST)
    URL_FORM = 'https://{}/nidp/app/login?sid=0&sid=0'.format(HOST)
    URL_PORTAL = 'https://portalcfdi.facturaelectronica.sat.gob.mx/'
    URL_CONTROL = 'https://cfdicontribuyentes.accesscontrol.windows.net/v2/wsfederation'
    URL_CONSULTA = URL_PORTAL + 'Consulta.aspx'
    URL_RECEPTOR = URL_PORTAL + 'ConsultaReceptor.aspx'
    URL_EMISOR = URL_PORTAL + 'ConsultaEmisor.aspx'
    URL_LOGOUT = URL_PORTAL + 'logout.aspx?salir=y'

    def __init__(self):
        self.error = ''
        self.is_connect = False
        # Previously only created inside the error handlers of _response, so
        # reading it before any failure raised AttributeError.
        self.not_network = False
        self._emitidas = False
        self._session = Session()
        adapter = adapters.HTTPAdapter(
            pool_connections=512, pool_maxsize=512, max_retries=5)
        self._session.mount('https://', adapter)

    def _read_form(self, html, form=''):
        """Return the form values found in ``html``.

        ``form='login'`` collects ``id -> value`` of every input
        (FormLoginValues); any other value collects ``name -> value`` of the
        hidden fields (FormValues).
        """
        parser = FormLoginValues() if form == 'login' else FormValues()
        parser.feed(html)
        return parser.values

    def _network_error(self, msg):
        """Record a network failure, log it and return the empty result."""
        self.not_network = True
        log.error(msg)
        self.error = msg
        return ''

    def _response(self, url, method='get', headers=None, data=None):
        """Perform a GET (``method='get'``) or POST (anything else) request.

        Returns the response text on HTTP 200; otherwise logs the status and
        returns ''.  Timeouts and connection errors are logged, stored in
        ``self.error`` and also yield ''.

        NOTE(review): ``headers`` is accepted but, exactly as before, it is
        not forwarded to the request; only the session-level headers are
        sent.  Confirm the intended behavior before wiring it in.
        """
        # ``None`` defaults avoid sharing one mutable dict between calls.
        data = {} if data is None else data
        try:
            if method == 'get':
                result = self._session.get(
                    url, timeout=TIMEOUT, verify=VERIFY_CERT)
            else:
                result = self._session.post(
                    url, data=data, timeout=TIMEOUT, verify=VERIFY_CERT)
        except exceptions.Timeout:
            return self._network_error('Tiempo de espera agotado')
        except exceptions.ConnectionError:
            return self._network_error('Revisa la conexión a Internet')

        if result.status_code == 200:
            return result.text
        log.error('{} {} {}'.format(result.status_code, method.upper(), url))
        return ''

    def _get_headers(self, host, referer, ajax=False):
        """Build browser-like request headers for ``host``/``referer``.

        With ``ajax=True`` the cache-control and XMLHttpRequest headers are
        added as well.
        """
        accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'

        headers = {
            'Accept': accept,
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
            'DNT': '1',
            'Host': host,
            'Referer': referer,
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.BROWSER,
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        if ajax:
            headers.update({
                'Cache-Control': 'no-cache',
                'X-MicrosoftAjax': 'Delta=true',
                'x-requested-with': 'XMLHttpRequest',
                'Pragma': 'no-cache',
            })
        return headers

    def _get_post_type_search(self, html):
        """Return the hidden fields of ``html`` plus the search-type fields."""
        tipo_busqueda = 'RdoTipoBusquedaReceptor'
        if self._emitidas:
            tipo_busqueda = 'RdoTipoBusquedaEmisor'
        post = self._read_form(html)
        post.update({
            'ctl00$MainContent$TipoBusqueda': tipo_busqueda,
            '__ASYNCPOST': 'true',
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            'ctl00$ScriptManager1':
                'ctl00$MainContent$UpnlBusqueda|ctl00$MainContent$BtnBusqueda',
        })
        return post

    def _get_data_cert(self, cert):
        """Read the DER certificate at ``cert['cer']``.

        Returns ``(rfc, serie, fert)``: the first space-separated token of the
        subject's x500UniqueIdentifier, every second character of the serial
        number written in hexadecimal, and the notAfter timestamp without its
        first two characters.
        """
        with open(cert['cer'], 'rb') as fh:
            x509 = crypto.load_certificate(crypto.FILETYPE_ASN1, fh.read())
        rfc = x509.get_subject().x500UniqueIdentifier.split(' ')[0]
        serie = '{0:x}'.format(x509.get_serial_number())[1::2]
        fert = x509.get_notAfter().decode()[2:]
        return rfc, serie, fert

    def _sign(self, cert, data):
        """Sign ``data`` (SHA-256) with the PEM private key at ``cert['key']``.

        Returns the signature as text, base64-encoded twice.
        """
        with open(cert['key']) as fh:
            key = crypto.load_privatekey(crypto.FILETYPE_PEM, fh.read())
        # crypto.sign works on bytes; encode explicitly instead of relying on
        # pyOpenSSL's deprecated implicit str -> UTF-8 conversion.
        if isinstance(data, str):
            data = data.encode('utf-8')
        signature = crypto.sign(key, data, 'sha256')
        # NOTE(review): the double base64 encoding is kept exactly as in the
        # original; presumably the login endpoint expects it - confirm.
        return base64.b64encode(base64.b64encode(signature)).decode('utf-8')

    def _get_token(self, firma, co):
        """Return base64 of ``base64(co) + '#' + firma``."""
        co = base64.b64encode(co.encode('utf-8')).decode('utf-8')
        data = '{}#{}'.format(co, firma).encode('utf-8')
        return base64.b64encode(data).decode('utf-8')

    def _make_data_form(self, cert, values):
        """Build the login POST payload from the login form ``values``.

        Raises:
            KeyError: if ``values`` lacks ``tokenuuid`` or one of the fields
                copied into the payload.
        """
        rfc, serie, fert = self._get_data_cert(cert)
        co = '{}|{}|{}'.format(values['tokenuuid'], rfc, serie)
        firma = self._sign(cert, co)
        token = self._get_token(firma, co)

        keys = ('credentialsRequired', 'guid', 'ks', 'urlApplet')
        data = {k: values[k] for k in keys}
        data.update({
            'fert': fert,
            'token': token,
            'arc': '',
            'placer': '',
            'secuence': '',
            'seeder': '',
            'tan': '',
        })
        return data

    def login(self, cert):
        """Authenticate with the certificate/key pair described by ``cert``.

        ``cert`` is a mapping with the paths ``'cer'`` (DER certificate) and
        ``'key'`` (PEM private key).  Returns True and sets ``is_connect`` on
        success; returns False when the portal does not redirect to the login
        page, when the login form cannot be read, or when the credential POST
        yields no content.
        """
        referer = 'https://cfdiau.sat.gob.mx/nidp/wsfed/ep?id=SATUPCFDiCon&sid=0&option=credential&sid=0'
        url_login = 'https://cfdiau.sat.gob.mx/nidp/app/login?id=SATx509Custom&sid=0&option=credential&sid=0'
        msg_error = 'Error al identificarse en el SAT'

        # Same timeout/verify policy as every other request; without a
        # timeout this first call could block indefinitely.
        result = self._session.get(
            self.URL_MAIN, timeout=TIMEOUT, verify=VERIFY_CERT)
        if not result.history:
            # No redirect happened, so there is no login URL to follow
            # (previously an IndexError).
            log.error(msg_error)
            return False
        url_redirect = result.history[-1].headers['Location']
        self._session.headers['Host'] = self.HOST
        self._response(url_redirect)

        self._session.headers['User-Agent'] = self.BROWSER
        self._session.headers['Referer'] = referer
        result = self._response(url_login, 'post')

        values = self._read_form(result, 'login')
        required = ('tokenuuid', 'credentialsRequired', 'guid', 'ks', 'urlApplet')
        if any(key not in values for key in required):
            # Empty or unexpected login page (previously a KeyError).
            log.error(msg_error)
            return False
        data = self._make_data_form(cert, values)
        headers = self._get_headers(self.HOST, self.REFERER)
        self._session.headers.update(headers)
        result = self._response(url_login, 'post', data=data)

        if not result:
            log.error(msg_error)
            return False
        data = self._read_form(result)

        # Portal start page
        response = self._response(self.URL_MAIN, 'post', data=data)
        data = self._get_post_type_search(response)
        headers = self._get_headers(self.HOST, self.URL_MAIN)

        # Query page
        self._response(self.URL_CONSULTA, 'post', headers, data)
        log.info('Se ha identificado en el SAT')
        self.is_connect = True
        return True

    def logout(self):
        """Request the logout URL and mark the client as disconnected."""
        log.debug('Cerrando sessión en el SAT')
        self._response(self.URL_LOGOUT)
        self.is_connect = False
        log.info('Sesión cerrada en el SAT')

    def _get_filters(self, cfdi_uuid, emitidas=True):
        """Return a one-element tuple with the Filters for ``cfdi_uuid``."""
        data = {
            'uuid': cfdi_uuid,
            'day': False,
            'emitidas': emitidas,
            'rfc_emisor': '',
            'rfc_receptor': '',
            'type_cfdi': '-1',
            'date_from': None,
        }
        return (Filters(data),)

    def _merge(self, list1, list2):
        """Return a copy of dict ``list1`` updated with ``list2``."""
        result = list1.copy()
        result.update(list2)
        return result

    def _get_download_links(self, html):
        """Parse ``html`` and return ``(not_found, limit, invoices)``."""
        parser = Invoice()
        parser.feed(html)
        return parser.not_found, parser.limit, parser.invoices

    def _search_by_uuid(self, filters):
        """Run the search described by ``filters[0]`` and download its XML.

        Returns the XML text of the first invoice found, or '' when nothing
        was found, the row has no download link, or a request failed.
        """
        f = filters[0]
        log.info(str(f))
        url_search = self.URL_EMISOR if f.emitidas else self.URL_RECEPTOR

        result = self._response(url_search, 'get')
        post = self._merge(self._read_form(result), f.get_post())
        headers = self._get_headers(self.PORTAL, url_search)
        html = self._response(url_search, 'post', headers, post)
        not_found, _limit, invoices = self._get_download_links(html)

        # An empty list without the "no results" panel happens when the
        # request failed or the page could not be parsed; indexing it used
        # to raise IndexError.
        if not_found or not invoices:
            msg = f'\n\tNo se encontraron documentos en el filtro:\n\t{str(f)}'
            log.info(msg)
            return ''

        url = invoices[0][1]['url']
        if not url:
            return ''
        # Go through _response so the download shares the timeout, TLS
        # verification and error handling of every other request.
        return self._response(url)

    def get_uuid(self, cfdi_uuid):
        """Look up ``cfdi_uuid`` among received and then issued CFDI.

        Returns ``{'error': '', 'xml': <text or ''>}``; ``'error'`` is
        currently never filled in.
        """
        data = {'error': '', 'xml': ''}
        log.debug(f'Buscando UUID: {cfdi_uuid}')

        # Received first, then issued; stop at the first hit.
        for emitidas in (False, True):
            filters = self._get_filters(cfdi_uuid, emitidas)
            data['xml'] = self._search_by_uuid(filters)
            if data['xml']:
                break

        return data