from dataiku.core.export_model_charts_and_templates import ExportModelChartsAndTemplates
import zipfile
import io
import logging
import re

logger = logging.getLogger(__name__)


class PuppeteerExtractor(object):
    """
    Extract Puppeteer data from the zip file and keep them with a cache mechanism.

    The archive is downloaded and unpacked on the first call to ``get_contents``;
    subsequent calls are served from ``self.extracted_contents``.
    """

    def __init__(self, project_key, export_id):
        """
        :param project_key: project key forwarded to ExportModelChartsAndTemplates
        :param export_id: export identifier forwarded to ExportModelChartsAndTemplates
        """
        self.project_key = project_key
        self.export_id = export_id

        # Received from Puppeteer; stays None until extract_contents() has run
        self.extracted_contents = None

    def get_contents(self, puppeteer_config_name):
        """
        Return the extracted contents stored under ``puppeteer_config_name``.

        Triggers the download/extraction if it has not been done yet.

        :param puppeteer_config_name: key of the wanted entry in the extracted contents
        :return: ``{element_index: {section_index: {"type": ..., "data": ...}}}`` or
                 None when nothing is available for that name
        """
        logger.info("get_image of %s", puppeteer_config_name)
        if self.extracted_contents is None:
            self.extract_contents()
        if self.extracted_contents is None:
            logger.error("Unable to generate content (image and templated texts)")
            return None
        if puppeteer_config_name not in self.extracted_contents:
            logger.error("Unable to get content for %s.", puppeteer_config_name)
            return None
        return self.extracted_contents[puppeteer_config_name]

    def extract_contents(self):
        """
        Download the "DOM_ELEMENTS" export and cache its unzipped contents
        in ``self.extracted_contents``.
        """
        logger.info("Downloading charts and templates")
        emc = ExportModelChartsAndTemplates(self.project_key, self.export_id, "DOM_ELEMENTS")

        # The content type is not needed here: the payload is always handled as a zip
        (_, contents) = emc.download()
        self.extracted_contents = self.unzip(contents)

        logger.info("extracted_contents: %s", len(self.extracted_contents))

    @staticmethod
    def unzip(contents):
        """
        Unpack a zip archive whose members are named
        ``<config_name>.<element_index>.<section_index>.<extension>``.

        Only ``png``, ``txt`` and ``json`` members are kept. Members whose name does
        not follow the expected format are logged and skipped instead of aborting
        the whole extraction.

        :param contents: raw bytes of the zip archive
        :return: ``{config_name: {element_index: {section_index: {"type": extension, "data": bytes}}}}``
        """
        logger.info("Unzipping puppeteer zip result file.")
        supported_extensions = ("png", "txt", "json")
        extracted_contents = {}

        # The context manager guarantees the archive is closed, even on error
        with zipfile.ZipFile(io.BytesIO(contents), "r") as zfile:
            for filename in zfile.namelist():
                logger.debug("File name: %s", filename)
                parts = filename.split(".")
                if len(parts) != 4:
                    logger.error("PuppeteerExtractor: File in zip package has an unexpected name : %s", filename)
                    continue
                (config_name, element_index, section_index, extension) = parts
                logger.debug("configName= %s | element index= %s | image index= %s | extension= %s",
                             config_name, element_index, section_index, extension)
                try:
                    element_index = int(element_index)
                    section_index = int(section_index)
                except ValueError:
                    logger.error("PuppeteerExtractor: File in zip package has an unexpected name : %s", filename)
                    continue
                if extension not in supported_extensions:
                    continue
                # Some content are a set of images (or texts), hence the two nested levels
                sections = extracted_contents.setdefault(config_name, {}).setdefault(element_index, {})
                sections[section_index] = {"type": extension, "data": zfile.read(filename)}

        return extracted_contents

class SingleExportExtractor(object):
    """
    Extract data from an export that contains only one export (but it could be several zipped files if the export requires pagination)
    Only supports single png file or zip containing files named Xxxxx-Part-x-y.png
    """

    def __init__(self, project_key, export_id, export_type):
        """
        Download the export and extract its contents right away.

        :param project_key: project key forwarded to ExportModelChartsAndTemplates
        :param export_id: export identifier forwarded to ExportModelChartsAndTemplates
        :param export_type: export type forwarded to ExportModelChartsAndTemplates
        """
        self.project_key = project_key
        self.export_id = export_id

        logger.info("Downloading charts and templates")
        emc = ExportModelChartsAndTemplates(project_key, export_id, export_type)
        (content_type, contents) = emc.download()

        logger.info("extracted_contents: %s", content_type)

        # A lone png is wrapped as-is; anything else is handled as a zip archive
        if content_type == "image/png":
            self.extracted_contents = self.from_single_file("png", contents)
        else:
            self.extracted_contents = self.unzip(contents)

        logger.info("extracted_contents: %s", len(self.extracted_contents))

    def get_contents(self, config_name):
        """
        Return everything that was extracted from the export.

        :param config_name: only used for logging, the export holds a single content
        :return: ``{element_index: {section_index: {"type": ..., "data": ...}}}`` or
                 None when nothing was extracted
        """
        logger.info("get_image for %s", config_name)
        if self.extracted_contents is None:
            logger.error("Unable to generate content (image and templated texts)")
            return None
        return self.extracted_contents

    @staticmethod
    def from_single_file(type, contents):
        """
        Wrap a single file in the same nested structure as the one built by ``unzip``.

        :param type: value stored under the "type" key (e.g. "png")
        :param contents: value stored under the "data" key
        :return: ``{0: {0: {"type": type, "data": contents}}}``
        """
        extracted_contents = {
            0: {
                0: {
                    "type": type,
                    "data": contents
                }
            }
        }
        return extracted_contents

    @staticmethod
    def unzip(contents):
        """
        Unpack a zip archive whose member names contain
        ``Part-<element_index>-<section_index>.<extension>``.

        Members whose name does not match are logged and skipped.

        :param contents: raw bytes of the zip archive
        :return: ``{element_index: {section_index: {"type": extension, "data": bytes}}}``
        """
        logger.info("Unzipping puppeteer zip result file.")
        extracted_contents = {}

        # The context manager guarantees the archive is closed, even on error
        with zipfile.ZipFile(io.BytesIO(contents), "r") as zfile:
            for filename in zfile.namelist():
                logger.debug("File name: %s", filename)
                m = re.search(r'Part-(\d+)-(\d+)\.(\w+)', filename)

                if m is None:  # file name doesn't have the expected format
                    logger.error("SingleExportExtractor: File in zip package has an unexpected name : %s", filename)
                    # Nothing can be indexed without the part numbers: skip this member
                    continue

                element_index = int(m.group(1))
                section_index = int(m.group(2))
                extension = m.group(3)

                # Some content are a set of images
                sections = extracted_contents.setdefault(element_index, {})
                sections[section_index] = {"type": extension, "data": zfile.read(filename)}

        return extracted_contents


class MultiExtractor(object):
    """
    Generalization of the extractor concept to several exports.

    Acts as a map exportId => extractor: an extractor is only built the first
    time it is requested, then reused for every later request on the same id.
    """

    def __init__(self, project_key):
        """
        :param project_key: project key handed over to every extractor built here
        """
        self.extractors = {}
        self.project_key = project_key

    def build_extractor(self, export_id, export_type):
        """
        Create the extractor of ``export_id`` and register it, replacing any
        extractor previously stored under that id.
        """
        # add some conditions on export_type if some future exports require a different Extractor type
        # (different file format, different file name convention in the zip...)
        extractor = SingleExportExtractor(self.project_key, export_id, export_type)
        self.extractors[export_id] = extractor

    def get_extractor(self, export_id, export_type):
        """
        Return the extractor registered for ``export_id``, building it first when
        it does not exist yet.
        """
        if export_id in self.extractors:
            return self.extractors[export_id]
        self.build_extractor(export_id, export_type)
        return self.extractors[export_id]
