from dataiku.doctor.docgen.extractor.placeholder_parser import PlaceholderParser
from docx.text.run import Run
from docx.table import Table
from docx.text.paragraph import Paragraph
from dataiku.doctor.docgen.common.placeholder import Placeholder, TablePlaceholder, BlockPlaceholder, \
    HeaderPlaceholder, FooterPlaceholder, NestedPlaceholder
from dataiku.doctor.docgen.renderer.document_handler import DocumentHandler
import logging

logger = logging.getLogger(__name__)


class DocxParser(object):

    @staticmethod
    def get_safe_runs(p):
        """
        Sequence of |Run| instances corresponding to the <w:r> elements in the p paragraph.
        The difference with p.runs is that this method also includes the runs from accepted changes
        (meaning runs inserted when the "Track Changes" mode is enabled on Word)
        """
        return [Run(r, p) for r in p._p.xpath(".//w:r[not(ancestor::w:del)]")]

    @staticmethod
    def parse_text_and_tables(document):
        """
        Find placeholders that are located in the plain text and in tables.
        Can take as argument the document, a Cell, or a table of elements (paragraph & tables)
        :returns: a list of basic placeholders (Placeholder or TablePlaceholder, not Block, Nested etc.)
        :rtype: [Placeholder, TablePlaceholder]
        """
        placeholders = []

        if isinstance(document, list):
            elements = document
        else:
            elements = DocumentHandler.iter_block_items(document)

        for block in elements:
            if isinstance(block, Paragraph):
                placeholders.extend(DocxParser.parse_paragraph(block))
            if isinstance(block, Table):
                for row_id, row in enumerate(block.rows):
                    for column_id, cell in enumerate(row.cells):
                        in_table_placeholders = DocxParser.parse_text_and_tables(cell)
                        if len(in_table_placeholders) > 0:
                            placeholders.append(TablePlaceholder(block, row_id, column_id, in_table_placeholders))

        return placeholders

    @staticmethod
    def parse_text(document):
        """
        Find placeholders that are located in the plain text.
        :returns: a list of placeholders
        :rtype: [Placeholder]
        """
        return [
            Placeholder(name, placeholder_type, is_closing, para, start, end)
            for para in document.paragraphs
            for name, placeholder_type, is_closing, start, end in PlaceholderParser.parse(DocxParser.get_safe_runs(para))
        ]

    @staticmethod
    def parse_paragraph(paragraph):
        """
        Find the placeholders written in a single paragraph.
        The runs handed to the PlaceholderParser are the ones returned by get_safe_runs.
        :param paragraph: the paragraph to scan
        :returns: one Placeholder per match, all of them attached to ``paragraph``
        :rtype: [Placeholder]
        """
        collected = []
        matches = PlaceholderParser.parse(DocxParser.get_safe_runs(paragraph))
        for name, placeholder_type, is_closing, start, end in matches:
            collected.append(Placeholder(name, placeholder_type, is_closing, paragraph, start, end))
        return collected

    def parse_table(self, document):
        """
        Find the placeholders located inside the tables exposed by ``document.tables``.
        Each cell is scanned with parse_text; a cell without any placeholder produces nothing.
        :returns: one TablePlaceholder per (table, row, column) whose cell holds placeholders
        :rtype: [TablePlaceholder]
        """
        found = []
        for current_table in document.tables:
            for row_index, table_row in enumerate(current_table.rows):
                for col_index, table_cell in enumerate(table_row.cells):
                    cell_placeholders = self.parse_text(table_cell)
                    if not cell_placeholders:
                        continue
                    found.append(TablePlaceholder(current_table, row_index, col_index, cell_placeholders))
        return found

    def parse_headers(self, document):
        """
        Find the placeholders located inside the header paragraphs of every section.
        :param document: object exposing a ``sections`` sequence, each section having a ``header``
        :returns: one HeaderPlaceholder per match, carrying the index of its section
        :rtype: [HeaderPlaceholder]
        """
        header_placeholders = []
        for section_number, section in enumerate(document.sections):
            for para in section.header.paragraphs:
                matches = PlaceholderParser.parse(DocxParser.get_safe_runs(para))
                for name, placeholder_type, is_closing, start, end in matches:
                    header_placeholders.append(
                        HeaderPlaceholder(name, placeholder_type, is_closing, para, start, end, section_number))
        return header_placeholders

    def parse_footers(self, document):
        """
        Find the placeholders located inside the footer paragraphs of every section.
        :param document: object exposing a ``sections`` sequence, each section having a ``footer``
        :returns: one FooterPlaceholder per match, carrying the index of its section
        :rtype: [FooterPlaceholder]
        """
        footer_placeholders = []
        for section_number, section in enumerate(document.sections):
            for para in section.footer.paragraphs:
                matches = PlaceholderParser.parse(DocxParser.get_safe_runs(para))
                for name, placeholder_type, is_closing, start, end in matches:
                    footer_placeholders.append(
                        FooterPlaceholder(name, placeholder_type, is_closing, para, start, end, section_number))
        return footer_placeholders

    @staticmethod
    def get_end_conditionals_placeholders(placeholder_name, iterator, placeholders):
        """
        find the endif placeholder associated to the placeholder named "placeholder_name", starting from the iterator
        :param [Placeholder] placeholders: list of placeholder
        :param placeholder_name: placeholder name
        :param iterator: starting point
        :return: if we find a matching placeholder, return it, otherwise return None.
        """
        # On some really unwanted case, we can have multiple conditional placeholder using the same placeholder variable
        # We need to match the "if" with the correct "endif".
        recursivity_counter = 0
        for placeholder in placeholders[iterator:]:
            if placeholder.is_conditional and placeholder.is_closing and \
                    placeholder_name == placeholder.extract_name():
                if recursivity_counter > 0:
                    # We reach an endif that is associated to an inner conditional placeholder
                    recursivity_counter -= 1
                else:
                    return placeholder
            elif placeholder.is_conditional and not placeholder.is_closing and \
                placeholder_name == placeholder.extract_name():
                # we have an inner conditional placeholder. We try to see if the two placeholders names match.
                # if so, we increment the recursivity_counter to not take the first endif.
                recursivity_counter += 1
        return None

    @staticmethod
    def extract_conditionals_placeholders(placeholders):
        """
        Extract from the placeholders list the conditionals placeholders

        ie:
        {if myplaceholder == myvalue}
        a text to display.
        {endif myplaceholder}

        :param placeholders: List of extracted placeholders.
        :return: [:class:`BlockPlaceholder`]
        """
        conditional_placeholders = []
        i = 0
        while i < len(placeholders):
            if placeholders[i].is_conditional and not placeholders[i].is_closing:
                real_name = placeholders[i].extract_name()
                logger.debug("name: '%s' => '%s'", placeholders[i].tagname, real_name)
                closing_placeholder = DocxParser.get_end_conditionals_placeholders(real_name, i + 1, placeholders)
                if closing_placeholder is not None:
                    conditional_placeholders.append(BlockPlaceholder(placeholders[i], closing_placeholder))
                else:
                    logger.error("No match for conditional placeholder : '%s'.", placeholders[i].tagname)
            # else: non conditional or closing placeholder. We do not care
            i += 1
        return conditional_placeholders

    @staticmethod
    def extract_blocks(placeholders):
        """
        Split a list of placeholders into block placeholders and single placeholders.
        A block is an opening placeholder immediately followed, in the list, by a closing placeholder
        with the same tagname (the closing one is written with a leading /).

        ie: {mytable} table description with style{/mytable}

        Conditional placeholders are dropped from both results. A closing placeholder that does not
        directly follow its opening one is ignored as well.
        :param [Placeholder] placeholders: list of placeholder extracted from the docx file.
        :rtype: ([:class:`BlockPlaceholder`], [single_placeholders])
        """
        logger.debug("Placeholders before extraction :%s", placeholders)
        blocks = []
        single_placeholders = []
        last_index = len(placeholders) - 1
        skip_next = False

        for index, current in enumerate(placeholders):
            if skip_next:
                # this closing placeholder has just been consumed as the end of a block
                skip_next = False
                continue
            if current.is_conditional or current.is_closing:
                # conditionals are not handled here; a lone closing placeholder is left as is
                continue

            has_follower = index < last_index
            if has_follower and placeholders[index + 1].is_closing \
                    and current.tagname == placeholders[index + 1].tagname:
                blocks.append(BlockPlaceholder(current, placeholders[index + 1]))
                skip_next = True
            else:
                single_placeholders.append(current)

        logger.debug("Blocks found : %s", blocks)
        logger.debug("Placeholders left : %s", single_placeholders)
        return blocks, single_placeholders

    @staticmethod
    def build_placeholders_tree(placeholders):
        """
            Organize the placeholders as a tree
            Input is the basic placeholders, as a list (Placeholder, HeadingPlaceholder, FooterPlaceholder or TablePlaceholder)
            Output is still a list, but where opening and closing Placeholders have been glued together to form BlockPlaceholders, including nesting content for IF and FOREACH (using NestingPLaceholders)
        """
        placeholders_tree = []

        i = 0
        while i < len(placeholders):
            placeholder = placeholders[i]

            if isinstance(placeholder, TablePlaceholder):
                # for a table, we just parse what is inside
                placeholder.placeholders = DocxParser.build_placeholders_tree(placeholder.placeholders)
                placeholders_tree.append(placeholder)

            elif not placeholder.is_closing:
                # this is the opening of a placeholder. We need to check if it's a block placeholder
                closing, closing_index = DocxParser.get_matching_closing_placeholder(placeholder, i + 1, placeholders)

                if closing == None:
                    # no closing, this should be a simple placeholder. However, handle the error case where it should have a closing tag
                    if placeholder.requires_nesting():
                        logger.error("No matching closing placeholder was found for %s %s. Placeholder is ignored", placeholder.placeholder_type, placeholder.extract_name())
                    else:
                        placeholders_tree.append(placeholder)
                
                else: #placeholder has a closing matching tag.
                    if placeholder.requires_nesting():
                        # start of a nesting placeholder, recursive call on what is between the opening and the closing tags
                        inner_placeholders = placeholders[i + 1: closing_index]
                        inner_placeholders_tree = DocxParser.build_placeholders_tree(inner_placeholders)
                        placeholders_tree.append(NestedPlaceholder(placeholder, closing, inner_placeholders_tree))
                    else:
                        # this is just a block placeholder => still check if there is something inside (illegal) and warn
                        if i != closing_index - 1:
                            logger.error("Some placeholders have been found inside a block placeholder (%s). This is not supported and they will be ignored", placeholder.extract_name())
                        
                        placeholders_tree.append(BlockPlaceholder(placeholder, closing))
                    
                    i = closing_index # we skip the content, but since there is a i+=1 at the end of the loop, we target the closing tag
            else:
                # We are on a closing placeholder. This should never happen since the closing placeholder are parsed at the same time as the opening. Just warn and ignore
                logger.error("Closing placeholder found with no matching opening placeholder (%s). Check previous warnings as this situation can occur when the the opening placeholder actually exists, but was ignored because of a previous error")

            i += 1

        return placeholders_tree

    
    def extract_placeholder_tree(self, doc):
        """
        Extract the placeholders of one document block (Main text, header, footer or table cell):
        the block is first scanned with parse_text_and_tables, then the flat result is turned into a tree.
        :param Document | _Header | _Footer | _Cell doc: the block to parse
        :return The placeholders parsed as a tree (see build_placeholders_tree)
        """
        return DocxParser.build_placeholders_tree(self.parse_text_and_tables(doc))


    def extract_placeholder_tree_with_headers_footers(self, doc):
        """
        Extract the placeholders of a complete document: main part, headers and footers.
        One tree is built per part and the trees are simply concatenated, in this order:
        main part first, then, for each section, its header followed by its footer.
        :param Document doc: the document to parse
        :return The placeholders parsed as a tree (see build_placeholders_tree)
        """
        # The parsing order MUST match the order used during the rendering phase.
        tree = self.extract_placeholder_tree(doc)

        # Where a placeholder comes from does not matter for java, as long as it is in a tree:
        # the header / footer trees are appended to the main one.
        for section in doc.sections:
            header_tree = self.extract_placeholder_tree(section.header)
            tree.extend(header_tree)
            footer_tree = self.extract_placeholder_tree(section.footer)
            tree.extend(footer_tree)

        return tree

    @staticmethod
    def get_matching_closing_placeholder(placeholder, iterator, placeholders):
        """
        find the placeholder associated to the placeholder given as param, starting from the iterator
        :param [Placeholder] placeholders: list of placeholder
        :param placeholder: placeholder we are trying to match
        :param iterator: starting point
        :return: if we find a matching placeholder, return (it, index), otherwise return (None, 0).
        """
        # On some really unwanted case, we can have multiple conditional placeholder using the same placeholder variable
        # We need to match the "if" with the correct "endif".
        recursivity_counter = 0
        
        for i in range(iterator, len(placeholders)):
            ph = placeholders[i]

            # TablePlaceholders are ignored because it is not allowed to have closing and opening tags in & out a table
            if (not isinstance(ph, TablePlaceholder)) and \
                ph.placeholder_type == placeholder.placeholder_type and \
                ph.extract_name() == placeholder.extract_name():
                # We found a matching placeholder
                if ph.is_closing:
                    if recursivity_counter == 0:
                        return ph, i
                    recursivity_counter -= 1
                else:
                    # this is an inner matching placeholder (it might be illegal, but not always)
                    recursivity_counter += 1
        
        return None, 0

    def debug(self, document):
        """
        Dump the content of ``document`` to the module logger, at DEBUG level:
        every paragraph (format, style, runs and text), every table (cell texts) and the body elements.
        Nothing is computed when the DEBUG level is not enabled for the logger.
        :param document: the document to describe
        """
        if not logger.isEnabledFor(logging.DEBUG):
            # every message below is a debug one: avoid formatting the whole document for nothing
            return

        for i, p in enumerate(document.paragraphs):
            logger.debug("Paragraph: %d %s format: %s style: %s", i, hex(id(p._element)),
                         self.debug_format(p.paragraph_format), self.debug_c_style(p.style))
            # use the paragraph at hand instead of evaluating document.paragraphs again for each paragraph
            for j, r in enumerate(p.runs):
                logger.debug("\tRun [%d] : %s. style:%s, bold=%s, italic=%s, font=%s",
                             j, r.text, self.debug_c_style(r.style), r.bold, r.italic, self.debug_font(r.font))
            logger.debug("Text %s", p.text)

        # too spammy
        # logger.debug("Availables styles")

        # for style in document.styles:
        #    logger.debug("%s:%s format %s", style.name, style.element,
        #                  self.debug_format(style.paragraph_format) if isinstance(style, _ParagraphStyle) else "")
        for table in document.tables:
            logger.debug("Table: %s %s parent: %s", table, table._element, table._parent)
            for row in table.rows:
                logger.debug("====")
                for cell in row.cells:
                    logger.debug("cell Text: %s ", cell.text)
        logger.debug("Body:%s ", document._body._element)
        for body_element in document._body._element:
            logger.debug(body_element)

    def debug_c_style(self, c_style):
        """
        Describe a style and the chain of its base styles as a string, for debugging purposes.
        Each style is rendered as ``{<name>font=<font description> base=<base style description>}``,
        the end of the chain (or a missing style) being rendered as ``None``.
        :param c_style: the style to describe, may be None
        :rtype: str
        """
        openings = []
        current = c_style
        # walk the inheritance chain, from the style itself up to its last base style
        while current is not None:
            openings.append("{" + current.name + "font=" + self.debug_font(current.font) + " base=")
            current = current.base_style
        # one closing brace per style that has been opened
        return "".join(openings) + "None" + "}" * len(openings)

    @staticmethod
    def debug_format(format):
        return str(format) + " left_indent: " + str(format.left_indent) + " " + format.left_indent.__class__.__name__

    @staticmethod
    def debug_font(font):
        items = {
            "bold": font.bold,
            "color": font.color,
            "italic": font.italic,
            "name": font.name,
            "cs_bold": font.cs_bold,
            "cs_italic": font.cs_italic,
            "size": font.size}
        return ", ".join([attr + "=" + str(value) for attr, value in items.items()])
