import copy
from cell import cell_range_to_string, cell_to_string, parse_cell_range
import dataiku
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
import io
import logging
import pandas
from tempfile import NamedTemporaryFile

from dataset_name_search import DatasetNameSearch

# Module-level logger used by the functions of this module
logger = logging.getLogger(__name__)
# Configures logging when the module is imported: INFO level and above,
# each message prefixed with "Excel Templater" and its level name
logging.basicConfig(
    level=logging.INFO, format="Excel Templater | %(levelname)s - %(message)s"
)


# Write Excel to managed folder
def write_workbook_to_managed_folder(workbook, output_folder, file_name):
    """
    Writes a workbook as an Excel file into a managed folder

    The workbook is serialized into an in-memory buffer instead of a named temporary file:
    reopening a still-open NamedTemporaryFile through its name is platform dependent
    (it fails on Windows), and the buffer also avoids a round trip through the disk.

    :param workbook: Workbook to save, it must expose a `save(target)` method accepting a binary file-like object
    :param output_folder: Folder whose `get_writer(file_name)` returns a writer usable as a context manager
    :param file_name: Name of the file to write in the output folder
    """
    buffer = io.BytesIO()
    workbook.save(buffer)
    output = buffer.getvalue()

    with output_folder.get_writer(file_name) as file_handle:
        logger.info("Writing the output Excel document")
        file_handle.write(output)
        logger.info("Done writing the output Excel document")


def read_workbook_from_managed_folder(
    input_folder: dataiku.Folder, template_file_name: str
):
    """
    Reads the input template file from a managed folder and loads it as a Workbook

    :param input_folder: Folder containing the template file
    :param template_file_name: Name of the template file inside the input folder
    :return: Workbook loaded from the template file
    :raises IOError: If the template file could not be downloaded or loaded; the original error is chained as the cause
    """
    logger.info("Template file to be used: {}".format(template_file_name))

    try:
        with input_folder.get_download_stream(template_file_name) as file_handle:
            # The stream is fully read in memory before being handed to openpyxl
            bytes_in = io.BytesIO(file_handle.read())
            logger.info("Reading the Excel template file")
            workbook = openpyxl.load_workbook(bytes_in)
            logger.info("Done reading the Excel template file")
    except Exception as e:
        # Explicit chaining keeps the root cause (missing file, invalid archive...) attached to the raised error
        raise IOError(
            f"The template file {template_file_name} could not be read. Please check that it does exist in the input folder. Error: {e}"
        ) from e

    return workbook


def populate_table_in_worksheet(
    df: pandas.DataFrame,
    worksheet: Worksheet,
    start_row: int,
    start_col: int,
    include_column_names: bool,
):
    """
    Copies a dataset into a worksheet, its top-left value landing on the given row/column position

    Once the cells are written, the tables of the worksheet are adapted to the written area.

    :param df: Dataset as a DataFrame
    :param worksheet: Worksheet to populate
    :param start_row: Index of the first row to write on
    :param start_col: Index of the first column to write on
    :param include_column_names: To write the column names on the first row, above the values
    :return: The populated worksheet (same instance as the one received)
    """
    values = df.values
    first_written_row = start_row
    first_data_row = start_row
    total_written_rows = df.shape[0]

    if include_column_names:
        logger.info("Adding column names in output worksheet")

        for col_offset, column_name in enumerate(df.columns):
            header_cell = worksheet.cell(
                row=first_written_row, column=start_col + col_offset
            )
            header_cell.value = column_name

        # The values start one row below the column names
        first_data_row += 1
        total_written_rows += 1

    for row_offset, row_values in enumerate(values):
        target_row = first_data_row + row_offset
        for col_offset, cell_value in enumerate(row_values):
            target_cell = worksheet.cell(row=target_row, column=start_col + col_offset)
            target_cell.value = cell_value

    adapt_tables_from_worksheet(worksheet, total_written_rows, first_written_row)
    return worksheet


def convert_all_time_columns_format(df):
    """
    Replaces, in place, each datetime column of a DataFrame by its values formatted as `%Y-%m-%d %H:%M:%S` strings

    :param df: DataFrame to update; columns that are not of a datetime type are left untouched
    """
    is_datetime = pandas.api.types.is_datetime64_any_dtype

    for column in df.columns:
        series = df[column]
        if not is_datetime(series):
            continue

        logger.info("Column {} as dates and need conversion".format(column))
        df[column] = series.dt.strftime("%Y-%m-%d %H:%M:%S")


def find_tags_in_worksheet(worksheet: Worksheet, dataset_keyword: str):
    """
    Finds all the tags composed of `<dataset keyword>.<optional project key>.<dataset name>` in a worksheet with their coordinates.

    A cell is a tag when the string form of its value starts with the dataset keyword.
    The tag is returned as that string form, so the returned tag value is always a `str`,
    even when the cell holds another type (number, date...).

    :param worksheet: Worksheet to scan
    :param dataset_keyword: The keyword defined to locate the cell in which to import the dataset
    :return: List of `[tag value, row index, column index]`, ordered row by row then column by column (indexes start at 1)
    """
    tags = []
    # Read once: the dimensions are the same for every row of the scan
    max_row = worksheet.max_row
    max_column = worksheet.max_column
    logger.info(
        f"Scanning worksheet {worksheet.title} for tags containing keyword: {dataset_keyword} over {max_column} columns and {max_row} rows"
    )
    for row_index in range(1, max_row + 1):
        for col_index in range(1, max_column + 1):
            value = worksheet.cell(row=row_index, column=col_index).value
            if value is None:
                continue

            # The matched text is what gets stored, so callers can safely use string operations on the tag
            text = str(value)
            if text.startswith(dataset_keyword):
                tags.append([text, row_index, col_index])
    return tags


def populate_workbook_from_dataset(
    workbook: Workbook,
    dataset_keyword: str,
    dataset_name_search: DatasetNameSearch,
    include_column_headers: bool,
):
    """
    Writes datasets in the worksheets of a workbook, at the position of each tag found

    Every sheet of the workbook is scanned for tags. For each tag, the matching dataset is looked up
    with the search service, read as a DataFrame and written from the cell holding the tag.
    A tag without matching dataset is logged as a warning and left as is.

    :param workbook: Workbook instance representing the Excel file
    :param dataset_keyword: The keyword defined to locate the cell in which to import the dataset
    :param dataset_name_search: Injected search service on datasets name
    :param include_column_headers: To include column names on the starting row
    :raises Exception: If a tag does not hold exactly three elements or if its value contains no `.`
    """
    for sheet_name in workbook.sheetnames:
        worksheet = workbook[sheet_name]

        for tag in find_tags_in_worksheet(worksheet, dataset_keyword):  # type: ignore
            logger.info("Processing tag {}".format(tag))
            if len(tag) != 3:
                raise Exception(
                    "Tag cannot be properly extracted from sheet {}".format(sheet_name)
                )

            tag_value, tag_row, tag_col = tag
            if "." not in tag_value:
                raise Exception("Tag {} is not well formatted".format(tag_value))

            # Checks that the wanted dataset_name exists in the input datasets, handling case insensitivity
            match = dataset_name_search.find_from_tag(tag_value)
            if match is None:
                logger.warning(
                    f"The dataset associated to {tag_value} should be declared as input of this recipe. Not populating the associated tag"
                )
                continue

            dataset_fully_qualified_name = match.to_string()
            logger.info(f"Matched dataset {dataset_fully_qualified_name}")

            df = dataiku.Dataset(dataset_fully_qualified_name).get_dataframe()
            # Datetime columns are turned into strings before being written
            convert_all_time_columns_format(df)
            populate_table_in_worksheet(
                df,
                worksheet,  # type: ignore
                tag_row,
                tag_col,
                include_column_headers,
            )

def adapt_tables_from_worksheet(
    worksheet: Worksheet,
    written_row_count: int,
    written_start_row: int,
):
    """
    Adapts each table on a worksheet:
    - Updates the table headers if their value is not the same as the value on the corresponding cell (to prevent Excel errors)
    - Resizes vertically the table to fit a dataset if the table headers are located on the first row where a dataset was written

    A table whose cell range cannot be parsed is logged as an error and left untouched on the worksheet.

    :param worksheet: Worksheet where a dataset has been written on
    :param written_row_count: Number of rows that was written for a dataset (counting the optional dataset header)
    :param written_start_row: First row at which the dataset has been written (including the dataset headers)
    """
    # Handles the case of an empty dataset and no included dataset headers
    if written_row_count == 0:
        return

    # Snapshot of the keys, as tables are removed from and added back to the worksheet while looping
    table_keys = list(worksheet.tables.keys())
    logger.info(f"Worksheet {worksheet.title} contains {len(table_keys)} tables")

    for table_key in table_keys:
        # Deep copies the table info so it can be removed from the worksheet, updated and added again
        table = copy.deepcopy(worksheet.tables[table_key])
        logger.info(
            f"Adapting table located at {table.ref} of worksheet {worksheet.title}"
        )

        # The range is parsed before the table is removed, so a faulty table is not lost from the worksheet
        try:
            table_start_column, table_start_row, table_end_column, _table_end_row = (
                parse_cell_range(table.ref)
            )
        except ValueError as e:
            # Continues to the next table if an error occurred while parsing the cell reference
            logger.error(
                f"Error while parsing cell range reference {table.ref}. {worksheet.title} should be the worksheet containing the faulty table. Error: {e}"
            )
            continue

        # Deletes the current table as it cannot be just updated (using the key it is actually stored under)
        del worksheet.tables[table_key]

        # Updates the table headers value (table column name) to their associated cell value
        # If no dataset was written on the same cell position, the header and cell values are the same.
        for i, table_column in enumerate(table.tableColumns):
            cell = worksheet.cell(column=table_start_column + i, row=table_start_row)
            cell_ref = cell_to_string(table_start_column + i, table_start_row)

            # The table header and the value at the associated cell need to be a string (and equal)
            try:
                if pandas.isnull(cell.value):
                    logger.info(f"The cell value located at {cell_ref} on worksheet {worksheet.title} was empty. Updating cell back to its table header value.")
                    cell.value = table_column.name
                else:
                    cell_value_str = str(cell.value)
                    table_column.name = cell_value_str
                    cell.value = cell_value_str
            except Exception as e:
                # Best effort: keeps going if a cell value could not be casted to string
                logger.warning(
                    f"The cell value located at {cell_ref} on worksheet {worksheet.title}, on the same position than the original table header {table_column.name}, could not be converted to string. Error: {e}"
                )

        # Resizes vertically the table to fit the new dataset, if they start on the same row
        if table_start_row == written_start_row:
            table_new_end_row = table_start_row + written_row_count - 1

            # Keeps at least one row below the table headers.
            # NOTE(review): the new end row is compared with the table start row, not with its previous
            # end row, so a table taller than the written dataset is shrunk as well - confirm this is intended.
            if table_new_end_row > table_start_row:
                table.ref = cell_range_to_string(
                    table_start_column, table_start_row, table_end_column, table_new_end_row
                )
                logger.info(
                    f"Table now located at {table.ref} for worksheet {worksheet.title}"
                )

        worksheet.add_table(table)