import os
import sys
import json
import shutil
import gzip
import zipfile
import calendar, datetime, time
import traceback, logging

from dataiku.base.utils import watch_stdin, get_clazz_in_code, get_json_friendly_error, get_argspec, package_is_at_least
from dataiku.base.socket_block_link import JavaLink, parse_javalink_args
from dataiku.core import dkuio
from dataiku.core.dataset import Dataset
from .exporter import Exporter

import pandas as pd, numpy as np


def json_date_serializer(obj):
    """Fallback JSON serializer that only knows about ``datetime.datetime``.

    :param obj: the value to serialize
    :return: the ISO-8601 representation of ``obj``
    :raises Exception: if ``obj`` is not a ``datetime.datetime``
    """
    if not isinstance(obj, datetime.datetime):
        raise Exception("Not serializable")
    return obj.isoformat()

def empty_for_none(obj):
    """Return an empty string in place of ``None``; any other value is passed through."""
    if obj is None:
        return ''
    return obj

# export a stream of rows
def export_rows(exporter, export_behavior, schema, input_stream, destination_file_path=None):
    """Feed the rows read from a CSV stream to an exporter, one tuple per row.

    The stream is parsed with pandas in chunks of 5000 rows. Integer columns
    are read as ``float64`` and, with pandas >= 1.0, boolean columns are read
    with the nullable ``'boolean'`` dtype. Booleans are handed to the exporter
    as plain Python ``bool`` and ``pd.NA`` is replaced by ``np.nan``.

    :param exporter: object receiving the rows; ``open`` or ``open_to_file``,
        then ``write_row`` and finally ``close`` are called on it
    :param export_behavior: ``'OUTPUT_TO_FILE'`` (exporter opened with
        ``open_to_file``) or ``'MANAGES_OUTPUT'`` (exporter opened with ``open``)
    :param schema: schema dict; its ``"columns"`` entry drives the CSV parsing
    :param input_stream: headerless, comma-separated stream of rows
    :param destination_file_path: handed to ``open_to_file``; unused otherwise
    :return: the number of rows handed to the exporter
    :raises Exception: if ``export_behavior`` is neither of the two known values
    """
    if export_behavior == 'MANAGES_OUTPUT':
        exporter.open(schema)
    elif export_behavior == 'OUTPUT_TO_FILE':
        exporter.open_to_file(schema, destination_file_path)
    else:
        raise Exception("Unexpected export behavior %s" % export_behavior)

    started_at = time.time()

    # the rows arrive as CSV: derive the parsing directives from the schema
    schema_info = Dataset.get_dataframe_schema_st(schema["columns"],
                columns=None, parse_dates=True, infer_with_pandas=False, bool_as_str=False)
    (names, dtypes, parse_dates_columns) = schema_info

    if dtypes is not None:
        relaxed_dtypes = {}
        for column, dtype in dtypes.items():
            if dtype == np.int64 or dtype == np.int32:
                # bad data in an int column must not fail the export: read as doubles
                relaxed_dtypes[column] = np.float64
            elif (dtype == bool or dtype == np.bool_) and package_is_at_least(pd, '1.0'):
                # Parsing a CSV with a missing boolean value, per pandas version & dtype:
                # - Pandas 0.23,  dtype=np.bool   => float('nan') (array dtype = 'object' when at least 1 missing value)
                # - Pandas >=1.0, dtype=np.bool   => error
                # - Pandas >=1.0, dtype='boolean' => pd.NA (array dtype is 'pd.BooleanDtype')
                relaxed_dtypes[column] = 'boolean'
            else:
                relaxed_dtypes[column] = dtype
        dtypes = relaxed_dtypes

    print("Read with dtypes = %s" % dtypes)

    row_count = 0
    if package_is_at_least(pd, '1.0'):
        na_marker = pd.NA
    else:
        na_marker = np.nan

    def clean_value(value):
        # hand plain Python booleans to the plugin, whatever pandas produced
        if isinstance(value, (np.bool_, bool)):
            return bool(value)
        # don't leak pd.NA to the plugin, so that pandas 0.23 & 1.0 behave alike
        if value is na_marker:
            return np.nan
        return value

    chunks = pd.read_table(input_stream, iterator=True, chunksize=5000,
                    header=None, names=names, sep=',', skip_blank_lines=False,
                    dtype=dtypes, parse_dates=parse_dates_columns)
    for chunk in chunks:
        print("Read a DF (%d rows)" % chunk.shape[0])
        for raw_row in chunk.itertuples(index=False, name=None):
            exporter.write_row(tuple(clean_value(value) for value in raw_row))
            row_count += 1
        print("DF is consumed, preparing for next")

    elapsed = time.time() - started_at
    print("Export done in %ds" % elapsed)

    exporter.close()

    return row_count

def adjust_command_obj_in_containers(command):
    """Redirect ``command["destinationFilePath"]`` to a local path when output is sent back.

    The command dict is modified in place, depending on ``command["sendFileBack"]``
    (``"NONE"`` when absent):

    - ``"FILE"``: the path becomes ``./tmp_<basename of the original path>``;
      ``export.out`` stands in for the original path when it is missing or ``None``.
    - ``"DIR"``: the path becomes ``./export.out.dir`` and that directory is created.
    - anything else: the command is left untouched.

    :param command: the command dict received from the backend
    :raises OSError: in ``"DIR"`` mode, if the directory cannot be created
    """
    send_back = command.get("sendFileBack", "NONE")
    if send_back == "FILE":
        # the exporter is expected to write a file then send it over the link
        # so the destinationFilePath field is irrelevant here.
        # but keep the file extension of the original value, because some
        # libs are picky about it
        original_path = command.get("destinationFilePath")
        if original_path is None:
            # the key may be absent or explicitly null; os.path.basename(None) would raise
            original_path = "export.out"
        command["destinationFilePath"] = os.path.join(".", "tmp_" + os.path.basename(original_path))
    elif send_back == "DIR":
        # same, but with a folder
        command["destinationFilePath"] = os.path.join(".", "export.out.dir")
        os.makedirs(command["destinationFilePath"])
    if command.get("destinationFilePath"):
        logging.info("adjusted destination file path %s" % command.get("destinationFilePath"))

def send_empty_stream_if_needed(link, command):
    """Send the end-of-stream marker when the command expects a file to be sent back.

    :param link: the link to the backend; only ``send_string`` is used
    :param command: the command dict; ``"sendFileBack"`` defaults to ``"NONE"``
    """
    logging.info("Sending empty stream")
    if command.get("sendFileBack", "NONE") == "NONE":
        # no upload is expected, nothing to cut short
        return
    # a file upload will be expected: the empty string is the end-of-stream marker
    link.send_string('')

def zip_folder_to_send(command):
    """Zip the folder at ``command["destinationFilePath"]`` into ``<folder>.zip``.

    Every directory (including empty ones) and every regular file of the
    folder is added, under archive names relative to the folder.

    :param command: the command dict holding the folder path
    :return: the path of the zip file
    """
    folder = command["destinationFilePath"]
    archive_path = folder + ".zip"
    base = os.path.abspath(folder)
    logging.info("Make zip dir at %s" % os.path.abspath(archive_path))
    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, entries in os.walk(folder):
            # DSS complains if the contents of the zip are at its root
            arc_dir = os.path.join('.', os.path.relpath(current_dir, base))
            # explicit directory entry, needed for empty dirs
            archive.write(current_dir, arc_dir)
            for entry in entries:
                entry_path = os.path.join(current_dir, entry)
                if not os.path.isfile(entry_path):
                    # regular files only
                    continue
                archive.write(entry_path, os.path.join(arc_dir, entry))
    return archive_path

def gzip_file_to_send(command):
    """Compress the file at ``command["destinationFilePath"]`` into ``<file>.gz``.

    The original file is left in place.

    :param command: the command dict holding the file path
    :return: the path of the gzipped file
    """
    source_path = command["destinationFilePath"]
    target_path = source_path + ".gz"
    logging.info("Make gzip file at %s" % os.path.abspath(target_path))
    with open(source_path, 'rb') as plain:
        with gzip.open(target_path, 'wb') as packed:
            shutil.copyfileobj(plain, packed)
    return target_path

def send_stream_if_needed(link, command):
    """Pack the export output and stream it over the link, if the command asks for it.

    ``command["sendFileBack"]`` selects the packing: ``"FILE"`` sends the
    destination file gzipped, ``"DIR"`` sends the destination folder zipped,
    anything else (``"NONE"`` when absent) sends nothing.

    :param link: the link to the backend
    :param command: the command dict
    :raises Exception: whatever the packing raised, after an empty stream was sent
    """
    mode = command.get("sendFileBack", "NONE")
    if mode == "FILE":
        # send as gzip
        pack = gzip_file_to_send
    elif mode == "DIR":
        # send as zip
        pack = zip_folder_to_send
    else:
        return

    try:
        to_send = pack(command)
    except Exception as e:
        send_empty_stream_if_needed(link, command)
        raise e

    # note: no need to handle the link.send_string(''), because the send_stream() does it
    with link.send_stream() as output, open(to_send, 'rb') as packed:
        logging.info("Streaming output")
        shutil.copyfileobj(packed, output)

# socket-based connection to backend
def serve(port, secret, server_cert=None):
    """Run the single exporter command received from the backend over a JavaLink.

    Connects to the backend, reads one JSON command, instantiates the
    ``Exporter`` subclass found in the command's ``"code"``, runs the requested
    task (only ``"export"`` is handled) and reports the outcome over the link:
    an ``{'ok': True, 'count': <rows>}`` ack on success, or an empty string
    followed by a JSON description of the error on failure. The link is closed
    in every case; exceptions raised inside the ``try`` are reported over the
    link and not propagated to the caller.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    :param server_cert: forwarded to ``JavaLink`` as ``server_cert``
    """
    link = JavaLink(port, secret, server_cert=server_cert)
    # initiate connection
    link.connect()
    # get work to do
    command = link.read_json()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        try:
            # get the exporter object
            clazz = get_clazz_in_code(code, Exporter)
            # the number of arguments of __init__ decides which configs the constructor gets
            # NOTE(review): the count presumably includes `self`, since 1 maps to a no-arg call - confirm
            arg_count = len(get_argspec(clazz.__init__).args)
            exporter = None
            if arg_count == 1:
                exporter = clazz()
            elif arg_count == 2:
                exporter = clazz(config)
            elif arg_count == 3:
                exporter = clazz(config, plugin_config)
            else:
                raise Exception("Wrong signature of the Exporter subclass: %i args" % arg_count)
        except Exception as e:
            # a file upload may be expected by the other end: cut it short before failing
            send_empty_stream_if_needed(link, command)
            raise e

        # get task and dispatch work to exporter
        task = command["task"]
        if task == "export":
            # may rewrite command["destinationFilePath"] when the output has to be sent back
            adjust_command_obj_in_containers(command)

            # schema is mandatory
            try:
                # the rows to export are read from the link as a stream
                with link.read_stream() as input:
                    row_count = export_rows(exporter, command["exportBehavior"], command["schema"], input, command.get("destinationFilePath", None))
            except Exception as e:
                send_empty_stream_if_needed(link, command)
                raise e

            # send the produced file/folder back, if the command asks for it
            send_stream_if_needed(link, command)
        else:
            send_empty_stream_if_needed(link, command)
            raise Exception("Unexpected task %s" % task)

        # send ack
        link.send_json({'ok':True, 'count':row_count})
    except:
        # any failure above ends up here: print it locally, then report it over the link
        traceback.print_exc()
        link.send_string('') # mark failure
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
    

if __name__ == "__main__":
    # Script entry point: set up logging, then serve the command sent by the backend.
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.DEBUG)
    # NOTE(review): presumably ties the life of this process to its stdin - confirm in dataiku.base.utils
    watch_stdin()
    # parameters of the link to the backend
    (port, secret, server_cert) = parse_javalink_args()
    serve(port, secret, server_cert=server_cert)
        
