import sys, re, os, os.path as osp, json, subprocess, datetime, socket, functools, tempfile, requests

# Installation directory of DSS. Mandatory: a missing DKUINSTALLDIR variable
# stops the script right away with a KeyError.
dkuinstalldir = os.environ["DKUINSTALLDIR"]
# Data directory of DSS. Optional: stays None when DIP_HOME is not set, in
# which case the checks that need a data directory are not run.
dip_home = os.environ.get("DIP_HOME")

# Top-level folders of the Python versions that are explicitly removed from the
# built-in env in Fleet Manager images. Their files are not there, so they must
# not be reported as failures.
_REMOVED_PYTHON_PACKAGES = re.compile(r"^python3[67]\.packages")

# Socket timeout (seconds) for the checksum download, so that an unreachable
# CDN fails the run instead of hanging it forever.
_CHECKSUM_DOWNLOAD_TIMEOUT = 60


def _split_result_line(line):
    """Split one ``sha256sum -c`` output line into ``(path, status)``.

    The split is done on the LAST ``": "`` so that paths containing colons are
    kept whole. Returns None for lines that do not look like ``<path>: <status>``.
    """
    path, separator, status = line.rpartition(": ")
    if not separator or not path:
        return None
    return path, status


def _component_for_path(path):
    """Return the report component a checked path belongs to, or None to skip it."""
    parts = path.split("/")

    # Should always be the case
    if parts[0] == ".":
        parts = parts[1:]
    if not parts:
        return "ROOT"

    top = parts[0]
    if _REMOVED_PYTHON_PACKAGES.match(top):
        return None

    if len(parts) == 1:
        return "ROOT"
    if len(parts) == 2:
        # Files that are just one level below install dir (for example,
        # dist/dataiku-dip.jar) are reassigned to the higher-level component
        return top

    second = parts[1]
    # Dig one level deeper in lib/ivy and python/dataiku as they are "more important"
    if (top, second) in (("lib", "ivy"), ("python", "dataiku")):
        return "/".join(parts[0:3])

    # Fold "<package>-<version>.dist-info" folders into the component of the
    # package they describe
    if top.startswith("python") and top.endswith(".packages") and second.endswith(".dist-info"):
        second = second.split("-")[0]

    return "/".join((top, second))


def _render_integrity_report(components):
    """Build the Markdown report chunk; returns ``(passed, markdown)``.

    ``components`` maps a component name to ``{"OK": int, "FAILED": [paths]}``.
    The check passes only if at least one file was verified and none failed.
    """
    total_failed = sum(len(data["FAILED"]) for data in components.values())
    total_checked = sum(data["OK"] + len(data["FAILED"]) for data in components.values())

    pieces = ["# File integrity check\n\n"]
    pieces.append("A total of %s files were checked.\n\n" % total_checked)

    if total_checked == 0:
        # An integrity check that verified nothing must not be reported as passed
        pieces.append("No file could be verified, so this check is reported as failed.\n\n")
    if total_failed > 0:
        pieces.append("%s files failed integrity check\n\n" % total_failed)

    pieces.append("## Summary by component\n\n")
    pieces.append("Component|Status|Valid files|Invalid files\n")
    pieces.append("--|--|--|--\n")
    for (component, data) in components.items():
        status = "FAILED" if data["FAILED"] else "PASSED"
        pieces.append("%s|%s|%s|%s\n" % (component, status, data["OK"], len(data["FAILED"])))
    pieces.append("\n\n")

    if total_failed > 0:
        pieces.append("## Failed integrity checks details\n\n")
        for (component, data) in components.items():
            if not data["FAILED"]:
                continue
            pieces.append("### Failures in component %s\n\n" % component)
            for failed_path in data["FAILED"]:
                pieces.append("* Checksum mismatch for file: **%s**\n" % failed_path)
            # Blank line so that the next "###" header is not swallowed by the
            # last bullet (pandoc requires a blank line before a header)
            pieces.append("\n")

    return (total_checked > 0 and total_failed == 0, "".join(pieces))


def file_integrity_check(opts):
    """Verify the files of the installation directory against a SHA-256 checksum list.

    Runs ``sha256sum -c`` from ``dkuinstalldir`` and groups the per-file results
    by component.

    :param opts: parsed command-line options. ``opts.checksum_file`` is the path
        of the checksum list; when empty, the list for ``installdir_dss_version``
        is downloaded. That global is assigned by the ``__main__`` section of
        this script and must exist before the download branch is used.
    :returns: tuple ``(passed, markdown_report_chunk)``
    :raises Exception: if ``opts.checksum_file`` is set but is not a file
    :raises requests.RequestException: if the checksum list cannot be downloaded
    """
    downloaded_fp = None
    try:
        if not opts.checksum_file:
            print("Downloading checksums file")
            downloaded_fp = tempfile.NamedTemporaryFile(prefix="dataiku-dss-%s-sha256sums-downloaded" % installdir_dss_version)

            resp = requests.get("https://cdn.downloads.dataiku.com/public/dss/%s/dataiku-dss-%s-sha256sums.txt" % (installdir_dss_version, installdir_dss_version),
                                timeout=_CHECKSUM_DOWNLOAD_TIMEOUT)
            resp.raise_for_status()

            downloaded_fp.write(resp.content)
            downloaded_fp.flush()

            checksum_file = downloaded_fp.name
        else:
            if not osp.isfile(opts.checksum_file):
                raise Exception("Checksum file not a file: %s" % opts.checksum_file)

            # Absolute path needed because sha256sum runs with another cwd
            checksum_file = osp.realpath(opts.checksum_file)

        # LC_ALL=C: the "OK" / "FAILED" status words are translated by GNU
        # coreutils in other locales, which would break the parsing below
        sha256sum_output = subprocess.run(["sha256sum", "-c", checksum_file], cwd=dkuinstalldir,
                                          stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                          encoding="utf8", env=dict(os.environ, LC_ALL="C"))
    finally:
        # Closing the temporary file also deletes it
        if downloaded_fp is not None:
            downloaded_fp.close()

    print("File integrity check: Checksums computed, processing results")

    print("Verification err: %s" % sha256sum_output.stderr)

    components = {}
    for line in sha256sum_output.stdout.splitlines():
        parsed = _split_result_line(line)
        if parsed is None:
            continue
        (path, status) = parsed

        component = _component_for_path(path)
        if component is None:
            continue

        component_data = components.setdefault(component, {"OK": 0, "FAILED": []})
        if status == "OK":
            component_data["OK"] += 1
        else:
            component_data["FAILED"].append(path)

    print("File integrity check: Results processed")

    for (component, component_data) in components.items():
        print("File integrity check: Component: %s -- Passed: %s -- Failed: %s" % (component, component_data["OK"], component_data["FAILED"]))

    return _render_integrity_report(components)

def version_consistency_check(opts):
    """Check that the data directory and the installation directory agree on the DSS version.

    Reads ``product_version`` from ``dss-version.json`` in ``dip_home`` and
    compares it to the global ``installdir_dss_version``, which is assigned by
    the ``__main__`` section of this script.

    :param opts: parsed command-line options (not used by this check)
    :returns: tuple ``(passed, markdown_report_chunk)``
    """
    report_chunk = "# Version consistency\n\n"

    version_file = osp.join(dip_home, "dss-version.json")
    try:
        with open(version_file, "rb") as f:
            datadir_dss_version = json.load(f)["product_version"]
    except (OSError, ValueError, KeyError, TypeError) as e:
        # A missing or malformed version file is a verification finding: report
        # it as a failed check instead of aborting the run, which would throw
        # away the results of the checks that already completed.
        report_chunk += "Check failed: could not read the DSS version from %s (%s: %s)\n\n" % (version_file, type(e).__name__, e)
        return (False, report_chunk)

    if datadir_dss_version == installdir_dss_version:
        report_chunk += "Check passed\n\n"
        return (True, report_chunk)

    report_chunk += "Check failed: %s in datadir, %s in installdir\n\n" % (datadir_dss_version, installdir_dss_version)
    return (False, report_chunk)


def builtin_env_check(opts):
    """Look for unexpected package folders in the builtin Python env of the data directory.

    Scans the known ``site-packages`` locations under ``<dip_home>/pyenv`` and
    flags every directory that is neither a ``dist-info`` folder nor part of
    the packaging tooling.

    :param opts: parsed command-line options (not used by this check)
    :returns: tuple ``(passed, markdown_report_chunk)``
    """
    report_chunk = "# Builtin Python env integrity\n\n"

    possible_site_packages = [
        "lib/python3.6/site-packages",
        "lib/python3.7/site-packages",
        "lib/python3.9/site-packages",
        "lib/python3.10/site-packages",
        "lib64/python3.6/site-packages",
        "lib64/python3.7/site-packages",
        "lib64/python3.9/site-packages",
        "lib64/python3.10/site-packages",
    ]
    ignored_folders = ("__pycache__", "pkg_resources", "_distutils_hack", "pip", "setuptools", "wheel")

    pyenv_dir = osp.join(dip_home, "pyenv")

    # lib64 is commonly a symlink to lib in a virtual env: keep a single entry
    # per physical folder so that findings are not reported twice.
    found_site_packages = []
    seen_real_paths = set()
    for candidate in possible_site_packages:
        sp_folder = osp.join(pyenv_dir, candidate)
        if not osp.isdir(sp_folder):
            continue
        real_path = osp.realpath(sp_folder)
        if real_path in seen_real_paths:
            continue
        seen_real_paths.add(real_path)
        found_site_packages.append(sp_folder)

    if not found_site_packages:
        report_chunk += "No valid site-package folder found. Using unsupported Python version?"
        return (False, report_chunk)

    all_valid = True

    for sp_folder in found_site_packages:
        # Sorted: os.listdir order is arbitrary, the report should be reproducible
        for content in sorted(os.listdir(sp_folder)):
            content_path = osp.join(sp_folder, content)
            if not osp.isdir(content_path):
                continue
            # Metadata folders ("<package>-<version>.dist-info") are expected
            if content.find("dist-info") > 0:
                continue
            if content in ignored_folders:
                continue

            report_chunk += "* Unexpected folder found in Builtin env. Maybe manually installed: **%s**\n" % content_path
            all_valid = False

    if all_valid:
        report_chunk += "Check passed\n\n"
    return (all_valid, report_chunk)


if __name__ == '__main__':
    # Script entry point: runs the checks, assembles a Markdown report and
    # compiles it to verification-report.pdf in the current directory with pandoc.
    import argparse
    import getpass

    parser = argparse.ArgumentParser(prog='installation-verifier.py', description="Verifies installation")
    parser.add_argument('--checksum-file', required=False)

    opts = parser.parse_args(sys.argv[1:])

    print("Dataiku DSS installation verification starting ...")

    # Kept as a module-level name on purpose: file_integrity_check and
    # version_consistency_check read installdir_dss_version as a global.
    with open(osp.join(dkuinstalldir, "dss-version.json"), "rb") as f:
        installdir_dss_version = json.load(f)["product_version"]

    ############################### Checks

    # (log prefix, line in the report summary, check function)
    checks = [
        ("File integrity check", "File integrity check on the installation directory", file_integrity_check),
    ]
    # The remaining checks inspect the data directory and only run when DIP_HOME is set
    if dip_home is not None:
        checks.append(("Version consistency", "Version consistency", version_consistency_check))
        checks.append(("Builtin env", "Builtin Python env integrity check", builtin_env_check))

    overall_pass = True
    report_chunks = []
    checked_items = []

    for (log_prefix, summary_line, check) in checks:
        print("%s: Starting check" % log_prefix)
        (check_pass, report_chunk) = check(opts)
        overall_pass = overall_pass and check_pass
        report_chunks.append(report_chunk)
        checked_items.append(summary_line)
        print("%s: check complete" % log_prefix)

    ############################### Report generation

    print("Checks complete. Preparing report")

    # os.getlogin() needs a controlling terminal and raises OSError without one
    # (cron, systemd, containers); fall back to the environment / password database.
    try:
        unix_user = os.getlogin()
    except OSError:
        unix_user = getpass.getuser()

    report = """---
title: "Dataiku DSS installation validation report"
author: "Dataiku installation verifier"
date: %s
---\n\n""" % datetime.date.today()

    report += "# Report information\n\n"

    report += "* DSS version: %s\n" % installdir_dss_version
    report += "* Generated on: %s\n" % (datetime.datetime.now().astimezone().strftime("%Y-%m-%dT%H:%M:%S%z"))
    report += "* Installation directory: %s\n" % dkuinstalldir
    report +=" * Host: %s\n" % (socket.getfqdn())
    report +=" * UNIX user: %s" % unix_user

    report += "\n\n\n"

    report += "# Summary\n\n"

    # Only list the checks that actually ran
    report += "The following items were checked in this Dataiku DSS installation:\n\n"
    report += "".join("* %s\n" % item for item in checked_items)
    report += "\n\n"

    if not overall_pass:
        report += "The overall status of the verification is: **FAILED**\n\n"
    else:
        report += "The overall status of the verification is: **PASSED**\n\n"

    report += "\n\n\\newpage\n\n"

    for report_chunk in report_chunks:
        report += report_chunk
        report += "\n\n\\newpage\n\n"

    print("Report prepared")

    ############################### Report write

    print("Compiling report")

    with tempfile.TemporaryDirectory() as tmpdir:

        # pandoc reads its input as UTF-8, whatever the locale of this process is
        with open(osp.join(tmpdir, "report.md"), "w", encoding="utf-8") as f:
            f.write(report)

        output_file = osp.join(os.getcwd(), "verification-report.pdf")

        subprocess.check_output(["pandoc", "-i", "report.md", "-o", output_file, "--toc"], cwd=tmpdir)

        print("Finished")
        print("Verification report has been output in %s" % output_file)
        print("Verification overall status is %s" % ("PASSED" if overall_pass else "FAILED"))