#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# SPDX-FileCopyrightText: 2019-2026 Univention GmbH
# SPDX-License-Identifier: AGPL-3.0-only

"""
Tool that helps administrators to migrate from the old import (import_user
or UMC module "CSV-Import") to the new imports (ucs-school-import-user or
UMC module "Benutzer-Import").
"""

from __future__ import absolute_import, print_function

import argparse
import codecs
import csv
import logging
import os
import sys
from typing import TYPE_CHECKING, Optional  # noqa: F401

import attr
from ldap.filter import filter_format
from six import PY3

import univention.admin.uldap
from ucsschool.importer.reader.csv_reader import CsvReader
from ucsschool.lib.models.utils import get_file_handler, get_stream_handler
from univention.config_registry import ConfigRegistry

if TYPE_CHECKING:
    from univention.admin.uldap import access as LoType, position as PoType  # noqa: F401


# Log file of this tool: it receives the DEBUG level file handler and is shown in the
# epilog of the command line help.
LOG_FILE = "/var/log/univention/ucs-school-migration-import-user.log"

# Load the configuration registry once at module level; it is used for the server role check below.
ucr = ConfigRegistry()
ucr.load()

# This check runs at module level, i.e. before any argument parsing or LDAP access:
# on every system whose role is not "domaincontroller_master" the process exits with status 2.
if ucr.get("server/role") != "domaincontroller_master":
    print("This script can only be executed on the Primary Directory Node.")
    sys.exit(2)


def py3_decode(data, encoding):
    """
    Decode `data` with `encoding` when running on Python 3 and `data` is a `bytes` object.

    In every other case `data` is returned unchanged.
    """
    if PY3 and isinstance(data, bytes):
        return data.decode(encoding)
    return data


@attr.s
class Entry(object):
    """One result row of the username guessing, as written to the output CSV file."""

    # username found in LDAP; None if no or several users matched
    username = attr.ib()  # type: Optional[str]
    # record UID taken from the input CSV file
    record_uid = attr.ib()  # type: str
    # comment explaining why no unambiguous username could be determined
    reason = attr.ib()  # type: Optional[str]
    # the cells of the original input row, joined into one string
    csv_line = attr.ib()  # type: Optional[str]

    def to_csv(self):
        """Return the four CSV cells of this entry; unset or empty optional values become ""."""
        username_cell = self.username or ""
        reason_cell = self.reason or ""
        input_cell = self.csv_line or ""
        return [username_cell, self.record_uid, reason_cell, input_cell]


class UcsSchoolUserMigration(object):
    def __init__(self):  # type: () -> None
        """
        Parse and validate the command line arguments and set up logging.

        Every validation problem is written to the log and then reported through
        ``argparse``'s error handling, which prints the usage message and terminates the
        process. The following conditions are checked:

        * one of the actions ``--guess-usernames`` / ``--modify-record-uid`` is given
        * ``--guess-usernames`` comes with input file, output file and the three column
          numbers, and all column numbers are >= 1
        * ``--modify-record-uid`` comes with an input file
        * the input file exists and is readable
        * for ``--guess-usernames`` the output file does not exist yet

        The LDAP connection is not opened here: ``self.lo`` and ``self.po`` are initialized
        with ``None``.
        """
        argp = argparse.ArgumentParser(
            description="Tool that helps administrators to migrate from the old import (import_user or "
            'UMC module "CSV-Import") to the new imports (ucs-school-import-user or UMC '
            'module "Benutzer-Import").',
            epilog="Logfile: {}".format(LOG_FILE),
        )
        argp.add_argument("--dry-run", action="store_true", help="If set, a dry run is performed.")
        argp.add_argument(
            "--modify-record-uid",
            action="store_true",
            help="Sets the UCS@school record UID according to specified CSV file for each user. Please "
            "specify --input-file.",
        )
        argp.add_argument(
            "--source-uid",
            action="store",
            help="Also set the source UID for all affected users to the specified value.",
        )
        argp.add_argument(
            "--guess-usernames",
            action="store_true",
            help="Tries to guess usernames based on first name and last name. Please also specify "
            "--column-firstname, --column-lastname, --column-record-uid and --input-file (Hint: "
            "this action does not support --dry-run).",
        )
        argp.add_argument(
            "--column-firstname",
            action="store",
            type=int,
            help="Column number that contains the first name. Counting starts with 1 (1 == first "
            "column).",
        )
        argp.add_argument(
            "--column-lastname",
            action="store",
            type=int,
            help="Column number that contains the last name. Counting starts with 1 (1 == first "
            "column).",
        )
        argp.add_argument(
            "--column-record-uid",
            action="store",
            type=int,
            help="Column number that contains the record UID. Counting starts with 1 (1 == first "
            "column).",
        )
        argp.add_argument("--input-file", action="store", help="Path to input CSV file")
        argp.add_argument("--output-file", action="store", help="Path to output CSV file")
        self.args = argp.parse_args()

        self.logger = logging.getLogger("ImportUserMigration")
        self.logger.setLevel("DEBUG")
        self.logger.addHandler(get_stream_handler("INFO"))
        self.logger.addHandler(get_file_handler("DEBUG", LOG_FILE))
        self.logger.debug("Given arguments: %r", sys.argv)

        def fatal_argp(msg):
            # log first: argp.error() prints the usage message and terminates the process
            self.logger.error(msg)
            argp.error(msg)

        # Without an action there is nothing to do. Rejecting this here also guarantees that
        # an input file has been demanded by one of the checks below before it is accessed.
        if not (self.args.guess_usernames or self.args.modify_record_uid):
            fatal_argp("Please specify one of the actions --guess-usernames or --modify-record-uid.")

        if self.args.guess_usernames:
            columns = [
                self.args.column_firstname,
                self.args.column_lastname,
                self.args.column_record_uid,
            ]
            if not all([self.args.input_file, self.args.output_file] + columns):
                fatal_argp(
                    "For guessing usernames, the filenames for input and output CSV files and the "
                    "columns for first name, last name and record UID have to be specified!"
                )
            # A negative number would silently address a column counted from the end of the row.
            if any(column < 1 for column in columns):
                fatal_argp("Column numbers have to be greater than or equal to 1!")

        if self.args.modify_record_uid and not self.args.input_file:
            fatal_argp("A CSV file must be specified to modify the user's record UID.")

        if not os.path.exists(self.args.input_file):
            fatal_argp("Input file {!r} does not exist!".format(self.args.input_file))
        try:
            # Binary mode: only the readability is probed here. Decoding with the locale's
            # encoding could fail for CSV files that are stored in a different encoding.
            with open(self.args.input_file, "rb") as fd:
                fd.read(1)
        except (IOError, OSError) as exc:
            fatal_argp("Cannot open {!r}: {}".format(self.args.input_file, exc))

        if self.args.guess_usernames and os.path.exists(self.args.output_file):
            fatal_argp("Output file {!r} does already exist!".format(self.args.output_file))

        self.lo = None  # type: Optional[LoType]
        self.po = None  # type: Optional[PoType]

    def run(self):
        """
        Open the admin LDAP connection and execute the action selected on the command line.

        ``--guess-usernames`` takes precedence over ``--modify-record-uid`` if both are given.

        :raises NotImplementedError: if none of the two actions was requested
        """
        connection, position = univention.admin.uldap.getAdminConnection()
        self.lo = connection
        self.po = position

        if self.args.guess_usernames:
            action = self.guess_usernames
        elif self.args.modify_record_uid:
            action = self.modify_record_uid
        else:
            raise NotImplementedError()
        action()

    def guess_usernames(self):
        """
        Search LDAP for the user behind each data row of the input CSV file and write the
        findings to the output CSV file.

        The first row of the input file is treated as header and skipped. For every other row
        the configured first name and last name columns are used to search LDAP:

        * exactly one match: an entry with the found username is recorded
        * no match or several matches: an entry without username, but with a comment and the
          original row, is recorded
        * a row lacking one of the configured columns is logged as error and left out

        The output file is written with the encoding and CSV dialect detected for the input
        file. It starts with an explanatory paragraph, followed by a header row, the ambiguous
        entries and finally the unambiguous entries.
        """
        input_path = self.args.input_file

        # detect encoding and CSV dialect of the input file
        encoding = CsvReader.get_encoding(input_path)
        with open(input_path, "rb") as raw_in:
            dialect = csv.Sniffer().sniff(py3_decode(raw_in.read(), encoding))

        unique_hits = []
        unresolved = []

        # collect one Entry per usable data row
        with open(input_path, "rb") as raw_in:
            rows = csv.reader(codecs.getreader(encoding)(raw_in), dialect=dialect)
            next(rows, None)  # line 1 is the header
            for line_no, row in enumerate(rows, start=2):
                if line_no % 50 == 0:
                    self.logger.info("Processing line %d ...", line_no)

                try:
                    given_name = row[self.args.column_firstname - 1]
                    surname = row[self.args.column_lastname - 1]
                    record_uid = row[self.args.column_record_uid - 1]
                except IndexError:
                    self.logger.error(
                        "Line %d does not contain firstname, lastname and record_uid!", line_no
                    )
                    continue

                ldap_filter = filter_format(
                    "(&(objectClass=ucsschoolType)(givenName=%s)(sn=%s))", (given_name, surname)
                )
                matches = self.lo.search(filter=ldap_filter, required=False)
                match_count = len(matches)
                if match_count == 1:
                    uid = matches[0][1].get("uid", [b""])[0].decode("UTF-8")
                    unique_hits.append(Entry(uid or None, record_uid, None, None))
                elif match_count == 0:
                    # user(name) not found
                    unresolved.append(
                        Entry(None, record_uid, "no user found in LDAP", " $ ".join(row))
                    )
                else:
                    candidates = ",".join(
                        match[1].get("uid", [b"---"])[0].decode("UTF-8") for match in matches
                    )
                    unresolved.append(
                        Entry(
                            None,
                            record_uid,
                            "multiple users found: {}".format(candidates),
                            " $ ".join(row),
                        )
                    )

        # explanatory paragraph at the top of the output file, one single-cell row per item
        preamble = (
            "This CSV file consists of 2 sections:",
            "The first section lists all problematic users for whom either *no* or *several* "
            "user names were found",
            "in LDAP by first and last name. These entries must be checked and corrected "
            "manually. The entries of the",
            "first section can be recognized by a note in the comment column and the column for "
            "the user name being empty.",
            "The second section contains all entries for which exactly one user was found in "
            "LDAP with the specified",
            "first and last name. These should nevertheless be checked again for correctness.",
            "This file contains {} ambiguous entries and {} unambiguous entries.".format(
                len(unresolved), len(unique_hits)
            ),
            "After completion of the corrections, the lines with this text paragraph must be "
            "removed!",
            "",
        )

        # write the new CSV file with the same dialect and encoding as the input file
        with open(self.args.output_file, "wb") as raw_out:
            writer = csv.writer(codecs.getwriter(encoding)(raw_out), dialect=dialect)
            writer.writerows([text] for text in preamble)
            writer.writerow(["username", "record_uid", "comment", "input_data"])
            # ambiguous entries first, then the unambiguous ones
            writer.writerows(entry.to_csv() for entry in unresolved + unique_hits)

    def modify_record_uid(self):
        """
        Set the record UID (and optionally the source UID) of the users listed in the input
        CSV file.

        The first column of each row is used as username, the second one as the new record
        UID. A first row starting with the cells ``username`` and ``record_uid`` is treated as
        header and skipped. If ``--source-uid`` was given, ``ucsschoolSourceUID`` is set to that
        value as well. With ``--dry-run`` the changes are only logged.

        Users that are not found or found more than once in LDAP are logged as error and
        skipped; the same applies to a failing LDAP modification. A row with less than two
        columns terminates the process with exit status 1 - rows processed before that row
        have already been handled.
        """
        # get encoding and CSV dialect
        encoding = CsvReader.get_encoding(self.args.input_file)
        with open(self.args.input_file, "rb") as fp:
            dialect = csv.Sniffer().sniff(fp.read().decode(encoding))

        with open(self.args.input_file, "rb") as fd_in:
            fd_in_e = codecs.getreader(encoding)(fd_in)
            for i, row in enumerate(csv.reader(fd_in_e, dialect=dialect), start=1):
                # Compare a slice instead of indexing the cells: a first row with less than two
                # columns (e.g. a blank line or a left-over explanatory line) has to reach the
                # error handling below instead of raising an uncaught IndexError here.
                if i == 1 and row[:2] == ["username", "record_uid"]:
                    # skip header
                    continue

                try:
                    username = row[0]
                    record_uid = row[1]
                except IndexError:
                    self.logger.error("Line %d does not contain username and record_uid!", i)
                    sys.exit(1)

                filter_s = filter_format("(&(objectClass=ucsschoolType)(uid=%s))", (username,))
                result = self.lo.search(filter=filter_s, required=False)
                if not result:
                    self.logger.error("User %r not found in LDAP!", username)
                    continue
                if len(result) > 1:
                    self.logger.error("User %r found multiple times in LDAP!", username)
                    continue

                user_dn, attrs = result[0]
                # current values, only used for the log message below
                old_record_uid = attrs.get("ucsschoolRecordUID", [b""])[0].decode("UTF-8")
                old_source_uid = attrs.get("ucsschoolSourceUID", [b""])[0].decode("UTF-8")

                # modification list: (attribute, old values, new values)
                ml = [
                    (
                        "ucsschoolRecordUID",
                        attrs.get("ucsschoolRecordUID", []),
                        [record_uid.encode("UTF-8")],
                    ),
                ]
                if self.args.source_uid:
                    ml.append(
                        (
                            "ucsschoolSourceUID",
                            attrs.get("ucsschoolSourceUID", []),
                            [self.args.source_uid.encode("UTF-8")],
                        )
                    )
                self.logger.info(
                    "%sUser: %r   record_uid: %r ==> %r   source_uid: %r ==> %r",
                    "DRYRUN: " if self.args.dry_run else "",
                    username,
                    old_record_uid,
                    record_uid,
                    old_source_uid,
                    # the source UID stays as it is unless --source-uid was given
                    self.args.source_uid or old_source_uid,
                )
                if not self.args.dry_run:
                    try:
                        self.lo.modify(user_dn, ml)
                    except Exception as exc:
                        # best effort: log the failure and continue with the next user
                        self.logger.exception("Failed to update user %r: %s", user_dn, exc)


if __name__ == "__main__":
    # The constructor parses and validates the command line; run() opens the LDAP
    # connection and executes the selected action.
    usum = UcsSchoolUserMigration()
    usum.run()
