import os
import unicodedata
import glob


class FolderAccentsRemover:
    """Strip accents from every ``.txt`` file under a folder tree.

    The contents of all text files that share a directory are concatenated
    (each followed by a newline) and written to a single output file named
    after that directory.
    """

    def __init__(self, dat_folder: str, out_folder: str):
        """Store the folders to work on.

        Args:
            dat_folder: Root folder searched recursively for ``*.txt`` files.
            out_folder: Folder receiving one merged file per source directory.
        """
        self.dat_folder = dat_folder
        self.out_folder = out_folder
        # Maps a source directory path to its accumulated, accent-free text.
        self.data_dict: dict = {}

    @staticmethod
    def remove_accents(text: str) -> str:
        """Return *text* reduced to plain ASCII.

        The text is decomposed (NFD) so accented letters become a base letter
        plus combining marks, then every non-ASCII code point is dropped.
        Characters with no ASCII decomposition are removed entirely.
        """
        decomposed = unicodedata.normalize("NFD", text)
        # The bytes are pure ASCII by construction, so decode them as such.
        return decomposed.encode("ascii", "ignore").decode("ascii")

    @staticmethod
    def remove_accents2(text: str) -> str:
        """Return *text* with combining marks removed after NFKD normalization.

        Unlike ``remove_accents``, non-ASCII characters that are not combining
        marks are kept.
        """
        nfkd_form = unicodedata.normalize("NFKD", text)
        return "".join(c for c in nfkd_form if not unicodedata.combining(c))

    @staticmethod
    def make_file_name(file_path: str) -> str:
        """Flatten *file_path* into a single file name ending in ``.txt``.

        Path separators (``/`` and ``\\``) and dots are replaced by ``_``.
        """
        return file_path.replace("/", "_").replace("\\", "_").replace(".", "_") + ".txt"

    def ensure_dict_init(self, dir_name: str) -> None:
        """Create an empty entry for *dir_name* in ``data_dict`` if missing."""
        if dir_name not in self.data_dict:
            self.data_dict[dir_name] = ""

    def process_data(self) -> None:
        """Read, de-accent and merge the input files, then write the results.

        Input files are read as UTF-8. Progress is printed for each file read
        and each file written. ``out_folder`` is created when it does not
        exist.
        """
        # Escape the folder so characters such as "[" or "*" in its name are
        # matched literally instead of being interpreted as glob syntax.
        input_pattern = os.path.join(glob.escape(self.dat_folder), "**/*.txt")

        # Collect pieces per directory and join once, avoiding repeated
        # string concatenation on the dictionary values.
        chunks: dict = {}
        for filename in glob.iglob(input_pattern, recursive=True):
            print(f"processing: {filename}")
            dir_name = os.path.dirname(filename)
            self.ensure_dict_init(dir_name)

            with open(filename, encoding="utf-8") as src:
                data_inp = src.read()
            data_out = self.remove_accents(data_inp)
            chunks.setdefault(dir_name, []).append(f"{data_out}\n")

        for dir_name, parts in chunks.items():
            self.data_dict[dir_name] += "".join(parts)

        os.makedirs(self.out_folder, exist_ok=True)
        for dir_name, dir_content in self.data_dict.items():
            dir_name_file = self.make_file_name(dir_name)
            out_path = os.path.join(self.out_folder, dir_name_file)
            print(f"writing: {out_path}")
            # Explicit encoding keeps the output independent of the locale.
            with open(out_path, "wt", encoding="utf-8") as dst:
                dst.write(dir_content)


def main_runner():
    """Ask for the data and result folders on stdin, then run the processing."""
    source_dir = input("Data folder   : ")
    target_dir = input("Result folder : ")
    FolderAccentsRemover(source_dir, target_dir).process_data()


if __name__ == "__main__":
    # Run the interactive prompt only when executed as a script, not on import.
    main_runner()
    # Non-interactive alternative with hard-coded folder names:
    # processor = FolderAccentsRemover("tipar_test_dat", "tipar_test_out")
    # processor.process_data()
