import tkinter as tk
from tkinter import filedialog, ttk, messagebox
import PyPDF2
import re
from datetime import datetime
import os
from collections import defaultdict

class EmailFinderApp:
    def __init__(self, root):
        """Bind the application to *root*, set up the window and build the UI.

        Args:
            root: The Tk root window that hosts all widgets.
        """
        self.root = root

        # Window title and initial size.
        root.title("Căutare Email-uri în PDF-uri")
        root.geometry("800x600")

        # Selection state: chosen PDF paths, path of the e-mail list file,
        # and the patterns read from that file.
        self.pdf_files, self.email_file, self.emails = [], "", []

        self.create_widgets()
    
    def create_widgets(self):
        """Build the main window layout inside a padded frame.

        The frame holds the two load buttons with the list of selected PDFs
        next to them, the search button, and the results text box. Both the
        list and the text box get a vertical scrollbar. Only ``pdf_listbox``
        and ``result_text`` are stored on the instance.
        """
        fill_all = (tk.W, tk.E, tk.N, tk.S)
        fill_vertical = (tk.N, tk.S)

        container = ttk.Frame(self.root, padding="10")
        container.grid(row=0, column=0, sticky=fill_all)

        # Rows 0-1, column 0: the two "load" buttons.
        load_pdfs_button = ttk.Button(
            container, text="Încarcă PDF-uri", command=self.load_pdfs
        )
        load_pdfs_button.grid(row=0, column=0, pady=5)
        load_emails_button = ttk.Button(
            container, text="Încarcă fișier cu email-uri", command=self.load_email_file
        )
        load_emails_button.grid(row=1, column=0, pady=5)

        # Rows 0-1, columns 1-2: names of the selected PDFs plus scrollbar.
        self.pdf_listbox = tk.Listbox(container, width=60, height=5)
        self.pdf_listbox.grid(row=0, column=1, rowspan=2, padx=10, pady=5)

        list_scroll = ttk.Scrollbar(
            container, orient="vertical", command=self.pdf_listbox.yview
        )
        list_scroll.grid(row=0, column=2, rowspan=2, sticky=fill_vertical)
        self.pdf_listbox.configure(yscrollcommand=list_scroll.set)

        # Row 2: the search trigger, centred across the first three columns.
        search_button = ttk.Button(
            container, text="Caută Email-uri", command=self.search_emails
        )
        search_button.grid(row=2, column=0, columnspan=3, pady=20)

        # Row 3: results area plus scrollbar.
        self.result_text = tk.Text(container, width=70, height=20)
        self.result_text.grid(row=3, column=0, columnspan=3, pady=10)

        text_scroll = ttk.Scrollbar(
            container, orient="vertical", command=self.result_text.yview
        )
        text_scroll.grid(row=3, column=3, sticky=fill_vertical)
        self.result_text.configure(yscrollcommand=text_scroll.set)
    
    def load_pdfs(self):
        """Ask the user for PDF files and list their base names in the list box.

        Cancelling the dialog leaves the current selection and the list box
        untouched instead of clearing them.
        """
        files = filedialog.askopenfilenames(
            title="Selectează PDF-uri",
            filetypes=[("PDF files", "*.pdf")]
        )
        if not files:
            # Nothing was chosen (dialog cancelled): keep the previous selection.
            return

        # Stored as a list, the same type the attribute is initialised with.
        self.pdf_files = list(files)
        self.pdf_listbox.delete(0, tk.END)
        for path in self.pdf_files:
            self.pdf_listbox.insert(tk.END, os.path.basename(path))
    
    def load_email_file(self):
        """Ask the user for a text file and load one e-mail pattern per line.

        Surrounding whitespace is stripped and blank lines are skipped. The
        file is decoded as UTF-8, ignoring a leading byte-order mark.

        ``email_file`` and ``emails`` are only updated together after a
        successful read. Cancelling the dialog, or choosing a file that cannot
        be read, keeps the previously loaded patterns; a read failure is
        reported in an error dialog.
        """
        path = filedialog.askopenfilename(
            title="Selectează fișierul cu email-uri",
            filetypes=[("Text files", "*.txt")]
        )
        if not path:
            # Dialog cancelled: keep whatever was loaded before.
            return

        try:
            # "utf-8-sig" drops a leading BOM; with the platform default
            # encoding the BOM would become part of the first pattern and
            # prevent it from ever matching.
            with open(path, 'r', encoding='utf-8-sig') as f:
                emails = [line.strip() for line in f if line.strip()]
        except (OSError, UnicodeDecodeError) as e:
            messagebox.showerror("Eroare", f"Eroare la citirea {os.path.basename(path)}: {str(e)}")
            return

        self.email_file = path
        self.emails = emails

    def find_email_in_text(self, text, email_pattern):
        """Find the full e-mail address in *text* that ends with *email_pattern*.

        The search is case-insensitive and only the first occurrence of the
        pattern is considered. From that occurrence the match is extended to
        the left over letters, digits, ``.``, ``_`` and ``-``, so a pattern
        such as ``popescu@firma.ro`` also captures a dotted prefix like
        ``ion.popescu@firma.ro``.

        Args:
            text: Text to search. ``None`` or an empty string yields no match.
            email_pattern: The address, or the trailing part of one, to find.

        Returns:
            The matched address in lower case, or ``False`` when nothing
            matches.
        """
        if not text or not email_pattern:
            return False

        # Work on the lower-cased text throughout: lower-casing can change the
        # length of a string (e.g. "İ" becomes two code points), so offsets
        # found in the lower-cased copy are not valid indexes into the original.
        text_lower = text.lower()
        pattern_lower = email_pattern.lower()

        index = text_lower.find(pattern_lower)
        if index == -1:
            return False

        # Walk left from the pattern to the real start of the address.
        start = index
        while start > 0 and (text_lower[start - 1].isalnum() or text_lower[start - 1] in '._-'):
            start -= 1

        return text_lower[start:index + len(pattern_lower)]

    def search_emails(self):
        """Search the loaded PDFs for the loaded e-mail patterns and report hits.

        Every page of every selected PDF is checked against each pattern with
        ``find_email_in_text``. The results, grouped by document and ordered
        by page number and then address, are shown in the results box. When at
        least one address was found they are also written, as UTF-8, to a
        timestamped ``rezultate_cautare_*.txt`` file in the current working
        directory. Otherwise an informational message is shown and no file is
        created.

        A PDF that cannot be processed is reported in an error dialog; pages
        read before the failure are kept and the remaining files are still
        searched.
        """
        if not self.pdf_files or not self.email_file:
            messagebox.showerror("Eroare", "Te rog încarcă atât PDF-urile cât și fișierul cu email-uri!")
            return

        self.result_text.delete("1.0", tk.END)

        # PDF base name -> {1-based page number -> set of matched addresses}.
        # Page numbers stay ints instead of being packed into a "name_page"
        # string, so file names that contain "_" cannot break the parsing.
        results_by_pdf = {}
        for pdf_path in self.pdf_files:
            pdf_name = os.path.basename(pdf_path)
            try:
                for page_num, found in self._iter_page_matches(pdf_path):
                    pages = results_by_pdf.setdefault(pdf_name, {})
                    pages.setdefault(page_num, set()).update(found)
            except Exception as e:
                # Deliberately broad: this is the boundary with the PDF
                # library, and one unreadable file must not abort the search.
                messagebox.showerror("Eroare", f"Eroare la procesarea {pdf_name}: {str(e)}")

        all_results = self._format_results(results_by_pdf)
        self.result_text.insert(tk.END, "".join(all_results))

        # Headers and separators are emitted even for documents without hits,
        # so success is decided by the number of addresses actually found.
        match_count = sum(
            len(emails)
            for pages in results_by_pdf.values()
            for emails in pages.values()
        )
        if not match_count:
            messagebox.showinfo("Informație", "Nu s-au găsit email-uri în documentele selectate.")
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f"rezultate_cautare_{timestamp}.txt"
        try:
            # Explicit UTF-8: PDF names may contain characters that the
            # platform default encoding cannot represent.
            with open(output_file, 'w', encoding='utf-8') as f:
                f.writelines(all_results)
        except OSError as e:
            messagebox.showerror("Eroare", f"Eroare la salvarea {output_file}: {str(e)}")
            return

        messagebox.showinfo("Succes", f"Rezultatele au fost salvate în {output_file}")

    def _iter_page_matches(self, pdf_path):
        """Yield ``(page_number, matched_addresses)`` for each page of *pdf_path*.

        Page numbers are 1-based; ``matched_addresses`` is a (possibly empty)
        set of the lower-cased addresses found on that page.
        """
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page_num, page in enumerate(pdf_reader.pages, start=1):
                # Guard against a page for which no text is returned.
                text = page.extract_text() or ""
                found = set()
                for email_pattern in self.emails:
                    found_email = self.find_email_in_text(text, email_pattern)
                    if found_email:
                        found.add(found_email)
                yield page_num, found

    @staticmethod
    def _format_results(results_by_pdf):
        """Render the collected matches as the text chunks to display and save."""
        chunks = []
        for pdf_name in sorted(results_by_pdf):
            chunks.append(f"Document: {pdf_name}\n{'='*50}\n\n")
            pages = results_by_pdf[pdf_name]
            for page_num in sorted(pages):
                for email in sorted(pages[page_num]):
                    chunks.append(f"Email: {email}\nPagina: {page_num}\n\n")
            # Separator between documents.
            chunks.append('-'*50 + '\n\n')
        return chunks

if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the application
    # to it, then hand control to the Tk event loop.
    window = tk.Tk()
    application = EmailFinderApp(window)
    window.mainloop()
