import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

from newspaper import Article
import os

from htmldate import find_date
from datetime import date, datetime

import requests
import shutil

import glob
import openai

from googletrans import Translator

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

print("The fake news detection program is in trial: you can send feedback and suggestions to intelinkgov@gmail.com")
print("Explanation: the 'fake' label is assigned whenever the program finds false elements in the article, or when the story contains opinions or anything else not considered news")
print("Enter the link of the news story you want to evaluate:")
urlstire = input().strip()

with open('news.txt', 'wb') as fw:
    result = 'Downloading: [' + urlstire + '] ............ '
    try:
        article = Article(urlstire)
        article.download()
        article.parse()
        fw.write(article.text.encode('utf-8'))
        result = result + '[OK]'
        # authors is a list and publish_date may be a datetime or None, so both
        # must be converted to strings before concatenation
        print(', '.join(article.authors) + ' : ' + str(article.publish_date))
    except Exception:
        result = result + '[FAILED]'

    print(result)
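# Note (assumption): newspaper3k auto-detects the article language; for Romanian
# sources an explicit hint, Article(urlstire, language='ro'), may improve
# extraction if the 'ro' language model is available in the installed package.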

# Create the Translator instance (googletrans, imported at the top)
translator = Translator()

# Open "news.txt" to read its contents
with open("news.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Translate the content into English (the source language is auto-detected)
translated = translator.translate(content, src="auto", dest="en")
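# Note (assumption): the free googletrans endpoint rejects very long payloads
# (on the order of 15k characters per request), so for long articles a chunked
# variant like this commented sketch could be swapped in:
#
#   chunks = [content[i:i + 4000] for i in range(0, len(content), 4000)]
#   translated_text = " ".join(
#       translator.translate(c, src="auto", dest="en").text for c in chunks
#   )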

# Save the translated content to a file named "tradus.txt"
with open("tradus.txt", "w", encoding="utf-8") as f:
    f.write(translated.text)

# Copy the translated article into the "stiri" directory (created if missing)
os.makedirs('stiri', exist_ok=True)
shutil.copy('tradus.txt', 'stiri')
#Read the data
df = pd.read_csv('news.csv')
#Get shape and head (bare expressions do nothing in a script, so print them)
print(df.shape)
print(df.head())

#DataFlair - Get the labels
labels = df.label
print(labels.head())
#DataFlair - Split the dataset
x_train, x_test, y_train, y_test = train_test_split(df['text'], labels, test_size=0.2, random_state=7)
#DataFlair - Initialize a TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
#DataFlair - Fit and transform train set, transform test set
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)
#DataFlair - Initialize a PassiveAggressiveClassifier
pac = PassiveAggressiveClassifier(max_iter=50)
pac.fit(tfidf_train, y_train)
#DataFlair - Predict on the test set and calculate accuracy
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test, y_pred)
print(f'Accuracy: {round(score*100, 2)}%')
#DataFlair - Build confusion matrix (label values must match those in news.csv: 'FAKE' and 'REAL')
print(confusion_matrix(y_test, y_pred, labels=['FAKE', 'REAL']))
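# A minimal equivalent sketch (assuming the same news.csv split as above) using
# sklearn's Pipeline, which bundles the vectorizer and classifier into a single
# estimator that can be fit, evaluated, and pickled as one object:
#
#   from sklearn.pipeline import Pipeline
#   pipe = Pipeline([
#       ('tfidf', TfidfVectorizer(stop_words='english', max_df=0.7)),
#       ('pac', PassiveAggressiveClassifier(max_iter=50)),
#   ])
#   pipe.fit(x_train, y_train)
#   print(accuracy_score(y_test, pipe.predict(x_test)))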

def read_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

# Preprocess and vectorize the text of the new story
file_path = 'tradus.txt'
new_story = read_file(file_path)
tfidf_new_story = tfidf_vectorizer.transform([new_story])

# Use the model to predict the label of the new story
prediction = pac.predict(tfidf_new_story)

# Display the result
print(f"Predicted label: {prediction[0]}")
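# Optional sketch: as a linear model, PassiveAggressiveClassifier also exposes
# decision_function; the signed distance to the separating hyperplane can serve
# as a rough confidence score for the prediction above.
#
#   margin = pac.decision_function(tfidf_new_story)[0]
#   print(f"Decision margin: {margin:.3f}")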

# Write the result to a file named "resultat.txt"
with open("resultat.txt", "w") as output_file:
    output_file.write(f"Predicted label: {prediction[0]}\n")

# Re-open "resultat.txt" to verify the result
with open("resultat.txt", "r") as input_file:
    print(input_file.read())

def TransformData(data):
    """Convert a date object or 'YYYY-MM-DD' string to compact 'YYYYMMDD' form."""
    return str(data).replace('-', '')

def DiferentaData(data1, data2):
    """Return the difference in days between two 'YYYYMMDD' date strings,
    using exact calendar arithmetic instead of a 30-day-month approximation."""
    d1 = datetime.strptime(data1, "%Y%m%d").date()
    d2 = datetime.strptime(data2, "%Y%m%d").date()
    return (d1 - d2).days
    
def TitluArticol(url):
    """Fetch the page and return the contents of its <title> tag."""
    # The original dict used the key 'headers' (a typo); a User-Agent header is
    # what actually persuades many sites to serve the page to a script
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0'}
    n = requests.get(url, headers=headers)
    al = n.text
    return al[al.find('<title>') + 7 : al.find('</title>')]

url = urlstire
titlu = TitluArticol(url)

# find_date may return None when no publication date can be detected
data_articol = find_date(url)
print('Article title:', titlu)

today = TransformData(date.today())
print("Today's date:", today)

if data_articol is None:
    print("the article's publication date could not be determined")
else:
    data = TransformData(data_articol)
    print('Article date:', data)
    if data == today:
        print("this is an article written today")
    else:
        dif = DiferentaData(today, data)
        print('the article is', dif, 'days old')




# Never commit API keys to source; read the key from the environment instead
openai.api_key = os.getenv("OPENAI_API_KEY")

conversatie = [
    {"role": "system", "content": "You are an expert assistant at detecting fake and real news, and you provide solid arguments. You will give your answers in Romanian. When the story references the year of the event, skip the evaluation regarding the date. The current year is 2023 and your knowledge base is from 2021."},
]


with open("news.txt", "r", encoding='utf-8') as file:
    content = file.read()

# Truncate the article to 3000 characters so the prompt stays within the model's context limit
content = content[:3000]

intrebare = ("determine the truth value of the following article, giving arguments; this is the article: ") + content

conversatie.append({"role": "user", "content": intrebare})
raspuns = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=conversatie
)
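# Hedged sketch: the pre-1.0 openai client raises subclasses of
# openai.error.OpenAIError (rate limits, transient API errors), so a retry
# loop like this commented one could make the call above more robust:
#
#   import time
#   for attempt in range(3):
#       try:
#           raspuns = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=conversatie)
#           break
#       except openai.error.OpenAIError:
#           time.sleep(2 ** attempt)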
answer = raspuns['choices'][0]['message']['content']
print(answer)

# Save the answer to a file
with open("answer.txt", "w", encoding='utf-8') as output_file:
    output_file.write(answer)

# Re-open the answer file and display its contents
with open("answer.txt", "r", encoding='utf-8') as input_file:
    print(input_file.read())

conversatie.append({"role": "assistant", "content": answer})


# Remove the temporary raw article; it lives in the working directory (only
# tradus.txt was copied into 'stiri'), so no chdir is needed
if os.path.exists('news.txt'):
    os.remove('news.txt')


# Initialize the sentiment analyzer (VADER's lexicon must be downloaded once)
nltk.download('vader_lexicon', quiet=True)
sid = SentimentIntensityAnalyzer()

def get_sentiment(file_path):
    """
    This function takes a file path as input, reads the text from the file,
    and returns a dictionary containing the sentiment scores.
    """
    # Open the file and read its contents
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()


    # Get the sentiment scores for the text
    scores = sid.polarity_scores(text)

    # Return the scores as a dictionary
    return scores
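# For reference, polarity_scores returns a dict of four floats: 'neg', 'neu'
# and 'pos' are proportions that sum to roughly 1.0, and 'compound' is a
# normalized aggregate score in [-1, 1]. A quick check:
#
#   print(sid.polarity_scores("This is a wonderful, uplifting story."))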

def main():
    # Get a list of all text files in the "stiri" directory
    txt_files = sorted(glob.glob("stiri/*.txt"))

    # Open the results file for writing
    with open("results_stiri.txt", "w") as f:
        # Write the header line to the file
        f.write("file,neg,neu,pos,compound\n")
        print("Writing sentiment scores to results_stiri.txt")

        # Loop over each text file and write its sentiment scores to the results file
        for file_path in txt_files:
            # Get the sentiment scores for the current file
            scores = get_sentiment(file_path)

            # Write the scores to the results file
            f.write(f"{file_path},{scores['neg']},{scores['neu']},{scores['pos']},{scores['compound']}\n")

    # Read the sentiment scores from the results file
    with open("results_stiri.txt", "r") as f:
        # Skip the header line
        f.readline()

        # Read the sentiment scores for each file
        for line in f:
            # Extract the scores from the line
            file_path, neg, neu, pos, compound = line.strip().split(",")

            # Convert the scores to floats
            neg = float(neg)
            neu = float(neu)
            pos = float(pos)

            # Check whether the scores fall within the expected ranges
            # (thresholds that appear to be empirically chosen for typical news prose)
            if 0.051 <= neg <= 0.119 and 0.784 <= neu <= 0.828 and 0.091 <= pos <= 0.158:
                print(f"{file_path}: The article is within normal parameters.")
            else:
                print(f"{file_path}: The article is outside normal parameters.")

if __name__ == "__main__":
    main()



