import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

from newspaper import Article
import sys
import os
import re
import urllib.request


from googlesearch import search
from htmldate import find_date
from datetime import date


import requests
import shutil


print("Introdu linkul de la tastatura:")
urlstire = input().strip()

with open('news.txt', 'wb') as fw:
    try:
        result = 'Descarc: [' + urlstire + '] ............ '
        article = Article(urlstire)
        article.download()
        article.parse()
        fw.write(article.text.encode('utf-8'))
        result = result + '[OK]'
        print(article.authors + ' : ' + article.publish_date)
    except:
        result = result + '[EROARE]'

    print(result)
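
# newspaper's default client is sometimes blocked or times out on slow hosts.
# A minimal defensive variant of the download above (not called by default);
# the user agent string and the 10-second timeout are arbitrary assumptions,
# not requirements of newspaper3k.
def download_article(url, timeout=10):
    from newspaper import Config
    config = Config()
    config.browser_user_agent = ('Mozilla/5.0 (X11; Linux x86_64) '
                                 'Gecko/20100101 Firefox/51.0')
    config.request_timeout = timeout
    article = Article(url, config=config)
    article.download()
    article.parse()
    return article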

from googletrans import Translator

# Create the Translator instance
translator = Translator()

# Read the downloaded article back in
with open("news.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Translate the content into English (the classifier below is trained on
# English text), auto-detecting the source language
translated = translator.translate(content, src="auto", dest="en")

# Save the translated content to a file named "tradus.txt"
with open("tradus.txt", "w", encoding="utf-8") as f:
    f.write(translated.text)
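
# googletrans talks to an unofficial web endpoint that rejects very long
# payloads, so large articles may fail to translate in a single call. A
# minimal chunking sketch (not wired in); the 4000-character budget and
# splitting on paragraph boundaries are assumptions, not documented limits.
def translate_long_text(translator, text, dest="en", chunk_size=4000):
    chunks, current = [], ""
    for paragraph in text.split("\n"):
        if len(current) + len(paragraph) + 1 > chunk_size:
            chunks.append(current)
            current = paragraph
        else:
            current = current + "\n" + paragraph if current else paragraph
    if current:
        chunks.append(current)
    # Translate each chunk separately and stitch the results back together
    return "\n".join(translator.translate(c, dest=dest).text for c in chunks)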


# Read the labelled dataset
df = pd.read_csv('news.csv')
print(df.shape)
print(df.head())

# Get the labels
labels = df.label
print(labels.head())

# Split the dataset
x_train, x_test, y_train, y_test = train_test_split(
    df['text'], labels, test_size=0.2, random_state=7)

# Initialize a TfidfVectorizer, dropping English stop words and terms that
# appear in more than 70% of documents
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)

# Fit and transform the train set, transform the test set
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

# Initialize and fit a PassiveAggressiveClassifier
pac = PassiveAggressiveClassifier(max_iter=50)
pac.fit(tfidf_train, y_train)

# Predict on the test set and calculate accuracy
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test, y_pred)
print(f'Accuracy: {round(score*100, 2)}%')

# Build and print the confusion matrix
print(confusion_matrix(y_test, y_pred, labels=['FAKE', 'REAL']))
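
# Retraining on every run is wasteful once the model is good enough. A sketch
# of persisting the fitted vectorizer and classifier with joblib (installed
# alongside scikit-learn); the file names are arbitrary choices.
import joblib
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.joblib')
joblib.dump(pac, 'pac_model.joblib')
# Later runs could reload them instead of refitting:
#   tfidf_vectorizer = joblib.load('tfidf_vectorizer.joblib')
#   pac = joblib.load('pac_model.joblib')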

def read_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

# Preprocess and vectorize the new article's text
file_path = 'tradus.txt'
new_story = read_file(file_path)
tfidf_new_story = tfidf_vectorizer.transform([new_story])

# Use the model to predict the new article's label
prediction = pac.predict(tfidf_new_story)

# Display the result
print(f"Predicted label: {prediction[0]}")

# Write the result to a file named "resultat.txt"
with open("resultat.txt", "w") as output_file:
    output_file.write(f"Predicted label: {prediction[0]}\n")

# Open "resultat.txt" to verify the result; os.startfile is Windows-only,
# so fall back to printing the contents on other systems
if hasattr(os, 'startfile'):
    os.startfile("resultat.txt")
else:
    with open("resultat.txt") as check:
        print(check.read())


def TransformData(data):
    # "YYYY-MM-DD" -> "YYYYMMDD"
    return str(data)[:4] + str(data)[5:7] + str(data)[8:10]


def DiferentaData(data1, data2):
    # Approximate difference in days between two "YYYYMMDD" strings,
    # treating every month as 30 days and every year as 365
    y1 = int(data1[:4])
    y2 = int(data2[:4])
    m1 = int(data1[4:6])
    m2 = int(data2[4:6])
    z1 = int(data1[6:8])
    z2 = int(data2[6:8])
    d1 = z1 + m1*30 + (y1-1900)*365
    d2 = z2 + m2*30 + (y2-1900)*365
    return d1 - d2


def TitluArticol(url):
    # Fetch the page with a browser-like User-Agent and pull out the <title>
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0'}
    n = requests.get(url, headers=headers)
    al = n.text
    return al[al.find('<title>') + 7 : al.find('</title>')]

url = urlstire
titlu = TitluArticol(url)

data = TransformData(find_date(url))
print('Article title:', titlu)
print('Article date:', data)

today = TransformData(date.today())
print("Today's date:", today)

if data == today:
    print("this article was written today")
else:
    dif = DiferentaData(today, data)
    print('the article is', dif, 'days old')
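
# DiferentaData assumes 30-day months and 365-day years, so the result is
# only approximate. An exact alternative using only the standard library,
# assuming find_date keeps returning ISO "YYYY-MM-DD" strings (a sketch,
# not wired in):
def exact_day_difference(iso_date1, iso_date2):
    # Subtracting two dates yields a timedelta; .days is the exact difference
    return (date.fromisoformat(iso_date1) - date.fromisoformat(iso_date2)).days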


import openai

# Never hardcode the API key; read it from the environment instead
openai.api_key = os.environ["OPENAI_API_KEY"]

conversatie = [
    {"role": "system", "content": "You are an expert assistant at detecting fake and real news, and you provide solid arguments. Answer in Romanian."},
]


with open("news.txt", "r", encoding='utf-8') as file:
     content = file.read()

#Limitarea lungimii articolului la 4000 de tokenuri pentru a nu depasi limita maxima a modelului
content = content[:3000]

intrebare = ("stabileste valoarea de adevar a urmatorului articol argumentand , acesta este articolul: ") + content
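
# The character cut above only approximates the model's token limit. If the
# optional tiktoken package is installed, the truncation can be done in
# actual tokens instead; the 3000-token budget here is an assumption.
try:
    import tiktoken
    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
    intrebare = enc.decode(enc.encode(intrebare)[:3000])
except ImportError:
    pass  # fall back to the character-based truncation above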

conversatie.append({"role": "user", "content": intrebare})
raspuns = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=conversatie
)
answer = raspuns['choices'][0]['message']['content']
print(answer)

# Save the answer to a file
with open("answer.txt", "w", encoding='utf-8') as output_file:
    output_file.write(answer)

# Re-open the answer file and print its contents as a check
with open("answer.txt", "r", encoding='utf-8') as input_file:
    print(input_file.read())

conversatie.append({"role": "assistant", "content": answer})
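
# Transient rate limits are common with the pre-1.0 openai client used above.
# A minimal retry sketch (not wired in), assuming the 0.x error classes and
# an arbitrary three-attempt budget with exponential backoff:
import time

def chat_with_retry(messages, attempts=3):
    for attempt in range(attempts):
        try:
            return openai.ChatCompletion.create(
                model="gpt-3.5-turbo", messages=messages)
        except openai.error.RateLimitError:
            time.sleep(2 ** attempt)  # 1s, 2s, 4s between attempts
    raise RuntimeError("OpenAI request kept hitting the rate limit")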

import multiprocessing as mp
import glob
from sys import stdout

def process_file(file_path):
    # Import VADER inside the worker so each process builds its own analyzer
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    sid = SentimentIntensityAnalyzer()

    stdout.write(f"{os.getpid():05d} processing: {file_path}\n")
    stdout.flush()
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    res = sid.polarity_scores(text)

    stdout.write(f"{os.getpid():05d} finished: {file_path} [{str(res)}]\n")
    stdout.flush()
    return [file_path, res]
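
# SentimentIntensityAnalyzer raises a LookupError until the VADER lexicon has
# been fetched once per machine. One-time setup (safe to re-run; nltk skips
# resources that are already present):
#
#   import nltk
#   nltk.download('vader_lexicon')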

def main():
    pool = mp.Pool(mp.cpu_count())

    # Score every .txt file in the directory given as the first CLI argument
    txt_files = sorted(glob.glob(f"{sys.argv[1]}/*.txt"))

    results = pool.starmap_async(process_file, [(txt_file,) for txt_file in txt_files]).get()

    # Write one CSV row per file: file,neg,neu,pos,compound
    with open(f"results_{sys.argv[1]}.txt", "wt") as csv_file:
        for file_name, scores in results:
            res = ",".join(str(v) for v in scores.values())
            csv_file.write(f"{file_name},{res}\n")
    pool.close()
    pool.join()


# Parse the sentiment scores out of a results line. NOTE: this is a hardcoded
# sample row; the raw string keeps the backslash in the path from being read
# as a "\n" escape sequence.
input_string = r"stiri\news.txt,0.073,0.89,0.037,-0.9804"
pattern = r'(-?\d+\.\d+)'

numbers = re.findall(pattern, input_string)
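
# The hardcoded sample above stands in for real output. In an actual run the
# scores would come from the file that main() writes; a sketch, assuming the
# script was invoked with "stiri" as its directory argument:
#
#   with open("results_stiri.txt", "r", encoding="utf-8") as rf:
#       input_string = rf.readline().strip()
#   numbers = re.findall(pattern, input_string)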

# Explicitly convert extracted values to floats
neg = float(numbers[0])
neu = float(numbers[1])
poz = float(numbers[2])

# Compare the first value with the range 0.051-0.119
if 0.051 <= neg <= 0.119:
    num1 = 0
else:
    num1 = 1

# Compare the second value with the range 0.784-0.828
if 0.784 <= neu <= 0.828:
    num2 = 0
else:
    num2 = 1

# Compare the third value with the range 0.091-0.158
if 0.091 <= poz <= 0.158:
    num3 = 0
else:
    num3 = 1

# Add num1, num2, and num3
result = num1 + num2 + num3

# Determine the message depending on the result
if result > 0:
    message = "The article is outside normal parameters."
else:
    message = "The article is within normal parameters."

# Write the message to a file named "verificat.txt"
with open("verificat.txt", "w") as file:
    file.write(message)


if __name__ == "__main__":
    main()
