from htmldate import find_date
from datetime import date, datetime
import sys
import os
import requests

date_time = "documente"
os.makedirs(date_time, exist_ok=True)
print(f"saving into {date_time}")

def TransformData(data):
    """Collapse an ISO date (YYYY-MM-DD) into a compact YYYYMMDD string."""
    data = str(data)
    return data[:4] + data[5:7] + data[8:10]
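
# Example (htmldate and date.today() both yield ISO YYYY-MM-DD):
#   TransformData("2024-03-10") -> "20240310"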
    
def DiferentaData(data1, data2):
    """Return the signed number of days between two YYYYMMDD date strings."""
    d1 = datetime.strptime(data1, "%Y%m%d").date()
    d2 = datetime.strptime(data2, "%Y%m%d").date()
    return (d1 - d2).days
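
# Worked examples:
#   DiferentaData("20240310", "20240301") -> 9
#   DiferentaData("20250101", "20241231") -> 1  (crosses a year boundary)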
    
def TitluArticol(url):
    """Fetch a page and return the text inside its <title> tag."""
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0'}
    n = requests.get(url, headers=headers)
    al = n.text
    return al[al.find('<title>') + 7 : al.find('</title>')]
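
# Note: this is a naive parse that assumes a literal <title>...</title> pair,
# e.g. HTML containing "<title>Breaking news</title>" yields "Breaking news".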

url = input("enter the article address (link) = ")
titlu = TitluArticol(url)

data = TransformData(find_date(url))
print('Article title:', titlu)
print('Article date:', data)

today = TransformData(date.today())
print("Today's date:", today)

if data == today:
    print("this article was written today")
else:
    dif = DiferentaData(today, data)
    print('the article is', dif, 'days old')


# get the API KEY here: https://developers.google.com/custom-search/v1/overview
API_KEY = "AIzaSyCWYqhZ6dR10q9DaMRGPoWrmLZhvQ-OIxw"
# get your Search Engine ID on your CSE control panel
SEARCH_ENGINE_ID = "40f686966ff73f062"
# the search query you want
query = titlu
# using the first page
page = 1
# constructing the URL
# doc: https://developers.google.com/custom-search/v1/using_rest
# calculating start, (page=2) => (start=11), (page=3) => (start=21)
start = (page - 1) * 10 + 1
url = f"https://www.googleapis.com/customsearch/v1?key={API_KEY}&cx={SEARCH_ENGINE_ID}&q={query}&start={start}"
# make the API request
data = requests.get(url).json()
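# A successful response is JSON shaped roughly like this (abridged):
#   {"items": [{"title": ..., "snippet": ..., "htmlSnippet": ..., "link": ...,
#               "pagemap": {"metatags": [{"og:description": ...}]}}, ...]}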
# get the result items
search_items = data.get("items")
# iterate over 10 results found
if search_items:
    for i, search_item in enumerate(search_items, start=1):
        try:
            long_description = search_item["pagemap"]["metatags"][0]["og:description"]
        except KeyError:
            long_description = "N/A"
        # get the page title
        title = search_item.get("title")
        # page snippet
        snippet = search_item.get("snippet")
        # alternatively, you can get the HTML snippet (bolded keywords)
        html_snippet = search_item.get("htmlSnippet")
        # extract the page url
        link = search_item.get("link")
        # print the results
        print("="*10, f"Result #{i+start-1}", "="*10)
        print("Title:", title)
        print("Description:", snippet)
        print("Long description:", long_description)
        print("URL:", link, "\n")
        # append every link we find to a file named links.txt
        with open("links.txt", "a") as f:
            f.write(link + "\n")

else:
    print("no search results returned; exiting")
    sys.exit(1)

#################################################################


from newspaper import Article

fisier = "links.txt"
print(fisier)
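
# Assumed newspaper3k behaviour: after download() + parse(), an Article
# exposes .text (the body text), .authors (a list of names) and
# .publish_date (a datetime, or None when it cannot be detected).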
with open(fisier, 'r') as fr:
    os.makedirs('stiri', exist_ok=True)
    os.chdir('stiri')
    # a single pass over links.txt; enumerate numbers the output files
    for i, line in enumerate(fr):
        urlstire = line.strip()
        result = 'Downloading: [' + urlstire + '] ............ '
        with open('stire_' + str(i) + '.txt', 'wb') as fw:
            try:
                article = Article(urlstire)
                article.download()
                article.parse()
                fw.write(article.text.encode('utf-8'))
                result = result + '[OK]'
                # authors is a list; publish_date may be a datetime or None
                print(', '.join(article.authors), ':', article.publish_date)
            except Exception:
                result = result + '[ERROR]'
        print(result)
os.chdir('..')



#################################################################

import unicodedata


def remove_accents(text):
    """
    Strip accents from input String.

    :param text: The input string.
    :type text: String.

    :returns: The processed String.
    :rtype: String.
    """
    text = unicodedata.normalize('NFD', text)
    text = text.encode('ascii', 'ignore')
    text = text.decode("utf-8")
    return str(text)
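
# Example: remove_accents("știri de ultimă oră") -> "stiri de ultima ora"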

def remove_accents2(input_str):
    nfkd_form = unicodedata.normalize('NFKD', input_str)
    return u"".join([c for c in nfkd_form if not unicodedata.combining(c)])

folder= ("stiri")
files=os.listdir(folder)
# creating an empty list
lst = []

# number of elemetns as data_ro
words=titlu.split()
keys = []
# iterating till the range
for i in words:
    koi = str()
    keys.append(remove_accents(koi))  # adding the element

path = os.path.join(folder, 'result.txt')
for file in files:
    # skip non-text files and any result file left over from a previous run
    if file.endswith('.txt') and file != 'result.txt':
        with open(folder + os.sep + file, encoding='utf-8') as f:
            data = f.read()
        data = remove_accents(data)

        # record each title keyword that occurs in this article
        visited = []
        for key in keys:
            if key in data and key not in visited:
                visited.append(key)

        # keep only the articles that contain every keyword of the title
        if len(visited) == len(keys):
            with open(path, 'a', encoding='utf-8') as f2:
                f2.write(data)
                f2.write('\n')