from multiprocessing import Pool, cpu_count

from newspaper import Article
import sys
import os

# Path to the input file (one article URL per line), taken from the CLI.
# NOTE(review): read at import time — under multiprocessing's "spawn" start
# method workers re-import this module, so sys.argv must still carry the
# argument there (it does on CPython, which forwards argv to children).
urls_input_file = sys.argv[1]
# Output directory for the downloaded article texts ("stiri" = "news" in Romanian).
out_folder = 'stiri'

def download_article(article_url, index, folder):
    """Download one article and save its plain text to ``folder``.

    Writes the parsed article body, UTF-8 encoded, to
    ``<folder>/news_<index>.txt``. On any failure nothing is written
    (previously an empty stub file was left behind because the file was
    opened before the download was attempted). Progress is reported on
    stdout as ``Downloading: [<url>] ..[OK]`` or ``..[ERROR]``.

    Args:
        article_url: URL of the article to fetch.
        index: 1-based index used to build the output file name.
        folder: Existing directory to write the text file into.
    """
    out_file_name = "news_" + str(index) + ".txt"
    out_file_path = os.path.join(folder, out_file_name)
    result = f"Downloading: [{article_url}] .."
    try:
        article = Article(article_url)
        article.download()
        article.parse()
        # Only create the output file once we actually have content,
        # so failed URLs don't leave empty files on disk.
        with open(out_file_path, 'wb') as fw:
            fw.write(article.text.encode('utf-8'))
        result = result + '[OK]'
    # Exception (not bare except): let KeyboardInterrupt/SystemExit
    # propagate so the worker pool can be cancelled cleanly.
    except Exception:
        result = result + '[ERROR]'
    finally:
        print(result)

def main():
    """Read the URL list and download all articles in parallel.

    Builds one ``(url, index, out_folder)`` task per line of
    ``urls_input_file`` (indices start at 1, blank lines included so
    numbering matches the input file), then fans the downloads out over
    one worker process per CPU.
    """
    params_list = []
    os.makedirs(out_folder, exist_ok=True)
    with open(urls_input_file, 'r') as fr:
        # Iterate the file lazily; no need to materialize it with readlines().
        for ix, line in enumerate(fr, start=1):
            params_list.append((line.strip(), ix, out_folder))

    num_cpus = cpu_count()
    print(f"We have {num_cpus} CPUs. Downloading..")
    with Pool(num_cpus) as pool:
        pool.starmap(download_article, params_list)
# Standard entry-point guard: required so multiprocessing workers can
# re-import this module without re-running main().
if __name__ == '__main__':
    main()
