import csv
import glob
import multiprocessing as mp
import os
import sys
from sys import stdout

def _get_analyzer():
  """Return this process's VADER analyzer, creating it on first use.

  The instance is stored on the function object so each worker process
  builds it once and reuses it for every file, instead of constructing a
  new analyzer per file.
  """
  analyzer = getattr(_get_analyzer, "_instance", None)
  if analyzer is None:
    # Imported lazily so NLTK is only loaded in processes that score text.
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    analyzer = _get_analyzer._instance = SentimentIntensityAnalyzer()
  return analyzer


def process_file(file_path):
  """Compute VADER polarity scores for the full contents of one text file.

  A "processing" line and a "finished" line, each prefixed with the
  zero-padded PID of the current process, are written to stdout and
  flushed immediately.

  Args:
    file_path: Path of the text file to read. It is read in one go using
      the platform's default text encoding.

  Returns:
    A two-item list ``[file_path, scores]`` where ``scores`` is the value
    returned by ``SentimentIntensityAnalyzer.polarity_scores``.

  Raises:
    OSError: If the file cannot be opened or read.
  """
  sid = _get_analyzer()
  pid = os.getpid()

  stdout.write(f"{pid:05d} processing: {file_path}\n")
  stdout.flush()

  # Context manager guarantees the handle is closed even if read() fails.
  with open(file_path, "r") as text_file:
    text = text_file.read()
  res = sid.polarity_scores(text)

  stdout.write(f"{pid:05d} finished: {file_path} [{res}]\n")
  stdout.flush()
  return [file_path, res]

def main():
  """Score every ``*.txt`` file in a directory and write the results as CSV.

  The directory is taken from the first command-line argument. Its
  ``*.txt`` files are processed in sorted order by ``process_file`` in a
  pool with one worker per CPU, and the scores are written to
  ``results_<directory>.csv`` in the current working directory.

  Raises:
    SystemExit: If no directory argument was given.
  """
  if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <directory>")
  src_dir = sys.argv[1]

  txt_files = sorted(glob.glob(f"{src_dir}/*.txt"))

  # The context manager tears the pool down even if a worker raises.
  # map() blocks until every file is done and keeps the input order.
  with mp.Pool(mp.cpu_count()) as pool:
    results = pool.map(process_file, txt_files)

  # NOTE(review): src_dir is interpolated verbatim, so an argument that
  # contains a path separator yields a nested output path - confirm intended.
  with open(f"results_{src_dir}.csv", "wt") as csv_file:
    # csv.writer quotes paths containing commas or quotes; lineterminator
    # keeps the same "\n" row endings the hand-built rows used.
    writer = csv.writer(csv_file, lineterminator="\n")
    writer.writerow(["file", "neg", "neu", "pos", "compound"])
    for file_path, scores in results:
      # Score columns follow the dict's own value order; the header above
      # assumes that order is neg, neu, pos, compound.
      writer.writerow([file_path, *scores.values()])

# Run only when executed as a script. The guard also keeps main() from
# running again when a multiprocessing worker imports this module (as
# happens under the "spawn" start method).
if __name__ == "__main__":
  main()