import re
import sys

# A file name is required.  Stop here with a non-zero exit status instead of
# falling through to the sys.argv[1] lookup below, which would otherwise fail
# with an IndexError traceback right after the message is printed.
if len(sys.argv) < 2:
    print("Please send a file name as argument")
    sys.exit(1)

# Path of the input file to scan, taken from the first command-line argument.
file_name = sys.argv[1]
    

# Compiled once at import time.  A raw string is used so that the `\(` and
# `\)` escapes reach the regex engine as written instead of being parsed as
# (invalid) Python string escapes.
# Note: `$-_` inside the character class is a range from `$` to `_`, which is
# why characters such as `/`, `:`, `?` and `=` are accepted in a URL.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)


def extractURLs(fileContent):
    """Return every http/https URL found in *fileContent*.

    The text is lower-cased before matching, so every returned URL is entirely
    lower-case (including its path).  A match starts at ``http://`` or
    ``https://`` and continues for as long as the following characters belong
    to the accepted set; whitespace ends a match, while trailing punctuation
    such as ``,`` or ``)`` is part of the accepted set and is kept.

    Args:
        fileContent: The text to scan.

    Returns:
        A list of the matched URL strings in the order they appear; empty when
        nothing matches.
    """
    return _URL_PATTERN.findall(fileContent.lower())

# Read the whole input file.  The `with` block closes the handle even if the
# read raises (e.g. on a decoding error).
with open(file_name, 'r', encoding="utf-8") as myFile:
    fileContent = myFile.read()

URLs = extractURLs(fileContent)

# Write only the URLs ending in '.html' to URLs.txt, one per line.  The file
# is created (or truncated) even when no URL qualifies.
with open("URLs.txt", 'w', encoding="utf-8") as myFile:
    myFile.writelines(item + '\n' for item in URLs if item.endswith('.html'))
