import requests
from bs4 import BeautifulSoup
import os
import re
from urllib.parse import urljoin

def extract_and_download_links(url, file_type, depth):
    """
    Extracts links of a specific file type from a given URL, displays them, 
    writes them to a text file, and downloads the files into a separate folder.

    Args:
    url (str): The URL to scrape.
    file_type (str): The file type to extract (e.g., 'pdf', 'txt', 'doc').
    depth (int): The depth of scraping (1 for current page, 2 for current page and linked pages, and so on).

    Returns:
    None
    """
    # Normalize the extension (accept 'pdf' or '.pdf') and create a directory for downloads
    file_type = file_type.strip().lstrip('.').lower()
    download_dir = f"{file_type}_downloads"
    os.makedirs(download_dir, exist_ok=True)

    # Function to write links to a text file and display them
    def write_to_file_and_display(links, file_name):
        # Append so links found on successive pages accumulate in one file
        with open(file_name, 'a') as file:
            for link in links:
                print(link)  # Display the link
                file.write(link + '\n')

    # Function to download files from links
    def download_files(file_name, download_dir):
        with open(file_name, 'r') as file:
            for link in file:
                link = link.strip()  # Remove trailing whitespace such as '\n'
                if not link:
                    continue
                try:
                    response = requests.get(link, timeout=30)
                    response.raise_for_status()
                    # Build a safe file name from the last path segment of the URL
                    clean_file_name = re.sub(r'[\\/*?:"<>|]', '', link.split('/')[-1]) or 'downloaded_file'
                    file_path = os.path.join(download_dir, clean_file_name)
                    with open(file_path, 'wb') as f:
                        f.write(response.content)
                    print(f"Downloaded {file_path}")
                except Exception as e:
                    print(f"Error downloading {link}: {e}")

    # Function to scrape links
    def scrape_links(current_url, current_depth, file_type, visited_urls, file_name):
        if current_depth == 0:
            return
        if current_url in visited_urls:
            return
        visited_urls.add(current_url)
        try:
            response = requests.get(current_url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            links = soup.find_all('a', href=True)
            # Resolve relative hrefs against the current page and keep those
            # whose extension matches the requested file type
            file_links = [urljoin(current_url, link['href']) for link in links
                          if link['href'].lower().endswith('.' + file_type)]
            write_to_file_and_display(file_links, file_name)
            # Follow the remaining links one level deeper
            for link in links:
                next_url = urljoin(current_url, link['href'])
                if next_url.startswith('http') and next_url not in visited_urls:
                    scrape_links(next_url, current_depth - 1, file_type, visited_urls, file_name)
        except Exception as e:
            print(f"Error scraping {current_url}: {e}")

    # Main logic
    visited_urls = set()
    file_name = f"{file_type}_links.txt"
    open(file_name, 'w').close()  # Start with a fresh links file for this run
    scrape_links(url, depth, file_type, visited_urls, file_name)
    print("Scraping complete. Starting download of files.")
    download_files(file_name, download_dir)
    print("Download complete.")

if __name__ == "__main__":
    # Get user input for URL, file type, and depth
    url = input("Enter the URL to scrape: ")
    file_type = input("Enter the file type to extract (e.g., 'pdf', 'txt', 'doc'): ")
    depth = int(input("Enter the depth of scraping (1 for current page, 2 for linked pages, and so on): "))

    # Call the extract_and_download_links function
    extract_and_download_links(url, file_type, depth)
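
    # Example (hypothetical values): the function can also be called directly instead
    # of through the prompts above. Scraping https://example.com/reports for 'pdf' at
    # depth 1 would collect matching links into pdf_links.txt and save the files
    # under pdf_downloads/ in the current working directory:
    #
    #   extract_and_download_links("https://example.com/reports", "pdf", 1)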
