import pandas as pd
from collections import Counter
from Bio.SeqUtils import GC
from Bio.Seq import Seq

def read_dna_sequence(source, filepath=None):
    """
    Read a DNA sequence from a file or from interactive input.

    Every whitespace character (line breaks, spaces, tabs) is removed and the
    result is upper-cased, so wrapped or space-grouped sequences come back as
    one contiguous string regardless of where they were read from.

    :param source: 'file' to read from a file, 'input' to prompt the user
    :param filepath: Path to the file containing the DNA sequence (used only when source is 'file')
    :return: DNA sequence as an upper-case string without whitespace
    :raises FileNotFoundError: if no file exists at ``filepath``
    :raises ValueError: if ``source`` is neither 'file' nor 'input'
    :raises Exception: for any other failure while opening or reading the file
    """
    if source == 'file':
        # Keep the try body to the I/O itself so only read failures are wrapped.
        try:
            with open(filepath, 'r') as file:
                raw_text = file.read()
        except FileNotFoundError as err:
            raise FileNotFoundError("The file was not found. Please check the filepath.") from err
        except Exception as e:
            # Chain explicitly so the original cause stays visible in tracebacks.
            raise Exception(f"An error occurred while reading the file: {e}") from e
    elif source == 'input':
        raw_text = input("Enter DNA sequence: ")
    else:
        raise ValueError("Invalid source type. Use 'file' or 'input'.")

    # split() with no argument breaks on any run of whitespace, so joining the
    # pieces drops newlines, spaces and tabs in a single pass.
    return ''.join(raw_text.split()).upper()
        
def calculate_gc_content(dna_sequence):
    """
    Calculate the GC content of a DNA sequence.

    The percentage is computed directly instead of through Biopython's legacy
    ``GC`` helper, which newer Biopython releases no longer ship. The counting
    rule is kept the same: G, C and the ambiguity code S (G or C) are counted
    in either case, and the total is divided by the full sequence length.

    :param dna_sequence: DNA sequence as a string
    :return: GC content as a percentage between 0 and 100 (0.0 for an empty sequence)
    """
    if not dna_sequence:
        # Avoid dividing by zero; an empty sequence has no GC content.
        return 0.0
    gc_count = sum(dna_sequence.count(base) for base in "GCSgcs")
    return gc_count * 100.0 / len(dna_sequence)

def count_codon_frequencies(dna_sequence):
    """
    Tabulate the relative frequency of each codon in a DNA sequence.

    The sequence is cut into consecutive, non-overlapping triplets starting at
    position 0, and each distinct triplet's share of all triplets is reported.

    :param dna_sequence: DNA sequence as a string; its length must be a multiple of three
    :return: DataFrame with one row per distinct codon and the columns 'Codon' and 'Frequency'
    :raises ValueError: if the sequence length is not a multiple of three
    """
    sequence_length = len(dna_sequence)
    if sequence_length % 3:
        raise ValueError("The length of the DNA sequence is not a multiple of three.")

    tally = Counter(
        dna_sequence[offset:offset + 3] for offset in range(0, sequence_length, 3)
    )
    # Every triplet is counted exactly once, so the total is length // 3.
    codon_total = sequence_length // 3
    rows = [(triplet, occurrences / codon_total) for triplet, occurrences in tally.items()]
    return pd.DataFrame(rows, columns=['Codon', 'Frequency'])

def find_repetitive_sequences(dna_sequence, min_length=4):
    """
    Find repetitive sequences in a DNA sequence.

    A substring is reported when ``str.count`` finds it more than once, i.e.
    it has at least two non-overlapping occurrences. Every qualifying
    substring of at least ``min_length`` characters is returned once, in order
    of first discovery (by start position, then by increasing length).

    :param dna_sequence: DNA sequence as a string
    :param min_length: Minimum length of the repetitive sequence to be considered (at least 1)
    :return: List of repetitive sequences
    :raises ValueError: if ``min_length`` is smaller than 1
    """
    if min_length < 1:
        # Zero or negative lengths would yield empty or wrap-around slices.
        raise ValueError("min_length must be at least 1.")

    sequence_length = len(dna_sequence)
    repetitive_sequences = []
    seen = set()  # O(1) duplicate check; the list alone preserves discovery order
    for start in range(sequence_length - min_length + 1):
        for end in range(start + min_length, sequence_length + 1):
            seq = dna_sequence[start:end]
            if seq in seen:
                # Already known to repeat; a longer extension may still be new.
                continue
            if dna_sequence.count(seq) <= 1:
                # Extending a substring can never increase its non-overlapping
                # count, so no longer candidate from this start can repeat.
                break
            seen.add(seq)
            repetitive_sequences.append(seq)
    return repetitive_sequences

def main():
    """
    Run the interactive analysis workflow.

    Asks where the DNA sequence comes from, loads it, then prints its GC
    content, its codon frequency table and any repetitive sequences. Any
    exception raised along the way is reported on standard output instead of
    propagating.
    """
    try:
        source = input("Enter the source of DNA sequence ('file' or 'input'): ").strip().lower()
        # A path is only requested when the sequence is read from a file.
        filepath = (
            input("Enter the path to the file containing the DNA sequence: ").strip()
            if source == 'file'
            else None
        )

        dna_sequence = read_dna_sequence(source, filepath)

        print(f"GC Content: {calculate_gc_content(dna_sequence):.2f}%")

        # Build the table before printing its heading so a failure leaves no
        # dangling heading behind.
        codon_table = count_codon_frequencies(dna_sequence)
        print("\nCodon Frequencies:")
        print(codon_table)

        repeats = find_repetitive_sequences(dna_sequence)
        if not repeats:
            print("\nNo repetitive sequences found.")
        else:
            print("\nRepetitive Sequences Found:")
            print(*repeats, sep="\n")

    except Exception as err:
        print(f"An error occurred: {err}")

# Start the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()