
    Fd#                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ  G d d          Z G d d	e          Z G d
 de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d d          Z e             e             e             e             e             e             e             e             e             e             e             e             e             e            dZdefdZdedej        d efd!ZdS )"    N)BeautifulSoup)LatexNodes2Text)logs)loggerc                       e Zd ZdedefdZdS )ParserStrategy	file_pathreturnc                     t           N)NotImplementedErrorselfr	   s     ZC:\Users\Administrator\Downloads\Auto-GPT-master\autogpt\commands\file_operations_utils.pyreadzParserStrategy.read   s    !!    N__name__
__module____qualname__strr    r   r   r   r      s6        "c "c " " " " " "r   r   c                       e Zd ZdedefdZdS )	TXTParserr	   r
   c                     t          j        |                                          }t          j        d| d|j         d           t          |          S )Nz	Reading 'z' with encoding '')charset_normalizer	from_pathbestr   debugencodingr   )r   r	   charset_matchs      r   r   zTXTParser.read   sS    *4Y??DDFFVVV]=SVVVWWW=!!!r   Nr   r   r   r   r   r      s6        "c "c " " " " " "r   r   c                       e Zd ZdedefdZdS )	PDFParserr	   r
   c                     t          j        |          }d}t          t          |j                            D ]$}||j        |                                         z  }%|S N )PyPDF2	PdfReaderrangelenpagesextract_text)r   r	   parsertextpage_idxs        r   r   zPDFParser.read   s\    !),,c&,//00 	: 	:HFL*77999DDr   Nr   r   r   r   r$   r$      6        c c      r   r$   c                       e Zd ZdedefdZdS )
DOCXParserr	   r
   c                 Z    t          j        |          }d}|j        D ]}||j        z  }|S r&   )docxDocument
paragraphsr/   )r   r	   doc_filer/   paras        r   r   zDOCXParser.read)   s;    =++' 	 	DDIDDr   Nr   r   r   r   r3   r3   (   r1   r   r3   c                       e Zd ZdedefdZdS )
JSONParserr	   r
   c                     t          |d          5 }t          j        |          }t          |          }d d d            n# 1 swxY w Y   |S Nr)openjsonloadr   r   r	   fdatar/   s        r   r   zJSONParser.read3   s    )S!! 	Q9Q<<Dt99D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 s   $AAANr   r   r   r   r;   r;   2   6        c c      r   r;   c                       e Zd ZdedefdZdS )	XMLParserr	   r
   c                     t          |d          5 }t          |d          }|                                }d d d            n# 1 swxY w Y   |S )Nr>   xmlr?   r   get_textr   r	   rC   soupr/   s        r   r   zXMLParser.read;   s    )S!! 	#Q E**D==??D	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#    %AA	ANr   r   r   r   rG   rG   :   rE   r   rG   c                       e Zd ZdedefdZdS )
YAMLParserr	   r
   c                     t          |d          5 }t          j        |t          j                  }t	          |          }d d d            n# 1 swxY w Y   |S )Nr>   )Loader)r?   yamlrA   
FullLoaderr   rB   s        r   r   zYAMLParser.readD   s    )S!! 	Q9Qt777Dt99D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 s   0AAANr   r   r   r   rP   rP   C   rE   r   rP   c                       e Zd ZdedefdZdS )
HTMLParserr	   r
   c                     t          |d          5 }t          |d          }|                                }d d d            n# 1 swxY w Y   |S )Nr>   html.parserrJ   rL   s        r   r   zHTMLParser.readL   s    )S!! 	#Q M22D==??D	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# rN   Nr   r   r   r   rV   rV   K   rE   r   rV   c                       e Zd ZdedefdZdS )MarkdownParserr	   r
   c                    t          |d          5 }t          j        |                                          }d                    t	          |d                              d                    }d d d            n# 1 swxY w Y   |S )Nr>   r'   rX   T)string)r?   markdownr   joinr   findAll)r   r	   rC   htmlr/   s        r   r   zMarkdownParser.readT   s    )S!! 	TQ$QVVXX..D77=}==EETERRSSD	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T s   AA;;A?A?Nr   r   r   r   rZ   rZ   S   rE   r   rZ   c                       e Zd ZdedefdZdS )LaTeXParserr	   r
   c                     t          |d          5 }|                                }d d d            n# 1 swxY w Y   t                                          |          }|S r=   )r?   r   r   latex_to_text)r   r	   rC   latexr/   s        r   r   zLaTeXParser.read\   s    )S!! 	QFFHHE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	  ..u55   266Nr   r   r   r   rb   rb   [   rE   r   rb   c                   D    e Zd Zdedej        fdZdeddfdZdefdZ	dS )FileContextr.   r   c                 "    || _         || _        d S r   )r.   r   )r   r.   r   s      r   __init__zFileContext.__init__d   s    r   r
   Nc                 N    | j                             d|            || _        d S )NzSetting Context Parser to )r   r    r.   )r   r.   s     r   
set_parserzFileContext.set_parserh   s,    ?v??@@@r   c                     | j                             d| d| j                    | j                            |          S )NzReading file z with parser )r   r    r.   r   r   s     r   	read_filezFileContext.read_filel   sA    O)OO$+OOPPP{	***r   )
r   r   r   r   r   Loggerrj   rl   r   rn   r   r   r   rh   rh   c   su        ~ t{     D    +c + + + + + +r   rh   )z.txtz.csvz.pdfz.docxz.jsonz.xmlz.yamlz.ymlz.htmlz.htmz.xhtmlz.mdz	.markdownz.texr	   c                     t          | d          5 }|                                }ddd           n# 1 swxY w Y   d|v rdS dS )zGiven a file path load all its content and checks if the null bytes is present

    Args:
        file_path (_type_): _description_

    Returns:
        bool: is_binary
    rbN    TF)r?   r   )r	   rC   	file_datas      r   is_file_binary_fnrt      s     
i		 !FFHH	              )t5rf   r   r
   c                    t           j                            |           st          d|  d          t	          |           }t           j                            |           d                                         }t                              |          }|s"|rt          d|           t                      }t          ||          }|                    |           S )Nz
read_file z" failed: no such file or directory   z Unsupported binary file format: )ospathisfileFileNotFoundErrorrt   splitextlowerextension_to_parserget
ValueErrorr   rh   rn   )r	   r   	is_binaryfile_extensionr.   file_contexts         r   read_textual_filer      s    7>>)$$ 
FFFF
 
 	
 "),,IW%%i00399;;N $$^44F  	RPPPQQQvv..L!!),,,r   )r@   rw   r   r5   r]   r(   rS   bs4r   pylatexenc.latex2textr   autogptr   autogpt.logsr   r   r   r$   r3   r;   rG   rP   rV   rZ   rb   rh   r}   r   rt   ro   r   r   r   r   <module>r      s:    				               1 1 1 1 1 1            " " " " " " " "" " " " " " " "                                              ^       .   + + + + + + + + IKKIKKIKKZ\\Z\\IKKZ\\JLLZ\\JLLjll>!!KMM  $     - -dk -c - - - - - -r   