
"""Text processing functions"""
from math import ceil
from typing import Optional

import spacy
import tiktoken

from autogpt.config import Config
from autogpt.llm.base import ChatSequence
from autogpt.llm.providers.openai import OPEN_AI_MODELS
from autogpt.llm.utils import count_string_tokens, create_chat_completion
from autogpt.logs import logger
from autogpt.utils import batch


def _max_chunk_length(model: str, max: Optional[int] = None) -> int:
    model_max_input_tokens = OPEN_AI_MODELS[model].max_tokens - 1
    if max is not None and max > 0:
        return min(max, model_max_input_tokens)
    return model_max_input_tokens


def must_chunk_content(
    text: str, for_model: str, max_chunk_length: Optional[int] = None
) -> bool:
    return count_string_tokens(text, for_model) > _max_chunk_length(
        for_model, max_chunk_length
    )


def chunk_content(
    content: str,
    for_model: str,
    max_chunk_length: Optional[int] = None,
    with_overlap=True,
):
    """Split content into chunks of approximately equal token length."""

    MAX_OVERLAP = 200  # limit how many tokens neighbouring chunks may share

    if not must_chunk_content(content, for_model, max_chunk_length):
        yield content, count_string_tokens(content, for_model)
        return

    max_chunk_length = max_chunk_length or _max_chunk_length(for_model)

    tokenizer = tiktoken.encoding_for_model(for_model)

    tokenized_text = tokenizer.encode(content)
    total_length = len(tokenized_text)
    n_chunks = ceil(total_length / max_chunk_length)

    chunk_length = ceil(total_length / n_chunks)
    overlap = min(max_chunk_length - chunk_length, MAX_OVERLAP) if with_overlap else 0

    for token_batch in batch(tokenized_text, chunk_length + overlap, overlap):
        yield tokenizer.decode(token_batch), len(token_batch)


def summarize_text(
    text: str,
    config: Config,
    instruction: Optional[str] = None,
    question: Optional[str] = None,
) -> tuple[str, None | list[tuple[str, str]]]:
    """Summarize text using the OpenAI API

    Args:
        text (str): The text to summarize
        config (Config): The config object
        instruction (str): Additional instruction for summarization, e.g. "focus on information related to polar bears", "omit personal information contained in the text"
        question (str): Question to answer in the summary

    Returns:
        str: The summary of the text
        list[(summary, chunk)]: Text chunks and their summary, if the text was chunked.
            None otherwise.
    """
    if not text:
        raise ValueError("No text to summarize")

    if instruction and question:
        raise ValueError("Parameters 'question' and 'instructions' cannot both be set")

    model = config.fast_llm_model

    if question:
        instruction = (
            "include any information that can be used to answer the question "
            f'"{question}". Do not directly answer the question itself'
        )

    summarization_prompt = ChatSequence.for_model(model)

    token_length = count_string_tokens(text, model)
    logger.info(f"Text length: {token_length} tokens")

    # reserve 550 tokens for the summarization prompt and the model's reply
    max_chunk_length = _max_chunk_length(model) - 550
    logger.info(f"Max chunk length: {max_chunk_length} tokens")

    if not must_chunk_content(text, model, max_chunk_length):
        summarization_prompt.add(
            "user",
            "Write a concise summary of the following text"
            f"{f'; {instruction}' if instruction is not None else ''}:"
            "\n\n\n"
            f'LITERAL TEXT: """{text}"""'
            "\n\n\n"
            "CONCISE SUMMARY: The text is best summarized as",
        )

        logger.debug(f"Summarizing with {model}:\n{summarization_prompt.dump()}\n")
        summary = create_chat_completion(
            prompt=summarization_prompt, config=config, temperature=0, max_tokens=500
        ).content

        logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n")
        return summary.strip(), None

    summaries: list[str] = []
    chunks = list(
        split_text(
            text, for_model=model, config=config, max_chunk_length=max_chunk_length
        )
    )

    for i, (chunk, chunk_length) in enumerate(chunks):
        logger.info(
            f"Summarizing chunk {i + 1} / {len(chunks)} of length {chunk_length} tokens"
        )
        summary, _ = summarize_text(chunk, config)
        summaries.append(summary)

    logger.info(f"Summarized {len(chunks)} chunks")

    summary, _ = summarize_text("\n\n".join(summaries), config)

    return summary.strip(), [
        (summaries[i], chunks[i][0]) for i in range(0, len(chunks))
    ]


def split_text(
    text: str,
    for_model: str,
    config: Config,
    with_overlap=True,
    max_chunk_length: Optional[int] = None,
):
    """Split text into chunks of sentences, with each chunk not exceeding the maximum length

    Args:
        text (str): The text to split
        for_model (str): The model to chunk for; determines tokenizer and constraints
        config (Config): The config object
        with_overlap (bool, optional): Whether to allow overlap between chunks
        max_chunk_length (int, optional): The maximum length of a chunk

    Yields:
        str: The next chunk of text

    Raises:
        ValueError: when a sentence is longer than the maximum length
    """
    max_length = _max_chunk_length(for_model, max_chunk_length)

    # flatten paragraphs so the sentencizer sees one continuous block of text
    text = text.replace("\n", " ")
    text_length = count_string_tokens(text, for_model)

    if text_length < max_length:
        yield text, text_length
        return

    n_chunks = ceil(text_length / max_length)
    target_chunk_length = ceil(text_length / n_chunks)

    nlp = spacy.load(config.browse_spacy_language_model)
    nlp.add_pipe("sentencizer")
    doc = nlp(text)
    sentences = [sentence.text.strip() for sentence in doc.sents]

    current_chunk: list[str] = []
    current_chunk_length = 0
    last_sentence = None
    last_sentence_length = 0

    i = 0
    while i < len(sentences):
        sentence = sentences[i]
        sentence_length = count_string_tokens(sentence, for_model)
        expected_chunk_length = current_chunk_length + 1 + sentence_length

        if (
            expected_chunk_length < max_length
            # try to create chunks of approximately equal size
            and expected_chunk_length - (sentence_length / 2) < target_chunk_length
        ):
            current_chunk.append(sentence)
            current_chunk_length = expected_chunk_length

        elif sentence_length < max_length:
            if last_sentence:
                yield " ".join(current_chunk), current_chunk_length
                current_chunk = []
                current_chunk_length = 0

                if with_overlap:
                    # start the new chunk with (part of) the previous sentence
                    overlap_max_length = max_length - sentence_length - 1
                    if last_sentence_length < overlap_max_length:
                        current_chunk += [last_sentence]
                        current_chunk_length += last_sentence_length + 1
                    elif overlap_max_length > 25:
                        # add as much of the end of the last sentence as fits
                        current_chunk += [
                            list(
                                chunk_content(
                                    last_sentence,
                                    for_model,
                                    overlap_max_length,
                                )
                            ).pop()[0]
                        ]
                        current_chunk_length += overlap_max_length + 1

            current_chunk += [sentence]
            current_chunk_length += sentence_length

        else:  # sentence longer than maximum length -> chop it up and try again
            sentences[i : i + 1] = [
                chunk
                for chunk, _ in chunk_content(sentence, for_model, target_chunk_length)
            ]
            continue

        i += 1
        last_sentence = sentence
        last_sentence_length = sentence_length

    if current_chunk:
        yield " ".join(current_chunk), current_chunk_length