o
    ;)h,Q                     @  sV  d dl mZ d dlmZ d dlmZ ddlmZmZm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ G dd dZG d	d
 d
eZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd deZ%G dd deZ&eddd/d"d#Z'ed$d	&d0d1d,d-Z(d.S )2    )annotations)	lru_cache)	getLogger   )COMMON_SAFE_ASCII_CHARACTERSTRACEUNICODE_SECONDARY_RANGE_KEYWORD)is_accentuated	is_arabicis_arabic_isolated_formis_case_variableis_cjkis_emoticon	is_hangulis_hiraganais_katakanais_latinis_punctuationis_separator	is_symbolis_thaiis_unprintableremove_accentunicode_rangeis_cjk_uncommonc                   @  s<   e Zd ZdZdddZdd	d
ZdddZedddZdS )MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    	characterstrreturnboolc                 C     t )z@
        Determine if given character should be fed in.
        NotImplementedErrorselfr    r%   TC:\Users\User\Downloads\Proyecto_IoT\venv\Lib\site-packages\charset_normalizer/md.pyeligible'      zMessDetectorPlugin.eligibleNonec                 C  r    )z
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        r!   r#   r%   r%   r&   feed-   s   zMessDetectorPlugin.feedc                 C  r    )zB
        Permit to reset the plugin to the initial state.
        r!   r$   r%   r%   r&   reset4   r(   zMessDetectorPlugin.resetfloatc                 C  r    )z
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        r!   r+   r%   r%   r&   ratio:   s   zMessDetectorPlugin.ratioNr   r   r   r   r   r   r   r)   r   r)   r   r-   )	__name__
__module____qualname____doc__r'   r*   r,   propertyr.   r%   r%   r%   r&   r   !   s    


r   c                   @  B   e Zd ZdddZddd	Zdd
dZdddZedddZdS ) TooManySymbolOrPunctuationPluginr   r)   c                 C  s"   d| _ d| _d| _d | _d| _d S )Nr   F)_punctuation_count_symbol_count_character_count_last_printable_charZ_frenzy_symbol_in_wordr+   r%   r%   r&   __init__D   s
   
z)TooManySymbolOrPunctuationPlugin.__init__r   r   r   c                 C     |  S Nisprintabler#   r%   r%   r&   r'   L      z)TooManySymbolOrPunctuationPlugin.eligiblec                 C  sp   |  j d7  _ || jkr3|tvr3t|r|  jd7  _n| du r3t|r3t|du r3|  jd7  _|| _d S )Nr   F   )	r<   r=   r   r   r:   isdigitr   r   r;   r#   r%   r%   r&   r*   O   s   

z%TooManySymbolOrPunctuationPlugin.feedc                 C  s   d| _ d| _d| _d S Nr   )r:   r<   r;   r+   r%   r%   r&   r,   a      
z&TooManySymbolOrPunctuationPlugin.resetr-   c                 C  s0   | j dkrdS | j| j | j  }|dkr|S dS )Nr           333333?)r<   r:   r;   )r$   Zratio_of_punctuationr%   r%   r&   r.   f   s   

z&TooManySymbolOrPunctuationPlugin.ratioNr1   r/   r0   r2   	r3   r4   r5   r>   r'   r*   r,   r7   r.   r%   r%   r%   r&   r9   C   s    



r9   c                   @  r8   )TooManyAccentuatedPluginr   r)   c                 C     d| _ d| _d S rF   r<   _accentuated_countr+   r%   r%   r&   r>   s      
z!TooManyAccentuatedPlugin.__init__r   r   r   c                 C  r?   r@   )isalphar#   r%   r%   r&   r'   w   rC   z!TooManyAccentuatedPlugin.eligiblec                 C  ,   |  j d7  _ t|r|  jd7  _d S d S Nr   )r<   r	   rN   r#   r%   r%   r&   r*   z      zTooManyAccentuatedPlugin.feedc                 C  rL   rF   rM   r+   r%   r%   r&   r,      rO   zTooManyAccentuatedPlugin.resetr-   c                 C  s*   | j dk rdS | j| j  }|dkr|S dS )N   rH   gffffff?rM   )r$   Zratio_of_accentuationr%   r%   r&   r.      s   
zTooManyAccentuatedPlugin.ratioNr1   r/   r0   r2   rJ   r%   r%   r%   r&   rK   r   s    



rK   c                   @  r8   )UnprintablePluginr   r)   c                 C  rL   rF   )_unprintable_countr<   r+   r%   r%   r&   r>      rO   zUnprintablePlugin.__init__r   r   r   c                 C     dS NTr%   r#   r%   r%   r&   r'         zUnprintablePlugin.eligiblec                 C  s(   t |r|  jd7  _|  jd7  _d S rR   )r   rV   r<   r#   r%   r%   r&   r*      s   zUnprintablePlugin.feedc                 C  s
   d| _ d S rF   )rV   r+   r%   r%   r&   r,      s   
zUnprintablePlugin.resetr-   c                 C     | j dkrdS | jd | j  S )Nr   rH   rT   )r<   rV   r+   r%   r%   r&   r.         
zUnprintablePlugin.ratioNr1   r/   r0   r2   rJ   r%   r%   r%   r&   rU      s    



rU   c                   @  r8   )SuspiciousDuplicateAccentPluginr   r)   c                 C     d| _ d| _d | _d S rF   _successive_countr<   _last_latin_characterr+   r%   r%   r&   r>      s   
z(SuspiciousDuplicateAccentPlugin.__init__r   r   r   c                 C  s   |  ot|S r@   )rP   r   r#   r%   r%   r&   r'      s   z(SuspiciousDuplicateAccentPlugin.eligiblec                 C  st   |  j d7  _ | jd ur5t|r5t| jr5| r%| j r%|  jd7  _t|t| jkr5|  jd7  _|| _d S rR   )r<   r`   r	   isupperr_   r   r#   r%   r%   r&   r*      s   

z$SuspiciousDuplicateAccentPlugin.feedc                 C  r]   rF   r^   r+   r%   r%   r&   r,      rG   z%SuspiciousDuplicateAccentPlugin.resetr-   c                 C  rZ   )Nr   rH   rD   )r<   r_   r+   r%   r%   r&   r.      r[   z%SuspiciousDuplicateAccentPlugin.ratioNr1   r/   r0   r2   rJ   r%   r%   r%   r&   r\      s    



r\   c                   @  r8   )SuspiciousRanger   r)   c                 C  r]   rF   )"_suspicious_successive_range_countr<   _last_printable_seenr+   r%   r%   r&   r>      rG   zSuspiciousRange.__init__r   r   r   c                 C  r?   r@   rA   r#   r%   r%   r&   r'      rC   zSuspiciousRange.eligiblec                 C  sx   |  j d7  _ | st|s|tv rd | _d S | jd u r"|| _d S t| j}t|}t||r7|  jd7  _|| _d S rR   )r<   isspacer   r   rd   r    is_suspiciously_successive_rangerc   )r$   r   unicode_range_aunicode_range_br%   r%   r&   r*      s    



zSuspiciousRange.feedc                 C  r]   rF   )r<   rc   rd   r+   r%   r%   r&   r,      rG   zSuspiciousRange.resetr-   c                 C  s"   | j dkrdS | jd | j  }|S )N   rH   rD   )r<   rc   )r$   Zratio_of_suspicious_range_usager%   r%   r&   r.      s   
zSuspiciousRange.ratioNr1   r/   r0   r2   rJ   r%   r%   r%   r&   rb      s    



rb   c                   @  r8   )SuperWeirdWordPluginr   r)   c                 C  s@   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d S )Nr   F )
_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchr<   _bad_character_count_buffer_buffer_accent_count_buffer_glyph_countr+   r%   r%   r&   r>      s   
zSuperWeirdWordPlugin.__init__r   r   r   c                 C  rW   rX   r%   r#   r%   r%   r&   r'     rY   zSuperWeirdWordPlugin.eligiblec                 C  s  |  rc|  j|7  _t|r|  jd7  _| jdu rFt|du s%t|rFt|du rFt|du rFt|du rFt	|du rFt
|du rFd| _t|sZt|sZt|sZt	|sZt
|ra|  jd7  _d S | jshd S | sut|sut|r$| jr$|  jd7  _t| j}|  j|7  _|dkr| j| dkrd| _n4t| jd r| jd  rtdd | jD du r|  jd7  _d| _n| jdkrd| _|  jd7  _|d	kr| jrd
d t| jtd|D }d}|rt|| dkrd}|s|  jd7  _d| _| jr|  jd7  _|  jt| j7  _d| _d| _d| _d| _d| _d S |dvrA| du rCt|rEd| _|  j|7  _d S d S d S d S )Nr   FT         ?c                 s  s    | ]}|  V  qd S r@   ra   ).0_r%   r%   r&   	<genexpr>8  s    z,SuperWeirdWordPlugin.feed.<locals>.<genexpr>   c                 S  s   g | ]
\}}|  r|qS r%   rx   )ry   cir%   r%   r&   
<listcomp>@  s    z-SuperWeirdWordPlugin.feed.<locals>.<listcomp>r   rI   rk   >   >rz   =|~-<)rP   rr   r	   rs   rp   r   r   r   r   r   r   rt   re   r   r   rl   lenr<   ro   ra   allrn   ziprangerm   rq   rE   r   )r$   r   Zbuffer_lengthZcamel_case_dstZprobable_camel_casedr%   r%   r&   r*     s   




zSuperWeirdWordPlugin.feedc                 C  s4   d| _ d| _d| _d| _d| _d| _d| _d| _d S )Nrk   Fr   )rr   ro   rp   rm   rl   r<   rq   rn   r+   r%   r%   r&   r,   _  s   
zSuperWeirdWordPlugin.resetr-   c                 C  s$   | j dkr| jdkrdS | j| j S )N
   r   rH   )rl   rn   rq   r<   r+   r%   r%   r&   r.   i  s   zSuperWeirdWordPlugin.ratioNr1   r/   r0   r2   rJ   r%   r%   r%   r&   rj      s    



Q
rj   c                   @  sF   e Zd ZdZdddZdd	d
ZdddZdddZedddZ	dS )CjkUncommonPluginz<
    Detect messy CJK text that probably means nothing.
    r   r)   c                 C  rL   rF   r<   _uncommon_countr+   r%   r%   r&   r>   v  rO   zCjkUncommonPlugin.__init__r   r   r   c                 C     t |S r@   )r   r#   r%   r%   r&   r'   z  rC   zCjkUncommonPlugin.eligiblec                 C  rQ   rR   )r<   r   r   r#   r%   r%   r&   r*   }  s
   zCjkUncommonPlugin.feedc                 C  rL   rF   r   r+   r%   r%   r&   r,     rO   zCjkUncommonPlugin.resetr-   c                 C  s.   | j dk rdS | j| j  }|dkr|d S dS )NrT   rH   rv   r   r   )r$   Zuncommon_form_usager%   r%   r&   r.     s   
zCjkUncommonPlugin.ratioNr1   r/   r0   r2   )
r3   r4   r5   r6   r>   r'   r*   r,   r7   r.   r%   r%   r%   r&   r   q  s    



r   c                   @  r8   )ArchaicUpperLowerPluginr   r)   c                 C  s.   d| _ d| _d| _d| _d| _d | _d| _d S )NFr   T)_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalr<   _last_alpha_seen_current_ascii_onlyr+   r%   r%   r&   r>     s   
z ArchaicUpperLowerPlugin.__init__r   r   r   c                 C  rW   rX   r%   r#   r%   r%   r&   r'     rY   z ArchaicUpperLowerPlugin.eligiblec                 C  s$  |  ot|}|du }|rC| jdkrC| jdkr+| du r+| jdu r+|  j| j7  _d| _d| _d | _d| _|  j	d7  _	d| _d S | jdu rQ|
 du rQd| _| jd ur| r_| j sh| r|| j r|| jdu rx|  jd7  _d| _nd| _nd| _|  j	d7  _	|  jd7  _|| _d S )NFr   @   r   TrD   )rP   r   r   rE   r   r   r   r   r   r<   isasciira   islower)r$   r   Zis_concernedZ	chunk_sepr%   r%   r&   r*     s@   




zArchaicUpperLowerPlugin.feedc                 C  s.   d| _ d| _d| _d| _d | _d| _d| _d S )Nr   FT)r<   r   r   r   r   r   r   r+   r%   r%   r&   r,     s   
zArchaicUpperLowerPlugin.resetr-   c                 C  s   | j dkrdS | j| j  S )Nr   rH   )r<   r   r+   r%   r%   r&   r.     s   
zArchaicUpperLowerPlugin.ratioNr1   r/   r0   r2   rJ   r%   r%   r%   r&   r     s    



*	r   c                   @  sB   e Zd ZdddZdddZdd
dZdddZedddZdS )ArabicIsolatedFormPluginr   r)   c                 C  rL   rF   r<   _isolated_form_countr+   r%   r%   r&   r>     rO   z!ArabicIsolatedFormPlugin.__init__c                 C  rL   rF   r   r+   r%   r%   r&   r,     rO   zArabicIsolatedFormPlugin.resetr   r   r   c                 C  r   r@   )r
   r#   r%   r%   r&   r'     rC   z!ArabicIsolatedFormPlugin.eligiblec                 C  rQ   rR   )r<   r   r   r#   r%   r%   r&   r*     rS   zArabicIsolatedFormPlugin.feedr-   c                 C  s   | j dk rdS | j| j  }|S )NrT   rH   r   )r$   Zisolated_form_usager%   r%   r&   r.     s   
zArabicIsolatedFormPlugin.ratioNr1   r/   r0   r2   )	r3   r4   r5   r>   r,   r'   r*   r7   r.   r%   r%   r%   r&   r     s    



r      )maxsizerg   
str | Nonerh   r   r   c                 C  sv  | du s|du r
dS | |krdS d| v rd|v rdS d| v s"d|v r$dS d| v s,d|v r6d| v s4d|v r6dS |  d| d}}|D ]}|tv rJqC||v rQ dS qC| dv |dv }}|s_|rid	| v sgd	|v ridS |ro|rodS d
| v swd
|v rd	| v sd	|v rdS | dks|dkrdS d	| v sd	|v s| dv r|dv rd| v sd|v rdS d| v sd|v rdS | dks|dkrdS dS )za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFZLatinZ	EmoticonsZ	Combining )HiraganaKatakanaCJKHangulzBasic Latin)r   r   PunctuationZForms)splitr   )rg   rh   Zkeywords_range_aZkeywords_range_belZrange_a_jp_charsZrange_b_jp_charsr%   r%   r&   rf     sZ   rf   i   皙?Fdecoded_sequencer   maximum_thresholdr-   debugc              	   C  sR  dd t  D }t| d }d}|dk rd}n	|dkrd}nd	}t| d
 t|D ]2\}}|D ]}	|	|r<|	| q0|dkrG|| dksM||d kr\tdd |D }||kr\ nq*|rtd}
|
	t
d| d| d|  t| dkr|
	t
d| dd   |
	t
d| dd   |D ]}|
	t
|j d|j  qt|dS )zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    c                 S  s   g | ]}| qS r%   r%   )ry   Zmd_classr%   r%   r&   r   N  s    zmess_ratio.<locals>.<listcomp>r   rH   i       r   r      
r   c                 s  s    | ]}|j V  qd S r@   )r.   )ry   dtr%   r%   r&   r{   e  s    zmess_ratio.<locals>.<genexpr>Zcharset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=   zStarting with: NzEnding with: iz:    )r   __subclasses__r   r   r   r'   r*   sumr   logr   	__class__r.   round)r   r   r   Z	detectorslengthZmean_mess_ratioZ!intermediary_mean_mess_ratio_calcr   indexdetectorloggerr   r%   r%   r&   
mess_ratioF  sN   


r   N)rg   r   rh   r   r   r   )r   F)r   r   r   r-   r   r   r   r-   ))
__future__r   	functoolsr   loggingr   Zconstantr   r   r   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r9   rK   rU   r\   rb   rj   r   r   r   rf   r   r%   r%   r%   r&   <module>   s(    P"/%1v#LI