o
    (j6h@                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dlm!Z! d dl"m#Z# d dl"m$Z$ d dl"m%Z% d dl&m'Z' dZ(z
d dl)m*Z* dZ+W n e,y   dZ+Y nw zd dl-Z.dZ/W n e,y   dZ/Y nw e 0dZ1dZ2e3dZ4dZ5h dZ6d e7fd!e7fd"e7fd#e8fd$e9fd%e8fd&Z:d'd(d)d*d+Z;e%d,Z<e%d-Z=G d.d/ d/Z>G d0d1 d1e?Z@G d2d3 d3eAZBG d4d5 d5e?ZCG d6d7 d7e?ZDG d8d9 d9e?ZEd:d; ZFedvd<d=ZGd>d? ZHd@dA ZIdBdC ZJdDdE ZKedFdG ZLdwdHdIZM	J	 	 dxdKdLZNdydMe9dNeOfdOdPZP		 	 	dzdQe e9 dRe!e9 dSe7dTe7dNeOf
dUdVZQ	J		J	 	 	d{dWdXZRdYdZ ZSd[d\ ZTd]d^ ZUeHd|d_d`ZVeHdadb ZWddJd e>jXd fdcddZY		J	 	e	 d}dfdgZZ		J	 	 d~dhdiZ[ddJd e>jXd fdjdkZ\dvdldmZ]ddJd e>jXd dfdndoZ^dpdJd e>jXd fdqdrZ_dsdt Z`eadukrebe` dS )    N)contextmanager)
QUOTE_NONE)ENOENT)wraps)iglob)BytesIO)environ)extsep)linesep)remove)normcase)normpath)realpath)NamedTemporaryFile)sleep)List)Optional)InvalidVersion)parse)Version)Image	tesseract)ndarrayTFpytesseractzutf-8z	^[a-z_]+$RGB>
   BMPGIFPBMPGMPNGPPMJPEGTIFFWEBPJPEG2000page_numorientationrotateorientation_confscriptscript_conf)zPage numberzOrientation in degreesRotatezOrientation confidenceScriptzScript confidencez.tessedit_create_boxfile=1 batch.nochop makeboxztessedit_create_alto=1ztessedit_create_hocr=1ztessedit_create_tsv=1)boxxmlhocrtsvz3.05z4.1.0c                   @   s   e Zd ZdZdZdZdZdS )Outputbytesz
data.framedictstringN)__name__
__module____qualname__BYTES	DATAFRAMEDICTSTRING r<   r<   f/var/www/html/chefvision.cloud.itp360.com/venv/lib/python3.10/site-packages/pytesseract/pytesseract.pyr1   V   s
    r1   c                          e Zd Z fddZ  ZS )PandasNotSupportedc                       t  d d S )NzMissing pandas packagesuper__init__self	__class__r<   r=   rC   ^   s   zPandasNotSupported.__init__r5   r6   r7   rC   __classcell__r<   r<   rF   r=   r?   ]       r?   c                   @   s   e Zd Zdd ZdS )TesseractErrorc                 C   s   || _ || _||f| _d S N)statusmessageargs)rE   rM   rN   r<   r<   r=   rC   c   s   zTesseractError.__init__N)r5   r6   r7   rC   r<   r<   r<   r=   rK   b   s    rK   c                       r>   )TesseractNotFoundErrorc                    s   t  t d d S )NzQ is not installed or it's not in your PATH. See README file for more information.)rB   rC   tesseract_cmdrD   rF   r<   r=   rC   j   s   zTesseractNotFoundError.__init__rH   r<   r<   rF   r=   rP   i   rJ   rP   c                       r>   )TSVNotSupportedc                    r@   )Nz4TSV output not supported. Tesseract >= 3.05 requiredrA   rD   rF   r<   r=   rC   r      zTSVNotSupported.__init__rH   r<   r<   rF   r=   rR   q   rJ   rR   c                       r>   )ALTONotSupportedc                    r@   )Nz6ALTO output not supported. Tesseract >= 4.1.0 requiredrA   rD   rF   r<   r=   rC   y   rS   zALTONotSupported.__init__rH   r<   r<   rF   r=   rT   x   rJ   rT   c                 C   s   |    z;z| d W n ty   td Y n ty!   Y nw W |   || _d S W |   || _d S W |   || _d S |   || _w )N   )	terminatewait	TypeErrorr   	Exceptionkill
returncode)processcoder<   r<   r=   rZ      s&   


rZ   c                 c   s    zL|s|   d V  W | j  | j  | j  d S z| j |d\}}|V  W n tjy;   t| d tdw W | j  | j  | j  d S | j  | j  | j  w )NrU   )timeoutzTesseract process timeout)	communicatestdinclosestdoutstderr
subprocessTimeoutExpiredrZ   RuntimeError)procseconds_error_stringr<   r<   r=   timeout_manager   s,   
	






rl   c                    s    t   fdd_S )Nc                     s,   | ddrju r | i |_jS )NcachedF)pop_result)rO   kwargsfuncwrapperr<   r=   rs      s   zrun_once.<locals>.wrapper)r   ro   )rr   r<   rq   r=   run_once   s   rt   c                 C   s"   d dd | t D  S )N c                 s   s    | ]}|V  qd S rL   r<   .0liner<   r<   r=   	<genexpr>   s    
zget_errors.<locals>.<genexpr>)joindecodeDEFAULT_ENCODING
splitlinesstrip)rk   r<   r<   r=   
get_errors   s
   
r   c                 C   s\   t | r|  dn| D ] }zt| W q ty+ } z|jtkr! W Y d}~qd}~ww dS )z5Tries to remove temp files by filename wildcard path.*N)r   r   OSErrorerrnor   )	temp_namefilenameer<   r<   r=   cleanup   s   
r   c                 C   s   t rt| trt| } t| tjstd| jsdn| j}|tvr&tdd|  v r@t	t
| jd}|| d| d |} || _| |fS )NzUnsupported image objectr   zUnsupported image format/typeA)   r   r   )r   r   )numpy_installed
isinstancer   r   	fromarrayrX   formatSUPPORTED_FORMATSgetbandsnewRGB_MODEsizepaste
getchannel)image	extension
backgroundr<   r<   r=   prepare   s   
r   c                 c   s    zdt dddF}t| tr)|jttt| fV  	 W d    W t|j d S t| \} }|j dt	 | }| j
|| jd |j|fV  W d    n1 sQw   Y  W t|j d S W t|j d S t|j w )Ntess_F)prefixdelete_input)r   )r   r   strnamer   r   r   r   r   r	   saver   )r   fr   input_file_namer<   r<   r=   r      s    
			r   c                 C   sh   t jt jd td}tt dr$t  |d< |d  jt jO  _t j|d _| r-t j|d< |S t j	|d< |S )N)ra   rd   startupinfoenvSTARTUPINFOr   rc   )
re   PIPEr   hasattrr   dwFlagsSTARTF_USESHOWWINDOWSW_HIDEwShowWindowDEVNULL)include_stdoutrp   r<   r<   r=   subprocess_args   s   


r    c              
   C   s  g }t jdk }|r|dkr|ddt|f7 }|t| |f7 }|d ur(|d|f7 }|r3|tj||d7 }| D ]}	|	dvrB||	 q7td| zt	j
|fi t }
W n tyj } z	|jtkrc t d }~ww t|
|}|
jr|t|
jt|W d    d S 1 sw   Y  d S )	Nwin32r   nicez-n-l)posix>   r-   osdr0   r.   z%r)sysplatformr   rQ   shlexsplitappendLOGGERdebugre   Popenr   r   r   r   rP   rl   r[   rK   r   )input_filenameoutput_filename_baser   langconfigr   r^   cmd_argsnot_windows
_extensionrh   r   rk   r<   r<   r=   run_tesseract   s6   	

"r   r   return_bytesc                 C   sV   t | d}|r| W  d    S | tW  d    S 1 s$w   Y  d S )Nrb)openreadr{   r|   )r   r   output_filer<   r<   r=   _read_output  s   $r   
extensionsr   r   r^   c           	   	      s   d dd |D  }|rd| }nd}t| (\}}||d |||||d td	i    fdd|D W  d    S 1 sEw   Y  d S )
Nru   c                 s   s    | ]	}t |d V  qdS )r   N)EXTENTION_TO_CONFIGgetrw   r   r<   r<   r=   ry   .  s    
z.run_and_get_multiple_output.<locals>.<genexpr>z-c r   r   r   r   r   r   r   r^   c                    s2   g | ]}t  d   t | |dv rdnqS )r      pdfr/   T)r   r	   r   rp   r   r<   r=   
<listcomp>C  s    z/run_and_get_multiple_output.<locals>.<listcomp>r<   )rz   r~   r   r   )	r   r   r   r   r^   r   r   r   r   r<   r   r=   run_and_get_multiple_output&  s,   

$r   c           
   	   C   sl   t | (\}}|||||||d}	tdi |	 t|	d  t | |W  d    S 1 s/w   Y  d S )Nr   r   r<   )r   r   r   r	   )
r   r   r   r   r   r^   r   r   r   rp   r<   r<   r=   run_and_get_outputL  s   	
$r   c              
      s   i } fdd|   dD }t|dk r|S |d}t|}t|d |k r0|d d |dk r8||7 }t|D ]<\}}t ||< |D ]0}	t|	|krPqG||krlz
tt|	| }
W n t	yk   |	| }
Y nw |	| }
|| |
 qGq<|S )Nc                    s   g | ]}|  qS r<   r   )rw   rowcell_delimiterr<   r=   r   i  s    z file_to_dict.<locals>.<listcomp>
   r   r_   r   )
r~   r   lenrn   r   	enumeratelistintfloat
ValueError)r0   r   str_col_idxresultrowsheaderlengthiheadr   valr<   r   r=   file_to_dictg  s2   

r   c                 C   s@   |t u r|  S |tu rzt|  W dS  ty   Y dS w dS )NTF)r   isdigitr   r   )r   _typer<   r<   r=   is_valid  s   r   c                 C   s   dd dd |  dD D S )Nc                 S   sX   i | ](}t |d krt|d t|d  d rt|d  d t|d  d |d qS )r   rU   r   )r   r   OSD_KEYS)rw   kvr<   r<   r=   
<dictcomp>  s
    &$zosd_to_dict.<locals>.<dictcomp>c                 s   s    | ]}| d V  qdS ): Nr   rv   r<   r<   r=   ry     s    zosd_to_dict.<locals>.<genexpr>r   r   )r   r<   r<   r=   osd_to_dict  s   r   c                 C   s   t dg}| r|t| 7 }ztj|tjtjd}W n
 ty#   t w |j	dvr,t g }|j
rK|j
ttD ]}| }t|rJ|| q:|S )Nz--list-langs)rc   rd   )r   rU   )rQ   r   r   re   runr   STDOUTr   rP   r[   rc   r{   r|   r
   r~   LANG_PATTERNmatchr   )r   r   r   	languagesrx   r   r<   r<   r=   get_languages  s,   



r   c               	   C   s   zt jtdgt jtt jd} W n
 ty   t w | t	}|
tjdd d^}}|d^}}zt|}|tks@J W |S  ttfyS   td| dw )	z9
    Returns Version object of the Tesseract version
    z	--version)rd   r   ra   
   Nru   -zInvalid tesseract version: "")re   check_outputrQ   r   r   r   r   rP   r{   r|   lstripr4   	printable	partitionr   TESSERACT_MIN_VERSIONAssertionErrorr   
SystemExit)outputraw_versionstr_versionrj   versionr<   r<   r=   get_tesseract_version  s(   

r  c                    sD   | d||||g t j fddt j fddt j fddi|  S )zS
    Returns the result of a Tesseract OCR run on the provided image to string
    txtc                         t  dg  S NTr   r<   rO   r<   r=   <lambda>      z!image_to_string.<locals>.<lambda>c                      s   dt   iS )Ntextr	  r<   r
  r<   r=   r        c                         t   S rL   r	  r<   r
  r<   r=   r        )r1   r8   r:   r;   r   r   r   r   output_typer^   r<   r
  r=   image_to_string  s   r  r   c                 C   sF   |dvrt d| |dkrd|  }| |||||dg}t| S )zU
    Returns the result of a Tesseract OCR run on the provided image to pdf/hocr
    r   zUnsupported extension: r/   z-c tessedit_create_hocr=1 T)r   r~   r   )r   r   r   r   r   r^   rO   r<   r<   r=   image_to_pdf_or_hocr  s   r  c                 C   s<   t ddtk r
t d|  }| d||||dg}t| S )zU
    Returns the result of a Tesseract OCR run on the provided image to ALTO XML
    Trm   z-c tessedit_create_alto=1 r.   )r  TESSERACT_ALTO_VERSIONrT   r~   r   )r   r   r   r   r^   rO   r<   r<   r=   image_to_alto_xml  s
   r  c                    sR   |   d}| d||||g tj fddtj fddtj fddi|  S )zR
    Returns string containing recognized characters and their box boundaries
    z2 -c tessedit_create_boxfile=1 batch.nochop makeboxr-   c                      r  r  r	  r<   r
  r<   r=   r  )  r  z image_to_boxes.<locals>.<lambda>c                      s   t dt   ddS )Nz char left bottom right top page
ru   r   r   r   r<   r
  r<   r=   r  *  s
    c                      r  rL   r	  r<   r
  r<   r=   r  /  r  r~   r1   r8   r:   r;   r  r<   r
  r=   image_to_boxes  s   r  c              	   C   sT   t st tdd}z|| W n ttfy   Y nw tjtt	|  fi |S )N	)quotingsep)
pandas_installedr?   r   updaterX   r   pdread_csvr   r   )rO   r   rp   r<   r<   r=   get_pandas_output3  s   
r"  c              
      sv   t ddtk r
t d|  }| d||||g tj fddtj fddtj fddtj fd	di|  S )
zt
    Returns string containing box boundaries, confidences,
    and other information. Requires Tesseract 3.05+
    Tr  z-c tessedit_create_tsv=1 r0   c                      r  r  r	  r<   r
  r<   r=   r  U  r  zimage_to_data.<locals>.<lambda>c                      s   t  dg S r  )r"  r<   rO   pandas_configr<   r=   r  V  s    c                      s   t t  ddS )Nr  r_   r  r<   r
  r<   r=   r  Z  s    c                      r  rL   r	  r<   r
  r<   r=   r  [  r  )	r  r   rR   r~   r1   r8   r9   r:   r;   )r   r   r   r   r  r^   r$  r<   r#  r=   image_to_data@  s   r%  r   c                    sR   d|   }| d||||g tj fddtj fddtj fddi|  S )zN
    Returns string containing the orientation and script detection (OSD)
    z--psm 0 r   c                      r  r  r	  r<   r
  r<   r=   r  n  r  zimage_to_osd.<locals>.<lambda>c                      s   t t  S rL   )r   r   r<   r
  r<   r=   r  o  r  c                      r  rL   r	  r<   r
  r<   r=   r  p  r  r  r  r<   r
  r=   image_to_osd_  s   r&  c               
   C   s,  t tjdkrtjd d } }n#t tjdkr*tjd dkr*tjd tjd } }n	tdtjd dS z"t| }tt||d W d    W d S 1 sNw   Y  W d S  tyt } ztt	| d	tjd W Y d }~dS d }~w t
y } ztt|j d
| tjd W Y d }~dS d }~ww )Nr   rU      r      z(Usage: pytesseract [-l lang] input_file
)file)r   r   r   )r   r   argvprintrd   r   r   r  rP   r   r   typer5   )r   r   imgr   r<   r<   r=   maint  s&   &r.  __main__rL   )T)r   r   r   )F)Nr   r   F)r   Nr   r   r   F)r   )Nr   r   r   r   )Nr   r   r   )cloggingrer   r4   re   r   
contextlibr   csvr   r   r   	functoolsr   globr   ior   osr   r	   r
   r   os.pathr   r   r   tempfiler   timer   typingr   r   packaging.versionr   r   r   PILr   rQ   numpyr   r   ModuleNotFoundErrorpandasr   r  	getLoggerr   r|   compiler   r   r   r   r   r   r   r   r   r  r1   EnvironmentErrorr?   rg   rK   rP   rR   rT   rZ   rl   rt   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r  r;   r  r  r  r  r"  r%  r&  r.  r5   r   r<   r<   r<   r=   <module>   s,  






)

(
#






!


