
    *gH                       d dl mZ d dlmZmZ d dlmZmZmZm	Z	 d dl
Z
d dlmZ d dlZd dlmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZmZmZ d d	lmZ d d
lm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+m,Z, erd dl-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3 dZ4 G d de+      Z5 G d dej                        Z6 G d de5      Z7ddZ8ddZ9y)    )annotations)abcdefaultdict)HashableIteratorMappingSequenceN)StringIO)IOTYPE_CHECKINGDefaultDictLiteralcast)lib)EmptyDataErrorParserErrorParserWarning)cache_readonly)find_stack_level)is_bool_dtype
is_integeris_numeric_dtype)is_dict_like)dedup_namesis_potential_multi_index)
ParserBaseparser_defaults)	ArrayLikeReadCsvBufferScalar)Index
MultiIndexu   ﻿c                  n    e Zd ZU ded<   d fdZed d       Zd!dZ	 d"	 	 	 d#dZ	 	 	 	 d$dZ		 d"	 	 	 d%dZ
	 	 	 	 d&d	Zed'd
       Z	 	 d(dZed        Z	 	 	 	 	 	 	 	 d)dZd*dZd+dZd,dZd*dZd-dZd.dZd/dZd/dZd/dZ	 	 	 	 	 	 	 	 d0dZd/dZd1dZ	 	 d2dZd3dZd"d4dZd5dZd6dZ  xZ!S )7PythonParserset[int]_no_thousands_columnsc                    t            |       d _        g  _        d _        d _        |d    _        t         j                        r j                   _        n
 fd _        t        |d          _
        |d    _        |d    _        t         j                  t              rt         j                         _        |d    _        |d	    _        |d
    _        |d    _        |d    _        |d    _        d _        d|v r
|d    _        |d    _        |d    _        |d    _        |d    _        t        |t4              rt7        t8        t           |       _        n$t;        |d      sJ  j=                  |       _        d _         jA                         \  } _!         _"         jG                  | jH                        \   _%         _$         _&        }t5         jJ                         _'         jP                  s8 jS                         \  } _'         _%        d _*         jH                  | _$         j>                  ,t5        tW        tY         jJ                                     _         j[                   jJ                         _.         j_                          _0        tY         j0                        dk7  rtc        d      y)zN
        Workhorse function for processing nested list into DataFrame
        Nr   skiprowsc                     | j                   v S N)r(   )xselfs    i/var/www/html/articles-backend/trend/venv/lib/python3.12/site-packages/pandas/io/parsers/python_parser.py<lambda>z'PythonParser.__init__.<locals>.<lambda>Z   s    a4==&8     
skipfooter	delimiter	quotechar
escapechardoublequoteskipinitialspacelineterminatorquotingskip_blank_linesFhas_index_namesverbose	thousandsdecimalcommentreadlineT   z'Only length-1 decimal markers supported)2super__init__databufposline_posr(   callableskipfunc_validate_skipfooter_argr0   r1   r2   
isinstancestrr3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   listr   r   hasattr_make_reader_col_indices_infer_columnsnum_original_columnsunnamed_cols_extract_multi_indexer_columnsindex_namescolumns	col_names
orig_names_has_complex_date_col_get_index_name_name_processedrangelen_validate_parse_dates_presence_parse_date_cols_set_no_thousand_columnsr&   
ValueError)r,   fkwdsrT   _rS   	__class__s   `     r-   rA   zPythonParser.__init__J   s    	*.	Z(DMM" MMDM8DM243EFk*k*dnnc* 0DN|,. $%7 8"#34I $%7 8$$#'(9#:D Ik*II aXc]A.DI1j)))))!,DI /3 !		
% //
	
LN +/t||*<
 ));?;O;O;Q8[$/4<#'D '#. $ $U3t||+<%= >D $ C CDLL Q%)%B%B%D"t||!FGG "r/   c                    t        j                  | j                        }| j                  d| d}n(t        j                  | j                        }d| d| d}t        j                  |      S )Nz^[\-\+]?[0-9]*(z [0-9]*)?([0-9]?(E|e)\-?[0-9]+)?$z^[\-\+]?([0-9]+z	|[0-9])*()reescaper<   r;   compile)r,   r<   regexr;   s       r-   numzPythonParser.num   sm    ))DLL)>>!&wi/OPE		$..1I"9+Ywi @, -  zz%  r/   c                     j                   t              dk(  r j                  rt        d       G  fddt        j
                        }|}	|_         n[j                         } j                  |gg      d   } j                   j                        s|sZ xj                  dz  c_	        j                         } j                  |gg      d   } j                   j                        rW|sZt        t        t           |      }|d   } xj                  dz  c_	         xj                  dz  c_        t	        j                         j                  |      }|j                   |_         t	        j                   t#        |      |      } j$                  j'                  t        |             t	        j                   |d      }	|	S fd	}
 |
       }	|	S )
Nr?   z<Custom line terminators not supported in python parser (yet)c                      e Zd ZW  j                  ZW  j                  ZW  j
                  ZW  j                  ZW  j                  ZW  j                  ZdZ	y),PythonParser._make_reader.<locals>.MyDialect
N)
__name__
__module____qualname__r1   r2   r3   r4   r5   r7   r6   r,   s   r-   	MyDialectrl      sC     NN	 NN	!__
"..#'#8#8 ,,!%r/   rr   r   )dialectT)rs   strictc               3     K   j                         } t        j                        }|j                  | j	                                D ]#  } |j                  | j	                                % y wr*   )r>   re   rg   splitstrip)linepatr`   seps     r-   _readz(PythonParser._make_reader.<locals>._read   sX     zz|jjoii

-- 2D))DJJL112s   A0A3)r1   r[   r6   r_   csvDialectr>   _check_commentsrG   rD   r   rK   rJ   rE   Sniffersniffreaderr
   rC   extend)r,   r`   rr   diarx   lines	lines_strsniffedline_rdrr   r{   rz   s   ``         @r-   rM   zPythonParser._make_reader   s   nn;#c(a-"" R &CKK & C # zz|,,tfX6q9mmDHH-UHHMH::<D 004&:1=E mmDHH-U !cE2	 !|A"++---d3 ' 1 1 ::htncBX/ ZZ3t<F 2 WFr/   c                   	 | j                  |      }d| _        t	        | j
                        }t        |      sut        | j
                  t        | j
                  | j                              }| j                  || j                        \  }}}| j                  || j                        }|||fS t        |d         }d }	| j                  r|t        |      k(  r
|d   }	|dd  }| j!                  |      }
| j#                  |
      \  }}| j%                  |      }| j'                  ||      \  }}| j)                  ||
||	      \  }}|||fS # t        $ r$ | j                  rg }n| j                           Y cw xY w)NFr   r?   )
_get_linesStopIteration_first_chunkcloserK   rV   r[   r   r   	index_col_get_empty_metadtype_maybe_make_multi_index_columnsrU   count_empty_valsr9   _rows_to_cols_exclude_implicit_index_convert_data_do_date_conversions_make_index)r,   rowscontentrT   namesindexcol_dictconv_columnscount_empty_content_valsindexnamerowalldatarB   	conv_dataresult_columnss                 r-   readzPythonParser.read   s   
	ood+G "&*4??&;7|  (OONNE (,';';

($E7H  ??XL,00 $4GAJ#? $<G$L"1:LabkG$$W-44W=g&&t,	!66w	J $ 0 0w!
~ ni//[  	  

 	s   E
 
)E76E7c                P   t        | j                  t        | j                  | j                              }d}| j                  rt        | j                        }t        |      }| j                  ||       t        |      D ci c]  \  }}||k  s||||z       c}}|fS c c}}w Nr   )r   rV   r   r   _implicit_indexr[   _check_data_length	enumerate)r,   r   r   offsetlen_alldatainames          r-   r   z$PythonParser._exclude_implicit_index,  s    
 OO$
 (F'lw/ 6?u5E
*1!T[D'!f*%%
 	 
s   B"B"c                B    || j                   }| j                  |      S )N)r   )	chunksizer   )r,   sizes     r-   	get_chunkzPythonParser.get_chunkF  s#    
 <>>Dyydy##r/   c                   | j                  | j                        }| j                  | j                        }i }i }t        | j                  t
              rg| j                  D ]W  }| j                  |   }| j                  |   }t        |t              r|| j                  vr| j                  |   }|||<   |||<   Y n| j                  }| j                  }| j                  |||| j                  ||      S r*   )_clean_mapping
convertersr   rI   	na_valuesdict
na_fvaluesintrV   _convert_to_ndarraysr:   )	r,   rB   
clean_convclean_dtypesclean_na_valuesclean_na_fvaluescolna_value	na_fvalues	            r-   r   zPythonParser._convert_dataP  s    
 ((9
**4::6 dnnd+~~ 2>>#. OOC0	c3'Ct,F//#.C'/$(1 %2 #nnO#((LL
 	
r/   c                    | j                   y| j                   }t        |t        t        t        j
                  f      rt        |      dkD  S y)NFr?   )headerrI   rK   tuplenpndarrayr[   )r,   r   s     r-   _have_mi_columnszPythonParser._have_mi_columnss  s>    ;;ftUBJJ78v;?"r/   c           
     H   | j                   }d}d}t               }| j                  | j                  }| j                  }t	        |t
        t        t        j                  f      r|rt        |      |d   dz   gz   }n|g}g }t        |      D ]u  \  }}		 | j                         }
| j                  |	k  r | j                         }
| j                  |	k  r g }g }t        |
      D ]N  \  }}|dk(  r3|r	d| d| }nd| }|j)                  |       |j)                  |       >|j)                  |       P |st-        t.              }t1        t#        |            D cg c]  }||vr|
 c}|z   }|D ]  }||   }|}||   }|dkD  r|dkD  r$|dz   ||<   | d| }||v r|dz  }n||   }|dkD  r$| j2                  t5        | j2                        rl| j2                  j7                  |      Q| j2                  j7                  |      6| j2                  j9                  || j2                  j7                  |      i       |||<   |dz   ||<    nj|rh|	|d   k(  r`t#        |      }| j:                  }|t#        |      nd}t#        |      }||k7  r||z
  |kD  s|dk(  rd}d g|z  }| j<                  d   g| _        |j)                  |       |j9                  |D ch c]  }||   	 c}       t#        |      dk(  skt#        |      }x |r| j'                          |	 | j                         }|dn
t#        |      }t#        |      t#        |d         kD  rt#        |      |kD  rt%        d      t#        |      dkD  rt?        d      | j@                  | jC                  |||       nt#        |      }| jD                  It#        |      t#        | jD                        k7  r(tG        | jD                        D cg c]  }||   	 c}g}n|g}n| jC                  ||d   |      }nt#        | jH                        }|}|s,t        t1        |            g}| jC                  ||d   |      }n| j@                  t#        |      |k\  r | jC                  |g||      }t#        |      }nZtK        | j@                        s,t#        |      t#        | j@                        k7  rt%        d      |g}| jC                  ||d   |       |||fS # t        $ r}d| j                  cxk  r|	k  rkn nh|r|	|d   k7  r^t        t        t        |r|d d n|            }ddj!                  |       dt#        |       d}t%        d	| d
| j                   d      ||rD|	dkD  r?|r| j'                          |j)                  d gt#        |d         z         |||fcY d }~c S | j                   st+        d      || j                   d d  }
Y d }~'d }~ww xY wc c}w c c}w # t        $ r d }Y w xY wc c}w )Nr   Tr?   [,z
], len of z, zPassed header=z	but only z lines in fileNo columns to parse from file z	Unnamed: _level_.FzHNumber of passed names did not match number of header fields in the filez*Cannot pass names with multi-index columns)&r   setr   r   rI   rK   r   r   r   r   _buffered_linerE   
_next_liner   maprJ   joinr[   r_   _clear_bufferappendr   r   r   rZ   r   r   getupdater   rC   	TypeErrorusecols_handle_usecolsrN   sorted_header_linerF   )r,   r   rP   clear_bufferrQ   r   have_mi_columnsrT   levelhrrx   errjoimsgthis_columnsthis_unnamed_colsr   ccol_namecountscol_loop_orderr   old_col	cur_countlcsicicunnamed_count
first_linelen_first_data_rowncolss                                  r-   rO   zPythonParser._infer_columns~  s[    

 +.5;;"[[F"33O&4

";<"!&\VBZ!^,<<F 13G&v. h=	r)..0D--2-#0 --2-< 57$&!%dO 
/DAqBw*)21#WUG'DH)21#H)003$++H5$++A.
/ '*5c*:F
 "'s<'8!9&$55 & *	&*N , 4*1o"%$*3K	$q="+a-2;a-w)0	9+&>#&,#6$-NI06sI #,a- !%

 6$0$<$(JJNN7$;$G$(JJNN3$7$? $

 1 13

w8O2P Q*-Q&/!ms-4. % VBZ' ."nn),SXa(+,=(> -/BGm4KPRVWPW+0L,06B;L(,~DH|,##>O$P\!_$PQw<1$+.|+<(Qh=T ""$  &!%!2J +5*<Q#j/"u:GAJ/CJAS4S$>  w<!##$PQQ<<+ ((%9MN+.u:($$0SZ3%%D 6 399J9J2KLQaLMG$gG..WQZ)= ))*E#( e-...w
EJ%Uu)<..wuE'*5z$dll+E
c$,,>O0O 0  !'$$Wgaj%@,l::y % )4==.B.+rVBZ/? #3s?F3BKPV#WX !#((3-
3s8*BG(,SE'nF  ## '26' ..0vGBK0@'@A&(<lJJ::,-LMSVV::a=D3)\&` %Q % &!%J&,  MsI   >S?XX
X 5X?	X B>W;X 'W;;X XXc                    | j                   y 	 | j                         }|S # t        $ r2}| j                  st	        d      || j                  d d  }Y d }~|S d }~ww xY w)Nr   )r   r   r   r   r   )r,   rx   r   s      r-   r   zPythonParser._header_line7  sd     ;;"	!&&(D   	!::$%DE3N::a=D	!s   ! 	A'AAc                   | j                   >t        | j                         r| j                  | j                   |      }nt        d | j                   D              rpt	        |      dkD  rt        d      g }| j                   D ]E  }t        |t              r"	 |j                  |j                  |             5|j                  |       G n<| j                   D cg c]
  }||k\  s	| }}|rt        d| d      | j                   }|D 	cg c]%  }t        |      D 	cg c]  \  }}	||v s|	 c}	}' }}}}	t        |      | _        |S # t
        $ r | j                  | j                   |       Y w xY wc c}w c c}	}w c c}	}}w )zb
        Sets self._col_indices

        usecols_key is used if there are string usecols.
        c              3  <   K   | ]  }t        |t                y wr*   )rI   rJ   ).0us     r-   	<genexpr>z/PythonParser._handle_usecols.<locals>.<genexpr>U  s     >AZ3'>s   r?   z4If using multiple headers, usecols must be integers.z<Defining usecols with out-of-bounds indices is not allowed. z are out-of-bounds.)r   rF   _evaluate_usecolsanyr[   r_   rI   rJ   r   r   _validate_usecols_namesr   r   r   rN   )
r,   rT   usecols_keyrP   col_indicesr   missing_usecolscolumnr   ns
             r-   r   zPythonParser._handle_usecolsF  s    <<#%"44T\\;O>>>w<!#$N  !<< 0C!#s+T'..{/@/@/EF $**3/0 $(<<#3:N3NC# # #%V*++>@  #ll &   )0Etq!A4DEG  !'{ 3D)  * T 88{ST
# Fs<    E
E9%E9F!E>.E>2F%E65E6>Fc                p    t        | j                        dkD  r| j                  d   S | j                         S )zH
        Return a line from buffer, filling buffer if required.
        r   )r[   rC   r   rq   s    r-   r   zPythonParser._buffered_linev  s/     txx=188A;??$$r/   c                R   |s|S t        |d   t              s|S |d   s|S |d   d   }|t        k7  r|S |d   }t        |      dkD  rR|d   | j                  k(  r@d}|d   }|dd j                  |      dz   }||| }t        |      |dz   kD  r|||dz   d z  }n|dd }|g}||dd z   S )a-  
        Checks whether the file begins with the BOM character.
        If it does, remove it. In addition, if there is quoting
        in the field subsequent to the BOM, remove it as well
        because it technically takes place at the beginning of
        the name, not the middle of it.
        r   r?      N)rI   rJ   _BOMr[   r2   r   )	r,   	first_row	first_eltfirst_row_bomstartquoteendnew_rownew_row_lists	            r-   _check_for_bomzPythonParser._check_for_bom  s     
 )A,, | aLO	!! }!mA&6$..&HE!!$E#))%014C $E#.G =!C!G+=q33 $AB'G&-Yim++r/   c                0    | xs t        d |D              S )z
        Check if a line is empty or not.

        Parameters
        ----------
        line : str, array-like
            The line of data to check.

        Returns
        -------
        boolean : Whether or not the line is empty.
        c              3  "   K   | ]  }|  	 y wr*    )r   r+   s     r-   r   z.PythonParser._is_line_empty.<locals>.<genexpr>  s     31u3s   )all)r,   rx   s     r-   _is_line_emptyzPythonParser._is_line_empty  s     x333d333r/   c                   t        | j                  t              r| j                  | j                        rT| j                  t        | j                        k\  rn1| xj                  dz  c_        | j                  | j                        rT	 	 | j                  | j                  | j                     g      d   }| xj                  dz  c_        | j                  s/| j                  | j                  | j                  dz
           s|rn| j                  r| j                  |g      }|r|d   }n| j                  | j                        rT| xj                  dz  c_        | j                  J t        | j                         | j                  | j                        rT	 | j                  | j                  dz         }| xj                  dz  c_        |O| j                  |g      d   }| j                  r| j                  |g      }|r|d   }n| j                  |      s|rn| j                  dk(  r| j                  |      }| xj                  dz  c_        | j                   j#                  |       |S # t        $ r t        w xY w)Nr?   r   row_num)rI   rB   rK   rG   rD   r[   r~   r8   r  _remove_empty_lines
IndexErrorr   next_next_iter_liner  rE   rC   r   )r,   rx   ret	orig_lines       r-   r   zPythonParser._next_line  s   dii&--)88s499~-A --)
 (//4881D0EFqIDHHMH00++DIIdhhl,CD,,"66v>#&q6D! " --)Ayy,,,TYY	 --)  00A0F	A(//<Q?D,,"66v>#&q6D!,,Y74 & 88q=&&t,DC " (''(s   A:I8 	%I8 8Jc                    | j                   | j                  j                  k(  rt        |      | j                   | j                  j                  k(  r,t        j                  d| d| dt        t                      yy)a   
        Alert a user about a malformed row, depending on value of
        `self.on_bad_lines` enum.

        If `self.on_bad_lines` is ERROR, the alert will be `ParserError`.
        If `self.on_bad_lines` is WARN, the alert will be printed out.

        Parameters
        ----------
        msg: str
            The error message to display.
        row_num: int
            The row number where the parsing error occurred.
            Because this row number is displayed, we 1-index,
            even though we 0-index internally.
        zSkipping line z: rm   )
stacklevelN)	on_bad_linesBadLineHandleMethodERRORr   WARNwarningswarnr   r   )r,   r   r  s      r-   _alert_malformedzPythonParser._alert_malformed  so    "  8 8 > >>c"" 8 8 = ==MM 	C53+- >r/   c                   	 | j                   J t        | j                         }t        |t              sJ |S # t        j
                  $ r}| j                  | j                  j                  | j                  j                  fv r@t        |      }d|v sd|v rd}| j                  dkD  r
d}|d|z   z  }| j                  ||       Y d}~yd}~ww xY w)a[  
        Wrapper around iterating through `self.data` (CSV source).

        When a CSV error is raised, we check for specific
        error messages that allow us to customize the
        error message displayed to the user.

        Parameters
        ----------
        row_num: int
            The row number of the line being parsed.
        Nz	NULL bytezline contains NULzNULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' insteadr   zError could possibly be due to parsing errors in the skipped footer rows (the skipfooter keyword is only applied after Python's csv library has parsed all rows).. )rB   r  rI   rK   r|   Errorr  r  r  r  rJ   r0   r"  )r,   r  rx   er   reasons         r-   r  zPythonParser._next_iter_line  s    !	99(((		?DdD)))Kyy 	  ((..((--%  !f#%)<)C?  ??Q&%  4&=(C%%c735	s   69 CA8C		Cc                `   | j                   |S g }|D ]  }g }|D ]~  }t        |t              r| j                   |vs|| j                  v r|j	                  |       A|d |j                  | j                          }t        |      dkD  r|j	                  |        n |j	                  |        |S r   )r=   rI   rJ   r   r   findr[   )r,   r   r  rx   rlr+   s         r-   r~   zPythonParser._check_commentsE  s    <<L 	DB "1c*||1,DNN*IIaL0AFF4<<01A1vz		! JJrN	 
r/   c                    |D cg c]F  }t        |      dkD  s4t        |      dk(  r(t        |d   t              r|d   j                         r|H }}|S c c}w )a  
        Iterate through the lines and remove any that are
        either empty or contain only one whitespace value

        Parameters
        ----------
        lines : list of list of Scalars
            The array of lines that we are to filter.

        Returns
        -------
        filtered_lines : list of list of Scalars
            The same array of lines with the "empty" ones removed.
        r?   r   )r[   rI   rJ   rw   )r,   r   rx   r  s       r-   r  z PythonParser._remove_empty_linesZ  s]    $ 
D	At9>#DGS1T!W]]_ 
 
 

s   AAc                Z    | j                   |S | j                  || j                   d      S )Nr   r   searchreplace)r;   _search_replace_num_columnsr,   r   s     r-   _check_thousandszPythonParser._check_thousandsu  s4    >>!L// 0 
 	
r/   c                `   g }|D ]  }g }t        |      D ]  \  }}t        |t              r;||vs7|| j                  v s)| j                  j                  |j                               s|j                  |       c|j                  |j                  ||              |j                  |        |S r*   )	r   rI   rJ   r&   ri   r.  rw   r   r/  )	r,   r   r.  r/  r  rx   r*  r   r+   s	            r-   r0  z(PythonParser._search_replace_num_columns}  s      	DB!$ 	:1"1c*QD66688??17795IIaLIIaii89	: JJrN	 
r/   c                n    | j                   t        d   k(  r|S | j                  || j                   d      S )Nr<   r   r-  )r<   r   r0  r1  s     r-   _check_decimalzPythonParser._check_decimal  s;    <<?955L//c 0 
 	
r/   c                    g | _         y r*   )rC   rq   s    r-   r   zPythonParser._clear_buffer  s	    r/   c                t   | j                   }t        |      }t        |      }| j                  | j                  }n	 | j                         }	 | j                         }d}|| j
                  }|durt        |      | j                  z
  }|| j                  |durt        |      t        |      | j                  z   k(  rxt        t        t        |                  | _        | j                  dd | _
        t        |      D ]  }|j                  d|        t        |      }t        |      | _        |||fS |dkD  r/d| _        | j
                  t        t        |            | _        d}n%| j                  || j
                        \  }}	| _        |||fS # t        $ r d}Y Ww xY w# t        $ r d}Y Xw xY w)a  
        Try several cases to get lines:

        0) There are headers on row 0 and row 1 and their
        total summed lengths equals the length of the next line.
        Treat row 0 as columns and row 1 as indices
        1) Look for implicit index: there are more columns
        on row 1 than row 0. If this is true, assume that row
        1 lists index columns and row 0 lists normal columns.
        2) Get index from the columns if it was listed.
        Nr   Fr?   T)rV   rK   r   r   r   r   r[   rP   r   rZ   rC   reversedinsertr   _clean_index_names)
r,   rT   rV   rx   	next_lineimplicit_first_colsr   r   
index_namerb   s
             r-   rX   zPythonParser._get_index_name  s    '+oo']
w- ($$D(
	)I
   I%&)$i$2K2K&K# %KK+U*y>SY1J1J%JJ%)%D	*:%;DN#xx|DH%d^ -q!,- "&gJ03GD-W44"#'D ~~%!%e,?&@!AJ /3.E.E/+ZDN :w..i !   	I	s#   F F( F%$F%(F76F7c                   | j                   }| j                  r|t        | j                        z  }t	        d |D              }||kD  r| j                  dur| j
                  | j                  r| j                  nd}g }t        |      }t        |      }g }|D ]  \  }}	t        |	      }
|
|kD  rt        | j                        r&| j                  |	      }|?|j                  |       Q| j                  | j                  j                  | j                  j                  fv s| j                  ||z
  |z   z
  }|j                  ||
f       | j                  | j                  j                  k(  s n|j                  |	        |D ]s  \  }}
d| d|dz    d|
 }| j                  r?t        | j                        dkD  r'| j                   t"        j$                  k7  r
d}|d	|z   z  }| j'                  ||dz          u t)        t+        j,                  ||
      j.                        }| j
                  r| j0                  J | j0                  }| j                  rQt        |      D cg c]9  \  }}|t        | j                        k  s|t        | j                        z
  |v r|; }}}|S t        |      D cg c]  \  }}||v s| }}}|S c c}}w c c}}w )Nc              3  2   K   | ]  }t        |        y wr*   )r[   )r   rows     r-   r   z-PythonParser._rows_to_cols.<locals>.<genexpr>  s     23c#h2s   Fr   z	Expected z fields in line r?   z, saw zXError could possibly be due to quotes being ignored when a multi-char delimiter is used.r$  )	min_width)rP   r   r[   r   maxr   r0   r   rF   r  r   r  r  r  rD   r1   r7   r|   
QUOTE_NONEr"  rK   r   to_object_arrayTrN   )r,   r   col_lenmax_lenfooters	bad_linesiter_contentcontent_lenr   _content
actual_lennew_lr  r   r'  zipped_contentr   as                     r-   r   zPythonParser._rows_to_cols  s   ++s4>>**G2'22 ge+$)-dooaGI$W-Lg,KG+ -8 ]
' 1 12 $ 1 1( ; ,#NN51**00660055/  #'((kAo.G"H!((':)>?,,0H0H0N0NN!NN8,%-( (1 8#y(81V!l$ 
 NNDNN+a/6G  4&=(C%%c7Q;7#8( c11'WMOOP<<$$000++K## !*. 9"1C//s4>>22kA	 " "  #,N";"!QqK?OA" " ""s   (>K9KKc                   | j                   }d }|Ut        | j                         |k\  r%| j                   d | | j                   |d  c}| _         n|t        | j                         z  }|t        | j                  t              r| j
                  t        | j                        kD  rt        |/| j                  | j
                  d  }t        | j                        }n5| j                  | j
                  | j
                  |z    }| j
                  |z   }| j                  |      }|j                  |       || _        ng }	 |d}d}| j
                  | j
                  nd}||k  rX| j                  J t        | j                        }| j                  ||z         s|dz  }|dz  }|j                  |       ||k  rXt        |      }	| j                  |      }|j                  |       n>d}	 | j                  | j
                  |z   dz         }
|dz  }|
|j                  |
       ;	 | xj
                  |	z  c_        g | _         n|}| j                  r|d | j                    }| j                  |      }| j                  r| j!                  |      }| j#                  |      }| j%                  |      S # t        $ r? t        |      }	| j                  |      }|j                  |       t        |      dk(  r Y w xY w)Nr   r?   r  )rC   r[   rI   rB   rK   rD   r   _remove_skipped_rowsr   r  rG   r   r  r0   r~   r8   r  r2  r5  )r,   r   r   new_rowsnew_pos	row_indexrow_ctr   r	  len_new_rowsnext_rows              r-   r   zPythonParser._get_lines?  s    488}$%)XXet_dhhtuo"$( DHH%$))T*88c$))n,''<#yy4H!$))nG#yyDHHtODH"hhoG44X>X&" "'$%	!"-1XX-Aq$tm $(99#88#8&*499oG#'==)1C#D &!%NI$OOG4 %tm (+8}#'#<#<X#FX. "'+';';DHHtOVWDW';'XH AID'3 ( 9 #	 /$ L(DHE??,T__,-E$$U+  ,,U3E%%e,""5))' % #&x=L#88BHLL*5zQ '	s   ?A<J4 <A,J4 4AK<;K<c                    | j                   r=t        |      D cg c]&  \  }}| j                  || j                  z         r%|( c}}S |S c c}}w r*   )r(   r   rG   rD   )r,   rS  r   r@  s       r-   rR  z!PythonParser._remove_skipped_rows  sI    =="+H"53T]]1txx<=X  s   &AAc                   t               }| j                  r@| j                  r4| j                  J | j	                  | j                  | j                        }| j                  r| j
                  r| j                  J t        | j                  | j                        D ]  \  }}t        | j
                  t              s&t        | j
                        s|j                  |       t        | j
                  t              sa|| j
                  v spt        | j
                  |         rt        | j
                  |         s|j                  |        |S r*   )r   rT   parse_datesrN   _set_noconvert_dtype_columnsr   ziprI   r   r   addr   )r,   no_thousands_columnsr   r   s       r-   r^   z%PythonParser._set_no_thousand_columns  s   ),<<D,,$$000#'#D#D!!4<<$  <<DJJ$$000d//> 03!$**d3<LJJ= ),,Q/tzz40tzz),TZZ_=(C9 ),,Q/0 $#r/   )r`   zReadCsvBuffer[str] | listreturnNone)r`  z
re.Pattern)r`   IO[str] | ReadCsvBuffer[str]r*   )r   
int | Noner`  Rtuple[Index | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]])r   list[np.ndarray]r`  z8tuple[Mapping[Hashable, np.ndarray], Sequence[Hashable]])r   rc  r`  rd  )rB   zMapping[Hashable, np.ndarray]r`  zMapping[Hashable, ArrayLike])r`  bool)r`  z9tuple[list[list[Scalar | None]], int, set[Scalar | None]])rT   list[list[Scalar | None]]r   zlist[Scalar | None]rP   r   r`  rg  )r`  list[Scalar])r  rh  r`  rh  )rx   rh  r`  rf  )r   rJ   r  r   r`  ra  )r  r   r`  zlist[Scalar] | Noner   list[list[Scalar]]r`  rj  )r   rj  r.  rJ   r/  rJ   r`  rj  )r`  ra  )r`  z@tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]])r   rj  r`  re  )r   rc  r`  rj  )rS  rj  r`  rj  )r`  r%   )"rn   ro   rp   __annotations__rA   r   ri   rM   r   r   r   r   r   rO   r   r   r   r  r  r   r"  r  r~   r  r2  r0  r5  r   rX   r   r   rR  r^   __classcell__)rc   s   @r-   r$   r$   G   s   ##]H~ 
! 
!>B "&4040
40l! 
B6 "&$$
$!
+!
 
&!
F  w;	Bw;r  .*. ). "	.
 
#.`%3,j46p4.`*6
'14?B	&
L/	IL/\TlO*b$r/   r$   c                  `    e Zd ZdZ	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZd	d
dZ	 d	 	 	 	 	 ddZddZy)FixedWidthReaderz(
    A reader of fixed-width lines.
    Nc           	     J   || _         d | _        |rd|z   nd| _        || _        |dk(  r| j	                  ||      | _        n|| _        t        | j
                  t        t        f      s!t        dt        |      j                         | j
                  D ]  }t        |t        t        f      rit        |      dk(  r[t        |d   t        t        j                  t        d       f      r.t        |d   t        t        j                  t        d       f      rt        d	       y )
Nz
z
	 infer)infer_nrowsr(   z;column specifications must be a list or tuple, input was a r  r   r?   zEEach column specification must be 2 element tuple or list of integers)r`   bufferr1   r=   detect_colspecscolspecsrI   r   rK   r   typern   r[   r   r   integer)r,   r`   rt  r1   r=   r(   rq  colspecs           r-   rA   zFixedWidthReader.__init__  s
    '+/8)+iw 00'( 1 DM %DM$--%7#H~6679 
 }} 
	G7UDM2LA%wqzCT$Z+HIwqzCT$Z+HI: 
	r/   c                    |
t               }g }g }t        | j                        D ];  \  }}||vr|j                  |       |j                  |       t	        |      |k\  s; n t        |      | _        |S )a  
        Read rows from self.f, skipping as specified.

        We distinguish buffer_rows (the first <= infer_nrows
        lines) from the rows returned to detect_colspecs
        because it's simpler to leave the other locations
        with skiprows logic alone than to modify them to
        deal with the fact we skipped some rows here as
        well.

        Parameters
        ----------
        infer_nrows : int
            Number of rows to read from self.f, not counting
            rows that are skipped.
        skiprows: set, optional
            Indices of rows to skip.

        Returns
        -------
        detect_rows : list of str
            A list containing the rows to read.

        )r   r   r`   r   r[   iterrr  )r,   rq  r(   buffer_rowsdetect_rowsr   r@  s          r-   get_rowszFixedWidthReader.get_rows  s    2 uH' 	FAs ""3's#;;.	 ;'r/   c           	        dj                  | j                  D cg c]  }d| 	 c}      }t        j                  d| d      }| j	                  ||      }|st        d      t        t        t        |            }t        j                  |dz   t              }| j                  +|D 	cg c]   }	|	j                  | j                        d   " }}	|D ]9  }	|j                  |	      D ]#  }
d||
j                         |
j!                          % ; t        j"                  |d      }d|d<   t        j$                  ||z  dk(        d   }t'        t)        |d d d	   |dd d	               }|S c c}w c c}	w )
Nr   \z([^z]+)z(No rows from which to infer column widthr?   )r   r   r  )r   r1   re   rg   r|  r   rB  r   r[   r   zerosr   r=   	partitionfinditerr  r  rollwhererK   r]  )r,   rq  r(   r+   
delimiterspatternr   rG  maskr@  mshiftededges
edge_pairss                 r-   rs  z FixedWidthReader.detect_colspecs  s]    WW?11#h?@
**s:,c23}}[(3 !KLLc#tn%xx!3/<<#>BCsCMM$,,/2CDC 	.C%%c* .,-QWWY).	. ''$"
$.Q./2#eCaCj%1+67
! @ Ds   E-,%E2c                J   | j                   	 t        | j                         }nt        | j                        }| j                  D cg c]#  \  }}||| j                  | j                        % c}}S # t        $ r d | _         t        | j                        }Y bw xY wc c}}w r*   )rr  r  r   r`   rt  rw   r1   )r,   rx   from_tos       r-   __next__zFixedWidthReader.__next__  s     ;;"$DKK(
 <DHLV%U2$$T^^4VV ! $"DFF|$ Ws   A4 (B4%BB)Nd   )r`   rb  rt  z(list[tuple[int, int]] | Literal['infer']r1   
str | Noner=   r  r(   set[int] | Nonerq  r   r`  ra  r*   )rq  r   r(   r  r`  	list[str])r  N)rq  r   r(   r  r`  zlist[tuple[int, int]])r`  r  )rn   ro   rp   __doc__rA   r|  rs  r  r  r/   r-   rn  rn    s     %)$'$ ;$ 	$
 $ "$ $ 
$L$N CG0?	,Wr/   rn  c                  (    e Zd ZdZddZddZddZy)	FixedWidthFieldParserzl
    Specialization that Converts fixed-width fields into DataFrames.
    See PythonParser for details.
    c                    |j                  d      | _        |j                  d      | _        t        j                  | |fi | y )Nrt  rq  )poprt  rq  r$   rA   )r,   r`   ra   s      r-   rA   zFixedWidthFieldParser.__init__-  s8    ,88M2dA..r/   c                    t        || j                  | j                  | j                  | j                  | j
                        S r*   )rn  rt  r1   r=   r(   rq  )r,   r`   s     r-   rM   z"FixedWidthFieldParser._make_reader3  s7    MMNNLLMM
 	
r/   c                P    |D cg c]  }t        d |D              r| c}S c c}w )z
        Returns the list of lines without the empty ones. With fixed-width
        fields, empty lines become arrays of empty strings.

        See PythonParser._remove_empty_lines.
        c              3  b   K   | ]'  }t        |t               xs |j                          ) y wr*   )rI   rJ   rw   )r   r&  s     r-   r   z<FixedWidthFieldParser._remove_empty_lines.<locals>.<genexpr>G  s(     E1z!S))6QWWY6Es   -/)r   )r,   r   rx   s      r-   r  z)FixedWidthFieldParser._remove_empty_lines=  s2     
EEE 
 	
 
s   #N)r`   zReadCsvBuffer[str]r`  ra  )r`   rb  r`  rn  ri  )rn   ro   rp   r  rA   rM   r  r  r/   r-   r  r  '  s    
/

r/   r  c                &    t        d | D              S )Nc              3  0   K   | ]  }|d k(  s|d  yw)r   Nr?   r  )r   vs     r-   r   z#count_empty_vals.<locals>.<genexpr>L  s     7Q!r'QYq7s   )sum)valss    r-   r   r   K  s    7$777r/   c                R    t        |       st        d      | dk  rt        d      | S )a  
    Validate the 'skipfooter' parameter.

    Checks whether 'skipfooter' is a non-negative integer.
    Raises a ValueError if that is not the case.

    Parameters
    ----------
    skipfooter : non-negative integer
        The number of rows to skip at the end of the file.

    Returns
    -------
    validated_skipfooter : non-negative integer
        The original input if the validation succeeds.

    Raises
    ------
    ValueError : 'skipfooter' was not a non-negative integer.
    zskipfooter must be an integerr   zskipfooter cannot be negative)r   r_   )r0   s    r-   rH   rH   O  s3    * j!899A~899 r/   )r`  r   )r0   r   r`  r   ):
__future__r   collectionsr   r   collections.abcr   r   r   r	   r|   ior
   re   typingr   r   r   r   r   r   numpyr   pandas._libsr   pandas.errorsr   r   r   pandas.util._decoratorsr   pandas.util._exceptionsr   pandas.core.dtypes.commonr   r   r   pandas.core.dtypes.inferencer   pandas.io.commonr   r   pandas.io.parsers.base_parserr   r   pandas._typingr   r   r    pandasr!   r"   r  r$   rn  r  r   rH   r  r/   r-   <module>r     s    "    	     
 3 4 
 6
   g$: g$T#sWs|| sWl!
L !
H8r/   