
    *g#                       d dl mZ d dlmZ d dlmZ d dlZd dlZd dlmZ d dl	Z	d dl
mZmZmZmZmZmZ d dlZd dlZd dlmZmZ d dlmc mZ d dlmZ d d	lmZ d d
lm Z  d dl!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6m7Z7 d dl8m9Z9 d dl:m;Z;m<Z<m=Z=m>Z>m?Z? d dl@mAZA d dlBmCZCmDZDmEZEmFZFmGZGmHZHmIZI d dlJmKZK d dlLmMZMmNZNmOZOmPZP d dlQmRZR d dlSmTZU d dlVmWZW erd dlXmYZYmZZZm[Z[m\Z\ d dl]m^Z^m_Z_m`Z`maZa  G d d      Zbej                  dddf	 	 	 	 	 dLd"Zdi d#dd$dd%d&d'ej                  d(dd)dd*dd+d,d-dd.dd/dd0d d1dd2dd3dd4dd5di d6dd7dd dd8dd9dd:d;d<dd=dddd>ej                  d!dd?dd@ddAddBddCddDddEebj                  j                  ej                  dFZhdej                  f	 	 	 dMdGZi	 dN	 	 	 dOdHZjdPdIZkdJ ZldQdKZmy)R    )annotations)defaultdictcopyN)Enum)TYPE_CHECKINGAnyCallablecastfinaloverload)libparsers)STR_NA_VALUES)parsing)import_optional_dependency)ParserErrorParserWarning)find_stack_level)astype_array)ensure_objectis_bool_dtypeis_dict_likeis_extension_array_dtypeis_float_dtype
is_integeris_integer_dtypeis_list_likeis_object_dtype	is_scalaris_string_dtypepandas_dtype)CategoricalDtypeExtensionDtype)isna)
ArrowDtype	DataFrameDatetimeIndexStringDtypeconcat)
algorithms)ArrowExtensionArrayBaseMaskedArrayBooleanArrayCategoricalExtensionArrayFloatingArrayIntegerArray)BooleanDtype)Index
MultiIndexdefault_indexensure_index_from_sequences)Series)	datetimes)is_potential_multi_index)HashableIterableMappingSequence)	ArrayLikeDtypeArgDtypeObjScalarc                     e Zd ZU  G d de      Zded<   ded<   ded<   ded<   ded<   ded	<   d
ed<   d(dZd)dZd(dZe	e
d*d              Ze	d+d       Ze		 d,	 	 	 	 	 d-d       Ze		 d.	 	 	 	 	 d/d       Ze		 d.	 	 	 d0d       Ze	d        Ze	d        Ze	d        Ze	d1d2d       Ze		 	 	 d3	 	 	 d4d       Ze		 	 	 	 	 	 d5d       Ze		 d1	 	 	 d6d       Ze	d7d       Ze	 	 	 	 	 	 d8d       Ze	 	 	 	 	 	 d9d       Ze		 	 	 	 	 	 d:d       Ze		 	 	 	 	 	 d;d        Ze	 	 	 	 	 	 d<d!       Ze	 	 	 	 	 	 d=d"       Ze		 	 	 	 	 	 d>d#       Ze	d?d$       Ze	d%        Ze	d@d&       Ze	d.dAd'       Zy)B
ParserBasec                      e Zd ZdZdZdZy)ParserBase.BadLineHandleMethodr         N)__name__
__module____qualname__ERRORWARNSKIP     g/var/www/html/articles-backend/trend/venv/lib/python3.12/site-packages/pandas/io/parsers/base_parser.pyBadLineHandleMethodrF   f   s    rP   rR   bool_implicit_index_first_chunkkeep_default_nadayfirstcache_dateskeep_date_col
str | Noneusecols_dtypec                   d| _         |j                  d      | _        d | _        |j                  dd       | _        t               | _        d | _        d | _        t        |j                  dd            | _        g | _        |j                  dt        j                        | _        |j                  dd       | _        |j                  dd      | _        |j                  dd      | _        |j                  d	      | _        |j                  d
      | _        |j                  dd      | _        |j                  dd      | _        t/        |j                  dd             | _        |j                  d      | _        |j                  d      | _        |j                  d      | _        |j                  d      | _        |j                  dd      | _        t=        | j                  | j                   | j"                  | j:                        | _        |j                  d      | _         tC        | j@                  d      r|j                  d      rtE        d      |j                  d      rtE        d      | j                  tG        | j                        r| j                  g| _        n_tC        | j                  d      r#tI        tK        tF        | j                              stE        d      tM        | j                        | _        d| _'        d| _(        | jS                  |d         \  | _*        | _+        |j                  d| jX                  jZ                        | _.        y )NFnames	index_colparse_datesdate_parserdate_formatrW   rY   	na_values
na_fvalues	na_filterrV   Tdtype
convertersdtype_backendtrue_valuesfalse_valuesrX   )r`   ra   rW   rX   header)
allow_setsusecolsz;cannot specify usecols when specifying a multi-index headerz9cannot specify names when specifying a multi-index headerzLindex_col must only contain row numbers when specifying a multi-index headeron_bad_lines)/rT   getr]   
orig_namesr^   setunnamed_colsindex_names	col_names_validate_parse_dates_argpopr_   _parse_date_colsr   
no_defaultr`   ra   rW   rY   rb   rc   rd   rV   r   re   rf   rg   rh   ri   rX   _make_date_converter
_date_convrj   r   
ValueErrorr   allmaplist_name_processedrU   _validate_usecols_argrl   r[   rR   rL   rm   )selfkwdss     rQ   __init__zParserBase.__init__s   s   $XXg&
59+t4!$6:484TXXmU5ST*,88M3>>B88M48U3!XXou=+.((<0+u5#xx(94@$((7D12
((<0!XXo688M2 HH^488M48.((((]]((	
 hhx(6xx	" Q  xx  O 
 ~~)dnn-&*nn%5DN EBC
DNN;<$? 
 &*$..%9DN$ +/+E+Ed9o+V(d( !HH^T5M5M5S5STrP   c                (   t        | j                        r+t        j                  | j                  j	                          }nHt        | j                        r1t        j                  j                  d | j                  D              }ng }t        |      }dj                  t        |D ch c]  }t        |t              r||vr| c}            }|rt        d| d      |D cg c]  }t        |t              s||v r|n||    c}S c c}w c c}w )a  
        Check if parse_dates are in columns.

        If user has provided names for parse_dates, check if those columns
        are available.

        Parameters
        ----------
        columns : list
            List of names of the dataframe.

        Returns
        -------
        The names of the columns which will get parsed later if a dict or list
        is given as specification.

        Raises
        ------
        ValueError
            If column to parse_date is not in dataframe.

        c              3  \   K   | ]$  }t        |      rt        |t              s|n|g & y wN)r   
isinstancetuple).0cols     rQ   	<genexpr>z<ParserBase._validate_parse_dates_presence.<locals>.<genexpr>   s/      8 $C(C1GcUR8s   *,z, z+Missing column provided to 'parse_dates': '')r   r_   	itertoolschainvaluesr   from_iterabler}   joinsortedr   strrz   )r   columnscols_neededr   missing_colss        rQ   _validate_parse_dates_presencez)ParserBase._validate_parse_dates_presence   s   0 (()#//4+;+;+B+B+DEK$**+ $//77 8++8 K
 K;' yy  +!#s+70B 
 =l^1M  #
 sC(C7NCM
 	

s   (D
%"Dc                     y r   rO   r   s    rQ   closezParserBase.close   s    rP   c                    t        | j                  t              xsS t        | j                  t              xr7 t	        | j                        dkD  xr t        | j                  d   t              S )Nr   )r   r_   dictr}   lenr   s    rQ   _has_complex_date_colz ParserBase._has_complex_date_col   s`     $**D1 
t''. 6D$$%)64++A.5	
rP   c                (   t        j                  | j                        rt        | j                        S | j                  | j                  |   }nd }| j
                  |n| j
                  |   }|| j                  v xs |d uxr || j                  v S r   )r   is_boolr_   rS   rr   r^   )r   inamejs       rQ   _should_parse_dateszParserBase._should_parse_dates  s    ;;t''((())+''*^^+1BA))) D =TT-=-=%=rP   c                   t        |      dk  r	|d   |d|fS | j                  }|g }t        |t        t        t
        j                  f      s|g}t        |      |j                  d      }| j                  || j                        \  }}}t        |d         t        fd|dd D              st        d      fdt        t        fd	|D               }|j                         }t        |      D ]  }|j                  ||        t        |      r4|D 	cg c](  }	|	|d      |	|d      | j                   vr|	|d      nd* }
}	ndgt        |      z  }
d
}|||
|fS c c}	w )a  
        Extract and return the names, index_names, col_names if the column
        names are a MultiIndex.

        Parameters
        ----------
        header: list of lists
            The header rows
        index_names: list, optional
            The names of the future index
        passed_names: bool, default False
            A flag specifying if names where passed

        rH   r   Nc              3  :   K   | ]  }t        |      k(    y wr   )r   )r   header_iterfield_counts     rQ   r   z<ParserBase._extract_multi_indexer_columns.<locals>.<genexpr>D  s     Q{3{#{2Qs   rG   z1Header rows must have an equal number of columns.c                B     t         fdt              D              S )Nc              3  2   K   | ]  }|vs|     y wr   rO   )r   r   rsics     rQ   r   zMParserBase._extract_multi_indexer_columns.<locals>.extract.<locals>.<genexpr>H  s     J!Qc\1Js   	
)r   range)r   r   r   s   `rQ   extractz:ParserBase._extract_multi_indexer_columns.<locals>.extractG  s    Ju['9JJJrP   c              3  .   K   | ]  } |        y wr   rO   )r   r   r   s     rQ   r   z<ParserBase._extract_multi_indexer_columns.<locals>.<genexpr>J  s     8AWQZ8   T)r   r^   r   r}   r   npndarrayrp   ru   _clean_index_namesr{   r   zipr   r   insertrq   )r   rj   rr   passed_namesic_r   r]   	single_icr   rs   r   r   r   s              @@@rQ   _extract_multi_indexer_columnsz)ParserBase._extract_multi_indexer_columns  s   . v;?!9k4==
 ^^:B"tUBJJ78B"g jjn 33KPQ &)n QfQRjQQQRR	K s889: 	/ILLI.	/ r7
  	  r!uX)qAxt?P?P/P "Q%I  V,Ik9l::s    -E'Nc                x    t        |      r.t        t        t           |      }t	        j
                  ||      S |S )Nr]   )r:   r   r}   r   r5   from_tuples)r   r   rs   list_columnss       rQ   _maybe_make_multi_index_columnsz*ParserBase._maybe_make_multi_index_columns^  s4     $G,UW5L)),iHHrP   c                :   t        | j                        r| j                  sd }n| j                  s$| j                  ||      }| j	                  |      }nw| j                  rk| j
                  s:| j                  t        |      | j                        \  | _        }| _        d| _        | j                  ||      }| j	                  |d      }|r/t        |      t        |      z
  }	J |j                  |d |	       }| j                  || j                        }|fS )NTF)try_parse_dates)is_index_colr^   r   _get_simple_index
_agg_indexr~   r   r}   rr   _get_complex_date_indexr   	set_namesr   rs   )
r   dataalldatar   indexnamerowindexsimple_indexr   
date_indexcoffsets
             rQ   _make_indexzParserBase._make_indexj  s   
 DNN+4>>E++11'7CLOOL1E''''8<8O8OM4>>95!1dn (,$55dGDJOOJOFE ,'#g,6G$$$OOL'$:;E 66wOg~rP   c                   d }g }g }| j                   D ]/  } ||      }|j                  |       |j                  ||          1 t        |d      D ]1  }|j                  |       | j                  r!|j                  |       3 |S )Nc                D    t        | t              s| S t        d|  d      )NzIndex z invalid)r   r   rz   r   s    rQ   ixz(ParserBase._get_simple_index.<locals>.ix  s%    c3'
vcU(344rP   Treverse)r^   appendr   ru   rT   )r   r   r   r   	to_remover   idxr   s           rQ   r   zParserBase._get_simple_index  s    	5
 	>> 	"C3AQLLa!	" 	40 	AHHQK''A	
 rP   c                    fd}g }g }| j                   D ]/  } ||      }|j                  |       |j                  ||          1 t        |d      D ]$  }|j                  |       j	                  |       & |S )Nc                    t        | t              r| S t        d| d      t              D ]  \  }}|| k(  s|c S  y )Nz Must supply column order to use z	 as index)r   r   rz   	enumerate)icolr   crs   s      rQ   	_get_namez5ParserBase._get_complex_date_index.<locals>._get_name  sQ    $$  #CD89!UVV!), 19HrP   Tr   )r^   r   r   ru   remove)	r   r   rs   r   r   r   r   r   r   s	     `      rQ   r   z"ParserBase._get_complex_date_index  s    		 	>> 	%CS>DT"LLd$	% 	40 	 AHHQKQ	  rP   c                   t        |t              s|S i }| j                  J |j                         D ]7  \  }}t        |t              r|| j                  vr| j                  |   }|||<   9 t        |t
              rRt        | j                        t        |j                               z
  }|j                  |D ci c]  }|||   
 c}       |S c c}w )zconverts col numbers to names)	r   r   ro   itemsintr   rp   keysupdate)r   mappingcleanr   vremaining_colss         rQ   _clean_mappingzParserBase._clean_mapping  s     '4(N***mmo 	FC#s#4??(Booc*E#J	 g{+ 1C

4EENLL~F#ws|+FG Gs   ;Cc                   g }| j                  | j                        }t        |      D ]  \  }}|r?| j                  |      r.| j	                  || j
                  | j
                  |   nd       }| j                  r| j                  }| j                  }nt               }t               }t        | j                  t              rN| j
                  J | j
                  |   }	|	/t        |	| j                  | j                  | j                        \  }}| j                  | j                        }
d }d}| j
                  _t        |
t              r|
j                  | j
                  |   d       }t        |t              r |j                  | j
                  |         d u}|xr t!        |      xs | }| j#                  |||z  |d u |      \  }}|j%                  |        | j
                  }t'        ||      }|S )Nr   F)r   rf   r   r   ry   rr   rd   rb   rc   rp   r   r   _get_na_valuesrV   re   rn   r!   _infer_typesr   r7   )r   r   r   arraysrf   r   arrcol_na_valuescol_na_fvaluescol_nameclean_dtypes	cast_typeindex_convertertry_num_boolr   r]   s                   rQ   r   zParserBase._agg_index  s   ((9
& (	FAs4#;#;A#>oo/3/?/?/K((+QU & 
 ~~ $!% #!$$..$/''333++A.'4B $..$//4CWCW51M>  ..tzz:LI#O+lD1 , 0 01A1A!1Dd KIj$/&0nnT5E5Ea5H&IQU&UO 8oi8KOL &&]^3Y$5FFC MM#Q(	T   +FE:rP   c                   i }|j                         D ]  \  }}	|d n|j                  |d       }
t        |t              r|j                  |d       }n|}| j                  rt        |||| j                        \  }}nt               t               }}|| j                  v rMt        j                  |	t        |      |z        }t        j                  |	|t        j                         |	||<   |
h|(t        j                  d| dt         t#                      	 t%        j&                  |	|
      }	| j3                  |	t        |      |z  |d u d      \  }}nt5        |      }|xs t7        |      }|xr | }| j3                  |	t        |      |z  |d u |      \  }}|t9        |      }|rD|j:                  |k7  s|r3|s|dkD  rt=        |      rt)        d|       | j?                  |||      }|||<   |s|stA        d| d	|        |S # t(        $ rW t        j                  |	t+        |            j-                  t        j.                        }t%        j0                  |	|
|      }	Y Bw xY w)
Nz5Both a converter and dtype were specified for column z# - only the converter will be used.
stacklevelF)r   r   z$Bool column has NA values in column zFilled z NA values in column )!r   rn   r   r   rd   r   rV   rp   rv   r+   isinr   putmasknanwarningswarnr   r   r   	map_inferrz   r}   viewuint8map_infer_maskr   r   r!   r"   re   r   _cast_typesprint)r   dctrb   rc   verboserf   dtypesresultr   r   conv_fr   r   r   maskcvalsna_countis_eais_str_or_ea_dtyper   s                       rQ   _convert_to_ndarrayszParserBase._convert_to_ndarrays  s     I	FIAv'/TZ^^At5LF&$'"JJq$/	 #	~~0>y*d.B.B1-~ 14su~D))) "vs=/AN/RS

640"q	!(MM**+,OQ &#3#5F ]]66:F
 #'"3"3&7%!&	 #4 #x 1;%*%Hoi.H" %.$D2DE #'"3"3&7% 	#x ( ,Y 7I%++":e X\(3",/STUSV-W"XX ,,UIqAEF1I8z)>qeDESI	FT K " F%??64	?CHHRD //EFFs   HAI21I2c                   t               }| j                  dk(  rt        | j                        n(t	        | j                        s| j                  dvrnddfd}t        | j                  t              rY| j                  D ]H  }t        |t              r|D ]  }|j                   ||              2|j                   ||             J |S t        | j                  t              rg| j                  j                         D ]H  }t        |t              r|D ]  }|j                   ||              2|j                   ||             J |S | j                  rqt        | j                  t              r*| j                  D ]  }|j                   ||              |S | j                  !|j                   || j                               |S )a  
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions. If usecols is specified, the positions of the columns
        not to cast is relative to the usecols not to all columns.

        Parameters
        ----------
        col_indices: The indices specifying order and positions of the columns
        names: The column names which order is corresponding with the order
               of col_indices

        Returns
        -------
        A set of integers containing the positions of the columns not to convert.
        integer)emptyNNc                j    t        |       r|    } t        |       sj                  |          } | S r   )r   r   )xcol_indicesr]   rl   s    rQ   _setz5ParserBase._set_noconvert_dtype_columns.<locals>._set|  s6    "z!}AJa=A/HrP   )returnr   )rp   r[   r   rl   callabler   r_   r}   addr   r   r^   )r   r  r]   noconvert_columnsr  valkrl   s    ``    @rQ   _set_noconvert_dtype_columnsz'ParserBase._set_noconvert_dtype_columnsY  s   ,  E* T\\*Gdll#t'9'9'P "G G	 d&&-'' 5c4(  7)--d1g67 &))$s)45, !  (($/''..0 5c4(  7)--d1g67 &))$s)45 !  $..$/ 3A%))$q'23
 !  +!%%d4>>&:;  rP   c           	     	   d}t        |j                  j                  t        j                  t        j
                  f      rt        j                  |D cg c]  }t        |t              r| c}      }t        j                  ||      }|j                  dd      j                         }|dkD  rOt        |      r|j                  t        j                        }t        j                  ||t        j                          ||fS | j"                  }|xr |t$        j&                  u}	|r"t)        |j                        r	 t%        j*                  ||d|	      \  }
}|	r|/t        j,                  |
j.                  t        j
                        }|j1                         r:t3        t        j4                  |j.                  t        j6                        |      }
nGt        |
      rt3        |
|      }
n/t9        |
      rt;        |
|      }
nt=        |
      rt?        |
|      }
|j                         }nOtA        |
      j                         }n5|}
|j                  t        jJ                  k(  rtG        jH                  ||      }|
j                  t        jJ                  k(  r|rtM        jN                  t        jP                  |      | jR                  | jT                  |	      \  }
}|
j                  t        j
                  k(  r@|	r>|/t        j,                  |
j.                  t        j
                        }t;        |
|      }
nc|
j                  t        jJ                  k(  rF|	rDt%        jV                  |
d	
      s-tY               }|j[                         }|j]                  ||      }
|dk(  rt_        d      }t        |
t        j`                        r tc        |j                  |
d	            }
|
|fS t        |
td              ry|
jf                  j1                         r+tc        |j                  dgti        |
      z              }
|
|fS tc        |j                  |
jj                  |
jf                              }
|
|fS tc        |j                  |
jm                         d	            }
|
|fS c c}w # tB        tD        f$ r tG        jH                  ||      }|}
Y Tw xY w)a  
        Infer types of values, possibly casting

        Parameters
        ----------
        values : ndarray
        na_values : set
        no_dtype_specified: Specifies if we want to cast explicitly
        try_num_bool : bool, default try
           try to cast values to numeric (first preference) or boolean

        Returns
        -------
        converted : ndarray or ExtensionArray
        na_count : int
        r   r   Fr   )convert_to_masked_nullableNre   )rh   ri   r  Tskipnapyarrow)from_pandas)r  )7
issubclassre   typer   numberbool_arrayr   r   r+   r   astypesumr   float64r   r   rg   r   rw   r   maybe_convert_numericzerosshaper{   r2   onesint64r   r.   r   r1   r%   rz   	TypeErrorr   sanitize_objectsobject_libopsmaybe_convert_boolasarrayrh   ri   is_datetime_arrayr)   construct_array_type_from_sequencer   r   r,   r-   _maskr   _datato_numpy)r   r   rb   no_dtype_specifiedr   r
  r  r  rg   non_default_dtype_backendr  result_mask	bool_maskre   clspas                   rQ   r   zParserBase._infer_types  s   ( fll''"))RXX)>?!W#*SRUBV#!WXI??695D{{7{7;;=H!|#F+#]]2::6F

6408##**F=#F 	"
 OFLL92&)&?&?/H	'# -"*&(hhv||288&L"(!-GGK$5$5RXXF" *&1!-fk!B&v.!-fk!B'/!.v{!C*0H#F|//1HF||rzz)"33FIF<<2::%, & 9 9

6" ,,!..+D	!FI ||rxx',E$ "RXX FI%fi8+0I,,VDA'ME446C //e/DFI%+I6B&"**-,RXXf$X-OP x FO4<<##%04&3v;:N1OPF x 1FLLAF x	 -HHV__.DHA xq "X0 	*   #33FIF	 s   R%*R% R* *'SSc                   t        |t              r|j                  du}t        |j                        s|st        j                  |dd      }t        |      j                         j                         }t        j                  ||j                  |      || j                        }|S t        |t              r]|j                         }	 t        |t               r)|j#                  ||| j                  | j$                        S |j#                  ||      S t        |t(              r|j+                  |d	      }|S t-        |j.                  t0              rt        j                  |d
d      }|S 	 t3        ||d
	      }|S # t&        $ r}t'        d| d      |d}~ww xY w# t4        $ r}t5        d| d|       |d}~ww xY w)ar  
        Cast values to specified type

        Parameters
        ----------
        values : ndarray or ExtensionArray
        cast_type : np.dtype or ExtensionDtype
           dtype to cast values to
        column : string
            column name - used only for error reporting

        Returns
        -------
        converted : ndarray or ExtensionArray
        NF)r   convert_na_value)rh   )re   rh   ri   r  zExtension Array: zO must implement _from_sequence_of_strings in order to be used in parser methodsr   TzUnable to convert column z	 to type )r   r#   
categoriesr   re   r   ensure_string_arrayr4   uniquedropnar/   _from_inferred_categoriesget_indexerrh   r$   r7  r3   _from_sequence_of_stringsri   NotImplementedErrorr0   r(  r#  r$  r   r   rz   )r   r   r   column
known_catscats
array_typeerrs           rQ   r  zParserBase._cast_types  s   " i!12"--T9J"6<<0 0055 ='')002D ::d&&v.	tGWGWFR I 	>2"779Ji6 &??'$($4$4%)%6%6	 @   &??i?XX /]]95]9F  	, ,,teF %fidC
 - ' )'
| 4V V $   /xyLs6   
8F  F  0F!  	F	FF!	G*F<<Gc                     y r   rO   r   r]   r   s      rQ   _do_date_conversionszParserBase._do_date_conversionsW       	rP   c                     y r   rO   rR  s      rQ   rS  zParserBase._do_date_conversions_  rT  rP   c           
         | j                   Rt        || j                  | j                   | j                  | j                  || j
                  | j                        \  }}||fS )N)rY   rg   )r_   _process_date_conversionry   r^   rr   rY   rg   rR  s      rQ   rS  zParserBase._do_date_conversionsg  sb     '2    "00"00	KD% d{rP   c                N   | j                   st        |      t        |      k7  r|r~t        |d         xr |d   dk(  }|t        |d         z  }t        |      t        |      dz
  k(  rt	        j
                  |      ryt        j                  dt        t                      yyyy)ae  Checks if length of data is equal to length of column names.

        One set of trailing commas is allowed. self.index_col not False
        results in a ParserError previously when lengths do not match.

        Parameters
        ----------
        columns: list of column names
        data: list of array-likes containing the data column-wise.
        r    rG   NzkLength of header or names does not match length of data. This leads to a loss of data with index_col=False.r   )
r^   r   r   r%   r   r{   r   r   r   r   )r   r   r   	empty_strempty_str_or_nas        rQ   _check_data_lengthzParserBase._check_data_length}  s      ~~#g,#d)";'R1Dd2h"nI ($tBx.8O7|s4y1},1HMM:+-	 AH";~rP   c                     y r   rO   r   rl   r]   s      rQ   _evaluate_usecolszParserBase._evaluate_usecols  rT  rP   c                     y r   rO   r^  s      rQ   r_  zParserBase._evaluate_usecols  s     	rP   c                v    t        |      r't        |      D ch c]  \  }} ||      s| c}}S |S c c}}w )a  
        Check whether or not the 'usecols' parameter
        is a callable.  If so, enumerates the 'names'
        parameter and returns a set of indices for
        each entry in 'names' that evaluates to True.
        If not a callable, returns 'usecols'.
        )r  r   )r   rl   r]   r   r   s        rQ   r_  zParserBase._evaluate_usecols  s7     G%.u%5G'!TAGG Hs   55c                p    |D cg c]	  }||vs| }}t        |      dkD  rt        d|       |S c c}w )ai  
        Validates that all usecols are present in a given
        list of names. If not, raise a ValueError that
        shows what usecols are missing.

        Parameters
        ----------
        usecols : iterable of usecols
            The columns to validate are present in names.
        names : iterable of names
            The column names to check against.

        Returns
        -------
        usecols : iterable of usecols
            The `usecols` parameter if the validation succeeds.

        Raises
        ------
        ValueError : Columns were missing. Error message will list them.
        r   z>Usecols do not match columns, columns expected but not found: )r   rz   )r   rl   r]   r   missings        rQ   _validate_usecols_namesz"ParserBase._validate_usecols_names  sO    . &8%188w<!P) 
  9s   	33c                    d}|Zt        |      r|dfS t        |      st        |      t        j                  |d      }|dvrt        |      t        |      }||fS |dfS )as  
        Validate the 'usecols' parameter.

        Checks whether or not the 'usecols' parameter contains all integers
        (column selection by index), strings (column by name) or is a callable.
        Raises a ValueError if that is not the case.

        Parameters
        ----------
        usecols : list-like, callable, or None
            List of columns to use when parsing or a callable that can be used
            to filter a list of table columns.

        Returns
        -------
        usecols_tuple : tuple
            A tuple of (verified_usecols, usecols_dtype).

            'verified_usecols' is either a set if an array-like is passed in or
            'usecols' if a callable or None is passed in.

            'usecols_dtype` is the inferred dtype of 'usecols' if an array-like
            is passed in or None if a callable or None is passed in.
        z['usecols' must either be list-like of all strings, all unicode, all integers or a callable.NFr  )r  r  string)r  r   rz   r   infer_dtyperp   )r   rl   msgr[   s       rQ   r   z ParserBase._validate_usecols_arg  s{    6* 	  }$( !o%OOGEBM$BB o%'lGM))}rP   c                "   t        |      sd ||fS t        |      }|sd gt        |      z  ||fS t        |      }g }t        |      }t        |      D ]~  \  }}t	        |t
              rB|j                  |       t        |      D ]"  \  }}||k(  s|||<   |j                  |        V X||   }|j                  |       |j                  |        t        |      D ]*  \  }}t	        |t
              s|| j                  v s&d ||<   , |||fS r   )	r   r}   r   r   r   r   r   r   rq   )	r   r   r^   cp_colsrr   r   r   r   r   s	            rQ   r   zParserBase._clean_index_names  s+   I&)++w- 6C	N*GY>>w-.0 O	i( 	)DAq!S!""1%(1 GAtqy'(	!t,	 qzt$""4(	) !- 	&GAt$$1B1B)B!%A	& GY..rP   c           
        t        |      }| j                  }| j                  }t        |      s|xs t        t        fd      }nOt        t        |      }t        d |j                         D ci c]  \  }}t        |      r||   n|| c}}      }||du s|t        d      }na|D 	cg c]  }	t        g ||	          }
}	t        |
|      }|j                          t        |      D ]  \  }}|j                  ||z
          |D ci c]  }|t        g ||          }}|||fS c c}}w c c}	w c c}w )Nc                      S r   rO   )default_dtypes   rQ   <lambda>z,ParserBase._get_empty_meta.<locals>.<lambda>@  s    ] rP   c                     t         S r   )objectrO   rP   rQ   rn  z,ParserBase._get_empty_meta.<locals>.<lambda>D  s     rP   Fr   r  r   )r}   r^   rr   r   rp  r   r   r   r   r   r6   r8   r7   sortr   ru   )r   r   re   r^   rr   
dtype_dictr  r   r   r   r   r   nr   col_dictrm  s                  @rQ   _get_empty_metazParserBase._get_empty_meta2  sJ   w-NN	&& E"!OVM$%:;Ju%E$CH;;=Q41az!}!Q6QJ e!38K!!$ECNO4F2Z%56ODO/KHENN!), #1AE"# NU
AIHfRz(';<<
 
 gx''7 R  P
s   3D00D6D;)r  None)r   Sequence[Hashable]r  r<   r  rS   )r   r   r  rS   )F)rr   Sequence[Hashable] | Noner   rS   r  zUtuple[Sequence[Hashable], Sequence[Hashable] | None, Sequence[Hashable] | None, bool]r   )r   rw  rs   ry  r  zSequence[Hashable] | MultiIndex)r   zlist[Scalar] | Noner  z4tuple[Index | None, Sequence[Hashable] | MultiIndex])T)r   rS   r  r4   )FNN)r  r=   r  rS   )r  z	list[int]r]   rw  r  set[int])r   rS   r  ztuple[ArrayLike, int])r   r?   r   rA   r  r?   )r]   r4   r   r'   r  z,tuple[Sequence[Hashable] | Index, DataFrame])r]   rw  r   zMapping[Hashable, ArrayLike]r  z7tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]])r]   zSequence[Hashable] | Indexr   z(Mapping[Hashable, ArrayLike] | DataFramer  zKtuple[Sequence[Hashable] | Index, Mapping[Hashable, ArrayLike] | DataFrame])r   rw  r   zSequence[ArrayLike]r  rv  )rl   z'set[int] | Callable[[Hashable], object]r]   rw  r  rz  )rl   set[str]r]   rw  r  r{  )rl   z2Callable[[Hashable], object] | set[str] | set[int]r]   rw  r  zset[str] | set[int])r]   r>   )r  ztuple[list | None, list, list])re   zDtypeArg | None) rI   rJ   rK   r   rR   __annotations__r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r  r  r   r  r   rS  r\  r_  rd  r   r   ru  rO   rP   rQ   rD   rD   e   s   d 
 NJUX;
z 
  
   
 #	D; /D; 	D;

D; D;L  04	#	 -	 
)		 	 JN4G	= <  ,  6  " 1 1f  TT
 T Tl B!$B!-?B!	B! B!H JNn CGn 	n  n ` E EN   
6	  ! + 
A	  ) 7 
U	 * # " 
	 : 8 " 
	  (:	 
 C " 
	    > / /b #/ #/J -( -(rP   rD   FTrW   rX   ra   c                      t         j                  ur$t        j                  dt        t                       t         j                  urt        d      d d fd}|S )NzThe argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.r   z/Cannot use both 'date_parser' and 'date_format'c                ~    t        | t        j                        r"| j                  dk(  rt	        |       dk(  r| d   S | S )NrG   r   )r   r   r   ndimr   )args    rQ   unpack_if_single_elementz6_make_date_converter.<locals>.unpack_if_single_elementu  s2    c2::&388q=SX]q6M
rP   c                N   t        |      dk(  r |d   j                  j                  dv r|d   S 
t        j                  u rt        j                  |      }t        	t              r	j                  |       n	}t        j                         5  t        j                  ddt               t        |      }	 t        j                   ||d      }	 d d d        t        t&              r#|j)                         }d	|j*                  _        |S |j.                  S 	 t        j                         5  t        j                  ddt                
fd
|D         }	 t        j                   |      }d d d        t        t0        j0                        rt3        d      |S # t"        t$        f$ r |cY cd d d        S w xY w# 1 sw Y   xY w# t"        t$        f$ r |}Y nw xY w# 1 sw Y   sxY w# t2        $ r t        j                         5  t        j                  ddt               t        j4                  t        j                  |      
      }	 t        j                   |      cd d d        cY S # t"        t$        f$ r |cY cd d d        cY S w xY w# 1 sw Y   Y y xY ww xY w)NrG   r   Mmignorez=.*parsing datetimes with mixed time zones will raise an error)categoryF)formatutcrW   cacheTc              3  .   K   | ]  } |        y wr   rO   )r   r  r  s     rQ   r   z:_make_date_converter.<locals>.converter.<locals>.<genexpr>  s     MC237Mr   )r  zscalar parser)parser)r   re   kindr   rw   r   concat_date_colsr   r   rn   r   catch_warningsfilterwarningsFutureWarningr   toolsto_datetimerz   r0  r(   r;  flags	writeable_valuesdatetime	Exceptionr   )r   	date_colsstrsdate_fmtstr_objsr  r   
pre_parsedrX   ra   r`   rW   r  s           rQ   	converterz'_make_date_converter.<locals>.converter{  s   y>Q9Q<#5#5#:#:d#BQ<#..(++I6D(2;(E$;  ((* $''S*
 ).
$".. '!!))F$& &-0oo'&*		#
>>!'*,,. ,++ .!.	 "-M9M"J,!&!2!2&"-",$ fh&7&78#O44? #I. $#O#$ $$$ $R '	2 ,!+,, ,*  *,,. *++ .!.	 ")!8!800;*"J*$00<* * '	2 *))* *** **s   (G 7F G/ .+G#G1.G/  F=1G <F==G  G	G G#G  G##G,(G/ /J$AJI4(
J$4JJ
J$JJJ 	J$ J$)r   r;   )r   rw   r   r   r  r   r0  )r`   rW   rX   ra   r  r  s   ```` @rQ   rx   rx   c  sf     #..(+ ')	
 #..([-DIJJJ* J*X rP   	delimiter
escapechar	quotechar"quotingdoublequoteskipinitialspacelineterminatorrj   inferr^   r]   skiprows
skipfooternrowsrb   rV   rh   ri   rf   re   	thousandscommentdecimal.r_   rY   r`   rl   	chunksizer  encodingcompressionskip_blank_linesstrict)encoding_errorsrm   rg   c                   fd}g }	i }
|}t        |      }t               }|t        |t              r| |fS t        |t               r)|D ]"  }t	        |      st        |t
              rt        |t              r	|| vr||   } ||      rA|dk(  rndd l}| |   j                  }t        |t              rK|j                  j                  |j                        s%|j                  j                  |j                        r |t        j                  | |         |      | |<   t!        ||| |      \  }}}|| v rt#        d|       ||
|<   |	j%                  |       |j'                  |       % nt        |t(              r||j+                         D ]i  \  }}|| v rt#        d| d      t!        ||| ||      \  }}}||
|<   t-        |      d	k(  r||
|d   <   |	j%                  |       |j'                  |       k t        | t.              rt1        t/        |
      | gd	d
      } n| j'                  |
       |	j3                  |       |s2t        |      D ]$  }| j5                  |       |	j7                  |       & | |	fS )Nc                `    t        t              xr | v xs t        t              xr | v S r   )r   r}   )colspecr^   rr   s    rQ   _isindexz*_process_date_conversion.<locals>._isindex  s5    9d+D90D 
{D)Dg.D	
rP   r!  r   r   z New date column already in dict zDate column z already in dict)target_namerG   F)axisr   )r}   rp   r   rS   r    r   r   r!  re   r&   typesis_timestamppyarrow_dtypeis_dater   r5  _try_convert_datesrz   r   r   r   r   r   r'   r*   extendru   r   )	data_dictr  
parse_specr^   rr   r   rY   rg   r  new_colsnew_dataro   r  r  rA  re   new_namer   	old_namesr   r   s      ``                rQ   rW  rW    s   

 HHJ7mGIZ
D9'!!*d#! 	,G!Z%?gs+y0H(1GG$"i/(%g.44E!%4--e.A.AB88++E,?,?@  &/JJy12&	'" ,>w	:,(#y y($'Gz%RSS%(")  +;	,> 
J	%!+!1!1!3 	(Hg9$ <z9I!JKK 2$!AsI "%HX 7|q '*$OOH%Y')	(, )Y'Ih/;!%P	"OOGi 	AMM!OOA	 hrP   c                   t        |      }g }|D ]R  }||v r|j                  |       t        |t              r||vr|j                  ||          B|j                  |       T t	        d |D              r&t        t        dj                  t        |             }n(dj                  |D 	cg c]  }	t        |	       c}	      }|D cg c]  }||v st        j                  ||         ! }
} | |
d||n|i}|||fS c c}	w c c}w )Nc              3  <   K   | ]  }t        |t                y wr   )r   r   )r   r  s     rQ   r   z%_try_convert_dates.<locals>.<genexpr>e  s     
2A:a
2s   r   r   )rp   r   r   r   r{   r   r|   r   r   r   r   r5  )r  r  r  r   r  colsetcolnamesr   r  r  to_parsenew_cols               rQ   r  r  V  s     \FH ;OOA3AW$4OOGAJ'OOA 
2
22SXXsH~6788X6SV672:MQa9n

9Q<(MHMhU0CHUGWh&&	 7Ms   ,D 		DDc                    t        |t              r4| |v r
||    ||    fS |rt        t               fS t               t               fS ||fS )a  
    Get the NaN values for a given column.

    Parameters
    ----------
    col : str
        The name of the column.
    na_values : array-like, dict
        The object listing the NaN values as strings.
    na_fvalues : array-like, dict
        The object listing the NaN values as floats.
    keep_default_na : bool
        If `na_values` is a dict, and the column is not mapped in the
        dictionary, whether to return the default NaN values or the empty set.

    Returns
    -------
    nan_tuple : A length-two tuple composed of

        1) na_values : the string NaN values for that column.
        2) na_fvalues : the float NaN values for that column.
    )r   r   r   rp   )r   rb   rc   rV   s       rQ   r   r   o  sR    . )T")S>:c?22$ce++5#%<*$$rP   c                z    d}| 6t        j                  |       s!t        | t        t        f      st        |      | S )z
    Check whether or not the 'parse_dates' parameter
    is a non-boolean scalar. Raises a ValueError if
    that is the case.
    zSOnly booleans, lists, and dictionaries are accepted for the 'parse_dates' parameter)r   r   r   r}   r   r0  )r_   rh  s     rQ   rt   rt     s>    	*  	;;{#kD$<0nrP   c                    | d uxr | duS )NFrO   r   s    rQ   r   r     s    d?/s%//rP   )rW   rS   rX   rS   ra   z dict[Hashable, str] | str | None)r  r
   rY   rS   r   )r  r
   r  rZ   )rV   rS   rx  )n
__future__r   collectionsr   r   csvr  enumr   r   typingr   r	   r
   r   r   r   r   numpyr   pandas._libsr   r   pandas._libs.ops_libsopsr3  pandas._libs.parsersr   pandas._libs.tslibsr   pandas.compat._optionalr   pandas.errorsr   r   pandas.util._exceptionsr   pandas.core.dtypes.astyper   pandas.core.dtypes.commonr   r   r   r   r   r   r   r   r   r    r!   r"   pandas.core.dtypes.dtypesr#   r$   pandas.core.dtypes.missingr%   pandasr&   r'   r(   r)   r*   pandas.corer+   pandas.core.arraysr,   r-   r.   r/   r0   r1   r2   pandas.core.arrays.booleanr3   pandas.core.indexes.apir4   r5   r6   r7   pandas.core.seriesr8   pandas.core.toolsr9   r  pandas.io.commonr:   collections.abcr;   r<   r=   r>   pandas._typingr?   r@   rA   rB   rD   rw   rx   QUOTE_MINIMALrR   rL   parser_defaultsrW  r  r   rt   r   rO   rP   rQ   <module>r     s&   " #  
       " ! . ' > 5 2    ,  #   4  & 0 5  {( {(~ 48	dd d 2	dN(($( ( s  	(
 4( ( d( g( ( T( ( !( T( ( t(  4!(" D#($ $%(& T'(( 4)(* +(, t-(. s/(2 53(4 U5(6 7(8 3>>9(: 4;(< t=(@ A(B uC(D E(F 4G(H I(J  2288^^O(d  ..^^ ^D NR''@J'2 %F*0rP   