
    giz              
          d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 dZdZdZdded	efd
Zd	e
e         fdZd Zde
e         ded	e
e         fdZdde
e         deded	e
e         fdZd Zedk    r e             dS dS )    N)BytesIO)List b01e3890d44f4456820f88b2fd6e5351112011z1https://api.bls.gov/publicAPI/v2/timeseries/data/soc_codes.csvout_csvreturnc                    g d}|D ];}	 t          d| t          j                   |                                                    d          s'|                                                    d          rt          j        |d          }|                                 	 t          j	        t          |j                  t                    }n'# t          $ r+}t          d	| d
| t          j                   Y d}~d}~ww xY wt          j        |d          }|                                 	 t          j        t          |j                  t                    }n# t          $ r}	 t          j        t          j                            |j                  t                    }n@# t          $ r3}t          d| d
| d| t          j                   Y d}~Y d}~d}~ww xY wY d}~nd}~ww xY wd |j        D             }d}d}	|                                D ]I\  }
}d|
v r0|
                    d          s|
                    d          sd|
v r|}d|
v sd|
v sd|
v r|	p|}	J|~|j        D ]v}||                                                             t                                        d                                          }t1          d |D                       r|} nw| t          d| t          j                   ||g|	r|	|j        v r|	gng z                                            }dg|	r|	|j        v rdgng z   |_        |d                             t                    j                            d          d         |d<   |                    dg                              dg                              d          }|                    | d           t          d|  t          j                    dS # t          $ r,}t          d | d!| t          j                   Y d}~5d}~ww xY wt          d"t          j                   dS )#zAttempt to download a list of 6-digit SOC codes from several candidate sources and save to CSV.
    Returns True on success, False otherwise.)z4https://www.bls.gov/soc/2018/soc_2018_structure.xlsxz3https://www.bls.gov/soc/2018/soc_2018_structure.xlszJhttps://raw.githubusercontent.com/ProGovViz/soc/master/soc_2010_6digit.csvzEhttps://raw.githubusercontent.com/txopio/soc-2010/master/soc_2010.csvzTrying SOC source: filez.xlsxz.xls   )timeout)dtypezFailed to parse Excel from : NzFailed to parse CSV from z; c                 8    i | ]}|                                 |S  )lower).0cs     %/home/asher/public_html/census/bls.py
<dictcomp>z&download_soc_codes.<locals>.<dictcomp>:   s"    777qaggii777    codesocsoc_code
occupationtitlename
   c              3      K   | ]B}t          |                                          d k    ot          d |D                       V  CdS )   c              3   >   K   | ]}|                                 V  d S )N)isdigit)r   chs     r   	<genexpr>z/download_soc_codes.<locals>.<genexpr>.<genexpr>G   s*      6P6Przz||6P6P6P6P6P6Pr   N)lenstripany)r   ss     r   r%   z%download_soc_codes.<locals>.<genexpr>G   sR      aaUV3qwwyy>>Q.P36P6Pa6P6P6P3P3Paaaaaar   z"Could not find SOC code column in z
([0-9]{6})r   )subsetT)dropF)indexzSaved SOC codes to: zSOC source z	 failed: z!All SOC download attempts failed.)printsysstderrr   endswithrequestsgetraise_for_statuspd
read_excelr   contentstr	Exceptionread_csvcompatStringIOtextcolumnsitemsdropnaastypeheadtolistallcopyextractdrop_duplicatesreset_indexto_csv)r   
candidatesurlrespdfee2colmapcode_col	title_colkvsampleouts                 r   download_soc_codesrV      s     J  : :9	---CJ????yy{{##G,, !		0D0DV0L0L !|C444%%'''wt|'<'<CHHHBB    BBBqBBTTTTHHHH
  |C444%%'''!WT\%:%:#FFFBB  ! ! !![););DI)F)FcRRR$ ! ! !J#JJJJbJJQTQ[\\\\ ! ! 87BJ777FHI / /1Q;;AJJu$5$5;J9O9O;SX\]S]S] H1$$1! )QI  AU\\^^22377<<R@@GGIIFaaZ`aaaaa #$ @3@@szRRRRhZ)#_	RZ@W@WI;;]_`affhhC%,y*dYRTR\E\E\7))bdeCK!*o44S99=EEmTTUVWC
O**ZL*11AA*AVVbbhlbmmCJJweJ,,,222DDDD44 	 	 	111a11
CCCCHHHH	 

-CJ????5s   BP"-CP
D!D<PD-P4-E"!P"
G5-=F+*G0+
G(5$G#G0P#G((G0+P0G55DPC;P
Q!P<<Qc            	         g d} ddi}d}| D ]o}	 t          j        |d|          }|                                 |j        } n9# t          $ r,}t          d| d| d	t          j        
           Y d}~hd}~ww xY w|st          dt          j        
           g S g }|                                D ]}|	                                }|r|
                    d          r.d|v r|                    d          n|                    dd          }|d         	                                }	t          |          dk    r|d         	                                nd}
t          j        d|	                                          }|r+|                    |                    d          |
d           t%                      }g }|D ]<}|d         |vr0|                    |d                    |                    |           =|S )zDownload known area files and return list of metro area dicts: {'code','title'}.

    Tries several candidate endpoints and parses lines to find codes beginning with MT.
    )z3https://download.bls.gov/pub/time.series/oe/oe.areaz7https://download.bls.gov/pub/time.series/oe/oe.area.txtz2https://download.bls.gov/pub/time.series/area/areaz6https://download.bls.gov/pub/time.series/area/area.txtz<https://download.bls.gov/pub/time.series/oe/oe.area?raw=truez
User-AgentzaMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0 Safari/537.36Nr   )r   headerszArea candidate failed: z ()r   z3Failed to download area codes from known endpoints.#	   r    z
^(MT\d{5}))r   r   r   )r1   r2   r3   r<   r8   r-   r.   r/   
splitlinesr'   
startswithsplitr&   rematchupperappendgroupsetadd)rI   rX   r<   rJ   rK   rM   metroslnpartsr   r   mseenrU   s                 r   fetch_all_metro_area_codesrm   `   s8   
  J   A  BGD  	<RAAAD!!###9DE 	 	 	7C771777cjIIIIHHHH	  C#*UUUU	Foo 
@ 
@XXZZ 	R]]3'' 	"&"**"((42C2CQx~~$'JJNNa   H]DJJLL11 	@MM1771::>>??? 55D
C  V9D  HHQvYJJqMMMJs   2A
A:"A55A:c              #      K   t          |           }t          dt          |          |          D ]}||||z            V  dS )z.Yield successive n-sized chunks from iterable.r   N)listranger&   )iterablenlis       r   chunkedru      sW      XA1c!ffa    !a%j r   
area_codesr   c                      fd| D             S )Nc                     g | ]	}d |  
S )OEUMr   )r   acr   s     r   
<listcomp>z$build_series_ids.<locals>.<listcomp>   s'    777b!2!x!!777r   r   )rv   r   s    `r   build_series_idsr|      s    7777J7777r   2023
series_ids	startyearendyearc                    g }t          | d          D ]N}|||t          d}	 t          j        t          |d          }|                                 |                                }n5# t          $ r(}t          d| t          j
                   Y d}~d}~ww xY w|                    di                               d	g           }	|	D ]}
|
                    d
          }|
                    dg           D ]i}|                    ||                    d          |                    d          |                    d          |                    d          d           jP|S )zTQuery BLS API for the provided series_ids (handles batching). Returns rows of dicts.2   )seriesidr   r   registrationkey<   )jsonr   zBLS request failed for batch: r   NResultsseriesseriesIDdatayearperiodvalue	footnotes)	series_idr   r   r   r   )ru   API_KEYr1   postBLS_API_URLr3   r   r8   r-   r.   r/   r2   rd   )r~   r   r   rowsbatchpayloadrK   r   rM   resultsr   siditems                r   query_bls_seriesr      s   DR((  $9elmm	=7BGGGD!!###99;;DD 	 	 	6166SZHHHHHHHH	 ((9b))--h;; 		 		F**Z((C

62..  !$ HHV,,"hhx00!XXg..!%+!6!6     		 Ks   AA((
B2BBc            	      :   t                      } | st          dt          j                   d S d}	 t	          |ddd          5 }t          j        |dd	g
          }|                                 | D ]4}|                    |d         |	                    d	d          d           5	 d d d            n# 1 swxY w Y   t          dt          |            d|            d S # t          $ r,}t          d| d| t          j                   Y d }~d S d }~ww xY w)Nz$No metro area codes found; aborting.r   zmetro_codes.csvwr]   zutf-8)newlineencoding	area_coder   )
fieldnamesr   )r   r   zSaved z metro area codes to: zFailed to write r   )rm   r-   r.   r/   opencsv
DictWriterwriteheaderwriterowr2   r&   r8   )rh   r   fhwriterrk   rM   s         r   mainr      s   '))F 43:FFFFG	'3W=== 	W^BK3IJJJF    W Wai!%%QSBTBT U UVVVVW	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W
 	Cs6{{CC'CCDDDDD   ///A//cjAAAAs<   C$ A$B5)C$ 5B99C$ <B9=%C$ $
D.!DD__main__)r   )r}   r}   )r1   pandasr4   r.   mathra   r   ior   typingr   r   SOC_CODEr   r7   boolrV   rm   ru   r|   dictr   r   __name__r   r   r   <module>r      su        



  				 



            
, AJ J J$ J J J J\1DI 1 1 1 1h  8c 8c 8d3i 8 8 8 8 c s c _cdh_i    6  ( zDFFFFF r   