
    qi0                     f   d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZmZ ddlZddlZddlZddlZddlZddlZ	 ddlmZ n# e$ r dZ ej        d           Y nw xY wddlZdd	lmZ dd
lm Z m!Z!m"Z"m#Z#m$Z$ da%d Z&dZ'i a( G d de          Z)defdZ*dee+         dee+         fdZ,dej-        fdZ.d Z/d'dZ0dee	         fdZ1d Z2d Z3dee+         fdZ4dee+         fdZ5d  Z6d!ee	         de	fd"Z7d#e+de	e+e+f         fd$Z8d% Z9e:d&k    r e9             dS dS )(z
pulse.py

Finds and stores a history of the most recent news articles for major
cities, mapping them based on solar cycles with clustering for usability.
    N)Path)ListDictTupleOptional
NamedTuple)
quote_plus)datetimetimezone)Llamaz>llama-cpp-python not installed; LLM features will be disabled.)BeautifulSoup) fetch_and_process_current_eventsget_coords_from_wikidataRemoteLLMClientAIModelSuppressStderrFc                     t           r/t          dt          j                   t          j        d           da t          dt          j                   d S )Nz
Force quitting immediately.)file   Tz/
Graceful shutdown initiated. Saving results...)SHUTDOWN_REQUESTEDprintsysstderrexit)signumframes     !/home/asher/github/pulse/pulse.pyhandle_shutdown_signalr   %   sO     -CJ????	
<3:NNNNNN    z0pulse/1.0 (+https://github.com/htmlfarmer/pulse)c                   $    e Zd ZU eed<   eed<   dS )SunEventnametime_utcN)__name__
__module____qualname__str__annotations__r
    r   r   r!   r!   0   s'         
IIIr   r!   	file_pathc                    |                                  st          j        d|             	 |                     dd          5 }t	          j        |          }|D ]}|                    dd                                          }|r_t          |d                   t          |d                   |                    d	d          ft          |<   t# t          t          f$ r Y w xY wt          j        d
t          t                     d|             d d d            d S # 1 swxY w Y   d S # t          $ r%}t          j        d|  d|            Y d }~d S d }~ww xY w)Nz FATAL: Cities file not found at rutf-8)modeencodingcity latlngcountryzLoaded z cities from zFailed to load cities from z: )existsr   r   opencsv
DictReadergetstripfloatCITIES_CACHE
ValueErrorKeyErrorlogginginfolen	Exception)r*   freaderrow	city_namees         r   load_cities_csvrH   4   s    Wsx(V9(V(VWWW	T^^w^77 	P1^A&&F < <GGFB//5577	 <49#e*4E4EuSQVZGXGXZ]ZaZabkmoZpZp3qi00&1;;;88;< LN3|#4#4NN9NNOOO	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P SSS38$R)$R$Rq$R$RSSSSSSSSSSsa   D- AD ACD C# D "C##0D D-  D$$D- 'D$(D- -
E7EEsreturnc                 z    | | S 	 | }	 t          j        dd|          }||k    rn|} |S # t          $ r | cY S w xY w)zFix LLM-inserted spaces after commas in numeric groupings.

    Examples: '1, 600' -> '1,600', '2, 000, 000' -> '2,000,000'.
    NTz(?<=\d),\s+(?=\d{3}\b),)resubrB   )rI   outnews      r   _normalize_number_commasrQ   D   sr    
 	y
	&2C==CczzC		
 
   s   $+ ::connc                     |                      d           |                      d           |                      d           |                      d           |                                  dS )z1Initializes all necessary tables in the database.z
        CREATE TABLE IF NOT EXISTS last_checked (
            city TEXT PRIMARY KEY, last_check_ts INTEGER, last_event TEXT
        )zx
        CREATE TABLE IF NOT EXISTS city_queue (
            city_name TEXT PRIMARY KEY, process_order INTEGER
        )a;  
        CREATE TABLE IF NOT EXISTS articles (
            article_link TEXT PRIMARY KEY,
            city_name TEXT NOT NULL,
            title TEXT,
            source TEXT,
            summary TEXT,
            published_ts INTEGER,
            image_url TEXT,
            geojson_feature TEXT NOT NULL
        )z]CREATE INDEX IF NOT EXISTS idx_city_name_published ON articles (city_name, published_ts DESC)Nexecutecommit)rR   s    r   _init_dbrW   X   s}    LL     	LL    
 	LL 
 
 
 
 	LLpqqqKKMMMMMr   c                     d }|                      d|d         |d         |d         |d          ||                    d                    |d         |d	         t          j        |d
                   f           |                                  dS )z/Inserts or replaces an article in the database.c                 F    	 t          |           S # t          $ r | cY S w xY wN)rQ   rB   )rI   s    r   _maybe_normz)_store_article_in_db.<locals>._maybe_normt   s9    	+A... 	 	 	HHH	s      z
        INSERT OR REPLACE INTO articles (article_link, city_name, title, source, summary, published_ts, image_url, geojson_feature)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    linkr0   titlesourcesummarypublished_tsimagefeatureN)rU   r9   jsondumpsrV   )rR   article_datar[   s      r   _store_article_in_dbrf   q   s       	LL  	Vl62L4IXL,<,<Y,G,G H H,WeJfWtz,y*ABB
   	KKMMMMMr      c                 ^    |                      d||f           |                                  dS )z1Keeps only the N most recent articles for a city.z
        DELETE FROM articles WHERE article_link IN (
            SELECT article_link FROM articles
            WHERE city_name = ?
            ORDER BY published_ts DESC
            LIMIT -1 OFFSET ?
        )
    NrT   )rR   rF   max_articless      r   _trim_article_historyrj      s:    LL  \	"$ $ $ 	KKMMMMMr   c                 h    |                      d          }d |                                D             S )z8Retrieves all stored GeoJSON features from the database.z$SELECT geojson_feature FROM articlesc                 B    g | ]}t          j        |d                    S r   )rc   loads.0rE   s     r   
<listcomp>z-_get_all_features_from_db.<locals>.<listcomp>   s&    9993DJs1v999r   rU   fetchallrR   curs     r   _get_all_features_from_dbrv      s0    
,,=
>
>C99#,,..9999r   c                     |                      d|f          }|                                }|r.t          j        |d         t          j                  |d         fnd S )NzASELECT last_check_ts, last_event FROM last_checked WHERE city = ?r   )tzr   )rU   fetchoner
   fromtimestampr   utc)rR   r0   ru   rE   s       r   _get_last_checkedr|      sW    
,,Z]a\c
d
dC
,,..CHKUH"3q6hl;;;SVDDQUUr   c                     t          t          j                              }|                     d|||f           |                                  d S )NzKREPLACE INTO last_checked(city, last_check_ts, last_event) VALUES (?, ?, ?))inttimerU   rV   )rR   r0   eventnow_tss       r   _set_last_checkedr      sE    FLL^aegmot`uvvvKKMMMMMr   c                 h    |                      d          }d |                                D             S )Nz7SELECT city_name FROM city_queue ORDER BY process_orderc                     g | ]
}|d          S rm   r)   ro   s     r   rq   z#_get_city_queue.<locals>.<listcomp>   s    ---sCF---r   rr   rt   s     r   _get_city_queuer      s0    
,,P
Q
QC--cllnn----r   citiesc                 B   |                      d           t          |          }t          j        |           |                     dd t          |          D                        |                                  t          j        dt          |           d           d S )NzDELETE FROM city_queuez?INSERT INTO city_queue (city_name, process_order) VALUES (?, ?)c                     g | ]	\  }}||f
S r)   r)   )rp   ir0   s      r   rq   z(_populate_city_queue.<locals>.<listcomp>   s     JJJGAttQiJJJr   z"Created a new randomized queue of z cities.)
rU   listrandomshuffleexecutemany	enumeraterV   r?   r@   rA   )rR   r   shuffled_citiess      r   _populate_city_queuer      s    LL)***6llO
N?###VJJy/I/IJJJL L LKKMMMLTc/6J6JTTTUUUUUr   c                 \    |                      d|f           |                                  d S )Nz*DELETE FROM city_queue WHERE city_name = ?rT   )rR   rF   s     r   _remove_city_from_queuer      s*    LL=	|LLLKKMMMMMr   featuresc                     d| dS )NFeatureCollection)typer   r)   )r   s    r   
to_geojsonr      s    $(;;;r   r]   c           	      0   |                                  g ddddg ddddg dd	d
dg ddddg ddddg ddddd}|                                D ]:\  }}t          fd|d         D                       r||d         |d         dc S ;ddddS )z@Analyzes an article title to assign a category, icon, and color.)
earthquakequake	emergencyalertzexclamation-trianglered)keywordsiconcolor)weathersnowrainsunstormforecasttemperature	hurricanecloudblue)
sportsgamematchteamplayerdrawsfootballsoccernbaolympicszfutbol-ogreen)z
robinson'selection
governmentsenatecongress	politicalmayorbankdarkred)businesseconomymarketstocksfinancesharesz
line-chartpurple)techapplegoogle	microsoftsoftwarehardwareaicogsorange)AlertWeatherSportsPoliticsBusiness
Technologyc              3       K   | ]}|v V  	d S rZ   r)   )rp   keywordtitle_lowers     r   	<genexpr>z%categorize_article.<locals>.<genexpr>   s(      FF'w+%FFFFFFr   r   r   r   )categoryr   markerColorNewszinfo-circlegray)loweritemsany)r]   
categoriesr   datar   s       @r   categorize_articler      s]   ++--KKKKUkv{|| s s s  ~E  PV  W  W~~~  IS  ^e  f  f!u!u!u  @F  QZ  [  [!a!a!akw  CK  L  L#i#i#isy  EM  N  N J %**,, ^ ^$FFFFT*5EFFFFF 	^ ($v,tT[}]]]]]	^ fMMMr   c                  l	   t          j         t           j        t                     t          j                    } |                     dd           |                     dd           |                     dt          d            |                     dd	
           |                     dd d           |                     dd d           |                     dd	d           |                                 }t          j	        t          j
        d           |j        r0|j        t          j        d<   t          j        d|j                    |j        r0|j        t          j        d<   t          j        d|j                    |j        r#dt          j        d<   t          j        d           t#          t%          |j                             t(          }t%          d          dz  }|j                            d           t/          j        t3          |          d !          }t5          |           t7          |          }|s6t9          |t:                                                     t7          |          }t          j        d"t?          |           d#           t          j        d$           tA          |          }t%          d%          }	 |!                    d&          }g }	d'}
|"                                }tG          |          D ]Y}tI          j%        |d'                   }t          |d(                   }||d)         d*<   |	&                    |           ||
k    r|}
Zd+|	|
d,}|'                    tI          j(        |d-.          d/0           t          j        d1t?          |	           d2|            n.# tR          $ r!}t          j*        d3|            Y d }~nd }~ww xY w|+                                 t%          |j,                  }|j                            dd4           |'                    tI          j(        t[          |          d-d56          d/0           t          j        d1t?          |           d7|            t%          d8          }t]          ||           d S )9Nz--cities-csvz
cities.csv)defaultz--outzdata/articles.geojsonz--max-cities)r   r   z--force-check
store_true)actionz--llm-serverz3Override LLM server URL (e.g. http://host:5005/ask))r   helpz--llm-providerzsHint provider to remote LLM server (e.g. 'local' or 'gemini-3-flash-preview'). Defaults to 'gemini-2.5-flash-lite'.z--allow-local-llmzPAllow loading a local GGUF model as fallback if the remote server is unavailable)r   r   z%(levelname)s: %(message)s)levelformatLLM_SERVER_URLzLLM server overridden to LLM_SERVER_PROVIDERz$LLM server provider override set to 1ALLOW_LOCAL_LLMz:Local GGUF model fallback explicitly ENABLED for this run.z.cachezpulse_state.sqliteT)exist_ok
   )timeoutzStarting run with z cities in queue.z6Run finished. Generating output files from database...zdata/live_news.jsonzVSELECT geojson_feature, published_ts FROM articles ORDER BY published_ts DESC LIMIT 15r   r   
propertiesr`   r   )r   r   latestF)ensure_asciir-   )r/   zWrote z recent articles to z#Failed to generate live news file: )parentsr      )r   indentz total historical features to zdata/current_events.geojson)/signalSIGINTr   argparseArgumentParseradd_argumentr~   
parse_argsr?   basicConfigINFO
llm_serverosenvironr@   llm_providerallow_local_llmrH   r   
cities_csv
USER_AGENTparentmkdirsqlite3connectr'   rW   r   r   r<   keysrA   rv   rU   rs   reversedrc   rn   append
write_textrd   rB   errorcloserO   r   r   )pargs
user_agentdb_pathrR   
city_queueall_featureslive_news_out_pathru   r   max_tsrowsrE   feature_objts	live_datarG   out_pathcurrent_events_out_paths                      r   mainr     s   
M&-!7888!!ANN><N888NN7$;N<<<NN>TN:::NN?<N888NN>46kNlllNN#T  9nN  o  o  oNN&|  CUN  V  V  V<<>>Dgl3OPPPP  D'+
#$BBBCCC Q,0,=
()OD<MOOPPP S(+
$%QRRRD))***J8nn33GN$'''?3w<<444DTNNN &&J +T<#4#4#6#6777$T**
LHc*ooHHHIII
 LIJJJ,T22L 344Allstt||~~D>> 	 	C*SV,,KSVB8:K%n5OOK(((F{{ ( 
 
	
 	%%dj&O&O&OZa%bbbUc(mmUUASUUVVVV A A A?A??@@@@@@@@A 	JJLLLDH~~HO$666
:l#;#;%XYZZZelmmmLU#l++UU8UUVVV ##@AA$%<jIIIIIs   C.O 
O.O))O.__main__)rg   );__doc__r   rc   r   r?   pathlibr   typingr   r   r   r   r   r7   urllib.parser	   r
   r   r   r   r   r  r   rM   	llama_cppr   ImportErrorwarningrequestsbs4r   	wikipediar   r   r   r   r   r   r   r  r<   r!   rH   r'   rQ   
ConnectionrW   rf   rj   rv   r|   r   r   r   r   r   r   r  r$   r)   r   r   <module>r*     sx              : : : : : : : : : : : : : : 



 # # # # # # ' ' ' ' ' ' ' '   



  				 				V V V VEGOTUUUUUV        z z z z z z z z z z z z z z  O O O @
    z   Tt T T T T  (3-    (7%    2  &
 
 
 
:tDz : : : :V V V
  
.T#Y . . . .VtCy V V V V  <d < < < < <Nc Nd38n N N N NFPJ PJ PJd zDFFFFF s   A A32A3