
    qiB              	       r   d Z ddlZddlZddlZddlZddlmZ ddlmZmZ ddl	m
Z
mZmZmZ ddlZddlmZ ddlZddlZdZde
e         d	e
e         fd
Z G d d          Z	 ddlmZ n# e$ r dZY nw xY w G d d          Z G d d          Zdeded	e
eeef                  fdZdee         d	efdZefdedefdZe dk    rpddl!Z! e!j"                    Z#e#$                    dd           e#%                                Z& ej'        ej(        d            e ee&j)                             dS dS )z
wikipedia.py

Extracted Wikipedia current events processing and geolocation helpers.
This module provides `fetch_and_process_current_events(out_path, user_agent)`
which mirrors the logic previously embedded in `pulse.py`.
    N)Path)datetimetimezone)OptionalTupleListDict)BeautifulSoupz0pulse/1.0 (+https://github.com/htmlfarmer/pulse)sreturnc                 z    | | S 	 | }	 t          j        dd|          }||k    rn|} |S # t          $ r | cY S w xY w)NTz(?<=\d),\s+(?=\d{3}\b),)resub	Exception)r   outnews      %/home/asher/github/pulse/wikipedia.py_normalize_number_commasr      sp    y		&2C==CczzC		
 
   s   $+ ::c                       e Zd ZdZd Zd ZdS )SuppressStderrzDContext manager to suppress C-level stderr from llama_cpp if loaded.c                     t          j        d          | _        t          j        t           j        t           j                  | _        t          j        | j        d           d S N   )osduporiginal_stderropendevnullO_WRONLYdup2)selfs    r   	__enter__zSuppressStderr.__enter__)   sC    !vayywrz2;77
a         c                 l    t          j        | j        d           t          j        | j                   d S r   )r   r!   r   closer   )r"   exc_typeexc_valexc_tbs       r   __exit__zSuppressStderr.__exit__.   s/    
$a(((
r$   N)__name__
__module____qualname____doc__r#   r*    r$   r   r   r   '   s8        NN! ! !
    r$   r   )Llamac                   *    e Zd Zd ZddededefdZdS )AIModelc                    d | _         ddddddddd	d
dgdd| _        d| _        t          j        d           t
          t          j        d           d | _         d S 	 t                      5  t          dd|i| j        d         | _         d d d            n# 1 swxY w Y   | j         t          j        d           d S t          j        d           d S # t          $ r)}t          j        d|            d | _         Y d }~d S d }~ww xY w)Ni      r   F)n_ctx	n_threadsn_gpu_layersverboseg?(   gffffff?g?2   z
<|eot_id|>)temperaturetop_ktop_prepeat_penalty
max_tokensstop)llama_paramsgeneration_paramsz7You are a helpful assistant. Keep your answers concise.z<--> AI Core: Loading model for geolocation (wikipedia.py)...z9llama-cpp Llama class not available; skipping model load.
model_pathrA   z'--> AI Core: Model loaded successfully.z-!!! FATAL: AI Model not loaded (llm is None).z !!! FATAL: Error loading model: r/   )	llmconfigdefault_system_promptlogginginfor0   errorr   r   )r"   rC   es      r   __init__zAIModel.__init__:   s   &*AZ_``14rDdgwy  EQ  DR  "S  "S
 
 &_"STTT=MUVVVDHF		!! W W VVJV$+n:UVVW W W W W W W W W W W W W W Wx#FGGGGGMNNNNN 	 	 	M@Q@@AAADHHHHHHH	sB   C -BC BC B C  C 
D	 DD	Nuser_questionsystem_promptr   c                 t   | j         st          j        d           dS |d}d|dd|dg}	  | j         j        dd|i| j        d         }|d	         d
         d                             d          }|r|                                ndS # t          $ r"}t          j        d|            Y d }~dS d }~ww xY w)Nz"Error: The AI model is not loaded.zError: Model not loaded.zYou are a geolocator. Based on the news headline, identify the main city and country. Respond ONLY with the format 'City, Country'. If you cannot determine the location, respond with 'Unknown'.system)rolecontentusermessagesrB   choicesr   messagerQ   UnknownzError during AI generation: zError: Generation failed.r/   )rD   rG   rI   create_chat_completionrE   getstripr   )r"   rL   rM   rS   responserQ   rJ   s          r   askzAIModel.askQ   s    x 	.M>???--  `M%-@@6^kBlBlm	/6tx6mmmDKXkLlmmHy)!,Y7;;IFFG&-<7==???9< 	/ 	/ 	/M<<<===......	/s   AB 
B7B22B7N)r+   r,   r-   rK   strr[   r/   r$   r   r2   r2   9   sQ          ./ / /S /C / / / / / /r$   r2   c                   0    e Zd ZdefdZddededefdZdS )	RemoteLLMClient
server_urlc                    |                     d          | _        | j                            d          s| j        dz   | _        d| _        dddii| _        	 | j                            dd          d         p| j        }t          j        |d	
          }|j        | _        d S # t          $ r1}t          j        d| j         d|            d| _        Y d }~d S d }~ww xY w)N/z/askFrB   r?         r      )timeoutz(RemoteLLMClient health check failed for : )rstripr`   endswith	availablerE   rsplitrequestsrX   okr   rG   warning)r"   r`   baserrJ   s        r   rK   zRemoteLLMClient.__init__b   s    $++C00''// 	7"o6DO*\4,@A	#?))&!44Q7J4?DT1---ATDNNN 	# 	# 	#O]t]]Z[]]^^^"DNNNNNNN	#s   A
B# #
C-&CCNrL   rM   r   c                    | j         st          j        d           dS |g d}|r||d<   t          j                            d          }|r||d<   n#t          j                            dd          |d<   d	d	d
}t          t          j                            dd                    }t          t          j                            dd                    }d}d }	||k    r	 t          j	        | j
        ||d          }
|
                                 |
j                            d          pd                                }d	|v r-|
                                }|                    dd          pd}n	|
j        pd}|                                }d|v rd|v s	d|v sd|v r|                    d          dk    st          j        d           d|d<   	 t          j	        | j
        ||d          }|                                 |j                            d          pd                                }d	|v r,|                                }|                    dd          pdS |j                                        pdS # t$          $ r!}t          j        d|            Y d }~nd }~ww xY w|r|ndS # t          j        $ r3}|}	d }	 |j        j        }n# t$          $ r Y nw xY wt          j        d| d| d| d |            ||k     r?|d!|z  z  }t          j        d"| d#           t/          j        |           |d$z  }Y d }~n|(t3          |t                    rd%|cxk    rd&k     r|n nx	 | j
        }d'|vr|d'z   }d	d(d
}t          j	        |||d)d*+          5 }	 |                                 n*# t$          $ r}t          j        d,|             d }~ww xY wg }|                    d*-          D ]w}||                                }|s|                    d.          rF|t9          d.          d                                          }|d/k    r n|                    |           xd0                    |                                          }|pdcd d d            cY d }~S # 1 swxY w Y   n0# t$          $ r#}|}	t          j        d1|            Y d }~nd }~ww xY wY d }~nd }~wt          j        $ rn}|}	t          j        d2| d | d3|            ||k     r?|d!|z  z  }t          j        d"| d#           t/          j        |           |d$z  }Y d }~Y d }~nd }~ww xY wt          j        d4|	            d5| d6|	 S )7Nz Remote LLM server not available.z#Error: Remote server not available.)promptconversationrM   LLM_SERVER_PROVIDERproviderLLM_DEFAULT_PROVIDERzgemini-2.5-flash-litezapplication/json)zContent-TypeAcceptLLM_RETRY_COUNT2LLM_RETRY_BACKOFFz0.5r      )jsonheadersrf   zcontent-type rZ   403zAPI keyzYour API keyleakedlocalzLDetected API key error in remote response; retrying once with provider=localrV   z"Retry with provider=local failed: z$Remote LLM POST returned HTTP error z	 (status=z); attempt rb   r   zRetrying after zs...rd   i  iX  z	?stream=1ztext/event-stream<   T)r|   r}   rf   streamzStreaming POST failed: )decode_unicodezdata:z[DONE]
zStreaming fallback failed: z(Remote LLM request exception on attempt rg   z All Remote LLM attempts failed: z#Error: Remote request failed after z attempts: ) rj   rG   rI   r   environrX   intfloatrl   postr`   raise_for_statusr}   lowerr|   textrY   rn   r   	HTTPErrorrZ   status_coderH   timesleep
isinstance
iter_lines
startswithlenappendjoinRequestException)r"   rL   rM   payloadru   r}   retry_countbackoff_baseattemptlast_excrp   ctj	resp_textr2ct2j2rJ   hestatus
sleep_time
stream_urlstream_headersresppartsrawlinedatafinalr   s                                 r   r[   zRemoteLLMClient.askp   s<   ~ 	9M<===88*B?? 	5'4GO$:>>"788 	b"*GJ"$*..1GI`"a"aGJ#5ASTT"*..):C@@AARZ^^,?GGHH$$OM$/Z\]]]""$$$immN339r@@BB%++A !j" 5 5 ;II !"I%OO--	i''I,B,B~YbGbGbfnr{f{f{  FM  FQ  FQ  R\  F]  F]  ah  Fh  FhO$rsss*1GJ'	R%]4?RYcefff++---!z~~n==CJJLL-44!#B#%66*i#@#@#MIM!w}};);$ R R R(PQ(P(PQQQQQQQQR$-<yy9<% + + +[4FF    D ~r ~ ~TZ ~ ~gn ~ ~q| ~ ~[((!-g!>JL!C:!C!C!CDDDJz***qLGHHHH>j&=&=>#BUBUBUBURUBUBUBUBUBUK%)_
&j88)3k)AJ:LXk)l)l%]:G^egptuuu 6y}& $ 5 5 7 7 7 7#, & & & '.K.K.K L L L %& %'E'+d'K'K 
7 
7#&;$,'*yy{{'+ !-$,#'??7#;#; !7+/G+>+D+D+F+FD'+x'7'7(-$)LL$6$6$6$(IIe$4$4$:$:$<$<E#(#5I'6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6( % K K K#$(Ia(I(IJJJJJJJJK , 
 
 
 h7 h hU` h hdf h hiii[((!-g!>JL!C:!C!C!CDDDJz***qLGHHHHEEEE
 	CCCDDDSWSSSSSs   C0J. BI: I: :
J%J J.  J%%J. .U,>S,KS,
KS,KA"S,*S,0.R7R+ N54R+5
O?OOB=R+R7%U,+R/	/R72R/	3R76S,7
S$SS,S$$S,,U,>AU''U,r\   )r+   r,   r-   r]   rK   r[   r/   r$   r   r_   r_   a   sq        #3 # # # #cT cT cTS cTC cT cT cT cT cT cTr$   r_   location_name
user_agentc                    | r|                                  dv rd S t          j        d|             dt          j                            |            d}d|i}	 t          j        ||d          }|                                }|                    d          st          j        d	|  d
           d S |d         d         d         }d| d}t          j        ||d          }|                                }	|	                    di                               |i                               di           }
d|
v rU|
d         d         d         d         d         }|d         |d         }}t          j        d| d| d|  d| d	           ||fS t          j        d|  d| d           d S # t          $ r%}t          j
        d|  d|            Y d }~d S d }~ww xY w)N)unknownzerror: model not loaded.zerror: generation failed.z  -> Querying Wikidata for: zBhttps://www.wikidata.org/w/api.php?action=wbsearchentities&search=z &language=en&limit=1&format=json
User-Agent
   r}   rf   search&  -> Wikidata: No search results for 'z'.r   idz<https://www.wikidata.org/w/api.php?action=wbgetentities&ids=z&format=json&props=claimsentitiesclaimsP625mainsnak	datavaluevaluelatitude	longitudez"  -> Wikidata: Found coordinates (, z) for z (z).z/  -> Wikidata: No coordinates (P625) found for z  -> Wikidata API error for '': )r   rG   rH   rl   utilsquoterX   r|   rn   r   rI   )r   r   
search_urlr}   search_responsesearch_dataqid
entity_urlentity_responseentity_datar   coordslatlonrJ   s                  r   get_coords_from_wikidatar      sJ    M//115yyytL???@@@ \V^VdVjVjkxVyVy  \  \  \JZ(G",z7BOOO%**,,x(( 	OV]VVVWWW4(#A&t,rTWrrr
",z7BOOO%**,,R0044S"==AA(BOOVF^A&z2;?HFj)6++>CLhchhShhP]hhadhhhiii:Ofmff_bfffggg4   KmKKKKLLLttttts&   AF- 4CF- F- -
G7GGfeaturesc                     d| dS )NFeatureCollection)typer   r/   )r   s    r   
to_geojsonr      s    'X>>>r$   out_pathc                 /  abc t           j                            dd          }dbt           j                            dd          c	 t          |          }|j        rt          j        d|            |bnt          j        d| d           n.# t          $ r!}t          j        d	|            Y d}~nd}~ww xY wbt           j                            d
d          	                                dv }|rt          j        d           t          c                                          r7t          c          bt          bdd          st          j        d           dbn-t          j        dc d           nt          j        d           t          j        d           d}d|i}	 t          j        ||d          }|                                 n4# t          j        $ r"}t          j        d|            Y d}~dS d}~ww xY wbcfdadabfd	}	t%          |j        d          }
 |
g d          D ]}|                                 t+          j        d          }g }|
                    dd            }|s_|
                    dd!           |
                    dd"           z   }|D ]-}|                    |                    dd#                       .|sJ|
                    dd$%          D ]2}|                    |d&                   r|                    |           3|D ]}d'|                    d(          pg v r n|                    dd)           p|                    dd*           }|sPd}|j        D ]v}t          |d+d          d,k    r|                    d-          }|                    d.d$/          }|r0t=          |          d0k     r|                    d$/          |k    r|}v|r#|r| d1| n|}|                    |g d2           t          |d+d          d3k    r|                    d4d56          D ]}|                    d.d$/          }|sg }|                    d7d$8          D ]T}|d9         }|                    d:          rd;|z   }n|                    d<          rd=|z   }|                    |           U|r| d1| n|}|                    ||d2           x| j         !                    d$d$>           | j         d?z  }	 |"                    d@dAB           n# t          $ r Y nw xY wg }|st          j        dC           tG          |          D ] \  }} |                     dDdE          }!|                     dFg           }"dE}#dE}$dE}%dx}&}'|!}(d})d}*br=dG|! }+dH},dIbj$        dJ         dK<    |	|+|,L          }-|-}*	 tK          dM| d1|* tL          j'        N           n# t          $ r Y nw xY w|-(                                }.dO|.v rm|.                    dO          dPz   }/|.                    dQ|/          }0|0dRk    r|.|/|0         (                                n|.|/d         (                                }.npdQ|.v rl|.                    dQ          dSz   }/|.                    dQ|/          }0|0dRk    r|.|/|0         (                                n|.|/d         (                                }.	 tS          j*        |.          }1tW          |1tX                    r|1                    dT          pdE}#|1                    dU          pdE}$|1                    dV          }2tW          |2tX                    r|2                    dWdE          }3|2                    dXdE          }4|3 dY|4 (                    dY          }%|2                    dZ|1                    dZ                    }&|2                    d[|1                    d[                    }'nntW          |2tZ                    r-|2}%|1                    dZ          }&|1                    d[          }'n,dE}%|1                    dZ          }&|1                    d[          }'|1                    d\          p|(}(|1                    dV          r|1                    dV          nd})n<# t          $ r/}t          j        d]| d1| d^|-dd_                     Y d}~nd}~ww xY wd}5d}6br	 d`|! }7 |	|7          }5	 tK          da| db|!dd0          dc|5 tL          j'        N           n# t          $ r Y nw xY w|5pdE(                                }8dO|8v rm|8                    dO          dPz   }/|8                    dQ|/          }0|0dRk    r|8|/|0         (                                n|8|/d         (                                }8npdQ|8v rl|8                    dQ          dSz   }/|8                    dQ|/          }0|0dRk    r|8|/|0         (                                n|8|/d         (                                }8d5}9	 tS          j*        |8          }:tW          |:tX                    rodZ|:v rkd[|:v rgt]          |:dZ                   };t]          |:d[                   }<|;|<dd}6d$}9	 tK          de| d1|; df|< tL          j'        N           n# t          $ r Y nw xY wn# t          $ r d5}9Y nw xY w|9sdgdl}=|=/                    dh|8          }>|>r	 t]          |>0                    di                    };t]          |>0                    dj                    }<|;|<dd}6d$}9	 tK          dk| d1|; df|< tL          j'        N           n# t          $ r Y nw xY wn# t          $ r d5}9Y nw xY wn# t          $ r$}t          j        dl| d1|            Y d}~nd}~ww xY wd}5d}6n|"r|"dg         }?|?                    dm          rV|?1                    dn          dR         2                    dod.          }@tg          |@|          }A|Ar|Adg         |Adi         }'}&|@}%|@}#nRt+          j/        dp|!          }>|>r;|>0                    di          }Btg          |B|          }A|Ar|Adg         |Adi         }'}&|B}%|B}#	 |&t]          |&          nd}&|'t]          |'          nd}'n# th          tj          f$ r dx}&}'Y nw xY w|%rtg          |%|          nd}Adx}C}Dd}Ed}F|Ar/|Adg         |Adi         }D}Ct          j        dq|% dr|C df|D            nvt          j        ds|% dt           br[|%rX	 du|% dv}G |	|G          }Et          j        dw|% drtm          |E          ddx                     	 tK          dy|% dr|E tL          j'        N           n# t          $ r Y nw xY w|E(                                }HdO|Hv rm|H                    dO          dPz   }/|H                    dQ|/          }0|0dRk    r|H|/|0         (                                n|H|/d         (                                }HnpdQ|Hv rl|H                    dQ          dSz   }/|H                    dQ|/          }0|0dRk    r|H|/|0         (                                n|H|/d         (                                }Hd5}Idz }J	 tS          j*        |H          }KtW          |KtX                    rdZ|Kv rd[|Kv r	 t]          |KdZ                   }Lt]          |Kd[                   }M|L|M}D}C|L|Mdd}Ft          j        d{|% dr|C df|D            	 tK          d||% dr|L df|M tL          j'        N           n# t          $ r Y nw xY wd$}In# t          $ r d5}IY nw xY wn# t          $ r d5}IY nw xY w|Ist          j        d}|% dr|Hddx                     	 tK          d~|% dr|H tL          j'        N           n# t          $ r Y nw xY wdgdl}N|Isd|Hv sd|Hv sd|Hv r|N7                    d|H          }Ot=          |O          djk    r	  |J|Odg          }L |J|Odi          }M|L`|M^|L|M}D}C|L|Mdd}Ft          j        d|% dr|L df|M            	 tK          d|% dr|L df|M tL          j'        N           n# t          $ r Y nw xY wd$}In# t          $ r d5}IY nw xY w|N/                    dh|H          }>|>r	 t]          |>0                    di                    }Lt]          |>0                    dj                    }M|L|M}D}C|L|Mdd}Ft          j        d|% dr|L df|M            	 tK          d|% dr|L df|M tL          j'        N           n# t          $ r Y nw xY wd$}In# t          $ r d5}IY nw xY w|Is|N/                    d|H|Nj8                  }P|Pr	 t]          |P0                    di                    }Lt]          |P0                    dj                    }M|L|M}D}C|L|Mdd}Ft          j        d|% dr|L df|M            	 tK          d|% dr|L df|M tL          j'        N           n# t          $ r Y nw xY wd$}In# t          $ r d5}IY nw xY w|Is|N                    d|H          }Q|Qr|Q0                    di          (                                 dY|Q0                    dj          (                                 }Rt          j        d|R d           	 tg          |R|          }S|Srj|Sdg         |Sdi         }D}C|C|Ddd}Ft          j        d|R d|C df|D            	 tK          d|% dr|C df|D tL          j'        N           n# t          $ r Y nw xY wd$}In# t          $ r d5}IY nw xY wn1# t          $ r$}t          j        d|% dr|            Y d}~nd}~ww xY wdd}Td5}U|&+|')|C$|D" |T|&|Cd          r |T|'|Dd          rd$}Und5}Und$}Un
|C|D|C|D}'}&d5}U|&c|'`|6rotW          |6tX                    rZ	 t]          |6                    dZ                    }Vt]          |6                    d[                    }W|V}&|W}'n# t          $ r Y nw xY w|#pdEdz   |%pdEz   dz   t[          |          z   }Xts          j:        |X;                    dA                    <                                }Yd}Zdt{                      v r|Urd}Zn|C|Dd}Zn|*rdnd}Zt}          |$          }$d|Yi d&|YdT|#p|!dd         dz   dU|$dV|%dZ|&d[|'d\|(d|"r|"dg         ndd|"d|)dtW          |*tZ                    r
|*ddx         nddtW          |EtZ                    r
|Eddx         ndd|FdtW          |5tZ                    r
|5ddx         ndd|6d|Zddd|)rdndid|'|&gdd}[|                    |[           	 | j         | j?        dz   z  }\|\"                    tS          j@        t          |          d5dj          dAB           t          j2        t[          |\          t[          |                      n.# t          $ r!}t          j        d|            Y d}~nd}~ww xY w	 | j         dz  }]dE}^|Fr.|F                    dZ           df|F                    d[           }^n |Er|E2                    dd.          dd         }^t          t          jC                               d|Y d|Z d|% d|& df|' d|^ d|# d}_|]D                    d7dAB          5 }`|`E                    |_           ddd           n# 1 swxY w Y   n*# t          $ r Y nw xY wt          j        d|# d|%            t          jF        d           "| "                    tS          j@        t          |          d5dj          dAB           t          j        dt=          |           d|             dS )z}Main entry: fetch the Wikipedia Portal:Current_events page and geolocate items.
    Writes a GeoJSON file to `out_path`.
    LLM_SERVER_URLzhttp://127.0.0.1:5005/askNLOCAL_LLM_MODEL_PATHz\/home/asher/.lmstudio/models/lmstudio-community/gemma-3-1b-it-GGUF/gemma-3-1b-it-Q4_K_M.ggufzUsing remote LLM server at zRemote LLM server at z not available.zRemote LLM client init failed: ALLOW_LOCAL_LLM0)1trueyesuK   ALLOW_LOCAL_LLM is set — attempting to load local GGUF model as fallback.rD   zDFailed to load local GGUF LLM. Continuing with fallback geolocation.Local model path not found at ; cannot load fallback.z\Local GGUF fallback is disabled (set ALLOW_LOCAL_LLM=1 or pass --allow-local-llm to enable).zAFetching full Wikipedia Current Events page for LLM extraction...z3https://en.wikipedia.org/wiki/Portal:Current_eventsr      r   z%Failed to fetch current events page: c                  l   t          dd           t          u rt          dd           rS t                                                    rNt                    } t          | dd           rt	          j        d           | S t	          j        d           d S t	          j        d d           d S )N	__class__rD   z Loaded local model for fallback.z=Local model class created but underlying Llama not available.r   r   )getattrr2   r   existsrG   rH   rn   )lmai_modelrC   s    r   _load_local_modelz;fetch_and_process_current_events.<locals>._load_local_model!  s    8[$//7::wxQVX\?]?]:O
""$$ 	$$Br5$'' ?@@@ _```tL]*]]]^^^4r$   c                    s%             }|sdS |                     | |          S 	                      | |          }t          |t                    rQ|                    d          r<t	          j        d|                         }|r||                     | |          S |S # t          $ rP}t	          j        d|                         }|r||                     | |          cY d }~S d| cY d }~S d }~ww xY w)NzError: No LLM available.rM   zError:z2LLM reported an error; attempting local fallback: zLLM ask failed with exception: zError: )r[   r   r]   r   rG   rn   r   rI   )rr   rM   r   rp   rJ   r   r   s        r   _ask_with_fallbackz<fetch_and_process_current_events.<locals>._ask_with_fallback2  sc    	?""$$B 21166&6>>>	!V=AAA!S!! Gall8&<&< G XUV X XYYY&&(( G!H66&6FFFH 	! 	! 	!M?A??@@@""$$B CvvfMvBBBBBBBB Q==      	!s0   A<B) 'B) )
D3;C>.D4C>8D>Dzhtml.parser)scriptstyleheaderfooternavasidez^\d{4}_[A-Z][a-z]+_\d{1,2}$divc                     | od| v S Nzcurrent-events-mainr/   cs    r   <lambda>z2fetch_and_process_current_events.<locals>.<lambda>Q  s    8X>SWX>X r$   )class_zp-current-events-eventszcurrent-eventsc                     | od| v S r   r/   r   s    r   r   z2fetch_and_process_current_events.<locals>.<lambda>U  s    !JjPeijPj r$   T)r   r   zcurrent-events-moreclasszcurrent-events-contentz"current-events-content descriptionnamepb )rY   r   rg   )r   linksulliF)	recursivea)hrefr  z//zhttps:rb   zhttps://en.wikipedia.org)parentsexist_okzcurrent_events.runningr   zutf-8)encodingz1No news items found for the current date section.r   r~   r   zYou are a news geolocator. Given the following news item, extract a JSON object with: 'title', 'summary', 'place' (city, country), 'lat', 'lng', and 'event_text'. Respond ONLY with a JSON object, no extra text. News Item: zYou are a helpful assistant that extracts a single news story as a JSON object with 'title', 'summary', 'place', 'lat', 'lng', and 'event_text'. Do not include any extra commentary or explanation.rc   rB   r?   r   zLLM story raw for item )filez```json   z```   titlesummaryplacecitycountryr   r   lng
event_textz'LLM did not return valid JSON for item z
Response: i,  a  You are a geocoder. Read the news item below and estimate the most likely coordinates (latitude and longitude) of the main location associated with this story. Respond ONLY with a JSON object containing numeric 'lat' and 'lng' fields, or the single word 'Unknown'.

News Item: zLLM-only geocode raw for item z ('z...'): )r   r  z!LLM-only geocode parsed for item r   r   z7([-+]?[0-9]{1,3}\.?[0-9]*)\D+([-+]?[0-9]{1,3}\.?[0-9]*)rd   r   z+LLM-only geocode parsed (numeric) for item z'Error during LLM-only geocode for item zhttps://en.wikipedia.org/wiki/z/wiki/_z, in ([A-Z][a-zA-Z\-]+(?: [A-Z][a-zA-Z\-]+)*)z   -> Using Wikidata coords for 'r   r   z('. Will ask LLM for coords if available.z9You are a geocoder. Given the place name or descriptor: 'z'. Respond ONLY with a JSON object containing numeric fields 'lat' and 'lng', or the single word 'Unknown'. If you are unsure, respond with 'Unknown'.z   -> LLM raw geocode reply for 'i  zLLM geocode raw for 'c                     	 t          |           t          |          dz  z   t          |          dz  z   }|r&|                                dv rt          |           S |S # t          $ r Y d S w xY w)Ng      N@g      @)SW)r   upperabsr   )dmr   hemivals        r   _dms_to_decimalz9fetch_and_process_current_events.<locals>._dms_to_decimal8  s    ("'((eAhho">%((VBS"TC# 1


(B(B(+Cy 0#&J( ( ( (#'44(s   AA! A! !
A/.A/z&  -> LLM geocode provided coords for 'zLLM geocode parsed for 'z#  -> LLM geocode parse failed for 'zLLM geocode parse failed for '   °u   ′"uQ   ([0-9]{1,3})°\s*([0-9]{1,2})['′]\s*([0-9]{1,2}(?:\.[0-9]+)?)\"?\s*([NnSsEeWw])z.  -> Extracted DMS coords from LLM reply for 'zLLM geocode parsed (DMS) for 'z2  -> Extracted numeric coords from LLM reply for 'z"LLM geocode parsed (numeric) for 'zRlat[^0-9-]*([-+]?[0-9]{1,3}\.?[0-9]*)[^0-9-]+lng[^0-9-]*([-+]?[0-9]{1,3}\.?[0-9]*)z3  -> Extracted lat/lng pattern from LLM reply for 'z"LLM geocode parsed (pattern) for 'z0^\s*([\w\-\.\'\s]{2,}),\s*([\w\-\.\'\s]{2,})\s*$z*  -> LLM geocode looks like place string 'z', trying Wikidata lookup...z  -> Resolved LLM place 'z' via Wikidata: z'LLM geocode resolved via Wikidata for 'z&  -> Error asking LLM for geocode of '      ?c                     	 t          t          |           t          |          z
            |k    S # t          $ r Y dS w xY w)NF)r  r   r   )r  r   tols      r   is_closez2fetch_and_process_current_events.<locals>.is_close  sM    588eAhh.//366   uus   /2 
A A g       @)r   |r   use_llmwikidatafallbackFeatured   z...urlevent_linksllm_sentencellm_rawllm_geocode_rawllm_geocode_parsedllm_only_geocode_rawllm_only_geocode_parseddecisionsourcezWikipedia Current Eventsgeolocation_sourcePoint)r   coordinates)r   r   
propertiesgeometryz.tmp)ensure_asciiindentz-Failed incremental write for current events: zcurrent_events_debug.logr      	zCould not geolocate story: z / g      ?zWrote z geolocated current events to r\   )r  )Gr   r   rX   r_   rj   rG   rH   rn   r   r   r   r   r2   r   rI   rl   r   r   r
   rQ   	decomposer   compilefind_allextendmatchr   findchildrenget_textr   r   parentmkdir
write_text	enumeraterE   printsysstderrrY   r|   loadsr   dictr]   r   r   groupsplitreplacer   	TypeError
ValueErrorreprfindallIhashlibmd5encode	hexdigestlocalsr   r   dumpsr   r   r   r   writer   )dr   r   llm_server_urlremote_clientrJ   allow_localr(  r}   rZ   r   souptag
date_id_re
news_itemsdate_blocks
containers	containerr   content_divcurrent_categoryelemr   ptextr   r   litextr   r  r  running_flagr   idxitemnews_item_text
news_linksr
  r  	place_strr   r  r  r*  r+  single_promptrM   llm_responsecleanedstartendstoryr  r  r  r.  r/  geocode_only_promptcleaned_geo2
parsed_ok2geo_obj2lat_geo2lng_geo2__re2r  first
wiki_titler   	candidatelat_wikilng_wikir,  r-  geocode_promptcleaned_geo	parsed_okr  geo_objlat_geolng_geo__redms_matchesm2m3place_guesscoords2r!  r#  	llm_lat_v	llm_lng_vfid_srcfidr0  featuretmpdbgllm_geo_strr   dfr   r   rC   sd                                                                                                    @@@r    fetch_and_process_current_eventsr     s!    Z^^$46QRRNH 6  9W  X  XJ?'77" 	ULG~GGHHH$HHOSNSSSTTT ? ? ?=!==>>>>>>>>? jnn%6<<BBDDH\\ 
	yLfgggJ&&(( c":..x55 $M"hiii#HajaaabbbbLwxxxLTUUU
?CZ(G<WbAAA!!####$   AaAABBB     "! ! ! ! ! ! !2 )=99DtKKKLL  :;;JJ--.X.X-YYK m]]51J]KKdmm\ajzmN{N{{
# 	m 	mIy11%@j@j1kkllll (==4=00 	( 	(CD	** (""3''' F F SWWW%5%5%;<<Ehhu-EhFF  G#((SX  bF(  KG  KG 	( 	F 	FDtVT**c11IIcNNc66 CUbajjtj.D.D.M.M',$$ C=MX.99%999SXD%%tb&A&ABBBvt,,44--->> F FB[[D[99F! ! E[[4[88 + + y??400 E#+d?DD!__S11 E#=#DDT****>NZ.::&:::TZD%%te&D&DEEEE/	F2 O$666?%==Lg6666    H MKLLLz** r r	T&"--XXgr**
	c#
 @	./,/ / F  BFHO/0>--m=YYYL"G@@@w@@szRRRRR   "((**GG##Y//!3ll5%008;r		'%),22444wuvvG\G\G^G^'!!U++a/ll5%008;r		'%),22444wuvvG\G\G^G^v
7++eT** V!IIg..4"E#ii	228bG!IIg..E!%.. /$yy44"'))Ir":":'+$8$8w$8$8$>$>t$D$D	#iiuyy/?/?@@#iiuyy/?/?@@#E3// /$)	#ii..#ii..$&	#ii..#ii..!&<!8!8!FJJ9>79K9K#U599W#5#5#5QUL v v v t# t tQR t t`lmqnqmq`r t tuuuuuuuuv $( &*# 7/3X7 '57 7 ( ,>+=>Q+R+R(ysyy~VYWYVYGZyycwyy  AD  AK  L  L  L  L  L$   $8$>B#E#E#G#GL L00 , 1 1) < <q @*//u==JMQS))|E#I'>'D'D'F'F'FYefkflflYmYsYsYuYu,.. , 1 1% 8 81 <*//u==JMQS))|E#I'>'D'D'F'F'FYefkflflYmYsYsYuYu!&J+#':l#;#;%h55 %%8:K:KPUYaPaPa',Xe_'='=H',Xe_'='=H>Fx6X6X3)-J% %&f#&f&fQY&f&f\d&f&fmpmw x x x x x#, % % % $%$ + + +%*


+% 3****!LL)ceqrr 3
3+0+<+<+0+<+<BJS[:\:\ 7-1
!)$)*tX[*t*t_g*t*tjr*t*t{~  |F  %G  %G  %G  %G  %G'0 !) !) !)$(D!)#, 3 3 3-2


3  X X XM"VC"V"VST"V"VWWWWWWWWX (,$*.'' ."1##$DEE .!&X!6!6r!:!B!B3!L!LJ5j*MMF +#)!9fQiS$.	 *	"QSabbA .$%GGAJJ	!9)Z!P!P! .'-ay&)C(1I$-E	 #%***TC #%***TCC:& 	 	 	C###	 ENW))Z@@@SW""8! D	^!'F1IhHL_I__(__U]__````OxYxxxyyy ^I ^~^ET] E E E #
 '9&8&H&HOL!pI!p!pRVWfRgRghmimhmRn!p!pqqqUiUUOUU\_\fggggg$   "1"7"7"9"9K K// + 0 0 ; ;a ?)..ue<<HKr		k%)&<&B&B&D&D&DWbchciciWjWpWpWrWr+-- + 0 0 7 7! ;)..ue<<HKr		k%)&<&B&B&D&D&DWbchciciWjWpWpWrWr %I( ( (*"&*["9"9%gt44 2'9I9IeW^N^N^2*/*?*?*/*?*?5<g(=DW5U5U 2 '-yV_-y-ydl-y-yow-y-y z z z!)$)*fY*f*fSZ*f*f]d*f*fmpmw$x$x$x$x$x'0 !) !) !)$(D!),0		#, 2 2 2,1			2$ * * *$)			* % K6(pi(p(p\ghmimhm\n(p(pqqq!!"^9"^"^Q\"^"^eheoppppp( ! ! ! D!))))( 6dk.A.AUkEYEY]`do]o]o*.,,  8L  NY  +Z  +ZK";//144!6.=o{1~.NG.=o{1~.NG'.':w?R=Dg(ELU\=]=](:(/  6Hfo  6H  6Ht{  6H  6H  F  6H  6H  )I  )I  )I)1,12tS\2t2tah2t2tkr2t2t{~  |F  -G  -G  -G  -G  -G/8 )1 )1 )1,0D)148	'0 !6 !6 !605III!6 !KK(bdopp 22*/

*;*;*/

*;*;5<g(=DW5U5U 2 '  .Dbk  .D  .Dpw  .D  .D  {B  .D  .D  !E  !E  !E!)$)*py*p*p]d*p*pgn*p*pwz  xB  %C  %C  %C  %C  %C'0 !) !) !)$(D!),0		#, 2 2 2,1			2  ) 6!%  .C  EP  RV  RX  "Y  "YB! 6!6.3BHHQKK.@.@G.3BHHQKK.@.@G9@'hHAHQX9Y9Y$6$+L  2Igp  2I  2Iu|  2I  2I  @G  2I  2I  %J  %J  %J%-(-.tS\.t.tah.t.tkr.t.t{~  |F  )G  )G  )G  )G  )G+4 %- %- %-(,%-04II'0 !6 !6 !605III!6  ) 6!%,_al!m!mB! 613!1B1B1D1D.].]QRHYHYH[H[.].] '  .DZe  .D  .D  .D  !E  !E  !E!6.F{T^._._G'. %9=DQZQR(EMV^=_=_(:(/  6DQ\  6D  6Dnv  6D  6D  zB  6D  6D  )E  )E  )E)1,12\e22jr22u}22  GJ  GQ  -R  -R  -R  -R  -R/8 )1 )1 )1,0D)148	'0 !6 !6 !605III!6  ^ ^ ^M"\9"\"\YZ"\"\]]]]]]]]^	 	 	 	 ?s#(<8Cs333 $hTW8X8X8X $"GG#GG!h&:CG?s& :6Mt+T+T  %&=&A&A%&H&H I II %&=&A&A%&H&H I II#C#CC    D {c)Y_"=Cc#hhNG+gnnW5566@@BBC HFHH$$$ %(*>%$+;55.w77G!#UD~dsd';e'C w Y	
 3 3 !* J@:a==D ": #L *Wc2J2J TPT &*_^aBbBb(l(>(>hl )*< +JWkmpLqLq-{-A%4%-H-Hw{ ./F  !" 8#$ )<*O%%Z% ( &-c3ZHH/ G2 OOG$$$Uo)?@tz*X*>*>U[\]]]hoppp
3s88S]]3333 U U U SPQ S STTTTTTTTUo(BB % K%7%;%;E%B%B"d"dEWE[E[\aEbEb"d"dKK$ K"1"9"9$"D"DTcT"JKdikk**sscssXsssscssTWss[fssjosssXXcGX44 #HHTNNN# # # # # # # # # # # # # # #    OO%OOIOOPPP
3
:h#7#7eTUVVVahiiiLQ#h--QQxQQRRRRRs>  AB 
B;B66B;*+G H%HH'T? ?
UU"!X
XX
Gc
d!%ddo,,eo
e&#o%e&&C<o#A"k<$k+*k<+
k85k<7k88k<;o<lo
l!o-An/9$nn/
n+(n/*n++n/.o/n>;o=n>>o
o0o++o0 &s''s?>s?.A AN/!wAN
wANwC=AN1~*A~$~~
~~~~~*~&#~*%~&&~*)AN*~96AN8~99(AN"!A@@AN@
A@@AN@A@@>ANAA AC	B$AB6B5AC	B6
ACC AC	CACCAC	CANC	ACCANCACCANC4A*AFE$AFFAFF
AFFAFFAFFAFFANFAF&F#ANF%AF&F&#ANG
A*AI-H5$AIIAI-I
AI'I$AI-I&AI'I'AI-I,ANI-AI<I9ANI;AI<I<BANLAANM	$AM.M-ANM.
AM;M8ANM:AM;M;ANN ANNANNANNANNANN
AONAN=N=AOP AAQ)Q)
AQ6Q5AQ6WA:AYY
AY;YAY6Y6AY;Y?B+A]\*A]] A]]A]	]A]]A]	]A]]
A]%]$A]%__main__z--outzdata/current_events.geojson)defaultz%(levelname)s: %(message)s)levelformat)*r.   r|   rG   r   r   pathlibr   r   r   typingr   r   r   r	   rl   bs4r
   r   rT  
USER_AGENTr]   r   r   	llama_cppr0   r   r2   r_   r   r   r   r  r+   argparseArgumentParserr   add_argument
parse_argsargsbasicConfigINFOr   r/   r$   r   <module>r     s      				        ' ' ' ' ' ' ' ' . . . . . . . . . . . .        				 ?
 (3-    	 	 	 	 	 	 	 	   EEE%/ %/ %/ %/ %/ %/ %/ %/PrT rT rT rT rT rT rT rTjC S XeTY[`T`NaEb    <?d ? ? ? ? ? HR CS CSt CS CS CS CS CSL zOOO!!ANN7$ANBBB<<>>DGgl3OPPPP$$TT$(^^44444 s   #A* *A43A4