
    ei]                     8	   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlZddlZddlmZmZmZ ddlmZmZmZ ddlmZ ddlmZ 	 dd	lmZ n# e$ r dZY nw xY w ee                                          j        Z e d
z  Z!ej"        #                    dd          Z$ej"        #                    d          Z%ej"        #                    dd          Z& e'ej"        #                    dd                    Z( ej)        ej*        d            e            Z+e+,                    edgddgdg            G d de          Z- G d de          Z.de/de/fdZ0e+1                    d          d             Z2e+1                    d           d!             Z3e+#                    d"          d#             Z4e+#                    d$e%          d&             Z5e+6                    d'          d(ed)e-fd*            Z7e+6                    d+          d(efd,            Z8e9d-k    rtddl:Z:ddl;Z;ddl<Z<ddl=Z=ddl>Z>d. Z?dPd0Z@dQd2ZAd3 ZB e:jC        d45          ZDeDE                    d67          ZFeFG                    d8d9:           eFG                    d;d<:          ZHeHI                    d=d/d>?           eFG                    d@dA:           eFG                    dBdC:           eFG                    dDdE:           eFG                    dFdG:           eDJ                                ZKeKjL        d;k    r e@eKjM        H           dS eKjL        d@k    r eA             dS eKjL        dBk    r	 ddl;Z; e;jN        g dIdJK           e!e dLz  fD ]6ZO	 eOP                                reOQ                                 ,# e$ r Y 3w xY w eRdM           dS # e$ rZS eRdNeS           Y dZS[SdS dZS[Sww xY weKjL        dDk    r	 ddl;Z; e;jN        g dIdJK           n# e$ r Y nw xY we!e dLz  fD ]6ZO	 eOP                                reOQ                                 ,# e$ r Y 3w xY w e@ eTeKdO          reKjM        nd/H           dS eKjL        dFk    r eB             dS  e?             dS dS )RaC  
Simple LLM HTTP server using FastAPI.
- Loads a local GGUF model once on startup (configurable via MODEL_PATH env var)
- POST /ask accepts JSON { prompt, system_prompt?, generation_params? } and returns { response }
- POST /shutdown optionally protected by LLM_SHUTDOWN_TOKEN env var (if set)
- Writes .llm_server_pid in the project dir for compatibility with existing scripts

Run: python3 llm_server.py
Or run via: uvicorn llm_server:app --host 127.0.0.1 --port 5005

This server intentionally does NOT keep conversation state between requests: each /ask call is independent.
    N)Path)OptionalDictAny)FastAPIHTTPExceptionRequest)HTMLResponseStreamingResponseResponse)CORSMiddleware)	BaseModel)Llamaz.llm_server_pid
MODEL_PATHz\/home/asher/.lmstudio/models/lmstudio-community/gemma-3-1b-it-GGUF/gemma-3-1b-it-Q4_K_M.ggufLLM_SHUTDOWN_TOKENLLM_HOSTz0.0.0.0LLM_PORT5005z'%(asctime)s [%(levelname)s] %(message)s)levelformat*T)allow_originsallow_credentialsallow_methodsallow_headersc                   ^    e Zd ZU eed<   dZee         ed<   dZeeee	f                  ed<   dS )
AskRequestpromptNsystem_promptgeneration_params)
__name__
__module____qualname__str__annotations__r   r   r    r   r        &/home/asher/github/pulse/llm_server.pyr   r   9   sM         KKK#'M8C='''26xS#X/66666r'   r   c                       e Zd ZU eed<   dS )AskResponseresponseN)r!   r"   r#   r$   r%   r&   r'   r(   r*   r*   >   s         MMMMMr'   r*   datareturnc                     | dS |                      d          }g }|D ]}|                    d|            d                    |          dz   S )z
    Format a string as one or more SSE 'data:' lines, preserving blank lines.
    Ensures output ends with the required double-newline separator.
    Nzdata: 


data: 

)splitappendjoin)r,   parts	out_linesps       r(   
format_sser8   A   sg    
 ||JJtEI ' '!&&&&99Y&((r'   startupc            	         t           t          j        d           d S t          j                            dt                    } t          |                                           st          j        d|             d S | t          t          j                            dd                    t          t          j                            dd                    t          t          j                            dd	                    d
d}t          j
        d|  d           	 t          di |t          j        _        t          j
        d           n=# t          $ r0}t          j        d|           d t          j        _        Y d }~nd }~ww xY w	 t!          j                    t          j        _        n!# t          $ r d t          j        _        Y nw xY w	 t          j                    }t(                              t-          |                     t          j
        d| dt(                      d S # t          $ r t          j        d           Y d S w xY w)Nz1llama_cpp not available; cannot start LLM server.r   zModel path not found: 	LLM_N_CTX2048LLM_N_THREADS4LLM_N_GPU_LAYERS0F)
model_pathn_ctx	n_threadsn_gpu_layersverbosezLoading model from z ...zModel loaded successfully.zFailed to load model: %sz
Wrote PID z to zFailed to write PID filer&   )r   loggingerrorosenvirongetDEFAULT_MODELr   existsintinfoappstatellm	Exception	exceptionasyncioLockllm_lockgetpidPID_FILE
write_textr$   )rA   llama_paramsepids       r(   startup_eventr]   P   s0   }IJJJm<<J
""$$ ;z;;<<< !RZ^^K8899==>>BJNN+=sCCDD L L7z777888----	12222   4a888	"$\^^	 " " "!	"
6ikkCHH%%%5#5585566666 6 6 645555556s=   /E 
F	&FF	"F0 0GGAH- -IIshutdownc                      	 t                                           r/t                                            t          j        d           d S d S # t
          $ r t          j        d           Y d S w xY w)NzRemoved PID file.z#Error removing PID file on shutdown)rX   rL   unlinkrF   rN   rR   rS   r&   r'   r(   shutdown_eventra   |   s    A?? 	.OOL,-----	. 	.  A A A?@@@@@@As   AA A.-A.z/healthc                  X    dt          t          t          j        dd                     dS )NokrQ   )statusmodel_loaded)boolgetattrrO   rP   r&   r'   r(   healthrh      s&    DE41P1P,Q,QRRRr'   /)response_classc                  &    d} t          |           S )Nu   
    <!doctype html>
    <html>
    <head><meta charset="utf-8"><title>LLM Server</title></head>
    <body style="font-family: Arial, Helvetica, sans-serif; margin:20px;">
        <h2>LLM Server</h2>
        <form id="frm">
            <label>System prompt (optional)</label><br>
            <input id="system" style="width:100%" placeholder="System prompt"><br><br>
            <label>Prompt</label><br>
            <textarea id="prompt" rows="6" style="width:100%" placeholder="Enter your prompt"></textarea><br>
            <button type="submit">Ask</button>
        </form>
        <h3>Response</h3>
        <pre id="resp" style="white-space:pre-wrap; background:#f6f6f6; padding:12px; border-radius:6px; max-width:800px;"></pre>

        <hr>
        <h3>News Q&amp;A</h3>
        <p style="margin-top:0;"><small>Ask a concise question about recent news items — response streams below.</small></p>
        <form id="news-qa">
            <textarea id="qa_prompt" rows="3" style="width:100%" placeholder="Ask about the latest news in this area..."></textarea><br>
            <button type="submit">Ask News</button>
        </form>
        <h4>Answer</h4>
        <pre id="qa_resp" style="white-space:pre-wrap; background:#fff9e6; padding:12px; border-radius:6px; max-width:800px;"></pre>

        <script>
            // Append a stream chunk but ensure words don't run together across chunk boundaries.
            function appendChunk(el, chunk) {
              if (!chunk) return;
              try {
                const existing = el.textContent || '';
                // If existing ends with whitespace or chunk starts with whitespace, just concatenate.
                if (existing && !(/\s$/.test(existing)) && !(/^\s/.test(chunk))) {
                  // Only insert a space when the last char of existing and first char of chunk are both alphanumeric.
                  const last = existing.slice(-1);
                  const first = chunk.charAt(0);
                  const isAlnum = ch => /[A-Za-z0-9]/.test(ch);
                  if (isAlnum(last) && isAlnum(first)) {
                    el.textContent += ' ';
                  }
                }
              } catch (e) { /* ignore */ }
              el.textContent += chunk;
            }
            document.getElementById('frm').addEventListener('submit', async function(e){
                 e.preventDefault();
                 const prompt = document.getElementById('prompt').value;
                 const system = document.getElementById('system').value || undefined;
                 const payload = { prompt: prompt };
                 if (system) payload.system_prompt = system;
 
                 const respEl = document.getElementById('resp');
                 respEl.textContent = '';
 
                 try {
                     const r = await fetch('/ask?stream=1', {
                         method: 'POST',
                         headers: {
                             'Content-Type': 'application/json',
                             'Accept': 'text/event-stream'
                         },
                         body: JSON.stringify(payload)
                     });
 
                     const ct = (r.headers.get('content-type') || '').toLowerCase();
                     if (!r.ok) {
                         const txt = await r.text();
                         respEl.textContent = 'Error: ' + r.status + '\n' + txt;
                         return;
                     }
 
                     // If server returned JSON (no streaming), show it and return
                     if (ct.includes('application/json')) {
                         const j = await r.json();
                         respEl.textContent = JSON.stringify(j, null, 2);
                         return;
                     }
 
                     // Otherwise consume the body as a stream (SSE style framing expected)
                     const reader = r.body.getReader();
                     const dec = new TextDecoder();
                     let buf = '';
 
                     while (true) {
                         const { done, value } = await reader.read();
                         if (done) break;
                         buf += dec.decode(value, { stream: true });
                         // SSE events are separated by double-newline
                         const parts = buf.split('\n\n');
                         buf = parts.pop();
                         for (const part of parts) {
                             // each part may contain lines like "data: ..." or "event: done"
                             const lines = part.split('\n');
                             let dataLines = lines.filter(l => l.startsWith('data:'));
                             if (dataLines.length) {
                                 const data = dataLines.map(l => l.slice(6)).join('\n');
                                 if (data === '[DONE]') {
                                     // done marker; optionally stop
                                 } else {
                                    appendChunk(respEl, data);
                                 }
                             } else {
                                 // fallback: append raw chunk
                                appendChunk(respEl, part);
                             }
                             // keep UI scrolled
                             respEl.scrollTop = respEl.scrollHeight;
                         }
                     }
 
                     // flush any remaining buffer
                    if (buf && buf.trim()) {
                        appendChunk(respEl, buf);
                    }
                 } catch (err) {
                     respEl.textContent = 'Request failed: ' + String(err);
                 }
             });
 
            // News Q&A handler: sends the prompt with a concise news-analyst system prompt and streams into qa_resp
            document.getElementById('news-qa').addEventListener('submit', async function(e){
                 e.preventDefault();
                 const prompt = document.getElementById('qa_prompt').value;
                 if (!prompt || !prompt.trim()) return;
                 const payload = { prompt: prompt, system_prompt: 'You are a concise news analyst. Answer briefly and focus on recent relevant events.' };
 
                 const respEl = document.getElementById('qa_resp');
                 respEl.textContent = '';
                 try {
                     const r = await fetch('/ask?stream=1', {
                         method: 'POST',
                         headers: { 'Content-Type': 'application/json', 'Accept': 'text/event-stream' },
                         body: JSON.stringify(payload)
                     });
                     if (!r.ok) {
                         const txt = await r.text();
                         respEl.textContent = 'Error: ' + r.status + '\n' + txt;
                         return;
                     }
                     const reader = r.body.getReader();
                     const dec = new TextDecoder();
                     let buf = '';
                     while (true) {
                         const { done, value } = await reader.read();
                         if (done) break;
                         buf += dec.decode(value, { stream: true });
                         const parts = buf.split('\n\n');
                         buf = parts.pop();
                         for (const part of parts) {
                             const lines = part.split('\n');
                             let dataLines = lines.filter(l => l.startsWith('data:'));
                             if (dataLines.length) {
                                 const data = dataLines.map(l => l.slice(6)).join('\n');
                                 if (data === '[DONE]') continue;
                                 appendChunk(respEl, data);
                             } else {
                                 appendChunk(respEl, part);
                             }
                             respEl.scrollTop = respEl.scrollHeight;
                         }
                     }
                     if (buf && buf.trim()) appendChunk(respEl, buf);
                 } catch (err) {
                     respEl.textContent = 'Request failed: ' + String(err);
                 }
            });
          </script>
     </body>
     </html>
     )content)r
   )htmls    r(   indexrn      s    j	DV %%%%r'   z/askrequestreqc           
        K   t          t          j        dd           st          dd          | j                            d          p| j        r| j        j        nd                    d          d         	                                }t          j        d	|           |j        pd
}d|dd|j        dgt          t          j                            dd                    t#          t          j                            dd                    t          t          j                            dd                    t          t          j                            dd                    t#          t          j                            dd                    dgd|j        r                    |j                   | j                            dd          pd}d|v p| j                            d          dv }|r fd}t+           |            d           S 	 t          t          j        d!d           }| j        d(d"i}n;|4 d {V   j        d(d"i}d d d           d {V  n# 1 d {V swxY w Y   |d#         d         d$                             d%          }	|	r|		                                nd}
d&|
iS # t.          $ r8}t          j        d'|           t          dt3          |                    d }~ww xY w))NrQ   i  zLLM not loadedstatus_codedetailzX-Forwarded-Forunknown,r   zReceived /ask from %sz2You are a helpful assistant. Keep answers concise.system)rolerl   userLLM_TEMPERATUREz0.2	LLM_TOP_K40	LLM_TOP_Pz0.95LLM_REPEAT_PENALTYz1.1LLM_MAX_TOKENS512z
<|eot_id|>)temperaturetop_ktop_prepeat_penalty
max_tokensstopaccept ztext/event-streamstream)1truec                   K   t          t          j        dd           } | t          j                    } | 4 d {V  	  j        ddd
D ]}	 |                    d          pi gd         }|                    d          p|                    d          pi }d}t          |t                    r|                    d	          pd}nt          |          }|rYt          j        d
d|t          j                  }|                                r#d| dW V  t          j        d           d {V  # t          $ r Y w xY wdW V  n# t          $ r 	  j        ddi
}|r+d|v r'|d         d         d                             d	          nd}|pd                                }d}t!          dt#          |          |          D ]0}d||||z             dW V  t          j        d           d {V  1dW V  nB# t          $ r5}	t%          j        d|	           dt          |	           dW V  Y d }	~	nd }	~	ww xY wY nw xY wd d d           d {V  d S # 1 d {V swxY w Y   d S )NrV   T)messagesr   choicesr   deltamessager   rl   z^\s*assistant[:\s]*)flagsr0   r1   zevent: done
data: [DONE]

r      z*LLM generation error (stream fallback): %szevent: error
data: r&   )rg   rO   rP   rT   rU   create_chat_completionrJ   
isinstancedictr$   resubIstripsleeprR   rangelenrF   rS   )lockchunkchoicer   	text_partr+   rl   
chunk_sizeir[   genrQ   r   s             r(   event_streamzask.<locals>.event_stream[  s     39j$77D||~~ (B (B (B (B (B (B (B (B'B!;!;!bXVZ!b!b^a!b!b % %%&+ii	&:&:&BrdA%FF %+JJw$7$7$V6::i;P;P$VTVE(*I)%66 7,1IIi,@,@,FB		,/JJ	( ;,.F3I2y`b`d,e,e,e	#,??#4#4 !;*B9*B*B*B$B$B$B$B*1-*:*:$:$:$:$:$:$:$:( % % %$H% :99999  B B BB#=3#=#W#Wx#WSV#W#WV^  #Aclpxcxcx(9"5a"8"C"G"G	"R"R"R  A#*=b"7"7"9"9%(
!&q#g,,
!C!C 3 3A"H71Qz\>+B"H"H"HHHHH")-"2"222222222======$ B B B)*VXYZZZASVVAAAAAAAAAAAABB5(B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (B (Bs   IEC#D54E5
E?EEE
I
IB*H I
I +H;6I;I  IIII
I&)I&)
media_typerV   r   r   r   rl   r+   zLLM generation error: %sr&   )rg   rO   rP   r   headersrJ   clienthostr2   r   rF   rN   r   r   floatrH   rI   rM   r    updatequery_paramsr   r   rR   rS   r$   )ro   rp   	client_ipr   r   stream_requestedr   r   r+   rl   textr[   r   rQ   r   s               @@@r(   askr   :  s     
#)UD
)
)C F4DEEEE $$%677qSZSa<pGN<O<Ogpxxy|}}~  A  G  G  I  IIL()444 %])]Mm44CJ//H RZ^^,=uEEFFRZ^^K6677rz~~k6::;;
/CU K KLL"*..)95AABB C  *

3()))_  2..4"F*f4k8L8P8PQY8Z8Z^k8k /Q,	B ,	B ,	B ,	B ,	B ,	B ,	B\ !<OPPPP<sy*d33<1s1KK8KsKKHH P P P P P P P P535OOxO3OOP P P P P P P P P P P P P P P P P P P P P P P P P P P9%a(377	BB")1w}}rD!! < < <4a888CFF;;;;<s=   5K I4"K 4
I>>K I>AK 
L
3LL
z	/shutdownc                    t           rP| j                            d          p| j                            d          }|t           k    rt	          dd          d }t          j        d|                                           dd	iS )
NtokenzX-Shutdown-Tokeni  zInvalid shutdown tokenrr   c                      t          j        d           	 t                                          rt                                           n# t
          $ r Y nw xY wt          j        d           d S )Nz&Shutting down LLM server as requested.r   )rF   rN   rX   rL   r`   rR   rH   _exitr&   r'   r(   r   zshutdown.<locals>._exit  sl    =>>>	   "!!! 	 	 	D	
s   2A	 	
AAg?rd   shutting_down)SHUTDOWN_TOKENr   rJ   r   r   	threadingTimerstart)ro   r   r   s      r(   r^   r^     s      R$((11\W_5H5HI[5\5\N""C8PQQQQ   OC%%'''o&&r'   __main__c                  T    dd l } |                     dt          t          d           d S )Nr   zllm_server:apprN   )r   port	log_level)uvicornrunHOSTPORT)r   s    r(   start_foregroundr     s,    $4dfMMMMMr'   /tmp/llm_server.logc           	      &   t                                           rr	 t          t                                                                                     }t          j        |d           t          d|            d S # t          $ r Y nw xY wt          j
        t          t                    dg}t          | d          5 }t          j        |||t          t                               }d d d            n# 1 swxY w Y   t#          j        d           |                                Zt          d|j         d|             	 t                               t          |j                             d S # t          $ r Y d S w xY wt          d	|            d S )
Nr   z Server already running with PID r   ab)stdoutstderrcwdg      ?zStarted background server (PID z), logging to z"Failed to start server. Check log:)rX   rL   rM   	read_textr   rH   killprintrR   sys
executabler$   __file__open
subprocessPopenAPP_DIRtimer   pollr\   rY   )logfileexistingcmdoutr7   s        r(   start_backgroundr     s   ?? 	x113399;;<<!$$$CCCDDD   ~s8}}g6'4   	PC S#g,,OOOA	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	
36688RAERRRRSSS##CJJ/////    6@@@@@s6   AA< <
B	B	=+C44C8;C8,E0 0
E>=E>   c                 r   t                                           st          d           d S 	 t          t                                                                                     }n;# t          $ r. t          d           t                               d           Y d S w xY w	 t          j	        |t          j                   t          | dz            D ]=}t          j        d           	 t          j	        |d           -# t          $ r Y  n#w xY wt          j	        |t          j                   t          d| d	           n9# t           $ r t          d
           Y nt"          $ r t          d           Y nw xY wt                               d           d S # t                               d           w xY w)Nz-No PID file found; server may not be running.zInvalid PID file. Removing it.T)
missing_ok
   g?r   zStopped server (PID z).z+Process not found; removing stale PID file.z.Permission denied when trying to stop process.)rX   rL   r   rM   r   r   rR   r`   rH   r   signalSIGTERMr   r   r   SIGKILLProcessLookupErrorPermissionError)timeoutr\   _s      r(   stop_serverr     s      	ABBBF	h((**002233CC 	 	 	2333OOtO,,,FF		-GC(((7R<(( - -
3GCOOOO    EE V^,,,00001111! 	A 	A 	A?@@@@@ 	D 	D 	DBCCCCC	D OOtO,,,,,HOOtO,,,,sl   8A% %4BB!AE )C?>E ?
D	E D5E F E9F E96F 8E99F F6c                  \   t                                           st          d           d S 	 t          t                                                                                     } t          j        | d           t          d|  d           d S # t          $ r t          d           Y d S w xY w)Nz&No PID file found; server not running.r   z&Server appears to be running with PID .z?PID file exists but process not running. PID file may be stale.)	rX   rL   r   rM   r   r   rH   r   rR   )r\   s    r(   status_serverr     s       	:;;;F	Uh((**002233CGCOOOA3AAABBBBB 	U 	U 	USTTTTTT	Us   A B B+*B+zLLM server control)descriptionr   )destr   zStart server in foreground)helpzstart-bgz%Start server in background (detached)z--logzLog file for background server)defaultr   r   zStop server using PID filezstop-allz(Stop all running llm_server.py processeszrestart-allz5Stop all instances then start one background instancerd   zShow server status)r   )pkillz-fzllm_server.pyF)checkz.llm_pidz/Requested stop for all llm_server.py processes.zFailed to stop all processes:log)r   )r   )U__doc__rm   rH   jsonrF   r   pathlibr   typingr   r   r   rT   r   fastapir   r   r	   fastapi.responsesr
   r   r   fastapi.middleware.corsr   pydanticr   	llama_cppr   rR   r   resolveparentr   rX   rI   rJ   rK   r   r   rM   r   basicConfigINFOrO   add_middlewarer   r*   r$   r8   on_eventr]   ra   rh   rn   postr   r^   r!   argparser   r   r   r   r   r   r   r   ArgumentParserparseradd_subparsersr   
add_parsersbgadd_argument
parse_argsargsr   r   r   r7   rL   r`   r   r[   hasattrr&   r'   r(   <module>r	     s     				             & & & & & & & & & &  				 3 3 3 3 3 3 3 3 3 3 G G G G G G G G G G 2 2 2 2 2 2         EEE $x..
 
 
"
"
)&&
|  .L  M  M 455 
z~~j),,
s2:>>*f--..  ',/X Y Y Y Ygii   %%%    7 7 7 7 7 7 7 7
    )   )S )S ) ) ) ) i)6 )6 )6V jA A A S S S \**m& m& +*m&^ &^<w ^<Z ^< ^< ^< ^<@ +'g ' ' ' '( zOOOMMMJJJKKKN N NA A A A0- - - -<	U 	U 	U %X$1EFFFF


U

+
+CNN7!=N>>>
..*Q.
R
RCW&;BbcccNN6 <N===NN:$NNOOONN='^N___NN8"6N777Dx:******	V			Z			6JN;;;5IIII* 45  xxzz #


    DECDDDDD 	6 	6 	6E11555555555	6	]	"	"	JN;;;5IIIII 	 	 	D	 Gj01 	 	A88:: HHJJJ    	WWT5-A-A!\G\]]]]]]	X		 	} sr   A A! A!&!O	 (N10O	 1N96O	 8N99O	 	O%O  O%4P PP"(QQQ