import os
from typing import Optional, List, Dict, Any, Union, AsyncGenerator, Literal
from pydantic import BaseModel, Field
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI, AsyncStream
from openai.types.chat import ChatCompletionChunk

app = FastAPI()

client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    # base_url=os.getenv("LLM_BASE_URL"),  # uncomment when using a custom endpoint
)

class ChatCompletionRequest(BaseModel):
    # Subset of the OpenAI chat completions request schema.
    model: str
    messages: List[Dict[str, Any]]
    temperature: Optional[float] = Field(default=1.0, ge=0, le=2)
    max_tokens: Optional[int] = None
    stream: Optional[bool] = True
    tools: Optional[List[Dict[str, Any]]] = None
    tool_choice: Optional[Union[Literal["auto", "none", "required"], Dict[str, Any]]] = None
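# For reference, a request body the model above accepts (illustrative values,
# not from the original source):
# {
#   "model": "gpt-4o-mini",
#   "messages": [{"role": "user", "content": "Hello"}],
#   "stream": true
# }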

async def generate_streaming_response(
    stream: AsyncStream[ChatCompletionChunk],
) -> AsyncGenerator[str, None]:
    # Re-emit each upstream chunk using OpenAI-style SSE framing: "data: <json>\n\n".
    async for chunk in stream:
        yield f"data: {chunk.model_dump_json()}\n\n"
    # OpenAI-compatible streams terminate with a literal [DONE] sentinel.
    yield "data: [DONE]\n\n"

@app.post("/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    # Forward the validated request upstream; exclude_none keeps the payload
    # limited to fields the caller actually set.
    response = await client.chat.completions.create(**request.model_dump(exclude_none=True))
    if request.stream:
        # create() returned an async chunk stream; relay it to the caller as SSE.
        return StreamingResponse(
            generate_streaming_response(response),
            media_type="text/event-stream",
        )
    # Non-streaming: return the completed response as plain JSON.
    return response.model_dump()
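
# Local smoke test, as a sketch: assumes this file is saved as main.py and that
# uvicorn is installed (`pip install uvicorn`). Any OpenAI-compatible client can
# then be pointed at the proxy, e.g.:
#
#   from openai import OpenAI
#   proxy = OpenAI(base_url="http://localhost:8000", api_key="unused")
#   stream = proxy.chat.completions.create(
#       model="gpt-4o-mini",  # hypothetical model name
#       messages=[{"role": "user", "content": "Hello"}],
#       stream=True,
#   )
#   for chunk in stream:
#       print(chunk.choices[0].delta.content or "", end="")
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)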