초기 커밋: 5130 레거시 시스템
- URL 하드코딩 → .env APP_URL 기반 동적 URL로 변경
- DB 연결 하드코딩 → .env 기반으로 변경
- MySQL strict mode DATE 오류 수정
This commit is contained in:
161
chatbot/rag_api.php
Normal file
161
chatbot/rag_api.php
Normal file
@@ -0,0 +1,161 @@
|
||||
<?php
// chatbot/rag_api.php
//
// JSON endpoint for the chatbot: reads a POSTed JSON body, runs a vector
// search over internal documents and asks Gemini to answer using the hits.

// Start buffering BEFORE anything else is loaded. Every response in this
// script is preceded by ob_clean(), so any stray output produced while
// including session.php (whitespace, notices, a BOM) is captured here and
// discarded instead of being sent ahead of the JSON body.
ob_start();

require_once($_SERVER['DOCUMENT_ROOT'] . "/session.php");

// Suppress error output so the JSON response is not polluted by PHP messages.
error_reporting(0);
ini_set('display_errors', 0);

// Increase memory limit for API context
ini_set('memory_limit', '512M');

header('Content-Type: application/json; charset=utf-8');
|
||||
|
||||
// Only POST is accepted; the payload is read from the request body further down.
if (($_SERVER['REQUEST_METHOD'] ?? '') !== 'POST') {
    ob_clean();
    // Report the rejection at the HTTP level too, not only in the JSON body,
    // so clients and proxies do not treat the call as a success.
    http_response_code(405);
    header('Allow: POST');
    echo json_encode(['error' => 'Invalid request method']);
    exit;
}
|
||||
|
||||
// Decode the JSON request body. json_decode() returns null for malformed JSON
// and a scalar for valid-but-unexpected JSON, so anything that is not an
// array is normalised to an empty payload.
$input = json_decode(file_get_contents('php://input'), true);
if (!is_array($input)) {
    $input = [];
}

// 'message' is the user's question; numbers are accepted and stringified,
// any other non-string value is treated as missing.
$userMessage = $input['message'] ?? '';
if (is_int($userMessage) || is_float($userMessage)) {
    $userMessage = (string) $userMessage;
} elseif (!is_string($userMessage)) {
    $userMessage = '';
}

// 'history' is the prior conversation; it is iterated later, so force a list.
$history = $input['history'] ?? [];
if (!is_array($history)) {
    $history = [];
}

// Reject blank messages. A plain empty() check would also reject the valid
// message "0" and would let a whitespace-only message through.
if (trim($userMessage) === '') {
    ob_clean();
    echo json_encode(['error' => 'Empty message']);
    exit;
}
|
||||
|
||||
// Load the Google API key from a text file.
// NOTE(review): this path is inside DOCUMENT_ROOT — confirm the web server
// denies direct HTTP access to /apikey/, otherwise the key is downloadable.
$googleApiKeyPath = $_SERVER['DOCUMENT_ROOT'] . "/apikey/google_vertex_api.txt";

// file_get_contents() returns false when the file cannot be read; treat that,
// and an empty file, the same as a missing key instead of calling the API
// with an empty key.
$googleApiKey = is_readable($googleApiKeyPath) ? file_get_contents($googleApiKeyPath) : false;
$googleApiKey = is_string($googleApiKey) ? trim($googleApiKey) : '';

if ($googleApiKey === '') {
    ob_clean();
    echo json_encode(['reply' => "API Key not found."]);
    exit;
}
|
||||
|
||||
require_once __DIR__ . '/rag/search.php';
|
||||
|
||||
// Main pipeline: vector search -> prompt assembly -> Gemini call -> JSON reply.
// Any failure is reported to the client as {"reply": "Error: ..."}.
try {
    // 1. Vector Search (Semantic Search)
    // Uses VectorSearch instead of the previous NotionClient->search().
    $vectorSearch = new VectorSearch();
    $results = $vectorSearch->search($userMessage, $googleApiKey, 5); // top 5 matches

    $context = "";
    if (empty($results)) {
        $context = "관련된 내부 문서를 찾을 수 없습니다. (벡터 데이터가 없거나 매칭 실패)";
    } else {
        // Deduplicate documents: group the text chunks by URL. The title and
        // score of the first chunk seen for a URL are kept for the group.
        $processedDocs = [];
        foreach ($results as $doc) {
            $docUrl = $doc['url'];
            if (!isset($processedDocs[$docUrl])) {
                $processedDocs[$docUrl] = [
                    'title' => $doc['title'],
                    'score' => $doc['score'],
                    'text' => $doc['text'],
                ];
                continue;
            }

            // Append the chunk only if it is non-empty and not already part
            // of the compiled text (simple exact-substring check). The
            // emptiness guard avoids calling strpos() with an empty needle.
            $chunk = (string) $doc['text'];
            if ($chunk !== '' && strpos($processedDocs[$docUrl]['text'], $chunk) === false) {
                $processedDocs[$docUrl]['text'] .= "\n[...추가 내용...]\n" . $chunk;
            }
        }

        foreach ($processedDocs as $docUrl => $doc) {
            $score = round($doc['score'] * 100, 1);
            $context .= "문서 제목: [{$doc['title']}] (유사도: {$score}%)\nURL: {$docUrl}\n내용:\n{$doc['text']}\n---\n";
        }
    }

    // 2. Gemini Answer Generation
    $endpoint = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=" . rawurlencode($googleApiKey);
    $headers = ['Content-Type: application/json'];

    // Flatten the conversation history into "Role: text" lines. Entries are
    // client-supplied, so malformed ones are skipped rather than indexed blindly.
    $historyText = "";
    if (is_array($history)) {
        foreach ($history as $msg) {
            if (!is_array($msg)) {
                continue;
            }
            $role = ($msg['role'] ?? '') === 'user' ? "User" : "Assistant";
            $parts = $msg['parts'] ?? '';
            $text = is_array($parts) ? ($parts[0]['text'] ?? '') : $parts;
            if (!is_scalar($text)) {
                continue;
            }
            $historyText .= "$role: $text\n";
        }
    }

    // The prompt text below is sent verbatim; its lines are intentionally
    // flush-left so no indentation ends up inside the string.
    $systemInstruction = "You are a helpful customer support agent for 'codebridge-x.com'.
Use the provided [Context] (retrieved via Semantic Vector Search) to answer the user's question.
If you cannot find the answer in the context, say so.

[Conversation History]
$historyText

IMPORTANT: List the used documents at the bottom.

Format:
[Answer]

관련 문서 (Vector Search Result):
- [Title](URL)

[Context]
$context";

    $data = [
        'contents' => [['parts' => [['text' => $userMessage]]]],
        'systemInstruction' => ['parts' => [['text' => $systemInstruction]]],
    ];

    $payload = json_encode($data);
    if ($payload === false) {
        throw new Exception(json_last_error_msg());
    }

    $ch = curl_init($endpoint);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    // Bound the call so an unresponsive upstream cannot hold the request open forever.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);

    $response = curl_exec($ch);

    // Read the error state first, then always release the handle — also on failure.
    $curlError = curl_errno($ch) ? curl_error($ch) : null;
    curl_close($ch);
    if ($curlError !== null) {
        throw new Exception($curlError);
    }

    $responseData = json_decode($response, true);

    // Check for standard text response
    $reply = $responseData['candidates'][0]['content']['parts'][0]['text'] ?? null;

    // If there is no text, derive a message from finishReason.
    // Compared explicitly so that a literal "0" reply is not treated as missing.
    if (!is_string($reply) || $reply === '') {
        $finishReason = $responseData['candidates'][0]['finishReason'] ?? 'UNKNOWN';
        if ($finishReason === 'SAFETY') {
            $reply = "죄송합니다. 해당 질문에 대한 답변은 안전 정책상 제공해드릴 수 없습니다.";
        } else {
            $reply = "답변 생성 실패 (Finish Reason: $finishReason). Raw Response를 확인해주세요.";
        }
    }

    // Debug File Status
    $vectorFile = __DIR__ . '/rag/data/vectors.json';
    $vectorFileExists = file_exists($vectorFile);
    $fileStatus = [
        'path' => $vectorFile,
        'exists' => $vectorFileExists,
        'size' => $vectorFileExists ? filesize($vectorFile) : -1,
        'memory_limit' => ini_get('memory_limit'),
    ];

    // NOTE(review): the 'debug' section exposes the full prompt, the retrieved
    // context, a server file path and the raw upstream response to every
    // caller. It is kept for compatibility; consider gating it behind a flag.
    ob_clean();
    echo json_encode([
        'reply' => $reply,
        'debug' => [
            'refinedQuery' => $userMessage,
            'vectorCount' => $vectorSearch->getVectorCount(),
            'loadError' => $vectorSearch->getLastError(),
            'fileStatus' => $fileStatus,
            'context' => $context,
            'systemInstruction' => $systemInstruction,
            'rawResponse' => $responseData,
        ],
    ]);
} catch (Throwable $e) {
    // Throwable (not just Exception) so that engine errors such as TypeError
    // or a missing class still yield a JSON reply instead of an empty body.
    ob_clean();
    echo json_encode(['reply' => "Error: " . $e->getMessage()]);
}
|
||||
?>
|
||||
Reference in New Issue
Block a user