Files
sam-kd/chatbot/md_rag/api.php
hskwon aca1767eb9 초기 커밋: 5130 레거시 시스템
- URL 하드코딩 → .env APP_URL 기반 동적 URL로 변경
- DB 연결 하드코딩 → .env 기반으로 변경
- MySQL strict mode DATE 오류 수정
2025-12-10 20:14:31 +09:00

187 lines
6.3 KiB
PHP

<?php
// chatbot/md_rag/api.php
//
// JSON endpoint for the markdown-based RAG chatbot. Accepts a POST body of the
// form {"message": "...", "history": [...]} and answers with a JSON object.
// This preamble validates the request and loads the Google API key.
require_once($_SERVER['DOCUMENT_ROOT'] . "/session.php");

// Error handling for API: PHP notices/warnings are silenced so they cannot
// corrupt the JSON body.
error_reporting(0);
ini_set('display_errors', 0);
// NOTE(review): presumably raised because the vector data file is decoded into memory — confirm.
ini_set('memory_limit', '512M');
// Buffer output so that anything emitted before the final response can be
// discarded with ob_clean() right before the JSON is written.
ob_start();
header('Content-Type: application/json; charset=utf-8');

if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
    ob_clean();
    echo json_encode(['error' => 'Invalid request method']);
    exit;
}

// A body that is not a JSON object/array is treated like an empty request.
$input = json_decode(file_get_contents('php://input'), true);
if (!is_array($input)) {
    $input = [];
}

// Only scalar text is accepted as a message; arrays/objects/booleans would
// otherwise be forwarded verbatim into the remote API payload.
$rawMessage = $input['message'] ?? '';
$userMessage = (is_string($rawMessage) || is_int($rawMessage) || is_float($rawMessage))
    ? (string) $rawMessage
    : '';
$history = (isset($input['history']) && is_array($input['history'])) ? $input['history'] : [];

// empty() would also reject the valid message "0", so compare against the
// trimmed empty string instead (this additionally rejects whitespace-only input).
if (trim($userMessage) === '') {
    ob_clean();
    echo json_encode(['error' => 'Empty message']);
    exit;
}

$projectRoot = dirname(__DIR__, 2);
$googleApiKeyPath = $projectRoot . "/apikey/google_vertex_api.txt";

// Fail fast when the key file is missing/unreadable/empty instead of issuing
// remote calls with an empty key. The response uses the same
// {"reply": "System Error: ..."} shape as this file's exception handler.
$googleApiKeyRaw = is_readable($googleApiKeyPath) ? file_get_contents($googleApiKeyPath) : false;
$googleApiKey = is_string($googleApiKeyRaw) ? trim($googleApiKeyRaw) : '';
if ($googleApiKey === '') {
    ob_clean();
    echo json_encode(['reply' => 'System Error: API key is not configured']);
    exit;
}
// Design note: the existing VectorSearch class cannot be reused as-is because
// its data file path is hardcoded.
// Options considered: give VectorSearch a path argument in its constructor,
// subclass it, or write a small search implementation here.
// Modifying `rag/search.php` to accept an optional $customFile path would be the
// best architecture, but to avoid touching working code during this task the
// logic is lightly cloned into a new class defined in this file.
/**
 * Minimal vector search over pre-computed document embeddings stored as JSON.
 *
 * The data file is decoded into a list of documents; search() reads the keys
 * id, text, title, url and vector from each entry.
 */
class MDSearch
{
    /** @var array Decoded document list; empty when the data file is missing or invalid. */
    private $vectors;

    /**
     * @param string|null $dataFile Path to the vectors JSON file. Defaults to
     *                              data/vectors.json next to this script.
     */
    public function __construct($dataFile = null)
    {
        if ($dataFile === null) {
            $dataFile = __DIR__ . '/data/vectors.json';
        }

        $decoded = null;
        if (is_file($dataFile)) {
            $raw = file_get_contents($dataFile);
            $decoded = is_string($raw) ? json_decode($raw, true) : null;
        }

        // Anything other than a decoded array (missing file, read error,
        // malformed JSON) results in an empty index.
        $this->vectors = is_array($decoded) ? $decoded : [];
    }

    /**
     * Cosine similarity between two numeric vectors.
     *
     * @param array $vecA
     * @param array $vecB
     * @return float Value in [-1, 1]; 0.0 when either vector has zero length,
     *               is not an array, or the dimensions differ.
     */
    private function cosineSimilarity($vecA, $vecB)
    {
        if (!is_array($vecA) || !is_array($vecB) || count($vecA) !== count($vecB)) {
            return 0.0;
        }

        $dotProduct = 0.0;
        $normA = 0.0;
        $normB = 0.0;
        foreach ($vecA as $i => $a) {
            $b = $vecB[$i] ?? 0.0;
            $dotProduct += $a * $b;
            $normA += $a * $a;
            // The norm of B must be accumulated from B's own components.
            $normB += $b * $b;
        }

        if ($normA == 0.0 || $normB == 0.0) {
            return 0.0;
        }

        return $dotProduct / (sqrt($normA) * sqrt($normB));
    }

    /**
     * Requests an embedding for the query text from the text-embedding-004 model.
     *
     * @param string $query
     * @param string $apiKey
     * @return array|null Embedding values, or null on transport/decoding failure
     *                    or when the response carries no embedding.
     */
    private function embedQuery($query, $apiKey)
    {
        $url = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key="
            . rawurlencode((string) $apiKey);
        $payload = [
            'model' => 'models/text-embedding-004',
            'content' => ['parts' => [['text' => $query]]],
        ];

        $ch = curl_init($url);
        if ($ch === false) {
            return null;
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        // Bound the request so a stalled connection cannot hang the endpoint.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        $response = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on transport failure.
        if (!is_string($response)) {
            return null;
        }

        $result = json_decode($response, true);
        $values = is_array($result) ? ($result['embedding']['values'] ?? null) : null;

        return (is_array($values) && $values !== []) ? $values : null;
    }

    /**
     * Returns the $topK stored documents most similar to the query.
     *
     * @param string $query  Text to embed and compare against the stored vectors.
     * @param string $apiKey Google API key used for the embedding request.
     * @param int    $topK   Maximum number of results.
     * @return array List of ['id', 'score', 'text', 'title', 'url'] entries sorted
     *               by descending score; empty when there is no index or the
     *               query could not be embedded.
     */
    public function search($query, $apiKey, $topK = 3)
    {
        if (empty($this->vectors)) {
            return [];
        }

        $queryVector = $this->embedQuery($query, $apiKey);
        if ($queryVector === null) {
            return [];
        }

        $scores = [];
        foreach ($this->vectors as $doc) {
            // Entries without a usable vector cannot be ranked.
            if (!is_array($doc) || !isset($doc['vector']) || !is_array($doc['vector'])) {
                continue;
            }
            $scores[] = [
                'id' => $doc['id'] ?? null,
                'score' => $this->cosineSimilarity($queryVector, $doc['vector']),
                'text' => $doc['text'] ?? '',
                'title' => $doc['title'] ?? '',
                'url' => $doc['url'] ?? '',
            ];
        }

        usort($scores, function ($a, $b) {
            return $b['score'] <=> $a['score'];
        });

        return array_slice($scores, 0, max(0, (int) $topK));
    }
}
// Request handler: retrieve relevant help documents, build a grounded prompt,
// ask the Gemini model for an answer and emit {"reply": ..., "debug": ...}.
try {
    // 1. Search: fetch the top 5 candidates for the user's message.
    $searcher = new MDSearch();
    $results = $searcher->search($userMessage, $googleApiKey, 5);

    // Candidates scoring below this similarity are not added to the context.
    $scoreThreshold = 0.4;

    // Debug data returned to the caller alongside the reply.
    $debugInfo = [];
    $context = "";
    if (empty($results)) {
        $context = "관련된 도움말 문서를 찾을 수 없습니다.";
        $debugInfo['message'] = "No results found from vector search.";
    } else {
        $debugInfo['candidates'] = [];
        foreach ($results as $doc) {
            // Every candidate is logged, including those below the threshold.
            $debugInfo['candidates'][] = [
                'title' => $doc['title'],
                'score' => round($doc['score'], 4),
                'text_preview' => mb_substr($doc['text'], 0, 100) . "...",
            ];
            if ($doc['score'] < $scoreThreshold) {
                continue;
            }
            $context .= "문서: {$doc['title']}\n내용:\n{$doc['text']}\n---\n";
        }
        if ($context === "") {
            $context = "검색된 문서들의 유사도가 너무 낮습니다. (Threshold < 0.4)";
        }
    }

    // 2. Generation
    $url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key="
        . rawurlencode((string) $googleApiKey);
    $headers = ['Content-Type: application/json'];

    // Construct Prompt: the model is told to answer only from the retrieved context.
    $systemInstruction = "You are a helpful assistant for 'Tenant Knowledge Base'.\n"
        . "Answer the user's question using ONLY the provided [Context].\n"
        . "If the answer is not in the context, say '죄송합니다. 제공된 도움말 문서에 해당 내용이 없습니다.'\n"
        . "[Context]\n"
        . $context;

    $data = [
        'contents' => [['parts' => [['text' => $userMessage]]]],
        'systemInstruction' => ['parts' => [['text' => $systemInstruction]]],
    ];

    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    // Bound the request so a stalled connection cannot hang the endpoint.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    $response = curl_exec($ch);
    if ($response === false) {
        // Transport failure: record the reason; the fallback reply below is used.
        $debugInfo['curl_error'] = curl_error($ch);
    }
    curl_close($ch);

    $responseData = is_string($response) ? json_decode($response, true) : null;
    $reply = $responseData['candidates'][0]['content']['parts'][0]['text']
        ?? "죄송합니다. 응답 생성 중 오류가 발생했습니다.";

    ob_clean();
    echo json_encode(['reply' => $reply, 'debug' => $debugInfo]);
} catch (Throwable $e) {
    // Throwable also covers Error/TypeError, which `catch (Exception)` would
    // miss and which would otherwise leave the client with an empty body.
    ob_clean();
    echo json_encode(['reply' => "System Error: " . $e->getMessage()]);
}
?>