187 lines
6.3 KiB
PHP
187 lines
6.3 KiB
PHP
|
|
<?php
|
||
|
|
// chatbot/md_rag/api.php
|
||
|
|
require_once($_SERVER['DOCUMENT_ROOT'] . "/session.php");

// This endpoint must always answer with a clean JSON body: PHP diagnostics are
// silenced and all output is buffered so that stray output can be discarded
// (ob_clean) right before the JSON payload is written.
error_reporting(0);
ini_set('display_errors', 0);
// NOTE(review): the raised limit is presumably for decoding the vector store — confirm.
ini_set('memory_limit', '512M');
ob_start();

header('Content-Type: application/json; charset=utf-8');

// Only POST requests are accepted.
if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
    ob_clean();
    echo json_encode(['error' => 'Invalid request method']);
    exit;
}

// The request body is a JSON object: {"message": ..., "history": [...]}.
// Anything that does not decode to an array is treated as an empty request.
$input = json_decode((string) file_get_contents('php://input'), true);
if (!is_array($input)) {
    $input = [];
}

$userMessage = $input['message'] ?? '';
// Numeric JSON values are accepted as their string form; every other non-string
// type (array, bool, null) is rejected below instead of being forwarded upstream.
if (is_int($userMessage) || is_float($userMessage)) {
    $userMessage = (string) $userMessage;
}

// Conversation history sent by the client; normalised to an array.
$history = (isset($input['history']) && is_array($input['history'])) ? $input['history'] : [];

// empty() would also reject the legitimate message "0", so test for a blank string explicitly.
if (!is_string($userMessage) || trim($userMessage) === '') {
    ob_clean();
    echo json_encode(['error' => 'Empty message']);
    exit;
}

// The Google API key is read from a text file two directory levels above this script.
$projectRoot = dirname(__DIR__, 2);
$googleApiKeyPath = $projectRoot . "/apikey/google_vertex_api.txt";
$rawApiKey = is_readable($googleApiKeyPath) ? file_get_contents($googleApiKeyPath) : false;
$googleApiKey = ($rawApiKey === false) ? '' : trim($rawApiKey);

// Without a key every upstream call would fail; report it as a server-side problem
// instead of silently sending requests with an empty key.
if ($googleApiKey === '') {
    ob_clean();
    echo json_encode(['error' => 'Server configuration error']);
    exit;
}
|
||
|
|
|
||
|
|
// Design note: the existing VectorSearch class (rag/search.php) has its data
// file path hardcoded, so it cannot be reused here as-is. The best architecture
// would be to let it accept an optional custom file path (for example through
// its constructor), or to subclass it. To avoid touching working code during
// this task, a lightweight copy of the search logic is instead defined as a
// new class in this file.
|
||
|
|
|
||
|
|
/**
 * Vector search over a JSON store of pre-computed document embeddings.
 *
 * The store is read once in the constructor. search() embeds the query with
 * the text-embedding-004 model and ranks every stored entry by cosine
 * similarity against the query vector.
 */
class MDSearch {
    /** @var array Decoded vector store; an empty array when it could not be loaded. */
    private $vectors;

    /**
     * Load the vector store.
     *
     * @param string|null $dataFile Optional path to the JSON store. When null, the
     *                              file data/vectors.json next to this script is used.
     */
    public function __construct($dataFile = null) {
        if ($dataFile === null) {
            $dataFile = __DIR__ . '/data/vectors.json';
        }

        $decoded = null;
        if (is_file($dataFile) && is_readable($dataFile)) {
            $raw = file_get_contents($dataFile);
            if ($raw !== false) {
                $decoded = json_decode($raw, true);
            }
        }

        // A missing, unreadable or malformed store behaves like an empty one.
        $this->vectors = is_array($decoded) ? $decoded : [];
    }

    /**
     * Cosine similarity of two numeric vectors.
     *
     * @param array $vecA First vector.
     * @param array $vecB Second vector.
     * @return float|int dot(A, B) / (|A| * |B|); 0 when either vector has zero
     *                   length, or when the inputs are not arrays of equal size.
     */
    private function cosineSimilarity($vecA, $vecB) {
        if (!is_array($vecA) || !is_array($vecB) || count($vecA) !== count($vecB)) {
            return 0.0;
        }

        $dotProduct = 0.0;
        $normA = 0.0;
        $normB = 0.0;
        foreach ($vecA as $i => $a) {
            $b = $vecB[$i] ?? 0.0;
            $dotProduct += $a * $b;
            $normA += $a * $a;
            // Each norm must be accumulated from its own vector's components.
            $normB += $b * $b;
        }

        if ($normA == 0.0 || $normB == 0.0) {
            return 0.0;
        }
        return $dotProduct / (sqrt($normA) * sqrt($normB));
    }

    /**
     * Return the stored entries most similar to a query.
     *
     * @param string $query  Text to embed and search for.
     * @param string $apiKey API key appended to the embedding request URL.
     * @param int    $topK   Maximum number of results to return.
     * @return array List of ['id', 'score', 'text', 'title', 'url'] arrays sorted by
     *               descending score; empty when the store is empty or the query
     *               could not be embedded.
     */
    public function search($query, $apiKey, $topK = 3) {
        if (empty($this->vectors)) {
            return [];
        }

        // Embed the query.
        $url = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key=" . $apiKey;
        $data = ['model' => 'models/text-embedding-004', 'content' => ['parts' => [['text' => $query]]]];

        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        // Bound the call so an unresponsive upstream cannot hang the request forever.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        $response = curl_exec($ch);
        curl_close($ch);

        // Transport failure: nothing to rank against.
        if ($response === false) {
            return [];
        }

        $result = json_decode($response, true);
        $queryVector = is_array($result) ? ($result['embedding']['values'] ?? null) : null;
        if (!$queryVector) {
            return [];
        }

        // Score every stored entry against the query vector.
        $scores = [];
        foreach ($this->vectors as $doc) {
            // Entries without a usable vector cannot be scored; skip them.
            if (!is_array($doc) || !isset($doc['vector']) || !is_array($doc['vector'])) {
                continue;
            }
            $scores[] = [
                'id' => $doc['id'] ?? null,
                'score' => $this->cosineSimilarity($queryVector, $doc['vector']),
                'text' => $doc['text'] ?? null,
                'title' => $doc['title'] ?? null,
                'url' => $doc['url'] ?? null,
            ];
        }

        // Highest similarity first.
        usort($scores, static function ($a, $b) {
            return $b['score'] <=> $a['score'];
        });
        return array_slice($scores, 0, $topK);
    }
}
|
||
|
|
|
||
|
|
// Main request flow: retrieve matching help documents, build a grounded prompt,
// ask the Gemini model for an answer and return it as JSON together with
// retrieval debug data.
try {
    // 1. Retrieval: top 5 candidates for the user's message.
    $searcher = new MDSearch();
    $results = $searcher->search($userMessage, $googleApiKey, 5);

    // Debug data returned to the client alongside the reply.
    $debugInfo = [];
    $context = "";

    if (empty($results)) {
        $context = "관련된 도움말 문서를 찾을 수 없습니다.";
        $debugInfo['message'] = "No results found from vector search.";
    } else {
        $debugInfo['candidates'] = [];

        foreach ($results as $doc) {
            // Every candidate is recorded for debugging, including those rejected below.
            $debugInfo['candidates'][] = [
                'title' => $doc['title'],
                'score' => round($doc['score'], 4),
                'text_preview' => mb_substr((string) $doc['text'], 0, 100) . "..."
            ];

            // Only candidates scoring at least 0.4 are added to the prompt context.
            if ($doc['score'] < 0.4) {
                continue;
            }

            $context .= "문서: {$doc['title']}\n내용:\n{$doc['text']}\n---\n";
        }

        if (empty($context)) {
            $context = "검색된 문서들의 유사도가 너무 낮습니다. (Threshold < 0.4)";
        }
    }

    // 2. Generation
    $url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=" . $googleApiKey;
    $headers = ['Content-Type: application/json'];

    // The system instruction restricts the model to the retrieved context.
    $systemInstruction = "You are a helpful assistant for 'Tenant Knowledge Base'.\n"
        . "Answer the user's question using ONLY the provided [Context].\n"
        . "If the answer is not in the context, say '죄송합니다. 제공된 도움말 문서에 해당 내용이 없습니다.'\n"
        . "\n"
        . "[Context]\n"
        . $context;

    $data = [
        'contents' => [['parts' => [['text' => $userMessage]]]],
        'systemInstruction' => ['parts' => [['text' => $systemInstruction]]]
    ];

    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    // Bound the call so an unresponsive upstream cannot hang the request forever.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    $response = curl_exec($ch);
    if ($response === false) {
        // Surface the transport failure through the existing debug channel.
        $debugInfo['generation_error'] = curl_error($ch);
    }
    curl_close($ch);

    $responseData = ($response === false) ? null : json_decode($response, true);
    $reply = "죄송합니다. 응답 생성 중 오류가 발생했습니다.";
    if (is_array($responseData) && isset($responseData['candidates'][0]['content']['parts'][0]['text'])) {
        $reply = $responseData['candidates'][0]['content']['parts'][0]['text'];
    }

    ob_clean();
    echo json_encode(['reply' => $reply, 'debug' => $debugInfo]);

} catch (Throwable $e) {
    // Throwable also covers Error/TypeError; with error display disabled those
    // would otherwise end the request with an empty, non-JSON response.
    ob_clean();
    echo json_encode(['reply' => "System Error: " . $e->getMessage()]);
}
|
||
|
|
?>
|