- URL 하드코딩 → .env APP_URL 기반 동적 URL로 변경 - DB 연결 하드코딩 → .env 기반으로 변경 - MySQL strict mode DATE 오류 수정
119 lines
3.9 KiB
PHP
119 lines
3.9 KiB
PHP
<?php
|
|
// chatbot/rag/search.php
|
|
/**
 * In-memory vector store with cosine-similarity search.
 *
 * Documents are loaded once, at construction time, from data/vectors.json
 * next to this file. If that file is missing, a download from GCS is
 * attempted first. Queries are embedded through the Google Generative
 * Language API (text-embedding-004) and compared against every stored vector.
 */
class VectorSearch {
    /** @var array Decoded documents; guaranteed to be an array at all times. */
    private $vectors = [];

    /** @var string Absolute path of the local vector store file. */
    private $dataFile;

    /** @var string|null Most recent download/load error message, or null if none occurred. */
    private $lastError = null;

    /**
     * Locates the vector file, fetches it from GCS when it is absent locally,
     * and loads it into memory. Never throws for a missing or corrupt store:
     * the instance starts empty and the reason is available via getLastError().
     */
    public function __construct() {
        $this->dataFile = __DIR__ . '/data/vectors.json';

        // GCS sync (only when the local file is missing).
        if (!file_exists($this->dataFile)) {
            $this->downloadFromGcs();
        }

        $this->loadVectors();
    }

    /**
     * Best-effort download of the vector file from GCS.
     *
     * A failure is recorded in $lastError and logged, never rethrown, so the
     * caller can continue with an empty store.
     */
    private function downloadFromGcs() {
        require_once 'gcs_helper.php';

        try {
            $gcs = new GCSHelper();
            // NOTE(review): presumably getBucketName() is falsy when no bucket
            // is configured - confirm against gcs_helper.php.
            if ($gcs->getBucketName()) {
                $gcs->download('chatbot/vectors.json', $this->dataFile);
            }
        } catch (Exception $e) {
            // Deliberately ignore the download error and start with an empty store.
            $this->lastError = "GCS Download Failed: " . $e->getMessage();
            error_log($this->lastError);
        }
    }

    /**
     * Reads and decodes the vector file into $this->vectors.
     *
     * On any failure (missing file, unreadable file, invalid JSON, or JSON
     * whose top-level value is not an array) the store is left empty and the
     * reason is recorded and logged.
     */
    private function loadVectors() {
        $this->vectors = [];

        if (!file_exists($this->dataFile)) {
            $this->recordLoadError("File Not Found: " . $this->dataFile);
            return;
        }

        // Increase memory limit for large JSON.
        ini_set('memory_limit', '512M');

        $content = file_get_contents($this->dataFile);
        if ($content === false) {
            $this->recordLoadError("File Read Error: " . $this->dataFile);
            return;
        }

        $decoded = json_decode($content, true);
        if (!is_array($decoded)) {
            // Covers real syntax errors as well as valid JSON scalars/null,
            // which would otherwise break count() and foreach later on.
            $reason = json_last_error() !== JSON_ERROR_NONE
                ? json_last_error_msg()
                : 'top-level value is not an array';
            $this->recordLoadError("JSON Decode Error: " . $reason);
            return;
        }

        $this->vectors = $decoded;
    }

    /**
     * Stores a load error and writes it to the error log with the RAG prefix.
     *
     * @param string $message Human-readable failure description.
     */
    private function recordLoadError($message) {
        $this->lastError = $message;
        error_log("RAG Error: " . $this->lastError);
    }

    /**
     * @return string|null The last download/load error message, or null if none occurred.
     */
    public function getLastError() {
        return $this->lastError;
    }

    /**
     * Computes the cosine similarity of two numeric vectors.
     *
     * @param array $vecA First vector (sequential integer keys starting at 0).
     * @param array $vecB Second vector (sequential integer keys starting at 0).
     * @return int|float Similarity in [-1, 1]; 0 when either argument is not an
     *                   array, the sizes differ, or either vector has zero magnitude.
     */
    private function cosineSimilarity($vecA, $vecB) {
        if (!is_array($vecA) || !is_array($vecB)) return 0;

        // Vectors of different sizes cannot be compared: return 0.
        $size = count($vecA);
        if ($size !== count($vecB)) return 0;

        $dotProduct = 0;
        $normA = 0;
        $normB = 0;

        for ($i = 0; $i < $size; $i++) {
            $dotProduct += $vecA[$i] * $vecB[$i];
            $normA += $vecA[$i] * $vecA[$i];
            $normB += $vecB[$i] * $vecB[$i];
        }

        // A zero-magnitude vector has no direction; avoid dividing by zero.
        if ($normA == 0 || $normB == 0) return 0;

        return $dotProduct / (sqrt($normA) * sqrt($normB));
    }

    /**
     * @return int Number of documents currently loaded.
     */
    public function getVectorCount() {
        return count($this->vectors);
    }

    /**
     * Returns the stored documents most similar to a text query.
     *
     * @param string   $query  Text to search for.
     * @param string   $apiKey Google Generative Language API key.
     * @param int|null $limit  Maximum number of results; null returns every
     *                         scored document, zero or less returns none.
     * @return array Documents sorted by descending 'score', each without its
     *               'vector' entry. Empty when the store is empty or the
     *               embedding request fails.
     */
    public function search($query, $apiKey, $limit = 5) {
        if ($limit !== null) {
            $limit = (int) $limit;
            if ($limit <= 0) {
                return [];
            }
        }

        // Nothing to rank: skip the (billable, slow) embedding request.
        if (empty($this->vectors)) {
            return [];
        }

        // 1. Embed the query.
        $queryVector = $this->embedQuery($query, $apiKey);
        if ($queryVector === null) {
            return [];
        }

        // 2-4. Score, sort and truncate.
        return $this->rankDocuments($queryVector, $limit);
    }

    /**
     * Requests an embedding vector for $query from the embedContent endpoint.
     *
     * @param string $query  Text to embed.
     * @param string $apiKey API key, sent as the "key" query parameter.
     * @return array|null The embedding values, or null on any failure (the
     *                    failure is written to the error log).
     */
    private function embedQuery($query, $apiKey) {
        $url = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key="
            . rawurlencode((string) $apiKey);

        $payload = json_encode([
            'model' => 'models/text-embedding-004',
            'content' => ['parts' => [['text' => $query]]]
        ]);
        if ($payload === false) {
            // Typically invalid UTF-8 in the query text.
            error_log("RAG embedding failed: " . json_last_error_msg());
            return null;
        }

        $ch = curl_init($url);
        if ($ch === false) {
            error_log("RAG embedding failed: curl_init error");
            return null;
        }

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        // Bound the wait so a stalled API call cannot hang the whole request.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);

        $response = curl_exec($ch);
        // Read the error before closing the handle.
        $transportError = ($response === false) ? curl_error($ch) : '';
        curl_close($ch);

        if ($response === false) {
            error_log("RAG embedding failed: " . $transportError);
            return null;
        }

        $responseData = json_decode($response, true);

        if (!isset($responseData['embedding']['values']) || !is_array($responseData['embedding']['values'])) {
            error_log("RAG embedding failed: " . json_encode($responseData));
            return null;
        }

        return $responseData['embedding']['values'];
    }

    /**
     * Scores every stored document against $queryVector and returns the best ones.
     *
     * Entries that are not arrays or that lack an array 'vector' are skipped,
     * since they cannot be scored.
     *
     * @param array    $queryVector Embedding of the query.
     * @param int|null $limit       Maximum number of results, or null for all.
     * @return array Documents with an added 'score' and no 'vector', sorted by
     *               descending score.
     */
    private function rankDocuments($queryVector, $limit) {
        $scored = [];

        foreach ($this->vectors as $doc) {
            if (!is_array($doc) || !isset($doc['vector']) || !is_array($doc['vector'])) {
                continue;
            }

            $doc['score'] = $this->cosineSimilarity($queryVector, $doc['vector']);
            // Drop the raw vector from the result to keep the payload small.
            unset($doc['vector']);
            $scored[] = $doc;
        }

        // Sort by similarity, highest first.
        usort($scored, function ($a, $b) {
            return $b['score'] <=> $a['score'];
        });

        // Return the top N.
        return array_slice($scored, 0, $limit);
    }
}
|
|
?>
|