Files
sam-kd/chatbot/rag/search.php
hskwon aca1767eb9 초기 커밋: 5130 레거시 시스템
- URL 하드코딩 → .env APP_URL 기반 동적 URL로 변경
- DB 연결 하드코딩 → .env 기반으로 변경
- MySQL strict mode DATE 오류 수정
2025-12-10 20:14:31 +09:00

119 lines
3.9 KiB
PHP

<?php
// chatbot/rag/search.php
/**
 * In-memory vector search over a JSON document store.
 *
 * On construction the store is read from data/vectors.json next to this file
 * (attempting a download through GCSHelper first when the file is missing).
 * search() embeds the query through the Generative Language embedContent
 * endpoint and ranks the stored documents by cosine similarity.
 */
class VectorSearch {
    /** @var array Decoded document records; always an array, possibly empty. */
    private $vectors = [];

    /** @var string Path of the local vector store file. */
    private $dataFile;

    /** @var string|null Message of the most recent load failure, or null if none. */
    private $lastError = null;

    /**
     * Loads the vector store, falling back to an empty store on any failure.
     *
     * Failures never throw from here; they are written to error_log and made
     * available through getLastError().
     */
    public function __construct() {
        $this->dataFile = __DIR__ . '/data/vectors.json';

        // GCS sync: only attempted when the local file is missing.
        if (!file_exists($this->dataFile)) {
            require_once 'gcs_helper.php';
            try {
                $gcs = new GCSHelper();
                if ($gcs->getBucketName()) {
                    $gcs->download('chatbot/vectors.json', $this->dataFile);
                }
            } catch (Exception $e) {
                // Best effort: a failed download just means we start empty.
                $this->lastError = "GCS Download Failed: " . $e->getMessage();
                error_log($this->lastError);
            }
        }

        if (!file_exists($this->dataFile)) {
            $this->failLoad("File Not Found: " . $this->dataFile);
            return;
        }

        // The JSON can be large; make sure at least 512M is available without
        // lowering a limit that is already higher (or unlimited).
        $this->raiseMemoryLimit('512M');

        $content = file_get_contents($this->dataFile);
        if ($content === false) {
            $this->failLoad("File Read Error: " . $this->dataFile);
            return;
        }

        $decoded = json_decode($content, true);
        if ($decoded === null) {
            $this->failLoad("JSON Decode Error: " . json_last_error_msg());
            return;
        }
        if (!is_array($decoded)) {
            // Valid JSON scalar (number/string/bool): not a usable document list.
            $this->failLoad("JSON Decode Error: expected an array of documents");
            return;
        }

        $this->vectors = $decoded;
    }

    /**
     * Returns the most recent load error message, or null when none occurred.
     */
    public function getLastError() {
        return $this->lastError;
    }

    /**
     * Records a load failure, logs it, and resets the store to empty.
     */
    private function failLoad($message) {
        $this->lastError = $message;
        error_log("RAG Error: " . $this->lastError);
        $this->vectors = [];
    }

    /**
     * Raises memory_limit to $minimum unless the current limit is already
     * unlimited or at least as large.
     */
    private function raiseMemoryLimit($minimum) {
        $current = ini_get('memory_limit');
        if ($current !== false && $current !== '') {
            $currentBytes = $this->iniSizeToBytes($current);
            // A negative value (-1) means "no limit".
            if ($currentBytes < 0 || $currentBytes >= $this->iniSizeToBytes($minimum)) {
                return;
            }
        }
        ini_set('memory_limit', $minimum);
    }

    /**
     * Converts a php.ini shorthand size ("512M", "1G", "64K", "1048576") to bytes.
     */
    private function iniSizeToBytes($size) {
        $size = trim((string) $size);
        $bytes = (int) $size;
        switch (strtolower(substr($size, -1))) {
            case 'g':
                $bytes *= 1024;
                // fall through
            case 'm':
                $bytes *= 1024;
                // fall through
            case 'k':
                $bytes *= 1024;
        }
        return $bytes;
    }

    /**
     * Cosine similarity of two numeric vectors.
     *
     * Returns 0 when either argument is not an array, when the lengths differ,
     * or when either vector has zero magnitude.
     */
    private function cosineSimilarity($vecA, $vecB) {
        if (!is_array($vecA) || !is_array($vecB)) return 0;

        $length = count($vecA);
        // Vectors of different size cannot be compared.
        if ($length !== count($vecB)) return 0;

        $dotProduct = 0;
        $normA = 0;
        $normB = 0;
        for ($i = 0; $i < $length; $i++) {
            $dotProduct += $vecA[$i] * $vecB[$i];
            $normA += $vecA[$i] * $vecA[$i];
            $normB += $vecB[$i] * $vecB[$i];
        }

        if ($normA == 0 || $normB == 0) return 0;
        return $dotProduct / (sqrt($normA) * sqrt($normB));
    }

    /**
     * Number of document records currently loaded.
     */
    public function getVectorCount() {
        return count($this->vectors);
    }

    /**
     * Finds the stored documents most similar to a text query.
     *
     * @param string $query  Text to embed and search for.
     * @param string $apiKey API key sent as the "key" query parameter.
     * @param int    $limit  Maximum number of results to return.
     * @return array Document records (without their 'vector' field) with an
     *               added 'score', sorted by score descending. Empty when the
     *               store is empty or the embedding request fails.
     */
    public function search($query, $apiKey, $limit = 5) {
        // Nothing to rank: skip the remote embedding call entirely.
        if (empty($this->vectors)) {
            return [];
        }

        // 1. Embed the query.
        $url = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key="
            . rawurlencode((string) $apiKey);
        $payload = json_encode([
            'model' => 'models/text-embedding-004',
            'content' => ['parts' => [['text' => $query]]]
        ]);
        if ($payload === false) {
            // e.g. the query is not valid UTF-8.
            error_log("RAG embedding failed: " . json_last_error_msg());
            return [];
        }

        $ch = curl_init($url);
        if ($ch === false) {
            error_log("RAG embedding failed: curl_init error");
            return [];
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        // Bound the request so a stalled connection cannot hang the caller.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        $response = curl_exec($ch);
        $transportError = ($response === false) ? curl_error($ch) : null;
        curl_close($ch);

        if ($response === false) {
            error_log("RAG embedding failed: " . $transportError);
            return [];
        }

        $responseData = json_decode($response, true);
        if (!isset($responseData['embedding']['values']) || !is_array($responseData['embedding']['values'])) {
            error_log("RAG embedding failed: " . json_encode($responseData));
            return [];
        }
        $queryVector = $responseData['embedding']['values'];

        // 2. Score every document.
        $scored = [];
        foreach ($this->vectors as $doc) {
            if (!is_array($doc)) {
                continue; // malformed record, cannot carry a score
            }
            // A missing or non-array vector scores 0 instead of raising an error.
            $doc['score'] = $this->cosineSimilarity($queryVector, $doc['vector'] ?? null);
            // Drop the raw vector from the result to keep it small.
            unset($doc['vector']);
            $scored[] = $doc;
        }

        // 3. Sort by similarity, highest first.
        usort($scored, static function ($a, $b) {
            return $b['score'] <=> $a['score'];
        });

        // 4. Return the top N.
        return array_slice($scored, 0, $limit);
    }
}
?>