dataFile = __DIR__ . '/data/vectors.json';

        // GCS sync: when no local copy exists, ask GCSHelper to download one.
        if (!file_exists($this->dataFile)) {
            require_once 'gcs_helper.php';
            try {
                $gcs = new GCSHelper();
                if ($gcs->getBucketName()) {
                    $gcs->download('chatbot/vectors.json', $this->dataFile);
                }
            } catch (Exception $e) {
                // Best effort: record and log the failure, then fall through to the
                // file check below (which starts with an empty store if the file is absent).
                $this->lastError = "GCS Download Failed: " . $e->getMessage();
                error_log($this->lastError);
            }
        }

        if (file_exists($this->dataFile)) {
            // Increase memory limit for large JSON
            ini_set('memory_limit', '512M');
            $content = file_get_contents($this->dataFile);
            $this->vectors = json_decode($content, true);

            if ($this->vectors === null) {
                $this->lastError = "JSON Decode Error: " . json_last_error_msg();
                error_log("RAG Error: " . $this->lastError);
                $this->vectors = [];
            } else {
                // Success
            }
        } else {
            $this->lastError = "File Not Found: " . $this->dataFile;
            error_log("RAG Error: " . $this->lastError);
            $this->vectors = [];
        }
    }

    /**
     * Returns the most recent error message recorded on this object.
     *
     * @return mixed The value last assigned to $this->lastError.
     */
    public function getLastError()
    {
        return $this->lastError;
    }

    /**
     * Computes the cosine similarity of two numeric vectors.
     *
     * @param mixed $vecA First vector; expected to be a 0-indexed array of numbers.
     * @param mixed $vecB Second vector; expected to be a 0-indexed array of numbers.
     * @return int|float dot(A, B) / (|A| * |B|), or 0 when either argument is not an
     *                   array, the lengths differ, or either vector has zero magnitude.
     */
    private function cosineSimilarity($vecA, $vecB)
    {
        // Anything that is not an array cannot be compared; count() on such a value
        // would raise a TypeError on PHP 8.
        if (!is_array($vecA) || !is_array($vecB)) {
            return 0;
        }

        // Vectors of different sizes are treated as "no similarity".
        $length = count($vecA);
        if ($length !== count($vecB)) {
            return 0;
        }

        $dotProduct = 0;
        $normA = 0;
        $normB = 0;

        // The length is hoisted so count() is not re-evaluated on every iteration.
        for ($i = 0; $i < $length; $i++) {
            $dotProduct += $vecA[$i] * $vecB[$i];
            $normA += $vecA[$i] * $vecA[$i];
            $normB += $vecB[$i] * $vecB[$i];
        }

        // A zero-magnitude vector would cause a division by zero below.
        if ($normA == 0 || $normB == 0) {
            return 0;
        }

        return $dotProduct / (sqrt($normA) * sqrt($normB));
    }

    /**
     * Returns the number of entries currently held in the vector store.
     *
     * @return int Entry count; 0 when the store is not an array (for example when the
     *             data file decoded to a JSON scalar).
     */
    public function getVectorCount()
    {
        return is_array($this->vectors) ? count($this->vectors) : 0;
    }

    /**
     * Embeds the query through the text-embedding-004 embedContent endpoint and ranks
     * the stored entries by cosine similarity to that embedding.
     *
     * Failures (payload encoding, transport, unexpected response) are logged, recorded
     * in $this->lastError, and reported to the caller as an empty result.
     *
     * @param mixed $query  Text to embed; sent as the request's text part.
     * @param mixed $apiKey API key, sent as the "key" query parameter.
     * @param int   $limit  Maximum number of results, passed to array_slice().
     * @return array Stored entries with an added 'score' key and without their 'vector'
     *               key, sorted by score descending; [] on failure or an empty store.
     */
    public function search($query, $apiKey, $limit = 5)
    {
        // Nothing to rank: skip the remote call entirely. The result is [] either way.
        if (!is_array($this->vectors) || count($this->vectors) === 0) {
            return [];
        }

        // 1. Embed the query.
        // The key is URL-encoded so reserved characters cannot corrupt the query string.
        $url = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key="
            . rawurlencode((string) $apiKey);
        $data = [
            'model' => 'models/text-embedding-004',
            'content' => ['parts' => [['text' => $query]]]
        ];

        // json_encode() returns false on failure (e.g. invalid UTF-8 in the query).
        $payload = json_encode($data);
        if ($payload === false) {
            $this->lastError = "RAG embedding failed: " . json_last_error_msg();
            error_log($this->lastError);
            return [];
        }

        $ch = curl_init($url);
        if ($ch === false) {
            $this->lastError = "RAG embedding failed: could not initialise cURL";
            error_log($this->lastError);
            return [];
        }

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        // Bound the request so a stalled connection cannot hang the caller indefinitely.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);

        $response = curl_exec($ch);
        // The error text must be read before the handle is closed.
        $transportError = ($response === false) ? curl_error($ch) : '';
        curl_close($ch);

        if ($response === false) {
            $this->lastError = "RAG embedding failed: " . $transportError;
            error_log($this->lastError);
            return [];
        }

        $responseData = json_decode($response, true);
        if (
            !isset($responseData['embedding']['values'])
            || !is_array($responseData['embedding']['values'])
        ) {
            $this->lastError = "RAG embedding failed: " . json_encode($responseData);
            error_log($this->lastError);
            return [];
        }

        $queryVector = $responseData['embedding']['values'];

        // 2. Score every stored entry against the query embedding.
        $scored = [];
        foreach ($this->vectors as $doc) {
            // An entry that is not an array cannot carry a score; skip it.
            if (!is_array($doc)) {
                continue;
            }

            // An entry without a usable vector scores 0 instead of raising an error.
            $doc['score'] = $this->cosineSimilarity($queryVector, $doc['vector'] ?? null);

            // Drop the vector from the result to keep the payload small.
            unset($doc['vector']);
            $scored[] = $doc;
        }

        // 3. Sort by similarity, highest first.
        usort($scored, static function ($a, $b) {
            return $b['score'] <=> $a['score'];
        });

        // 4. Return the top N entries.
        return array_slice($scored, 0, $limit);
    }
}
?>