초기 커밋: 5130 레거시 시스템

- URL 하드코딩 → .env APP_URL 기반 동적 URL로 변경
- DB 연결 하드코딩 → .env 기반으로 변경
- MySQL strict mode DATE 오류 수정
This commit is contained in:
2025-12-10 20:14:31 +09:00
commit aca1767eb9
6728 changed files with 1863265 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
<?php
// chatbot/rag/debug_search.php
// CLI debugging aid: loads the local vector index through VectorSearch and
// runs one fixed sample query so the whole RAG pipeline can be checked by hand.
require_once __DIR__ . '/search.php';

// The API key file is plain text and must only ever be *read*.
// Including it with require/require_once would echo the secret straight to
// the output (PHP treats non-PHP file content as inline output) and would be
// fatal when the file is missing.
$apiKeyPath = dirname(__DIR__, 2) . '/apikey/google_vertex_api.txt';
$rawKey = is_readable($apiKeyPath) ? file_get_contents($apiKeyPath) : false;

echo "=== RAG Debug Tool ===\n";
echo "API Key Path: $apiKeyPath\n";
if ($rawKey === false) {
    echo "CRITICAL ERROR: Cannot read API key file.\n";
    exit(1);
}
$apiKey = trim($rawKey);
// Only the length is printed so the key itself never reaches logs.
echo "API Key Length: " . strlen($apiKey) . "\n";

$search = new VectorSearch();

// Use the public accessors instead of reflecting into private state.
$vectorCount = $search->getVectorCount();
echo "Loaded Vectors Count: " . $vectorCount . "\n";
if ($vectorCount === 0) {
    echo "CRITICAL ERROR: No vectors loaded. Check vectors.json format or path.\n";
    if ($search->getLastError() !== null) {
        echo "Last Error: " . $search->getLastError() . "\n";
    }
    exit(1);
}

$query = "개발 dump";
echo "Testing Query: '$query'\n";

// Run Search
try {
    $results = $search->search($query, $apiKey, 3);
    echo "Search Result Count: " . count($results) . "\n";
    print_r($results);
    // An empty result may come from a failed embedding call; show the reason
    // when the search object recorded one.
    if (empty($results) && $search->getLastError() !== null) {
        echo "Last Error: " . $search->getLastError() . "\n";
    }
} catch (Exception $e) {
    echo "Search Error: " . $e->getMessage() . "\n";
}
?>

132
chatbot/rag/gcs_helper.php Normal file
View File

@@ -0,0 +1,132 @@
<?php
// chatbot/rag/gcs_helper.php
/**
 * Minimal Google Cloud Storage client built on cURL and OpenSSL.
 *
 * Reads the bucket name from apikey/gcs_config.txt (INI key "bucket_name")
 * and authenticates with the service account JSON stored at
 * apikey/google_service_account.json, both located two directories above
 * this file. All failures are reported by throwing \Exception.
 */
class GCSHelper {
    /** @var string|null Bucket name, or null when no config file/key is present. */
    private $bucketName = null;
    /** @var string Absolute path of the service account JSON key file. */
    private $serviceAccountPath;
    /** @var string|null OAuth access token cached for the lifetime of this object. */
    private $accessToken = null;

    public function __construct() {
        // Load Bucket Name
        $configFile = dirname(__DIR__, 2) . '/apikey/gcs_config.txt';
        if (file_exists($configFile)) {
            $config = parse_ini_file($configFile);
            // parse_ini_file() returns false on a syntax error.
            if (is_array($config)) {
                $this->bucketName = $config['bucket_name'] ?? null;
            }
        }
        // Load Service Account Path
        $this->serviceAccountPath = dirname(__DIR__, 2) . '/apikey/google_service_account.json';
    }

    /**
     * @return string|null Configured bucket name, or null when not configured.
     */
    public function getBucketName() {
        return $this->bucketName;
    }

    /**
     * Encodes binary data as base64url without padding, as required for every
     * JWT segment (RFC 7515). Plain base64 may contain "+", "/" and "=".
     *
     * @param string $data Raw bytes.
     * @return string URL-safe, unpadded base64 text.
     */
    private static function base64UrlEncode($data) {
        return rtrim(strtr(base64_encode($data), '+/', '-_'), '=');
    }

    /**
     * Exchanges a self-signed JWT for an OAuth access token and caches it.
     *
     * @return string Bearer token.
     * @throws Exception When the key file is missing/invalid, signing fails,
     *                   or the token endpoint does not return a token.
     */
    private function getAccessToken() {
        if ($this->accessToken) {
            return $this->accessToken;
        }
        if (!file_exists($this->serviceAccountPath)) {
            throw new Exception("Service account file not found: " . $this->serviceAccountPath);
        }
        $serviceAccount = json_decode((string) file_get_contents($this->serviceAccountPath), true);
        if (!is_array($serviceAccount) || !isset($serviceAccount['client_email'], $serviceAccount['private_key'])) {
            throw new Exception("Invalid service account JSON");
        }

        $now = time();
        $jwtHeader = self::base64UrlEncode(json_encode(['alg' => 'RS256', 'typ' => 'JWT']));
        $jwtClaim = self::base64UrlEncode(json_encode([
            'iss' => $serviceAccount['client_email'],
            'scope' => 'https://www.googleapis.com/auth/devstorage.full_control',
            'aud' => 'https://oauth2.googleapis.com/token',
            'exp' => $now + 3600,
            'iat' => $now
        ]));

        $privateKey = openssl_pkey_get_private($serviceAccount['private_key']);
        if (!$privateKey) {
            throw new Exception("Failed to load private key");
        }
        $signature = '';
        $signed = openssl_sign($jwtHeader . '.' . $jwtClaim, $signature, $privateKey, OPENSSL_ALGO_SHA256);
        // openssl_free_key() is deprecated since PHP 8.0, where keys are
        // objects released automatically; only call it on older runtimes.
        if (PHP_VERSION_ID < 80000) {
            openssl_free_key($privateKey);
        }
        if (!$signed) {
            throw new Exception("Failed to sign JWT");
        }
        $jwt = $jwtHeader . '.' . $jwtClaim . '.' . self::base64UrlEncode($signature);

        $ch = curl_init('https://oauth2.googleapis.com/token');
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query([
            'grant_type' => 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'assertion' => $jwt
        ]));
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            throw new Exception("Failed to get OAuth token: " . $curlError);
        }
        if ($httpCode !== 200) {
            throw new Exception("Failed to get OAuth token: " . $response);
        }
        $data = json_decode($response, true);
        if (!is_array($data) || empty($data['access_token'])) {
            throw new Exception("Failed to get OAuth token: " . $response);
        }
        $this->accessToken = $data['access_token'];
        return $this->accessToken;
    }

    /**
     * Uploads a local file as a single "media" upload with content type
     * application/json.
     *
     * @param string $filePath   Local file to send.
     * @param string $objectName Destination object name inside the bucket.
     * @return bool True on success.
     * @throws Exception When the file cannot be read or GCS rejects the upload.
     */
    public function upload($filePath, $objectName) {
        $token = $this->getAccessToken();
        $fileContent = file_get_contents($filePath);
        if ($fileContent === false) {
            throw new Exception("GCS Upload Failed: cannot read " . $filePath);
        }
        $mimeType = 'application/json';
        // rawurlencode() keeps spaces as %20 instead of "+".
        $url = 'https://storage.googleapis.com/upload/storage/v1/b/' .
            rawurlencode((string) $this->bucketName) . '/o?uploadType=media&name=' .
            rawurlencode($objectName);

        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            'Authorization: Bearer ' . $token,
            'Content-Type: ' . $mimeType,
            'Content-Length: ' . strlen($fileContent)
        ]);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $fileContent);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        if ($httpCode == 200) {
            return true;
        }
        throw new Exception("GCS Upload Failed ($httpCode): " . ($response === false ? $curlError : $response));
    }

    /**
     * Downloads an object and writes it to a local path.
     *
     * @param string $objectName Object name inside the bucket.
     * @param string $savePath   Local destination file.
     * @return bool True on success.
     * @throws Exception When GCS does not answer 200 or the file cannot be written.
     */
    public function download($objectName, $savePath) {
        $token = $this->getAccessToken();
        // The object name is a single path segment here, so "/" must become
        // %2F and spaces %20; rawurlencode() does both.
        $url = 'https://storage.googleapis.com/storage/v1/b/' .
            rawurlencode((string) $this->bucketName) . '/o/' .
            rawurlencode($objectName) . '?alt=media';

        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            'Authorization: Bearer ' . $token
        ]);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $content = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($httpCode != 200 || $content === false) {
            throw new Exception("GCS Download Failed ($httpCode)");
        }
        if (file_put_contents($savePath, $content) === false) {
            throw new Exception("GCS Download Failed: cannot write " . $savePath);
        }
        return true;
    }
}
?>

177
chatbot/rag/ingest.php Normal file
View File

@@ -0,0 +1,177 @@
<?php
// chatbot/rag/ingest.php
// Ingestion bootstrap: runtime limits, API keys, data paths and the vectors
// saved by a previous run.
// This script is meant to be started from the CLI (e.g. a scheduled task);
// no special handling is done when it is invoked through a web SAPI.
ini_set('max_execution_time', 0); // unlimited
ini_set('memory_limit', '512M');

$projectRoot = dirname(__DIR__, 2);

// Read both API keys up front and stop immediately when one is missing or
// empty; continuing would only produce failing API calls for every page.
$apiKeys = [];
foreach (['notion.txt', 'google_vertex_api.txt'] as $keyFile) {
    $keyPath = $projectRoot . "/apikey/" . $keyFile;
    $rawKey = is_readable($keyPath) ? file_get_contents($keyPath) : false;
    if ($rawKey === false || trim($rawKey) === '') {
        echo "API key file missing or empty: $keyPath\n";
        exit(1);
    }
    $apiKeys[$keyFile] = trim($rawKey);
}
$notionApiKey = $apiKeys['notion.txt'];
$googleApiKey = $apiKeys['google_vertex_api.txt'];

require_once dirname(__DIR__) . '/notion_client.php';

// Data Directories
$dataDir = __DIR__ . '/data';
if (!is_dir($dataDir)) mkdir($dataDir, 0777, true);
$vectorsFile = $dataDir . '/vectors.json';
$progressFile = $dataDir . '/progress.json';

// Load existing vectors if any (for resume)
$vectors = [];
if (file_exists($vectorsFile)) {
    $decodedVectors = json_decode((string) file_get_contents($vectorsFile), true);
    if (is_array($decodedVectors)) {
        $vectors = $decodedVectors;
    } else {
        // A truncated or corrupt file decodes to null; start from an empty
        // index instead of passing null to array functions below.
        echo "Existing vectors.json is unreadable, starting from an empty index.\n";
    }
}

// Page IDs already present in the index. Entry IDs are "<pageId>_<chunkIndex>".
$processedIds = [];
foreach ($vectors as $v) {
    if (is_array($v) && isset($v['id'])) {
        $processedIds[] = explode('_', $v['id'])[0];
    }
}
$processedIds = array_unique($processedIds);
/**
 * Writes the current ingestion progress to a JSON file.
 *
 * The file holds one object with the keys "current", "total",
 * "last_title" and "start_time", replacing any previous content.
 *
 * @param string $file      Path of the progress file to overwrite.
 * @param int    $current   Number of pages handled so far.
 * @param int    $total     Total number of pages to handle.
 * @param string $lastTitle Title or status text to display.
 * @param int    $startTime Unix timestamp at which the run started.
 */
function updateProgress($file, $current, $total, $lastTitle, $startTime) {
    $snapshot = [];
    $snapshot['current'] = $current;
    $snapshot['total'] = $total;
    $snapshot['last_title'] = $lastTitle;
    $snapshot['start_time'] = $startTime;

    $payload = json_encode($snapshot);
    file_put_contents($file, $payload);
}
// 1. Fetch Pages
/**
 * Collects every page object returned by the Notion search endpoint,
 * following pagination cursors, most recently edited first.
 *
 * On a transport error, a non-200 answer or an undecodable body the failure
 * is written to the error log and the pages gathered so far are returned.
 *
 * @param string $apiKey Notion integration token, sent as a Bearer token.
 * @return array List of page objects as decoded from the API response.
 */
function fetchAllNotionPages($apiKey) {
    $url = "https://api.notion.com/v1/search";
    $pages = [];
    $nextCursor = null;

    do {
        $data = [
            'filter' => ['value' => 'page', 'property' => 'object'],
            'sort' => ['direction' => 'descending', 'timestamp' => 'last_edited_time'],
            'page_size' => 100
        ];
        if ($nextCursor) $data['start_cursor'] = $nextCursor;

        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            'Authorization: Bearer ' . $apiKey,
            'Notion-Version: 2022-06-28',
            'Content-Type: application/json'
        ]);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            error_log("Notion search request failed: " . $curlError);
            break;
        }
        $result = json_decode($response, true);
        if ($httpCode !== 200 || !is_array($result)) {
            error_log("Notion search returned HTTP $httpCode: " . $response);
            break;
        }

        if (isset($result['results']) && is_array($result['results'])) {
            // Append in place; array_merge() would re-copy the whole list
            // on every page of results.
            foreach ($result['results'] as $pageObject) {
                $pages[] = $pageObject;
            }
        }

        $nextCursor = $result['next_cursor'] ?? null;
        // Continue only when a cursor is actually provided; "has_more"
        // without a cursor would otherwise re-request the first page forever.
        $hasMore = !empty($result['has_more']) && $nextCursor;

        // Rate limit guard (only needed when another request follows)
        if ($hasMore) {
            usleep(500000);
        }
    } while ($hasMore);

    return $pages;
}
// Start
// Main ingestion run: list all Notion pages, split each page into chunks of
// 500 characters, embed every chunk, store the vectors locally and finally
// upload the index to Google Cloud Storage.
$startTime = time();
updateProgress($progressFile, 0, 0, "Fetching Page List...", $startTime);
$notionpages = fetchAllNotionPages($notionApiKey);
$total = count($notionpages);
updateProgress($progressFile, 0, $total, "Starting Processing...", $startTime);
$notionClient = new NotionClient($notionApiKey);

// Group the vectors kept from a previous run by page ID (entry IDs are
// "<pageId>_<chunkIndex>"). Freshly embedded chunks REPLACE the stored ones of
// the same page; appending them would duplicate the whole index on every run.
$vectorsByPage = [];
foreach ($vectors as $storedVector) {
    $storedPageId = explode('_', (string) ($storedVector['id'] ?? ''))[0];
    $vectorsByPage[$storedPageId][] = $storedVector;
}

// Flattens the grouped vectors, writes them to disk and returns the flat list.
// The file is left untouched when encoding fails, so a bad chunk cannot wipe
// an existing index.
$persistVectors = function () use (&$vectorsByPage, $vectorsFile) {
    $flat = [];
    foreach ($vectorsByPage as $pageVectors) {
        foreach ($pageVectors as $entry) {
            $flat[] = $entry;
        }
    }
    $json = json_encode($flat);
    if ($json === false) {
        echo "Vector save skipped (JSON encode error): " . json_last_error_msg() . "\n";
    } else {
        file_put_contents($vectorsFile, $json, LOCK_EX);
    }
    return $flat;
};

$count = 0;
foreach ($notionpages as $index => $page) {
    $pageId = $page['id'];

    // Every page is re-processed on each run to keep the index fresh; the
    // periodic save below limits the loss if the run is interrupted.

    // Title
    $title = "Untitled";
    if (isset($page['properties']['Name']['title'][0]['plain_text'])) {
        $title = $page['properties']['Name']['title'][0]['plain_text'];
    } elseif (isset($page['properties']['title']['title'][0]['plain_text'])) {
        $title = $page['properties']['title']['title'][0]['plain_text'];
    }

    // Update Progress
    $count++;
    updateProgress($progressFile, $count, $total, $title, $startTime);

    // Content
    $content = $notionClient->getPageContent($pageId);
    $fullText = "Title: $title\n\n$content";

    // Chunking (character based; a byte-based split could cut a multi-byte
    // character in half and yield invalid UTF-8 that cannot be JSON-encoded)
    if (function_exists('mb_str_split')) {
        $chunks = mb_str_split($fullText, 500);
    } else {
        $chunks = [];
        $textLength = mb_strlen($fullText);
        for ($offset = 0; $offset < $textLength; $offset += 500) {
            $chunks[] = mb_substr($fullText, $offset, 500);
        }
    }

    $pageVectors = [];
    foreach ($chunks as $chunkIndex => $chunkText) {
        if (mb_strlen(trim($chunkText)) < 10) continue;

        // Embed
        $url = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key=" . $googleApiKey;
        $data = ['model' => 'models/text-embedding-004', 'content' => ['parts' => [['text' => $chunkText]]]];
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        $respData = ($httpCode == 200 && $response !== false) ? json_decode($response, true) : null;
        if (isset($respData['embedding']['values'])) {
            $pageVectors[] = [
                'id' => $pageId . "_" . $chunkIndex,
                'title' => $title,
                'url' => $page['url'] ?? '',
                'text' => $chunkText,
                'vector' => $respData['embedding']['values']
            ];
        } else {
            // Leave a trace in the log instead of dropping the chunk silently.
            echo "Embedding failed (HTTP $httpCode) for {$pageId}_{$chunkIndex}\n";
        }
        usleep(100000); // 0.1s delay
    }

    // Replace the stored chunks only when at least one chunk was embedded, so
    // an API outage does not erase vectors saved by an earlier run.
    if (!empty($pageVectors)) {
        $vectorsByPage[$pageId] = $pageVectors;
    }

    // Save periodically (every 10 pages) to prevent total loss
    if ($count % 10 == 0) {
        $persistVectors();
    }
}

// Final Save
$vectors = $persistVectors();
updateProgress($progressFile, $total, $total, "Uploading to Google Cloud Storage...", $startTime);

// GCS Upload
// Resolve the helper relative to this file, independent of the working directory.
require_once __DIR__ . '/gcs_helper.php';
try {
    $gcs = new GCSHelper();
    if ($gcs->getBucketName()) {
        $gcs->upload($vectorsFile, 'chatbot/vectors.json');
        updateProgress($progressFile, $total, $total, "Complete! (Saved to GCS)", $startTime);
        echo "Successfully uploaded to GCS: " . $gcs->getBucketName() . "/chatbot/vectors.json";
    } else {
        updateProgress($progressFile, $total, $total, "Complete! (Local Only - No Bucket Config)", $startTime);
    }
} catch (Exception $e) {
    echo "GCS Upload Error: " . $e->getMessage();
    updateProgress($progressFile, $total, $total, "Complete (Local Saved, GCS Error)", $startTime);
}
?>

View File

@@ -0,0 +1,16 @@
@echo off
:: RAG data auto-refresh script.
:: Register it in the Windows Task Scheduler to run it on a schedule.

:: 1. Move to the project folder (edit the path if yours differs).
::    Stop right away when the folder is missing, otherwise the relative
::    paths below would point somewhere else.
cd /d C:\Project\5130
if errorlevel 1 exit /b 1

:: Make sure the log folder exists before the first redirect writes to it.
if not exist chatbot\rag\data mkdir chatbot\rag\data

:: 2. Record the start time.
echo [DATE: %date% %time%] Starting Auto-Ingestion... >> chatbot\rag\data\auto_log.txt

:: 3. Run the PHP script (edit the PHP path if yours differs).
::    ingest.php also performs the GCS upload.
C:\xampp\php\php.exe chatbot\rag\ingest.php >> chatbot\rag\data\auto_log.txt 2>&1
set "INGEST_EXIT=%errorlevel%"

echo [DATE: %date% %time%] Finished. >> chatbot\rag\data\auto_log.txt

:: Hand the PHP exit code back so the scheduler can see a failed run.
exit /b %INGEST_EXIT%

118
chatbot/rag/search.php Normal file
View File

@@ -0,0 +1,118 @@
<?php
// chatbot/rag/search.php
/**
 * In-memory vector search over data/vectors.json.
 *
 * The constructor loads the JSON index (downloading it from GCS first when
 * the local file is missing). search() embeds the query through the Google
 * text-embedding-004 endpoint and ranks stored documents by cosine similarity.
 * Problems are reported through getLastError() and the PHP error log rather
 * than by throwing.
 */
class VectorSearch {
    /** @var array Stored documents; each is expected to carry a 'vector' list. */
    private $vectors;
    /** @var string Absolute path of the local index file. */
    private $dataFile;
    /** @var string|null Most recent error message, or null when none occurred. */
    private $lastError = null;

    public function __construct() {
        $this->dataFile = __DIR__ . '/data/vectors.json';

        // GCS Sync (If local file missing)
        if (!file_exists($this->dataFile)) {
            require_once __DIR__ . '/gcs_helper.php';
            try {
                $gcs = new GCSHelper();
                if ($gcs->getBucketName()) {
                    // The data directory may not exist yet on a fresh checkout.
                    $dataDir = dirname($this->dataFile);
                    if (!is_dir($dataDir)) {
                        mkdir($dataDir, 0777, true);
                    }
                    $gcs->download('chatbot/vectors.json', $this->dataFile);
                }
            } catch (Exception $e) {
                // Ignore download error, start with empty
                $this->lastError = "GCS Download Failed: " . $e->getMessage();
                error_log($this->lastError);
            }
        }

        $this->vectors = [];
        if (!file_exists($this->dataFile)) {
            $notFound = "File Not Found: " . $this->dataFile;
            error_log("RAG Error: " . $notFound);
            // Keep an earlier download error visible instead of overwriting it.
            $this->lastError = $this->lastError === null ? $notFound : $this->lastError . " | " . $notFound;
            return;
        }

        // Increase memory limit for large JSON
        ini_set('memory_limit', '512M');
        $decoded = json_decode((string) file_get_contents($this->dataFile), true);
        if (is_array($decoded)) {
            $this->vectors = $decoded;
        } else {
            // Covers both invalid JSON and a valid document whose top level
            // is not a list (which count()/foreach could not handle).
            $reason = json_last_error() !== JSON_ERROR_NONE
                ? json_last_error_msg()
                : 'top-level value is not an array';
            $this->lastError = "JSON Decode Error: " . $reason;
            error_log("RAG Error: " . $this->lastError);
        }
    }

    /**
     * @return string|null Most recent error message, or null.
     */
    public function getLastError() {
        return $this->lastError;
    }

    /**
     * Cosine similarity of two numeric vectors.
     *
     * @param array $vecA First vector.
     * @param array $vecB Second vector.
     * @return int|float 0 when the lengths differ or either norm is zero.
     */
    private function cosineSimilarity($vecA, $vecB) {
        $length = count($vecA);
        // Vectors of different size cannot be compared.
        if ($length !== count($vecB)) return 0;

        $dotProduct = 0;
        $normA = 0;
        $normB = 0;
        for ($i = 0; $i < $length; $i++) {
            $dotProduct += $vecA[$i] * $vecB[$i];
            $normA += $vecA[$i] * $vecA[$i];
            $normB += $vecB[$i] * $vecB[$i];
        }
        if ($normA == 0 || $normB == 0) return 0;
        return $dotProduct / (sqrt($normA) * sqrt($normB));
    }

    /**
     * @return int Number of documents loaded from the index file.
     */
    public function getVectorCount() {
        return count($this->vectors);
    }

    /**
     * Scores every stored document against a query vector.
     *
     * Documents without a usable 'vector' list are skipped. Each returned
     * document gets a 'score' key and has its 'vector' removed to keep the
     * result small.
     *
     * @param array $queryVector Embedding of the query.
     * @param int   $limit       Maximum number of documents to return.
     * @return array Documents ordered by descending score.
     */
    private function rankByVector($queryVector, $limit) {
        $scored = [];
        foreach ($this->vectors as $doc) {
            if (!is_array($doc) || !isset($doc['vector']) || !is_array($doc['vector'])) {
                continue;
            }
            $doc['score'] = $this->cosineSimilarity($queryVector, $doc['vector']);
            unset($doc['vector']);
            $scored[] = $doc;
        }
        usort($scored, function ($a, $b) {
            return $b['score'] <=> $a['score'];
        });
        return array_slice($scored, 0, $limit);
    }

    /**
     * Returns the stored documents most similar to a text query.
     *
     * @param string $query  Text to search for.
     * @param string $apiKey Google API key used for the embedding request.
     * @param int    $limit  Maximum number of results.
     * @return array Ranked documents, or an empty array when the query could
     *               not be embedded (the reason is then in getLastError()).
     */
    public function search($query, $apiKey, $limit = 5) {
        // 1. Embed the query
        $url = "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key=" . $apiKey;
        $data = [
            'model' => 'models/text-embedding-004',
            'content' => ['parts' => [['text' => $query]]]
        ];
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        $response = curl_exec($ch);
        $curlError = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            $this->lastError = "RAG embedding request failed: " . $curlError;
            error_log($this->lastError);
            return [];
        }
        $responseData = json_decode($response, true);
        if (!isset($responseData['embedding']['values'])) {
            $this->lastError = "RAG embedding failed: " . json_encode($responseData);
            error_log($this->lastError);
            return [];
        }

        // 2-4. Score, sort by descending similarity, return the top N
        return $this->rankByVector($responseData['embedding']['values'], $limit);
    }
}
?>

80
chatbot/rag/status.php Normal file
View File

@@ -0,0 +1,80 @@
<!DOCTYPE html>
<html lang="ko">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>RAG Ingestion Status</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <meta http-equiv="refresh" content="3"> <!-- reload every 3 seconds -->
</head>
<body class="bg-gray-100 p-10">
    <div class="max-w-2xl mx-auto bg-white rounded-xl shadow-lg p-8">
        <h1 class="text-2xl font-bold mb-6 text-gray-800">Vector Search 데이터 구축 현황</h1>
        <?php
        // Read the progress snapshot written by ingest.php and derive the
        // values shown below. Everything echoed into the page is either cast
        // to int or HTML-escaped here, because the title comes from external
        // (Notion) content.
        $statusFile = __DIR__ . '/data/progress.json';
        $elapsed = 0;
        if (file_exists($statusFile)) {
            $status = json_decode((string) file_get_contents($statusFile), true);
            if (!is_array($status)) {
                // The file may be read while it is being rewritten.
                $status = [];
            }
            $current = (int) ($status['current'] ?? 0);
            $total = (int) ($status['total'] ?? 0);
            // Clamp so the progress bar width never leaves 0-100%.
            $percent = $total > 0 ? (int) max(0, min(100, round(($current / $total) * 100))) : 0;
            $lastTitle = (string) ($status['last_title'] ?? 'Initializing...');
            $startTime = (int) ($status['start_time'] ?? time());
            $elapsed = max(0, time() - $startTime);
            // Estimated remaining time, in minutes
            $remaining = "Calculating...";
            if ($current > 0) {
                $rate = $elapsed / $current; // seconds per page
                $remSecs = ($total - $current) * $rate;
                $remaining = round($remSecs / 60) . "";
            }
        } else {
            $current = 0;
            $total = 0;
            $percent = 0;
            $lastTitle = "작업 대기 중...";
            $remaining = "-";
        }
        // Escape once so the plain echo further down is safe.
        $lastTitle = htmlspecialchars($lastTitle, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        // mm:ss with minutes that keep counting past 59; gmdate("i:s") would
        // wrap back to 00:00 after one hour.
        $elapsedText = sprintf('%02d:%02d', intdiv($elapsed, 60), $elapsed % 60);
        ?>
        <div class="mb-4">
            <div class="flex justify-between mb-1">
                <span class="text-sm font-medium text-blue-700">진행률 (<?=$current?> / <?=$total?> Pages)</span>
                <span class="text-sm font-medium text-blue-700"><?=$percent?>%</span>
            </div>
            <div class="w-full bg-gray-200 rounded-full h-4">
                <div class="bg-blue-600 h-4 rounded-full transition-all duration-500" style="width: <?=$percent?>%"></div>
            </div>
        </div>
        <div class="space-y-4">
            <div class="p-4 bg-gray-50 rounded-lg border border-gray-200">
                <p class="text-sm text-gray-500">현재 작업 중인 문서</p>
                <p class="font-semibold text-gray-800 truncate"><?=$lastTitle?></p>
            </div>
            <div class="grid grid-cols-2 gap-4">
                <div class="p-4 bg-gray-50 rounded-lg border border-gray-200 text-center">
                    <p class="text-sm text-gray-500">경과 시간</p>
                    <p class="font-mono text-xl"><?=$elapsedText?></p>
                </div>
                <div class="p-4 bg-gray-50 rounded-lg border border-gray-200 text-center">
                    <p class="text-sm text-gray-500">예상 남은 시간</p>
                    <p class="font-mono text-xl"><?=$remaining?></p>
                </div>
            </div>
        </div>
        <div class="mt-8 text-center">
            <?php if ($percent >= 100): ?>
            <a href="../rag_index.php" class="inline-block px-6 py-3 bg-green-600 text-white rounded-lg font-bold hover:bg-green-700 transition">
                데이터 구축 완료! 챗봇 시작하기
            </a>
            <?php else: ?>
            <p class="text-gray-400 text-sm">작업이 완료되면 버튼이 나타납니다.</p>
            <?php endif; ?>
        </div>
    </div>
</body>
</html>