From cf6525c8f31d417e382cb8fac7b37806f2a78649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EB=B3=B4=EA=B3=A4?= Date: Sat, 21 Feb 2026 16:02:10 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20[video]=20=EC=A2=8C=ED=91=9C=20?= =?UTF-8?q?=EA=B2=80=EC=A6=9D=20=EB=A3=A8=ED=94=84(Coordinate=20Verificati?= =?UTF-8?q?on=20Loop)=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Gemini 2-pass 자기 검증 메커니즘 구현 - runCoordinateVerification: 검증 오케스트레이터 - createVerificationImage: 색상별 스포트라이트 렌더링 - verifyCoordinates: Gemini에게 좌표 정확도 확인 요청 - applyVerifiedCoordinates: 보정 좌표 적용 --- app/Services/Video/ScreenAnalysisService.php | 365 +++++++++++++++++++ 1 file changed, 365 insertions(+) diff --git a/app/Services/Video/ScreenAnalysisService.php b/app/Services/Video/ScreenAnalysisService.php index 6acbfc09..459c3a14 100644 --- a/app/Services/Video/ScreenAnalysisService.php +++ b/app/Services/Video/ScreenAnalysisService.php @@ -275,6 +275,9 @@ private function analyzeSingleScreen(string $imagePath, int $screenNumber, int $ // screen_number 강제 보정 $parsed['screen_number'] = $screenNumber; + // 좌표 검증 루프 (normalizeCoordinates 이전, 0~1000 좌표 상태에서 실행) + $parsed = $this->runCoordinateVerification($imagePath, $parsed); + // 0~1000 좌표 → 0~1 비율로 변환 $this->normalizeCoordinates($parsed); @@ -458,6 +461,15 @@ private function createGridOverlay(string $imagePath): ?string private const SPOTLIGHT_PADDING = 0.015; // 전체 크기 대비 1.5% 패딩 + // 좌표 검증 루프용 색상 (step별) + private const VERIFICATION_COLORS = [ + 1 => [255, 60, 60], // 빨강 + 2 => [60, 200, 60], // 초록 + 3 => [60, 100, 255], // 파랑 + 4 => [255, 160, 0], // 주황 + 5 => [180, 60, 255], // 보라 + ]; + /** * 0~1000 정수 좌표 → 0~1 비율로 변환 + 타입별 최소 크기 보정 + 패딩 * @@ -573,4 +585,357 @@ public function normalizeCoordinates(array &$parsed): void } unset($step, $el); } + + /** + * 좌표 검증 루프 오케스트레이터 + * + * 1-pass로 추정한 좌표를 원본 이미지 위에 렌더링한 뒤, + * Gemini에게 2-pass로 위치 정확도를 확인/보정하게 한다. + */ + private function runCoordinateVerification(string $imagePath, array $parsed): array + { + // focused_element가 있는 step만 추출 + $stepsWithElement = []; + foreach ($parsed['steps'] ?? [] as $step) { + if (! empty($step['focused_element'])) { + $stepsWithElement[] = $step; + } + } + + if (empty($stepsWithElement)) { + Log::debug('ScreenAnalysis: 좌표 검증 건너뜀 - focused_element 없음'); + + return $parsed; + } + + Log::info('ScreenAnalysis: 좌표 검증 루프 시작', [ + 'screen' => $parsed['screen_number'] ?? '?', + 'steps_count' => count($stepsWithElement), + ]); + + // 검증 이미지 생성 + $verificationImagePath = $this->createVerificationImage($imagePath, $stepsWithElement); + if (! $verificationImagePath) { + Log::warning('ScreenAnalysis: 검증 이미지 생성 실패, 원본 좌표 유지'); + + return $parsed; + } + + // Gemini 2-pass 검증 호출 + $verifiedSteps = $this->verifyCoordinates($verificationImagePath, $stepsWithElement); + + // 보정 적용 + $parsed = $this->applyVerifiedCoordinates($parsed, $verifiedSteps); + + // 검증 이미지 정리 + if (file_exists($verificationImagePath)) { + @unlink($verificationImagePath); + } + + Log::info('ScreenAnalysis: 좌표 검증 루프 완료'); + + return $parsed; + } + + /** + * 검증용 이미지 생성 + * + * 원본 이미지 위에 각 step의 focused_element를 색상별 반투명 사각형 + 번호 원으로 표시. + * 그리드 없이 깨끗한 원본 위에 렌더링하여 AI가 UI 요소를 명확히 볼 수 있도록 한다. + */ + private function createVerificationImage(string $imagePath, array $steps): ?string + { + try { + $info = getimagesize($imagePath); + if (! $info) { + return null; + } + + $source = match ($info[2]) { + IMAGETYPE_PNG => imagecreatefrompng($imagePath), + IMAGETYPE_JPEG => imagecreatefromjpeg($imagePath), + IMAGETYPE_GIF => imagecreatefromgif($imagePath), + IMAGETYPE_WEBP => imagecreatefromwebp($imagePath), + default => null, + }; + + if (! $source) { + return null; + } + + $srcW = imagesx($source); + $srcH = imagesy($source); + + // annotateSlideWithSpotlight()와 동일한 레터박스 레이아웃 + $canvas = imagecreatetruecolor(self::ANALYSIS_WIDTH, self::ANALYSIS_HEIGHT); + $black = imagecolorallocate($canvas, 0, 0, 0); + imagefill($canvas, 0, 0, $black); + + $availH = self::ANALYSIS_HEIGHT - self::CAPTION_HEIGHT; + $scale = min(self::ANALYSIS_WIDTH / $srcW, $availH / $srcH); + $newW = (int) ($srcW * $scale); + $newH = (int) ($srcH * $scale); + $offsetX = (int) ((self::ANALYSIS_WIDTH - $newW) / 2); + $offsetY = (int) (($availH - $newH) / 2); + + imagecopyresampled($canvas, $source, $offsetX, $offsetY, 0, 0, $newW, $newH, $srcW, $srcH); + imagedestroy($source); + + // 각 step의 focused_element를 색상별 반투명 사각형 + 번호 원으로 표시 + $colorNames = [1 => 'Red', 2 => 'Green', 3 => 'Blue', 4 => 'Orange', 5 => 'Purple']; + + foreach ($steps as $idx => $step) { + $stepNum = $step['step_number'] ?? ($idx + 1); + $colorIdx = min($stepNum, 5); + [$r, $g, $b] = self::VERIFICATION_COLORS[$colorIdx]; + + $el = $step['focused_element']; + $x = $el['x'] ?? 0; + $y = $el['y'] ?? 0; + $w = $el['w'] ?? 200; + $h = $el['h'] ?? 200; + + // 0~1000 좌표를 캔버스 좌표로 변환 + $cx1 = (int) ($offsetX + ($x / 1000) * $newW); + $cy1 = (int) ($offsetY + ($y / 1000) * $newH); + $cx2 = (int) ($offsetX + (($x + $w) / 1000) * $newW); + $cy2 = (int) ($offsetY + (($y + $h) / 1000) * $newH); + + // 반투명 사각형 (테두리 + 내부 채움) + $fillColor = imagecolorallocatealpha($canvas, $r, $g, $b, 100); + $borderColor = imagecolorallocate($canvas, $r, $g, $b); + + imagefilledrectangle($canvas, $cx1, $cy1, $cx2, $cy2, $fillColor); + // 테두리 3px + for ($t = 0; $t < 3; $t++) { + imagerectangle($canvas, $cx1 + $t, $cy1 + $t, $cx2 - $t, $cy2 - $t, $borderColor); + } + + // 번호 원 (좌상단에 배치) + $circleX = $cx1 + 12; + $circleY = $cy1 + 12; + $circleR = 12; + $white = imagecolorallocate($canvas, 255, 255, 255); + imagefilledellipse($canvas, $circleX, $circleY, $circleR * 2, $circleR * 2, $borderColor); + imagestring($canvas, 5, $circleX - 4, $circleY - 7, (string) $stepNum, $white); + } + + // 하단 캡션에 범례 표시 + $captionY = self::ANALYSIS_HEIGHT - self::CAPTION_HEIGHT + 10; + $legendX = 12; + $captionTextColor = imagecolorallocate($canvas, 220, 220, 220); + + imagestring($canvas, 3, $legendX, $captionY, 'Verification Overlay - Step Legend:', $captionTextColor); + $legendX = 12; + $captionY += 20; + + foreach ($steps as $idx => $step) { + $stepNum = $step['step_number'] ?? ($idx + 1); + $colorIdx = min($stepNum, 5); + [$r, $g, $b] = self::VERIFICATION_COLORS[$colorIdx]; + $label = $step['focused_element']['label'] ?? '?'; + $colorName = $colorNames[$colorIdx] ?? '?'; + + $swatchColor = imagecolorallocate($canvas, $r, $g, $b); + imagefilledrectangle($canvas, $legendX, $captionY, $legendX + 14, $captionY + 14, $swatchColor); + $legendText = "#{$stepNum} ({$colorName}): {$label}"; + imagestring($canvas, 2, $legendX + 18, $captionY + 1, $legendText, $captionTextColor); + $legendX += strlen($legendText) * 7 + 30; + + // 줄바꿈 (너비 초과 시) + if ($legendX > self::ANALYSIS_WIDTH - 200) { + $legendX = 12; + $captionY += 18; + } + } + + $outputPath = $imagePath.'_verify.png'; + imagepng($canvas, $outputPath, 6); + imagedestroy($canvas); + + Log::debug('ScreenAnalysis: 검증 이미지 생성 완료', ['path' => $outputPath]); + + return $outputPath; + } catch (\Exception $e) { + Log::warning('ScreenAnalysis: 검증 이미지 생성 실패', ['error' => $e->getMessage()]); + + return null; + } + } + + /** + * Gemini 2-pass 검증 호출 + * + * 검증 이미지를 Gemini에 보내 각 하이라이트의 위치 정확도를 확인한다. + */ + private function verifyCoordinates(string $verificationImagePath, array $steps): ?array + { + try { + $imageData = base64_encode(file_get_contents($verificationImagePath)); + $mimeType = 'image/png'; + + $colorNames = [1 => 'Red', 2 => 'Green', 3 => 'Blue', 4 => 'Orange', 5 => 'Purple']; + + // 각 step 정보를 텍스트로 나열 + $stepDescriptions = ''; + foreach ($steps as $idx => $step) { + $stepNum = $step['step_number'] ?? ($idx + 1); + $colorIdx = min($stepNum, 5); + $colorName = $colorNames[$colorIdx] ?? '?'; + $el = $step['focused_element']; + $label = $el['label'] ?? '?'; + $x = $el['x'] ?? 0; + $y = $el['y'] ?? 0; + $w = $el['w'] ?? 0; + $h = $el['h'] ?? 0; + $stepDescriptions .= "- Step {$stepNum} ({$colorName}): \"{$label}\" → x={$x}, y={$y}, w={$w}, h={$h}\n"; + } + + $prompt = << $prompt], + [ + 'inlineData' => [ + 'mimeType' => $mimeType, + 'data' => $imageData, + ], + ], + ]; + + $result = $this->gemini->callGeminiWithParts($parts, 0.1, 1024); + + if (! $result) { + Log::warning('ScreenAnalysis: 좌표 검증 API 호출 실패'); + + return null; + } + + $verified = $this->gemini->parseJson($result); + + if (! $verified || ! isset($verified['verifications'])) { + Log::warning('ScreenAnalysis: 좌표 검증 JSON 파싱 실패', [ + 'result' => substr($result, 0, 300), + ]); + + return null; + } + + // 검증 결과 로그 + $accurateCount = 0; + $correctedCount = 0; + foreach ($verified['verifications'] as $v) { + if ($v['accurate'] ?? true) { + $accurateCount++; + } else { + $correctedCount++; + } + } + Log::info("ScreenAnalysis: 좌표 검증 결과 - 정확: {$accurateCount}, 보정: {$correctedCount}"); + + return $verified['verifications']; + } catch (\Exception $e) { + Log::warning('ScreenAnalysis: 좌표 검증 예외 발생', ['error' => $e->getMessage()]); + + return null; + } + } + + /** + * 검증 결과를 원본 $parsed에 적용 + * + * accurate: true → 좌표 유지, accurate: false → corrected 좌표로 교체 + */ + private function applyVerifiedCoordinates(array $parsed, ?array $verifiedSteps): array + { + if (! $verifiedSteps) { + Log::debug('ScreenAnalysis: 검증 결과 없음, 원본 좌표 유지 (fallback)'); + + return $parsed; + } + + // step_number를 키로 하는 검증 맵 생성 + $verificationMap = []; + foreach ($verifiedSteps as $v) { + $verificationMap[$v['step_number']] = $v; + } + + foreach ($parsed['steps'] as &$step) { + if (empty($step['focused_element'])) { + continue; + } + + $stepNum = $step['step_number'] ?? 0; + $verification = $verificationMap[$stepNum] ?? null; + + if (! $verification) { + continue; + } + + if (! ($verification['accurate'] ?? true)) { + $oldX = $step['focused_element']['x']; + $oldY = $step['focused_element']['y']; + $oldW = $step['focused_element']['w']; + $oldH = $step['focused_element']['h']; + + $step['focused_element']['x'] = $verification['corrected_x'] ?? $oldX; + $step['focused_element']['y'] = $verification['corrected_y'] ?? $oldY; + $step['focused_element']['w'] = $verification['corrected_w'] ?? $oldW; + $step['focused_element']['h'] = $verification['corrected_h'] ?? $oldH; + + Log::info("ScreenAnalysis: 좌표 보정 Step {$stepNum}", [ + 'label' => $step['focused_element']['label'] ?? '?', + 'before' => "x={$oldX}, y={$oldY}, w={$oldW}, h={$oldH}", + 'after' => "x={$step['focused_element']['x']}, y={$step['focused_element']['y']}, w={$step['focused_element']['w']}, h={$step['focused_element']['h']}", + ]); + } + } + unset($step); + + return $parsed; + } }