feat: [sound-logo] TTS 음성 오버레이 기능 추가

- Gemini TTS API 연동 (한국어 Kore 음성)
- 사이드바에 음성 오버레이 컨트롤: 텍스트 입력, 시작 시점, 볼륨
- 재생/WAV 내보내기 시 신스 + 음성 자동 합성
- POST /rd/sound-logo/tts 엔드포인트 추가
- L16 PCM → AudioBuffer 디코더 구현
2026-03-08 12:44:05 +09:00
parent 85304bdfbc
commit ff2296d4d8
3 changed files with 207 additions and 2 deletions
--- a/app/Http/Controllers/RdController.php
+++ b/app/Http/Controllers/RdController.php
@@ -452,4 +452,63 @@ public function soundLogoGenerate(Request $request): JsonResponse

        return response()->json(['success' => true, 'data' => $result]);
    }
+
+    /**
+     * Generate a sound-logo TTS voice clip through the Gemini TTS API.
+     *
+     * Validates `text` (required string, max 200 chars) and returns JSON with the
+     * base64 `audio_data` and its `mime_type`, or `success: false` plus `error`.
+     * The API key is sent in the `x-goog-api-key` header instead of the query
+     * string so it cannot end up in logged exception messages or request URLs.
+     */
+    public function soundLogoTts(Request $request): JsonResponse
+    {
+        $validated = $request->validate(['text' => 'required|string|max:200']);
+
+        $apiKey = config('services.gemini.api_key');
+        $baseUrl = config('services.gemini.base_url', 'https://generativelanguage.googleapis.com/v1beta');
+
+        if (! $apiKey) {
+            return response()->json(['success' => false, 'error' => 'Gemini API 키가 설정되지 않았습니다.'], 500);
+        }
+
+        // The key goes in a header so the URL (which may surface in error messages) stays secret-free.
+        try {
+            $response = Http::withHeaders(['x-goog-api-key' => $apiKey])->timeout(30)->post(
+                "{$baseUrl}/models/gemini-2.5-flash-preview-tts:generateContent",
+                [
+                    'contents' => [['parts' => [['text' => $validated['text']]]]],
+                    'generationConfig' => [
+                        'responseModalities' => ['AUDIO'],
+                        'speechConfig' => ['voiceConfig' => ['prebuiltVoiceConfig' => ['voiceName' => 'Kore']]],
+                    ],
+                ]
+            );
+        } catch (\Exception $e) {
+            Log::error('SoundLogo TTS 생성 실패', ['error' => $e->getMessage()]);
+
+            return response()->json(['success' => false, 'error' => 'TTS 서버 연결 실패'], 500);
+        }
+
+        if (! $response->successful()) {
+            // Keep the upstream status and body in the log; the client only gets the status code.
+            Log::warning('SoundLogo TTS 응답 오류', ['status' => $response->status(), 'body' => $response->body()]);
+
+            return response()->json(['success' => false, 'error' => 'TTS 생성 실패: '.$response->status()], 500);
+        }
+
+        // The audio is read from the inline data of the first part of the first candidate.
+        $data = $response->json();
+        $inlineData = $data['candidates'][0]['content']['parts'][0]['inlineData'] ?? null;
+
+        if (! $inlineData || empty($inlineData['data'])) {
+            return response()->json(['success' => false, 'error' => '음성 데이터를 받지 못했습니다.'], 500);
+        }
+
+        return response()->json([
+            'success' => true,
+            'audio_data' => $inlineData['data'],
+            'mime_type' => $inlineData['mimeType'] ?? 'audio/L16;rate=24000',
+        ]);
+    }
 }
--- a/resources/views/rd/sound-logo/index.blade.php
+++ b/resources/views/rd/sound-logo/index.blade.php
@@ -226,6 +226,9 @@
        <span style="font-size: 11px; color: var(--sl-text2);">
            <i class="ri-music-line"></i> <span x-text="notes.length"></span>개 음표 |
            <span x-text="getTotalDuration().toFixed(2)"></span>초
+            <template x-if="voiceBuffer">
+                <span style="color: var(--sl-green);"> | <i class="ri-mic-fill"></i> 음성</span>
+            </template>
        </span>
    </div>

@@ -278,6 +281,44 @@
                </div>
            </div>

+            <!-- Voice Overlay -->
+            <div class="sl-section">
+                <div class="sl-section-title">음성 오버레이 (TTS)</div>
+                <div style="margin-bottom: 6px;">
+                    <input class="sl-input" x-model="voiceText" placeholder='예: 쌤!, SAM' style="margin-bottom: 6px;">
+                    <button class="sl-btn sm primary" style="width:100%;" @click="generateVoice()" :disabled="voiceLoading || !voiceText.trim()">
+                        <template x-if="!voiceLoading">
+                            <span><i class="ri-mic-line"></i> 음성 생성</span>
+                        </template>
+                        <template x-if="voiceLoading">
+                            <span><i class="ri-loader-4-line" style="animation: spin 1s linear infinite;"></i> 생성 중...</span>
+                        </template>
+                    </button>
+                </div>
+                <template x-if="voiceBuffer">
+                    <div>
+                        <div style="padding: 6px 8px; border-radius: 6px; background: rgba(16,185,129,.1); border: 1px solid rgba(16,185,129,.3); margin-bottom: 8px;">
+                            <div style="font-size: 11px; color: var(--sl-green); display: flex; align-items: center; gap: 4px;">
+                                <i class="ri-checkbox-circle-fill"></i>
+                                <span x-text="'\"' + voiceText + '\" · ' + voiceBuffer.duration.toFixed(1) + '초'"></span>
+                            </div>
+                        </div>
+                        <div class="sl-param">
+                            <div class="sl-param-label"><span>시작 시점</span><span x-text="voiceDelay.toFixed(1) + '초'"></span></div>
+                            <input type="range" class="sl-slider" min="0" max="30" :value="voiceDelay * 10" @input="voiceDelay = $event.target.value / 10">
+                        </div>
+                        <div class="sl-param">
+                            <div class="sl-param-label"><span>음성 볼륨</span><span x-text="(voiceVolume * 100).toFixed(0) + '%'"></span></div>
+                            <input type="range" class="sl-slider" min="0" max="100" :value="voiceVolume * 100" @input="voiceVolume = $event.target.value / 100">
+                        </div>
+                        <div style="display: flex; gap: 4px;">
+                            <button class="sl-btn sm" @click="playVoiceOnly()" style="flex:1;"><i class="ri-play-fill"></i> 음성만</button>
+                            <button class="sl-btn sm danger" @click="clearVoice()" style="flex:1;"><i class="ri-delete-bin-line"></i> 삭제</button>
+                        </div>
+                    </div>
+                </template>
+            </div>
+
            <!-- Saved Sounds -->
            <div class="sl-section">
                <div class="sl-section-title">내 사운드</div>
@@ -561,6 +602,15 @@ function soundLogo() {
        aiLoading: false,
        aiResult: null,
        aiError: '',
+        // 음성 오버레이
+        voiceText: '',
+        voiceAudioData: null,
+        voiceMimeType: '',
+        voiceLoading: false,
+        voiceDelay: 0.0,
+        voiceVolume: 0.8,
+        voiceBuffer: null,
+
        aiQuickPrompts: [
            '밝고 미래적인 IT 기업 로고',
            '따뜻하고 친근한 카페 알림음',
@@ -905,10 +955,22 @@ function soundLogo() {
                t += n.duration || 0.2;
            });

+            // Voice overlay
+            if (this.voiceBuffer) {
+                const voiceSrc = ctx.createBufferSource();
+                voiceSrc.buffer = this.voiceBuffer;
+                const voiceGain = ctx.createGain();
+                voiceGain.gain.value = this.voiceVolume;
+                voiceSrc.connect(voiceGain).connect(ctx.destination);
+                voiceSrc.start(startTime + this.voiceDelay);
+            }
+
            // Draw waveform
            this.drawWaveform(ctx);

-            const totalMs = (t - startTime) * 1000 + (this.adsr.release || 500);
+            const synthMs = (t - startTime) * 1000 + (this.adsr.release || 500);
+            const voiceMs = this.voiceBuffer ? (this.voiceDelay + this.voiceBuffer.duration) * 1000 + 200 : 0;
+            const totalMs = Math.max(synthMs, voiceMs);
            setTimeout(() => {
                this.isPlaying = false;
                this.playingIdx = -1;
@@ -974,7 +1036,9 @@ function soundLogo() {
            if (this.notes.length === 0) return this.toast('음표를 추가해 주세요');

            const sampleRate = 44100;
-            const totalDur = this.getTotalDuration() + this.adsr.release / 1000 + 0.5;
+            const synthDur = this.getTotalDuration() + this.adsr.release / 1000 + 0.5;
+            const voiceDur = this.voiceBuffer ? this.voiceDelay + this.voiceBuffer.duration + 0.5 : 0;
+            const totalDur = Math.max(synthDur, voiceDur);
            const offline = new OfflineAudioContext(2, sampleRate * totalDur, sampleRate);

            let t = 0.05;
@@ -1009,6 +1073,16 @@ function soundLogo() {
                t += n.duration || 0.2;
            });

+            // Voice overlay in offline context
+            if (this.voiceBuffer) {
+                const voiceSrc = offline.createBufferSource();
+                voiceSrc.buffer = this.voiceBuffer;
+                const voiceGain = offline.createGain();
+                voiceGain.gain.value = this.voiceVolume;
+                voiceSrc.connect(voiceGain).connect(offline.destination);
+                voiceSrc.start(0.05 + this.voiceDelay);
+            }
+
            const buffer = await offline.startRendering();
            const wav = this.bufferToWav(buffer);
            const blob = new Blob([wav], { type: 'audio/wav' });
@@ -1060,6 +1134,77 @@ function soundLogo() {
            return buf;
        },

+        // ===== 음성 오버레이 (TTS) =====
+        // Fetch a TTS clip for voiceText, decode it into voiceBuffer, and toast the outcome.
+        async generateVoice() {
+            if (this.voiceLoading || !this.voiceText.trim()) return;
+            this.voiceLoading = true;
+            this.voiceBuffer = null;
+
+            try {
+                const res = await fetch('{{ route("rd.sound-logo.tts") }}', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                        'Accept': 'application/json', // so Laravel answers errors with JSON, not a redirect
+                        'X-CSRF-TOKEN': document.querySelector('meta[name="csrf-token"]').content,
+                    },
+                    body: JSON.stringify({ text: this.voiceText }),
+                });
+                const data = await res.json();
+                if (data.success && data.audio_data) {
+                    this.voiceAudioData = data.audio_data;
+                    this.voiceMimeType = data.mime_type || 'audio/L16;rate=24000';
+                    const rateMatch = this.voiceMimeType.match(/rate=(\d+)/); // e.g. "audio/L16;rate=24000"
+                    const sampleRate = rateMatch ? parseInt(rateMatch[1], 10) : 24000;
+                    this.voiceBuffer = this.decodeL16(data.audio_data, sampleRate);
+                    this.toast('음성 생성 완료: "' + this.voiceText + '"');
+                } else {
+                    this.toast(data.error || data.message || '음성 생성 실패'); // message: validation errors
+                }
+            } catch (e) {
+                this.toast('음성 생성 중 오류 발생');
+            } finally {
+                this.voiceLoading = false;
+            }
+        },
+
+        // Decode base64 16-bit mono PCM into an AudioBuffer at the given sample rate.
+        decodeL16(base64Data, sampleRate) {
+            const bytes = Uint8Array.from(atob(base64Data), (c) => c.charCodeAt(0));
+            const view = new DataView(bytes.buffer);
+            const numSamples = Math.floor(bytes.length / 2); // a trailing odd byte is ignored
+            const ctx = this.getAudioCtx();
+            const buffer = ctx.createBuffer(1, numSamples, sampleRate);
+            const ch = buffer.getChannelData(0);
+
+            // Gemini TTS labels its output audio/L16, but the samples are little-endian
+            // (its docs convert them as s16le); a big-endian read turns the voice into noise.
+            for (let i = 0; i < numSamples; i++) {
+                ch[i] = view.getInt16(i * 2, true) / 32768; // scale to [-1, 1)
+            }
+            return buffer;
+        },
+
+        // Audition the generated voice clip on its own, at the current voice volume.
+        async playVoiceOnly() {
+            if (!this.voiceBuffer) return;
+            const audio = this.getAudioCtx();
+            if (audio.state === 'suspended') await audio.resume();
+            const level = audio.createGain();
+            level.gain.value = this.voiceVolume;
+            const player = Object.assign(audio.createBufferSource(), { buffer: this.voiceBuffer });
+            player.connect(level).connect(audio.destination);
+            player.start();
+        },
+
+        // Discard the generated voice clip and its cached payload, then notify the user.
+        clearVoice() {
+            const cleared = { voiceBuffer: null, voiceAudioData: null, voiceMimeType: '' };
+            Object.assign(this, cleared);
+            this.toast('음성 삭제됨');
+        },
+
        // ===== AI 어시스트 =====
        async generateWithAi() {
            if (this.aiLoading || !this.aiPrompt.trim()) return;
--- a/routes/web.php
+++ b/routes/web.php
@@ -424,6 +424,7 @@
        // 사운드 로고 생성기
        Route::get('/sound-logo', [RdController::class, 'soundLogo'])->name('sound-logo');
        Route::post('/sound-logo/generate', [RdController::class, 'soundLogoGenerate'])->name('sound-logo.generate');
+        Route::post('/sound-logo/tts', [RdController::class, 'soundLogoTts'])->name('sound-logo.tts');
    });

    // 일일 스크럼 (Blade 화면만)