feat: [sound-logo] TTS 음성 오버레이 기능 추가

- Gemini TTS API 연동 (한국어 Kore 음성)
- 사이드바에 음성 오버레이 컨트롤: 텍스트 입력, 시작 시점, 볼륨
- 재생/WAV 내보내기 시 신스 + 음성 자동 합성
- POST /rd/sound-logo/tts 엔드포인트 추가
- L16 PCM → AudioBuffer 디코더 구현
This commit is contained in:
김보곤
2026-03-08 12:44:05 +09:00
parent 75dbe2910a
commit 301369bb37
3 changed files with 207 additions and 2 deletions

View File

@@ -452,4 +452,63 @@ public function soundLogoGenerate(Request $request): JsonResponse
return response()->json(['success' => true, 'data' => $result]);
}
/**
 * Generate a text-to-speech clip for the sound logo through the Gemini TTS API.
 *
 * Validates the `text` field (required string, up to 200 characters), asks the
 * `gemini-2.5-flash-preview-tts` model to speak it with the prebuilt "Kore"
 * voice, and relays the first inline audio payload found in the reply.
 *
 * @param  Request  $request  Incoming request carrying the `text` to speak.
 * @return JsonResponse `{success: true, audio_data, mime_type}` on success;
 *                      `{success: false, error}` with HTTP 500 when the API key
 *                      is missing, the upstream call fails, or no audio is returned.
 */
public function soundLogoTts(Request $request): JsonResponse
{
    $validated = $request->validate([
        'text' => 'required|string|max:200',
    ]);

    $apiKey = config('services.gemini.api_key');
    $baseUrl = config('services.gemini.base_url', 'https://generativelanguage.googleapis.com/v1beta');

    if (! $apiKey) {
        return response()->json(['success' => false, 'error' => 'Gemini API 키가 설정되지 않았습니다.'], 500);
    }

    try {
        // The key is sent in the x-goog-api-key header instead of the query
        // string: connection-error messages typically embed the request URL,
        // and that message is written to the log in the catch block below.
        $response = Http::timeout(30)
            ->withHeaders(['x-goog-api-key' => $apiKey])
            ->post(
                "{$baseUrl}/models/gemini-2.5-flash-preview-tts:generateContent",
                [
                    'contents' => [
                        ['parts' => [['text' => $validated['text']]]],
                    ],
                    'generationConfig' => [
                        'responseModalities' => ['AUDIO'],
                        'speechConfig' => [
                            'voiceConfig' => [
                                'prebuiltVoiceConfig' => [
                                    'voiceName' => 'Kore',
                                ],
                            ],
                        ],
                    ],
                ]
            );
    } catch (\Exception $e) {
        Log::error('SoundLogo TTS 생성 실패', ['error' => $e->getMessage()]);

        return response()->json(['success' => false, 'error' => 'TTS 서버 연결 실패'], 500);
    }

    if (! $response->successful()) {
        // Keep a trace of upstream rejections; the client only sees the status code.
        Log::error('SoundLogo TTS 생성 실패', [
            'status' => $response->status(),
            'body' => mb_substr($response->body(), 0, 500),
        ]);

        return response()->json(['success' => false, 'error' => 'TTS 생성 실패: '.$response->status()], 500);
    }

    $data = $response->json();
    $parts = $data['candidates'][0]['content']['parts'] ?? [];

    // Take the first part that actually carries audio rather than assuming
    // it is always the first element of the parts list.
    $inlineData = null;
    foreach (is_array($parts) ? $parts : [] as $part) {
        if (is_array($part) && ! empty($part['inlineData']['data'])) {
            $inlineData = $part['inlineData'];
            break;
        }
    }

    if ($inlineData === null) {
        return response()->json(['success' => false, 'error' => '음성 데이터를 받지 못했습니다.'], 500);
    }

    return response()->json([
        'success' => true,
        'audio_data' => $inlineData['data'],
        'mime_type' => $inlineData['mimeType'] ?? 'audio/L16;rate=24000',
    ]);
}
}

View File

@@ -226,6 +226,9 @@
<span style="font-size: 11px; color: var(--sl-text2);">
<i class="ri-music-line"></i> <span x-text="notes.length"></span> 음표 |
<span x-text="getTotalDuration().toFixed(2)"></span>
<template x-if="voiceBuffer">
<span style="color: var(--sl-green);"> | <i class="ri-mic-fill"></i> 음성</span>
</template>
</span>
</div>
@@ -278,6 +281,44 @@
</div>
</div>
<!-- Voice Overlay: TTS text input, generate button and, once a clip exists, its controls -->
<div class="sl-section">
    <div class="sl-section-title">음성 오버레이 (TTS)</div>
    <div style="margin-bottom: 6px;">
        <input class="sl-input" x-model="voiceText" placeholder='예: 쌤!, SAM' style="margin-bottom: 6px;">
        <!-- Disabled while a request is in flight or when the text is blank -->
        <button class="sl-btn sm primary" style="width:100%;" @click="generateVoice()" :disabled="voiceLoading || !voiceText.trim()">
            <template x-if="!voiceLoading">
                <span><i class="ri-mic-line"></i> 음성 생성</span>
            </template>
            <template x-if="voiceLoading">
                <span><i class="ri-loader-4-line" style="animation: spin 1s linear infinite;"></i> 생성 ...</span>
            </template>
        </button>
    </div>
    <!-- Everything below is rendered only while a decoded voice clip is held in voiceBuffer -->
    <template x-if="voiceBuffer">
        <div>
            <div style="padding: 6px 8px; border-radius: 6px; background: rgba(16,185,129,.1); border: 1px solid rgba(16,185,129,.3); margin-bottom: 8px;">
                <div style="font-size: 11px; color: var(--sl-green); display: flex; align-items: center; gap: 4px;">
                    <i class="ri-checkbox-circle-fill"></i>
                    <!-- Quotes are written as &quot; because HTML attributes have no backslash escapes -->
                    <span x-text="'&quot;' + voiceText + '&quot; · ' + voiceBuffer.duration.toFixed(1) + '초'"></span>
                </div>
            </div>
            <!-- Start offset: the slider works in tenths of a second (0-30 maps to 0.0-3.0 s) -->
            <div class="sl-param">
                <div class="sl-param-label"><span>시작 시점</span><span x-text="voiceDelay.toFixed(1) + '초'"></span></div>
                <input type="range" class="sl-slider" min="0" max="30" :value="voiceDelay * 10" @input="voiceDelay = $event.target.value / 10">
            </div>
            <!-- Voice volume: the slider works in percent (0-100 maps to 0.0-1.0) -->
            <div class="sl-param">
                <div class="sl-param-label"><span>음성 볼륨</span><span x-text="(voiceVolume * 100).toFixed(0) + '%'"></span></div>
                <input type="range" class="sl-slider" min="0" max="100" :value="voiceVolume * 100" @input="voiceVolume = $event.target.value / 100">
            </div>
            <div style="display: flex; gap: 4px;">
                <button class="sl-btn sm" @click="playVoiceOnly()" style="flex:1;"><i class="ri-play-fill"></i> 음성만</button>
                <button class="sl-btn sm danger" @click="clearVoice()" style="flex:1;"><i class="ri-delete-bin-line"></i> 삭제</button>
            </div>
        </div>
    </template>
</div>
<!-- Saved Sounds -->
<div class="sl-section">
<div class="sl-section-title"> 사운드</div>
@@ -561,6 +602,15 @@ function soundLogo() {
aiLoading: false,
aiResult: null,
aiError: '',
// Voice overlay (TTS) state
// Text typed by the user; sent to the TTS endpoint as `text`.
voiceText: '',
// Base64 audio payload from the last successful TTS response (null when none).
voiceAudioData: null,
// MIME type reported with that payload; its `rate=` value gives the sample rate.
voiceMimeType: '',
// True while a TTS request is in flight.
voiceLoading: false,
// Offset in seconds from the start of playback at which the voice begins.
voiceDelay: 0.0,
// Gain applied to the voice clip (UI slider covers 0 to 1).
voiceVolume: 0.8,
// Decoded voice clip used for playback and WAV export; null when no voice exists.
voiceBuffer: null,
aiQuickPrompts: [
'밝고 미래적인 IT 기업 로고',
'따뜻하고 친근한 카페 알림음',
@@ -905,10 +955,22 @@ function soundLogo() {
t += n.duration || 0.2;
});
// Voice overlay
if (this.voiceBuffer) {
const voiceSrc = ctx.createBufferSource();
voiceSrc.buffer = this.voiceBuffer;
const voiceGain = ctx.createGain();
voiceGain.gain.value = this.voiceVolume;
voiceSrc.connect(voiceGain).connect(ctx.destination);
voiceSrc.start(startTime + this.voiceDelay);
}
// Draw waveform
this.drawWaveform(ctx);
const totalMs = (t - startTime) * 1000 + (this.adsr.release || 500);
const synthMs = (t - startTime) * 1000 + (this.adsr.release || 500);
const voiceMs = this.voiceBuffer ? (this.voiceDelay + this.voiceBuffer.duration) * 1000 + 200 : 0;
const totalMs = Math.max(synthMs, voiceMs);
setTimeout(() => {
this.isPlaying = false;
this.playingIdx = -1;
@@ -974,7 +1036,9 @@ function soundLogo() {
if (this.notes.length === 0) return this.toast('음표를 추가해 주세요');
const sampleRate = 44100;
const totalDur = this.getTotalDuration() + this.adsr.release / 1000 + 0.5;
const synthDur = this.getTotalDuration() + this.adsr.release / 1000 + 0.5;
const voiceDur = this.voiceBuffer ? this.voiceDelay + this.voiceBuffer.duration + 0.5 : 0;
const totalDur = Math.max(synthDur, voiceDur);
const offline = new OfflineAudioContext(2, sampleRate * totalDur, sampleRate);
let t = 0.05;
@@ -1009,6 +1073,16 @@ function soundLogo() {
t += n.duration || 0.2;
});
// Voice overlay in offline context
if (this.voiceBuffer) {
const voiceSrc = offline.createBufferSource();
voiceSrc.buffer = this.voiceBuffer;
const voiceGain = offline.createGain();
voiceGain.gain.value = this.voiceVolume;
voiceSrc.connect(voiceGain).connect(offline.destination);
voiceSrc.start(0.05 + this.voiceDelay);
}
const buffer = await offline.startRendering();
const wav = this.bufferToWav(buffer);
const blob = new Blob([wav], { type: 'audio/wav' });
@@ -1060,6 +1134,77 @@ function soundLogo() {
return buf;
},
// ===== 음성 오버레이 (TTS) =====
async generateVoice() {
if (this.voiceLoading || !this.voiceText.trim()) return;
this.voiceLoading = true;
this.voiceBuffer = null;
try {
const res = await fetch('{{ route("rd.sound-logo.tts") }}', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'X-CSRF-TOKEN': document.querySelector('meta[name="csrf-token"]').content,
},
body: JSON.stringify({ text: this.voiceText }),
});
const data = await res.json();
if (data.success && data.audio_data) {
this.voiceAudioData = data.audio_data;
this.voiceMimeType = data.mime_type || 'audio/L16;rate=24000';
// 샘플레이트 파싱
const rateMatch = this.voiceMimeType.match(/rate=(\d+)/);
const sampleRate = rateMatch ? parseInt(rateMatch[1]) : 24000;
this.voiceBuffer = this.decodeL16(data.audio_data, sampleRate);
this.toast('음성 생성 완료: "' + this.voiceText + '"');
} else {
this.toast(data.error || '음성 생성 실패');
}
} catch (e) {
this.toast('음성 생성 중 오류 발생');
} finally {
this.voiceLoading = false;
}
},
decodeL16(base64Data, sampleRate) {
const binaryStr = atob(base64Data);
const bytes = new Uint8Array(binaryStr.length);
for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i);
const view = new DataView(bytes.buffer);
const numSamples = Math.floor(bytes.length / 2);
const ctx = this.getAudioCtx();
const buffer = ctx.createBuffer(1, numSamples, sampleRate);
const ch = buffer.getChannelData(0);
for (let i = 0; i < numSamples; i++) {
ch[i] = view.getInt16(i * 2, false) / 32768; // L16 = big-endian
}
return buffer;
},
async playVoiceOnly() {
if (!this.voiceBuffer) return;
const ctx = this.getAudioCtx();
if (ctx.state === 'suspended') await ctx.resume();
const src = ctx.createBufferSource();
src.buffer = this.voiceBuffer;
const gain = ctx.createGain();
gain.gain.value = this.voiceVolume;
src.connect(gain).connect(ctx.destination);
src.start();
},
clearVoice() {
this.voiceBuffer = null;
this.voiceAudioData = null;
this.voiceMimeType = '';
this.toast('음성 삭제됨');
},
// ===== AI 어시스트 =====
async generateWithAi() {
if (this.aiLoading || !this.aiPrompt.trim()) return;

View File

@@ -424,6 +424,7 @@
// Sound logo generator: page (GET), generation endpoint (POST) and TTS voice endpoint (POST)
Route::get('/sound-logo', [RdController::class, 'soundLogo'])->name('sound-logo');
Route::post('/sound-logo/generate', [RdController::class, 'soundLogoGenerate'])->name('sound-logo.generate');
Route::post('/sound-logo/tts', [RdController::class, 'soundLogoTts'])->name('sound-logo.tts');
});
// 일일 스크럼 (Blade 화면만)