feat: 회의록 자동 화자 분리(Phase 2) 구현 및 세그먼트 저장 에러 수정

- GoogleCloudService에 speechToTextWithDiarization 메서드 추가
- Google STT V1 diarizationConfig 활성화로 자동 화자 구분
- MeetingMinuteService에 processDiarization 메서드 추가
- POST /{id}/diarize 엔드포인트 및 라우트 추가
- 프론트엔드에 '화자 분리' 버튼 추가 (RecordingControlBar)
- saveSegments 컨트롤러에 try-catch 에러 핸들링 추가
- 빈 텍스트 세그먼트 필터링 로직 추가 (서버/클라이언트 양쪽)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
김보곤
2026-02-10 10:29:16 +09:00
parent 0f312bcf77
commit b2fbd3d113
5 changed files with 418 additions and 20 deletions

View File

@@ -135,21 +135,46 @@ public function saveSegments(Request $request, int $id): JsonResponse
$validated = $request->validate([
'segments' => 'required|array',
'segments.*.speaker_name' => 'required|string|max:100',
'segments.*.speaker_name' => 'nullable|string|max:100',
'segments.*.speaker_label' => 'nullable|string|max:20',
'segments.*.text' => 'required|string',
'segments.*.text' => 'nullable|string',
'segments.*.start_time_ms' => 'nullable|integer|min:0',
'segments.*.end_time_ms' => 'nullable|integer|min:0',
'segments.*.is_manual_speaker' => 'nullable|boolean',
]);
$meeting = $this->service->saveSegments($meeting, $validated['segments']);
// 빈 텍스트 세그먼트 필터링
$segments = array_values(array_filter($validated['segments'], function ($seg) {
return ! empty(trim($seg['text'] ?? ''));
}));
return response()->json([
'success' => true,
'message' => '세그먼트가 저장되었습니다.',
'data' => $meeting,
]);
if (empty($segments)) {
return response()->json([
'success' => false,
'message' => '저장할 세그먼트가 없습니다.',
], 422);
}
try {
$meeting = $this->service->saveSegments($meeting, $segments);
return response()->json([
'success' => true,
'message' => '세그먼트가 저장되었습니다.',
'data' => $meeting,
]);
} catch (\Exception $e) {
\Illuminate\Support\Facades\Log::error('MeetingMinute: 세그먼트 저장 실패', [
'meeting_id' => $id,
'error' => $e->getMessage(),
'trace' => $e->getTraceAsString(),
]);
return response()->json([
'success' => false,
'message' => '세그먼트 저장 중 오류가 발생했습니다: ' . $e->getMessage(),
], 500);
}
}
public function uploadAudio(Request $request, int $id): JsonResponse
@@ -218,6 +243,44 @@ public function summarize(int $id): JsonResponse
]);
}
/**
 * Run automatic speaker diarization for a meeting's stored audio.
 *
 * Optional request inputs:
 *  - min_speakers (default 2)
 *  - max_speakers (default 6)
 *
 * Both values are normalized so that 1 <= min_speakers <= max_speakers
 * before being handed to the service.
 *
 * Responses:
 *  - 404 when no meeting exists for $id
 *  - 422 when the meeting has no audio_gcs_uri
 *  - 500 when the service returns a falsy result
 *  - 200 with the reloaded meeting (segments included) and speaker_count
 */
public function diarize(Request $request, int $id): JsonResponse
{
    $meeting = MeetingMinute::find($id);

    if (! $meeting) {
        return response()->json([
            'success' => false,
            'message' => '회의록을 찾을 수 없습니다.',
        ], 404);
    }

    if (empty($meeting->audio_gcs_uri)) {
        return response()->json([
            'success' => false,
            'message' => '오디오 파일이 없습니다. 먼저 녹음을 진행해주세요.',
        ], 422);
    }

    // These come straight from the client. Clamp them so a zero, negative or
    // inverted range (max < min) is never forwarded to the speech request.
    $minSpeakers = max(1, (int) $request->input('min_speakers', 2));
    $maxSpeakers = max($minSpeakers, (int) $request->input('max_speakers', 6));

    $result = $this->service->processDiarization($meeting, $minSpeakers, $maxSpeakers);

    if (! $result) {
        return response()->json([
            'success' => false,
            'message' => '자동 화자 분리에 실패했습니다.',
        ], 500);
    }

    return response()->json([
        'success' => true,
        'message' => "자동 화자 분리가 완료되었습니다. ({$result['speaker_count']}명 감지)",
        // Reload from the database so the response reflects the replaced segments.
        'data' => $meeting->fresh()->load('segments'),
        'speaker_count' => $result['speaker_count'],
    ]);
}
public function downloadAudio(Request $request, int $id): Response|JsonResponse
{
$meeting = MeetingMinute::find($id);

View File

@@ -288,6 +288,218 @@ private function waitForSttOperation(string $operationName, int $maxAttempts = 6
return null;
}
/**
 * Transcribe an audio object in GCS with speaker diarization enabled.
 *
 * Starts a `speech:longrunningrecognize` operation, waits for it through
 * waitForSttDiarizationOperation(), and converts the raw operation payload
 * with parseDiarizationResult().
 *
 * @param string $gcsUri       URI of the audio object, sent as `audio.uri`
 * @param string $languageCode Language code sent as `config.languageCode`
 * @param int    $minSpeakers  Sent as `diarizationConfig.minSpeakerCount`
 * @param int    $maxSpeakers  Sent as `diarizationConfig.maxSpeakerCount`
 *
 * @return array|null Parsed result ('segments', 'full_transcript',
 *                    'speaker_count'), or null when no access token is
 *                    available, the request fails, no operation name is
 *                    returned, waiting yields nothing, or an exception occurs.
 */
public function speechToTextWithDiarization(
    string $gcsUri,
    string $languageCode = 'ko-KR',
    int $minSpeakers = 2,
    int $maxSpeakers = 6
): ?array {
    $accessToken = $this->getAccessToken();
    if (! $accessToken) {
        return null;
    }

    $recognitionConfig = [
        'encoding' => 'WEBM_OPUS',
        'sampleRateHertz' => 48000,
        'languageCode' => $languageCode,
        'enableAutomaticPunctuation' => true,
        'model' => 'latest_long',
        'enableWordTimeOffsets' => true,
        'diarizationConfig' => [
            'enableSpeakerDiarization' => true,
            'minSpeakerCount' => $minSpeakers,
            'maxSpeakerCount' => $maxSpeakers,
        ],
    ];

    $payload = [
        'config' => $recognitionConfig,
        'audio' => [
            'uri' => $gcsUri,
        ],
    ];

    try {
        $response = Http::withToken($accessToken)
            ->post('https://speech.googleapis.com/v1/speech:longrunningrecognize', $payload);

        if (! $response->successful()) {
            Log::error('Google Cloud: STT Diarization 요청 실패', ['response' => $response->body()]);

            return null;
        }

        // The operation name is the handle used to poll for completion.
        $operationName = $response->json()['name'] ?? null;
        if (! $operationName) {
            Log::error('Google Cloud: STT Diarization 작업 이름 없음');

            return null;
        }

        Log::info('Google Cloud: STT Diarization 요청 시작', ['operationName' => $operationName]);

        $rawResult = $this->waitForSttDiarizationOperation($operationName);

        return $rawResult ? $this->parseDiarizationResult($rawResult) : null;
    } catch (\Exception $e) {
        Log::error('Google Cloud: STT Diarization 예외', ['error' => $e->getMessage()]);

        return null;
    }
}
/**
 * Poll a Speech-to-Text long-running operation until it is done.
 *
 * Sleeps 5 seconds before every poll and gives up after $maxAttempts polls.
 * A poll that returns a non-successful HTTP response is logged and retried
 * on the next iteration rather than aborting the wait.
 *
 * @param string $operationName Operation name returned when the request was started
 * @param int    $maxAttempts   Maximum number of polls before timing out
 *
 * @return array|null The raw operation payload once `done` is truthy and no
 *                    `error` is present; null when no access token is
 *                    available, the operation reports an error, or the
 *                    attempts are exhausted.
 */
private function waitForSttDiarizationOperation(string $operationName, int $maxAttempts = 60): ?array
{
    $token = $this->getAccessToken();
    if (! $token) {
        return null;
    }

    for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
        sleep(5);

        $response = Http::withToken($token)
            ->get("https://speech.googleapis.com/v1/operations/{$operationName}");

        if (! $response->successful()) {
            // Keep polling, but leave a trace: otherwise a run of failed polls
            // is indistinguishable from a slow operation once the timeout hits.
            Log::warning('Google Cloud: STT Diarization 상태 조회 실패', [
                'operationName' => $operationName,
                'attempt' => $attempt,
                'status' => $response->status(),
            ]);

            continue;
        }

        $result = $response->json();

        // Not finished yet (missing or falsy `done`): poll again.
        if (empty($result['done'])) {
            continue;
        }

        if (isset($result['error'])) {
            Log::error('Google Cloud: STT Diarization 작업 실패', ['error' => $result['error']]);

            return null;
        }

        return $result;
    }

    Log::error('Google Cloud: STT Diarization 작업 타임아웃', [
        'operationName' => $operationName,
        'attempts' => $maxAttempts,
    ]);

    return null;
}
/**
 * Convert a finished diarization operation payload into per-speaker segments.
 *
 * Word-level entries are read from `alternatives[0].words` of the LAST entry
 * in `response.results`. Consecutive words with the same `speakerTag` are
 * merged into one segment. Each segment spans from the start time of its
 * first word to the end time of its last word.
 *
 * When no word-level entries are present, all transcripts are joined into a
 * single segment attributed to speaker 1.
 *
 * @param array $operationResult Raw operation payload (expects `response.results`)
 *
 * @return array|null ['segments' => list of segment arrays,
 *                     'full_transcript' => string,
 *                     'speaker_count' => int], or null when there are no results.
 */
private function parseDiarizationResult(array $operationResult): ?array
{
    $results = $operationResult['response']['results'] ?? [];
    if (empty($results)) {
        return null;
    }

    // With diarization enabled, the last result is expected to carry the
    // word-level information for the whole audio.
    $lastResult = end($results);
    $words = $lastResult['alternatives'][0]['words'] ?? [];

    if (empty($words)) {
        // No word-level data: fall back to the plain transcripts as one segment.
        $transcript = '';
        foreach ($results as $res) {
            $transcript .= ($res['alternatives'][0]['transcript'] ?? '') . ' ';
        }
        $transcript = trim($transcript);

        return [
            'segments' => [[
                'speaker_name' => '화자 1',
                'speaker_label' => '1',
                'text' => $transcript,
                'start_time_ms' => 0,
                'end_time_ms' => null,
                'is_manual_speaker' => false,
            ]],
            'full_transcript' => '[화자 1] ' . $transcript,
            'speaker_count' => 1,
        ];
    }

    // Builds one segment array; shared by the in-loop flush and the final flush.
    $makeSegment = static fn ($speaker, array $spokenWords, int $startMs, int $endMs): array => [
        'speaker_name' => '화자 ' . $speaker,
        'speaker_label' => (string) $speaker,
        'text' => trim(implode(' ', $spokenWords)),
        'start_time_ms' => $startMs,
        'end_time_ms' => $endMs,
        'is_manual_speaker' => false,
    ];

    // Group consecutive words by speaker tag.
    $segments = [];
    $currentSpeaker = null;
    $currentWords = [];
    $segmentStartMs = 0;
    $segmentEndMs = 0;

    foreach ($words as $word) {
        $speakerTag = $word['speakerTag'] ?? 0;
        $startMs = $this->parseGoogleTimeToMs($word['startTime'] ?? '0s');
        $endMs = $this->parseGoogleTimeToMs($word['endTime'] ?? '0s');

        if ($speakerTag !== $currentSpeaker) {
            // Speaker changed: close the running segment at its own last
            // word's end time, not at the next speaker's start time.
            if (! empty($currentWords)) {
                $segments[] = $makeSegment($currentSpeaker, $currentWords, $segmentStartMs, $segmentEndMs);
            }

            $currentSpeaker = $speakerTag;
            $currentWords = [];
            // Every segment, including the first, starts at its first word.
            $segmentStartMs = $startMs;
        }

        $currentWords[] = $word['word'] ?? '';
        $segmentEndMs = $endMs;
    }

    // Flush the trailing segment.
    if (! empty($currentWords)) {
        $segments[] = $makeSegment($currentSpeaker, $currentWords, $segmentStartMs, $segmentEndMs);
    }

    // One "[speaker] text" line per segment.
    $fullTranscript = '';
    foreach ($segments as $seg) {
        $fullTranscript .= "[{$seg['speaker_name']}] {$seg['text']}\n";
    }

    // Number of distinct speaker labels across all segments.
    $speakerCount = count(array_unique(array_column($segments, 'speaker_label')));

    return [
        'segments' => $segments,
        'full_transcript' => trim($fullTranscript),
        'speaker_count' => $speakerCount,
    ];
}
/**
 * Convert a Google STT duration string such as "1.500s" to whole milliseconds.
 *
 * @param string $timeStr Duration made of digits and dots followed by "s"
 *
 * @return int Rounded milliseconds, or 0 when the string does not match that format
 */
private function parseGoogleTimeToMs(string $timeStr): int
{
    $isDuration = preg_match('/^([\d.]+)s$/', $timeStr, $captured) === 1;
    if (! $isDuration) {
        return 0;
    }

    $seconds = (float) $captured[1];

    return (int) round($seconds * 1000);
}
/**
* GCS 파일 삭제
*/

View File

@@ -203,6 +203,79 @@ public function logSttUsage(int $durationSeconds): void
AiTokenHelper::saveSttUsage('회의록-음성인식', $durationSeconds);
}
/**
 * Run automatic speaker diarization on a meeting's uploaded audio and
 * replace the meeting's segments with the result.
 *
 * Flow:
 *  1. Mark the meeting as STATUS_PROCESSING.
 *  2. Call GoogleCloudService::speechToTextWithDiarization().
 *  3. In one database transaction: delete the existing segments, insert the
 *     new ones, and store the rebuilt full_transcript with STATUS_DRAFT.
 *  4. Record STT usage when duration_seconds is positive.
 *
 * The meeting is marked STATUS_FAILED when the STT call yields no segments or
 * an exception is thrown.
 *
 * @param MeetingMinute $meeting     Meeting whose audio_gcs_uri is transcribed
 * @param int           $minSpeakers Minimum speaker count passed to the STT call
 * @param int           $maxSpeakers Maximum speaker count passed to the STT call
 *
 * @return array|null ['segments', 'speaker_count', 'full_transcript'], or null
 *                    when there is no audio or diarization failed.
 */
public function processDiarization(MeetingMinute $meeting, int $minSpeakers = 2, int $maxSpeakers = 6): ?array
{
    if (empty($meeting->audio_gcs_uri)) {
        return null;
    }

    $meeting->update(['status' => MeetingMinute::STATUS_PROCESSING]);

    try {
        $result = $this->googleCloudService->speechToTextWithDiarization(
            $meeting->audio_gcs_uri,
            $meeting->stt_language ?? 'ko-KR',
            $minSpeakers,
            $maxSpeakers
        );

        if (! $result || empty($result['segments'])) {
            Log::warning('MeetingMinute: 화자 분리 결과 없음', ['meeting_id' => $meeting->id]);
            $meeting->update(['status' => MeetingMinute::STATUS_FAILED]);

            return null;
        }

        // Replace the existing segments atomically. Without a transaction, a
        // failure while inserting would leave the old segments deleted and
        // only part of the new ones stored.
        // NOTE(review): assumes the segment model uses the same connection as
        // the meeting model - confirm.
        $fullTranscript = $meeting->getConnection()->transaction(
            function () use ($meeting, $result): string {
                $meeting->segments()->delete();

                $lines = [];
                foreach ($result['segments'] as $index => $segment) {
                    $speakerName = $segment['speaker_name'] ?? '화자 1';
                    $text = $segment['text'] ?? '';

                    MeetingMinuteSegment::create([
                        'meeting_minute_id' => $meeting->id,
                        'segment_order' => $index,
                        'speaker_name' => $speakerName,
                        'speaker_label' => $segment['speaker_label'] ?? null,
                        'text' => $text,
                        'start_time_ms' => $segment['start_time_ms'] ?? 0,
                        'end_time_ms' => $segment['end_time_ms'] ?? null,
                        'is_manual_speaker' => false,
                    ]);

                    $lines[] = "[{$speakerName}] {$text}";
                }

                $transcript = trim(implode("\n", $lines));

                // Kept last so the meeting row changes only after every
                // segment has been written.
                $meeting->update([
                    'full_transcript' => $transcript,
                    'status' => MeetingMinute::STATUS_DRAFT,
                ]);

                return $transcript;
            }
        );

        // Record STT usage.
        if ($meeting->duration_seconds > 0) {
            AiTokenHelper::saveSttUsage('회의록-화자분리', $meeting->duration_seconds);
        }

        return [
            'segments' => $result['segments'],
            'speaker_count' => $result['speaker_count'] ?? 1,
            'full_transcript' => $fullTranscript,
        ];
    } catch (\Exception $e) {
        Log::error('MeetingMinute: 화자 분리 실패', [
            'meeting_id' => $meeting->id,
            'error' => $e->getMessage(),
        ]);
        $meeting->update(['status' => MeetingMinute::STATUS_FAILED]);

        return null;
    }
}
private function buildSummaryPrompt(string $transcript): string
{
return <<<PROMPT

View File

@@ -25,6 +25,7 @@
uploadAudio: (id) => `/juil/meeting-minutes/${id}/upload-audio`,
summarize: (id) => `/juil/meeting-minutes/${id}/summarize`,
downloadAudio: (id) => `/juil/meeting-minutes/${id}/download-audio`,
diarize: (id) => `/juil/meeting-minutes/${id}/diarize`,
logSttUsage: '/juil/meeting-minutes/log-stt-usage',
};
@@ -265,6 +266,7 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
const [titleValue, setTitleValue] = useState('');
const [saving, setSaving] = useState(false);
const [summarizing, setSummarizing] = useState(false);
const [diarizing, setDiarizing] = useState(false);
const [alertModal, setAlertModal] = useState(null);
// refs
@@ -432,13 +434,15 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
// 1. 세그먼트 저장
setSaving(true);
try {
const segmentsToSave = localSegments.filter(s => s.is_final).map((s, i) => ({
speaker_name: s.speaker_name,
text: s.text,
start_time_ms: s.start_time_ms || 0,
end_time_ms: s.end_time_ms || null,
is_manual_speaker: true,
}));
const segmentsToSave = localSegments
.filter(s => s.is_final && s.text && s.text.trim())
.map((s, i) => ({
speaker_name: s.speaker_name || '화자 1',
text: s.text.trim(),
start_time_ms: Math.round(s.start_time_ms || 0),
end_time_ms: s.end_time_ms ? Math.round(s.end_time_ms) : null,
is_manual_speaker: true,
}));
if (segmentsToSave.length > 0) {
await apiFetch(API.saveSegments(meetingId), {
@@ -561,6 +565,37 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
}
};
// ===== Automatic speaker diarization =====
// Asks the backend to re-transcribe the stored audio with speaker diarization,
// then reloads the meeting. Requires meeting.audio_gcs_uri; the user must
// confirm first because the existing conversation record is replaced.
const handleDiarize = async () => {
    if (!meeting || !meeting.audio_gcs_uri) {
        setAlertModal({
            title: '오디오 파일이 없습니다',
            message: '자동 화자 분리를 실행하려면 먼저 녹음을 진행하여 오디오를 저장해주세요.',
            icon: 'warning',
        });
        return;
    }

    if (!confirm('자동 화자 분리를 실행하면 기존 대화 기록이 새로운 결과로 교체됩니다. 계속하시겠습니까?')) {
        return;
    }

    // Use the number of configured speakers as the upper bound (4 when none
    // are configured), but never let it fall below the lower bound: with a
    // single configured speaker the request used to carry max (1) < min (2).
    const minSpeakers = 2;
    const maxSpeakers = Math.max(minSpeakers, speakers.length || 4);

    setDiarizing(true);
    try {
        const res = await apiFetch(API.diarize(meetingId), {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ min_speakers: minSpeakers, max_speakers: maxSpeakers }),
        });
        showToast(res.message || '자동 화자 분리가 완료되었습니다.');
        await loadMeeting();
    } catch (e) {
        showToast('화자 분리 실패: ' + e.message, 'error');
    } finally {
        // Always clear the busy flag so the action buttons are re-enabled.
        setDiarizing(false);
    }
};
if (loading) {
return <div className="flex items-center justify-center h-96"><div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600"></div></div>;
}
@@ -666,9 +701,12 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
onAddSpeaker={addSpeaker}
onLanguageChange={setSttLanguage}
onSummarize={handleSummarize}
onDiarize={handleDiarize}
saving={saving}
summarizing={summarizing}
diarizing={diarizing}
hasSegments={segments.length > 0}
hasAudio={!!meeting?.audio_gcs_uri}
/>
</div>
);
@@ -819,10 +857,10 @@ function SummaryPanel({ meeting, onSummarize, summarizing }) {
}
// ========== RecordingControlBar ==========
function RecordingControlBar({ isRecording, recordingTime, currentSpeakerIdx, speakers, sttLanguage, onStart, onStop, onSwitchSpeaker, onAddSpeaker, onLanguageChange, onSummarize, saving, summarizing, hasSegments }) {
function RecordingControlBar({ isRecording, recordingTime, currentSpeakerIdx, speakers, sttLanguage, onStart, onStop, onSwitchSpeaker, onAddSpeaker, onLanguageChange, onSummarize, onDiarize, saving, summarizing, diarizing, hasSegments, hasAudio }) {
return (
<div className="bg-white border-t shadow-lg px-4 py-3 flex-shrink-0">
<div className="flex items-center justify-between gap-4">
<div className="flex items-center justify-between gap-3">
{/* Language */}
<div className="flex items-center gap-2">
<select value={sttLanguage} onChange={(e) => onLanguageChange(e.target.value)} disabled={isRecording} className="text-sm border rounded px-2 py-1.5 bg-white disabled:bg-gray-100 disabled:text-gray-400">
@@ -853,7 +891,7 @@ function RecordingControlBar({ isRecording, recordingTime, currentSpeakerIdx, sp
중지
</button>
) : (
<button onClick={onStart} disabled={saving} className="bg-red-500 text-white px-5 py-2 rounded-full hover:bg-red-600 transition flex items-center gap-2 text-sm font-medium shadow-lg disabled:opacity-50">
<button onClick={onStart} disabled={saving || diarizing} className="bg-red-500 text-white px-5 py-2 rounded-full hover:bg-red-600 transition flex items-center gap-2 text-sm font-medium shadow-lg disabled:opacity-50">
<span className="w-3 h-3 bg-white rounded-full"></span>
녹음
</button>
@@ -862,11 +900,22 @@ function RecordingControlBar({ isRecording, recordingTime, currentSpeakerIdx, sp
{isRecording && <span className="flex items-center gap-1 text-xs text-red-500"><span className="w-2 h-2 bg-red-500 rounded-full animate-pulse"></span>REC</span>}
</div>
{/* AI Summary Button */}
{/* Status indicators */}
<div className="flex items-center gap-2">
{saving && <span className="text-xs text-blue-500 flex items-center gap-1"><div className="animate-spin rounded-full h-3 w-3 border-b-2 border-blue-500"></div>저장 ...</span>}
{summarizing && <span className="text-xs text-purple-500 flex items-center gap-1"><div className="animate-spin rounded-full h-3 w-3 border-b-2 border-purple-500"></div>요약 ...</span>}
<button onClick={onSummarize} disabled={summarizing || isRecording || !hasSegments} className="bg-purple-600 text-white px-3 py-1.5 rounded-lg hover:bg-purple-700 transition text-xs font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1">
{diarizing && <span className="text-xs text-indigo-500 flex items-center gap-1"><div className="animate-spin rounded-full h-3 w-3 border-b-2 border-indigo-500"></div>화자 분리 ...</span>}
</div>
{/* Action Buttons */}
<div className="flex items-center gap-2">
{/* 자동 화자 분리 버튼 */}
<button onClick={onDiarize} disabled={diarizing || summarizing || isRecording || !hasAudio} className="bg-indigo-600 text-white px-3 py-1.5 rounded-lg hover:bg-indigo-700 transition text-xs font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1" title="업로드된 오디오에서 AI로 자동 화자 구분">
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M17 20h5v-2a3 3 0 00-5.356-1.857M17 20H7m10 0v-2c0-.656-.126-1.283-.356-1.857M7 20H2v-2a3 3 0 015.356-1.857M7 20v-2c0-.656.126-1.283.356-1.857m0 0a5.002 5.002 0 019.288 0M15 7a3 3 0 11-6 0 3 3 0 016 0z" /></svg>
화자 분리
</button>
{/* AI 요약 버튼 */}
<button onClick={onSummarize} disabled={summarizing || diarizing || isRecording || !hasSegments} className="bg-purple-600 text-white px-3 py-1.5 rounded-lg hover:bg-purple-700 transition text-xs font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1">
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z" /></svg>
AI 요약
</button>

View File

@@ -1341,6 +1341,7 @@
Route::post('/{id}/segments', [MeetingMinuteController::class, 'saveSegments'])->name('save-segments');
Route::post('/{id}/upload-audio', [MeetingMinuteController::class, 'uploadAudio'])->name('upload-audio');
Route::post('/{id}/summarize', [MeetingMinuteController::class, 'summarize'])->name('summarize');
Route::post('/{id}/diarize', [MeetingMinuteController::class, 'diarize'])->name('diarize');
Route::get('/{id}/download-audio', [MeetingMinuteController::class, 'downloadAudio'])->name('download-audio');
});
});