feat: 회의록 자동 화자 분리(Phase 2) 구현 및 세그먼트 저장 에러 수정

- GoogleCloudService에 speechToTextWithDiarization 메서드 추가
- Google STT V1 diarizationConfig 활성화로 자동 화자 구분
- MeetingMinuteService에 processDiarization 메서드 추가
- POST /{id}/diarize 엔드포인트 및 라우트 추가
- 프론트엔드에 '화자 분리' 버튼 추가 (RecordingControlBar)
- saveSegments 컨트롤러에 try-catch 에러 핸들링 추가
- 빈 텍스트 세그먼트 필터링 로직 추가 (서버/클라이언트 양쪽)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
김보곤
2026-02-10 10:29:16 +09:00
parent 0f312bcf77
commit b2fbd3d113
5 changed files with 418 additions and 20 deletions

View File

@@ -288,6 +288,218 @@ private function waitForSttOperation(string $operationName, int $maxAttempts = 6
return null;
}
/**
 * Transcribe an audio file with speaker diarization enabled.
 *
 * Starts a Speech-to-Text v1 long-running recognize operation for the given
 * URI, waits for it through waitForSttDiarizationOperation(), and passes the
 * finished operation to parseDiarizationResult().
 *
 * The request always declares the audio as WEBM_OPUS at 48000 Hz.
 *
 * @param string $gcsUri       Audio location, sent as `audio.uri`.
 * @param string $languageCode Language code sent to the API.
 * @param int    $minSpeakers  Sent as `diarizationConfig.minSpeakerCount`.
 * @param int    $maxSpeakers  Sent as `diarizationConfig.maxSpeakerCount`.
 *
 * @return array|null Whatever parseDiarizationResult() returns, or null when
 *                    no access token is available, the start request fails,
 *                    the response has no operation name, waiting yields no
 *                    result, or an exception is caught.
 */
public function speechToTextWithDiarization(
    string $gcsUri,
    string $languageCode = 'ko-KR',
    int $minSpeakers = 2,
    int $maxSpeakers = 6
): ?array {
    $accessToken = $this->getAccessToken();
    if (! $accessToken) {
        return null;
    }

    // Recognition settings; word time offsets are requested alongside diarization.
    $recognitionConfig = [
        'encoding' => 'WEBM_OPUS',
        'sampleRateHertz' => 48000,
        'languageCode' => $languageCode,
        'enableAutomaticPunctuation' => true,
        'model' => 'latest_long',
        'enableWordTimeOffsets' => true,
        'diarizationConfig' => [
            'enableSpeakerDiarization' => true,
            'minSpeakerCount' => $minSpeakers,
            'maxSpeakerCount' => $maxSpeakers,
        ],
    ];

    try {
        $startResponse = Http::withToken($accessToken)->post(
            'https://speech.googleapis.com/v1/speech:longrunningrecognize',
            [
                'config' => $recognitionConfig,
                'audio' => [
                    'uri' => $gcsUri,
                ],
            ]
        );

        if (! $startResponse->successful()) {
            Log::error('Google Cloud: STT Diarization 요청 실패', ['response' => $startResponse->body()]);

            return null;
        }

        // The operation name is needed to poll for the finished result.
        $operationName = $startResponse->json()['name'] ?? null;
        if (! $operationName) {
            Log::error('Google Cloud: STT Diarization 작업 이름 없음');

            return null;
        }

        Log::info('Google Cloud: STT Diarization 요청 시작', ['operationName' => $operationName]);

        $finishedOperation = $this->waitForSttDiarizationOperation($operationName);

        return $finishedOperation
            ? $this->parseDiarizationResult($finishedOperation)
            : null;
    } catch (\Exception $exception) {
        Log::error('Google Cloud: STT Diarization 예외', ['error' => $exception->getMessage()]);

        return null;
    }
}
/**
 * Poll a Speech-to-Text operation until it reports completion.
 *
 * Sleeps 5 seconds before every poll, so the default of 60 attempts spends at
 * least five minutes sleeping before giving up. A poll that does not return a
 * successful HTTP status is logged and retried on the next attempt.
 *
 * @param string $operationName Operation name to look up under `/v1/operations/`.
 * @param int    $maxAttempts   Maximum number of polls before timing out.
 *
 * @return array|null The raw operation payload once `done` is truthy and no
 *                    `error` key is present; null when no access token is
 *                    available, the operation reports an error, or the
 *                    attempts are exhausted.
 */
private function waitForSttDiarizationOperation(string $operationName, int $maxAttempts = 60): ?array
{
    $token = $this->getAccessToken();
    if (! $token) {
        return null;
    }

    for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
        sleep(5);

        $response = Http::withToken($token)
            ->get("https://speech.googleapis.com/v1/operations/{$operationName}");

        if (! $response->successful()) {
            // Keep polling, but leave a trace: without it a persistent HTTP
            // failure is indistinguishable from a plain timeout in the logs.
            Log::warning('Google Cloud: STT Diarization 상태 조회 실패', [
                'operationName' => $operationName,
                'attempt' => $attempt,
                'status' => $response->status(),
            ]);

            continue;
        }

        $result = $response->json();

        // Not finished yet (or no usable payload): try again on the next attempt.
        if (empty($result['done'])) {
            continue;
        }

        if (isset($result['error'])) {
            Log::error('Google Cloud: STT Diarization 작업 실패', ['error' => $result['error']]);

            return null;
        }

        return $result;
    }

    Log::error('Google Cloud: STT Diarization 작업 타임아웃');

    return null;
}
/**
 * Convert a finished diarization operation into per-speaker segments.
 *
 * Word-level entries are read from the first alternative of the last result.
 * Consecutive words carrying the same `speakerTag` are merged into one
 * segment; a segment runs from the start time of its first word to the end
 * time of its last word.
 *
 * When no word-level entries are present, the transcripts of all results are
 * concatenated into a single segment attributed to speaker 1, with no end time.
 *
 * @param array $operationResult Raw operation payload; results are read from
 *                               `response.results`.
 *
 * @return array|null Array with keys `segments`, `full_transcript` and
 *                    `speaker_count`, or null when the payload has no results.
 */
private function parseDiarizationResult(array $operationResult): ?array
{
    $results = $operationResult['response']['results'] ?? [];
    if (empty($results)) {
        return null;
    }

    // The word-level speaker information is taken from the last result.
    $lastResult = end($results);
    $words = $lastResult['alternatives'][0]['words'] ?? [];

    if (empty($words)) {
        // No word-level data: fall back to the plain transcript as one segment.
        $transcript = '';
        foreach ($results as $res) {
            $transcript .= ($res['alternatives'][0]['transcript'] ?? '') . ' ';
        }

        return [
            'segments' => [[
                'speaker_name' => '화자 1',
                'speaker_label' => '1',
                'text' => trim($transcript),
                'start_time_ms' => 0,
                'end_time_ms' => null,
                'is_manual_speaker' => false,
            ]],
            'full_transcript' => '[화자 1] ' . trim($transcript),
            'speaker_count' => 1,
        ];
    }

    // Builds one segment entry from the words collected for a single speaker run.
    $buildSegment = static function ($speaker, array $texts, int $startMs, int $endMs): array {
        return [
            'speaker_name' => '화자 ' . $speaker,
            'speaker_label' => (string) $speaker,
            'text' => trim(implode(' ', $texts)),
            'start_time_ms' => $startMs,
            'end_time_ms' => $endMs,
            'is_manual_speaker' => false,
        ];
    };

    // Group consecutive words of the same speaker into segments.
    $segments = [];
    $currentSpeaker = null;
    $currentWords = [];
    $segmentStartMs = 0;
    $segmentEndMs = 0;

    foreach ($words as $word) {
        $speakerTag = $word['speakerTag'] ?? 0;
        $startMs = $this->parseGoogleTimeToMs($word['startTime'] ?? '0s');
        $endMs = $this->parseGoogleTimeToMs($word['endTime'] ?? '0s');

        if ($currentSpeaker === null) {
            // First word: the first segment starts where speech starts, not at 0.
            $segmentStartMs = $startMs;
        } elseif ($speakerTag !== $currentSpeaker) {
            // Speaker changed: close the running segment at the end of its own
            // last word, so the pause before the next speaker is not included.
            $segments[] = $buildSegment($currentSpeaker, $currentWords, $segmentStartMs, $segmentEndMs);
            $currentWords = [];
            $segmentStartMs = $startMs;
        }

        $currentSpeaker = $speakerTag;
        $currentWords[] = $word['word'] ?? '';
        $segmentEndMs = $endMs;
    }

    // Flush the trailing segment.
    if (! empty($currentWords)) {
        $segments[] = $buildSegment($currentSpeaker, $currentWords, $segmentStartMs, $segmentEndMs);
    }

    // One "[speaker] text" line per segment.
    $fullTranscript = '';
    foreach ($segments as $seg) {
        $fullTranscript .= "[{$seg['speaker_name']}] {$seg['text']}\n";
    }

    // Number of distinct speaker labels across all segments.
    $speakerCount = count(array_unique(array_column($segments, 'speaker_label')));

    return [
        'segments' => $segments,
        'full_transcript' => trim($fullTranscript),
        'speaker_count' => $speakerCount,
    ];
}
/**
 * Convert a duration string such as "1.500s" into whole milliseconds.
 *
 * @param string $timeStr Digits and dots followed by a trailing "s".
 *
 * @return int The duration rounded to the nearest millisecond, or 0 when the
 *             string does not match the expected format.
 */
private function parseGoogleTimeToMs(string $timeStr): int
{
    $captured = [];
    $isDuration = preg_match('/^([\d.]+)s$/', $timeStr, $captured) === 1;

    if (! $isDuration) {
        return 0;
    }

    $seconds = (float) $captured[1];

    return (int) round($seconds * 1000);
}
/**
* GCS 파일 삭제
*/