fix: 회의록 화자 텍스트에서 __ 노이즈 문자 제거

- GoogleCloudService: STT 결과에서 언더스코어만으로 구성된 단어 필터링
- 프론트엔드: 기존 저장된 데이터 표시 시에도 언더스코어 제거

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
김보곤
2026-02-11 20:13:01 +09:00
parent ae58f11179
commit b4f5d1ff1a
2 changed files with 8 additions and 2 deletions

View File

@@ -442,6 +442,12 @@ private function parseDiarizationResult(array $operationResult): ?array
$startMs = $this->parseGoogleTimeToMs($word['startTime'] ?? '0s');
$endMs = $this->parseGoogleTimeToMs($word['endTime'] ?? '0s');
// 언더스코어만으로 구성된 노이즈 단어 제거
$cleanWord = preg_replace('/^_+$/', '', $wordText);
if ($cleanWord === '') {
continue;
}
if ($speakerTag !== $currentSpeaker && $currentSpeaker !== null && ! empty($currentWords)) {
$segments[] = [
'speaker_name' => '화자 ' . $currentSpeaker,
@@ -456,7 +462,7 @@ private function parseDiarizationResult(array $operationResult): ?array
}
$currentSpeaker = $speakerTag;
$currentWords[] = $wordText;
$currentWords[] = $cleanWord;
}
// 마지막 세그먼트

View File

@@ -796,7 +796,7 @@ function ConversationView({ segments, interimText, isRecording, currentSpeaker,
)}
</div>
<div className="text-sm text-gray-800 leading-relaxed">
{group.texts.map((t, ti) => <span key={ti}>{ti > 0 ? ' ' : ''}{t.text}</span>)}
{group.texts.map((t, ti) => <span key={ti}>{ti > 0 ? ' ' : ''}{t.text.replace(/_+/g, '')}</span>)}
</div>
</div>
);