diff --git a/resources/views/juil/meeting-minutes.blade.php b/resources/views/juil/meeting-minutes.blade.php
index 84cc76b7..b8758c58 100644
--- a/resources/views/juil/meeting-minutes.blade.php
+++ b/resources/views/juil/meeting-minutes.blade.php
@@ -257,8 +257,8 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
     const [recordingTime, setRecordingTime] = useState(0);
     const [localSegments, setLocalSegments] = useState([]);
     const [interimText, setInterimText] = useState('');
-    const [currentSpeakerIdx, setCurrentSpeakerIdx] = useState(0);
-    const [speakers, setSpeakers] = useState([{ name: '화자 1' }, { name: '화자 2' }]);
+    const [detectedSpeaker, setDetectedSpeaker] = useState('화자 1');
+    const [speakers, setSpeakers] = useState([{ name: '화자 1' }]);
     const [sttLanguage, setSttLanguage] = useState('ko-KR');
 
     // 편집 상태
@@ -289,6 +289,11 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
     const audioContextRef = useRef(null);
     const analyserRef = useRef(null);
     const gainNodeRef = useRef(null);
+    const rawAnalyserRef = useRef(null);
+    const speakerDetectorRef = useRef(null);
+    const speakerProfilesRef = useRef([]);
+    const recentFeaturesRef = useRef([]);
+    const detectedSpeakerRef = useRef('화자 1');
 
     const loadMeeting = useCallback(async () => {
         try {
@@ -337,14 +342,122 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
         }
     }, [localSegments, interimText]);
 
-    const currentSpeaker = speakers[currentSpeakerIdx] || speakers[0];
-    const speakerColor = SPEAKER_COLORS[currentSpeakerIdx % SPEAKER_COLORS.length];
-
     const getSpeakerColor = (name) => { const idx = speakers.findIndex(s => s.name === name); return SPEAKER_COLORS[idx >= 0 ? idx % SPEAKER_COLORS.length : 0]; };
 
+    // ===== 화자 분류 (Spectral Centroid 기반) =====
+    const classifySpeaker = useCallback((centroid, spread) => {
+        const profiles = speakerProfilesRef.current;
+
+        if (profiles.length === 0) {
+            profiles.push({ centroid, spread, count: 1, name: '화자 1' });
+            return '화자 1';
+        }
+
+        const distances = profiles.map(p => Math.abs(p.centroid - centroid) / p.centroid);
+        const minDist = Math.min(...distances);
+        const minIdx = distances.indexOf(minDist);
+
+        if (minDist < 0.15) {
+            const p = profiles[minIdx];
+            p.centroid = (p.centroid * p.count + centroid) / (p.count + 1);
+            p.spread = (p.spread * p.count + spread) / (p.count + 1);
+            p.count++;
+            return p.name;
+        }
+
+        if (profiles.length < 4) {
+            const newName = `화자 ${profiles.length + 1}`;
+            profiles.push({ centroid, spread, count: 1, name: newName });
+            setSpeakers(prev => {
+                if (prev.find(s => s.name === newName)) return prev;
+                return [...prev, { name: newName }];
+            });
+            return newName;
+        }
+
+        return profiles[minIdx].name;
+    }, []);
+
+    // ===== 화자 감지 (100ms 간격 호출) =====
+    const detectSpeaker = useCallback(() => {
+        const analyser = rawAnalyserRef.current;
+        if (!analyser) return;
+
+        const sampleRate = audioContextRef.current?.sampleRate || 48000;
+        const fftSize = analyser.fftSize;
+        const binCount = analyser.frequencyBinCount; // fftSize / 2
+        const binHz = sampleRate / fftSize; // 각 bin의 Hz 간격
+
+        // 주파수 데이터 (0~255)
+        const freqData = new Uint8Array(binCount);
+        analyser.getByteFrequencyData(freqData);
+
+        // 시간 도메인 데이터 (VAD용)
+        const timeData = new Uint8Array(fftSize);
+        analyser.getByteTimeDomainData(timeData);
+
+        // RMS 계산 (VAD)
+        let rmsSum = 0;
+        for (let i = 0; i < timeData.length; i++) {
+            const v = (timeData[i] - 128) / 128;
+            rmsSum += v * v;
+        }
+        const rms = Math.sqrt(rmsSum / timeData.length);
+
+        // VAD: 음성 미감지 시 무시
+        if (rms < 0.015) return;
+
+        // 음성 범위 bin (85Hz ~ 1000Hz)
+        const minBin = Math.floor(85 / binHz);
+        const maxBin = Math.min(Math.ceil(1000 / binHz), binCount - 1);
+
+        // Spectral Centroid 계산
+        let weightedSum = 0;
+        let amplitudeSum = 0;
+        for (let i = minBin; i <= maxBin; i++) {
+            const freq = i * binHz;
+            const amp = freqData[i];
+            weightedSum += freq * amp;
+            amplitudeSum += amp;
+        }
+
+        if (amplitudeSum < 10) return; // 에너지 부족
+
+        const centroid = weightedSum / amplitudeSum;
+
+        // Spectral Spread 계산
+        let spreadSum = 0;
+        for (let i = minBin; i <= maxBin; i++) {
+            const freq = i * binHz;
+            const amp = freqData[i];
+            spreadSum += Math.pow(freq - centroid, 2) * amp;
+        }
+        const spread = Math.sqrt(spreadSum / amplitudeSum);
+
+        // 최근 피처 버퍼에 추가 (500ms 윈도우)
+        const now = Date.now();
+        const features = recentFeaturesRef.current;
+        features.push({ centroid, spread, timestamp: now });
+
+        // 500ms 이전 피처 제거
+        const cutoff = now - 500;
+        recentFeaturesRef.current = features.filter(f => f.timestamp > cutoff);
+
+        // 다수결: 최근 피처들의 화자 분류
+        const votes = recentFeaturesRef.current.map(f => classifySpeaker(f.centroid, f.spread));
+        if (votes.length === 0) return;
+
+        // 최빈값 계산
+        const counts = {};
+        votes.forEach(v => { counts[v] = (counts[v] || 0) + 1; });
+        const winner = Object.entries(counts).sort((a, b) => b[1] - a[1])[0][0];
+
+        setDetectedSpeaker(winner);
+    }, [classifySpeaker]);
+
     // ===== 녹음 시작 =====
     const startRecording = async () => {
         try {
@@ -386,8 +499,15 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
             // MediaStreamDestination: 처리된 스트림
             const destination = audioCtx.createMediaStreamDestination();
 
-            // 체인 연결: source → gain → compressor → analyser → destination
-            source.connect(gainNode);
+            // rawAnalyser: 화자 감지용 (compressor 이전, 원본 신호 분석)
+            const rawAnalyser = audioCtx.createAnalyser();
+            rawAnalyser.fftSize = 2048;
+            rawAnalyser.smoothingTimeConstant = 0.3;
+            rawAnalyserRef.current = rawAnalyser;
+
+            // 체인 연결: source → rawAnalyser(화자감지) → gain → compressor → analyser → destination
+            source.connect(rawAnalyser);
+            rawAnalyser.connect(gainNode);
             gainNode.connect(compressor);
             compressor.connect(analyser);
             analyser.connect(destination);
@@ -429,7 +549,7 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
                 const now = Date.now();
                 const startMs = startTimeRef.current ? now - startTimeRef.current : 0;
                 setLocalSegments(prev => [...prev, {
-                    speaker_name: currentSpeaker.name,
+                    speaker_name: detectedSpeakerRef.current,
                     text: text.trim(),
                     start_time_ms: startMs,
                     end_time_ms: null,
@@ -461,6 +581,13 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
             startTimeRef.current = Date.now();
             setRecordingTime(0);
             setIsRecording(true);
+
+            // 화자 감지 시작
+            speakerProfilesRef.current = [];
+            recentFeaturesRef.current = [];
+            setDetectedSpeaker('화자 1');
+            setSpeakers([{ name: '화자 1' }]);
+            speakerDetectorRef.current = setInterval(detectSpeaker, 100);
         } catch (e) {
             showToast('마이크 접근 권한이 필요합니다.', 'error');
         }
@@ -469,6 +596,7 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
     // isRecording ref (onend에서 접근)
     const isRecordingRef = useRef(false);
     useEffect(() => { isRecordingRef.current = isRecording; }, [isRecording]);
+    useEffect(() => { detectedSpeakerRef.current = detectedSpeaker; }, [detectedSpeaker]);
 
     // ===== 녹음 중지 =====
     const stopRecording = async () => {
@@ -485,12 +613,19 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
             streamRef.current = null;
         }
 
+        // 화자 감지 중지
+        if (speakerDetectorRef.current) {
+            clearInterval(speakerDetectorRef.current);
+            speakerDetectorRef.current = null;
+        }
+
         // AudioContext 정리
        if (audioContextRef.current) {
             audioContextRef.current.close().catch(() => {});
             audioContextRef.current = null;
             analyserRef.current = null;
             gainNodeRef.current = null;
+            rawAnalyserRef.current = null;
         }
 
         // MediaRecorder 중지 → blob 생성
@@ -613,17 +748,6 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
         }
     };
 
-    // ===== 화자 전환 =====
-    const switchSpeaker = (idx) => {
-        setCurrentSpeakerIdx(idx);
-    };
-
-    const addSpeaker = () => {
-        if (speakers.length >= 4) return;
-        const newIdx = speakers.length;
-        setSpeakers(prev => [...prev, { name: `화자 ${newIdx + 1}` }]);
-    };
-
     // ===== 제목 인라인 편집 =====
     const saveTitle = async () => {
         if (!titleValue.trim()) return;
@@ -881,7 +1005,7 @@ function MeetingDetail({ meetingId, onBack, showToast }) {
                 {/* Left: Transcript */}